feat: HTMLへのサニタイズを外部ライブラリで行うように

This commit is contained in:
usbharu 2024-01-24 21:25:27 +09:00
parent ea04150501
commit 06ce40991c
8 changed files with 111 additions and 70 deletions

View File

@ -216,6 +216,7 @@ dependencies {
implementation("dev.usbharu:emoji-kt:2.0.0") implementation("dev.usbharu:emoji-kt:2.0.0")
implementation("org.jsoup:jsoup:1.17.2") implementation("org.jsoup:jsoup:1.17.2")
implementation("com.googlecode.owasp-java-html-sanitizer:owasp-java-html-sanitizer:20220608.1")
implementation("io.ktor:ktor-client-logging-jvm:$ktor_version") implementation("io.ktor:ktor-client-logging-jvm:$ktor_version")

View File

@ -0,0 +1,21 @@
package dev.usbharu.hideout.application.config
import org.owasp.html.HtmlPolicyBuilder
import org.owasp.html.PolicyFactory
import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
/**
 * Spring configuration that exposes the HTML sanitizing policy as a bean.
 */
@Configuration
class HtmlSanitizeConfig {
    /**
     * Builds the [PolicyFactory] used to sanitize HTML.
     *
     * The policy allows only `p`, `a` and `br` elements, the `href` attribute on `a`,
     * and the `http` / `https` URL protocols. Heading elements `h1`..`h6` are accepted
     * through an element policy that always answers `"p"`, i.e. they are emitted as
     * paragraphs instead of headings.
     *
     * @return the sanitizer policy factory.
     */
    @Bean
    fun policy(): PolicyFactory =
        HtmlPolicyBuilder()
            .allowElements("p", "a", "br")
            .allowAttributes("href").onElements("a")
            .allowUrlProtocols("http", "https")
            // Every heading level is mapped onto a plain paragraph.
            .allowElements({ _, _ -> "p" }, "h1", "h2", "h3", "h4", "h5", "h6")
            .toFactory()
}

View File

@ -1,26 +1,34 @@
package dev.usbharu.hideout.core.service.post package dev.usbharu.hideout.core.service.post
import org.jsoup.Jsoup import org.jsoup.Jsoup
import org.jsoup.nodes.Attributes import org.jsoup.nodes.Document
import org.jsoup.nodes.Element import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode import org.jsoup.nodes.TextNode
import org.jsoup.parser.Tag
import org.jsoup.select.Elements import org.jsoup.select.Elements
import org.owasp.html.PolicyFactory
import org.springframework.stereotype.Service import org.springframework.stereotype.Service
@Service @Service
class DefaultPostContentFormatter : PostContentFormatter { class DefaultPostContentFormatter(private val policyFactory: PolicyFactory) : PostContentFormatter {
override fun format(content: String): FormattedPostContent { override fun format(content: String): FormattedPostContent {
val document =
Jsoup.parseBodyFragment(content).getElementsByTag("body").first() ?: return FormattedPostContent("", "")
val flattenHtml = document.childNodes().mapNotNull { //まず不正なHTMLを整形する
val document = Jsoup.parseBodyFragment(content)
val outputSettings = Document.OutputSettings()
outputSettings.prettyPrint(false)
document.outputSettings(outputSettings)
val unsafeElement = document.getElementsByTag("body").first() ?: return FormattedPostContent(
"",
""
)
//文字だけのHTMLなどはここでpタグで囲む
val flattenHtml = unsafeElement.childNodes().mapNotNull {
if (it is Element) { if (it is Element) {
if (it.tagName() == "p") { it
p(it)
} else {
p(Element("p").appendChildren(document.childNodes()))
}
} else if (it is TextNode) { } else if (it is TextNode) {
Element("p").appendText(it.text()) Element("p").appendText(it.text())
} else { } else {
@ -28,9 +36,20 @@ class DefaultPostContentFormatter : PostContentFormatter {
} }
}.filter { it.text().isNotBlank() } }.filter { it.text().isNotBlank() }
// HTMLのサニタイズをする
val unsafeHtml = Elements(flattenHtml).outerHtml()
val safeHtml = policyFactory.sanitize(unsafeHtml)
val safeDocument =
Jsoup.parseBodyFragment(safeHtml).getElementsByTag("body").first() ?: return FormattedPostContent("", "")
val formattedHtml = mutableListOf<Element>() val formattedHtml = mutableListOf<Element>()
for (element in flattenHtml) {
//連続するbrタグを段落に変換する
for (element in safeDocument.children()) {
var brCount = 0 var brCount = 0
var prevIndex = 0 var prevIndex = 0
val childNodes = element.childNodes() val childNodes = element.childNodes()
@ -51,46 +70,6 @@ class DefaultPostContentFormatter : PostContentFormatter {
return FormattedPostContent(elements.outerHtml().replace("\n", ""), printHtml(elements)) return FormattedPostContent(elements.outerHtml().replace("\n", ""), printHtml(elements))
} }
/**
 * Rebuilds [element] as a fresh `<p>` element containing only text, `<a>` and `<br>` nodes.
 *
 * When the only child is a text node, the new paragraph simply carries the element's text.
 * Otherwise each direct child is converted: `<a>` goes through [a], `<br>` becomes a new
 * empty `<br>`, any other element is flattened to its text, text nodes are kept as they
 * are, and every other node type is dropped.
 *
 * @param element the source element whose children are converted.
 * @return a newly created `<p>` element.
 */
private fun p(element: Element): Element {
    val children = element.childNodes()
    val paragraph = Element("p")

    // Fast path: a single text child is copied over as plain text.
    if (children.size == 1 && children.first() is TextNode) {
        return paragraph.appendText(element.text())
    }

    val converted = children.mapNotNull { node ->
        when (node) {
            is Element -> when (node.tagName()) {
                "a" -> a(node)
                "br" -> Element("br")
                // Unknown elements keep their text but lose their markup.
                else -> TextNode(node.text())
            }

            is TextNode -> node
            else -> null
        }
    }
    return paragraph.appendChildren(converted)
}
/**
 * Creates a new `<a>` element that keeps only the `href` attribute and the text of [element].
 *
 * NOTE(review): presumably fails when [element] has no `href` attribute, since the looked-up
 * attribute is dereferenced unconditionally - confirm against the jsoup API.
 *
 * @param element the source anchor element.
 * @return a newly created `<a>` element.
 */
private fun a(element: Element): Element {
    val hrefOnly = Attributes().apply {
        put("href", element.attribute("href").value)
    }
    val anchor = Element(Tag.valueOf("a"), "", hrefOnly)
    return anchor.appendText(element.text())
}
private fun printHtml(element: Elements): String { private fun printHtml(element: Elements): String {
return element.joinToString("\n\n") { return element.joinToString("\n\n") {
it.childNodes().joinToString("") { it.childNodes().joinToString("") {

View File

@ -12,10 +12,12 @@ import dev.usbharu.hideout.activitypub.service.common.APResourceResolveService
import dev.usbharu.hideout.activitypub.service.objects.note.APNoteServiceImpl.Companion.public import dev.usbharu.hideout.activitypub.service.objects.note.APNoteServiceImpl.Companion.public
import dev.usbharu.hideout.activitypub.service.objects.user.APUserService import dev.usbharu.hideout.activitypub.service.objects.user.APUserService
import dev.usbharu.hideout.application.config.CharacterLimit import dev.usbharu.hideout.application.config.CharacterLimit
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import dev.usbharu.hideout.application.service.id.TwitterSnowflakeIdGenerateService import dev.usbharu.hideout.application.service.id.TwitterSnowflakeIdGenerateService
import dev.usbharu.hideout.core.domain.model.actor.ActorRepository import dev.usbharu.hideout.core.domain.model.actor.ActorRepository
import dev.usbharu.hideout.core.domain.model.post.Post import dev.usbharu.hideout.core.domain.model.post.Post
import dev.usbharu.hideout.core.domain.model.post.PostRepository import dev.usbharu.hideout.core.domain.model.post.PostRepository
import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter
import dev.usbharu.hideout.core.service.post.PostService import dev.usbharu.hideout.core.service.post.PostService
import io.ktor.client.* import io.ktor.client.*
import io.ktor.client.call.* import io.ktor.client.call.*
@ -42,7 +44,7 @@ import java.time.Instant
class APNoteServiceImplTest { class APNoteServiceImplTest {
val postBuilder = Post.PostBuilder(CharacterLimit()) val postBuilder = Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy()))
@Test @Test
fun `fetchNote(String,String) ートが既に存在する場合はDBから取得したものを返す`() = runTest { fun `fetchNote(String,String) ートが既に存在する場合はDBから取得したものを返す`() = runTest {
@ -71,7 +73,10 @@ class APNoteServiceImplTest {
apUserService = mock(), apUserService = mock(),
postService = mock(), postService = mock(),
apResourceResolveService = mock(), apResourceResolveService = mock(),
postBuilder = Post.PostBuilder(CharacterLimit()), postBuilder = Post.PostBuilder(
CharacterLimit(),
DefaultPostContentFormatter(HtmlSanitizeConfig().policy())
),
noteQueryService = noteQueryService, noteQueryService = noteQueryService,
mock(), mock(),
mock() mock()
@ -141,7 +146,10 @@ class APNoteServiceImplTest {
apUserService = apUserService, apUserService = apUserService,
postService = mock(), postService = mock(),
apResourceResolveService = apResourceResolveService, apResourceResolveService = apResourceResolveService,
postBuilder = Post.PostBuilder(CharacterLimit()), postBuilder = Post.PostBuilder(
CharacterLimit(),
DefaultPostContentFormatter(HtmlSanitizeConfig().policy())
),
noteQueryService = noteQueryService, noteQueryService = noteQueryService,
mock(), mock(),
mock { } mock { }
@ -190,7 +198,10 @@ class APNoteServiceImplTest {
apUserService = mock(), apUserService = mock(),
postService = mock(), postService = mock(),
apResourceResolveService = apResourceResolveService, apResourceResolveService = apResourceResolveService,
postBuilder = Post.PostBuilder(CharacterLimit()), postBuilder = Post.PostBuilder(
CharacterLimit(),
DefaultPostContentFormatter(HtmlSanitizeConfig().policy())
),
noteQueryService = noteQueryService, noteQueryService = noteQueryService,
mock(), mock(),
mock() mock()

View File

@ -1,15 +1,18 @@
package dev.usbharu.hideout.core.service.post package dev.usbharu.hideout.core.service.post
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import org.assertj.core.api.Assertions.assertThat import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
class DefaultPostContentFormatterTest { class DefaultPostContentFormatterTest {
val defaultPostContentFormatter = DefaultPostContentFormatter(HtmlSanitizeConfig().policy())
@Test @Test
fun pタグがpタグになる() { fun pタグがpタグになる() {
//language=HTML //language=HTML
val html = """<p>hoge</p>""" val html = """<p>hoge</p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
} }
@ -19,7 +22,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """<h1>hoge</h1>""" val html = """<h1>hoge</h1>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
} }
@ -29,7 +32,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """<p>hoge<p>fuga</p>piyo</p>""" val html = """<p>hoge<p>fuga</p>piyo</p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p><p>fuga</p><p>piyo</p>", "hoge\n\nfuga\n\npiyo")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p><p>fuga</p><p>piyo</p>", "hoge\n\nfuga\n\npiyo"))
} }
@ -39,7 +42,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """<p><span>hoge</span></p>""" val html = """<p><span>hoge</span></p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
} }
@ -49,7 +52,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """<p>hoge<br><br>fuga</p>""" val html = """<p>hoge<br><br>fuga</p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p><p>fuga</p>", "hoge\n\nfuga")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p><p>fuga</p>", "hoge\n\nfuga"))
} }
@ -59,7 +62,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """<p><i>hoge</i></p>""" val html = """<p><i>hoge</i></p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
} }
@ -69,7 +72,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """<p><a href='https://example.com' class='u-url' target='_blank'>hoge</a></p>""" val html = """<p><a href='https://example.com' class='u-url' target='_blank'>hoge</a></p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p><a href=\"https://example.com\">hoge</a></p>", "hoge")) assertThat(actual).isEqualTo(FormattedPostContent("<p><a href=\"https://example.com\">hoge</a></p>", "hoge"))
} }
@ -79,7 +82,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """<p><a href='https://example.com'><span>hoge</span></a></p>""" val html = """<p><a href='https://example.com'><span>hoge</span></a></p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p><a href=\"https://example.com\">hoge</a></p>", "hoge")) assertThat(actual).isEqualTo(FormattedPostContent("<p><a href=\"https://example.com\">hoge</a></p>", "hoge"))
} }
@ -89,7 +92,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """<p>hoge<br>fuga</p>""" val html = """<p>hoge<br>fuga</p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge<br> fuga</p>", "hoge\nfuga")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge<br> fuga</p>", "hoge\nfuga"))
} }
@ -99,7 +102,7 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """hoge""" val html = """hoge"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
} }
@ -109,8 +112,24 @@ class DefaultPostContentFormatterTest {
//language=HTML //language=HTML
val html = """</body><p>hoge</p>""" val html = """</body><p>hoge</p>"""
val actual = DefaultPostContentFormatter().format(html) val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge")) assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
} }
/**
 * A mention wrapped in `<span>` elements inside a `<p>` is expected to be formatted
 * down to its link and text only.
 */
@Test
fun pタグの中のspanは無視される() {
    //language=HTML
    val html =
        """<p><span class="h-card" translate="no"><a href="https://test-hideout.usbharu.dev/users/testuser14" class="u-url mention">@<span>testuser14</span></a></span> tes</p>"""
    val expected = FormattedPostContent(
        "<p><a href=\"https://test-hideout.usbharu.dev/users/testuser14\">@testuser14</a> tes</p>",
        "@testuser14 tes"
    )

    val actual = defaultPostContentFormatter.format(html)

    assertThat(actual).isEqualTo(expected)
}
} }

View File

@ -3,6 +3,7 @@ package dev.usbharu.hideout.core.service.post
import dev.usbharu.hideout.activitypub.service.activity.create.ApSendCreateService import dev.usbharu.hideout.activitypub.service.activity.create.ApSendCreateService
import dev.usbharu.hideout.activitypub.service.activity.delete.APSendDeleteService import dev.usbharu.hideout.activitypub.service.activity.delete.APSendDeleteService
import dev.usbharu.hideout.application.config.CharacterLimit import dev.usbharu.hideout.application.config.CharacterLimit
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import dev.usbharu.hideout.core.domain.exception.resource.DuplicateException import dev.usbharu.hideout.core.domain.exception.resource.DuplicateException
import dev.usbharu.hideout.core.domain.model.actor.ActorRepository import dev.usbharu.hideout.core.domain.model.actor.ActorRepository
import dev.usbharu.hideout.core.domain.model.post.Post import dev.usbharu.hideout.core.domain.model.post.Post
@ -36,7 +37,11 @@ class PostServiceImplTest {
@Mock @Mock
private lateinit var timelineService: TimelineService private lateinit var timelineService: TimelineService
@Spy @Spy
private var postBuilder: Post.PostBuilder = Post.PostBuilder(CharacterLimit()) private var postBuilder: Post.PostBuilder = Post.PostBuilder(
CharacterLimit(), DefaultPostContentFormatter(
HtmlSanitizeConfig().policy()
)
)
@Mock @Mock
private lateinit var apSendCreateService: ApSendCreateService private lateinit var apSendCreateService: ApSendCreateService

View File

@ -4,9 +4,11 @@ package dev.usbharu.hideout.core.service.user
import dev.usbharu.hideout.application.config.ApplicationConfig import dev.usbharu.hideout.application.config.ApplicationConfig
import dev.usbharu.hideout.application.config.CharacterLimit import dev.usbharu.hideout.application.config.CharacterLimit
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import dev.usbharu.hideout.core.domain.model.actor.Actor import dev.usbharu.hideout.core.domain.model.actor.Actor
import dev.usbharu.hideout.core.domain.model.actor.ActorRepository import dev.usbharu.hideout.core.domain.model.actor.ActorRepository
import dev.usbharu.hideout.core.domain.model.post.Post import dev.usbharu.hideout.core.domain.model.post.Post
import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter
import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.test.runTest import kotlinx.coroutines.test.runTest
import org.junit.jupiter.api.Test import org.junit.jupiter.api.Test
@ -20,7 +22,7 @@ import kotlin.test.assertNull
class ActorServiceTest { class ActorServiceTest {
val actorBuilder = Actor.UserBuilder(CharacterLimit(), ApplicationConfig(URL("https://example.com"))) val actorBuilder = Actor.UserBuilder(CharacterLimit(), ApplicationConfig(URL("https://example.com")))
val postBuilder = Post.PostBuilder(CharacterLimit()) val postBuilder = Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy()))
@Test @Test
fun `createLocalUser ローカルユーザーを作成できる`() = runTest { fun `createLocalUser ローカルユーザーを作成できる`() = runTest {

View File

@ -1,15 +1,18 @@
package utils package utils
import dev.usbharu.hideout.application.config.CharacterLimit import dev.usbharu.hideout.application.config.CharacterLimit
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import dev.usbharu.hideout.application.service.id.TwitterSnowflakeIdGenerateService import dev.usbharu.hideout.application.service.id.TwitterSnowflakeIdGenerateService
import dev.usbharu.hideout.core.domain.model.post.Post import dev.usbharu.hideout.core.domain.model.post.Post
import dev.usbharu.hideout.core.domain.model.post.Visibility import dev.usbharu.hideout.core.domain.model.post.Visibility
import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter
import kotlinx.coroutines.runBlocking import kotlinx.coroutines.runBlocking
import java.time.Instant import java.time.Instant
object PostBuilder { object PostBuilder {
private val postBuilder = Post.PostBuilder(CharacterLimit()) private val postBuilder =
Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy()))
private val idGenerator = TwitterSnowflakeIdGenerateService private val idGenerator = TwitterSnowflakeIdGenerateService