From 06ce40991cbd192eac8722040af87e92abb2fcfe Mon Sep 17 00:00:00 2001 From: usbharu <64310155+usbharu@users.noreply.github.com> Date: Wed, 24 Jan 2024 21:25:27 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20HTML=E3=81=B8=E3=81=AE=E3=82=B5?= =?UTF-8?q?=E3=83=8B=E3=82=BF=E3=82=A4=E3=82=BA=E3=82=92=E5=A4=96=E9=83=A8?= =?UTF-8?q?=E3=83=A9=E3=82=A4=E3=83=96=E3=83=A9=E3=83=AA=E3=81=A7=E8=A1=8C?= =?UTF-8?q?=E3=81=86=E3=82=88=E3=81=86=E3=81=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build.gradle.kts | 1 + .../application/config/HtmlSanitizeConfig.kt | 21 +++++ .../post/DefaultPostContentFormatter.kt | 83 +++++++------------ .../objects/note/APNoteServiceImplTest.kt | 19 ++++- .../post/DefaultPostContentFormatterTest.kt | 41 ++++++--- .../core/service/post/PostServiceImplTest.kt | 7 +- .../core/service/user/ActorServiceTest.kt | 4 +- src/test/kotlin/utils/PostBuilder.kt | 5 +- 8 files changed, 111 insertions(+), 70 deletions(-) create mode 100644 src/main/kotlin/dev/usbharu/hideout/application/config/HtmlSanitizeConfig.kt diff --git a/build.gradle.kts b/build.gradle.kts index 2dc4aed8..14213211 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -216,6 +216,7 @@ dependencies { implementation("dev.usbharu:emoji-kt:2.0.0") implementation("org.jsoup:jsoup:1.17.2") + implementation("com.googlecode.owasp-java-html-sanitizer:owasp-java-html-sanitizer:20220608.1") implementation("io.ktor:ktor-client-logging-jvm:$ktor_version") diff --git a/src/main/kotlin/dev/usbharu/hideout/application/config/HtmlSanitizeConfig.kt b/src/main/kotlin/dev/usbharu/hideout/application/config/HtmlSanitizeConfig.kt new file mode 100644 index 00000000..e7781fe7 --- /dev/null +++ b/src/main/kotlin/dev/usbharu/hideout/application/config/HtmlSanitizeConfig.kt @@ -0,0 +1,21 @@ +package dev.usbharu.hideout.application.config + +import org.owasp.html.HtmlPolicyBuilder +import org.owasp.html.PolicyFactory +import org.springframework.context.annotation.Bean +import org.springframework.context.annotation.Configuration + +@Configuration +class HtmlSanitizeConfig { + @Bean + fun policy(): PolicyFactory { + return HtmlPolicyBuilder() + .allowElements("p") + .allowElements("a") + .allowElements("br") + .allowAttributes("href").onElements("a") + .allowUrlProtocols("http", "https") + .allowElements({ _, _ -> return@allowElements "p" }, "h1", "h2", "h3", "h4", "h5", "h6") + .toFactory() + } +} diff --git a/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt b/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt index 05521940..5418f009 100644 --- a/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt +++ b/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt @@ -1,26 +1,34 @@ package dev.usbharu.hideout.core.service.post import org.jsoup.Jsoup -import org.jsoup.nodes.Attributes +import org.jsoup.nodes.Document import org.jsoup.nodes.Element import org.jsoup.nodes.TextNode -import org.jsoup.parser.Tag import org.jsoup.select.Elements +import org.owasp.html.PolicyFactory import org.springframework.stereotype.Service @Service -class DefaultPostContentFormatter : PostContentFormatter { +class DefaultPostContentFormatter(private val policyFactory: PolicyFactory) : PostContentFormatter { override fun format(content: String): FormattedPostContent { - val document = - Jsoup.parseBodyFragment(content).getElementsByTag("body").first() ?: return FormattedPostContent("", "") - val flattenHtml = document.childNodes().mapNotNull { + //まず不正なHTMLを整形する + val document = Jsoup.parseBodyFragment(content) + val outputSettings = Document.OutputSettings() + outputSettings.prettyPrint(false) + + document.outputSettings(outputSettings) + + val unsafeElement = document.getElementsByTag("body").first() ?: return FormattedPostContent( + "", + "" + ) + + + //文字だけのHTMLなどはここでpタグで囲む + val flattenHtml = unsafeElement.childNodes().mapNotNull { if (it is Element) { - if (it.tagName() == "p") { - p(it) - } else { - p(Element("p").appendChildren(document.childNodes())) - } + it } else if (it is TextNode) { Element("p").appendText(it.text()) } else { @@ -28,9 +36,20 @@ class DefaultPostContentFormatter : PostContentFormatter { } }.filter { it.text().isNotBlank() } + + // HTMLのサニタイズをする + val unsafeHtml = Elements(flattenHtml).outerHtml() + + val safeHtml = policyFactory.sanitize(unsafeHtml) + + val safeDocument = + Jsoup.parseBodyFragment(safeHtml).getElementsByTag("body").first() ?: return FormattedPostContent("", "") + val formattedHtml = mutableListOf() - for (element in flattenHtml) { + + //連続するbrタグを段落に変換する + for (element in safeDocument.children()) { var brCount = 0 var prevIndex = 0 val childNodes = element.childNodes() @@ -51,46 +70,6 @@ class DefaultPostContentFormatter : PostContentFormatter { return FormattedPostContent(elements.outerHtml().replace("\n", ""), printHtml(elements)) } - private fun p(element: Element): Element { - val childNodes = element.childNodes() - - if (childNodes.size == 1 && childNodes.first() is TextNode) { - val pTag = Element("p") - - pTag.appendText(element.text()) - return pTag - } - - val map = childNodes.mapNotNull { - if (it is Element) { - if (it.tagName() == "a") { - a(it) - } else if (it.tagName() == "br") { - Element("br") - } else { - TextNode(it.text()) - } - } else if (it is TextNode) { - it - } else { - null - } - } - - val pTag = Element("p") - - pTag.appendChildren(map) - - return pTag - } - - private fun a(element: Element): Element { - val attributes = Attributes() - - attributes.put("href", element.attribute("href").value) - return Element(Tag.valueOf("a"), "", attributes).appendText(element.text()) - } - private fun printHtml(element: Elements): String { return element.joinToString("\n\n") { it.childNodes().joinToString("") { diff --git a/src/test/kotlin/dev/usbharu/hideout/activitypub/service/objects/note/APNoteServiceImplTest.kt b/src/test/kotlin/dev/usbharu/hideout/activitypub/service/objects/note/APNoteServiceImplTest.kt index e5e1c957..8109bb2d 100644 --- a/src/test/kotlin/dev/usbharu/hideout/activitypub/service/objects/note/APNoteServiceImplTest.kt +++ b/src/test/kotlin/dev/usbharu/hideout/activitypub/service/objects/note/APNoteServiceImplTest.kt @@ -12,10 +12,12 @@ import dev.usbharu.hideout.activitypub.service.common.APResourceResolveService import dev.usbharu.hideout.activitypub.service.objects.note.APNoteServiceImpl.Companion.public import dev.usbharu.hideout.activitypub.service.objects.user.APUserService import dev.usbharu.hideout.application.config.CharacterLimit +import dev.usbharu.hideout.application.config.HtmlSanitizeConfig import dev.usbharu.hideout.application.service.id.TwitterSnowflakeIdGenerateService import dev.usbharu.hideout.core.domain.model.actor.ActorRepository import dev.usbharu.hideout.core.domain.model.post.Post import dev.usbharu.hideout.core.domain.model.post.PostRepository +import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter import dev.usbharu.hideout.core.service.post.PostService import io.ktor.client.* import io.ktor.client.call.* @@ -42,7 +44,7 @@ import java.time.Instant class APNoteServiceImplTest { - val postBuilder = Post.PostBuilder(CharacterLimit()) + val postBuilder = Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy())) @Test fun `fetchNote(String,String) ノートが既に存在する場合はDBから取得したものを返す`() = runTest { @@ -71,7 +73,10 @@ class APNoteServiceImplTest { apUserService = mock(), postService = mock(), apResourceResolveService = mock(), - postBuilder = Post.PostBuilder(CharacterLimit()), + postBuilder = Post.PostBuilder( + CharacterLimit(), + DefaultPostContentFormatter(HtmlSanitizeConfig().policy()) + ), noteQueryService = noteQueryService, mock(), mock() @@ -141,7 +146,10 @@ class APNoteServiceImplTest { apUserService = apUserService, postService = mock(), apResourceResolveService = apResourceResolveService, - postBuilder = Post.PostBuilder(CharacterLimit()), + postBuilder = Post.PostBuilder( + CharacterLimit(), + DefaultPostContentFormatter(HtmlSanitizeConfig().policy()) + ), noteQueryService = noteQueryService, mock(), mock { } @@ -190,7 +198,10 @@ class APNoteServiceImplTest { apUserService = mock(), postService = mock(), apResourceResolveService = apResourceResolveService, - postBuilder = Post.PostBuilder(CharacterLimit()), + postBuilder = Post.PostBuilder( + CharacterLimit(), + DefaultPostContentFormatter(HtmlSanitizeConfig().policy()) + ), noteQueryService = noteQueryService, mock(), mock() diff --git a/src/test/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatterTest.kt b/src/test/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatterTest.kt index ef5ac49b..5b8bc90b 100644 --- a/src/test/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatterTest.kt +++ b/src/test/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatterTest.kt @@ -1,15 +1,18 @@ package dev.usbharu.hideout.core.service.post +import dev.usbharu.hideout.application.config.HtmlSanitizeConfig import org.assertj.core.api.Assertions.assertThat import org.junit.jupiter.api.Test class DefaultPostContentFormatterTest { + val defaultPostContentFormatter = DefaultPostContentFormatter(HtmlSanitizeConfig().policy()) + @Test fun pタグがpタグになる() { //language=HTML val html = """

hoge

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

", "hoge")) } @@ -19,7 +22,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

", "hoge")) } @@ -29,7 +32,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge

fuga

piyo

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

fuga

piyo

", "hoge\n\nfuga\n\npiyo")) } @@ -39,7 +42,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

", "hoge")) } @@ -49,7 +52,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge

fuga

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

fuga

", "hoge\n\nfuga")) } @@ -59,7 +62,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

", "hoge")) } @@ -69,7 +72,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

", "hoge")) } @@ -79,7 +82,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

", "hoge")) } @@ -89,7 +92,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge
fuga

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge
fuga

", "hoge\nfuga")) } @@ -99,7 +102,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """hoge""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

", "hoge")) } @@ -109,8 +112,24 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """

hoge

""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("

hoge

", "hoge")) } + + @Test + fun pタグの中のspanは無視される() { + //language=HTML + val html = + """

@testuser14 tes

""" + + val actual = defaultPostContentFormatter.format(html) + + assertThat(actual).isEqualTo( + FormattedPostContent( + "

@testuser14 tes

", + "@testuser14 tes" + ) + ) + } } diff --git a/src/test/kotlin/dev/usbharu/hideout/core/service/post/PostServiceImplTest.kt b/src/test/kotlin/dev/usbharu/hideout/core/service/post/PostServiceImplTest.kt index 733e8f93..955f72fd 100644 --- a/src/test/kotlin/dev/usbharu/hideout/core/service/post/PostServiceImplTest.kt +++ b/src/test/kotlin/dev/usbharu/hideout/core/service/post/PostServiceImplTest.kt @@ -3,6 +3,7 @@ package dev.usbharu.hideout.core.service.post import dev.usbharu.hideout.activitypub.service.activity.create.ApSendCreateService import dev.usbharu.hideout.activitypub.service.activity.delete.APSendDeleteService import dev.usbharu.hideout.application.config.CharacterLimit +import dev.usbharu.hideout.application.config.HtmlSanitizeConfig import dev.usbharu.hideout.core.domain.exception.resource.DuplicateException import dev.usbharu.hideout.core.domain.model.actor.ActorRepository import dev.usbharu.hideout.core.domain.model.post.Post @@ -36,7 +37,11 @@ class PostServiceImplTest { @Mock private lateinit var timelineService: TimelineService @Spy - private var postBuilder: Post.PostBuilder = Post.PostBuilder(CharacterLimit()) + private var postBuilder: Post.PostBuilder = Post.PostBuilder( + CharacterLimit(), DefaultPostContentFormatter( + HtmlSanitizeConfig().policy() + ) + ) @Mock private lateinit var apSendCreateService: ApSendCreateService diff --git a/src/test/kotlin/dev/usbharu/hideout/core/service/user/ActorServiceTest.kt b/src/test/kotlin/dev/usbharu/hideout/core/service/user/ActorServiceTest.kt index 430c4ecd..f3f850a9 100644 --- a/src/test/kotlin/dev/usbharu/hideout/core/service/user/ActorServiceTest.kt +++ b/src/test/kotlin/dev/usbharu/hideout/core/service/user/ActorServiceTest.kt @@ -4,9 +4,11 @@ package dev.usbharu.hideout.core.service.user import dev.usbharu.hideout.application.config.ApplicationConfig import dev.usbharu.hideout.application.config.CharacterLimit +import dev.usbharu.hideout.application.config.HtmlSanitizeConfig import dev.usbharu.hideout.core.domain.model.actor.Actor import dev.usbharu.hideout.core.domain.model.actor.ActorRepository import dev.usbharu.hideout.core.domain.model.post.Post +import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.test.runTest import org.junit.jupiter.api.Test @@ -20,7 +22,7 @@ import kotlin.test.assertNull class ActorServiceTest { val actorBuilder = Actor.UserBuilder(CharacterLimit(), ApplicationConfig(URL("https://example.com"))) - val postBuilder = Post.PostBuilder(CharacterLimit()) + val postBuilder = Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy())) @Test fun `createLocalUser ローカルユーザーを作成できる`() = runTest { diff --git a/src/test/kotlin/utils/PostBuilder.kt b/src/test/kotlin/utils/PostBuilder.kt index 97b8f6fb..747b8212 100644 --- a/src/test/kotlin/utils/PostBuilder.kt +++ b/src/test/kotlin/utils/PostBuilder.kt @@ -1,15 +1,18 @@ package utils import dev.usbharu.hideout.application.config.CharacterLimit +import dev.usbharu.hideout.application.config.HtmlSanitizeConfig import dev.usbharu.hideout.application.service.id.TwitterSnowflakeIdGenerateService import dev.usbharu.hideout.core.domain.model.post.Post import dev.usbharu.hideout.core.domain.model.post.Visibility +import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter import kotlinx.coroutines.runBlocking import java.time.Instant object PostBuilder { - private val postBuilder = Post.PostBuilder(CharacterLimit()) + private val postBuilder = + Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy())) private val idGenerator = TwitterSnowflakeIdGenerateService