From 06ce40991cbd192eac8722040af87e92abb2fcfe Mon Sep 17 00:00:00 2001
From: usbharu <64310155+usbharu@users.noreply.github.com>
Date: Wed, 24 Jan 2024 21:25:27 +0900
Subject: [PATCH] =?UTF-8?q?feat:=20HTML=E3=81=B8=E3=81=AE=E3=82=B5?=
=?UTF-8?q?=E3=83=8B=E3=82=BF=E3=82=A4=E3=82=BA=E3=82=92=E5=A4=96=E9=83=A8?=
=?UTF-8?q?=E3=83=A9=E3=82=A4=E3=83=96=E3=83=A9=E3=83=AA=E3=81=A7=E8=A1=8C?=
=?UTF-8?q?=E3=81=86=E3=82=88=E3=81=86=E3=81=AB?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
build.gradle.kts | 1 +
.../application/config/HtmlSanitizeConfig.kt | 21 +++++
.../post/DefaultPostContentFormatter.kt | 83 +++++++------------
.../objects/note/APNoteServiceImplTest.kt | 19 ++++-
.../post/DefaultPostContentFormatterTest.kt | 41 ++++++---
.../core/service/post/PostServiceImplTest.kt | 7 +-
.../core/service/user/ActorServiceTest.kt | 4 +-
src/test/kotlin/utils/PostBuilder.kt | 5 +-
8 files changed, 111 insertions(+), 70 deletions(-)
create mode 100644 src/main/kotlin/dev/usbharu/hideout/application/config/HtmlSanitizeConfig.kt
diff --git a/build.gradle.kts b/build.gradle.kts
index 2dc4aed8..14213211 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -216,6 +216,7 @@ dependencies {
implementation("dev.usbharu:emoji-kt:2.0.0")
implementation("org.jsoup:jsoup:1.17.2")
+ implementation("com.googlecode.owasp-java-html-sanitizer:owasp-java-html-sanitizer:20220608.1")
implementation("io.ktor:ktor-client-logging-jvm:$ktor_version")
diff --git a/src/main/kotlin/dev/usbharu/hideout/application/config/HtmlSanitizeConfig.kt b/src/main/kotlin/dev/usbharu/hideout/application/config/HtmlSanitizeConfig.kt
new file mode 100644
index 00000000..e7781fe7
--- /dev/null
+++ b/src/main/kotlin/dev/usbharu/hideout/application/config/HtmlSanitizeConfig.kt
@@ -0,0 +1,21 @@
+package dev.usbharu.hideout.application.config
+
+import org.owasp.html.HtmlPolicyBuilder
+import org.owasp.html.PolicyFactory
+import org.springframework.context.annotation.Bean
+import org.springframework.context.annotation.Configuration
+
+/** Spring configuration that publishes the HTML sanitizing policy as a bean. */
+@Configuration
+class HtmlSanitizeConfig {
+    /**
+     * Allows p, a and br, href on a, and the http/https URL protocols; h1-h6 are allowed but renamed to p.
+     */
+    @Bean
+    fun policy(): PolicyFactory = HtmlPolicyBuilder()
+        .allowElements("p", "a", "br")
+        .allowAttributes("href").onElements("a")
+        .allowUrlProtocols("http", "https")
+        .allowElements({ _, _ -> "p" }, "h1", "h2", "h3", "h4", "h5", "h6")
+        .toFactory()
+}
diff --git a/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt b/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt
index 05521940..5418f009 100644
--- a/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt
+++ b/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt
@@ -1,26 +1,34 @@
package dev.usbharu.hideout.core.service.post
import org.jsoup.Jsoup
-import org.jsoup.nodes.Attributes
+import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode
-import org.jsoup.parser.Tag
import org.jsoup.select.Elements
+import org.owasp.html.PolicyFactory
import org.springframework.stereotype.Service
@Service
-class DefaultPostContentFormatter : PostContentFormatter {
+class DefaultPostContentFormatter(private val policyFactory: PolicyFactory) : PostContentFormatter {
override fun format(content: String): FormattedPostContent {
- val document =
- Jsoup.parseBodyFragment(content).getElementsByTag("body").first() ?: return FormattedPostContent("", "")
- val flattenHtml = document.childNodes().mapNotNull {
+ //まず不正なHTMLを整形する
+ val document = Jsoup.parseBodyFragment(content)
+ val outputSettings = Document.OutputSettings()
+ outputSettings.prettyPrint(false)
+
+ document.outputSettings(outputSettings)
+
+ val unsafeElement = document.getElementsByTag("body").first() ?: return FormattedPostContent(
+ "",
+ ""
+ )
+
+
+ //文字だけのHTMLなどはここでpタグで囲む
+ val flattenHtml = unsafeElement.childNodes().mapNotNull {
if (it is Element) {
- if (it.tagName() == "p") {
- p(it)
- } else {
- p(Element("p").appendChildren(document.childNodes()))
- }
+ it
} else if (it is TextNode) {
Element("p").appendText(it.text())
} else {
@@ -28,9 +36,20 @@ class DefaultPostContentFormatter : PostContentFormatter {
}
}.filter { it.text().isNotBlank() }
+
+ // HTMLのサニタイズをする
+ val unsafeHtml = Elements(flattenHtml).outerHtml()
+
+ val safeHtml = policyFactory.sanitize(unsafeHtml)
+
+ val safeDocument =
+ Jsoup.parseBodyFragment(safeHtml).getElementsByTag("body").first() ?: return FormattedPostContent("", "")
+
val formattedHtml = mutableListOf hoge hoge hoge hoge fugahoge
"""
- val actual = DefaultPostContentFormatter().format(html)
+ val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("
hoge
fuga
piyo
", "hoge\n\nfuga\n\npiyo")) } @@ -39,7 +42,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """hoge
""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("hoge
", "hoge")) } @@ -49,7 +52,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """hoge
fuga
hoge
fuga
", "hoge\n\nfuga")) } @@ -59,7 +62,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """hoge
""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("hoge
", "hoge")) } @@ -69,7 +72,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("", "hoge")) } @@ -79,7 +82,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """""" - val actual = DefaultPostContentFormatter().format(html) + val actual = defaultPostContentFormatter.format(html) assertThat(actual).isEqualTo(FormattedPostContent("", "hoge")) } @@ -89,7 +92,7 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """hoge
fuga
hoge
fuga
hoge
", "hoge")) } @@ -109,8 +112,24 @@ class DefaultPostContentFormatterTest { //language=HTML val html = """