feat: HTMLへのサニタイズを外部ライブラリで行うように

This commit is contained in:
usbharu 2024-01-24 21:25:27 +09:00
parent ea04150501
commit 06ce40991c
8 changed files with 111 additions and 70 deletions

View File

@ -216,6 +216,7 @@ dependencies {
implementation("dev.usbharu:emoji-kt:2.0.0")
implementation("org.jsoup:jsoup:1.17.2")
implementation("com.googlecode.owasp-java-html-sanitizer:owasp-java-html-sanitizer:20220608.1")
implementation("io.ktor:ktor-client-logging-jvm:$ktor_version")

View File

@ -0,0 +1,21 @@
package dev.usbharu.hideout.application.config
import org.owasp.html.HtmlPolicyBuilder
import org.owasp.html.PolicyFactory
import org.springframework.context.annotation.Bean
import org.springframework.context.annotation.Configuration
/**
 * Spring configuration that exposes the HTML sanitizing policy used for post content.
 */
@Configuration
class HtmlSanitizeConfig {
    /**
     * Builds the sanitizer policy.
     *
     * - `p`, `a` and `br` elements are allowed as-is.
     * - Only the `href` attribute is allowed, and only on `a`; URLs are limited to `http` / `https`.
     * - `h1` to `h6` are allowed but renamed to `p` by the element policy.
     *
     * @return the assembled [PolicyFactory]
     */
    @Bean
    fun policy(): PolicyFactory =
        HtmlPolicyBuilder()
            .allowElements("p", "a", "br")
            .allowAttributes("href").onElements("a")
            .allowUrlProtocols("http", "https")
            // Headings are kept, but every heading tag is rewritten to a paragraph.
            .allowElements({ _, _ -> "p" }, "h1", "h2", "h3", "h4", "h5", "h6")
            .toFactory()
}

View File

@ -1,26 +1,34 @@
package dev.usbharu.hideout.core.service.post
import org.jsoup.Jsoup
import org.jsoup.nodes.Attributes
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode
import org.jsoup.parser.Tag
import org.jsoup.select.Elements
import org.owasp.html.PolicyFactory
import org.springframework.stereotype.Service
@Service
class DefaultPostContentFormatter : PostContentFormatter {
class DefaultPostContentFormatter(private val policyFactory: PolicyFactory) : PostContentFormatter {
/**
 * Formats [content] and returns the result as a [FormattedPostContent].
 */
override fun format(content: String): FormattedPostContent {
val document =
Jsoup.parseBodyFragment(content).getElementsByTag("body").first() ?: return FormattedPostContent("", "")
val flattenHtml = document.childNodes().mapNotNull {
// First, normalize malformed HTML
val document = Jsoup.parseBodyFragment(content)
val outputSettings = Document.OutputSettings()
outputSettings.prettyPrint(false)
document.outputSettings(outputSettings)
val unsafeElement = document.getElementsByTag("body").first() ?: return FormattedPostContent(
"",
""
)
// HTML that consists only of text (and similar cases) is wrapped in a p tag here
val flattenHtml = unsafeElement.childNodes().mapNotNull {
if (it is Element) {
if (it.tagName() == "p") {
p(it)
} else {
p(Element("p").appendChildren(document.childNodes()))
}
it
} else if (it is TextNode) {
Element("p").appendText(it.text())
} else {
@ -28,9 +36,20 @@ class DefaultPostContentFormatter : PostContentFormatter {
}
}.filter { it.text().isNotBlank() }
// Sanitize the HTML
val unsafeHtml = Elements(flattenHtml).outerHtml()
val safeHtml = policyFactory.sanitize(unsafeHtml)
val safeDocument =
Jsoup.parseBodyFragment(safeHtml).getElementsByTag("body").first() ?: return FormattedPostContent("", "")
val formattedHtml = mutableListOf<Element>()
for (element in flattenHtml) {
// Convert consecutive br tags into paragraphs
for (element in safeDocument.children()) {
var brCount = 0
var prevIndex = 0
val childNodes = element.childNodes()
@ -51,46 +70,6 @@ class DefaultPostContentFormatter : PostContentFormatter {
return FormattedPostContent(elements.outerHtml().replace("\n", ""), printHtml(elements))
}
/**
 * Rebuilds [element] as a fresh `p` element.
 *
 * A single text child becomes a plain-text paragraph. Otherwise each child is mapped:
 * `a` elements go through [a], `br` elements become a new `br`, any other element is
 * reduced to its text, text nodes are kept, and every other node type is dropped.
 *
 * @param element the element whose children are copied into the new paragraph
 * @return the newly created `p` element
 */
private fun p(element: Element): Element {
    val children = element.childNodes()
    val paragraph = Element("p")

    // Fast path: the element holds nothing but one text node.
    if (children.size == 1 && children.first() is TextNode) {
        return paragraph.appendText(element.text())
    }

    val rebuilt = children.mapNotNull { node ->
        when (node) {
            is Element -> when (node.tagName()) {
                "a" -> a(node)
                "br" -> Element("br")
                else -> TextNode(node.text())
            }

            is TextNode -> node
            else -> null
        }
    }
    return paragraph.appendChildren(rebuilt)
}
/**
 * Rebuilds an anchor, keeping only its `href` attribute and its text content.
 *
 * @param element the source `a` element
 * @return a new `a` element carrying the source's text and, when present, its `href`
 */
private fun a(element: Element): Element {
    val attributes = Attributes()
    // attribute("href") is null for an anchor without href, so dereferencing it would throw;
    // copy the value only when the attribute actually exists.
    if (element.hasAttr("href")) {
        attributes.put("href", element.attr("href"))
    }
    return Element(Tag.valueOf("a"), "", attributes).appendText(element.text())
}
private fun printHtml(element: Elements): String {
return element.joinToString("\n\n") {
it.childNodes().joinToString("") {

View File

@ -12,10 +12,12 @@ import dev.usbharu.hideout.activitypub.service.common.APResourceResolveService
import dev.usbharu.hideout.activitypub.service.objects.note.APNoteServiceImpl.Companion.public
import dev.usbharu.hideout.activitypub.service.objects.user.APUserService
import dev.usbharu.hideout.application.config.CharacterLimit
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import dev.usbharu.hideout.application.service.id.TwitterSnowflakeIdGenerateService
import dev.usbharu.hideout.core.domain.model.actor.ActorRepository
import dev.usbharu.hideout.core.domain.model.post.Post
import dev.usbharu.hideout.core.domain.model.post.PostRepository
import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter
import dev.usbharu.hideout.core.service.post.PostService
import io.ktor.client.*
import io.ktor.client.call.*
@ -42,7 +44,7 @@ import java.time.Instant
class APNoteServiceImplTest {
val postBuilder = Post.PostBuilder(CharacterLimit())
val postBuilder = Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy()))
@Test
fun `fetchNote(String,String) ートが既に存在する場合はDBから取得したものを返す`() = runTest {
@ -71,7 +73,10 @@ class APNoteServiceImplTest {
apUserService = mock(),
postService = mock(),
apResourceResolveService = mock(),
postBuilder = Post.PostBuilder(CharacterLimit()),
postBuilder = Post.PostBuilder(
CharacterLimit(),
DefaultPostContentFormatter(HtmlSanitizeConfig().policy())
),
noteQueryService = noteQueryService,
mock(),
mock()
@ -141,7 +146,10 @@ class APNoteServiceImplTest {
apUserService = apUserService,
postService = mock(),
apResourceResolveService = apResourceResolveService,
postBuilder = Post.PostBuilder(CharacterLimit()),
postBuilder = Post.PostBuilder(
CharacterLimit(),
DefaultPostContentFormatter(HtmlSanitizeConfig().policy())
),
noteQueryService = noteQueryService,
mock(),
mock { }
@ -190,7 +198,10 @@ class APNoteServiceImplTest {
apUserService = mock(),
postService = mock(),
apResourceResolveService = apResourceResolveService,
postBuilder = Post.PostBuilder(CharacterLimit()),
postBuilder = Post.PostBuilder(
CharacterLimit(),
DefaultPostContentFormatter(HtmlSanitizeConfig().policy())
),
noteQueryService = noteQueryService,
mock(),
mock()

View File

@ -1,15 +1,18 @@
package dev.usbharu.hideout.core.service.post
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import org.assertj.core.api.Assertions.assertThat
import org.junit.jupiter.api.Test
class DefaultPostContentFormatterTest {
val defaultPostContentFormatter = DefaultPostContentFormatter(HtmlSanitizeConfig().policy())
@Test
fun pタグがpタグになる() {
//language=HTML
val html = """<p>hoge</p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
}
@ -19,7 +22,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """<h1>hoge</h1>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
}
@ -29,7 +32,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """<p>hoge<p>fuga</p>piyo</p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p><p>fuga</p><p>piyo</p>", "hoge\n\nfuga\n\npiyo"))
}
@ -39,7 +42,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """<p><span>hoge</span></p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
}
@ -49,7 +52,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """<p>hoge<br><br>fuga</p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p><p>fuga</p>", "hoge\n\nfuga"))
}
@ -59,7 +62,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """<p><i>hoge</i></p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
}
@ -69,7 +72,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """<p><a href='https://example.com' class='u-url' target='_blank'>hoge</a></p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p><a href=\"https://example.com\">hoge</a></p>", "hoge"))
}
@ -79,7 +82,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """<p><a href='https://example.com'><span>hoge</span></a></p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p><a href=\"https://example.com\">hoge</a></p>", "hoge"))
}
@ -89,7 +92,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """<p>hoge<br>fuga</p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge<br> fuga</p>", "hoge\nfuga"))
}
@ -99,7 +102,7 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """hoge"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
}
@ -109,8 +112,24 @@ class DefaultPostContentFormatterTest {
//language=HTML
val html = """</body><p>hoge</p>"""
val actual = DefaultPostContentFormatter().format(html)
val actual = defaultPostContentFormatter.format(html)
assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
}
// Spans nested in a p tag are dropped from the output, and the anchor keeps only its href.
@Test
fun pタグの中のspanは無視される() {
    //language=HTML
    val html =
        """<p><span class="h-card" translate="no"><a href="https://test-hideout.usbharu.dev/users/testuser14" class="u-url mention">@<span>testuser14</span></a></span> tes</p>"""

    val expected = FormattedPostContent(
        "<p><a href=\"https://test-hideout.usbharu.dev/users/testuser14\">@testuser14</a> tes</p>",
        "@testuser14 tes"
    )

    assertThat(defaultPostContentFormatter.format(html)).isEqualTo(expected)
}
}

View File

@ -3,6 +3,7 @@ package dev.usbharu.hideout.core.service.post
import dev.usbharu.hideout.activitypub.service.activity.create.ApSendCreateService
import dev.usbharu.hideout.activitypub.service.activity.delete.APSendDeleteService
import dev.usbharu.hideout.application.config.CharacterLimit
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import dev.usbharu.hideout.core.domain.exception.resource.DuplicateException
import dev.usbharu.hideout.core.domain.model.actor.ActorRepository
import dev.usbharu.hideout.core.domain.model.post.Post
@ -36,7 +37,11 @@ class PostServiceImplTest {
@Mock
private lateinit var timelineService: TimelineService
@Spy
private var postBuilder: Post.PostBuilder = Post.PostBuilder(CharacterLimit())
private var postBuilder: Post.PostBuilder = Post.PostBuilder(
CharacterLimit(), DefaultPostContentFormatter(
HtmlSanitizeConfig().policy()
)
)
@Mock
private lateinit var apSendCreateService: ApSendCreateService

View File

@ -4,9 +4,11 @@ package dev.usbharu.hideout.core.service.user
import dev.usbharu.hideout.application.config.ApplicationConfig
import dev.usbharu.hideout.application.config.CharacterLimit
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import dev.usbharu.hideout.core.domain.model.actor.Actor
import dev.usbharu.hideout.core.domain.model.actor.ActorRepository
import dev.usbharu.hideout.core.domain.model.post.Post
import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.test.runTest
import org.junit.jupiter.api.Test
@ -20,7 +22,7 @@ import kotlin.test.assertNull
class ActorServiceTest {
val actorBuilder = Actor.UserBuilder(CharacterLimit(), ApplicationConfig(URL("https://example.com")))
val postBuilder = Post.PostBuilder(CharacterLimit())
val postBuilder = Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy()))
@Test
fun `createLocalUser ローカルユーザーを作成できる`() = runTest {

View File

@ -1,15 +1,18 @@
package utils
import dev.usbharu.hideout.application.config.CharacterLimit
import dev.usbharu.hideout.application.config.HtmlSanitizeConfig
import dev.usbharu.hideout.application.service.id.TwitterSnowflakeIdGenerateService
import dev.usbharu.hideout.core.domain.model.post.Post
import dev.usbharu.hideout.core.domain.model.post.Visibility
import dev.usbharu.hideout.core.service.post.DefaultPostContentFormatter
import kotlinx.coroutines.runBlocking
import java.time.Instant
object PostBuilder {
private val postBuilder = Post.PostBuilder(CharacterLimit())
private val postBuilder =
Post.PostBuilder(CharacterLimit(), DefaultPostContentFormatter(HtmlSanitizeConfig().policy()))
private val idGenerator = TwitterSnowflakeIdGenerateService