diff --git a/build.gradle.kts b/build.gradle.kts
index 2034697c..2dc4aed8 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -215,6 +215,7 @@ dependencies {
     implementation("org.flywaydb:flyway-core")
     implementation("dev.usbharu:emoji-kt:2.0.0")
+    implementation("org.jsoup:jsoup:1.17.2")
     implementation("io.ktor:ktor-client-logging-jvm:$ktor_version")
diff --git a/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt b/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt
new file mode 100644
index 00000000..49548f8e
--- /dev/null
+++ b/src/main/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatter.kt
@@ -0,0 +1,145 @@
+package dev.usbharu.hideout.core.service.post
+
+import org.jsoup.Jsoup
+import org.jsoup.nodes.Attributes
+import org.jsoup.nodes.Element
+import org.jsoup.nodes.TextNode
+import org.jsoup.parser.Tag
+import org.jsoup.select.Elements
+
+/**
+ * [PostContentFormatter] that flattens post HTML into a sequence of `<p>` paragraphs.
+ *
+ * Inside a paragraph only text, `<br>` and `<a href>` are kept; any other element is replaced by
+ * its text. A run of two or more `<br>` is treated as a paragraph break.
+ */
+class DefaultPostContentFormatter : PostContentFormatter {
+    /**
+     * Formats [content].
+     *
+     * @param content HTML fragment or plain text of a post.
+     * @return the flattened HTML with line breaks removed, and a plain-text rendering in which
+     * paragraphs are joined by a blank line and `<br>` becomes a newline.
+     */
+    override suspend fun format(content: String): FormattedPostContent {
+        val body = Jsoup.parseBodyFragment(content).getElementsByTag("body").first()
+            ?: return FormattedPostContent("", "")
+
+        // Iterate over a snapshot: appendChild() below detaches the node from body.
+        val paragraphs = body.childNodes().toList().mapNotNull { node ->
+            when {
+                node is Element && node.tagName() == "p" -> p(node)
+                // Wrap this element only. Wrapping every child of body here moved all siblings
+                // into the new paragraph, so their text showed up more than once in the output.
+                node is Element -> p(Element("p").appendChild(node))
+                node is TextNode -> Element("p").appendText(node.text())
+                else -> null
+            }
+        }.filter { it.text().isNotBlank() }
+
+        val elements = Elements(paragraphs.flatMap { splitAtBlankLines(it) })
+
+        return FormattedPostContent(elements.outerHtml().replace("\n", ""), printHtml(elements))
+    }
+
+    /**
+     * Rebuilds [element] as a `<p>` whose children are limited to text, `<br>` and `<a>`.
+     *
+     * @param element the paragraph to sanitise.
+     * @return a new `<p>` element.
+     */
+    private fun p(element: Element): Element {
+        val children = element.childNodes().toList()
+
+        if (children.singleOrNull() is TextNode) {
+            return Element("p").appendText(element.text())
+        }
+
+        val sanitized = children.mapNotNull { child ->
+            when (child) {
+                is Element -> when (child.tagName()) {
+                    "a" -> a(child)
+                    "br" -> Element("br")
+                    // Any other element is reduced to its text.
+                    else -> TextNode(child.text())
+                }
+                is TextNode -> child
+                else -> null
+            }
+        }
+
+        return Element("p").appendChildren(sanitized)
+    }
+
+    /**
+     * Splits [paragraph] wherever two or more `<br>` follow each other directly.
+     *
+     * The `<br>` run that forms the break is dropped; a single `<br>` stays in its paragraph.
+     *
+     * @param paragraph a paragraph produced by [p].
+     * @return the resulting paragraphs in document order.
+     */
+    private fun splitAtBlankLines(paragraph: Element): List<Element> {
+        // Snapshot: appendChildren() below moves nodes out of the paragraph.
+        val nodes = paragraph.childNodes().toList()
+        val result = mutableListOf<Element>()
+        var brCount = 0
+        var start = 0
+
+        for ((index, node) in nodes.withIndex()) {
+            if (node is Element && node.tagName() == "br") {
+                brCount++
+                continue
+            }
+            if (brCount >= 2) {
+                val end = index - brCount
+                // A paragraph that begins with the <br> run would yield an empty chunk; skip it.
+                if (end > start) {
+                    result.add(Element("p").appendChildren(nodes.subList(start, end)))
+                }
+                start = index
+            }
+            // The run is over. Without this reset, separate single <br> were counted as one run
+            // and, after the first split, subList() was called with end < start.
+            brCount = 0
+        }
+        result.add(Element("p").appendChildren(nodes.subList(start, nodes.size)))
+
+        return result
+    }
+
+    /**
+     * Copies an anchor, keeping only its `href` attribute and its text.
+     *
+     * @param element the source `<a>` element.
+     * @return a new `<a>`; it carries no `href` when the source has none.
+     */
+    private fun a(element: Element): Element {
+        val attributes = Attributes()
+
+        // attribute("href") is null for an anchor without href, so check for presence first.
+        // NOTE(review): the href value is copied as-is; its URL scheme is not validated here.
+        if (element.hasAttr("href")) {
+            attributes.put("href", element.attr("href"))
+        }
+        return Element(Tag.valueOf("a"), "", attributes).appendText(element.text())
+    }
+
+    /**
+     * Renders [element] as plain text.
+     *
+     * @param element the paragraphs to render.
+     * @return the paragraphs joined by a blank line, with every `<br>` rendered as a newline.
+     */
+    private fun printHtml(element: Elements): String =
+        element.joinToString("\n\n") { paragraph ->
+            paragraph.childNodes().joinToString("") { node ->
+                when {
+                    node is Element && node.tagName() == "br" -> "\n"
+                    node is Element -> node.text()
+                    node is TextNode -> node.text()
+                    else -> ""
+                }
+            }
+        }
+}
diff --git a/src/main/kotlin/dev/usbharu/hideout/core/service/post/FormattedPostContent.kt b/src/main/kotlin/dev/usbharu/hideout/core/service/post/FormattedPostContent.kt
new file mode 100644
index 00000000..763c75c7
--- /dev/null
+++ b/src/main/kotlin/dev/usbharu/hideout/core/service/post/FormattedPostContent.kt
@@ -0,0 +1,12 @@
+package dev.usbharu.hideout.core.service.post
+
+/**
+ * Result of [PostContentFormatter.format].
+ *
+ * @property html the post rendered as HTML.
+ * @property content the post rendered as plain text.
+ */
+data class FormattedPostContent(
+    val html: String,
+    val content: String
+)
diff --git a/src/main/kotlin/dev/usbharu/hideout/core/service/post/PostContentFormatter.kt b/src/main/kotlin/dev/usbharu/hideout/core/service/post/PostContentFormatter.kt
new file mode 100644
index 
00000000..7713adea
--- /dev/null
+++ b/src/main/kotlin/dev/usbharu/hideout/core/service/post/PostContentFormatter.kt
@@ -0,0 +1,12 @@
+package dev.usbharu.hideout.core.service.post
+
+/**
+ * Converts the raw content of a post into a [FormattedPostContent].
+ */
+interface PostContentFormatter {
+    /**
+     * @param content the raw post content.
+     * @return the formatted result.
+     */
+    suspend fun format(content: String): FormattedPostContent
+}
diff --git a/src/test/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatterTest.kt b/src/test/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatterTest.kt
new file mode 100644
index 00000000..7ac2e857
--- /dev/null
+++ b/src/test/kotlin/dev/usbharu/hideout/core/service/post/DefaultPostContentFormatterTest.kt
@@ -0,0 +1,136 @@
+package dev.usbharu.hideout.core.service.post
+
+import kotlinx.coroutines.test.runTest
+import org.assertj.core.api.Assertions.assertThat
+import org.junit.jupiter.api.Test
+
+/**
+ * Tests for [DefaultPostContentFormatter].
+ *
+ * NOTE(review): the HTML tags inside the fixture strings had been stripped from this patch, leaving
+ * string literals that span several lines. The markup below is reconstructed from the test names
+ * and the surviving text; the heading level, the anchor attributes and the placement of the body
+ * tag are assumptions - confirm against the original commit.
+ */
+class DefaultPostContentFormatterTest {
+    // A p tag stays a p tag.
+    @Test
+    fun pタグがpタグになる() = runTest {
+        //language=HTML
+        val html = """<p>hoge</p>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
+    }
+
+    // A heading tag becomes a p tag.
+    @Test
+    fun hタグがpタグになる() = runTest {
+        //language=HTML
+        val html = """<h1>hoge</h1>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
+    }
+
+    // Nested p tags are discarded.
+    @Test
+    fun pタグのネストは破棄される() = runTest {
+        //language=HTML
+        val html = """<p>hoge<p>fuga</p>piyo</p>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p><p>fuga</p><p>piyo</p>", "hoge\n\nfuga\n\npiyo"))
+    }
+
+    // span tags are ignored.
+    @Test
+    fun spanタグは無視される() = runTest {
+        //language=HTML
+        val html = """<p><span>hoge</span></p>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
+    }
+
+    // Two consecutive line breaks are converted into a paragraph break.
+    @Test
+    fun `2連続改行は段落に変換される`() = runTest {
+        //language=HTML
+        val html = """<p>hoge<br><br>fuga</p>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p><p>fuga</p>", "hoge\n\nfuga"))
+    }
+
+    // i tags are ignored.
+    @Test
+    fun iタグは無視される() = runTest {
+        //language=HTML
+        val html = """<p><i>hoge</i></p>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
+    }
+
+    // Only the href of an a tag is carried over.
+    @Test
+    fun aタグはhrefの中身のみ引き継がれる() = runTest {
+        //language=HTML
+        val html = """<p><a href="https://example.com" class="u-url">hoge</a></p>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p><a href=\"https://example.com\">hoge</a></p>", "hoge"))
+    }
+
+    // A span inside an a tag is ignored.
+    @Test
+    fun aタグの中のspanは無視される() = runTest {
+        //language=HTML
+        val html = """<p><a href="https://example.com"><span>hoge</span></a></p>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p><a href=\"https://example.com\">hoge</a></p>", "hoge"))
+    }
+
+    // A br tag becomes a line break.
+    @Test
+    fun brタグのコンテンツは改行になる() = runTest {
+        //language=HTML
+        val html = """<p>hoge<br>fuga</p>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge<br>fuga</p>", "hoge\nfuga"))
+    }
+
+    // Bare text is wrapped in a p tag.
+    @Test
+    fun いきなりテキストが来たらpタグで囲む() = runTest {
+        //language=HTML
+        val html = """hoge"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
+    }
+
+    // A body tag in the input is removed.
+    @Test
+    fun bodyタグが含まれていた場合消す() = runTest {
+        //language=HTML
+        val html = """<body><p>hoge</p></body>"""
+
+        val actual = DefaultPostContentFormatter().format(html)
+
+        assertThat(actual).isEqualTo(FormattedPostContent("<p>hoge</p>", "hoge"))
+    }
+}