mirror of https://github.com/usbharu/Hideout.git
feat: HTMLを整形し、不正なHTMLを含まないように
This commit is contained in:
parent
20012782b7
commit
45e0bd8edc
|
@ -215,6 +215,7 @@ dependencies {
|
|||
implementation("org.flywaydb:flyway-core")
|
||||
|
||||
implementation("dev.usbharu:emoji-kt:2.0.0")
|
||||
implementation("org.jsoup:jsoup:1.17.2")
|
||||
|
||||
implementation("io.ktor:ktor-client-logging-jvm:$ktor_version")
|
||||
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
package dev.usbharu.hideout.core.service.post
|
||||
|
||||
import org.jsoup.Jsoup
|
||||
import org.jsoup.nodes.Attributes
|
||||
import org.jsoup.nodes.Element
|
||||
import org.jsoup.nodes.TextNode
|
||||
import org.jsoup.parser.Tag
|
||||
import org.jsoup.select.Elements
|
||||
|
||||
/**
 * [PostContentFormatter] that rebuilds post HTML as a flat sequence of `<p>` paragraphs.
 *
 * Inside a paragraph only text, `<br>` and `<a>` (carrying nothing but its `href`) are kept;
 * every other element is replaced by its text. A run of two or more `<br>` acts as a
 * paragraph separator.
 */
class DefaultPostContentFormatter : PostContentFormatter {

    /**
     * Formats [content] and returns both the cleaned HTML and a plain-text rendering.
     *
     * @param content HTML fragment (or plain text) of a post.
     * @return the cleaned HTML with line breaks produced by jsoup's serializer removed, and the
     * plain text in which paragraphs are separated by a blank line and `<br>` becomes `\n`.
     * Returns `FormattedPostContent("", "")` when the parsed document has no `<body>`.
     */
    override suspend fun format(content: String): FormattedPostContent {
        val body = Jsoup.parseBodyFragment(content).getElementsByTag("body").first()
            ?: return FormattedPostContent("", "")

        val paragraphs = topLevelParagraphs(body)
            .filter { it.text().isNotBlank() }
            .flatMap { splitOnBlankLines(it) }

        val elements = Elements(paragraphs)

        return FormattedPostContent(elements.outerHtml().replace("\n", ""), printHtml(elements))
    }

    /**
     * Turns the direct children of [body] into sanitized paragraphs.
     *
     * Every top-level `<p>` becomes one paragraph. Each run of consecutive nodes that are not
     * `<p>` (text, headings, inline elements, ...) is gathered into a single synthetic `<p>`,
     * so each source node contributes to exactly one output paragraph.
     */
    private fun topLevelParagraphs(body: Element): List<Element> {
        val paragraphs = mutableListOf<Element>()
        var wrapper: Element? = null

        for (node in body.childNodes()) {
            if (node is Element && node.tagName() == "p") {
                if (wrapper != null) {
                    paragraphs.add(p(wrapper))
                    wrapper = null
                }
                paragraphs.add(p(node))
            } else {
                // Clone so that building the wrapper does not detach nodes from the parsed body.
                val target = wrapper ?: Element("p")
                target.appendChild(node.clone())
                wrapper = target
            }
        }
        if (wrapper != null) {
            paragraphs.add(p(wrapper))
        }

        return paragraphs
    }

    /**
     * Splits [paragraph] wherever two or more `<br>` elements are directly adjacent.
     *
     * The separating `<br>` run is dropped, a single `<br>` stays inside its paragraph, and
     * empty pieces (for example before a leading `<br><br>`) are not emitted.
     */
    private fun splitOnBlankLines(paragraph: Element): List<Element> {
        val nodes = paragraph.childNodes()
        val pieces = mutableListOf<Element>()
        var start = 0
        var brRun = 0

        for ((index, node) in nodes.withIndex()) {
            if (node is Element && node.tagName() == "br") {
                brRun++
                continue
            }
            if (brRun >= 2) {
                val end = index - brRun
                if (end > start) {
                    pieces.add(Element("p").appendChildren(nodes.subList(start, end)))
                }
                start = index
            }
            // Only directly adjacent <br> elements form a separator, so the run ends here.
            brRun = 0
        }

        // A trailing run of 2+ <br> separates nothing, so leave it out of the last piece.
        val end = if (brRun >= 2) nodes.size - brRun else nodes.size
        if (end > start) {
            pieces.add(Element("p").appendChildren(nodes.subList(start, end)))
        }

        return pieces
    }

    /**
     * Builds a clean `<p>` from the children of [element].
     *
     * Text nodes are kept, `<a>` goes through [a], `<br>` is recreated without attributes, any
     * other element is replaced by its text, and other node types are dropped.
     */
    private fun p(element: Element): Element {
        val childNodes = element.childNodes()

        if (childNodes.size == 1 && childNodes.first() is TextNode) {
            return Element("p").appendText(element.text())
        }

        val sanitized = childNodes.mapNotNull { node ->
            when (node) {
                is Element -> when (node.tagName()) {
                    "a" -> a(node)
                    "br" -> Element("br")
                    else -> TextNode(node.text())
                }

                is TextNode -> node
                else -> null
            }
        }

        return Element("p").appendChildren(sanitized)
    }

    /**
     * Builds a clean `<a>` that keeps only the `href` attribute and the text of [element].
     *
     * An anchor without `href` produces an `<a>` without attributes.
     */
    private fun a(element: Element): Element {
        val attributes = Attributes()

        // Element.attribute(key) returns null for a missing attribute, so check presence first.
        // NOTE(review): the href value is copied as-is; the URL scheme (e.g. `javascript:`) is
        // not validated here - confirm whether that is handled elsewhere.
        if (element.hasAttr("href")) {
            attributes.put("href", element.attr("href"))
        }

        return Element(Tag.valueOf("a"), "", attributes).appendText(element.text())
    }

    /**
     * Renders [element] as plain text: paragraphs are joined with a blank line, `<br>` becomes
     * a newline and every other child contributes its text.
     */
    private fun printHtml(element: Elements): String {
        return element.joinToString("\n\n") { paragraph ->
            paragraph.childNodes().joinToString("") { node ->
                when {
                    node is Element && node.tagName() == "br" -> "\n"
                    node is Element -> node.text()
                    node is TextNode -> node.text()
                    else -> ""
                }
            }
        }
    }
}
|
|
@ -0,0 +1,6 @@
|
|||
package dev.usbharu.hideout.core.service.post
|
||||
|
||||
/**
 * Result of formatting a post.
 *
 * @property html the formatted HTML markup.
 * @property content the plain-text rendering of the same post.
 */
data class FormattedPostContent(
    val html: String,
    val content: String,
)
|
|
@ -0,0 +1,5 @@
|
|||
package dev.usbharu.hideout.core.service.post
|
||||
|
||||
/**
 * Converts the content of a post into a [FormattedPostContent].
 */
interface PostContentFormatter {
    /**
     * Formats [content].
     *
     * @param content the post content to format.
     * @return the formatted HTML together with its plain-text form.
     */
    suspend fun format(content: String): FormattedPostContent
}
|
|
@ -0,0 +1,117 @@
|
|||
package dev.usbharu.hideout.core.service.post
|
||||
|
||||
import kotlinx.coroutines.test.runTest
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
import org.junit.jupiter.api.Test
|
||||
|
||||
/**
 * Tests for [DefaultPostContentFormatter]; each case checks both the formatted HTML and the
 * plain-text content produced for one input.
 */
class DefaultPostContentFormatterTest {

    /** Formats [input] with a new formatter and compares the whole result with the expectation. */
    private suspend fun assertFormatted(input: String, expectedHtml: String, expectedContent: String) {
        val actual = DefaultPostContentFormatter().format(input)

        assertThat(actual).isEqualTo(FormattedPostContent(expectedHtml, expectedContent))
    }

    /** A `<p>` element stays a `<p>`. */
    @Test
    fun pタグがpタグになる() = runTest {
        assertFormatted(
            //language=HTML
            input = """<p>hoge</p>""",
            expectedHtml = "<p>hoge</p>",
            expectedContent = "hoge",
        )
    }

    /** A heading element is turned into a `<p>`. */
    @Test
    fun hタグがpタグになる() = runTest {
        assertFormatted(
            //language=HTML
            input = """<h1>hoge</h1>""",
            expectedHtml = "<p>hoge</p>",
            expectedContent = "hoge",
        )
    }

    /** Nested `<p>` elements are flattened into sibling paragraphs. */
    @Test
    fun pタグのネストは破棄される() = runTest {
        assertFormatted(
            //language=HTML
            input = """<p>hoge<p>fuga</p>piyo</p>""",
            expectedHtml = "<p>hoge</p><p>fuga</p><p>piyo</p>",
            expectedContent = "hoge\n\nfuga\n\npiyo",
        )
    }

    /** A `<span>` is dropped and only its text remains. */
    @Test
    fun spanタグは無視される() = runTest {
        assertFormatted(
            //language=HTML
            input = """<p><span>hoge</span></p>""",
            expectedHtml = "<p>hoge</p>",
            expectedContent = "hoge",
        )
    }

    /** Two consecutive line breaks split the text into two paragraphs. */
    @Test
    fun `2連続改行は段落に変換される`() = runTest {
        assertFormatted(
            //language=HTML
            input = """<p>hoge<br><br>fuga</p>""",
            expectedHtml = "<p>hoge</p><p>fuga</p>",
            expectedContent = "hoge\n\nfuga",
        )
    }

    /** An `<i>` is dropped and only its text remains. */
    @Test
    fun iタグは無視される() = runTest {
        assertFormatted(
            //language=HTML
            input = """<p><i>hoge</i></p>""",
            expectedHtml = "<p>hoge</p>",
            expectedContent = "hoge",
        )
    }

    /** Only the `href` attribute of an `<a>` is carried over. */
    @Test
    fun aタグはhrefの中身のみ引き継がれる() = runTest {
        assertFormatted(
            //language=HTML
            input = """<p><a href='https://example.com' class='u-url' target='_blank'>hoge</a></p>""",
            expectedHtml = "<p><a href=\"https://example.com\">hoge</a></p>",
            expectedContent = "hoge",
        )
    }

    /** A `<span>` inside an `<a>` is dropped and only its text remains. */
    @Test
    fun aタグの中のspanは無視される() = runTest {
        assertFormatted(
            //language=HTML
            input = """<p><a href='https://example.com'><span>hoge</span></a></p>""",
            expectedHtml = "<p><a href=\"https://example.com\">hoge</a></p>",
            expectedContent = "hoge",
        )
    }

    /** A single `<br>` becomes a newline in the plain-text content. */
    @Test
    fun brタグのコンテンツは改行になる() = runTest {
        assertFormatted(
            //language=HTML
            input = """<p>hoge<br>fuga</p>""",
            expectedHtml = "<p>hoge<br> fuga</p>",
            expectedContent = "hoge\nfuga",
        )
    }

    /** Bare text at the top level is wrapped in a `<p>`. */
    @Test
    fun いきなりテキストが来たらpタグで囲む() = runTest {
        assertFormatted(
            //language=HTML
            input = """hoge""",
            expectedHtml = "<p>hoge</p>",
            expectedContent = "hoge",
        )
    }

    /** A stray body tag in the input is removed. */
    @Test
    fun bodyタグが含まれていた場合消す() = runTest {
        assertFormatted(
            //language=HTML
            input = """</body><p>hoge</p>""",
            expectedHtml = "<p>hoge</p>",
            expectedContent = "hoge",
        )
    }
}
|
Loading…
Reference in New Issue