htmlに対応

This commit is contained in:
usbharu 2024-11-15 01:33:24 +09:00
parent 6c580cfe0e
commit 49498d581e
Signed by: usbharu
GPG Key ID: 95CBCF7046307B77
3 changed files with 409 additions and 58 deletions

View File

@ -10,6 +10,7 @@ class Lexer {
var inQuote = false //引用中の判断 var inQuote = false //引用中の判断
var inCode = false //コードブロック内の判断 var inCode = false //コードブロック内の判断
var inline = false //行頭の判断 var inline = false //行頭の判断
var htmlNest = 0
val codeBuffer = StringBuilder() val codeBuffer = StringBuilder()
line@ while (lines.hasNext()) { line@ while (lines.hasNext()) {
@ -26,9 +27,11 @@ class Lexer {
inCode = codeblock(iterator, next, tokens, inCode, codeBuffer, inline) inCode = codeblock(iterator, next, tokens, inCode, codeBuffer, inline)
} }
inCode -> { inCode -> codeBuffer.append(next)
codeBuffer.append(next)
} next == '<' -> htmlNest = html(iterator, htmlNest, codeBuffer, tokens, next)
htmlNest != 0 -> codeBuffer.append(next)
(next == '#' || next == '') && !inline -> header(iterator, tokens) (next == '#' || next == '') && !inline -> header(iterator, tokens)
(next == '>' || next == '') && !inQuote && !inline -> { (next == '>' || next == '') && !inQuote && !inline -> {
@ -80,6 +83,8 @@ class Lexer {
if (inCode) { if (inCode) {
codeBuffer.append("\n") codeBuffer.append("\n")
} else if (htmlNest != 0) {
codeBuffer.append(" ")
} else { } else {
tokens.add(Break(1)) tokens.add(Break(1))
} }
@ -98,6 +103,95 @@ class Lexer {
return tokens return tokens
} }
/**
 * Tries to lex one HTML tag whose opening `<` has already been read as [next].
 *
 * The tag is examined with look-ahead (`peekOrNull`) only; `iterator.skip` is called solely
 * after a terminating `>` has been found. On success, non-blank text held in [codeBuffer] is
 * emitted (trimmed) as [HtmlValue] and the buffer is cleared, then [StartTagStart] or
 * [EndTagStart], the attribute tokens and [TagEnd] are appended to [tokens]. When no `>` is
 * found, [next] is added as plain text and the nesting depth is returned untouched.
 *
 * @param iterator character source; offset 0 is the character right after the `<`
 * @param htmlNest HTML nesting depth before this tag
 * @param codeBuffer text collected since the previous tag
 * @param tokens token list that receives the result
 * @param next the character that triggered this call
 * @return the nesting depth after this tag; never below zero
 */
private fun html(
    iterator: PeekableCharIterator,
    htmlNest: Int,
    codeBuffer: StringBuilder,
    tokens: MutableList<Token>,
    next: Char
): Int {
    var counter = 0
    val endTag = iterator.peekOrNull() == '/'
    if (endTag) {
        counter++
    }

    // Tag name: everything up to whitespace, '>' or '/'.
    counter = skipPeekWhitespace(iterator, counter)
    val tagNameBuilder = StringBuilder()
    while (true) {
        val c = iterator.peekOrNull(counter) ?: break
        if (c.isWhitespace() || c == '>' || c == '/') break
        tagNameBuilder.append(c)
        counter++
    }
    counter = skipPeekWhitespace(iterator, counter)

    val attributeList = mutableListOf<Token>()
    intag@ while (true) {
        val first = iterator.peekOrNull(counter) ?: break
        if (first == '/' || first == '>') break

        // An attribute name also ends at '>' or '/', so that valueless attributes
        // such as `<input disabled>` do not swallow the end of the tag.
        val attrBuilder = StringBuilder()
        while (true) {
            val c = iterator.peekOrNull(counter) ?: break
            if (c == '=' || c == '/' || c == '>' || c.isWhitespace()) break
            attrBuilder.append(c)
            counter++
        }
        attributeList.add(AttributeName(attrBuilder.toString()))
        counter = skipPeekWhitespace(iterator, counter)

        if (iterator.peekOrNull(counter) == '=') {
            counter++
            if (iterator.peekOrNull(counter) == '"') {
                counter++
                // TODO: escape sequences
                // No closing quote means this is not a tag after all; stop scanning attributes.
                val quoted = offsetPeekString(iterator, counter, '"') ?: break@intag
                attributeList.add(AttributeValue(quoted.first))
                // `second` is the offset just past the closing quote.
                counter = quoted.second
            }
        }
        // Advance only over whitespace here: skipping a character unconditionally would
        // drop the first character of whatever follows a valueless attribute.
        counter = skipPeekWhitespace(iterator, counter)
    }

    // Self-closing tag ("/>"): move to the closing '>' if there is one, otherwise stay on '/'.
    val void = iterator.peekOrNull(counter) == '/'
    if (void) {
        offsetPeekString(iterator, counter, '>')?.let { counter = it.second - 1 }
    }

    // Not terminated by '>': this was not a tag. The depth must stay as it was,
    // otherwise the caller would keep treating the following text as HTML content.
    if (iterator.peekOrNull(counter) != '>') {
        addText(tokens, next.toString())
        return htmlNest
    }

    // Emit any text that was collected between the previous tag and this one.
    if (codeBuffer.isNotBlank()) {
        tokens.add(HtmlValue(codeBuffer.toString().trim()))
        codeBuffer.clear()
    }

    val tagName = tagNameBuilder.toString()
    var depth = htmlNest
    if (endTag) {
        // A closing tag leaves one nesting level.
        depth--
        tokens.add(EndTagStart(tagName))
    } else {
        // A self-closing start tag opens nothing, so only a regular start tag nests deeper.
        if (!void) {
            depth++
        }
        tokens.add(StartTagStart(tagName, void))
    }
    tokens.addAll(attributeList)
    tokens.add(TagEnd(tagName))
    iterator.skip(counter + 1)

    // A stray closing tag must not push the depth below zero.
    return if (depth < 0) 0 else depth
}
private fun strike( private fun strike(
iterator: PeekableCharIterator, iterator: PeekableCharIterator,
next: Char, next: Char,
@ -357,6 +451,34 @@ class Lexer {
return count return count
} }
/**
 * Returns the first peek offset at or after [currentOffset] whose character is not whitespace.
 *
 * The end of input (a `null` peek) also stops the scan. Only `peekOrNull` is called on [iterator].
 */
fun skipPeekWhitespace(iterator: PeekableCharIterator, currentOffset: Int = 0): Int =
    generateSequence(currentOffset) { it + 1 }
        .first { candidate -> iterator.peekOrNull(candidate)?.isWhitespace() != true }
/**
 * Looks ahead from [offset] for the terminator spelled by [chars], using only `peekOrNull`.
 *
 * The terminator is detected by checking the scanned text's suffix after every character, so
 * terminators whose prefix repeats (e.g. `"ab"` inside `"aab"`) are found correctly.
 *
 * @param iterator character source to peek into
 * @param offset peek offset at which scanning starts
 * @param chars characters that together form the terminator
 * @return the text before the terminator paired with the peek offset just past the terminator,
 * or `null` when the input ends before the terminator occurs. An empty [chars] yields
 * `"" to offset`.
 */
fun offsetPeekString(iterator: PeekableCharIterator, offset: Int = 0, vararg chars: Char): Pair<String, Int>? {
    val terminator = chars.concatToString()
    val scanned = StringBuilder()
    var counter = offset
    while (!scanned.endsWith(terminator)) {
        // Input exhausted before the terminator was seen.
        val c = iterator.peekOrNull(counter) ?: return null
        scanned.append(c)
        counter++
    }
    return scanned.substring(0, scanned.length - terminator.length) to counter
}
fun peekString(iterator: PeekableCharIterator, vararg char: Char): String? { fun peekString(iterator: PeekableCharIterator, vararg char: Char): String? {
var counter = 0 var counter = 0
val stringBuilder = StringBuilder() val stringBuilder = StringBuilder()

View File

@ -30,3 +30,11 @@ data class InlineCodeBlock(val text: String) : Token()
data class CodeBlock(val text: String) : Token() data class CodeBlock(val text: String) : Token()
data class CodeBlockLanguage(val language: String, val filename: String) : Token() data class CodeBlockLanguage(val language: String, val filename: String) : Token()
data class Strike(val strike: String) : Token() data class Strike(val strike: String) : Token()
/** Common base type of the tokens the lexer emits for HTML markup. */
abstract class Html : Token()

/** Start tag named [tag]; [void] is `true` when the tag was written with a `/` before its `>`. */
data class StartTagStart(
    var tag: String,
    val void: Boolean,
) : Html()

/** End tag (`</tag>`) named [tag]. */
data class EndTagStart(
    var tag: String,
) : Html()

/** Emitted after a tag's attribute tokens, once the tag's closing `>` has been reached. */
data class TagEnd(
    var tag: String,
) : Html()

/** Name of an attribute inside a tag. */
data class AttributeName(
    val name: String,
) : Html()

/** Double-quoted value of the preceding attribute, without the quotes. */
data class AttributeValue(
    val value: String,
) : Html()

/** Text found between two tags. */
data class HtmlValue(
    val value: String,
) : Html()

View File

@ -342,8 +342,7 @@ class LexerTest {
println(actual) println(actual)
assertContentEquals( assertContentEquals(
listOf(DiscList, Text("aiueo"), Break(1), Whitespace(4, ' '), DiscList, Text("abcd")), listOf(DiscList, Text("aiueo"), Break(1), Whitespace(4, ' '), DiscList, Text("abcd")), actual
actual
) )
} }
@ -357,14 +356,8 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
DecimalList('1'), DecimalList('1'), Text("aiueo"), Break(1), Whitespace(4, ' '), DecimalList('2'), Text("abcd")
Text("aiueo"), ), actual
Break(1),
Whitespace(4, ' '),
DecimalList('2'),
Text("abcd")
),
actual
) )
} }
@ -378,14 +371,8 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
DecimalList(''), DecimalList(''), Text("aiueo"), Break(1), Whitespace(4, ' '), DecimalList(''), Text("abcd")
Text("aiueo"), ), actual
Break(1),
Whitespace(4, ' '),
DecimalList(''),
Text("abcd")
),
actual
) )
} }
@ -399,14 +386,8 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
DecimalList('1'), DecimalList('1'), Text("aiueo"), Break(1), Whitespace(4, ' '), DecimalList('2'), Text("abcd")
Text("aiueo"), ), actual
Break(1),
Whitespace(4, ' '),
DecimalList('2'),
Text("abcd")
),
actual
) )
} }
@ -446,11 +427,7 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
Text("こんにちは~"), Text("こんにちは~"), Whitespace(1, ' '), Url("https://example.com"), Break(1), Text("あいうえお")
Whitespace(1, ' '),
Url("https://example.com"),
Break(1),
Text("あいうえお")
), actual ), actual
) )
} }
@ -495,9 +472,7 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
Asterisk(1, '*'), Asterisk(1, '*'), Text("a"), Asterisk(1, '*')
Text("a"),
Asterisk(1, '*')
), actual ), actual
) )
} }
@ -512,10 +487,7 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
Quote(1), Quote(1), Asterisk(1, '*'), Text("a"), Asterisk(1, '*')
Asterisk(1, '*'),
Text("a"),
Asterisk(1, '*')
), actual ), actual
) )
} }
@ -530,9 +502,7 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
Asterisk(2, '*'), Asterisk(2, '*'), Text("a"), Asterisk(2, '*')
Text("a"),
Asterisk(2, '*')
), actual ), actual
) )
} }
@ -547,9 +517,7 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
Asterisk(2, '_'), Asterisk(2, '_'), Text("a"), Asterisk(2, '_')
Text("a"),
Asterisk(2, '_')
), actual ), actual
) )
} }
@ -817,12 +785,7 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
Text("aiueo"), Text("aiueo"), Whitespace(1, ' '), Text("```abcd"), Asterisk(1, '*'), Text("a"), Asterisk(1, '*')
Whitespace(1, ' '),
Text("```abcd"),
Asterisk(1, '*'),
Text("a"),
Asterisk(1, '*')
), actual ), actual
) )
} }
@ -863,10 +826,7 @@ class LexerTest {
assertContentEquals( assertContentEquals(
listOf( listOf(
Text("aiueo"), Text("aiueo"), Whitespace(1, ' '), Strike("aiueo"), Text("bcde")
Whitespace(1, ' '),
Strike("aiueo"),
Text("bcde")
), actual ), actual
) )
} }
@ -900,4 +860,265 @@ class LexerTest {
), actual ), actual
) )
} }
/** A start tag with one quoted attribute yields start, attribute name, attribute value and end tokens. */
@Test
fun html() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", false),
        AttributeName("attr"),
        AttributeValue("value"),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName attr=\"value\">")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** A start tag without attributes yields only the start and end tokens. */
@Test
fun html2() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", false),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** A start tag immediately followed by its end tag yields both tag token groups and no content. */
@Test
fun html閉じタグ() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", false),
        AttributeName("attr"),
        AttributeValue("value"),
        TagEnd("tagName"),
        EndTagStart("tagName"),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName attr=\"value\"></tagName>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** Text between a start tag and its end tag is emitted as a single HtmlValue. */
@Test
fun html内容() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", false),
        AttributeName("attr"),
        AttributeValue("value"),
        TagEnd("tagName"),
        HtmlValue("hello"),
        EndTagStart("tagName"),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName attr=\"value\">hello</tagName>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** A tag nested inside another tag is lexed in document order, with the inner content in between. */
@Test
fun htmlネスト() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", false),
        AttributeName("attr"),
        AttributeValue("value"),
        TagEnd("tagName"),
        StartTagStart("tagB", false),
        TagEnd("tagB"),
        HtmlValue("hello"),
        EndTagStart("tagB"),
        TagEnd("tagB"),
        EndTagStart("tagName"),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName attr=\"value\"><tagB>hello</tagB></tagName>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** Input that looks like a tag but never reaches `>` is lexed as ordinary text. */
@Test
fun htmlかと思ったら違った() {
    val expected: List<Token> = listOf(
        Text("<tagName"),
        Whitespace(1, ' '),
        Text("attr=\"value\""),
    )

    val tokens = Lexer().lex("<tagName attr=\"value\"")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** An attribute value whose closing quote is missing makes the whole input ordinary text. */
@Test
fun htmlのアトリビュートかと思ったら違った() {
    val expected: List<Token> = listOf(
        Text("<tagName"),
        Whitespace(1, ' '),
        Text("attr=\"value>"),
    )

    val tokens = Lexer().lex("<tagName attr=\"value>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** Content placed on its own line between the tags is emitted without the surrounding line breaks. */
@Test
fun html複数行() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", false),
        AttributeName("attr"),
        AttributeValue("value"),
        TagEnd("tagName"),
        HtmlValue("value"),
        EndTagStart("tagName"),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName attr=\"value\">\nvalue\n</tagName>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** A line break inside tag content is turned into a single space in the HtmlValue. */
@Test
fun html改行() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", false),
        AttributeName("attr"),
        AttributeValue("value"),
        TagEnd("tagName"),
        HtmlValue("value faaaa"),
        EndTagStart("tagName"),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName attr=\"value\">\nvalue\nfaaaa</tagName>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** Several quoted attributes on one tag are emitted as name/value pairs in source order. */
@Test
fun htmlアトリビュートいっぱい() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", false),
        AttributeName("attr"),
        AttributeValue("value"),
        AttributeName("attr2"),
        AttributeValue("aaaaaaa"),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName attr=\"value\" attr2=\"aaaaaaa\">")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** A run of `<` characters that never forms a tag collapses into one Text token. */
@Test
fun `html騙し続ける`() {
    val expected: List<Token> = listOf(
        Text("<<<<<<"),
    )

    val tokens = Lexer().lex("<<<<<<")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** A self-closing tag yields a StartTagStart flagged as void, followed by TagEnd. */
@Test
fun html閉じタグ省略() {
    val expected: List<Token> = listOf(
        StartTagStart("tagName", true),
        TagEnd("tagName"),
    )

    val tokens = Lexer().lex("<tagName/>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** A self-closing tag nested in another tag does not disturb the outer tag's end tag. */
@Test
fun html閉じタグ省略ネスト() {
    val expected: List<Token> = listOf(
        StartTagStart("b", false),
        TagEnd("b"),
        StartTagStart("a", true),
        TagEnd("a"),
        EndTagStart("b"),
        TagEnd("b"),
    )

    val tokens = Lexer().lex("<b><a/></b>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
/** Text following a nested self-closing tag is still emitted as the outer tag's HtmlValue. */
@Test
fun html閉じタグ省略ネストと内容() {
    val expected: List<Token> = listOf(
        StartTagStart("b", false),
        TagEnd("b"),
        StartTagStart("a", true),
        TagEnd("a"),
        HtmlValue("aaaa"),
        EndTagStart("b"),
        TagEnd("b"),
    )

    val tokens = Lexer().lex("<b><a/>aaaa</b>")
    println(tokens)

    assertContentEquals(expected, tokens)
}
} }