diff --git a/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt b/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt index d0d0aeb..8887aae 100644 --- a/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt +++ b/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt @@ -10,6 +10,7 @@ class Lexer { var inQuote = false //引用中の判断 var inCode = false //コードブロック内の判断 var inline = false //行頭の判断 + var htmlNest = 0 val codeBuffer = StringBuilder() line@ while (lines.hasNext()) { @@ -26,9 +27,11 @@ class Lexer { inCode = codeblock(iterator, next, tokens, inCode, codeBuffer, inline) } - inCode -> { - codeBuffer.append(next) - } + inCode -> codeBuffer.append(next) + + next == '<' -> htmlNest = html(iterator, htmlNest, codeBuffer, tokens, next) + + htmlNest != 0 -> codeBuffer.append(next) (next == '#' || next == '#') && !inline -> header(iterator, tokens) (next == '>' || next == '>') && !inQuote && !inline -> { @@ -80,6 +83,8 @@ class Lexer { if (inCode) { codeBuffer.append("\n") + } else if (htmlNest != 0) { + codeBuffer.append(" ") } else { tokens.add(Break(1)) } @@ -98,6 +103,95 @@ class Lexer { return tokens } + private fun html( + iterator: PeekableCharIterator, + htmlNest: Int, + codeBuffer: StringBuilder, + tokens: MutableList, + next: Char + ): Int { + var htmlNest1 = htmlNest + var endTag = false + var counter = 0 + if (iterator.peekOrNull() == '/') { + endTag = true + counter++ + } + val tagNameBuilder = StringBuilder() + counter = skipPeekWhitespace(iterator, counter) + + while (iterator.peekOrNull(counter) != null && + iterator.peekOrNull(counter)?.isWhitespace() != true && + iterator.peekOrNull(counter) != '>' && + iterator.peekOrNull(counter) != '/' + ) { + tagNameBuilder.append(iterator.peekOrNull(counter)) + counter++ + } + + counter = skipPeekWhitespace(iterator, counter) + val attributeList = mutableListOf() + intag@ while (iterator.peekOrNull(counter) != null && + (iterator.peekOrNull(counter) != '/' && iterator.peekOrNull(counter) != '>') + ) { + val attrBuilder = StringBuilder() + attr@ while (iterator.peekOrNull(counter) != null && (iterator.peekOrNull(counter) != '=' && iterator.peekOrNull( + counter + )?.isWhitespace() != true) + ) { + attrBuilder.append(iterator.peekOrNull(counter)) + counter++ + } + attributeList.add(AttributeName(attrBuilder.toString())) + counter = skipPeekWhitespace(iterator, counter) + if (iterator.peekOrNull(counter) == '=') { + counter++ + if (iterator.peekOrNull(counter) == '"') { + counter++ + //todo エスケープシーケンス + val peekString = offsetPeekString(iterator, counter, '"') + counter = peekString?.second?.minus(1) ?: counter + if (peekString != null) { + attributeList.add(AttributeValue(peekString.first)) + } else { + break@intag + } + } + } + counter++ + counter = skipPeekWhitespace(iterator, counter) + } + + val void = if (iterator.peekOrNull(counter) == '/') {//閉じタグ省略 + counter = skipPeekWhitespace(iterator, counter) + val peekString = offsetPeekString(iterator, counter, '>') + counter = peekString?.second?.minus(1) ?: counter + htmlNest1-- //あとで1増えるので相殺するためにあらかじめ1減らしておく + true + } else { + false + } + if (iterator.peekOrNull(counter) == '>') { //タグか判定 + if (codeBuffer.isNotBlank()) { //タグ間に文字があれば追加する + tokens.add(HtmlValue(codeBuffer.toString().trim())) + codeBuffer.clear() + } + if (endTag) {//閉じタグ判定 + htmlNest1-- //閉じタグならネストを一つ減らす + tokens.add(EndTagStart(tagNameBuilder.toString())) + } else { + htmlNest1++ + tokens.add(StartTagStart(tagNameBuilder.toString(), void)) + } + tokens.addAll(attributeList) + tokens.add(TagEnd(tagNameBuilder.toString())) + iterator.skip(counter + 1) + } else { + addText(tokens, next.toString()) + } + return htmlNest1 + } + private fun strike( iterator: PeekableCharIterator, next: Char, @@ -357,6 +451,34 @@ class Lexer { return count } + fun skipPeekWhitespace(iterator: PeekableCharIterator, currentOffset: Int = 0): Int { + var offset = currentOffset + while (iterator.peekOrNull(offset)?.isWhitespace() == true) { + offset++ + } + return offset + } + + fun offsetPeekString(iterator: PeekableCharIterator, offset: Int = 0, vararg chars: Char): Pair? { + var counter = offset + val stringBuilder = StringBuilder() + var checkCounter = 0 + while (iterator.peekOrNull(counter) != null && checkCounter < chars.size) { + stringBuilder.append(iterator.peekOrNull(counter)) + if (iterator.peekOrNull(counter) == chars[checkCounter]) { + checkCounter++ + } else { + checkCounter = 0 + } + counter++ + } + if (iterator.peekOrNull(counter) == null && checkCounter != chars.size) { + return null + } + val string = stringBuilder.toString() + return string.substring(0, string.length - chars.size) to counter + } + fun peekString(iterator: PeekableCharIterator, vararg char: Char): String? { var counter = 0 val stringBuilder = StringBuilder() diff --git a/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt b/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt index 5a45610..fee0355 100644 --- a/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt +++ b/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt @@ -29,4 +29,12 @@ data class UrlTitle(val title: String) : Token() data class InlineCodeBlock(val text: String) : Token() data class CodeBlock(val text: String) : Token() data class CodeBlockLanguage(val language: String, val filename: String) : Token() -data class Strike(val strike: String) : Token() \ No newline at end of file +data class Strike(val strike: String) : Token() + +abstract class Html() : Token() +data class StartTagStart(var tag: String, val void: Boolean) : Html() +data class EndTagStart(var tag: String) : Html() +data class TagEnd(var tag: String) : Html() +data class AttributeName(val name: String) : Html() +data class AttributeValue(val value: String) : Html() +data class HtmlValue(val value: String) : Html() \ No newline at end of file diff --git a/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt b/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt index bc02762..8f12903 100644 --- a/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt +++ b/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt @@ -342,8 +342,7 @@ class LexerTest { println(actual) assertContentEquals( - listOf(DiscList, Text("aiueo"), Break(1), Whitespace(4, ' '), DiscList, Text("abcd")), - actual + listOf(DiscList, Text("aiueo"), Break(1), Whitespace(4, ' '), DiscList, Text("abcd")), actual ) } @@ -357,14 +356,8 @@ class LexerTest { assertContentEquals( listOf( - DecimalList('1'), - Text("aiueo"), - Break(1), - Whitespace(4, ' '), - DecimalList('2'), - Text("abcd") - ), - actual + DecimalList('1'), Text("aiueo"), Break(1), Whitespace(4, ' '), DecimalList('2'), Text("abcd") + ), actual ) } @@ -378,14 +371,8 @@ class LexerTest { assertContentEquals( listOf( - DecimalList('1'), - Text("aiueo"), - Break(1), - Whitespace(4, ' '), - DecimalList('2'), - Text("abcd") - ), - actual + DecimalList('1'), Text("aiueo"), Break(1), Whitespace(4, ' '), DecimalList('2'), Text("abcd") + ), actual ) } @@ -399,14 +386,8 @@ class LexerTest { assertContentEquals( listOf( - DecimalList('1'), - Text("aiueo"), - Break(1), - Whitespace(4, ' '), - DecimalList('2'), - Text("abcd") - ), - actual + DecimalList('1'), Text("aiueo"), Break(1), Whitespace(4, ' '), DecimalList('2'), Text("abcd") + ), actual ) } @@ -446,11 +427,7 @@ class LexerTest { assertContentEquals( listOf( - Text("こんにちは~"), - Whitespace(1, ' '), - Url("https://example.com"), - Break(1), - Text("あいうえお") + Text("こんにちは~"), Whitespace(1, ' '), Url("https://example.com"), Break(1), Text("あいうえお") ), actual ) } @@ -495,9 +472,7 @@ class LexerTest { assertContentEquals( listOf( - Asterisk(1, '*'), - Text("a"), - Asterisk(1, '*') + Asterisk(1, '*'), Text("a"), Asterisk(1, '*') ), actual ) } @@ -512,10 +487,7 @@ class LexerTest { assertContentEquals( listOf( - Quote(1), - Asterisk(1, '*'), - Text("a"), - Asterisk(1, '*') + Quote(1), Asterisk(1, '*'), Text("a"), Asterisk(1, '*') ), actual ) } @@ -530,9 +502,7 @@ class LexerTest { assertContentEquals( listOf( - Asterisk(2, '*'), - Text("a"), - Asterisk(2, '*') + Asterisk(2, '*'), Text("a"), Asterisk(2, '*') ), actual ) } @@ -547,9 +517,7 @@ class LexerTest { assertContentEquals( listOf( - Asterisk(2, '_'), - Text("a"), - Asterisk(2, '_') + Asterisk(2, '_'), Text("a"), Asterisk(2, '_') ), actual ) } @@ -817,12 +785,7 @@ class LexerTest { assertContentEquals( listOf( - Text("aiueo"), - Whitespace(1, ' '), - Text("```abcd"), - Asterisk(1, '*'), - Text("a"), - Asterisk(1, '*') + Text("aiueo"), Whitespace(1, ' '), Text("```abcd"), Asterisk(1, '*'), Text("a"), Asterisk(1, '*') ), actual ) } @@ -863,10 +826,7 @@ class LexerTest { assertContentEquals( listOf( - Text("aiueo"), - Whitespace(1, ' '), - Strike("aiueo"), - Text("bcde") + Text("aiueo"), Whitespace(1, ' '), Strike("aiueo"), Text("bcde") ), actual ) } @@ -900,4 +860,265 @@ class LexerTest { ), actual ) } + + @Test + fun html() { + val lexer = Lexer() + + val actual = lexer.lex("") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", false), AttributeName("attr"), AttributeValue("value"), TagEnd("tagName") + ), actual + ) + } + + @Test + fun html2() { + val lexer = Lexer() + + val actual = lexer.lex("") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", false), TagEnd("tagName") + ), actual + ) + } + + @Test + fun html閉じタグ() { + val lexer = Lexer() + + val actual = lexer.lex("") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", false), + AttributeName("attr"), + AttributeValue("value"), + TagEnd("tagName"), + EndTagStart("tagName"), + TagEnd("tagName") + ), actual + ) + } + + @Test + fun html内容() { + val lexer = Lexer() + + val actual = lexer.lex("hello") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", false), + AttributeName("attr"), + AttributeValue("value"), + TagEnd("tagName"), + HtmlValue("hello"), + EndTagStart("tagName"), + TagEnd("tagName") + ), actual + ) + } + + @Test + fun htmlネスト() { + val lexer = Lexer() + + val actual = lexer.lex("hello") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", false), + AttributeName("attr"), + AttributeValue("value"), + TagEnd("tagName"), + StartTagStart("tagB", false), + TagEnd("tagB"), + HtmlValue("hello"), + EndTagStart("tagB"), + TagEnd("tagB"), + EndTagStart("tagName"), + TagEnd("tagName") + ), actual + ) + } + + @Test + fun htmlかと思ったら違った() { + val lexer = Lexer() + + val actual = lexer.lex("") + + println(actual) + + assertContentEquals( + listOf( + Text("") + ), actual + ) + } + + @Test + fun html複数行() { + val lexer = Lexer() + + val actual = lexer.lex("\nvalue\n") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", false), + AttributeName("attr"), + AttributeValue("value"), + TagEnd("tagName"), + HtmlValue("value"), + EndTagStart("tagName"), + TagEnd("tagName") + ), actual + ) + } + + @Test + fun html改行() { + val lexer = Lexer() + + val actual = lexer.lex("\nvalue\nfaaaa") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", false), + AttributeName("attr"), + AttributeValue("value"), + TagEnd("tagName"), + HtmlValue("value faaaa"), + EndTagStart("tagName"), + TagEnd("tagName") + ), actual + ) + } + + @Test + fun htmlアトリビュートいっぱい() { + val lexer = Lexer() + + val actual = lexer.lex("") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", false), + AttributeName("attr"), + AttributeValue("value"), + AttributeName("attr2"), + AttributeValue("aaaaaaa"), + TagEnd("tagName") + ), actual + ) + } + + @Test + fun `html騙し続ける`() { + val lexer = Lexer() + + val actual = lexer.lex("<<<<<<") + + println(actual) + + assertContentEquals( + listOf( + Text("<<<<<<") + ), actual + ) + } + + @Test + fun html閉じタグ省略() { + val lexer = Lexer() + + val actual = lexer.lex("") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("tagName", true), + TagEnd("tagName"), + + ), actual + ) + } + + @Test + fun html閉じタグ省略ネスト() { + val lexer = Lexer() + + val actual = lexer.lex("") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("b", false), + TagEnd("b"), + StartTagStart("a", true), + TagEnd("a"), + EndTagStart("b"), + TagEnd("b"), + ), actual + ) + } + + @Test + fun html閉じタグ省略ネストと内容() { + val lexer = Lexer() + + val actual = lexer.lex("aaaa") + + println(actual) + + assertContentEquals( + listOf( + StartTagStart("b", false), + TagEnd("b"), + StartTagStart("a", true), + TagEnd("a"), + HtmlValue("aaaa"), + EndTagStart("b"), + TagEnd("b"), + ), actual + ) + } } \ No newline at end of file