From 6659720f2c1329f8d72ce195a8c9699196f31820 Mon Sep 17 00:00:00 2001 From: usbharu Date: Thu, 14 Nov 2024 12:23:31 +0900 Subject: [PATCH] =?UTF-8?q?=E8=A1=8C=E9=A0=AD=E4=BB=A5=E5=A4=96=E3=81=AB?= =?UTF-8?q?=E3=83=98=E3=83=83=E3=83=80=E3=83=BC=E7=AD=89=E3=81=8C=E3=81=82?= =?UTF-8?q?=E3=81=A3=E3=81=9F=E5=A0=B4=E5=90=88=E3=81=AB=E3=83=98=E3=83=83?= =?UTF-8?q?=E3=83=80=E3=83=BC=E3=81=A8=E3=81=97=E3=81=A6=E8=AA=8D=E8=AD=98?= =?UTF-8?q?=E3=81=95=E3=82=8C=E3=81=A6=E3=81=97=E3=81=BE=E3=81=86=E5=95=8F?= =?UTF-8?q?=E9=A1=8C=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kotlin/dev/usbharu/markdown/Lexer.kt | 146 ++++++++++-------- .../kotlin/dev/usbharu/markdown/Token.kt | 3 +- .../kotlin/dev/usbharu/markdown/LexerTest.kt | 45 ++++++ 3 files changed, 129 insertions(+), 65 deletions(-) diff --git a/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt b/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt index af08faf..d281ec0 100644 --- a/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt +++ b/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt @@ -7,91 +7,36 @@ class Lexer { val tokens = mutableListOf() val lines = PeekableStringIterator(input.lines()) - var inQuote = false - var inCode = false - + var inQuote = false //引用中の判断 + var inCode = false //コードブロック内の判断 + var inline = false //行頭の判断 val codeBuffer = StringBuilder() line@ while (lines.hasNext()) { - + inline = false //改行時にリセット if (lines.peekOrNull() == "") { blankLine(lines, tokens) } else { val line = lines.next() - val iterator = PeekableCharIterator(line.toCharArray()) char@ while (iterator.hasNext()) { val next = iterator.next() when { next == '`' || next == '`' -> { - //todo ````` のようなやつが来たときのことを考える - if (iterator.peekOrNull() == next) { - val codeBlockBuilder = StringBuilder() - codeBlockBuilder.append(next) - codeBlockBuilder.append(iterator.next()) - if (iterator.peekOrNull() == next) { - codeBlockBuilder.append(iterator.next()) - if (iterator.peekOrNull() == next) { - tokens.add(Text(codeBlockBuilder.toString())) - } else { - if (inCode) { - inCode = false - tokens.add(CodeBlock(codeBuffer.toString().trimStart('\n').trimEnd('\n'))) - codeBuffer.clear() - } else { - inCode = true - var inFilename = false - val language = StringBuilder() - val filename = StringBuilder() - if (iterator.hasNext()) { - codeBlock@ while (iterator.hasNext()) { - val nextLanguage = iterator.next() - if ((nextLanguage == ':' || nextLanguage == ':') && !inFilename) { - inFilename = true - continue@codeBlock - } - if (inFilename) { - filename.append(nextLanguage) - } else { - language.append(nextLanguage) - } - - } - tokens.add(CodeBlockLanguage(language.toString(), filename.toString())) - } - - } - } - - } else if (iterator.peekOrNull() == null) { - tokens.add(Text(codeBlockBuilder.toString())) - } - - } else { - val codeBuilder = StringBuilder() - while (iterator.hasNext() && iterator.peekOrNull() != next) { - codeBuilder.append(iterator.next()) - } - if (iterator.hasNext() && iterator.next() == next) { //インラインコードブロックかと思ったら違った - tokens.add(InlineCodeBlock(codeBuilder.toString())) - } else { - tokens.add(Text(codeBuilder.insert(0, next).toString())) - } - - } + inCode = codeblock(iterator, next, tokens, inCode, codeBuffer, inline) } inCode -> { codeBuffer.append(next) } - next == '#' || next == '#' -> header(iterator, tokens) - (next == '>' || next == '>') && !inQuote -> { + (next == '#' || next == '#') && !inline -> header(iterator, tokens) + (next == '>' || next == '>') && !inQuote && !inline -> { inQuote = true quote(iterator, tokens) } - next == '-' || next == '=' || next == 'ー' || next == '=' -> { + (next == '-' || next == '=' || next == 'ー' || next == '=') && !inline -> { if (iterator.peekOrNull()?.isWhitespace() == true) { //-の直後がスペースならリストの可能性 list(iterator, tokens) } else {//それ以外ならセパレーターの可能性 @@ -99,7 +44,7 @@ class Lexer { } } - next in '0'..'9' || next in '0'..'9' -> + (next in '0'..'9' || next in '0'..'9') && !inline -> decimalList(iterator, tokens, next) next == '[' || next == '「' -> tokens.add(SquareBracketStart) @@ -133,6 +78,9 @@ class Lexer { } } } + if (!inline && tokens.lastOrNull() !is Whitespace) { //行頭が空白の場合は一旦無視する + inline = true + } } @@ -157,6 +105,76 @@ class Lexer { return tokens } + private fun codeblock( + iterator: PeekableCharIterator, + next: Char, + tokens: MutableList, + inCode: Boolean, + codeBuffer: StringBuilder, + inline: Boolean + ): Boolean { + var inCode1 = inCode + if (iterator.peekOrNull() == next && !inline) { //行頭かつ次の文字が` + val codeBlockBuilder = StringBuilder() + codeBlockBuilder.append(next) + codeBlockBuilder.append(iterator.next()) + if (iterator.peekOrNull() == next) { + codeBlockBuilder.append(iterator.next()) + if (iterator.peekOrNull() == next) { + tokens.add(Text(codeBlockBuilder.toString())) + } else { + if (inCode1) { + inCode1 = false + tokens.add(CodeBlock(codeBuffer.toString().trimStart('\n').trimEnd('\n'))) + codeBuffer.clear() + } else { + inCode1 = true + var inFilename = false + val language = StringBuilder() + val filename = StringBuilder() + if (iterator.hasNext()) { + codeBlock@ while (iterator.hasNext()) { + val nextLanguage = iterator.next() + if ((nextLanguage == ':' || nextLanguage == ':') && !inFilename) { + inFilename = true + continue@codeBlock + } + if (inFilename) { + filename.append(nextLanguage) + } else { + language.append(nextLanguage) + } + + } + tokens.add(CodeBlockLanguage(language.toString(), filename.toString())) + } + + } + } + + } else if (iterator.peekOrNull() == null) { + tokens.add(Text(codeBlockBuilder.toString())) + } + + } else { + val codeBuilder = StringBuilder() + while (iterator.hasNext() && iterator.peekOrNull() != next) { + codeBuilder.append(iterator.next()) + } + if (iterator.hasNext() && iterator.next() == next) { //インラインコードブロックかと思ったら違った + if (codeBuilder.isEmpty()) { + tokens.add(Text("$next$next")) + } else { + tokens.add(InlineCodeBlock(codeBuilder.toString())) + } + } else { + tokens.add(Text(codeBuilder.insert(0, next).toString())) + } + + } + return inCode1 + } + private fun asterisk( iterator: PeekableCharIterator, next: Char, diff --git a/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt b/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt index 5fd7130..5540861 100644 --- a/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt +++ b/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt @@ -28,4 +28,5 @@ data object Exclamation : Token() data class UrlTitle(val title: String) : Token() data class InlineCodeBlock(val text: String) : Token() data class CodeBlock(val text: String) : Token() -data class CodeBlockLanguage(val language: String, val filename: String) : Token() \ No newline at end of file +data class CodeBlockLanguage(val language: String, val filename: String) : Token() +data object Tilde : Token() \ No newline at end of file diff --git a/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt b/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt index 1b11ce9..d9d4328 100644 --- a/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt +++ b/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt @@ -760,4 +760,49 @@ class LexerTest { ), actual ) } + + @Test + fun 唐突のヘッダー() { + val lexer = Lexer() + + val actual = lexer.lex("aiueo #a") + + println(actual) + + assertContentEquals( + listOf( + Text("aiueo"), Whitespace(1, ' '), Text("#a") + ), actual + ) + } + + @Test + fun 唐突のリスト() { + val lexer = Lexer() + + val actual = lexer.lex("aiueo - a") + + println(actual) + + assertContentEquals( + listOf( + Text("aiueo"), Whitespace(1, ' '), Text("-"), Whitespace(1, ' '), Text("a") + ), actual + ) + } + + @Test + fun 唐突のコードブロック() { + val lexer = Lexer() + + val actual = lexer.lex("aiueo ```abcd") + + println(actual) + + assertContentEquals( + listOf( + Text("aiueo"), Whitespace(1, ' '), Text("```abcd") + ), actual + ) + } } \ No newline at end of file