From 9a9de9e062c19c1540362071bdf977d9c2503185 Mon Sep 17 00:00:00 2001 From: usbharu Date: Tue, 12 Nov 2024 12:24:35 +0900 Subject: [PATCH] =?UTF-8?q?feat:=20=E3=83=AA=E3=82=B9=E3=83=88=E3=81=A8?= =?UTF-8?q?=E3=82=BB=E3=83=91=E3=83=AC=E3=83=BC=E3=82=BF=E3=83=BC=E3=81=AE?= =?UTF-8?q?=E5=AD=97=E5=8F=A5=E8=A7=A3=E6=9E=90=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../kotlin/dev/usbharu/markdown/Lexer.kt | 86 +++++++- .../dev/usbharu/markdown/PeekableIterator.kt | 2 + .../kotlin/dev/usbharu/markdown/Token.kt | 14 +- .../kotlin/dev/usbharu/markdown/LexerTest.kt | 206 ++++++++++++++++++ 4 files changed, 303 insertions(+), 5 deletions(-) diff --git a/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt b/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt index fa7b778..a322f7f 100644 --- a/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt +++ b/library/src/commonMain/kotlin/dev/usbharu/markdown/Lexer.kt @@ -1,5 +1,7 @@ package dev.usbharu.markdown +import kotlin.collections.List + class Lexer { fun lex(input: String): List { val tokens = mutableListOf() @@ -14,8 +16,20 @@ class Lexer { val iterator = PeekableCharIterator(line.toCharArray()) while (iterator.hasNext()) { when (val next = iterator.next()) { - '#' -> header(iterator, tokens) - '>' -> quote(iterator, tokens) + '#', '#' -> header(iterator, tokens) + '>', '>' -> quote(iterator, tokens) + '-', '=', 'ー', '=' -> { + if (iterator.peekOrNull()?.isWhitespace() == true) { //-の直後がスペースならリストの可能性 + list(iterator, tokens, next) + } else {//それ以外ならセパレーターの可能性 + separator(next, iterator, tokens) + } + } + + ' ', ' ' -> { + tokens.add(Whitespace(skipWhitespace(iterator) + 1, next)) //nextの分1足す + } + else -> { tokens.add(Text(next + collect(iterator))) tokens.add(Break(1)) @@ -39,6 +53,61 @@ class Lexer { return tokens } + private fun list( + iterator: PeekableCharIterator, + tokens: MutableList, + next: Char + ) { + + if (iterator.peekOrNull()?.isWhitespace() == true) { + tokens.add(DiscList) + } + + skipWhitespace(iterator) + if (iterator.peekOrNull() == '[') { + iterator.next() + val checkedChar = iterator.peekOrNull() ?: return + iterator.next() + if ((checkedChar == 'x' || checkedChar == ' ' || checkedChar == ' ').not()) { + tokens.add(Text("[$checkedChar")) + return + } + val checked = checkedChar == 'x' + if (iterator.peekOrNull() == ']') { + iterator.next() + if (iterator.peekOrNull()?.isWhitespace() == true) { + iterator.next() + tokens.add(CheckBox(checked)) + return + } + } + tokens.add(Text("[$checkedChar")) + } + } + + private fun separator( + next: Char, + iterator: PeekableCharIterator, + tokens: MutableList + ) { + val builder = StringBuilder() + builder.append(next) + + while (iterator.peekOrNull() == next) { + builder.append(iterator.next()) + } + if (iterator.peekOrNull() == null && builder.length >= 3) { //行末まで到達していてかつ長さが3以上か + tokens.add(Separator(builder.length, next)) //セパレーターとして追加 + } else { + val token = tokens.lastOrNull() //ただの文字として追加 + if (token is Text) { + tokens[tokens.lastIndex] = Text(token.text + builder.toString()) + } else { + tokens.add(Text(builder.toString())) + } + } + } + private fun quote( iterator: PeekableCharIterator, tokens: MutableList @@ -49,7 +118,7 @@ class Lexer { count++ } tokens.add(Quote(count)) - iterator.next() //スペースを無視 + skipWhitespace(iterator) tokens.add(Text(collect(iterator))) tokens.add(Break(1)) } @@ -64,11 +133,20 @@ class Lexer { count++ } tokens.add(Header(count)) - iterator.next() //スペースを無視 + skipWhitespace(iterator) tokens.add(Text(collect(iterator))) tokens.add(Break(1)) } + fun skipWhitespace(iterator: PeekableCharIterator): Int { + var count = 0 + while (iterator.peekOrNull()?.isWhitespace() == true) { + iterator.next() + count++ + } + return count + } + fun collect(iterator: PeekableCharIterator): String { val char = mutableListOf() while (iterator.hasNext()) { diff --git a/library/src/commonMain/kotlin/dev/usbharu/markdown/PeekableIterator.kt b/library/src/commonMain/kotlin/dev/usbharu/markdown/PeekableIterator.kt index 87d190e..3b0126e 100644 --- a/library/src/commonMain/kotlin/dev/usbharu/markdown/PeekableIterator.kt +++ b/library/src/commonMain/kotlin/dev/usbharu/markdown/PeekableIterator.kt @@ -1,5 +1,7 @@ package dev.usbharu.markdown +import kotlin.collections.List + class PeekableCharIterator(private val charArray: CharArray) : Iterator { private var index = 0 override fun hasNext(): Boolean = index < charArray.size diff --git a/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt b/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt index e38007b..3f290a9 100644 --- a/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt +++ b/library/src/commonMain/kotlin/dev/usbharu/markdown/Token.kt @@ -5,4 +5,16 @@ sealed class Token() data class Text(var text: String) : Token() data class Break(var count: Int) : Token() data class Header(var count: Int) : Token() -data class Quote(var count: Int) : Token() \ No newline at end of file +data class Quote(var count: Int) : Token() +data class Separator(var count: Int, val char: Char) : Token() +data class Whitespace(var count: Int, val whitespace: Char) : Token() +abstract class List(val type: ListType) : Token() { + enum class ListType { + DISC, + DECIMAL + } +} + +data object DiscList : List(ListType.DISC) +data class DecimalList(val number: Int) : List(ListType.DECIMAL) +data class CheckBox(val checked:Boolean): Token() \ No newline at end of file diff --git a/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt b/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt index ba393c2..afb1b14 100644 --- a/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt +++ b/library/src/commonTest/kotlin/dev/usbharu/markdown/LexerTest.kt @@ -16,6 +16,17 @@ class LexerTest { assertContentEquals(listOf(Break(1)), actual) } + @Test + fun 改行2() { + val lexer = Lexer() + + val actual = lexer.lex("\r\n") + + println(actual) + + assertContentEquals(listOf(Break(1)), actual) + } + @Test fun 複数の改行() { val lexer = Lexer() @@ -82,6 +93,29 @@ class LexerTest { assertContentEquals(listOf(Header(2), Text("abcd efgh")), actual) } + @Test + fun ヘッダー後の空白は無視() { + + val lexer = Lexer() + + val actual = lexer.lex("## abcd efgh") + + println(actual) + + assertContentEquals(listOf(Header(2), Text("abcd efgh")), actual) + } + + @Test + fun 全角ヘッダー() { + val lexer = Lexer() + + val actual = lexer.lex("# abcd efgh") + + println(actual) + + assertContentEquals(listOf(Header(1), Text("abcd efgh")), actual) + } + @Test fun `ヘッダーの中にヘッダー`() { val lexer = Lexer() @@ -125,4 +159,176 @@ class LexerTest { assertContentEquals(listOf(Quote(2), Text(">abcd")), actual) } + + @Test + fun 全角引用() { + val lexer = Lexer() + + val actual = lexer.lex("> >abcd") + + println(actual) + + assertContentEquals(listOf(Quote(1), Text(">abcd")), actual) + } + + @Test + fun 引用後の空白は無視() { + val lexer = Lexer() + + val actual = lexer.lex(">> >abcd") + + println(actual) + + assertContentEquals(listOf(Quote(2), Text(">abcd")), actual) + } + + @Test + fun セパレーター() { + val lexer = Lexer() + + val actual = lexer.lex("---") + + println(actual) + + assertContentEquals(listOf(Separator(3, '-')), actual) + } + + @Test + fun セパレーター2() { + val lexer = Lexer() + + val actual = lexer.lex("===") + + println(actual) + + assertContentEquals(listOf(Separator(3, '=')), actual) + } + + @Test + fun セパレーター混在() { + val lexer = Lexer() + + val actual = lexer.lex("-=-") + + println(actual) + + assertContentEquals(listOf(Text("-=-")), actual) + } + + @Test + fun セパレーターかと思ったら本文だった() { + val lexer = Lexer() + + val actual = lexer.lex("---aiueo") + + println(actual) + + assertContentEquals(listOf(Text("---"), Text("aiueo")), actual) + } + + @Test + fun チェックボックス() { + val lexer = Lexer() + + val actual = lexer.lex("- [x] a") + + println(actual) + + assertContentEquals(listOf(DiscList, CheckBox(true), Text("a")), actual) + } + + @Test + fun チェックボックス2() { + val lexer = Lexer() + + val actual = lexer.lex("- [ ] a") + + println(actual) + + assertContentEquals(listOf(DiscList, CheckBox(false), Text("a")), actual) + } + + @Test + fun チェックボックスかと思ったら違った() { + val lexer = Lexer() + + val actual = lexer.lex("- [xa a") + + println(actual) + + assertContentEquals(listOf(DiscList, Text("[x"), Text("a a")), actual) + } + + @Test + fun チェックボックスかと思ったら違った2() { + val lexer = Lexer() + + val actual = lexer.lex("- [a a") + + println(actual) + + assertContentEquals(listOf(DiscList, Text("[a"), Whitespace(1, ' '), Text("a")), actual) + } + + @Test + fun チェックボックスかと思ったら違った3() { + val lexer = Lexer() + + val actual = lexer.lex("-aiueo") + + println(actual) + + assertContentEquals(listOf(Text("-"), Text("aiueo")), actual) + } + + @Test + fun チェックボックスいっぱい() { + val lexer = Lexer() + + val actual = lexer.lex("- [ ] a\n- [x] b\n- [ ] c\n- [x] d") + + println(actual) + + assertContentEquals( + listOf( + DiscList, + CheckBox(false), + Text("a"), + Break(1), + DiscList, + CheckBox(true), + Text("b"), + Break(1), + DiscList, + CheckBox(false), + Text("c"), + Break(1), + DiscList, + CheckBox(true), + Text("d"), + ), actual + ) + } + + @Test + fun ディスクリスト() { + val lexer = Lexer() + + val actual = lexer.lex("- aiueo") + + println(actual) + + assertContentEquals(listOf(DiscList, Text("aiueo")), actual) + } + + @Test + fun ディスクリストいっぱい() { + val lexer = Lexer() + + val actual = lexer.lex("- aiueo\n- abcd") + + println(actual) + + assertContentEquals(listOf(DiscList, Text("aiueo"), Break(1), DiscList, Text("abcd")), actual) + } } \ No newline at end of file