From 2524fb3272171a166f777bbd0486aef6ab0ca85e Mon Sep 17 00:00:00 2001 From: anatawa12 Date: Thu, 9 Jan 2025 13:38:59 +0900 Subject: [PATCH] refactor: move `splitSegments` to another file --- packages/backend/src/core/MfmService.ts | 45 +---------------- packages/backend/src/misc/split-segments.ts | 48 +++++++++++++++++++ .../backend/test/unit/misc/split-segments.ts | 20 ++++++++ 3 files changed, 69 insertions(+), 44 deletions(-) create mode 100644 packages/backend/src/misc/split-segments.ts create mode 100644 packages/backend/test/unit/misc/split-segments.ts diff --git a/packages/backend/src/core/MfmService.ts b/packages/backend/src/core/MfmService.ts index befe443d98..90a657b484 100644 --- a/packages/backend/src/core/MfmService.ts +++ b/packages/backend/src/core/MfmService.ts @@ -14,6 +14,7 @@ import { intersperse } from '@/misc/prelude/array.js'; import { normalizeForSearch } from '@/misc/normalize-for-search.js'; import type { IMentionedRemoteUsers } from '@/models/Note.js'; import { bindThis } from '@/decorators.js'; +import { splitSegments } from '@/misc/split-segments.js'; import type { DefaultTreeAdapterMap } from 'parse5'; import type * as mfm from 'mfm-js'; @@ -250,50 +251,6 @@ export class MfmService { } } - /** - * textをregexesで分割するが、分割するときに regexes にマッチした部分も含める。 - */ - function splitSegments(text: string, regexes: RegExp[]): [regexIdx: number, text: string][] { - const result: [regexIdx: number, text: string][] = []; - - let rest = text; - for (;;) { - let matchRegex: [number, RegExpExecArray] | null = null; - - for (let i = 0; i < regexes.length; i++) { - const regex = regexes[i]; - regex.lastIndex = 0; - const matchCurrent = regex.exec(rest); - if (matchCurrent) { - if (matchRegex != null) { - if (matchCurrent.index < matchRegex[1].index) { - matchRegex = [i, matchCurrent]; - } - } else { - matchRegex = [i, matchCurrent]; - } - } - } - - if (matchRegex != null) { - const [i, match] = matchRegex; - - const head = rest.slice(0, match.index); - const segment = match[0]; - const tail = rest.slice(match.index + segment.length); - - result.push([-1, head]); - result.push([i, segment]); - rest = tail; - } else { - result.push([-1, rest]); - break; - } - } - - return result; - } - const emojiCodeRegex = /(? { + test('simple', () => { + expect(splitSegments('abcdefghijklmn', [/c/])).toStrictEqual([[-1, 'ab'], [0, 'c'], [-1, 'defghijklmn']]); + expect(splitSegments('abcdefgabcdefg', [/c/])).toStrictEqual([[-1, 'ab'], [0, 'c'], [-1, 'defgab'], [0, 'c'], [-1, 'defg']]); + + expect(splitSegments('abcdefghijklmn', [/c/, /x/])).toStrictEqual([[-1, 'ab'], [0, 'c'], [-1, 'defghijklmn']]); + expect(splitSegments('abcdefghijklmn', [/x/, /c/])).toStrictEqual([[-1, 'ab'], [1, 'c'], [-1, 'defghijklmn']]); + }); + test('match multiple regex', () => { + expect(splitSegments('abcdefgabcdefg', [/c/, /f/])).toStrictEqual([[-1, 'ab'], [0, 'c'], [-1, 'de'], [1, 'f'], [-1, 'gab'], [0, 'c'], [-1, 'de'], [1, 'f'], [-1, 'g']]); + }); +});