refactor: move `splitSegments` to another file
This commit is contained in:
parent
fdf574c248
commit
2524fb3272
|
@ -14,6 +14,7 @@ import { intersperse } from '@/misc/prelude/array.js';
|
||||||
import { normalizeForSearch } from '@/misc/normalize-for-search.js';
|
import { normalizeForSearch } from '@/misc/normalize-for-search.js';
|
||||||
import type { IMentionedRemoteUsers } from '@/models/Note.js';
|
import type { IMentionedRemoteUsers } from '@/models/Note.js';
|
||||||
import { bindThis } from '@/decorators.js';
|
import { bindThis } from '@/decorators.js';
|
||||||
|
import { splitSegments } from '@/misc/split-segments.js';
|
||||||
import type { DefaultTreeAdapterMap } from 'parse5';
|
import type { DefaultTreeAdapterMap } from 'parse5';
|
||||||
import type * as mfm from 'mfm-js';
|
import type * as mfm from 'mfm-js';
|
||||||
|
|
||||||
|
@ -250,50 +251,6 @@ export class MfmService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Splits `text` around the matches of `regexes`, keeping the matched parts in the output.
 *
 * The text is scanned from left to right. At every step each regex is run against the
 * not-yet-consumed remainder and the match that starts earliest is taken; when several
 * regexes match at the same position, the one that comes first in `regexes` wins.
 *
 * Note: `lastIndex` of every given regex is reset to 0 on each step, so the regex objects
 * passed in are mutated.
 *
 * @param text - The text to split.
 * @param regexes - The patterns that delimit segments. A match of length zero is ignored,
 *   because it would consume no input and the scan could never make progress.
 * @returns A list of `[regexIdx, text]` pairs in source order. `regexIdx` is the index in
 *   `regexes` of the pattern that produced the segment, or `-1` for the plain text found
 *   before, between and after matches. A `-1` entry is emitted before every match and once
 *   at the end, so it may hold an empty string.
 */
function splitSegments(text: string, regexes: RegExp[]): [regexIdx: number, text: string][] {
	const result: [regexIdx: number, text: string][] = [];

	let rest = text;
	for (;;) {
		let earliest: [number, RegExpExecArray] | null = null;

		for (const [i, regex] of regexes.entries()) {
			// Global / sticky regexes remember their position; always search from the start of `rest`.
			regex.lastIndex = 0;
			const current = regex.exec(rest);

			// An empty match would leave `rest` unchanged and make the outer loop spin forever.
			if (current == null || current[0].length === 0) continue;

			// Strict `<` keeps the lower-indexed regex when two matches start at the same offset.
			if (earliest == null || current.index < earliest[1].index) {
				earliest = [i, current];
			}
		}

		if (earliest == null) {
			// Nothing left to split on: whatever remains is plain text.
			result.push([-1, rest]);
			break;
		}

		const [i, match] = earliest;
		const segment = match[0];

		result.push([-1, rest.slice(0, match.index)]);
		result.push([i, segment]);
		rest = rest.slice(match.index + segment.length);
	}

	return result;
}
|
|
||||||
|
|
||||||
const emojiCodeRegex = /(?<![a-z0-9]):[a-z0-9_]+:(?![a-z0-9])/i;
|
const emojiCodeRegex = /(?<![a-z0-9]):[a-z0-9_]+:(?![a-z0-9])/i;
|
||||||
|
|
||||||
return splitSegments(text, [twemojiRegex.default, emojiCodeRegex])
|
return splitSegments(text, [twemojiRegex.default, emojiCodeRegex])
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: syuilo and misskey-project
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Splits `text` around the matches of `regexes`, keeping the matched parts in the output.
 *
 * The text is scanned from left to right. At every step each regex is run against the
 * not-yet-consumed remainder and the match that starts earliest is taken; when several
 * regexes match at the same position, the one that comes first in `regexes` wins.
 *
 * Note: `lastIndex` of every given regex is reset to 0 on each step, so the regex objects
 * passed in are mutated.
 *
 * @param text - The text to split.
 * @param regexes - The patterns that delimit segments. A match of length zero is ignored,
 *   because it would consume no input and the scan could never make progress.
 * @returns A list of `[regexIdx, text]` pairs in source order. `regexIdx` is the index in
 *   `regexes` of the pattern that produced the segment, or `-1` for the plain text found
 *   before, between and after matches. A `-1` entry is emitted before every match and once
 *   at the end, so it may hold an empty string.
 */
export function splitSegments(text: string, regexes: RegExp[]): [regexIdx: number, text: string][] {
	const result: [regexIdx: number, text: string][] = [];

	let rest = text;
	for (;;) {
		let earliest: [number, RegExpExecArray] | null = null;

		for (const [i, regex] of regexes.entries()) {
			// Global / sticky regexes remember their position; always search from the start of `rest`.
			regex.lastIndex = 0;
			const current = regex.exec(rest);

			// An empty match would leave `rest` unchanged and make the outer loop spin forever.
			if (current == null || current[0].length === 0) continue;

			// Strict `<` keeps the lower-indexed regex when two matches start at the same offset.
			if (earliest == null || current.index < earliest[1].index) {
				earliest = [i, current];
			}
		}

		if (earliest == null) {
			// Nothing left to split on: whatever remains is plain text.
			result.push([-1, rest]);
			break;
		}

		const [i, match] = earliest;
		const segment = match[0];

		result.push([-1, rest.slice(0, match.index)]);
		result.push([i, segment]);
		rest = rest.slice(match.index + segment.length);
	}

	return result;
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
/*
|
||||||
|
* SPDX-FileCopyrightText: syuilo and misskey-project
|
||||||
|
* SPDX-License-Identifier: AGPL-3.0-only
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { describe, expect, test } from '@jest/globals';
|
||||||
|
import { splitSegments } from '@/misc/split-segments.js';
|
||||||
|
|
||||||
|
// Unit tests for splitSegments: each case feeds a text plus a list of regexes and checks
// the exact sequence of [regexIdx, text] pairs that comes back (-1 marks unmatched text).
describe('misc:split-segments', () => {
	test('simple', () => {
		// One regex, one occurrence.
		const singleHit = splitSegments('abcdefghijklmn', [/c/]);
		expect(singleHit).toStrictEqual([
			[-1, 'ab'],
			[0, 'c'],
			[-1, 'defghijklmn'],
		]);

		// One regex, repeated occurrences.
		const repeatedHit = splitSegments('abcdefgabcdefg', [/c/]);
		expect(repeatedHit).toStrictEqual([
			[-1, 'ab'],
			[0, 'c'],
			[-1, 'defgab'],
			[0, 'c'],
			[-1, 'defg'],
		]);

		// A regex that never matches does not disturb the result; the reported index
		// follows the position of the matching regex in the list.
		const matcherFirst = splitSegments('abcdefghijklmn', [/c/, /x/]);
		expect(matcherFirst).toStrictEqual([
			[-1, 'ab'],
			[0, 'c'],
			[-1, 'defghijklmn'],
		]);

		const matcherSecond = splitSegments('abcdefghijklmn', [/x/, /c/]);
		expect(matcherSecond).toStrictEqual([
			[-1, 'ab'],
			[1, 'c'],
			[-1, 'defghijklmn'],
		]);
	});

	test('match multiple regex', () => {
		// Two regexes that both match, interleaved through the text.
		const interleaved = splitSegments('abcdefgabcdefg', [/c/, /f/]);
		expect(interleaved).toStrictEqual([
			[-1, 'ab'],
			[0, 'c'],
			[-1, 'de'],
			[1, 'f'],
			[-1, 'gab'],
			[0, 'c'],
			[-1, 'de'],
			[1, 'f'],
			[-1, 'g'],
		]);
	});
});
|
Loading…
Reference in New Issue