From 4bdbe794a69d196dfb94ee28180b2680531b400c Mon Sep 17 00:00:00 2001 From: syuilo <4439005+syuilo@users.noreply.github.com> Date: Sat, 29 Nov 2025 21:19:55 +0900 Subject: [PATCH] =?UTF-8?q?perf(backend):=20parse5=E3=82=92=E3=82=84?= =?UTF-8?q?=E3=82=81=E3=81=A6=E8=BB=BD=E9=87=8F=E3=81=AA=E5=AE=9F=E8=A3=85?= =?UTF-8?q?=E3=81=AB=E3=81=97=E3=80=81=E3=83=A1=E3=83=A2=E3=83=AA=E5=89=8A?= =?UTF-8?q?=E6=B8=9B=E3=83=BB=E9=AB=98=E9=80=9F=E5=8C=96=20(#16892)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * wip * test * Revert "test" This reverts commit b7c5ae72144874a378cef7a13320e8b89f40d6e9. * Update MfmService.ts --- packages/backend/package.json | 1 - packages/backend/src/core/MfmService.ts | 155 +++++++++--------- .../src/server/oauth/OAuth2ProviderService.ts | 2 +- pnpm-lock.yaml | 3 - 4 files changed, 80 insertions(+), 81 deletions(-) diff --git a/packages/backend/package.json b/packages/backend/package.json index edb8524330..c06da99760 100644 --- a/packages/backend/package.json +++ b/packages/backend/package.json @@ -150,7 +150,6 @@ "oauth2orize-pkce": "0.1.2", "os-utils": "0.0.14", "otpauth": "9.4.1", - "parse5": "7.3.0", "pg": "8.16.3", "pkce-challenge": "4.1.0", "probe-image-size": "7.2.3", diff --git a/packages/backend/src/core/MfmService.ts b/packages/backend/src/core/MfmService.ts index a359d5c838..b9f1c62d9d 100644 --- a/packages/backend/src/core/MfmService.ts +++ b/packages/backend/src/core/MfmService.ts @@ -5,7 +5,7 @@ import { URL } from 'node:url'; import { Inject, Injectable } from '@nestjs/common'; -import * as parse5 from 'parse5'; +import * as htmlParser from 'node-html-parser'; import { DI } from '@/di-symbols.js'; import type { Config } from '@/config.js'; import { intersperse } from '@/misc/prelude/array.js'; @@ -13,13 +13,8 @@ import { normalizeForSearch } from '@/misc/normalize-for-search.js'; import type { IMentionedRemoteUsers } from '@/models/Note.js'; import { bindThis } from '@/decorators.js'; 
import { escapeHtml } from '@/misc/escape-html.js'; -import type { DefaultTreeAdapterMap } from 'parse5'; import type * as mfm from 'mfm-js'; -const treeAdapter = parse5.defaultTreeAdapter; -type Node = DefaultTreeAdapterMap['node']; -type ChildNode = DefaultTreeAdapterMap['childNode']; - const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/; const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/; @@ -38,68 +33,68 @@ export class MfmService { const normalizedHashtagNames = hashtagNames == null ? undefined : new Set(hashtagNames.map(x => normalizeForSearch(x))); - const dom = parse5.parseFragment(html); + const doc = htmlParser.parse(`
${html}
`); let text = ''; - for (const n of dom.childNodes) { + for (const n of doc.childNodes) { analyze(n); } return text.trim(); - function getText(node: Node): string { - if (treeAdapter.isTextNode(node)) return node.value; - if (!treeAdapter.isElementNode(node)) return ''; - if (node.nodeName === 'br') return '\n'; + function getText(node: htmlParser.Node): string { + if (node instanceof htmlParser.TextNode) return node.textContent; + if (!(node instanceof htmlParser.HTMLElement)) return ''; + if (node.tagName === 'BR') return '\n'; - if (node.childNodes) { + if (node.childNodes != null) { return node.childNodes.map(n => getText(n)).join(''); } return ''; } - function appendChildren(childNodes: ChildNode[]): void { - if (childNodes) { + function analyzeChildren(childNodes: htmlParser.Node[] | null): void { + if (childNodes != null) { for (const n of childNodes) { analyze(n); } } } - function analyze(node: Node) { - if (treeAdapter.isTextNode(node)) { - text += node.value; + function analyze(node: htmlParser.Node) { + if (node instanceof htmlParser.TextNode) { + text += node.textContent; return; } // Skip comment or document type node - if (!treeAdapter.isElementNode(node)) { + if (!(node instanceof htmlParser.HTMLElement)) { return; } - switch (node.nodeName) { - case 'br': { + switch (node.tagName) { + case 'BR': { text += '\n'; break; } - case 'a': { + case 'A': { const txt = getText(node); - const rel = node.attrs.find(x => x.name === 'rel'); - const href = node.attrs.find(x => x.name === 'href'); + const rel = node.attributes.rel; + const href = node.attributes.href; // ハッシュタグ - if (normalizedHashtagNames && href && normalizedHashtagNames.has(normalizeForSearch(txt))) { + if (normalizedHashtagNames && href != null && normalizedHashtagNames.has(normalizeForSearch(txt))) { text += txt; // メンション - } else if (txt.startsWith('@') && !(rel && rel.value.startsWith('me '))) { + } else if (txt.startsWith('@') && !(rel != null && rel.startsWith('me '))) { const part = 
txt.split('@'); if (part.length === 2 && href) { //#region ホスト名部分が省略されているので復元する - const acct = `${txt}@${(new URL(href.value)).hostname}`; + const acct = `${txt}@${(new URL(href)).hostname}`; text += acct; //#endregion } else if (part.length === 3) { @@ -114,17 +109,17 @@ export class MfmService { if (!href) { return txt; } - if (!txt || txt === href.value) { // #6383: Missing text node - if (href.value.match(urlRegexFull)) { - return href.value; + if (!txt || txt === href) { // #6383: Missing text node + if (href.match(urlRegexFull)) { + return href; } else { - return `<${href.value}>`; + return `<${href}>`; } } - if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) { - return `[${txt}](<${href.value}>)`; // #6846 + if (href.match(urlRegex) && !href.match(urlRegexFull)) { + return `[${txt}](<${href}>)`; // #6846 } else { - return `[${txt}](${href.value})`; + return `[${txt}](${href})`; } }; @@ -133,60 +128,64 @@ export class MfmService { break; } - case 'h1': { + case 'H1': { text += '【'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '】\n'; break; } - case 'b': - case 'strong': { + case 'B': + case 'STRONG': { text += '**'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '**'; break; } - case 'small': { + case 'SMALL': { text += ''; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += ''; break; } - case 's': - case 'del': { + case 'S': + case 'DEL': { text += '~~'; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += '~~'; break; } - case 'i': - case 'em': { + case 'I': + case 'EM': { text += ''; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); text += ''; break; } - case 'ruby': { + case 'RUBY': { let ruby: [string, string][] = []; for (const child of node.childNodes) { - if (child.nodeName === 'rp') { + if ((child instanceof htmlParser.TextNode) && !/\s|\[|\]/.test(child.textContent)) { + 
ruby.push([child.textContent, '']); continue; } - if (treeAdapter.isTextNode(child) && !/\s|\[|\]/.test(child.value)) { - ruby.push([child.value, '']); + + if (!(child instanceof htmlParser.HTMLElement)) continue; + + if (child.tagName === 'RP') { continue; } - if (child.nodeName === 'rt' && ruby.length > 0) { + + if (child.tagName === 'RT' && ruby.length > 0) { const rt = getText(child); if (/\s|\[|\]/.test(rt)) { // If any space is included in rt, it is treated as a normal text ruby = []; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); break; } else { ruby.at(-1)![1] = rt; @@ -195,7 +194,7 @@ export class MfmService { } // If any other element is included in ruby, it is treated as a normal text ruby = []; - appendChildren(node.childNodes); + analyzeChildren(node.childNodes); break; } for (const [base, rt] of ruby) { @@ -205,26 +204,30 @@ export class MfmService { } // block code (
)
-				case 'pre': {
-					if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
+				case 'PRE': {
+					if (node.childNodes.length === 1 && (node.childNodes[0] instanceof htmlParser.HTMLElement) && node.childNodes[0].tagName === 'CODE') {
 						text += '\n```\n';
 						text += getText(node.childNodes[0]);
 						text += '\n```\n';
+					} else if (node.childNodes.length === 1 && (node.childNodes[0] instanceof htmlParser.TextNode) && node.childNodes[0].textContent.startsWith('<code>') && node.childNodes[0].textContent.endsWith('</code>')) {
+						text += '\n```\n';
+						text += node.childNodes[0].textContent.slice(6, -7);
+						text += '\n```\n';
 					} else {
-						appendChildren(node.childNodes);
+						analyzeChildren(node.childNodes);
 					}
 					break;
 				}
 
 				// inline code (<code>)
-				case 'code': {
+				case 'CODE': {
 					text += '`';
-					appendChildren(node.childNodes);
+					analyzeChildren(node.childNodes);
 					text += '`';
 					break;
 				}
 
-				case 'blockquote': {
+				case 'BLOCKQUOTE': {
 					const t = getText(node);
 					if (t) {
 						text += '\n> ';
@@ -233,33 +236,33 @@ export class MfmService {
 					break;
 				}
 
-				case 'p':
-				case 'h2':
-				case 'h3':
-				case 'h4':
-				case 'h5':
-				case 'h6': {
+				case 'P':
+				case 'H2':
+				case 'H3':
+				case 'H4':
+				case 'H5':
+				case 'H6': {
 					text += '\n\n';
-					appendChildren(node.childNodes);
+					analyzeChildren(node.childNodes);
 					break;
 				}
 
 				// other block elements
-				case 'div':
-				case 'header':
-				case 'footer':
-				case 'article':
-				case 'li':
-				case 'dt':
-				case 'dd': {
+				case 'DIV':
+				case 'HEADER':
+				case 'FOOTER':
+				case 'ARTICLE':
+				case 'LI':
+				case 'DT':
+				case 'DD': {
 					text += '\n';
-					appendChildren(node.childNodes);
+					analyzeChildren(node.childNodes);
 					break;
 				}
 
 				default:	// includes inline elements
 				{
-					appendChildren(node.childNodes);
+					analyzeChildren(node.childNodes);
 					break;
 				}
 			}
diff --git a/packages/backend/src/server/oauth/OAuth2ProviderService.ts b/packages/backend/src/server/oauth/OAuth2ProviderService.ts
index 102998e8be..2b0b303b98 100644
--- a/packages/backend/src/server/oauth/OAuth2ProviderService.ts
+++ b/packages/backend/src/server/oauth/OAuth2ProviderService.ts
@@ -120,7 +120,7 @@ async function discoverClientInformation(logger: Logger, httpRequestService: Htt
 		}
 
 		const text = await res.text();
-		const fragment = htmlParser.parse(text);
+		const fragment = htmlParser.parse(`
${text}
`); redirectUris.push(...[...fragment.querySelectorAll('link[rel=redirect_uri][href]')].map(el => el.attributes.href)); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0317a4a6f4..6ec5c21ed0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -333,9 +333,6 @@ importers: otpauth: specifier: 9.4.1 version: 9.4.1 - parse5: - specifier: 7.3.0 - version: 7.3.0 pg: specifier: 8.16.3 version: 8.16.3