diff --git a/.changeset/clipboard-html-hardening.md b/.changeset/clipboard-html-hardening.md new file mode 100644 index 000000000..b9792647f --- /dev/null +++ b/.changeset/clipboard-html-hardening.md @@ -0,0 +1,5 @@ +--- +'@eigenpal/docx-editor-core': patch +--- + +Harden clipboard HTML paste against script injection and slow-input denial of service. Pasted HTML is now sanitized (via DOMPurify) and parsed into an inert document instead of being assigned to `innerHTML`, so embedded scripts, event handlers, and `javascript:` URLs cannot run. Word comment stripping and Office/Word namespace-tag removal now use linear scans that cannot backtrack on hostile input or leave a stray comment opener behind. diff --git a/bun.lock b/bun.lock index 46eacec43..e191e27da 100644 --- a/bun.lock +++ b/bun.lock @@ -238,7 +238,7 @@ }, "packages/agents": { "name": "@eigenpal/docx-editor-agents", - "version": "1.5.0", + "version": "1.8.3", "dependencies": { "docxtemplater": "^3.50.0", "jszip": "^3.10.1", @@ -269,12 +269,13 @@ }, "packages/core": { "name": "@eigenpal/docx-editor-core", - "version": "1.5.0", + "version": "1.8.3", "bin": { "docx-editor-mcp": "./dist/mcp-cli.mjs", }, "dependencies": { "docxtemplater": "^3.50.0", + "dompurify": "^3.2.0", "jszip": "^3.10.1", "pizzip": "^3.1.7", "xml-js": "^1.6.11", @@ -305,7 +306,7 @@ }, "packages/i18n": { "name": "@eigenpal/docx-editor-i18n", - "version": "1.5.0", + "version": "1.8.3", "devDependencies": { "tsup": "^8.0.1", "typescript": "^5.3.3", @@ -313,7 +314,7 @@ }, "packages/nuxt": { "name": "@eigenpal/nuxt-docx-editor", - "version": "1.5.0", + "version": "1.8.3", "dependencies": { "@eigenpal/docx-editor-vue": "^1.0.3", "@nuxt/kit": "^3.14.0 || ^4.0.0", @@ -339,7 +340,7 @@ }, "packages/react": { "name": "@eigenpal/docx-editor-react", - "version": "1.5.0", + "version": "1.8.3", "dependencies": { "@eigenpal/docx-editor-agents": "^1.5.0", "@eigenpal/docx-editor-core": "^1.5.0", @@ -379,7 +380,7 @@ }, "packages/vue": { "name": "@eigenpal/docx-editor-vue", - "version": "1.5.0", + "version": "1.8.3", "dependencies": { "@eigenpal/docx-editor-agents": "^1.3.1", "@eigenpal/docx-editor-core": "^1.3.1", @@ -1245,6 +1246,8 @@ "@types/resolve": ["@types/resolve@1.20.2", "", {}, "sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q=="], + "@types/trusted-types": ["@types/trusted-types@2.0.7", "", {}, "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw=="], + "@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="], "@types/whatwg-mimetype": ["@types/whatwg-mimetype@3.0.2", "", {}, "sha512-c2AKvDT8ToxLIOUlN51gTiHXflsfIFisS4pO7pDPoKouJCESkhZnEy623gwP9laCy5lnLDAw1vAzu2vM2YLOrA=="], @@ -1729,6 +1732,8 @@ "domhandler": ["domhandler@5.0.3", "", { "dependencies": { "domelementtype": "^2.3.0" } }, "sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w=="], + "dompurify": ["dompurify@3.4.10", "", { "optionalDependencies": { "@types/trusted-types": "^2.0.7" } }, "sha512-0xzNv0e7oYC6yyuOGZIABPM4qtg3QxLFniDNPP4ZP90wR8Yq3zgwpRbrNiT4N3IKqDbbYFEJLV+JWEs19aZ//w=="], + "domutils": ["domutils@3.2.2", "", { "dependencies": { "dom-serializer": "^2.0.0", "domelementtype": "^2.3.0", "domhandler": "^5.0.3" } }, "sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw=="], "dot-prop": ["dot-prop@10.1.0", "", { "dependencies": { "type-fest": "^5.0.0" } }, "sha512-MVUtAugQMOff5RnBy2d9N31iG0lNwg1qAoAOn7pOK5wf94WIaE3My2p3uwTQuvS2AcqchkcR3bHByjaM0mmi7Q=="], diff --git a/packages/core/package.json b/packages/core/package.json index 1e712eb90..6d6003378 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -608,6 +608,7 @@ }, "dependencies": { "docxtemplater": "^3.50.0", + "dompurify": "^3.2.0", "jszip": "^3.10.1", "pizzip": "^3.1.7", "xml-js": "^1.6.11" diff --git a/packages/core/src/utils/__tests__/clipboard-html.test.ts b/packages/core/src/utils/__tests__/clipboard-html.test.ts new file mode 100644 index 000000000..791124f82 --- /dev/null +++ b/packages/core/src/utils/__tests__/clipboard-html.test.ts @@ -0,0 +1,104 @@ +import { GlobalRegistrator } from '@happy-dom/global-registrator'; +import { afterAll, beforeAll, describe, expect, test } from 'bun:test'; + +import { cleanWordHtml, htmlToRuns } from '../clipboard'; + +// htmlToRuns binds DOMPurify to the live window lazily on first call, so a +// window registered before any test runs is sufficient. +beforeAll(() => GlobalRegistrator.register()); +afterAll(() => GlobalRegistrator.unregister()); + +describe('cleanWordHtml comment stripping', () => { + test('removes plain HTML comments', () => { + expect(cleanWordHtml('ab')).toBe('ab'); + }); + + test('removes Word downlevel conditional comments', () => { + const html = 'xy'; + expect(cleanWordHtml(html)).toBe('xy'); + }); + + test('leaves no stray "`). + * + * Uses a single linear scan instead of a regex: clipboard HTML is + * attacker-controlled, and a lazy `` against a multi-character + * terminator backtracks polynomially. The scan also guarantees no stray + * `', start + 4); + if (end === -1) { + // Unterminated comment: drop the remainder so no `/gi, ''); - cleaned = cleaned.replace(//g, ''); + // Remove Word-specific (and all other) HTML comments + cleaned = stripHtmlComments(cleaned); // Remove XML declarations cleaned = cleaned.replace(/<\?xml[^>]*>/gi, ''); - // Remove o: (Office) namespace tags - cleaned = cleaned.replace(/]*>[\s\S]*?<\/o:[^>]*>/gi, ''); + // Remove o: (Office) namespace tags (linear scan; see stripPairedNamespaceTags) + cleaned = stripPairedNamespaceTags(cleaned, 'o:'); cleaned = cleaned.replace(/]*\/>/gi, ''); // Remove w: (Word) namespace tags - cleaned = cleaned.replace(/]*>[\s\S]*?<\/w:[^>]*>/gi, ''); + cleaned = stripPairedNamespaceTags(cleaned, 'w:'); cleaned = cleaned.replace(/]*\/>/gi, ''); // Remove mso styles but keep other styles @@ -463,8 +546,14 @@ export function htmlToRuns(html: string, plainTextFallback: string): Run[] { return plainTextFallback ? [createTextRun(plainTextFallback)] : []; } - const container = document.createElement('div'); - container.innerHTML = html; + // Sanitize the attacker-controlled clipboard HTML at this trust boundary + // (scripts, event handlers, javascript: URLs, dangerous tags all stripped), + // then parse the cleaned markup into an inert document. We only walk the + // resulting node tree for text and formatting — nothing is ever inserted + // into the live DOM. + const sanitized = getDomPurify().sanitize(html); + const parsed = new DOMParser().parseFromString(sanitized, 'text/html'); + const container = parsed.body; const runs: Run[] = []; processNode(container, runs, {});