From 724bddfd6489054c014ce0083e1afadc1212ae5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?TATSUNO=20=E2=80=9CTaz=E2=80=9D=20Yasuhiro?= Date: Tue, 27 Jan 2026 15:02:24 +0900 Subject: [PATCH] feat: CSS-compatible wordBreak, hyphens and hyphenateCharacter --- .changeset/wicked-rockets-check.md | 8 + .../vite/src/examples/soft-hyphens/index.tsx | 197 ++++++++++- packages/layout/src/text/layoutText.ts | 3 + packages/layout/tests/text/layoutText.test.ts | 80 +++++ packages/stylesheet/src/types.ts | 7 + packages/textkit/package.json | 1 + .../textkit/src/engines/linebreaker/index.ts | 99 +++++- packages/textkit/src/layout/wrapWords.ts | 121 ++++++- packages/textkit/src/types.ts | 20 ++ packages/textkit/src/types/linebreak.d.ts | 13 + .../textkit/tests/engines/linebreaker.test.ts | 309 ++---------------- .../textkit/tests/layout/wrapWords.test.ts | 117 +++++++ 12 files changed, 688 insertions(+), 287 deletions(-) create mode 100644 .changeset/wicked-rockets-check.md create mode 100644 packages/textkit/src/types/linebreak.d.ts diff --git a/.changeset/wicked-rockets-check.md b/.changeset/wicked-rockets-check.md new file mode 100644 index 000000000..7059d21e0 --- /dev/null +++ b/.changeset/wicked-rockets-check.md @@ -0,0 +1,8 @@ +--- +"@react-pdf/vite-example": minor +"@react-pdf/stylesheet": minor +"@react-pdf/textkit": minor +"@react-pdf/layout": minor +--- + +Add CSS-compatible text layout properties: wordBreak, hyphens and hyphenateCharacter diff --git a/packages/examples/vite/src/examples/soft-hyphens/index.tsx b/packages/examples/vite/src/examples/soft-hyphens/index.tsx index b9c8166b0..daea2f892 100644 --- a/packages/examples/vite/src/examples/soft-hyphens/index.tsx +++ b/packages/examples/vite/src/examples/soft-hyphens/index.tsx @@ -8,13 +8,18 @@ import { StyleSheet, } from '@react-pdf/renderer'; -const shy = '\u00ad'; +const shy = '­'; Font.register({ family: 'Oswald', src: 'https://fonts.gstatic.com/s/oswald/v13/Y_TKV6o8WovbUd3m_X9aAA.ttf', }); +Font.register({ +
family: 'NotoSansJP', + src: 'https://fonts.gstatic.com/s/notosansjp/v52/-F6jfjtqLzI2JPCgQBnw7HFyzSD-AsregP8VFBEj75s.ttf', +}); + const styles = StyleSheet.create({ page: { padding: 40, @@ -74,6 +79,60 @@ const styles = StyleSheet.create({ borderRadius: 3, padding: 8, }, + // Page 2 (hyphens-control) styles + page2: { + padding: 30, + backgroundColor: '#fafafa', + }, + sectionTitle: { + fontFamily: 'Oswald', + fontSize: 12, + fontWeight: 'bold', + color: '#1a1a1a', + marginBottom: 2, + }, + sectionSubtitle: { + fontSize: 9, + color: '#888', + marginBottom: 6, + }, + sectionSubtitleJP: { + fontFamily: 'NotoSansJP', + fontSize: 9, + color: '#888', + marginBottom: 6, + }, + controlSection: { + marginBottom: 10, + }, + smallBox: { + backgroundColor: '#ffffff', + borderRadius: 3, + borderWidth: 1, + borderColor: '#e8e8e8', + padding: 6, + width: 95, + marginRight: 8, + }, + wideBox: { + backgroundColor: '#ffffff', + borderRadius: 3, + borderWidth: 1, + borderColor: '#e8e8e8', + padding: 6, + width: 240, + marginRight: 8, + }, + englishText: { + fontFamily: 'Oswald', + fontSize: 11, + color: '#1a1a1a', + }, + japaneseText: { + fontFamily: 'NotoSansJP', + fontSize: 11, + color: '#1a1a1a', + }, }); const dutchWord = `Potentieel broeikas${shy}gas${shy}emissie${shy}rapport`; @@ -84,6 +143,7 @@ const widths = [80, 120, 180]; const SoftHyphens = () => ( + {/* Page 1: Soft hyphen (U+00AD) auto break demonstration */} Soft Hyphens @@ -159,12 +219,143 @@ const SoftHyphens = () => ( + + {/* Page 2: hyphens / hyphenateCharacter / wordBreak CSS controls */} + + Hyphen & Word Break Controls + + Demonstrating hyphens, hyphenateCharacter, and wordBreak CSS properties + + + + 1. Hyphen Control (English) + + Long word in narrow container — control hyphen character + + + + + Default (hyphen) + + Potentieelbroeikasgasemissierapport + + + + + hyphens: none + + Potentieelbroeikasgasemissierapport + + + + + hyphenateCharacter: ... + + Potentieelbroeikasgasemissierapport + + + + + + + 2. 
CJK Text — wordBreak + + Problem: "グレートブリテン" alone on a line due to + script-based run splitting + + + + + wordBreak: keep-all (problem) + + グレートブリテンおよび北アイルランド連合王国という言葉は本当に長い言葉 + + + + + + wordBreak: normal (CJK breaks anywhere) + + + グレートブリテンおよび北アイルランド連合王国という言葉は本当に長い言葉 + + + + + + + + 3. Mixed Content (Japanese + English) + + + CJK breaks anywhere, Latin only at hyphenation points + + + + + wordBreak: normal + + This is a long and Honorificabilitudinitatibus + califragilisticexpialidocious + Taumatawhakatangihangakoauauotamateaturipukakapikimaungahoronukupokaiwhenuakitanatahu + グレートブリテンおよび北アイルランド連合王国という言葉は本当に長い言葉 + + + + + wordBreak: break-all + + This is a long and Honorificabilitudinitatibus + califragilisticexpialidocious + Taumatawhakatangihangakoauauotamateaturipukakapikimaungahoronukupokaiwhenuakitanatahu + グレートブリテンおよび北アイルランド連合王国という言葉は本当に長い言葉 + + + + + + + 4. Long URLs + + break-all allows URLs to wrap at any character + + + + + wordBreak: normal (overflow) + + https://example.com/very/very/loooong/path/to/resource + + + + + + wordBreak: break-all, hyphens: none + + + https://example.com/very/very/loooong/path/to/resource + + + + + ); export default { id: 'soft-hyphens', - name: 'Soft Hyphens', - description: '', + name: 'Hyphenation', + description: + 'Soft hyphen (U+00AD) auto-break and the hyphens / hyphenateCharacter / wordBreak CSS controls', Document: SoftHyphens, }; diff --git a/packages/layout/src/text/layoutText.ts b/packages/layout/src/text/layoutText.ts index e87235b67..04a63d121 100644 --- a/packages/layout/src/text/layoutText.ts +++ b/packages/layout/src/text/layoutText.ts @@ -63,6 +63,9 @@ const getLayoutOptions = (fontStore, node) => ({ node.props.hyphenationCallback || fontStore?.getHyphenationCallback() || null, + hyphens: node.style?.hyphens, + hyphenateCharacter: node.style?.hyphenateCharacter, + wordBreak: node.style?.wordBreak, }); /** diff --git a/packages/layout/tests/text/layoutText.test.ts 
b/packages/layout/tests/text/layoutText.test.ts index 34441e008..47aa10625 100644 --- a/packages/layout/tests/text/layoutText.test.ts +++ b/packages/layout/tests/text/layoutText.test.ts @@ -103,4 +103,84 @@ describe('text layoutText', () => { expect.any(Function), ); }); + + test('should not add hyphens when hyphens style is "none"', async () => { + const text = 'reallylongtext'; + const hyphens = ['really­', 'long', 'text']; + const hyphenationCallback = vi.fn().mockReturnValue(hyphens); + + const node = createTextNode( + text, + { hyphens: 'none' }, + { hyphenationCallback }, + ); + const lines = layoutText(node, 50, 100, fontStore); + + expect(lines[0].string).toEqual('really'); + expect(lines[1].string).toEqual('long'); + expect(lines[2].string).toEqual('text'); + }); + + test('should use custom hyphenate character when hyphenateCharacter is set', async () => { + const text = 'reallylongtext'; + const hyphens = ['really­', 'long', 'text']; + const hyphenationCallback = vi.fn().mockReturnValue(hyphens); + + const node = createTextNode( + text, + { hyphenateCharacter: '・' }, + { hyphenationCallback }, + ); + const lines = layoutText(node, 50, 100, fontStore); + + expect(lines[0].string).toEqual('really・'); + expect(lines[1].string).toEqual('long・'); + expect(lines[2].string).toEqual('text'); + }); + + test('should not add hyphens when hyphenateCharacter is empty string', async () => { + const text = 'reallylongtext'; + const hyphens = ['really­', 'long', 'text']; + const hyphenationCallback = vi.fn().mockReturnValue(hyphens); + + const node = createTextNode( + text, + { hyphenateCharacter: '' }, + { hyphenationCallback }, + ); + const lines = layoutText(node, 50, 100, fontStore); + + expect(lines[0].string).toEqual('really'); + expect(lines[1].string).toEqual('long'); + expect(lines[2].string).toEqual('text'); + }); + + test('should keep CJK text together with wordBreak: keep-all', async () => { + const text = '東京都区'; + const hyphenationCallback = 
vi.fn().mockImplementation((word) => [...word]); + + const node = createTextNode( + text, + { wordBreak: 'keep-all' }, + { hyphenationCallback }, + ); + const lines = layoutText(node, 300, 100, fontStore); + + expect(lines).toHaveLength(1); + expect(lines[0].string).toBe('東京都区'); + }); + + test('should break all characters with wordBreak: break-all', async () => { + const text = 'Hello'; + const node = createTextNode(text, { + wordBreak: 'break-all', + hyphens: 'none', + }); + const lines = layoutText(node, 15, 100, fontStore); + + expect(lines.length).toBeGreaterThan(1); + + const allChars = lines.map((line) => line.string).join(''); + expect(allChars).toBe('Hello'); + }); }); diff --git a/packages/stylesheet/src/types.ts b/packages/stylesheet/src/types.ts index e7bd5adc6..f45bb6f1c 100644 --- a/packages/stylesheet/src/types.ts +++ b/packages/stylesheet/src/types.ts @@ -321,12 +321,19 @@ export type TextTransform = export type VerticalAlign = 'sub' | 'super'; +export type Hyphens = 'none' | 'manual' | 'auto'; + +export type WordBreak = 'normal' | 'break-all' | 'keep-all'; + export type TextStyle = { direction?: 'ltr' | 'rtl'; fontSize?: number | string; fontFamily?: string | string[]; fontStyle?: FontStyle; fontWeight?: FontWeight; + hyphens?: Hyphens; + hyphenateCharacter?: string; + wordBreak?: WordBreak; letterSpacing?: number | string; lineHeight?: number | string; maxLines?: number | string; diff --git a/packages/textkit/package.json b/packages/textkit/package.json index 3f593b219..5dbd86821 100644 --- a/packages/textkit/package.json +++ b/packages/textkit/package.json @@ -28,6 +28,7 @@ "@react-pdf/fns": "3.1.3", "bidi-js": "^1.0.2", "hyphen": "^1.6.4", + "linebreak": "^1.1.0", "unicode-properties": "^1.4.1" }, "devDependencies": { diff --git a/packages/textkit/src/engines/linebreaker/index.ts b/packages/textkit/src/engines/linebreaker/index.ts index db55a1c6a..28fb9d241 100644 --- a/packages/textkit/src/engines/linebreaker/index.ts +++ 
b/packages/textkit/src/engines/linebreaker/index.ts @@ -1,3 +1,5 @@ +import unicode from 'unicode-properties'; + import bestFit from './bestFit'; import knuthPlass from './knuthPlass'; import slice from '../../attributedString/slice'; @@ -6,6 +8,17 @@ import advanceWidthBetween from '../../attributedString/advanceWidthBetween'; import { AttributedString, Attributes, LayoutOptions } from '../../types'; import { Node } from './types'; +/** + * Check if a character is East Asian Wide or Fullwidth. + * These characters don't need hyphens when wrapping. + */ +const isEastAsianWide = (char: string): boolean => { + const codePoint = char.codePointAt(0); + if (codePoint === undefined) return false; + const eaw = unicode.getEastAsianWidth(codePoint); + return eaw === 'W' || eaw === 'F'; +}; + const HYPHEN = 0x002d; const TOLERANCE_STEPS = 5; const TOLERANCE_LIMIT = 50; @@ -16,22 +29,83 @@ const opts = { shrink: 9, }; +/** + * Get the hyphen character code point(s) based on options + * + * @param options - Layout options + * @returns Array of code points for the hyphen character, or null if no hyphen should be inserted + */ +const getHyphenCodePoints = (options: LayoutOptions): number[] | null => { + // If hyphens is 'none', don't insert any hyphen character + if (options.hyphens === 'none') { + return null; + } + + // If hyphenateCharacter is explicitly set + if (options.hyphenateCharacter !== undefined) { + // Empty string means no hyphen + if (options.hyphenateCharacter === '') { + return null; + } + // Convert custom character to code points + const codePoints: number[] = []; + for (const char of options.hyphenateCharacter) { + const codePoint = char.codePointAt(0); + if (codePoint !== undefined) { + codePoints.push(codePoint); + } + } + return codePoints.length > 0 ? codePoints : null; + } + + // Default: use standard hyphen + return [HYPHEN]; +}; + +/** + * Check if a hyphen should be inserted at the end of a line. + * CJK characters don't need hyphens when wrapping. 
+ * + * @param line - Line attributed string + * @param hyphenCodePoints - Hyphen code points to use + * @returns True if hyphen should be inserted + */ +const shouldInsertHyphen = ( + line: AttributedString, + hyphenCodePoints: number[] | null, +): boolean => { + if (hyphenCodePoints === null) return false; + + // Get the last character of the line + const lastChar = line.string.slice(-1); + if (!lastChar) return false; + + // Don't insert hyphen after East Asian Wide characters (CJK, etc.) + if (isEastAsianWide(lastChar)) return false; + + return true; +}; + /** * Slice attributed string to many lines * * @param attributedString - Attributed string * @param nodes * @param breaks + * @param options - Layout options * @returns Attributed strings */ const breakLines = ( attributedString: AttributedString, nodes: Node[], breaks: number[], + options: LayoutOptions, ) => { let start = 0; let end = null; + const hyphenCodePoints = getHyphenCodePoints(options); + const lines: AttributedString[] = breaks.reduce((acc, breakPoint) => { const node = nodes[breakPoint]; const prevNode = nodes[breakPoint - 1]; @@ -46,7 +120,12 @@ const breakLines = ( line = slice(start, end, attributedString); - line = insertGlyph(line.string.length, HYPHEN, line); + // Insert hyphen character(s) if configured and appropriate + if (shouldInsertHyphen(line, hyphenCodePoints)) { + for (const codePoint of hyphenCodePoints!) { + line = insertGlyph(line.string.length, codePoint, line); + } + } } else { end = node.end; line = slice(start, end, attributedString); @@ -54,7 +133,8 @@ const breakLines = ( start = end; - return [...acc, line]; + acc.push(line); + return acc; }, []); lines.push(slice(start, attributedString.string.length, attributedString)); @@ -77,13 +157,15 @@ const getNodes = ( ): Node[] => { let start = 0; - const hyphenWidth = 5; + const hyphenWidth = getHyphenCodePoints(options) === null ? 
0 : 5; const { syllables } = attributedString; const hyphenPenalty = options.hyphenationPenalty || (align === 'justify' ? 100 : 600); + const allowCJKBreak = options.wordBreak !== 'keep-all'; + const result = syllables.reduce((acc: Node[], s: string, index: number) => { const width = advanceWidthBetween( start, @@ -106,8 +188,13 @@ const getNodes = ( acc.push(knuthPlass.box(width, start, end, hyphenated)); if (syllables[index + 1] && hyphenated) { - // Add penalty node. Penalty nodes are used to represent hyphenation points. - acc.push(knuthPlass.penalty(hyphenWidth, hyphenPenalty, 1)); + // CJK boundaries are soft wrap opportunities (penalty 0, no hyphen width) + // unless keep-all is set, which suppresses CJK breaks + const isSoftWrap = allowCJKBreak && isEastAsianWide(s.slice(-1)); + const penaltyValue = isSoftWrap ? 0 : hyphenPenalty; + const penaltyWidth = isSoftWrap ? 0 : hyphenWidth; + + acc.push(knuthPlass.penalty(penaltyWidth, penaltyValue, 1)); } } @@ -161,7 +248,7 @@ const linebreaker = (options: LayoutOptions) => { breaks = bestFit(nodes, availableWidths); } - return breakLines(attributedString, nodes, breaks.slice(1)); + return breakLines(attributedString, nodes, breaks.slice(1), options); }; }; diff --git a/packages/textkit/src/layout/wrapWords.ts b/packages/textkit/src/layout/wrapWords.ts index 0339736a9..dd0ed97fb 100644 --- a/packages/textkit/src/layout/wrapWords.ts +++ b/packages/textkit/src/layout/wrapWords.ts @@ -1,9 +1,60 @@ +import LineBreaker from 'linebreak'; + import fromFragments from '../attributedString/fromFragments'; import { Engines } from '../engines'; import { AttributedString, LayoutOptions } from '../types'; const SOFT_HYPHEN = '\u00ad'; +/** + * Check if a character is prohibited at the start of a line using UAX #14. + * Uses a CJK character as a neutral test character since CJK allows breaking after. 
+ */ +const isLineStartProhibited = (char: string): boolean => { + // Use a CJK character (あ) as neutral test character + // CJK characters allow breaking after them by default in UAX #14 + const testStr = 'あ' + char; + const breaker = new LineBreaker(testStr); + const bk = breaker.nextBreak(); + // If first break is at position 1, breaking before char is allowed (not prohibited) + // If first break is > 1 or null, char is line-start prohibited + return bk === null || bk.position > 1; +}; + +/** + * Apply line-breaking rules (UAX #14) to merge only line-start prohibited characters + * with the preceding syllable. This preserves hyphenation points while handling + * language-specific rules. + * + * @param syllables - Array of syllables + * @returns Array of syllables with line-breaking rules applied + */ +const applyLineBreakingRules = (syllables: string[]): string[] => { + if (syllables.length <= 1) return syllables; + + const result: string[] = []; + + for (let i = 0; i < syllables.length; i++) { + const syllable = syllables[i]; + + // Check if this syllable starts with a line-start prohibited character + // and should be merged with the previous syllable + if ( + result.length > 0 && + syllable.length > 0 && + !syllable[0].match(/\s/) && // Don't merge whitespace + isLineStartProhibited(syllable[0]) + ) { + // Merge with previous syllable + result[result.length - 1] += syllable; + } else { + result.push(syllable); + } + } + + return result; +}; + /** * Default word hyphenation engine used when no one provided. * Does not perform word hyphenation at all @@ -23,6 +74,65 @@ const removeSoftHyphens = (word: string) => { return word.replaceAll(SOFT_HYPHEN, ''); }; +/** + * Split text using UAX #14 line break algorithm. + * This properly handles CJK characters and other Unicode text. 
+ * + * @param text - Text to split + * @returns Array of segments + */ +const splitByLineBreak = (text: string): string[] => { + const breaker = new LineBreaker(text); + const segments: string[] = []; + let lastBreak = 0; + let bk; + + while ((bk = breaker.nextBreak())) { + const segment = text.slice(lastBreak, bk.position); + if (segment) segments.push(segment); + lastBreak = bk.position; + } + + return segments; +}; + +/** + * Apply word-break rules to split parts into syllables. + * + * @param parts - Array of parts from hyphenation + * @param wordBreak - Word break mode + * @returns Array of syllables + */ +const applyWordBreak = ( + parts: string[], + wordBreak: LayoutOptions['wordBreak'], +): string[] => { + if (wordBreak === 'keep-all') { + return parts; + } + + const breakAll = wordBreak === 'break-all'; + const result: string[] = []; + + for (const part of parts) { + // Whitespace is kept as-is + if (part.trim() === '') { + result.push(part); + continue; + } + + if (breakAll) { + // break-all: split all characters + result.push(...[...part]); + } else { + // normal (default): use UAX #14 to split at valid break points + result.push(...splitByLineBreak(part)); + } + } + + return result; +}; + /** * Wrap words of attribute string * @@ -38,7 +148,7 @@ const wrapWords = ( * @returns Attributed string including syllables */ return (attributedString: AttributedString) => { - const syllables = []; + const rawSyllables: string[] = []; const fragments = []; const builtinHyphenate = engines.wordHyphenation?.() || defaultHyphenate; @@ -61,8 +171,11 @@ const wrapWords = ( const word = words[j]; const parts = hyphenate(word, builtinHyphenate).map(removeSoftHyphens); - syllables.push(...parts); - string += parts.join(''); + // Apply word-break rules to split parts further if needed + const splitParts = applyWordBreak(parts, options.wordBreak); + + rawSyllables.push(...splitParts); + string += splitParts.join(''); } // Modify run start and end based on removed soft 
hyphens. @@ -75,6 +188,8 @@ const wrapWords = ( offset += runOffset; } + const syllables = applyLineBreakingRules(rawSyllables); + const result: AttributedString = { ...fromFragments(fragments), syllables }; return result; diff --git a/packages/textkit/src/types.ts b/packages/textkit/src/types.ts index fa31bf753..433944185 100644 --- a/packages/textkit/src/types.ts +++ b/packages/textkit/src/types.ts @@ -140,6 +140,26 @@ export type LayoutOptions = { shrinkCharFactor?: JustificationFactor; expandWhitespaceFactor?: JustificationFactor; shrinkWhitespaceFactor?: JustificationFactor; + /** + * CSS-like hyphens property. + * - 'none': No hyphenation at line breaks + * - 'manual': Hyphenation only at soft hyphens (default behavior) + * - 'auto': Automatic hyphenation (same as 'manual' currently) + */ + hyphens?: 'none' | 'auto' | 'manual'; + /** + * CSS-like hyphenate-character property. + * Character to display when hyphenating. Default is '-'. + * Set to empty string to disable hyphen display. + */ + hyphenateCharacter?: string; + /** + * CSS-like word-break property. 
+ * - 'normal': CJK text can break at any character (default) + * - 'break-all': All text can break at any character + * - 'keep-all': CJK text only breaks at word boundaries + */ + wordBreak?: 'normal' | 'break-all' | 'keep-all'; }; export type { Font } from '@react-pdf/font'; diff --git a/packages/textkit/src/types/linebreak.d.ts b/packages/textkit/src/types/linebreak.d.ts new file mode 100644 index 000000000..5e3548898 --- /dev/null +++ b/packages/textkit/src/types/linebreak.d.ts @@ -0,0 +1,13 @@ +declare module 'linebreak' { + interface Break { + position: number; + required: boolean; + } + + class LineBreaker { + constructor(text: string); + nextBreak(): Break | null; + } + + export = LineBreaker; +} diff --git a/packages/textkit/tests/engines/linebreaker.test.ts b/packages/textkit/tests/engines/linebreaker.test.ts index 76522cc45..db09ac2e1 100644 --- a/packages/textkit/tests/engines/linebreaker.test.ts +++ b/packages/textkit/tests/engines/linebreaker.test.ts @@ -7,285 +7,44 @@ import font from '../internal/font'; const width = 50; +const CJK_ADVANCE = 10; + +const makePosition = (xAdvance: number) => ({ + xAdvance, + yAdvance: 0, + xOffset: 0, + yOffset: 0, +}); + +const makeAttributedString = ( + str: string, + syllables: string[], + advanceWidth = CJK_ADVANCE, +) => ({ + string: str, + runs: [ + { + start: 0, + end: str.length, + attributes: { font: [font] }, + glyphIndices: Array.from({ length: str.length }, (_, i) => i), + positions: Array.from({ length: str.length }, () => + makePosition(advanceWidth), + ), + glyphs: [], + string: str, + }, + ], + syllables, +}); + describe('linebreaker', () => { const linebreaker = linebreakerFactory({}); test('should break lines and adds hyphens only where indicated', () => { - const attributedString = { - string: 'Potentieel broeikasgasemissierapport', - runs: [ - { - start: 0, - end: 36, - attributes: { font: [font] }, - stringIndices: [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - ], - glyphIndices: [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - ], - positions: [ - { - xAdvance: 11.106, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 667, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 5.004, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 278, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 5.004, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 278, - }, - { - xAdvance: 3.9959999999999996, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 222, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 3.9959999999999996, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 222, - }, - { - xAdvance: 5.004, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 278, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 5.994, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 333, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 3.9959999999999996, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 222, - }, - { - xAdvance: 9, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 500, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 9, - yAdvance: 0, - xOffset: 0, - 
yOffset: 0, - advanceWidth: 500, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 9, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 500, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 14.993999999999998, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 833, - }, - { - xAdvance: 3.9959999999999996, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 222, - }, - { - xAdvance: 9, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 500, - }, - { - xAdvance: 9, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 500, - }, - { - xAdvance: 3.9959999999999996, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 222, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 5.813999999999999, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 333, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 10.008, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 556, - }, - { - xAdvance: 6.7139999999999995, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 333, - }, - { - xAdvance: 5.004, - yAdvance: 0, - xOffset: 0, - yOffset: 0, - advanceWidth: 278, - }, - ], - glyphs: [], - string: 'Potentieel broeikasgasemissierapport', - }, - ], - syllables: ['Potentieel', ' ', 'broeikasgas', 'emissie', 'rapport'], - }; + const str = 'Potentieel broeikasgasemissierapport'; + const syllables = ['Potentieel', ' ', 'broeikasgas', 'emissie', 'rapport']; + const attributedString = 
makeAttributedString(str, syllables, 8); const result = linebreaker(attributedString, [10]); diff --git a/packages/textkit/tests/layout/wrapWords.test.ts b/packages/textkit/tests/layout/wrapWords.test.ts index 4c44fa94f..76bf8a9be 100644 --- a/packages/textkit/tests/layout/wrapWords.test.ts +++ b/packages/textkit/tests/layout/wrapWords.test.ts @@ -156,4 +156,121 @@ describe('wrapWords', () => { expect(result.runs[1]).toHaveProperty('end', 11); }); }); + + describe('word-break option', () => { + test('should split CJK characters individually with wordBreak: normal (default)', () => { + const instance = wrapWords({}, {}); + const result = instance({ + string: '本当に長いテキスト', + runs: [{ start: 0, end: 9, attributes: {} }], + }); + + // CJK characters should be split individually + expect(result.syllables).toEqual([ + '本', + '当', + 'に', + '長', + 'い', + 'テ', + 'キ', + 'ス', + 'ト', + ]); + }); + + test('should split CJK characters individually with wordBreak: normal (explicit)', () => { + const instance = wrapWords({}, { wordBreak: 'normal' }); + const result = instance({ + string: '本当に長いテキスト', + runs: [{ start: 0, end: 9, attributes: {} }], + }); + + expect(result.syllables).toEqual([ + '本', + '当', + 'に', + '長', + 'い', + 'テ', + 'キ', + 'ス', + 'ト', + ]); + }); + + test('should not split Latin characters with wordBreak: normal', () => { + const instance = wrapWords({}, { wordBreak: 'normal' }); + const result = instance({ + string: 'Hello world', + runs: [{ start: 0, end: 11, attributes: {} }], + }); + + // Latin characters should stay grouped + expect(result.syllables).toEqual(['Hello', ' ', 'world']); + }); + + test('should split mixed CJK and Latin with wordBreak: normal', () => { + const instance = wrapWords({}, { wordBreak: 'normal' }); + const result = instance({ + string: 'Hello世界', + runs: [{ start: 0, end: 7, attributes: {} }], + }); + + // Latin stays grouped, CJK splits + expect(result.syllables).toEqual(['Hello', '世', '界']); + }); + + test('should split all 
characters with wordBreak: break-all', () => { + const instance = wrapWords({}, { wordBreak: 'break-all' }); + const result = instance({ + string: 'Hello', + runs: [{ start: 0, end: 5, attributes: {} }], + }); + + // All characters should be split + expect(result.syllables).toEqual(['H', 'e', 'l', 'l', 'o']); + }); + + test('should not split CJK characters with wordBreak: keep-all', () => { + const instance = wrapWords({}, { wordBreak: 'keep-all' }); + const result = instance({ + string: '本当に長いテキスト', + runs: [{ start: 0, end: 9, attributes: {} }], + }); + + // CJK characters should stay grouped (not split) + expect(result.syllables).toEqual(['本当に長いテキスト']); + }); + + test('should handle Hiragana correctly', () => { + const instance = wrapWords({}, { wordBreak: 'normal' }); + const result = instance({ + string: 'あいうえお', + runs: [{ start: 0, end: 5, attributes: {} }], + }); + + expect(result.syllables).toEqual(['あ', 'い', 'う', 'え', 'お']); + }); + + test('should handle Katakana correctly', () => { + const instance = wrapWords({}, { wordBreak: 'normal' }); + const result = instance({ + string: 'アイウエオ', + runs: [{ start: 0, end: 5, attributes: {} }], + }); + + expect(result.syllables).toEqual(['ア', 'イ', 'ウ', 'エ', 'オ']); + }); + + test('should handle Korean correctly', () => { + const instance = wrapWords({}, { wordBreak: 'normal' }); + const result = instance({ + string: '한글테스트', + runs: [{ start: 0, end: 5, attributes: {} }], + }); + + expect(result.syllables).toEqual(['한', '글', '테', '스', '트']); + }); + }); });