diff --git a/docs/text.md b/docs/text.md index 411505ae9..64b4c5838 100644 --- a/docs/text.md +++ b/docs/text.md @@ -260,5 +260,91 @@ every time you want to use it. doc.font('Heading Font') .text('This is a heading.'); -That's about all there is too it for text in PDFKit. Let's move on now to +## Color Emoji + +PDFKit supports rendering color emoji as inline bitmap images when an emoji +font is registered. Emoji are automatically detected in your text and rendered +using the glyphs from the emoji font, while surrounding text continues to use +the current document font. + +### Registering an emoji font + +To enable emoji support, register an emoji font that contains an `sbix` +(Standard Bitmap Graphics) table. On macOS, the built-in Apple Color Emoji +font works out of the box. + +You can register the emoji font via constructor options: + +```javascript +const doc = new PDFDocument({ + emojiFont: '/System/Library/Fonts/Apple Color Emoji.ttc', + emojiFontFamily: 'AppleColorEmoji', +}); +``` + +Or register it at any time using the `registerEmojiFont` method: + +```javascript +doc.registerEmojiFont('/System/Library/Fonts/Apple Color Emoji.ttc', 'AppleColorEmoji'); +``` + +The `emojiFontFamily` (or second argument) is the PostScript name or family +name used to select the correct font from a TrueType Collection (`.ttc`) file. +If the font file contains only a single font, this parameter can be omitted. + +### Using emoji in text + +Once an emoji font is registered, you can use emoji characters directly in +any `text` call. PDFKit automatically segments the string into text and emoji +runs, rendering each with the appropriate font. + +```javascript +doc.font('Helvetica') + .fontSize(18) + .text('Hello 😀 World 🎉 PDFKit 🚀'); +``` + +All standard text options (alignment, line wrapping, `continued`, columns, +etc.) work with emoji. The `widthOfString` method is also emoji-aware, so +layout calculations account for emoji width correctly. 
+ +### Supported emoji types + +The emoji segmenter handles the full range of modern emoji sequences: + +* **Simple emoji** — single code point emoji like 😀, 🎉, 🚀 +* **ZWJ sequences** — composite emoji joined with Zero-Width Joiner, such as + family groups (👨‍👩‍👧‍👦) and profession emoji (👩‍💻) +* **Flag emoji** — regional indicator pairs like 🇺🇸, 🇯🇵, 🇫🇷 +* **Skin tone modifiers** — emoji with Fitzpatrick skin tone modifiers (👋🏻 👋🏿) +* **Variation selectors** — text vs emoji presentation (❤️) +* **Keycap sequences** — digit + variation selector + combining enclosing + keycap (1️⃣ 2️⃣ 3️⃣) + +### Different font sizes + +Emoji scale to match the current font size. The emoji font's SBIX table +contains bitmaps at several predefined sizes; PDFKit selects the closest +available size and scales it to fit. + +```javascript +doc.font('Helvetica'); + +doc.fontSize(12).text('Small emoji: 🎉'); +doc.fontSize(24).text('Medium emoji: 🎉'); +doc.fontSize(48).text('Large emoji: 🎉'); +``` + +### Limitations + +* Currently only `sbix` (bitmap) emoji fonts are supported. This includes + Apple Color Emoji on macOS. Support for `COLR`/`CPAL` (vector) and + `CBDT`/`CBLC` (Google Noto Color Emoji) formats may be added in the future. +* The emoji font file must be accessible on the system where the PDF is + generated. Apple Color Emoji is included with macOS but is not + redistributable. +* Emoji are rendered as raster images (PNG), so they may appear slightly + less sharp than vector text at very large sizes. + +That's about all there is to it for text in PDFKit. Let's move on now to images. 
/**
 * Emoji Segmenter - splits text into plain-text vs emoji runs.
 *
 * Uses Unicode ranges as a heuristic to detect emoji code points,
 * then groups consecutive emoji (including ZWJ sequences, variation
 * selectors, skin-tone modifiers, and regional indicators) into
 * single segments.
 */

// Zero-Width Joiner
const ZWJ = 0x200d;

// Variation Selector 16 (emoji presentation)
const VS16 = 0xfe0f;

// Variation Selector 15 (text presentation)
const VS15 = 0xfe0e;

// Combining Enclosing Keycap
const KEYCAP = 0x20e3;

/**
 * Inclusive [first, last] code point ranges treated as emoji, sorted by
 * first code point. Ranges are deliberately coarse (whole blocks in several
 * cases): this is a heuristic, not the Unicode Emoji property.
 */
const EMOJI_RANGES = [
  [0x00a9, 0x00a9], // copyright sign
  [0x00ae, 0x00ae], // registered sign
  [0x203c, 0x203c], // double exclamation mark
  [0x2049, 0x2049], // exclamation question mark
  [0x2122, 0x2122], // trade mark sign
  [0x2139, 0x2139], // information source
  [0x2190, 0x21aa], // arrows
  [0x2300, 0x23ff], // miscellaneous technical (watch, hourglass, ...)
  [0x24c2, 0x24c2], // circled latin capital letter M
  [0x25aa, 0x25fe], // geometric shapes
  [0x2600, 0x27bf], // miscellaneous symbols and dingbats
  [0x2934, 0x2935], // curved arrows
  [0x2b05, 0x2b07], // left / up / down black arrows
  [0x2b1b, 0x2b1c], // large black / white squares
  [0x2b50, 0x2b55], // star ... heavy large circle
  [0x3030, 0x3030], // wavy dash
  [0x303d, 0x303d], // part alternation mark
  [0x3297, 0x3297], // circled ideograph congratulation
  [0x3299, 0x3299], // circled ideograph secret
  [0x1f004, 0x1f0cf], // mahjong tiles, playing cards
  [0x1f100, 0x1f1ff], // enclosed alphanumeric supplement (incl. regional indicators)
  [0x1f200, 0x1f2ff], // enclosed ideographic supplement
  [0x1f300, 0x1f5ff], // misc symbols and pictographs (incl. skin-tone modifiers)
  [0x1f600, 0x1f64f], // emoticons
  [0x1f680, 0x1f6ff], // transport and map symbols
  [0x1f7e0, 0x1f7ff], // coloured circles / squares, heavy equals sign
  [0x1f900, 0x1f9ff], // supplemental symbols and pictographs
  [0x1fa00, 0x1faff], // symbols and pictographs extended-A
  [0xe0020, 0xe007f], // tags block (sub-region flags such as England, Scotland)
];

/**
 * Check if a code point is in an emoji-related Unicode range.
 * This is a heuristic - it catches the vast majority of emoji
 * but may include some non-emoji symbols.
 *
 * @param {number} cp - Unicode code point
 * @returns {boolean} true when `cp` falls inside one of EMOJI_RANGES
 */
function isEmojiCodePoint(cp) {
  // Fast path: nothing below U+00A9 (i.e. all of ASCII) is in the table.
  if (cp < 0x00a9) return false;
  return EMOJI_RANGES.some(([first, last]) => cp >= first && cp <= last);
}

/**
 * Check if a code point is a modifier/joiner that extends an emoji sequence.
 *
 * @param {number} cp - Unicode code point
 * @returns {boolean} true for ZWJ, VS16, the enclosing keycap, Fitzpatrick
 *   skin-tone modifiers and tag characters
 */
function isEmojiModifier(cp) {
  return (
    cp === ZWJ ||
    cp === VS16 ||
    cp === KEYCAP ||
    // Skin tone modifiers
    (cp >= 0x1f3fb && cp <= 0x1f3ff) ||
    // Tags block (flag sub-sequences)
    (cp >= 0xe0020 && cp <= 0xe007f)
  );
}

/**
 * Check if a code point is a keycap base character (0-9, #, *).
 * These become emoji when followed by VS16 and/or Combining Enclosing Keycap.
 *
 * @param {number} cp - Unicode code point
 * @returns {boolean}
 */
function isKeycapBase(cp) {
  return (cp >= 0x30 && cp <= 0x39) || cp === 0x23 || cp === 0x2a;
}

/**
 * Check if a code point is a Regional Indicator Symbol letter.
 *
 * @param {number} cp - Unicode code point
 * @returns {boolean}
 */
function isRegionalIndicator(cp) {
  return cp >= 0x1f1e6 && cp <= 0x1f1ff;
}

/**
 * Extract code points from a string, handling UTF-16 surrogate pairs.
 *
 * @param {string} str - Input string
 * @returns {Array<{cp: number, index: number, length: number}>} One entry per
 *   code point; `index` is the position in `str` and `length` is the number
 *   of UTF-16 code units consumed (1 or 2)
 */
function codePoints(str) {
  const result = [];
  for (let i = 0; i < str.length; ) {
    const code = str.codePointAt(i);
    const len = code > 0xffff ? 2 : 1;
    result.push({ cp: code, index: i, length: len });
    i += len;
  }
  return result;
}

/**
 * Segment a text string into runs of plain text and emoji.
 *
 * Adjacent emoji (and everything joined to them by ZWJ, VS16, keycap,
 * skin-tone or tag characters) are merged into one 'emoji' segment; all other
 * code points accumulate into 'text' segments. An emoji code point that is
 * immediately followed by VS15 (U+FE0E) explicitly requests text
 * presentation and is therefore kept in the text run together with its
 * selector.
 *
 * @param {string} text - The input text
 * @returns {Array<{type: 'text'|'emoji', text: string}>} Segments in order;
 *   an empty array for empty/null/undefined input
 */
function segmentEmojiText(text) {
  if (!text) return [];

  const cps = codePoints(text);
  const segments = [];
  let runType = null; // type of the run being accumulated, null before the first cp
  let runStart = 0; // string index where the current run starts

  // Close the current run (if any) at string index `at` and start a run of `type`.
  const switchTo = (type, at) => {
    if (runType === type) return;
    if (runType !== null && at > runStart) {
      segments.push({ type: runType, text: text.slice(runStart, at) });
    }
    runType = type;
    runStart = at;
  };

  let i = 0;
  while (i < cps.length) {
    const { cp, index } = cps[i];
    const next = i + 1 < cps.length ? cps[i + 1].cp : -1;

    if (isKeycapBase(cp) && (next === VS16 || next === KEYCAP)) {
      // Keycap sequence: the base (0-9, #, *) is not an emoji code point on
      // its own, so the sequence is recognised by lookahead.
      switchTo('emoji', index);
      i++; // base character
      if (i < cps.length && cps[i].cp === VS16) i++;
      if (i < cps.length && cps[i].cp === KEYCAP) i++;
    } else if (isEmojiCodePoint(cp) && next !== VS15) {
      switchTo('emoji', index);
      i++;

      // Regional indicator pair (flag)
      if (
        isRegionalIndicator(cp) &&
        i < cps.length &&
        isRegionalIndicator(cps[i].cp)
      ) {
        i++;
      }

      // Trailing modifiers. ZWJ counts as a modifier, so the emoji that
      // follows a ZWJ is picked up by the next outer iteration and stays in
      // the same run.
      while (i < cps.length && isEmojiModifier(cps[i].cp)) {
        i++;
      }
    } else if (cp === VS15) {
      // A text-presentation selector never changes the run it appears in;
      // at the very start of the string it opens a text run.
      if (runType === null) switchTo('text', index);
      i++;
    } else {
      // Plain text, or an emoji code point forced to text by a following VS15
      switchTo('text', index);
      i++;
    }
  }

  // Flush final segment
  if (runType && runStart < text.length) {
    segments.push({ type: runType, text: text.slice(runStart) });
  }

  return segments;
}

export { segmentEmojiText, isEmojiCodePoint, codePoints };
+ * + * @param {string|Uint8Array|ArrayBuffer} src - Path or buffer + * @param {string} [family] - Font name within a TTC collection + * @returns {object} fontkit font object + */ + static openEmoji(src, family) { + if (typeof src === 'string') { + src = fs.readFileSync(src); + } + let font; + + if (src instanceof Uint8Array) { + font = fontkit.create(src); + } else if (src instanceof ArrayBuffer) { + font = fontkit.create(new Uint8Array(src)); + } + + if (font == null) { + throw new Error('Not a supported emoji font format.'); + } + + // If fontkit returned a TrueTypeCollection, find the right font + if (typeof font.fonts !== 'undefined' && Array.isArray(font.fonts)) { + if (!family) { + font = font.fonts[0]; + } else { + // Match by postscriptName or familyName + const match = font.fonts.find( + (f) => f.postscriptName === family || f.familyName === family, + ); + font = match || font.fonts[0]; + } + } + + if (font == null) { + throw new Error( + `Could not find font "${family}" in the emoji font collection.`, + ); + } + + return font; + } } export default PDFFontFactory; diff --git a/lib/mixins/fonts.js b/lib/mixins/fonts.js index 9b8d915d0..0a7cb058a 100644 --- a/lib/mixins/fonts.js +++ b/lib/mixins/fonts.js @@ -1,5 +1,6 @@ import PDFFontFactory from '../font_factory'; import { CM_TO_IN, IN_TO_PT, MM_TO_CM, PC_TO_PT, PX_TO_IN } from '../utils'; +import { segmentEmojiText } from '../emoji/segmenter'; const isEqualFont = (font1, font2) => { // compare font checksum @@ -44,6 +45,10 @@ export default { this._registeredFonts = {}; + // Emoji font state + this._emojiFont = null; // fontkit font object for emoji + this._emojiImageCache = new Map(); // glyphId+ppem โ†’ PDFImage object + // Set the default font if (defaultFont) { this.font(defaultFont, defaultFontFamily); @@ -124,6 +129,65 @@ export default { return this; }, + /** + * Register an emoji font for color emoji rendering. + * The font is loaded via fontkit and stored on the document. 
+ * Emoji glyphs from this font are rendered as image XObjects (SBIX) + * rather than embedded CIDFont text. + * + * @param {string|Uint8Array|ArrayBuffer} src - Path or buffer of the emoji font + * @param {string} [family] - Font name within a TTC collection (e.g. 'AppleColorEmoji') + * @returns {this} + */ + registerEmojiFont(src, family) { + this._emojiFont = PDFFontFactory.openEmoji(src, family); + return this; + }, + + /** + * Segment text into plain-text and emoji runs. + * Returns null if no emoji font is registered (fast path). + * + * @param {string} text + * @returns {Array<{type: 'text'|'emoji', text: string}>|null} + */ + _segmentEmojiText(text) { + if (!this._emojiFont) return null; + const segments = segmentEmojiText(text); + // If there's only one text segment covering the whole string, no emoji detected + if ( + segments.length === 1 && + segments[0].type === 'text' && + segments[0].text === text + ) { + return null; + } + // If there are any emoji segments, return the segmentation + if (segments.some((s) => s.type === 'emoji')) { + return segments; + } + return null; + }, + + /** + * Calculate the width of an emoji string using the emoji font metrics. + * Uses font shaping (layout) to get the advance width of the full sequence, + * which handles ZWJ sequences, regional indicators, etc. 
+ * + * @param {string} text - The emoji text (a single emoji sequence) + * @param {number} fontSize - The font size in points + * @returns {number} Width in points + */ + _emojiWidthOfString(text, fontSize) { + if (!this._emojiFont) return 0; + const run = this._emojiFont.layout(text); + let totalAdvance = 0; + for (let i = 0; i < run.glyphs.length; i++) { + totalAdvance += run.positions[i].xAdvance; + } + return (totalAdvance / this._emojiFont.unitsPerEm) * fontSize; + }, + /** * Convert a {@link Size} into a point measurement * diff --git a/lib/mixins/text.js b/lib/mixins/text.js index 829065319..b159277b4 100644 --- a/lib/mixins/text.js +++ b/lib/mixins/text.js @@ -1,5 +1,6 @@ import LineWrapper from '../line_wrapper'; import PDFObject from '../object'; +import PDFImage from '../image'; import { cosine, sine } from '../utils'; const { number } = PDFObject; @@ -112,6 +113,23 @@ export default { widthOfString(string, options = {}) { const horizontalScaling = options.horizontalScaling || 100; + + // If we have an emoji font, check for emoji segments + const segments = this._segmentEmojiText(string); + if (segments) { + let totalWidth = 0; + for (const seg of segments) { + if (seg.type === 'emoji') { + totalWidth += this._emojiWidthOfString(seg.text, this._fontSize); + } else { + totalWidth += + this._font.widthOfString(seg.text, this._fontSize, options.features) + + (options.characterSpacing || 0) * (seg.text.length - 1); + } + } + return (totalWidth * horizontalScaling) / 100; + } + return ( ((this._font.widthOfString(string, this._fontSize, options.features) + (options.characterSpacing || 0) * (string.length - 1)) * @@ -440,13 +458,51 @@ export default { }, _line(text, options = {}, wrapper) { - this._fragment(text, this.x, this.y, options); + const segments = this._segmentEmojiText(text); - if (wrapper) { - const lineGap = options.lineGap || this._lineGap || 0; - this.y += this.currentLineHeight(true) + lineGap; + if (segments) { + // Mixed text/emoji line โ€” 
render each segment sequentially + let curX = this.x; + const curY = this.y; + + for (const seg of segments) { + if (seg.type === 'emoji') { + this._emojiFragment(seg.text, curX, curY, options); + curX += this._emojiWidthOfString(seg.text, this._fontSize); + } else { + // Build sub-options with correct textWidth/wordCount for this sub-fragment + const subOpts = Object.assign({}, options, { + textWidth: this._font.widthOfString( + seg.text, + this._fontSize, + options.features, + ), + wordCount: seg.text.trim().split(/\s+/).length, + // Disable alignment for sub-fragments โ€” alignment was already + // applied at the line level by the LineWrapper + width: undefined, + }); + this._fragment(seg.text, curX, curY, subOpts); + curX += this.widthOfString(seg.text, options); + } + } + + if (wrapper) { + const lineGap = options.lineGap || this._lineGap || 0; + this.y += this.currentLineHeight(true) + lineGap; + } else { + this.x = curX; + } } else { - this.x += this.widthOfString(text, options); + // No emoji โ€” original path + this._fragment(text, this.x, this.y, options); + + if (wrapper) { + const lineGap = options.lineGap || this._lineGap || 0; + this.y += this.currentLineHeight(true) + lineGap; + } else { + this.x += this.widthOfString(text, options); + } } }, @@ -742,4 +798,138 @@ export default { // restore flipped coordinate system this.restore(); }, + + /** + * Render an emoji sequence as an SBIX bitmap image XObject. + * Uses the emoji font's shaping to get the correct glyph for + * ZWJ sequences, then extracts the PNG bitmap and embeds it. + * + * @param {string} text - The emoji text (single emoji sequence) + * @param {number} x - X position + * @param {number} y - Y position + */ + _emojiFragment(text, x, y) { + if (!this._emojiFont) return; + + const font = this._emojiFont; + const fontSize = this._fontSize; + + // Use font shaping to resolve ZWJ sequences, flags, etc. 
+ const run = font.layout(text); + + let curX = x; + for (let i = 0; i < run.glyphs.length; i++) { + const glyph = run.glyphs[i]; + const position = run.positions[i]; + const advancePt = (position.xAdvance / font.unitsPerEm) * fontSize; + + // Try to get the SBIX bitmap image + const img = this._getEmojiImage(glyph, fontSize); + if (img) { + // Calculate image size: scale to match the font size + // Use fontSize as the height, maintain aspect ratio + const imgW = advancePt; + const imgH = fontSize; + + // Position: baseline-aligned within the text line + // y is the top of the text line in PDFKit coordinates + const imgX = curX + (position.xOffset / font.unitsPerEm) * fontSize; + const imgY = y; + + // Use the low-level image rendering (similar to images mixin) + this._placeEmojiImage(img, imgX, imgY, imgW, imgH); + } + // If no image (e.g. unsupported sequence), silently skip + + curX += advancePt; + } + }, + + /** + * Get or create a cached PDFImage for an emoji glyph at a given size. 
+ * + * @param {object} glyph - fontkit glyph object (SBIXGlyph) + * @param {number} fontSize - Target font size in points + * @returns {object|null} PDFImage object or null if no bitmap available + */ + _getEmojiImage(glyph, fontSize) { + // Choose the best SBIX ppem size for the requested font size + // Available sizes for Apple Color Emoji: [20, 26, 32, 40, 48, 52, 64, 96, 160] + // Convert fontSize (points) to approximate ppem + const ppem = Math.round(fontSize); + + const cacheKey = `${glyph.id}:${ppem}`; + if (this._emojiImageCache.has(cacheKey)) { + return this._emojiImageCache.get(cacheKey); + } + + // Try to get the bitmap image from the glyph + let imgData; + try { + imgData = glyph.getImageForSize(ppem); + } catch (e) { + console.warn(`getImageForSize error: ${e.message}`); + // Not an SBIX glyph or no image available + this._emojiImageCache.set(cacheKey, null); + return null; + } + + if (!imgData?.data?.length) { + this._emojiImageCache.set(cacheKey, null); + return null; + } + + // The imgType has a trailing space (e.g. "png ") โ€” trim it + const imgType = (imgData.type || '').trim().toLowerCase(); + if (imgType !== 'png' && imgType !== 'jpg' && imgType !== 'jpeg') { + this._emojiImageCache.set(cacheKey, null); + return null; + } + + // Create a PDFImage from the raw bitmap buffer + const label = `EI${++this._imageCount}`; + let image; + try { + image = PDFImage.open(imgData.data, label); + } catch (e) { + console.warn(`could not open image for emoji: ${e.message}`); + this._emojiImageCache.set(cacheKey, null); + return null; + } + + this._emojiImageCache.set(cacheKey, image); + return image; + }, + + /** + * Place an emoji image XObject at the specified position and size. + * Handles the coordinate system flip that PDFKit applies. 
+ * + * @param {object} image - PDFImage object + * @param {number} x - X position (PDFKit coordinates, top-left origin) + * @param {number} y - Y position (PDFKit coordinates, top-left origin) + * @param {number} w - Width in points + * @param {number} h - Height in points + */ + _placeEmojiImage(image, x, y, w, h) { + // Ensure the image is embedded + if (!image.obj) { + image.embed(this); + } + + // Register the XObject on the current page + if (this.page.xobjects[image.label] == null) { + this.page.xobjects[image.label] = image.obj; + } + + // PDFKit has a flipped coordinate system (origin top-left). + // The default page transform is (1, 0, 0, -1, 0, pageHeight). + // To place an image, we need to flip it back: + // y_pdf = pageHeight - y_pdkit + // And images are drawn bottom-up, so we add h to y. + this.save(); + this.transform(w, 0, 0, -h, x, y + h); + this.addContent(`/${image.label} Do`); + this.restore(); + }, }; diff --git a/scripts/probe-emoji-fonts.mjs b/scripts/probe-emoji-fonts.mjs new file mode 100644 index 000000000..ab0f2288e --- /dev/null +++ b/scripts/probe-emoji-fonts.mjs @@ -0,0 +1,131 @@ +/** + * Phase 1 โ€“ fontkit color-emoji API probe + * + * Usage: node scripts/probe-emoji-fonts.mjs + * + * Tests SBIX (Apple Color Emoji) and COLR/CPAL APIs exposed by fontkit 2.x. + */ + +import * as fontkit from 'fontkit'; +import fs from 'fs'; + +const TEST_EMOJI = [ + { label: 'grinning face', cp: 0x1F600 }, + { label: 'thumbs up', cp: 0x1F44D }, + { label: 'red heart', cp: 0x2764 }, + { label: 'flag: US (ZWJ)', cp: 0x1F1FA }, // first cp of ๐Ÿ‡บ๐Ÿ‡ธ +]; + +function probeFont(path, family) { + console.log(`\n${'='.repeat(60)}`); + console.log(`Font: ${path}${family ? 
/**
 * Print a diagnostic report for one font file: basic metrics, which color
 * tables are present, a per-code-point audit of TEST_EMOJI, and details of
 * the sbix / COLR tables when they exist. Load failures are reported on
 * stderr and end the probe for that font.
 *
 * @param {string} path - Font file to open
 * @param {string} [family] - Name used to choose a face when the file is a TTC
 */
function probeFont(path, family) {
  const rule = '='.repeat(60);
  const write = (chunk) => process.stdout.write(chunk);

  console.log(`\n${rule}`);
  console.log(`Font: ${path}${family ? ` [${family}]` : ''}`);
  console.log(rule);

  let font;
  try {
    const opened = fontkit.create(fs.readFileSync(path));

    if (opened.type !== 'TTC') {
      font = opened;
    } else {
      // Collection: list the faces, then choose one
      const psNames = opened.fonts.map((face) => face.postscriptName);
      console.log(` TTC fonts (postscriptName): ${JSON.stringify(psNames)}`);

      // Exact name first, otherwise the first name containing the
      // whitespace-stripped, lower-cased family string
      let chosen;
      if (family) {
        chosen =
          psNames.find((name) => name === family) ||
          psNames.find(
            (name) =>
              name &&
              name.toLowerCase().includes(family.toLowerCase().replace(/\s/g, '')),
          );
      } else {
        chosen = psNames[0];
      }

      if (!chosen) {
        console.error(` ERROR: no font matching "${family}" in TTC`);
        return;
      }
      console.log(` Using postscriptName: ${chosen}`);
      font = opened.getFont(chosen);
    }
  } catch (e) {
    console.error(' ERROR loading font:', e.message);
    return;
  }

  console.log(` postscriptName : ${font.postscriptName}`);
  console.log(` unitsPerEm : ${font.unitsPerEm}`);
  console.log(` ascent : ${font.ascent}`);
  console.log(` descent : ${font.descent}`);

  // Which color-glyph tables does the font carry?
  const tables = font.directory.tables;
  console.log(`\n Table presence:`);
  for (const tag of ['sbix', 'COLR', 'CPAL', 'CBDT', 'CBLC']) {
    console.log(` ${tag} : ${Boolean(tables[tag])}`);
  }
  console.log(` SVG : ${!!(tables['SVG '] || tables.SVG)}`);

  console.log(`\n Per-emoji glyph audit:`);
  for (const { label, cp } of TEST_EMOJI) {
    const has = font.hasGlyphForCodePoint(cp);
    write(` U+${cp.toString(16).toUpperCase().padStart(4,'0')} ${label}: has=${has}`);
    if (!has) {
      console.log();
      continue;
    }

    const glyph = font.glyphForCodePoint(cp);
    write(` type=${glyph.type} id=${glyph.id} advW=${glyph.advanceWidth}`);

    if (glyph.type === 'SBIX') {
      const img = glyph.getImageForSize(64);
      write(
        img
          ? ` imgType=${JSON.stringify(img.type)} dataLen=${img.data.length} origin=(${img.originX},${img.originY})`
          : ' img=null',
      );
    }

    if (glyph.type === 'COLR') {
      const layers = glyph.layers;
      write(` layers=${layers.length}`);
      // Only the first two layers are reported
      for (const { glyph: layerGlyph, color } of layers.slice(0, 2)) {
        const cmds = layerGlyph.path.commands.length;
        write(` [pathCmds=${cmds} rgba=(${color.red},${color.green},${color.blue},${color.alpha})]`);
      }
    }

    console.log();
  }

  // sbix strike sizes, when the table exists
  if (tables.sbix) {
    try {
      const ppems = font.sbix.imageTables.map((strike) => strike.ppem).join(', ');
      console.log(`\n SBIX available ppem sizes: [${ppems}]`);
    } catch (e) {
      console.log(`\n SBIX read error: ${e.message}`);
    }
  }

  // COLR header, when the table exists
  if (tables.COLR) {
    try {
      const colr = font.COLR;
      console.log(`\n COLR version: ${colr.version} baseGlyphRecords: ${colr.numBaseGlyphRecords}`);
    } catch (e) {
      console.log(`\n COLR read error: ${e.message}`);
    }
  }
}
+ +describe('Emoji Segmenter', () => { + describe('segmentEmojiText', () => { + test('empty string returns empty array', () => { + expect(segmentEmojiText('')).toEqual([]); + }); + + test('null/undefined returns empty array', () => { + expect(segmentEmojiText(null)).toEqual([]); + expect(segmentEmojiText(undefined)).toEqual([]); + }); + + test('plain text only returns single text segment', () => { + const result = segmentEmojiText('Hello World'); + expect(result).toEqual([{ type: 'text', text: 'Hello World' }]); + }); + + test('text with no emoji returns single text segment matching input', () => { + const input = 'The quick brown fox jumps over the lazy dog.'; + const result = segmentEmojiText(input); + expect(result).toHaveLength(1); + expect(result[0]).toEqual({ type: 'text', text: input }); + }); + + test('emoji only returns single emoji segment', () => { + const result = segmentEmojiText('๐Ÿ˜€'); + expect(result).toEqual([{ type: 'emoji', text: '๐Ÿ˜€' }]); + }); + + test('mixed text and emoji returns alternating segments', () => { + const result = segmentEmojiText('Hello ๐Ÿ˜€ World'); + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ type: 'text', text: 'Hello ' }); + expect(result[1]).toEqual({ type: 'emoji', text: '๐Ÿ˜€' }); + expect(result[2]).toEqual({ type: 'text', text: ' World' }); + }); + + test('ZWJ sequences grouped as single emoji segment', () => { + // Family: man, woman, girl, boy (ZWJ sequence) + const family = '๐Ÿ‘จ\u200D๐Ÿ‘ฉ\u200D๐Ÿ‘ง\u200D๐Ÿ‘ฆ'; + const result = segmentEmojiText(family); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('emoji'); + expect(result[0].text).toBe(family); + }); + + test('flag emoji (regional indicator pairs) as single segment', () => { + // US flag: regional indicator U + regional indicator S + const usFlag = '๐Ÿ‡บ๐Ÿ‡ธ'; + const result = segmentEmojiText(usFlag); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('emoji'); + expect(result[0].text).toBe(usFlag); + }); + + 
test('skin tone modifiers grouped with base emoji', () => { + // Waving hand + medium skin tone + const wave = '๐Ÿ‘‹๐Ÿฝ'; + const result = segmentEmojiText(wave); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('emoji'); + expect(result[0].text).toBe(wave); + }); + + test('keycap sequences (digit + VS16 + keycap) as emoji', () => { + // 1๏ธโƒฃ = "1" + VS16 + Combining Enclosing Keycap + const keycap = '1\uFE0F\u20E3'; + const result = segmentEmojiText(keycap); + expect(result).toHaveLength(1); + expect(result[0].type).toBe('emoji'); + expect(result[0].text).toBe(keycap); + }); + + test('variation selector 16 (heart with VS16)', () => { + // โค๏ธ = โค (U+2764) + VS16 (U+FE0F) + const heart = '\u2764\uFE0F'; + const result = segmentEmojiText('I ' + heart + ' PDFKit'); + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ type: 'text', text: 'I ' }); + expect(result[1].type).toBe('emoji'); + expect(result[1].text).toBe(heart); + expect(result[2]).toEqual({ type: 'text', text: ' PDFKit' }); + }); + + test('multiple emoji in a row form consecutive emoji segments', () => { + const result = segmentEmojiText('๐ŸŽ๐ŸŠ๐Ÿ‹'); + // All consecutive emoji should be in emoji segment(s) + for (const seg of result) { + expect(seg.type).toBe('emoji'); + } + const combined = result.map((s) => s.text).join(''); + expect(combined).toBe('๐ŸŽ๐ŸŠ๐Ÿ‹'); + }); + + test('multiple separate emoji with text between them', () => { + const result = segmentEmojiText('Hello ๐Ÿ˜€ World ๐ŸŽ‰ Test ๐Ÿš€'); + expect(result.length).toBeGreaterThanOrEqual(5); + expect(result[0]).toEqual({ type: 'text', text: 'Hello ' }); + expect(result[1].type).toBe('emoji'); + expect(result[2].type).toBe('text'); + expect(result[3].type).toBe('emoji'); + expect(result[4].type).toBe('text'); + }); + }); + + describe('isEmojiCodePoint', () => { + test('returns true for common emoji code points', () => { + // ๐Ÿ˜€ = U+1F600 + expect(isEmojiCodePoint(0x1f600)).toBe(true); + // ๐ŸŽ‰ = 
U+1F389 + expect(isEmojiCodePoint(0x1f389)).toBe(true); + // ๐Ÿš€ = U+1F680 + expect(isEmojiCodePoint(0x1f680)).toBe(true); + // โค = U+2764 + expect(isEmojiCodePoint(0x2764)).toBe(true); + // โ˜€ = U+2600 + expect(isEmojiCodePoint(0x2600)).toBe(true); + }); + + test('returns false for ASCII letters and digits', () => { + // 'A' = 0x41 + expect(isEmojiCodePoint(0x41)).toBe(false); + // 'z' = 0x7A + expect(isEmojiCodePoint(0x7a)).toBe(false); + // '0' = 0x30 + expect(isEmojiCodePoint(0x30)).toBe(false); + // '9' = 0x39 + expect(isEmojiCodePoint(0x39)).toBe(false); + // space = 0x20 + expect(isEmojiCodePoint(0x20)).toBe(false); + }); + }); + + describe('codePoints', () => { + test('correctly handles surrogate pairs (emoji > U+FFFF)', () => { + // ๐Ÿ˜€ = U+1F600, encoded as surrogate pair in UTF-16 + const result = codePoints('๐Ÿ˜€'); + expect(result).toHaveLength(1); + expect(result[0].cp).toBe(0x1f600); + expect(result[0].index).toBe(0); + expect(result[0].length).toBe(2); // surrogate pair = 2 UTF-16 code units + }); + + test('handles basic ASCII', () => { + const result = codePoints('ABC'); + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ cp: 0x41, index: 0, length: 1 }); + expect(result[1]).toEqual({ cp: 0x42, index: 1, length: 1 }); + expect(result[2]).toEqual({ cp: 0x43, index: 2, length: 1 }); + }); + + test('handles mixed ASCII and emoji', () => { + const result = codePoints('A๐Ÿ˜€B'); + expect(result).toHaveLength(3); + expect(result[0]).toEqual({ cp: 0x41, index: 0, length: 1 }); + expect(result[1]).toEqual({ cp: 0x1f600, index: 1, length: 2 }); + expect(result[2]).toEqual({ cp: 0x42, index: 3, length: 1 }); + }); + + test('empty string returns empty array', () => { + expect(codePoints('')).toEqual([]); + }); + }); +}); \ No newline at end of file diff --git a/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-1-snap.png 
b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-1-snap.png new file mode 100644 index 000000000..c3f49c182 Binary files /dev/null and b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-1-snap.png differ diff --git a/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-2-snap.png b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-2-snap.png new file mode 100644 index 000000000..cec323b36 Binary files /dev/null and b/tests/visual/__image_snapshots__/emoji-spec-js-emoji-simple-emoji-mixed-with-text-2-snap.png differ diff --git a/tests/visual/__image_snapshots__/fonts-spec-js-fonts-default-helvetica-1-snap.png b/tests/visual/__image_snapshots__/fonts-spec-js-fonts-default-helvetica-1-snap.png deleted file mode 100644 index 1c4f75324..000000000 Binary files a/tests/visual/__image_snapshots__/fonts-spec-js-fonts-default-helvetica-1-snap.png and /dev/null differ diff --git a/tests/visual/__image_snapshots__/fonts-spec-js-fonts-helvetica-bold-1-snap.png b/tests/visual/__image_snapshots__/fonts-spec-js-fonts-helvetica-bold-1-snap.png deleted file mode 100644 index ea3f81936..000000000 Binary files a/tests/visual/__image_snapshots__/fonts-spec-js-fonts-helvetica-bold-1-snap.png and /dev/null differ diff --git a/tests/visual/emoji.spec.js b/tests/visual/emoji.spec.js new file mode 100644 index 000000000..881a07e73 --- /dev/null +++ b/tests/visual/emoji.spec.js @@ -0,0 +1,118 @@ +import { runDocTest } from './helpers'; +import fs from 'fs'; + +const EMOJI_FONT = '/System/Library/Fonts/Apple Color Emoji.ttc'; +const emojiAvailable = fs.existsSync(EMOJI_FONT); +const emojiDescribe = emojiAvailable ? 
describe : describe.skip; + +emojiDescribe('emoji', function () { + const docOptions = { + compress: false, + emojiFont: EMOJI_FONT, + emojiFontFamily: 'AppleColorEmoji', + }; + + test('simple emoji mixed with text', function () { + return runDocTest(docOptions, function (doc) { + const font = 'tests/fonts/Roboto-Regular.ttf'; + doc.font(font); + let y = 30; + const gap = 6; + + // --- Basic emoji mixed with text --- + doc.fontSize(18); + doc.text('Hello ๐Ÿ˜€ World ๐ŸŽ‰ Test ๐Ÿš€', 50, y); + y += 24 + gap; + + // --- Emoji-only line (no surrounding text) --- + doc.text('๐Ÿ˜€๐ŸŽ‰๐Ÿš€๐ŸŒˆโญ๐Ÿ”ฅ๐Ÿ’ฏ', 50, y); + y += 24 + gap; + + // --- Text-only line (no emoji) --- + doc.text('No emoji here, just plain text.', 50, y); + y += 24 + gap; + + // --- ZWJ family sequences --- + doc.fontSize(24); + doc.text('Family: ๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ Couple: ๐Ÿ‘ฉโ€โค๏ธโ€๐Ÿ‘จ', 50, y); + y += 30 + gap; + + // --- Skin tone modifiers --- + doc.fontSize(18); + doc.text('Thumbs: ๐Ÿ‘ ๐Ÿ‘๐Ÿป ๐Ÿ‘๐Ÿผ ๐Ÿ‘๐Ÿฝ ๐Ÿ‘๐Ÿพ ๐Ÿ‘๐Ÿฟ', 50, y); + y += 24 + gap; + + // --- Flag sequences (regional indicators) --- + doc.text('Flags: ๐Ÿ‡บ๐Ÿ‡ธ ๐Ÿ‡ฌ๐Ÿ‡ง ๐Ÿ‡ฏ๐Ÿ‡ต ๐Ÿ‡ซ๐Ÿ‡ท ๐Ÿ‡ฉ๐Ÿ‡ช ๐Ÿ‡ง๐Ÿ‡ท', 50, y); + y += 24 + gap; + + // --- Keycap sequences --- + doc.text('Keycaps: 1๏ธโƒฃ 2๏ธโƒฃ 3๏ธโƒฃ #๏ธโƒฃ *๏ธโƒฃ', 50, y); + y += 24 + gap; + + // --- Emoji at start and end of line --- + doc.text('๐Ÿ”ฅ Fire at start', 50, y); + y += 24 + gap; + doc.text('Fire at end ๐Ÿ”ฅ', 50, y); + y += 24 + gap; + + // --- Multiple emoji adjacent with no spaces --- + doc.text('No spaces:๐ŸŽ๐ŸŠ๐Ÿ‹๐Ÿ‡๐Ÿ‰๐Ÿ“', 50, y); + y += 24 + gap; + + // --- Different font sizes --- + doc.fontSize(10); + doc.text('Small 10pt: Hello ๐ŸŒ World', 50, y); + y += 16 + gap; + + doc.fontSize(36); + doc.text('Large 36pt: ๐ŸŽจ๐Ÿ–Œ๏ธ', 50, y); + y += 42 + gap; + + // --- Emoji with variation selector (text vs emoji presentation) --- + doc.fontSize(18); + doc.text('Heart: โค๏ธ vs โค๏ธŽ Star: โญ vs โœฉ', 50, y); + y += 24 + gap; + 
+ // --- Mixed scripts with emoji --- + doc.text('ๆ—ฅๆœฌ่ชžใƒ†ใ‚นใƒˆ ๐Ÿ—พ ไธญๆ–‡ๆต‹่ฏ• ๐Ÿ‰', 50, y); + y += 24 + gap; + + // --- Emoji in the middle of a long sentence --- + doc.fontSize(14); + doc.text( + 'The quick brown ๐ŸฆŠ jumps over the lazy ๐Ÿถ and runs through the ๐ŸŒฒ๐ŸŒฒ๐ŸŒฒ forest.', + 50, + y, + { width: 450 }, + ); + y += 40 + gap; + + // --- Multiple lines of emoji text --- + doc.fontSize(16); + doc.text('Line 1: Good morning โ˜€๏ธ', 50, y); + y += 22 + gap; + doc.text('Line 2: Good night ๐ŸŒ™', 50, y); + y += 22 + gap; + doc.text('Line 3: Weather ๐ŸŒง๏ธโ›ˆ๏ธ๐ŸŒค๏ธ', 50, y); + y += 22 + gap; + + // --- Animals and nature --- + doc.fontSize(20); + doc.text('๐Ÿฑ๐Ÿถ๐Ÿญ๐Ÿน๐Ÿฐ๐ŸฆŠ๐Ÿป๐Ÿผ๐Ÿจ๐Ÿฏ๐Ÿฆ๐Ÿฎ', 50, y); + y += 26 + gap; + + // --- Food emoji --- + doc.text('๐Ÿ•๐Ÿ”๐ŸŒฎ๐Ÿฃ๐Ÿœ๐Ÿฉ๐Ÿช๐ŸŽ‚๐Ÿฐ๐Ÿง', 50, y); + y += 26 + gap; + + // --- Sports and activities --- + doc.text('โšฝ๐Ÿ€๐Ÿˆโšพ๐ŸŽพ๐Ÿ๐Ÿ‰๐ŸŽฑ๐Ÿ“๐Ÿธ', 50, y); + y += 26 + gap; + + // --- Profession ZWJ sequences --- + doc.fontSize(24); + doc.text('๐Ÿ‘จโ€๐Ÿš€ ๐Ÿ‘ฉโ€๐Ÿ”ฌ ๐Ÿ‘จโ€๐Ÿณ ๐Ÿ‘ฉโ€๐ŸŽค', 50, y); + }); + }); +});