From f16c8038b65e84659eaa523be54419f88e721a2e Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Tue, 17 Mar 2026 16:06:38 -0600 Subject: [PATCH 1/5] Add failing unit test for RichTextData conversion --- packages/core-data/src/utils/test/crdt.ts | 48 ++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/packages/core-data/src/utils/test/crdt.ts b/packages/core-data/src/utils/test/crdt.ts index 13487638a9cedd..8a8de21fbe4b49 100644 --- a/packages/core-data/src/utils/test/crdt.ts +++ b/packages/core-data/src/utils/test/crdt.ts @@ -8,6 +8,30 @@ import { Y } from '@wordpress/sync'; */ import { describe, expect, it, jest, beforeEach } from '@jest/globals'; +/** + * Mock getBlockTypes so isRichTextAttribute can identify rich-text attrs. + */ +jest.mock( '@wordpress/blocks', () => { + const actual = jest.requireActual( '@wordpress/blocks' ) as Record< + string, + unknown + >; + return { + ...actual, + getBlockTypes: () => [ + { + name: 'core/paragraph', + attributes: { content: { type: 'rich-text' } }, + }, + ], + }; +} ); + +/** + * WordPress dependencies + */ +import { RichTextData } from '@wordpress/rich-text'; + /** * Internal dependencies */ @@ -515,6 +539,25 @@ describe( 'crdt', () => { expect( changes ).toHaveProperty( 'blocks' ); } ); + it( 'returns rich-text block attributes as RichTextData, not strings', () => { + // Simulate User A writing a paragraph block into the CRDT doc. + addBlockToDoc( map, 'block-1', 'Hello world' ); + + // Simulate User B reading the CRDT doc with no local blocks. + const editedRecord = { blocks: [] } as unknown as Post; + + const changes = getPostChangesFromCRDTDoc( + doc, + editedRecord, + defaultSyncedProperties + ); + + const block = ( changes.blocks as any[] )?.[ 0 ]; + expect( block ).toBeDefined(); + expect( block.attributes.content ).toBeInstanceOf( RichTextData ); + expect( block.attributes.content.text ).toBe( 'Hello world' ); + } ); + it( 'includes undefined blocks in changes', () => { map.set( 'blocks', undefined ); @@ -801,11 +844,13 @@ describe( 'crdt', () => { * @param map * @param clientId Block client ID. * @param content Initial text content. + * @param name Block name (default: 'core/paragraph'). */ function addBlockToDoc( map: YMapWrap< YPostRecord >, clientId: string, - content: string + content: string, + name = 'core/paragraph' ): Y.Text { let blocks = map.get( 'blocks' ); if ( ! ( blocks instanceof Y.Array ) ) { @@ -814,6 +859,7 @@ function addBlockToDoc( } const block = createYMap< YBlockRecord >(); + block.set( 'name', name ); block.set( 'clientId', clientId ); const attrs = new Y.Map(); const ytext = new Y.Text( content ); From af680cd00d3b29d629b295c0b2329078141861d1 Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Tue, 17 Mar 2026 17:08:03 -0600 Subject: [PATCH 2/5] Fix RichText attribute type via deserializeBlockAttributes() call in getPostChangesFromCRDTDoc() --- packages/core-data/src/utils/crdt-blocks.ts | 51 +++++++++++++++++++++ packages/core-data/src/utils/crdt.ts | 10 ++++ 2 files changed, 61 insertions(+) diff --git a/packages/core-data/src/utils/crdt-blocks.ts b/packages/core-data/src/utils/crdt-blocks.ts index c4815b0bc93426..c9b364443c0d24 100644 --- a/packages/core-data/src/utils/crdt-blocks.ts +++ b/packages/core-data/src/utils/crdt-blocks.ts @@ -122,6 +122,57 @@ function makeBlocksSerializable( blocks: Block[] ): Block[] { } ); } +/** + * Convert rich-text string attributes in a block back to RichTextData + * instances. This is the inverse of makeBlockAttributesSerializable and is + * needed when blocks are extracted from the CRDT document, where rich-text + * values are stored as Y.Text (which serializes to plain strings via + * toJSON()). Without this conversion, block edit components that rely on + * RichTextData methods (e.g. `.text`) will receive a raw string and + * malfunction. + * + * @param blockName The block type name, e.g. 'core/code'. + * @param attributes The plain-object attributes from CRDT (toJSON). + * @return Attributes with rich-text strings replaced by RichTextData. + */ +function deserializeBlockAttributeValues( + blockName: string, + attributes: BlockAttributes +): BlockAttributes { + const newAttributes = { ...attributes }; + + for ( const [ key, value ] of Object.entries( attributes ) ) { + if ( + isRichTextAttribute( blockName, key ) && + typeof value === 'string' + ) { + newAttributes[ key ] = RichTextData.fromHTMLString( value ); + } + } + + return newAttributes; +} + +/** + * Convert blocks from their CRDT-serialized form back to the runtime form + * expected by the block editor. This ensures that rich-text attributes are + * RichTextData instances rather than raw strings. + * + * @param blocks Blocks as extracted from the CRDT document via toJSON(). + * @return Blocks with rich-text attributes restored to RichTextData. + */ +export function deserializeBlockAttributes( blocks: Block[] ): Block[] { + return blocks.map( ( block: Block ) => { + const { name, innerBlocks, attributes, ...rest } = block; + return { + ...rest, + name, + attributes: deserializeBlockAttributeValues( name, attributes ), + innerBlocks: deserializeBlockAttributes( innerBlocks ?? [] ), + }; + } ); +} + /** * @param {any} gblock * @param {Y.Map} yblock diff --git a/packages/core-data/src/utils/crdt.ts b/packages/core-data/src/utils/crdt.ts index 6b674623c43862..59f0c9439385c8 100644 --- a/packages/core-data/src/utils/crdt.ts +++ b/packages/core-data/src/utils/crdt.ts @@ -20,6 +20,7 @@ import { */ import { BaseAwareness } from '../awareness/base-awareness'; import { + deserializeBlockAttributes, mergeCrdtBlocks, mergeRichTextUpdate, type Block, @@ -382,6 +383,15 @@ export function getPostChangesFromCRDTDoc( } ) ); + // Blocks extracted from the CRDT document have rich-text attributes as + // plain strings (from Y.Text.toJSON()). Convert them back to RichTextData + // so block edit components receive the same types as locally-created blocks. + if ( changes.blocks ) { + changes.blocks = deserializeBlockAttributes( + changes.blocks as Block[] + ); + } + // Meta changes must be merged with the edited record since not all meta // properties are synced. if ( 'object' === typeof changes.meta ) { From 9ea1add9cf990a9a1a8f0be676719874947f8cd3 Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Wed, 18 Mar 2026 09:56:20 -0600 Subject: [PATCH 3/5] Add test for nested RichText value --- packages/core-data/src/utils/test/crdt.ts | 62 +++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/packages/core-data/src/utils/test/crdt.ts b/packages/core-data/src/utils/test/crdt.ts index 8a8de21fbe4b49..d14af7ccc68d23 100644 --- a/packages/core-data/src/utils/test/crdt.ts +++ b/packages/core-data/src/utils/test/crdt.ts @@ -23,6 +23,25 @@ jest.mock( '@wordpress/blocks', () => { name: 'core/paragraph', attributes: { content: { type: 'rich-text' } }, }, + { + name: 'core/table', + attributes: { + hasFixedLayout: { type: 'boolean' }, + caption: { type: 'rich-text' }, + body: { + type: 'array', + query: { + cells: { + type: 'array', + query: { + content: { type: 'rich-text' }, + tag: { type: 'string' }, + }, + }, + }, + }, + }, + }, ], }; } ); @@ -558,6 +577,49 @@ describe( 'crdt', () => { expect( block.attributes.content.text ).toBe( 'Hello world' ); } ); + it( 'returns nested rich-text in array attributes as RichTextData', () => { + // Add a table block to the CRDT doc with nested cell content + // stored as plain strings. + let blocks = map.get( 'blocks' ); + if ( ! ( blocks instanceof Y.Array ) ) { + blocks = new Y.Array< YBlock >(); + map.set( 'blocks', blocks ); + } + + const tableBlock = createYMap< YBlockRecord >(); + tableBlock.set( 'name', 'core/table' ); + tableBlock.set( 'clientId', 'table-1' ); + const attrs = new Y.Map(); + attrs.set( 'body', [ + { + cells: [ + { content: 'Cell', tag: 'td' }, + { content: 'Plain', tag: 'td' }, + ], + }, + ] ); + tableBlock.set( 'attributes', attrs ); + tableBlock.set( 'innerBlocks', new Y.Array() ); + ( blocks as YBlocks ).push( [ tableBlock ] ); + + const editedRecord = { blocks: [] } as unknown as Post; + + const changes = getPostChangesFromCRDTDoc( + doc, + editedRecord, + defaultSyncedProperties + ); + + const block = ( changes.blocks as any[] )?.[ 0 ]; + expect( block ).toBeDefined(); + + const cell = block.attributes.body[ 0 ].cells[ 0 ]; + expect( cell.content ).toBeInstanceOf( RichTextData ); + expect( ( cell.content as RichTextData ).toHTMLString() ).toBe( + 'Cell' + ); + } ); + it( 'includes undefined blocks in changes', () => { map.set( 'blocks', undefined ); From 1f1ea8a405c2da3d2ebc3a1e41e7d5ee24fba36a Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Wed, 18 Mar 2026 12:37:41 -0600 Subject: [PATCH 4/5] Modify deserializeAttributeValue() to recurse arrays and object, using block schema for deeply nested strings --- packages/core-data/src/utils/crdt-blocks.ts | 57 ++++++++++++++++++--- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/packages/core-data/src/utils/crdt-blocks.ts b/packages/core-data/src/utils/crdt-blocks.ts index c9b364443c0d24..0e67be95522ca9 100644 --- a/packages/core-data/src/utils/crdt-blocks.ts +++ b/packages/core-data/src/utils/crdt-blocks.ts @@ -25,6 +25,7 @@ interface BlockAttributes { interface BlockAttributeType { role?: string; type?: string; + query?: Record< string, BlockAttributeType >; } interface BlockType { @@ -122,6 +123,49 @@ function makeBlocksSerializable( blocks: Block[] ): Block[] { } ); } +/** + * Recursively walk an attribute value and convert any strings that correspond + * to rich-text schema nodes into RichTextData instances. This is the inverse + * of serializeAttributeValue and handles nested structures like table cells. + * + * @param schema The attribute type definition for this value. + * @param value The attribute value from CRDT (toJSON). + * @return The value with rich-text strings replaced by RichTextData. + */ +function deserializeAttributeValue( + schema: BlockAttributeType | undefined, + value: unknown +): unknown { + if ( schema?.type === 'rich-text' && typeof value === 'string' ) { + return RichTextData.fromHTMLString( value ); + } + + // e.g. core/table `body`: [ { cells: [ { content: RichTextData } ] } ] + if ( Array.isArray( value ) ) { + return value.map( ( item ) => + deserializeAttributeValue( schema, item ) + ); + } + + // e.g. a single row inside core/table `body`: { cells: [ ... ] } + if ( value && typeof value === 'object' ) { + const result: Record< string, unknown > = {}; + + for ( const [ key, innerValue ] of Object.entries( + value as Record< string, unknown > + ) ) { + result[ key ] = deserializeAttributeValue( + schema?.query?.[ key ], + innerValue + ); + } + + return result; + } + + return value; +} + /** * Convert rich-text string attributes in a block back to RichTextData * instances. This is the inverse of makeBlockAttributesSerializable and is @@ -142,11 +186,10 @@ function deserializeBlockAttributeValues( const newAttributes = { ...attributes }; for ( const [ key, value ] of Object.entries( attributes ) ) { - if ( - isRichTextAttribute( blockName, key ) && - typeof value === 'string' - ) { - newAttributes[ key ] = RichTextData.fromHTMLString( value ); + const schema = getBlockAttributeType( blockName, key ); + + if ( schema ) { + newAttributes[ key ] = deserializeAttributeValue( schema, value ); } } @@ -532,8 +575,8 @@ function getBlockAttributeType( new Map< string, BlockAttributeType >( Object.entries( blockType.attributes ?? {} ).map( ( [ name, definition ] ) => { - const { role, type } = definition; - return [ name, { role, type } ]; + const { role, type, query } = definition; + return [ name, { role, type, query } ]; } ) ) From 02b1df4bcccc7a8496db0b885fd55b7f268ce99d Mon Sep 17 00:00:00 2001 From: Alec Geatches Date: Wed, 18 Mar 2026 13:30:27 -0600 Subject: [PATCH 5/5] Remove deserializeBlockAttributeValues to avoid 3 different deserialization functions --- packages/core-data/src/utils/crdt-blocks.ts | 84 ++++++++++++--------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/packages/core-data/src/utils/crdt-blocks.ts b/packages/core-data/src/utils/crdt-blocks.ts index 0e67be95522ca9..029a1388a75282 100644 --- a/packages/core-data/src/utils/crdt-blocks.ts +++ b/packages/core-data/src/utils/crdt-blocks.ts @@ -123,6 +123,36 @@ function makeBlocksSerializable( blocks: Block[] ): Block[] { } ); } +const RICH_TEXT_CACHE_MAX_SIZE = 500; +const richTextCache = new Map< string, RichTextData >(); + +/** + * Returns a RichTextData instance for the given HTML string, using a cache to + * avoid re-parsing identical strings. Repeated calls with the same string + * (e.g. unchanged blocks on each remote CRDT update) return the cached instance + * without re-running the HTML parser and DOM traversal. + * + * @param value The HTML string to parse. + * @return The RichTextData instance. + */ +function cachedFromHTMLString( value: string ): RichTextData { + const cached = richTextCache.get( value ); + + if ( cached ) { + return cached; + } + + const result = RichTextData.fromHTMLString( value ); + + if ( richTextCache.size >= RICH_TEXT_CACHE_MAX_SIZE ) { + // Evict the oldest entry (Map preserves insertion order). + richTextCache.delete( richTextCache.keys().next().value! ); + } + + richTextCache.set( value, result ); + return result; +} + /** * Recursively walk an attribute value and convert any strings that correspond * to rich-text schema nodes into RichTextData instances. This is the inverse @@ -137,7 +167,7 @@ function deserializeAttributeValue( value: unknown ): unknown { if ( schema?.type === 'rich-text' && typeof value === 'string' ) { - return RichTextData.fromHTMLString( value ); + return cachedFromHTMLString( value ); } // e.g. core/table `body`: [ { cells: [ { content: RichTextData } ] } ] @@ -166,40 +196,12 @@ function deserializeAttributeValue( return value; } -/** - * Convert rich-text string attributes in a block back to RichTextData - * instances. This is the inverse of makeBlockAttributesSerializable and is - * needed when blocks are extracted from the CRDT document, where rich-text - * values are stored as Y.Text (which serializes to plain strings via - * toJSON()). Without this conversion, block edit components that rely on - * RichTextData methods (e.g. `.text`) will receive a raw string and - * malfunction. - * - * @param blockName The block type name, e.g. 'core/code'. - * @param attributes The plain-object attributes from CRDT (toJSON). - * @return Attributes with rich-text strings replaced by RichTextData. - */ -function deserializeBlockAttributeValues( - blockName: string, - attributes: BlockAttributes -): BlockAttributes { - const newAttributes = { ...attributes }; - - for ( const [ key, value ] of Object.entries( attributes ) ) { - const schema = getBlockAttributeType( blockName, key ); - - if ( schema ) { - newAttributes[ key ] = deserializeAttributeValue( schema, value ); - } - } - - return newAttributes; -} - /** * Convert blocks from their CRDT-serialized form back to the runtime form - * expected by the block editor. This ensures that rich-text attributes are - * RichTextData instances rather than raw strings. + * expected by the block editor. Rich-text attributes are stored as Y.Text in + * the CRDT document, which serializes to plain strings via toJSON(). This + * function restores them to RichTextData instances so that block edit + * components that rely on RichTextData methods (e.g. `.text`) work correctly. * * @param blocks Blocks as extracted from the CRDT document via toJSON(). * @return Blocks with rich-text attributes restored to RichTextData. @@ -207,10 +209,24 @@ function deserializeBlockAttributeValues( export function deserializeBlockAttributes( blocks: Block[] ): Block[] { return blocks.map( ( block: Block ) => { const { name, innerBlocks, attributes, ...rest } = block; + + const newAttributes = { ...attributes }; + + for ( const [ key, value ] of Object.entries( attributes ) ) { + const schema = getBlockAttributeType( name, key ); + + if ( schema ) { + newAttributes[ key ] = deserializeAttributeValue( + schema, + value + ); + } + } + return { ...rest, name, - attributes: deserializeBlockAttributeValues( name, attributes ), + attributes: newAttributes, innerBlocks: deserializeBlockAttributes( innerBlocks ?? [] ), }; } );