diff --git a/marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteFilter.java b/marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteFilter.java index a78759d50..f7de86a12 100644 --- a/marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteFilter.java +++ b/marklogic-client-api/src/main/java/com/marklogic/client/datamovement/filter/IncrementalWriteFilter.java @@ -3,11 +3,13 @@ */ package com.marklogic.client.datamovement.filter; +import com.fasterxml.jackson.core.JsonPointer; import com.marklogic.client.datamovement.DocumentWriteSetFilter; import com.marklogic.client.document.DocumentWriteOperation; import com.marklogic.client.document.DocumentWriteSet; import com.marklogic.client.impl.DocumentWriteOperationImpl; import com.marklogic.client.impl.HandleAccessor; +import com.marklogic.client.impl.XmlFactories; import com.marklogic.client.io.BaseHandle; import com.marklogic.client.io.DocumentMetadataHandle; import com.marklogic.client.io.Format; @@ -16,6 +18,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathExpressionException; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.time.Instant; @@ -114,11 +118,53 @@ public Builder xmlExclusions(String... 
xpathExpressions) { } public IncrementalWriteFilter build() { + validateJsonExclusions(); // fail fast: both validators throw IllegalArgumentException before any filter is constructed + validateXmlExclusions(); if (useEvalQuery) { return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions); } return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions); } + + private void validateJsonExclusions() { // throws IllegalArgumentException for a null, blank, or uncompilable JSON Pointer + if (jsonExclusions == null) { // nothing to validate + return; + } + for (String jsonPointer : jsonExclusions) { + if (jsonPointer == null || jsonPointer.trim().isEmpty()) { + throw new IllegalArgumentException( + "Empty JSON Pointer expression is not valid for excluding content from incremental write hash calculation; " + + "it would exclude the entire document. JSON Pointer expressions must start with '/'."); + } + try { + JsonPointer.compile(jsonPointer); // compiled only to check syntax; the compiled pointer is discarded + } catch (IllegalArgumentException e) { + throw new IllegalArgumentException( + String.format("Invalid JSON Pointer expression '%s' for excluding content from incremental write hash calculation. 
" + + "JSON Pointer expressions must start with '/'; cause: %s", jsonPointer, e.getMessage()), e); + } + } + } + + private void validateXmlExclusions() { // throws IllegalArgumentException for a null, blank, or uncompilable XPath expression + if (xmlExclusions == null) { // nothing to validate + return; + } + XPath xpath = XmlFactories.getXPathFactory().newXPath(); // one XPath instance is reused for every expression + for (String xpathExpression : xmlExclusions) { + if (xpathExpression == null || xpathExpression.trim().isEmpty()) { + throw new IllegalArgumentException( + "Empty XPath expression is not valid for excluding content from incremental write hash calculation."); + } + try { + xpath.compile(xpathExpression); // compiled only to check syntax; the compiled expression is discarded + } catch (XPathExpressionException e) { + throw new IllegalArgumentException( + String.format("Invalid XPath expression '%s' for excluding content from incremental write hash calculation; cause: %s", + xpathExpression, e.getMessage()), e); + } + } + } } protected final String hashKeyName; diff --git a/marklogic-client-api/src/test/java/com/marklogic/client/datamovement/filter/ApplyExclusionsToIncrementalWriteTest.java b/marklogic-client-api/src/test/java/com/marklogic/client/datamovement/filter/ApplyExclusionsToIncrementalWriteTest.java index 19f5dd339..4c418e578 100644 --- a/marklogic-client-api/src/test/java/com/marklogic/client/datamovement/filter/ApplyExclusionsToIncrementalWriteTest.java +++ b/marklogic-client-api/src/test/java/com/marklogic/client/datamovement/filter/ApplyExclusionsToIncrementalWriteTest.java @@ -139,4 +139,107 @@ void xmlExclusions() { assertEquals(10, writtenCount.get(), "Documents should be written since non-excluded content changed"); assertEquals(5, skippedCount.get(), "Skip count should remain at 5"); } + + /** + * Verifies that JSON Pointer exclusions are only applied to JSON documents and are ignored for XML documents. + * The XML document should use its full content for hashing since no XML exclusions are configured. 
+ */ + @Test + void jsonExclusionsIgnoredForXmlDocuments() { + filter = IncrementalWriteFilter.newBuilder() + .jsonExclusions("/timestamp") + .onDocumentsSkipped(docs -> skippedCount.addAndGet(docs.length)) + .build(); + + // Write one JSON doc and one XML doc + docs = new ArrayList<>(); + ObjectNode jsonDoc = objectMapper.createObjectNode(); + jsonDoc.put("id", 1); + jsonDoc.put("timestamp", "2025-01-01T10:00:00Z"); + docs.add(new DocumentWriteOperationImpl("/incremental/test/mixed-doc.json", METADATA, new JacksonHandle(jsonDoc))); + + String xmlDoc = "<doc><id>1</id><timestamp>2025-01-01T10:00:00Z</timestamp></doc>"; // NOTE(review): element markup restored to mirror the JSON doc; root element name is assumed, confirm + docs.add(new DocumentWriteOperationImpl("/incremental/test/mixed-doc.xml", METADATA, new StringHandle(xmlDoc).withFormat(Format.XML))); + + writeDocs(docs); + assertEquals(2, writtenCount.get()); + assertEquals(0, skippedCount.get()); + + // Write again with different timestamp values + docs = new ArrayList<>(); + jsonDoc = objectMapper.createObjectNode(); + jsonDoc.put("id", 1); + jsonDoc.put("timestamp", "2026-01-02T15:30:00Z"); // Changed + docs.add(new DocumentWriteOperationImpl("/incremental/test/mixed-doc.json", METADATA, new JacksonHandle(jsonDoc))); + + xmlDoc = "<doc><id>1</id><timestamp>2026-01-02T15:30:00Z</timestamp></doc>"; // Changed + docs.add(new DocumentWriteOperationImpl("/incremental/test/mixed-doc.xml", METADATA, new StringHandle(xmlDoc).withFormat(Format.XML))); + + writeDocs(docs); + assertEquals(3, writtenCount.get(), "XML doc should be written since its timestamp changed and no XML exclusions are configured"); + assertEquals(1, skippedCount.get(), "JSON doc should be skipped since only the excluded timestamp field changed"); + } + + /** + * Verifies that when canonicalizeJson is false, documents with logically identical content + * but different key ordering will produce different hashes, causing a write to occur. 
+ */ + @Test + void jsonNotCanonicalizedCausesDifferentHashForReorderedKeys() { + filter = IncrementalWriteFilter.newBuilder() + .canonicalizeJson(false) // canonicalization off, so key order is expected to change the hash + .onDocumentsSkipped(docs -> skippedCount.addAndGet(docs.length)) + .build(); + + // Write initial document with keys in a specific order + docs = new ArrayList<>(); + String json1 = "{\"name\":\"Test\",\"id\":1,\"value\":100}"; + docs.add(new DocumentWriteOperationImpl("/incremental/test/non-canonical.json", METADATA, + new StringHandle(json1).withFormat(Format.JSON))); + + writeDocs(docs); + assertEquals(1, writtenCount.get()); + assertEquals(0, skippedCount.get()); + + // Write again with same logical content but different key order + docs = new ArrayList<>(); + String json2 = "{\"id\":1,\"value\":100,\"name\":\"Test\"}"; // same three key/value pairs as json1, reordered + docs.add(new DocumentWriteOperationImpl("/incremental/test/non-canonical.json", METADATA, + new StringHandle(json2).withFormat(Format.JSON))); + + writeDocs(docs); + assertEquals(2, writtenCount.get(), "Document should be written because key order differs and JSON is not canonicalized"); + assertEquals(0, skippedCount.get(), "No documents should be skipped"); + } + + /** + * Verifies that with the default canonicalizeJson(true), documents with logically identical content + * but different key ordering will produce the same hash, causing the document to be skipped. 
+ */ + @Test + void jsonCanonicalizedProducesSameHashForReorderedKeys() { + filter = IncrementalWriteFilter.newBuilder() // canonicalizeJson is not called, so the builder's default applies + .onDocumentsSkipped(docs -> skippedCount.addAndGet(docs.length)) + .build(); + + // Write initial document with keys in a specific order + docs = new ArrayList<>(); + String json1 = "{\"name\":\"Test\",\"id\":1,\"value\":100}"; + docs.add(new DocumentWriteOperationImpl("/incremental/test/canonical.json", METADATA, + new StringHandle(json1).withFormat(Format.JSON))); + + writeDocs(docs); + assertEquals(1, writtenCount.get()); + assertEquals(0, skippedCount.get()); + + // Write again with same logical content but different key order + docs = new ArrayList<>(); + String json2 = "{\"id\":1,\"value\":100,\"name\":\"Test\"}"; // same three key/value pairs as json1, reordered + docs.add(new DocumentWriteOperationImpl("/incremental/test/canonical.json", METADATA, + new StringHandle(json2).withFormat(Format.JSON))); + + writeDocs(docs); + assertEquals(1, writtenCount.get(), "Document should be skipped because canonicalized JSON produces the same hash"); + assertEquals(1, skippedCount.get(), "One document should be skipped"); + } } diff --git a/marklogic-client-api/src/test/java/com/marklogic/client/datamovement/filter/ApplyInvalidExclusionsToIncrementalWriteTest.java b/marklogic-client-api/src/test/java/com/marklogic/client/datamovement/filter/ApplyInvalidExclusionsToIncrementalWriteTest.java new file mode 100644 index 000000000..c71eaa90b --- /dev/null +++ b/marklogic-client-api/src/test/java/com/marklogic/client/datamovement/filter/ApplyInvalidExclusionsToIncrementalWriteTest.java @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved. 
+ */ +package com.marklogic.client.datamovement.filter; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class ApplyInvalidExclusionsToIncrementalWriteTest extends AbstractIncrementalWriteTest { // each test expects Builder.build() to throw IllegalArgumentException + + /** + * Verifies that an invalid JSON Pointer expression (missing leading slash) causes the build to fail + * immediately, allowing the user to fix the configuration before any documents are processed. + */ + @Test + void invalidJsonPointerExpression() { + IncrementalWriteFilter.Builder builder = IncrementalWriteFilter.newBuilder() + .jsonExclusions("timestamp"); // Invalid - missing leading slash; the setter itself is expected not to throw + + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, builder::build); + + assertTrue(ex.getMessage().contains("Invalid JSON Pointer expression 'timestamp'"), + "Error message should include the invalid expression. Actual: " + ex.getMessage()); + assertTrue(ex.getMessage().contains("incremental write"), + "Error message should mention incremental write context. Actual: " + ex.getMessage()); + assertTrue(ex.getMessage().contains("must start with '/'"), + "Error message should hint at the fix. Actual: " + ex.getMessage()); + } + + /** + * Verifies that an empty JSON Pointer expression is rejected since it would exclude the entire document, + * leaving nothing to hash. + */ + @Test + void emptyJsonPointerExpression() { + IncrementalWriteFilter.Builder builder = IncrementalWriteFilter.newBuilder() + .jsonExclusions(""); // Invalid - would exclude entire document; rejected at build() time, not here + + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, builder::build); + + assertTrue(ex.getMessage().contains("Empty JSON Pointer expression"), + "Error message should indicate empty expression. Actual: " + ex.getMessage()); + assertTrue(ex.getMessage().contains("would exclude the entire document"), + "Error message should explain why it's invalid. 
Actual: " + ex.getMessage()); + } + + /** + * Verifies that an invalid XPath expression causes the build to fail immediately, + * allowing the user to fix the configuration before any documents are processed. + */ + @Test + void invalidXPathExpression() { + IncrementalWriteFilter.Builder builder = IncrementalWriteFilter.newBuilder() + .xmlExclusions("[[[invalid xpath"); // Invalid XPath syntax; rejected at build() time, not here + + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, builder::build); + + assertTrue(ex.getMessage().contains("Invalid XPath expression '[[[invalid xpath'"), + "Error message should include the invalid expression. Actual: " + ex.getMessage()); + assertTrue(ex.getMessage().contains("incremental write"), + "Error message should mention incremental write context. Actual: " + ex.getMessage()); + } + +}