Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.transform.OutputKeys;
Expand All @@ -29,6 +30,8 @@
import java.io.ByteArrayInputStream;
import java.io.StringWriter;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Map;

/**
* Utility class for applying content exclusions to documents before hash calculation.
Expand Down Expand Up @@ -99,23 +102,28 @@ private static void removeNodeAtPointer(String uri, JsonNode rootNode, String js
*
* @param uri the document URI (used for logging purposes)
* @param xmlContent the XML content as a string
* @param namespaces a map of namespace prefixes to URIs for use in XPath expressions, or null
* @param xpathExpressions array of XPath expressions identifying elements to exclude
* @return the modified XML content with specified elements removed
* @throws Exception if the XML content cannot be parsed or serialized
*/
static String applyXmlExclusions(String uri, String xmlContent, String... xpathExpressions) throws Exception {
static String applyXmlExclusions(String uri, String xmlContent, Map<String, String> namespaces, String... xpathExpressions) throws Exception {
if (xpathExpressions == null || xpathExpressions.length == 0) {
return xmlContent;
}

DocumentBuilder builder = XmlFactories.getDocumentBuilderFactory().newDocumentBuilder();
Document document = builder.parse(new ByteArrayInputStream(xmlContent.getBytes(StandardCharsets.UTF_8)));
applyXmlExclusions(uri, document, xpathExpressions);
applyXmlExclusions(uri, document, namespaces, xpathExpressions);
return serializeDocument(document);
}

private static void applyXmlExclusions(String uri, Document document, String[] xpathExpressions) {
private static void applyXmlExclusions(String uri, Document document, Map<String, String> namespaces, String[] xpathExpressions) {
final XPath xpath = XmlFactories.getXPathFactory().newXPath();
if (namespaces != null && !namespaces.isEmpty()) {
xpath.setNamespaceContext(new SimpleNamespaceContext(namespaces));
}

Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove the trailing whitespace on this blank line to maintain consistent code formatting.

Suggested change

Copilot uses AI. Check for mistakes.
for (String xpathExpression : xpathExpressions) {
try {
XPathExpression expr = xpath.compile(xpathExpression);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import com.marklogic.client.document.DocumentWriteSet;
import com.marklogic.client.io.JacksonHandle;

import java.util.Map;
import java.util.function.Consumer;

/**
Expand All @@ -31,8 +32,8 @@ class IncrementalWriteEvalFilter extends IncrementalWriteFilter {
""";

IncrementalWriteEvalFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions) {
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Function;

Expand All @@ -51,6 +52,7 @@ public static class Builder {
private Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
private String[] jsonExclusions;
private String[] xmlExclusions;
private Map<String, String> xmlNamespaces;

/**
* @param keyName the name of the MarkLogic metadata key that will hold the hash value; defaults to "incrementalWriteHash".
Expand Down Expand Up @@ -117,13 +119,22 @@ public Builder xmlExclusions(String... xpathExpressions) {
return this;
}

/**
* @param namespaces a map of namespace prefixes to URIs for use in XPath exclusion expressions.
* For example, Map.of("ns", "http://example.com/ns") allows XPath like "//ns:timestamp".
* @return this builder, so that calls can be chained
Comment on lines +123 to +124
Copy link

Copilot AI Jan 16, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add @return documentation to match the pattern of other builder methods, indicating that this method returns the Builder instance for method chaining.

Copilot uses AI. Check for mistakes.
*/
public Builder xmlNamespaces(Map<String, String> namespaces) {
// The map reference is stored as-is (no copy is made here). build() later consults it when
// validating the XPath exclusions and hands it to the filter constructor.
this.xmlNamespaces = namespaces;
// Return this builder so further configuration calls can be chained.
return this;
}

public IncrementalWriteFilter build() {
validateJsonExclusions();
validateXmlExclusions();
if (useEvalQuery) {
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
return new IncrementalWriteEvalFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
}
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
return new IncrementalWriteOpticFilter(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
}

private void validateJsonExclusions() {
Expand Down Expand Up @@ -151,6 +162,9 @@ private void validateXmlExclusions() {
return;
}
XPath xpath = XmlFactories.getXPathFactory().newXPath();
if (xmlNamespaces != null && !xmlNamespaces.isEmpty()) {
xpath.setNamespaceContext(new SimpleNamespaceContext(xmlNamespaces));
}
for (String xpathExpression : xmlExclusions) {
if (xpathExpression == null || xpathExpression.trim().isEmpty()) {
throw new IllegalArgumentException(
Expand All @@ -173,18 +187,20 @@ private void validateXmlExclusions() {
private final Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer;
private final String[] jsonExclusions;
private final String[] xmlExclusions;
private final Map<String, String> xmlNamespaces;

// Hardcoding this for now, with a good general purpose hashing function.
// See https://xxhash.com for benchmarks.
private final LongHashFunction hashFunction = LongHashFunction.xx3();

public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions) {
public IncrementalWriteFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson, Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
this.hashKeyName = hashKeyName;
this.timestampKeyName = timestampKeyName;
this.canonicalizeJson = canonicalizeJson;
this.skippedDocumentsConsumer = skippedDocumentsConsumer;
this.jsonExclusions = jsonExclusions;
this.xmlExclusions = xmlExclusions;
this.xmlNamespaces = xmlNamespaces;
}

protected final DocumentWriteSet filterDocuments(Context context, Function<String, String> hashRetriever) {
Expand Down Expand Up @@ -260,7 +276,7 @@ private String serializeContent(DocumentWriteOperation doc) {
}
} else if (xmlExclusions != null && xmlExclusions.length > 0) {
try {
content = ContentExclusionUtil.applyXmlExclusions(doc.getUri(), content, xmlExclusions);
content = ContentExclusionUtil.applyXmlExclusions(doc.getUri(), content, xmlNamespaces, xmlExclusions);
} catch (Exception e) {
logger.warn("Unable to apply XML exclusions for URI {}, using original content for hashing; cause: {}",
doc.getUri(), e.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
class IncrementalWriteOpticFilter extends IncrementalWriteFilter {

IncrementalWriteOpticFilter(String hashKeyName, String timestampKeyName, boolean canonicalizeJson,
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions) {
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions);
Consumer<DocumentWriteOperation[]> skippedDocumentsConsumer, String[] jsonExclusions, String[] xmlExclusions, Map<String, String> xmlNamespaces) {
super(hashKeyName, timestampKeyName, canonicalizeJson, skippedDocumentsConsumer, jsonExclusions, xmlExclusions, xmlNamespaces);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Copyright (c) 2010-2026 Progress Software Corporation and/or its subsidiaries or affiliates. All Rights Reserved.
*/
package com.marklogic.client.datamovement.filter;

import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import java.util.Collections;
import java.util.Iterator;
import java.util.Map;

/**
 * A simple implementation of {@link NamespaceContext} backed by a Map of prefix to namespace URI mappings.
 * Used for XPath evaluation with namespace-qualified expressions.
 * <p>
 * Lookups follow the rules laid out in the {@link NamespaceContext} contract:
 * <ul>
 *   <li>a {@code null} prefix or namespace URI is rejected with {@link IllegalArgumentException};</li>
 *   <li>the reserved {@code xml} and {@code xmlns} prefixes always resolve to their fixed namespace URIs,
 *       regardless of the contents of the backing map;</li>
 *   <li>an unbound prefix resolves to {@link XMLConstants#NULL_NS_URI} (the empty string), never {@code null}.</li>
 * </ul>
 * The backing map is not copied, so it should not be modified while this context is in use.
 *
 * @since 8.1.0
 */
class SimpleNamespaceContext implements NamespaceContext {

    private final Map<String, String> prefixToNamespaceUri;

    /**
     * @param prefixToNamespaceUri prefix to namespace URI bindings; {@code null} is treated as "no bindings"
     */
    SimpleNamespaceContext(Map<String, String> prefixToNamespaceUri) {
        this.prefixToNamespaceUri = prefixToNamespaceUri != null ? prefixToNamespaceUri : Map.of();
    }

    /**
     * @param prefix the prefix to resolve
     * @return the namespace URI bound to the prefix, or {@link XMLConstants#NULL_NS_URI} if the prefix is unbound
     * @throws IllegalArgumentException if {@code prefix} is {@code null}
     */
    @Override
    public String getNamespaceURI(String prefix) {
        // Checked up front because immutable maps such as Map.of(...) throw NullPointerException on get(null).
        if (prefix == null) {
            throw new IllegalArgumentException("prefix must not be null");
        }
        if (XMLConstants.XML_NS_PREFIX.equals(prefix)) {
            return XMLConstants.XML_NS_URI;
        }
        if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix)) {
            return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
        }
        String namespaceUri = prefixToNamespaceUri.get(prefix);
        return namespaceUri != null ? namespaceUri : XMLConstants.NULL_NS_URI;
    }

    /**
     * @param namespaceURI the namespace URI to look up
     * @return one prefix bound to the namespace URI, or {@code null} if none is bound
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public String getPrefix(String namespaceURI) {
        Iterator<String> prefixes = getPrefixes(namespaceURI);
        return prefixes.hasNext() ? prefixes.next() : null;
    }

    /**
     * @param namespaceURI the namespace URI to look up
     * @return an iterator over every prefix bound to the namespace URI; empty if none is bound
     * @throws IllegalArgumentException if {@code namespaceURI} is {@code null}
     */
    @Override
    public Iterator<String> getPrefixes(String namespaceURI) {
        if (namespaceURI == null) {
            throw new IllegalArgumentException("namespaceURI must not be null");
        }
        if (XMLConstants.XML_NS_URI.equals(namespaceURI)) {
            return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
        }
        if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI)) {
            return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
        }
        // Compare from the non-null argument so a null value in the backing map cannot cause an NPE.
        return prefixToNamespaceUri.entrySet().stream()
            .filter(entry -> namespaceURI.equals(entry.getValue()))
            .map(Map.Entry::getKey)
            .iterator();
    }
}
Loading