Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,85 @@ describe("Gremlin > keywordSearchTemplate", () => {
),
);
});

it("Should return a case-insensitive template for partial match", () => {
  // Each searched attribute is expected to be matched with a "(?i)" regex
  // that wraps the term in ".*" on both sides.
  const actual = normalize(
    keywordSearchTemplate({
      searchTerm: "JFK",
      searchByAttributes: ["city", "code"],
      caseInsensitive: true,
    }),
  );
  const expected = normalize(
    'g.V().or(has("city",regex("(?i).*JFK.*")),has("code",regex("(?i).*JFK.*")))',
  );

  expect(actual).toBe(expected);
});

it("Should return a case-insensitive template for exact match", () => {
  // With exactMatch the "(?i)" regex is expected to be anchored with "^"
  // and an escaped "$" instead of being wrapped in ".*".
  const actual = normalize(
    keywordSearchTemplate({
      searchTerm: "JFK",
      searchByAttributes: ["city", "code"],
      exactMatch: true,
      caseInsensitive: true,
    }),
  );
  const expected = normalize(
    'g.V().or(has("city",regex("(?i)^JFK\\$")),has("code",regex("(?i)^JFK\\$")))',
  );

  expect(actual).toBe(expected);
});

it("Should not apply case-insensitive to ID searches", () => {
  // Searching by node ID is expected to keep the plain containing()
  // predicate even though caseInsensitive is set.
  const actual = normalize(
    keywordSearchTemplate({
      vertexTypes: ["airport"],
      searchTerm: "JFK",
      exactMatch: false,
      caseInsensitive: true,
      searchByAttributes: [SEARCH_TOKENS.NODE_ID],
    }),
  );
  const expected = normalize(
    'g.V().hasLabel("airport").or(has(id,containing("JFK")))',
  );

  expect(actual).toBe(expected);
});

it("Should not apply regex when caseInsensitive is false", () => {
  // An explicit caseInsensitive: false is expected to produce the
  // containing() predicate rather than a regex.
  const actual = normalize(
    keywordSearchTemplate({
      searchTerm: "JFK",
      searchByAttributes: ["city"],
      exactMatch: false,
      caseInsensitive: false,
    }),
  );
  const expected = normalize('g.V().or(has("city",containing("JFK")))');

  expect(actual).toBe(expected);
});

it("Should escape regex metacharacters in case-insensitive partial match", () => {
  // The "." in the search term is expected to appear in the query
  // prefixed with two backslash characters.
  const actual = normalize(
    keywordSearchTemplate({
      searchTerm: "test.com",
      searchByAttributes: ["url"],
      caseInsensitive: true,
    }),
  );
  const expected = normalize(
    'g.V().or(has("url",regex("(?i).*test\\\\.com.*")))',
  );

  expect(actual).toBe(expected);
});

it("Should escape regex metacharacters in case-insensitive exact match", () => {
  // Same escaping of "." as the partial-match case, but with the
  // "^" / escaped "$" anchors expected for an exact match.
  const actual = normalize(
    keywordSearchTemplate({
      searchTerm: "test.com",
      searchByAttributes: ["url"],
      exactMatch: true,
      caseInsensitive: true,
    }),
  );
  const expected = normalize(
    'g.V().or(has("url",regex("(?i)^test\\\\.com\\$")))',
  );

  expect(actual).toBe(expected);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ import type { KeywordSearchRequest } from "@/connector";

import { escapeString, SEARCH_TOKENS } from "@/utils";

/**
 * Prefixes every regex metacharacter in `text` with two backslash characters.
 *
 * The characters treated as metacharacters are `. * + ? ^ $ { } ( ) | [ ]`.
 * A backslash in the input is not part of that set and is left untouched.
 *
 * NOTE(review): two backslashes are emitted, presumably so that one survives
 * string-literal unescaping inside the generated Gremlin query — confirm
 * against the query engine.
 *
 * @param text - The text to embed in a Gremlin `regex(...)` pattern.
 * @returns `text` with each metacharacter preceded by `\\`.
 */
function escapeRegexForGremlin(text: string): string {
  const regexMetacharacters = /[.*+?^${}()|[\]]/g;
  // "$&" re-inserts the matched character after the two literal backslashes.
  const escapedReplacement = "\\\\$&";

  return text.replace(regexMetacharacters, escapedReplacement);
}

/**
* @example
* searchTerm = "JFK"
Expand All @@ -28,6 +32,7 @@ export default function keywordSearchTemplate({
limit,
offset = 0,
exactMatch = false,
caseInsensitive = false,
}: KeywordSearchRequest): string {
let template = "g.V()";

Expand Down Expand Up @@ -55,6 +60,14 @@ export default function keywordSearchTemplate({
}
return `has(id,containing("${escapedSearchTerm}"))`;
}

if (caseInsensitive) {
const regexTerm = escapeRegexForGremlin(escapedSearchTerm);
return exactMatch === true
? `has("${attr}",regex("(?i)^${regexTerm}\\$"))`
: `has("${attr}",regex("(?i).*${regexTerm}.*"))`;
}

if (exactMatch === true) {
return `has("${attr}","${escapedSearchTerm}")`;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,4 +258,98 @@ describe("OpenCypher > keywordSearchTemplate", () => {
`),
);
});

it("Should return a case-insensitive template for searched attributes containing the search term", () => {
  // Both sides of CONTAINS are expected to be wrapped in toLower(), with
  // the attribute additionally passed through toString().
  const actual = normalize(
    keywordSearchTemplate({
      vertexTypes: ["airport"],
      searchTerm: "JFK",
      searchByAttributes: ["city", "code"],
      exactMatch: false,
      caseInsensitive: true,
    }),
  );
  const expected = normalize(`
MATCH (v:\`airport\`)
WHERE (toLower(toString(v.city)) CONTAINS toLower("JFK") OR toLower(toString(v.code)) CONTAINS toLower("JFK"))
RETURN v AS object
`);

  expect(actual).toBe(expected);
});

it("Should return a case-insensitive template for searched attributes exactly matching the search term", () => {
  // With exactMatch the comparison is expected to use "=" between the
  // lower-cased attribute and the lower-cased term.
  const actual = normalize(
    keywordSearchTemplate({
      vertexTypes: ["airport"],
      searchTerm: "JFK",
      searchByAttributes: ["city", "code"],
      exactMatch: true,
      caseInsensitive: true,
    }),
  );
  const expected = normalize(`
MATCH (v:\`airport\`)
WHERE (toLower(toString(v.city)) = toLower("JFK") OR toLower(toString(v.code)) = toLower("JFK"))
RETURN v AS object
`);

  expect(actual).toBe(expected);
});

it("Should not apply case-insensitive to ID searches", () => {
  // The node-ID predicate is expected to stay a plain CONTAINS on
  // toString(id(v)) even though caseInsensitive is set.
  const actual = normalize(
    keywordSearchTemplate({
      vertexTypes: ["airport"],
      searchTerm: "JFK",
      exactMatch: false,
      caseInsensitive: true,
      searchByAttributes: [SEARCH_TOKENS.NODE_ID],
    }),
  );
  const expected = normalize(`
MATCH (v:\`airport\`)
WHERE (toString(id(v)) CONTAINS "JFK")
RETURN v AS object
`);

  expect(actual).toBe(expected);
});

it("Should not apply toLower when caseInsensitive is false", () => {
  // An explicit caseInsensitive: false is expected to compare the raw
  // attribute against the raw term.
  const actual = normalize(
    keywordSearchTemplate({
      vertexTypes: ["airport"],
      searchTerm: "JFK",
      searchByAttributes: ["city"],
      exactMatch: false,
      caseInsensitive: false,
    }),
  );
  const expected = normalize(`
MATCH (v:\`airport\`)
WHERE (v.city CONTAINS "JFK")
RETURN v AS object
`);

  expect(actual).toBe(expected);
});

it("Should return a case-insensitive template with multiple vertex types and all search parameters", () => {
  // Combines two vertex types, the ALL_ATTRIBUTES token and paging. The ID
  // predicate is expected to stay case-sensitive while the attribute
  // predicates use toLower().
  const actual = normalize(
    keywordSearchTemplate({
      vertexTypes: ["airport", "country"],
      searchTerm: "JFK",
      exactMatch: false,
      caseInsensitive: true,
      searchByAttributes: ["city", "code", SEARCH_TOKENS.ALL_ATTRIBUTES],
      limit: 50,
      offset: 25,
    }),
  );
  const expected = normalize(`
MATCH (v)
WHERE (v:\`airport\` OR v:\`country\`)
AND (toString(id(v)) CONTAINS "JFK" OR toLower(toString(v.city)) CONTAINS toLower("JFK") OR toLower(toString(v.code)) CONTAINS toLower("JFK"))
RETURN v AS object
SKIP 25 LIMIT 50
`);

  expect(actual).toBe(expected);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const keywordSearchTemplate = ({
limit,
offset,
exactMatch,
caseInsensitive,
}: KeywordSearchRequest): string => {
// Check if we're searching for nodes with no type by checking that the MISSING_TYPE is the only type defined
const isMissingTypeSearch =
Expand Down Expand Up @@ -62,6 +63,12 @@ const keywordSearchTemplate = ({
: `toString(id(v)) CONTAINS "${escapeString(searchTerm)}"`;
}

if (caseInsensitive) {
return exactMatch === true
? `toLower(toString(v.${attr})) = toLower("${escapeString(searchTerm)}")`
: `toLower(toString(v.${attr})) CONTAINS toLower("${escapeString(searchTerm)}")`;
}

return exactMatch === true
? `v.${attr} = "${escapeString(searchTerm)}"`
: `v.${attr} CONTAINS "${escapeString(searchTerm)}"`;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -305,4 +305,106 @@ describe("SPARQL > keywordSearchTemplate", () => {
`),
);
});

it("Should return a case-insensitive template for partial match", () => {
  // A partial match is expected to filter with regex(..., "i").
  const actual = normalize(
    keywordSearchTemplate({
      subjectClasses: ["air:airport"],
      searchTerm: "JFK",
      predicates: ["air:city"],
      exactMatch: false,
      caseInsensitive: true,
    }),
  );
  const expected = normalize(`
SELECT DISTINCT ?subject ?predicate ?object
WHERE {
{
# This sub-query will find any matching instances to the given filters and limit the results
SELECT DISTINCT ?subject
WHERE {
?subject ?pValue ?value .
OPTIONAL { ?subject a ?class } .
FILTER (?pValue IN (<air:city>))
FILTER (?class IN (<air:airport>))
FILTER (regex(str(?value), "JFK", "i"))
}
}
{
# Values and types
?subject ?predicate ?object
FILTER(isLiteral(?object) || ?predicate = <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
}
}
`);

  expect(actual).toBe(expected);
});

it("Should return a case-insensitive template for exact match", () => {
  // An exact, case-insensitive match is expected to compare
  // lcase(str(?value)) with the lcase() of the term.
  const actual = normalize(
    keywordSearchTemplate({
      subjectClasses: ["air:airport"],
      searchTerm: "JFK",
      predicates: ["air:city"],
      exactMatch: true,
      caseInsensitive: true,
    }),
  );
  const expected = normalize(`
SELECT DISTINCT ?subject ?predicate ?object
WHERE {
{
# This sub-query will find any matching instances to the given filters and limit the results
SELECT DISTINCT ?subject
WHERE {
?subject ?pValue ?value .
OPTIONAL { ?subject a ?class } .
FILTER (?pValue IN (<air:city>))
FILTER (?class IN (<air:airport>))
FILTER (lcase(str(?value)) = lcase("JFK"))
}
}
{
# Values and types
?subject ?predicate ?object
FILTER(isLiteral(?object) || ?predicate = <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
}
}
`);

  expect(actual).toBe(expected);
});

it("Should keep partial match case-insensitive even when caseInsensitive is false", () => {
  // The partial-match filter is expected to carry the "i" flag even
  // with caseInsensitive: false.
  const actual = normalize(
    keywordSearchTemplate({
      subjectClasses: ["air:airport"],
      searchTerm: "JFK",
      predicates: ["air:city"],
      exactMatch: false,
      caseInsensitive: false,
    }),
  );
  const expected = normalize(`
SELECT DISTINCT ?subject ?predicate ?object
WHERE {
{
# This sub-query will find any matching instances to the given filters and limit the results
SELECT DISTINCT ?subject
WHERE {
?subject ?pValue ?value .
OPTIONAL { ?subject a ?class } .
FILTER (?pValue IN (<air:city>))
FILTER (?class IN (<air:airport>))
FILTER (regex(str(?value), "JFK", "i"))
}
}
{
# Values and types
?subject ?predicate ?object
FILTER(isLiteral(?object) || ?predicate = <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>)
}
}
`);

  expect(actual).toBe(expected);
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ export function findSubjectsMatchingFilters(
OPTIONAL { ?subject a ?class } .
${getFilterPredicates(request.predicates)}
${getSubjectClasses(request.subjectClasses)}
${getFilterObject(request.exactMatch, request.searchTerm)}
${getFilterObject(request.exactMatch, request.searchTerm, request.caseInsensitive)}
}
${getLimit(request.limit, request.offset)}
`;
Expand All @@ -125,14 +125,25 @@ function getFilterPredicates(predicates?: string[]) {
return `FILTER (?pValue IN (${filteredPredicates.map(idParam).join(", ")}))`;
}

/**
 * Builds the SPARQL `FILTER` clause that matches `?value` against the search term.
 *
 * @param exactMatch - When `true`, `?value` must equal the term; any other
 *   value produces a partial (regex) match.
 * @param searchTerm - The term to search for. It is passed through
 *   `escapeString` before being embedded in the query.
 * @param caseInsensitive - Only consulted for exact matches: when truthy,
 *   both `str(?value)` and the term are compared through `lcase(...)`.
 * @returns The `FILTER` clause, or an empty string when `searchTerm` is
 *   missing or empty.
 */
function getFilterObject(
  exactMatch?: boolean,
  searchTerm?: string,
  caseInsensitive?: boolean,
): string {
  if (!searchTerm) {
    return "";
  }

  const escapedSearchTerm = escapeString(searchTerm);

  if (exactMatch === true) {
    return caseInsensitive
      ? `FILTER (lcase(str(?value)) = lcase("${escapedSearchTerm}"))`
      : `FILTER (?value = "${escapedSearchTerm}")`;
  }

  // Always use case-insensitive regex for partial match to maintain
  // backward compatibility. SPARQL search was always case-insensitive
  // before the caseInsensitive parameter was introduced.
  // NOTE(review): the term is used as the regex pattern itself; unless
  // escapeString also escapes regex metacharacters, characters such as "."
  // act as pattern syntax here — confirm whether that is intended.
  return `FILTER (regex(str(?value), "${escapedSearchTerm}", "i"))`;
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ export function createSparqlExplorer(
limit: req.limit,
offset: req.offset,
exactMatch: req.exactMatch,
caseInsensitive: req.caseInsensitive,
};

const response = await keywordSearch(
Expand Down
Loading