From dab463e4ff9afa4de564561d54c08d7a7214bcb4 Mon Sep 17 00:00:00 2001 From: "renan m." Date: Wed, 6 May 2026 08:14:15 -0300 Subject: [PATCH] =?UTF-8?q?feat:=20SHACL=20=E2=86=92=20schema=20CLI=20conv?= =?UTF-8?q?erters=20(closes=20#140)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two CLI subcommands deriving LDkit schemas from SHACL shapes: - shacl-to-schema — single-file output, parallels the existing shexc-to-schema / context-to-schema converters. - shacl-to-package — multi-file output (one .ts per namespace + a barrel index.ts), with cross-file imports for relations. Enables tree-shaking for large vocabularies. Highlights: - Idiomatic SHACL support: sh:targetClass, sh:property, sh:path (incl. sh:inversePath), sh:datatype, sh:nodeKind sh:IRI, sh:node, sh:class, sh:minCount / sh:maxCount, sh:uniqueLang, sh:and / sh:or / sh:in. Self-references and merge of duplicate sh:property paths handled. - Namespace-aware naming: schema names are prefixed by their declared @prefix (meta:Campaign → MetaCampaignSchema) to disambiguate cross-vocabulary local-part collisions. --prefix-alias prefix=Alias renames short prefixes (e.g. m: → Marketer*). - Default-safe cross-refs: sh:node X emits "@type": ldkit.IRI by default (not "@schema": XSchema) so findByIri() doesn't recursively walk a whole graph. Consumers opt into nested decoding at call sites via spread. - Multi-file output (shacl-to-package): per-namespace files import their namespace consts from a single namespaces.ts to break ESM cycles. Drop-in compatible: import { XSchema } from "./.ldkit" still works via the barrel. Coverage: - 28 unit tests in tests/scripts/shacl_to_schema.test.ts - 3 unit tests in tests/scripts/shacl_to_package.test.ts - All existing tests still pass Breaking changes: none — purely additive. Existing CLI commands (shexc-to-schema, shexj-to-schema, context-to-schema) untouched. 
--- cli.ts | 100 ++- docs/components/schema-generators.md | 67 +- scripts/schema_to_package.ts | 153 ++++ scripts/schema_to_script.ts | 200 ++++- scripts/shacl_to_schema.ts | 575 +++++++++++++ tests/scripts/schema_to_script.test.ts | 238 ++++++ tests/scripts/shacl_to_package.test.ts | 93 ++ tests/scripts/shacl_to_schema.test.ts | 1072 ++++++++++++++++++++++++ 8 files changed, 2475 insertions(+), 23 deletions(-) create mode 100644 scripts/schema_to_package.ts create mode 100644 scripts/shacl_to_schema.ts create mode 100644 tests/scripts/shacl_to_package.test.ts create mode 100644 tests/scripts/shacl_to_schema.test.ts diff --git a/cli.ts b/cli.ts index 5932aa7..536b68b 100644 --- a/cli.ts +++ b/cli.ts @@ -1,11 +1,14 @@ import { argv } from "node:process"; import { styleText } from "node:util"; -import { readFileSync } from "node:fs"; +import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; import { Argument, Command } from "npm:commander@^13.1.0"; import { contextToSchema } from "./scripts/context_to_schema.ts"; import { shexcToSchema, shexjToSchema } from "./scripts/shex_to_schema.ts"; +import { shaclToSchema } from "./scripts/shacl_to_schema.ts"; import { schemaToScript } from "./scripts/schema_to_script.ts"; +import { schemaToPackage } from "./scripts/schema_to_package.ts"; const asciiArt = String.raw` _ ____ _ _ _ @@ -95,6 +98,82 @@ program.command("shexj-to-schema") } }); +program.command("shacl-to-schema") + .description( + "Convert a SHACL shapes graph from a file or URL to a LDkit schema", + ) + .addArgument( + new Argument("", "type of input").choices([ + "url", + "file", + "arg", + ]), + ) + .argument("", "input SHACL Turtle - file, URL, or string") + .option( + "--prefix-alias ", + "rename a SHACL prefix in generated schema names (format: prefix=Alias). 
Repeatable.", + (value: string, previous: string[]) => [...previous, value], + [] as string[], + ) + .action(async (method, input, opts: { prefixAlias?: string[] }) => { + try { + const resolvedInput = await resolve(method, input); + const prefixAliases = parsePrefixAliases(opts.prefixAlias); + const { schemas, extraNamespaces } = shaclToSchema(resolvedInput, { + prefixAliases, + }); + console.log(schemaToScript(schemas, extraNamespaces)); + } catch (error: unknown) { + console.error(styleText("red", `${(error as Error).message}`)); + } + }); + +program.command("shacl-to-package") + .description( + "Convert a SHACL shapes graph into a directory of per-namespace LDkit schema files (one .ts per prefix plus an index.ts barrel)", + ) + .addArgument( + new Argument("", "type of input").choices([ + "url", + "file", + "arg", + ]), + ) + .argument("", "input SHACL Turtle - file, URL, or string") + .argument("", "output directory for the generated package") + .option( + "--prefix-alias ", + "rename a SHACL prefix in generated schema names AND in the per-namespace file name (format: prefix=Alias). 
Repeatable.", + (value: string, previous: string[]) => [...previous, value], + [] as string[], + ) + .action( + async ( + method, + input, + outDir, + opts: { prefixAlias?: string[] }, + ) => { + try { + const resolvedInput = await resolve(method, input); + const prefixAliases = parsePrefixAliases(opts.prefixAlias); + const { schemas, extraNamespaces, schemaSourcePrefixes } = + shaclToSchema(resolvedInput, { prefixAliases }); + const { files } = schemaToPackage(schemas, extraNamespaces, { + prefixAliases, + schemaSourcePrefixes, + }); + mkdirSync(outDir, { recursive: true }); + for (const [base, contents] of files) { + writeFileSync(join(outDir, `${base}.ts`), contents); + } + } catch (error: unknown) { + console.error(styleText("red", `${(error as Error).message}`)); + } + }, + ); + // Check if no arguments were provided if (argv.length <= 2) { console.log(styleText("red", asciiArt)); @@ -103,6 +182,25 @@ if (argv.length <= 2) { program.parse(argv); } +function parsePrefixAliases( + pairs: string[] | undefined, +): Record { + if (!pairs || pairs.length === 0) return {}; + const result: Record = {}; + for (const pair of pairs) { + const eq = pair.indexOf("="); + if (eq <= 0 || eq === pair.length - 1) { + throw new Error( + `Invalid --prefix-alias value "${pair}" (expected format: prefix=Alias)`, + ); + } + const prefix = pair.substring(0, eq); + const alias = pair.substring(eq + 1); + result[prefix] = alias; + } + return result; +} + async function resolve(method: string, input: string): Promise { if (method === "url") { try { diff --git a/docs/components/schema-generators.md b/docs/components/schema-generators.md index 5ca25e2..4cf0016 100644 --- a/docs/components/schema-generators.md +++ b/docs/components/schema-generators.md @@ -3,8 +3,9 @@ LDkit provides experimental schema generators that help you convert existing Linked Data definitions into TypeScript schemas compatible with LDkit. 
These tools are available via the LDkit CLI and support generating code directly from -[JSON-LD contexts](https://www.w3.org/TR/json-ld11/), or -[ShEx shapes](https://shex.io/). +[JSON-LD contexts](https://www.w3.org/TR/json-ld11/), +[ShEx shapes](https://shex.io/), or +[SHACL shapes](https://www.w3.org/TR/shacl/). > ⚠️ **Note:** These generators are experimental and currently support only a > subset of the respective technologies. Manual review and adjustments of the @@ -61,14 +62,66 @@ features are the same as for ShExC. npx ldkit shexj-to-schema url https://ldkit.io/examples/person.shex.jsonld ``` +### 4. `shacl-to-schema` + +Converts a [SHACL](https://www.w3.org/TR/shacl/) shapes graph (Turtle) into an +LDkit TypeScript schema. + +Supported SHACL features: + +- **Project namespace generation** — every `@prefix` the generated schemas use + whose IRI is not an LDkit built-in is re-emitted at the top of the generated + file as a `createNamespace()` call, and IRIs under that prefix render as e.g. + `ex.totalRevenue` instead of raw IRI strings. When a user-declared prefix + shadows an LDkit built-in's name (e.g. `schema` for `https://schema.org/` vs + LDkit's built-in `http://schema.org/`), the user's prefix wins the clean + variable name; the built-in is not imported and IRIs under it fall back to + literal strings. 
+- `sh:NodeShape` discovery (named shapes only) +- `sh:targetClass` mapped to schema `@type` (multiple targets allowed) +- shapes that are also `rdfs:Class` use the shape IRI as `@type` +- `sh:property` shapes (named or blank node) with simple `sh:path` IRIs +- `sh:inversePath` mapped to `@inverse` +- `sh:datatype` mapped to property `@type` (XSD datatypes) +- `sh:nodeKind sh:IRI` mapped to IRI references (`ldkit.IRI`) +- `sh:node` and `sh:class` mapped to IRI references (`ldkit.IRI`) by default +- `sh:datatype rdf:langString` and `sh:uniqueLang true` mapped to `@multilang` +- cardinality via `sh:minCount` / `sh:maxCount` mapped to `@optional` / `@array` +- simplified `sh:and` / `sh:or` shapes logic (mirrors `shexc-to-schema`): + - `sh:and` branches are merged into the same property spec (last-wins for + conflicting fields) + - `sh:or` of numeric datatypes is reduced to the widest type + - `sh:or` of identical datatypes uses that datatype + - `sh:or` of `sh:node` / `sh:class` alternatives is reduced to an untyped IRI + reference + - `sh:or` of mixed or unrepresentable branches drops the type + - a property whose type comes from `sh:or` is always marked `@optional` +- `sh:in` enumerations use the type of the first list element (no TypeScript + literal union — runtime cannot enforce it) +- `sh:not` and validation-only constraints (`sh:minLength`, `sh:maxLength`, + `sh:pattern`, `sh:hasValue`, `sh:minInclusive`, etc.) are silently ignored, + since LDkit's schema is for querying rather than validation + +Unsupported (the converter throws a clear error if encountered): + +- complex `sh:path` expressions other than `sh:inversePath` (sequence, + alternative, zero-or-more) + +Manual review of the generated schema is recommended, especially after `sh:or` +reduction. + +```bash +npx ldkit shacl-to-schema file ./shapes.ttl +``` + ## Command Syntax ```bash npx ldkit ``` -- ``: One of `context-to-schema`, `shexc-to-schema`, or - `shexj-to-schema`. 
+- ``: One of `context-to-schema`, `shexc-to-schema`, `shexj-to-schema`, + or `shacl-to-schema`. - ``: Defines how the input is provided. Possible values: @@ -113,9 +166,9 @@ ldkit context-to-schema file ./person.jsonld > person.ts ## Limitations -The generators do not fully cover all features of JSON-LD or ShEx. Complex -validation rules, advanced constraints, and some specialized constructs may be -omitted or simplified. +The generators do not fully cover all features of JSON-LD, ShEx, or SHACL. +Complex validation rules, advanced constraints, and some specialized constructs +may be omitted or simplified. Manual post-processing of the generated schemas may be necessary for production use. diff --git a/scripts/schema_to_package.ts b/scripts/schema_to_package.ts new file mode 100644 index 0000000..b2de65b --- /dev/null +++ b/scripts/schema_to_package.ts @@ -0,0 +1,153 @@ +import { + type ExtraNamespace, + type SchemaSpec, + schemaToScript, +} from "./schema_to_script.ts"; + +export type SchemaToPackageOptions = { + prefixAliases?: Record; + schemaSourcePrefixes?: Map; +}; + +export type SchemaPackage = { + files: Map; +}; + +const FALLBACK_FILE = "_unknown"; +const NAMESPACES_FILE = "namespaces"; + +export function schemaToPackage( + schemas: SchemaSpec[], + extraNamespaces: ExtraNamespace[] = [], + options: SchemaToPackageOptions = {}, +): SchemaPackage { + const aliases = options.prefixAliases ?? {}; + const sourcePrefixes = options.schemaSourcePrefixes ?? new Map(); + const fileForPrefix = (prefix: string | undefined): string => { + if (!prefix) return FALLBACK_FILE; + const aliased = aliases[prefix]; + return (aliased ?? 
prefix).toLowerCase(); + }; + + const groups = new Map(); + const schemaLocations = new Map(); + for (const schema of schemas) { + const file = fileForPrefix(sourcePrefixes.get(schema.name)); + schemaLocations.set(schema.name, file); + let bucket = groups.get(file); + if (!bucket) { + bucket = []; + groups.set(file, bucket); + } + bucket.push(schema); + } + + const extraNamespaceTermsOverride = collectGlobalNamespaceTerms( + extraNamespaces, + schemas, + ); + + const files = new Map(); + for ( + const [file, fileSchemas] of [...groups.entries()].toSorted(([a], [b]) => + a.localeCompare(b) + ) + ) { + const scopedExtras = filterExtras(extraNamespaces, fileSchemas); + const contents = schemaToScript(fileSchemas, scopedExtras, { + schemaLocations, + currentFile: file, + extraNamespacesImportFrom: NAMESPACES_FILE, + }); + files.set(file, contents); + } + + if (extraNamespaces.length > 0) { + files.set( + NAMESPACES_FILE, + buildNamespacesFile(extraNamespaces, extraNamespaceTermsOverride), + ); + } + files.set("index", buildIndex(files)); + return { files }; +} + +function buildNamespacesFile( + extras: ExtraNamespace[], + termsByPrefix: Map>, +): string { + const lines: string[] = [`import { createNamespace } from "ldkit";`, ""]; + const sorted = [...extras].sort((a, b) => a.prefix.localeCompare(b.prefix)); + for (const ns of sorted) { + const terms = [...(termsByPrefix.get(ns.prefix) ?? 
new Set())] + .toSorted(); + lines.push(`export const ${ns.prefix} = createNamespace(`); + lines.push(` {`); + lines.push(` iri: ${JSON.stringify(ns.iri)},`); + lines.push(` prefix: ${JSON.stringify(`${ns.prefix}:`)},`); + lines.push(` terms: [`); + for (const term of terms) { + lines.push(` ${JSON.stringify(term)},`); + } + lines.push(` ],`); + lines.push(` } as const,`); + lines.push(`);`); + lines.push(""); + } + return lines.join("\n"); +} + +function filterExtras( + extras: ExtraNamespace[], + schemas: SchemaSpec[], +): ExtraNamespace[] { + if (extras.length === 0) return []; + const usedIris = collectUsedIris(schemas); + return extras.filter((ns) => + [...usedIris].some((iri) => iri.startsWith(ns.iri)) + ); +} + +function collectUsedIris(schemas: SchemaSpec[]): Set { + const used = new Set(); + for (const schema of schemas) { + for (const t of schema.type) used.add(t); + for (const prop of Object.values(schema.properties)) { + used.add(prop.id); + if (prop.type) used.add(prop.type); + if (prop.schema) { + for (const iri of collectUsedIris([prop.schema])) used.add(iri); + } + } + } + return used; +} + +function collectGlobalNamespaceTerms( + extras: ExtraNamespace[], + schemas: SchemaSpec[], +): Map> { + const sortedExtras = [...extras].sort((a, b) => b.iri.length - a.iri.length); + const result = new Map>(); + const allIris = collectUsedIris(schemas); + for (const iri of allIris) { + for (const ns of sortedExtras) { + if (iri.startsWith(ns.iri)) { + const term = iri.substring(ns.iri.length); + let bucket = result.get(ns.prefix); + if (!bucket) { + bucket = new Set(); + result.set(ns.prefix, bucket); + } + bucket.add(term); + break; + } + } + } + return result; +} + +function buildIndex(files: Map): string { + const names = [...files.keys()].filter((f) => f !== "index").toSorted(); + return names.map((name) => `export * from "./${name}";`).join("\n") + "\n"; +} diff --git a/scripts/schema_to_script.ts b/scripts/schema_to_script.ts index 5de2b5e..dfda53b 100644 
--- a/scripts/schema_to_script.ts +++ b/scripts/schema_to_script.ts @@ -14,7 +14,7 @@ import { xsd, } from "../namespaces.ts"; -const NAMESPACES = [ +export const NAMESPACES = [ dbo, dc, dcterms, @@ -49,14 +49,60 @@ export type SchemaSpec = { }; }; -export function schemaToScript(schemas: SchemaSpec[]): string { - const printer = new SchemaPrinter(); +export type ExtraNamespace = { + iri: string; + prefix: string; +}; + +export type PrinterOptions = { + schemaLocations?: Map; + currentFile?: string; + extraNamespaceFiles?: Map; + extraNamespaceTermsOverride?: Map>; + extraNamespacesImportFrom?: string; +}; + +export function schemaToScript( + schemas: SchemaSpec[], + extraNamespaces: ExtraNamespace[] = [], + options: PrinterOptions = {}, +): string { + const printer = new SchemaPrinter(extraNamespaces, options); return printer.print(schemas); } class SchemaPrinter { private usedNamespaces = new Set(); private space = " "; + // Sorted by IRI length desc so longer prefixes match before shorter ones. + private extraNamespaces: ExtraNamespace[]; + private extraNamespaceTerms = new Map>(); + private readonly shadowedBuiltins: Set; + private readonly schemaLocations: Map; + private readonly currentFile: string | undefined; + private readonly extraNamespaceFiles: Map; + private readonly extraNamespaceTermsOverride: + | Map> + | undefined; + private readonly extraNamespacesImportFrom: string | undefined; + private readonly crossFileImports = new Map>(); + + constructor( + extraNamespaces: ExtraNamespace[] = [], + options: PrinterOptions = {}, + ) { + this.extraNamespaces = [...extraNamespaces].sort( + (a, b) => b.iri.length - a.iri.length, + ); + this.shadowedBuiltins = new Set( + this.extraNamespaces.map((ns) => ns.prefix), + ); + this.schemaLocations = options.schemaLocations ?? new Map(); + this.currentFile = options.currentFile; + this.extraNamespaceFiles = options.extraNamespaceFiles ?? 
new Map(); + this.extraNamespaceTermsOverride = options.extraNamespaceTermsOverride; + this.extraNamespacesImportFrom = options.extraNamespacesImportFrom; + } public print(schemas: SchemaSpec[]): string { const orderedSchemas = this.orderSchemasByDependencies(schemas); @@ -69,8 +115,9 @@ class SchemaPrinter { printedSchemas.push(printedSchema); } - if (this.usedNamespaces.size > 0) { - printedSchemas.unshift(this.printImports()); + const header = this.printHeader(); + if (header) { + printedSchemas.unshift(header); } return printedSchemas.join("\n"); @@ -79,11 +126,14 @@ class SchemaPrinter { private orderSchemasByDependencies(schemas: SchemaSpec[]): SchemaSpec[] { const orderedSchemas: SchemaSpec[] = []; const processedSchemas = new Set(); + const localNames = new Set(schemas.map((s) => s.name)); const dependencies = schemas.map((schema) => { return { schemaName: schema.name, - dependencies: this.getSchemaDependencies(schema), + dependencies: this.getSchemaDependencies(schema).filter((dep) => + localNames.has(dep) + ), }; }); @@ -139,9 +189,25 @@ class SchemaPrinter { this.usedNamespaces.add(this.printPrefix(ldkit)); return; } + for (const ns of this.extraNamespaces) { + if (value.startsWith(ns.iri)) { + const localPart = value.substring(ns.iri.length); + let terms = this.extraNamespaceTerms.get(ns.prefix); + if (!terms) { + terms = new Set(); + this.extraNamespaceTerms.set(ns.prefix, terms); + } + terms.add(localPart); + return; + } + } for (const namespace of NAMESPACES) { if (value.startsWith(namespace.$iri)) { - this.usedNamespaces.add(this.printPrefix(namespace)); + const name = this.printPrefix(namespace); + if (this.shadowedBuiltins.has(name)) { + return; + } + this.usedNamespaces.add(name); return; } } @@ -163,12 +229,100 @@ class SchemaPrinter { } } - private printImports(): string { - const namespacesString = Array.from(this.usedNamespaces) - .toSorted() - .join(", "); + private trackCrossFileRef(schemaRef: string): void { + if (!this.currentFile) 
return; + const refFile = this.schemaLocations.get(schemaRef); + if (!refFile || refFile === this.currentFile) return; + let names = this.crossFileImports.get(refFile); + if (!names) { + names = new Set(); + this.crossFileImports.set(refFile, names); + } + names.add(schemaRef); + } + + private printHeader(): string { + const lines: string[] = []; + + const usedPrefixes = new Set(this.extraNamespaces.map((ns) => ns.prefix)) + .intersection(new Set(this.extraNamespaceTerms.keys())); + const usedExtras = this.extraNamespaces.filter((ns) => + usedPrefixes.has(ns.prefix) + ); + + const declaredExtras: ExtraNamespace[] = []; + const importedByFile = new Map>(); + if (this.extraNamespacesImportFrom !== undefined) { + if (usedExtras.length > 0) { + importedByFile.set( + this.extraNamespacesImportFrom, + new Set(usedExtras.map((ns) => ns.prefix)), + ); + } + } else { + for (const ns of usedExtras) { + const home = this.extraNamespaceFiles.get(ns.prefix); + if (!home || !this.currentFile || home === this.currentFile) { + declaredExtras.push(ns); + continue; + } + let names = importedByFile.get(home); + if (!names) { + names = new Set(); + importedByFile.set(home, names); + } + names.add(ns.prefix); + } + } - return `import { ${namespacesString} } from "ldkit/namespaces";\n`; + if (declaredExtras.length > 0) { + lines.push(`import { createNamespace } from "ldkit";`); + } + + if (this.usedNamespaces.size > 0) { + const namespacesString = Array.from(this.usedNamespaces) + .toSorted() + .join(", "); + lines.push(`import { ${namespacesString} } from "ldkit/namespaces";`); + } + + const crossFileEntries = [...this.crossFileImports.entries()] + .toSorted(([a], [b]) => a.localeCompare(b)); + for (const [file, names] of crossFileEntries) { + const sortedNames = [...names].toSorted().join(", "); + lines.push(`import { ${sortedNames} } from "./${file}";`); + } + + const importedExtraEntries = [...importedByFile.entries()] + .toSorted(([a], [b]) => a.localeCompare(b)); + for (const 
[file, prefixes] of importedExtraEntries) { + const sortedPrefixes = [...prefixes].toSorted().join(", "); + lines.push(`import { ${sortedPrefixes} } from "./${file}";`); + } + + if (lines.length > 0) { + lines.push(""); + } + + for (const ns of declaredExtras) { + const termSet = this.extraNamespaceTermsOverride?.get(ns.prefix) ?? + this.extraNamespaceTerms.get(ns.prefix)!; + const terms = Array.from(termSet).toSorted(); + lines.push(`export const ${ns.prefix} = createNamespace(`); + lines.push(` {`); + lines.push(` iri: ${JSON.stringify(ns.iri)},`); + lines.push(` prefix: ${JSON.stringify(`${ns.prefix}:`)},`); + lines.push(` terms: [`); + for (const term of terms) { + lines.push(` ${JSON.stringify(term)},`); + } + lines.push(` ],`); + lines.push(` } as const,`); + lines.push(`);`); + lines.push(""); + } + + return lines.join("\n"); } private printSchema(schema: SchemaSpec): string { @@ -228,6 +382,7 @@ class SchemaPrinter { const subSchema = this.printSubSchema(prop.schema); builder.push(this.indent(subSchema)); } else if (prop.schemaRef) { + this.trackCrossFileRef(prop.schemaRef); builder.push(this.indent(`"@schema": ${prop.schemaRef},`)); } @@ -267,16 +422,31 @@ class SchemaPrinter { if (value === "@id") { return `${this.printPrefix(ldkit)}.IRI`; } + for (const ns of this.extraNamespaces) { + if (value.startsWith(ns.iri)) { + const localPart = value.substring(ns.iri.length); + return this.formatNamespaceAccess(ns.prefix, localPart); + } + } for (const namespace of NAMESPACES) { if (value.startsWith(namespace.$iri)) { - return `${this.printPrefix(namespace)}.${ - value.substring(namespace.$iri.length) - }`; + const name = this.printPrefix(namespace); + if (this.shadowedBuiltins.has(name)) { + return `"${value}"`; + } + return `${name}.${value.substring(namespace.$iri.length)}`; } } return `"${value}"`; } + private formatNamespaceAccess(prefix: string, localPart: string): string { + if (/^[A-Za-z_$]\w*$/.test(localPart)) { + return `${prefix}.${localPart}`; + } + 
return `${prefix}["${localPart}"]`; + } + private printKey(key: string): string { if (key.match(/^[a-zA-Z0-9_]+$/)) { return key; diff --git a/scripts/shacl_to_schema.ts b/scripts/shacl_to_schema.ts new file mode 100644 index 0000000..1d266c2 --- /dev/null +++ b/scripts/shacl_to_schema.ts @@ -0,0 +1,575 @@ +import { Parser, Store, type Term } from "npm:n3@^1"; + +import { + type ExtraNamespace, + NAMESPACES, + type PropertySpec, + type SchemaSpec, +} from "./schema_to_script.ts"; + +const BUILTIN_NAMESPACE_IRIS: Set = new Set( + NAMESPACES.map((n) => n.$iri), +); + +const RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; +const RDFS = "http://www.w3.org/2000/01/rdf-schema#"; +const XSD = "http://www.w3.org/2001/XMLSchema#"; +const SH = "http://www.w3.org/ns/shacl#"; + +const RDF_TYPE = `${RDF}type`; +const RDF_FIRST = `${RDF}first`; +const RDF_REST = `${RDF}rest`; +const RDF_NIL = `${RDF}nil`; +const RDF_LANG_STRING = `${RDF}langString`; +const XSD_STRING = `${XSD}string`; +const RDFS_CLASS = `${RDFS}Class`; + +const SH_NODE_SHAPE = `${SH}NodeShape`; +const SH_TARGET_CLASS = `${SH}targetClass`; +const SH_PROPERTY = `${SH}property`; +const SH_PATH = `${SH}path`; +const SH_INVERSE_PATH = `${SH}inversePath`; +const SH_DATATYPE = `${SH}datatype`; +const SH_NODE_KIND = `${SH}nodeKind`; +const SH_IRI = `${SH}IRI`; +const SH_NODE = `${SH}node`; +const SH_CLASS = `${SH}class`; +const SH_MIN_COUNT = `${SH}minCount`; +const SH_MAX_COUNT = `${SH}maxCount`; +const SH_UNIQUE_LANG = `${SH}uniqueLang`; +const SH_AND = `${SH}and`; +const SH_OR = `${SH}or`; +const SH_IN = `${SH}in`; + +// Numeric widening order: leftmost = widest. 
+const NUMERIC_WIDENING = [ + `${XSD}decimal`, + `${XSD}double`, + `${XSD}float`, + `${XSD}long`, + `${XSD}integer`, + `${XSD}int`, + `${XSD}short`, + `${XSD}byte`, + `${XSD}nonNegativeInteger`, + `${XSD}positiveInteger`, + `${XSD}unsignedLong`, + `${XSD}unsignedInt`, +]; + +type Constraints = { + datatype?: string; + nodeKind?: string; + refNode?: string; + refClass?: string; + uniqueLang?: boolean; + inFirstType?: string; +}; + +type ReducedOr = + | { kind: "datatype"; value: string } + | { kind: "iri" } + | { kind: "untyped" }; + +export type ShaclConversionResult = { + schemas: SchemaSpec[]; + extraNamespaces: ExtraNamespace[]; + schemaSourcePrefixes: Map; +}; + +export type ShaclConversionOptions = { + prefixAliases?: Record; +}; + +export function shaclToSchema( + turtle: string, + options: ShaclConversionOptions = {}, +): ShaclConversionResult { + const converter = new ShaclConverter(options); + return converter.process(turtle); +} + +class ShaclConverter { + private store!: Store; + private schemas: SchemaSpec[] = []; + private shapeIriToName = new Map(); + private usedNames = new Set(); + private prefixMap: Record = {}; + private prefixAliases: Record; + + constructor(options: ShaclConversionOptions = {}) { + this.prefixAliases = options.prefixAliases ?? {}; + } + + public process(turtle: string): ShaclConversionResult { + this.parseWithPrefixes(turtle); + + const shapeIris = this.findNodeShapes(); + + for (const shapeIri of shapeIris) { + const name = this.deriveSchemaName(shapeIri); + this.shapeIriToName.set(shapeIri, name); + } + + const schemaSourcePrefixes = new Map(); + for (const shapeIri of shapeIris) { + const schema = this.buildSchema(shapeIri); + this.schemas.push(schema); + const baseIri = schema.type[0] ?? 
shapeIri; + const prefix = this.findNamespacePrefix(baseIri); + if (prefix) schemaSourcePrefixes.set(schema.name, prefix); + } + + return { + schemas: this.schemas, + extraNamespaces: this.deriveExtraNamespaces(), + schemaSourcePrefixes, + }; + } + + private parseWithPrefixes(turtle: string): void { + const parser = new Parser(); + let quads; + try { + quads = parser.parse(turtle); + } catch (error) { + const detail = error instanceof Error ? error.message : String(error); + throw new Error(`Failed to parse Turtle input: ${detail}`); + } + this.store = new Store(quads); + + const prefixRe = + /@prefix\s+([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*<([^>]+)>\s*\./g; + for (const match of turtle.matchAll(prefixRe)) { + const [, prefix, iri] = match; + this.prefixMap[prefix] = iri; + } + } + + private deriveExtraNamespaces(): ExtraNamespace[] { + const usedIris = new Set(); + for (const schema of this.schemas) { + for (const t of schema.type) usedIris.add(t); + for (const prop of Object.values(schema.properties)) { + usedIris.add(prop.id); + if (prop.type) usedIris.add(prop.type); + } + } + + const result: ExtraNamespace[] = []; + const seenIris = new Set(); + const usedNames = new Set(); + for (const [prefix, iri] of Object.entries(this.prefixMap)) { + if (BUILTIN_NAMESPACE_IRIS.has(iri)) continue; + if (seenIris.has(iri)) continue; + const isUsed = [...usedIris].some((u) => u.startsWith(iri)); + if (!isUsed) continue; + seenIris.add(iri); + let safeName = prefix; + while (usedNames.has(safeName)) { + safeName += "_"; + } + usedNames.add(safeName); + result.push({ iri, prefix: safeName }); + } + return result; + } + + private findNodeShapes(): string[] { + const quads = this.store.getQuads(null, RDF_TYPE, SH_NODE_SHAPE, null); + const iris: string[] = []; + for (const q of quads) { + if (q.subject.termType === "NamedNode") { + iris.push(q.subject.value); + } + } + return iris; + } + + private deriveSchemaName(shapeIri: string): string { + const targetClass = 
this.getObjectIri(shapeIri, SH_TARGET_CLASS); + const baseIri = targetClass ?? shapeIri; + let local = this.getSuffix(baseIri); + if (!targetClass && local.endsWith("Shape")) { + local = local.substring(0, local.length - "Shape".length); + } + return this.uniqueName(this.composeSchemaName(baseIri, local)); + } + + // Schema names are always namespace-prefixed (when a @prefix declaration + // covers the IRI) so that classes sharing a local-part across vocabularies + // — e.g. m:Campaign, meta:Campaign, google:Campaign — produce + // self-documenting, deterministic names like MCampaignSchema, + // MetaCampaignSchema, GoogleCampaignSchema. IRIs without a declared prefix + // fall back to the bare local-part (preserves prior behavior for + // hand-written SHACL test fixtures that omit @prefix declarations). + private composeSchemaName(iri: string, local: string): string { + const prefix = this.findNamespacePrefix(iri); + const prefixPart = prefix + ? (this.prefixAliases[prefix] ?? + this.capitalize(this.sanitizeIdentifier(prefix))) + : ""; + const localPart = this.capitalize(this.sanitizeIdentifier(local)); + return `${prefixPart}${localPart}Schema`; + } + + private findNamespacePrefix(iri: string): string | null { + let bestPrefix: string | null = null; + let bestLength = 0; + for (const [prefix, ns] of Object.entries(this.prefixMap)) { + if (iri.startsWith(ns) && ns.length > bestLength) { + bestPrefix = prefix; + bestLength = ns.length; + } + } + return bestPrefix; + } + + private uniqueName(name: string): string { + if (!this.usedNames.has(name)) { + this.usedNames.add(name); + return name; + } + for (let i = 1; i < 1000; i++) { + const candidate = `${name}${i}`; + if (!this.usedNames.has(candidate)) { + this.usedNames.add(candidate); + return candidate; + } + } + throw new Error(`Could not generate a unique name for ${name}`); + } + + private buildSchema(shapeIri: string): SchemaSpec { + return { + name: this.shapeIriToName.get(shapeIri)!, + type: 
/**
 * Resolves the class IRIs a node shape applies to.
 *
 * Explicit `sh:targetClass` values win. Otherwise a shape that is itself
 * typed `rdfs:Class` targets its own IRI; any other shape has no type.
 */
private deriveType(shapeIri: string): string[] {
  const targetClasses = this.getObjectIris(shapeIri, SH_TARGET_CLASS);
  if (targetClasses.length > 0) {
    return targetClasses;
  }
  const isRdfsClass =
    this.store.getQuads(shapeIri, RDF_TYPE, RDFS_CLASS, null).length > 0;
  return isRdfsClass ? [shapeIri] : [];
}

/**
 * Converts every `sh:property` of a shape into a keyed property spec.
 *
 * Keys are the local part of the path IRI. Property shapes that share the
 * same path (and direction) are conjoined via `mergePropertySpecs`. Property
 * shapes whose paths merely share a local name are kept apart by suffixing
 * the key (`name`, `name_2`, ...) so no path is silently discarded.
 *
 * Non-node `sh:property` values are skipped with a warning on stderr.
 */
private buildProperties(shapeIri: string): SchemaSpec["properties"] {
  const propertyNodes = this.store.getQuads(
    shapeIri,
    SH_PROPERTY,
    null,
    null,
  );
  const properties: SchemaSpec["properties"] = {};

  // Own-property check: a bare `properties[name]` lookup would also hit
  // Object.prototype members for local names like "constructor".
  const hasOwn = (key: string): boolean =>
    Object.prototype.hasOwnProperty.call(properties, key);

  for (const q of propertyNodes) {
    if (
      q.object.termType !== "NamedNode" &&
      q.object.termType !== "BlankNode"
    ) {
      console.error(
        `[shacl-to-schema] warning: skipping non-node sh:property value on shape <${shapeIri}> (got ${q.object.termType})`,
      );
      continue;
    }

    const { name, spec } = this.buildProperty(q.object, shapeIri);

    let key = name;
    let suffix = 1;
    let merged = false;
    while (hasOwn(key)) {
      const existing = properties[key];
      if (
        existing !== undefined &&
        existing.id === spec.id &&
        Boolean(existing.inverse) === Boolean(spec.inverse)
      ) {
        // Same path declared twice: SHACL conjoins the two property shapes.
        properties[key] = this.mergePropertySpecs(existing, spec);
        merged = true;
        break;
      }
      // Different path behind the same local name: try the next free key.
      suffix += 1;
      key = `${name}_${suffix}`;
    }

    if (!merged) {
      if (key !== name) {
        console.error(
          `[shacl-to-schema] warning: property name "${name}" on shape <${shapeIri}> is used by more than one path; <${spec.id}> is emitted as "${key}"`,
        );
      }
      properties[key] = spec;
    }
  }

  return properties;
}

/**
 * Conjoins two specs built from property shapes on the same path.
 *
 * SHACL conjoins multiple property shapes on the same path (AND). LDkit's
 * runtime ignores @type when @schema is set, so schemaRef wins over type.
 * When both sides carry the same kind of information, `b` takes precedence.
 * `optional` and `array` survive only if both sides allow them, while
 * `multilang` and `inverse` are kept if either side sets them.
 */
private mergePropertySpecs(a: PropertySpec, b: PropertySpec): PropertySpec {
  const merged: PropertySpec = { id: a.id };
  if (b.schemaRef !== undefined || a.schemaRef !== undefined) {
    merged.schemaRef = b.schemaRef ?? a.schemaRef;
  } else if (b.type !== undefined) {
    merged.type = b.type;
  } else if (a.type !== undefined) {
    merged.type = a.type;
  }
  if (a.optional && b.optional) merged.optional = true;
  if (a.array && b.array) merged.array = true;
  if (a.multilang || b.multilang) merged.multilang = true;
  if (a.inverse || b.inverse) merged.inverse = true;
  return merged;
}

/**
 * Builds the name and spec for a single property shape.
 *
 * @param propertyNode The property shape node (IRI or blank node).
 * @param enclosingShapeIri Owning node shape, used only for error context.
 * @returns The local-name key and the derived property spec.
 * @throws Error when `sh:path` is missing or not supported (see `resolvePath`).
 */
private buildProperty(
  propertyNode: Term,
  enclosingShapeIri?: string,
): { name: string; spec: PropertySpec } {
  const { iri: pathIri, inverse } = this.resolvePath(
    propertyNode,
    enclosingShapeIri,
  );
  const name = this.getSuffix(pathIri);

  const spec: PropertySpec = { id: pathIri };
  if (inverse) {
    spec.inverse = true;
  }

  const direct = this.collectConstraints(propertyNode);
  const orBranches = this.collectOrBranches(propertyNode);

  let forceOptional = false;
  const refTarget = direct.refNode ?? direct.refClass;

  // Precedence: language strings, then references, then explicit datatype,
  // then node kind, then sh:in, then sh:or. xsd:string is never written out;
  // a spec without `type` stands for a plain string.
  if (direct.uniqueLang || direct.datatype === RDF_LANG_STRING) {
    spec.multilang = true;
  } else if (refTarget) {
    // Default to IRI for sh:node / sh:class references
    spec.type = "@id";
  } else if (direct.datatype && direct.datatype !== XSD_STRING) {
    spec.type = direct.datatype;
  } else if (direct.nodeKind === SH_IRI) {
    spec.type = "@id";
  } else if (direct.inFirstType) {
    if (direct.inFirstType === "@id") {
      spec.type = "@id";
    } else if (direct.inFirstType !== XSD_STRING) {
      spec.type = direct.inFirstType;
    }
  } else if (orBranches.length > 0) {
    const reduced = this.reduceOrBranches(orBranches);
    // A property typed only through sh:or is always emitted as optional,
    // regardless of sh:minCount.
    forceOptional = true;
    if (reduced.kind === "datatype" && reduced.value !== XSD_STRING) {
      spec.type = reduced.value;
    } else if (reduced.kind === "iri") {
      spec.type = "@id";
    }
  }

  // Cardinality is read from the property shape itself only; counts nested
  // inside sh:and / sh:or branches are not consulted.
  const minCount = this.getObjectInteger(propertyNode, SH_MIN_COUNT);
  const maxCount = this.getObjectInteger(propertyNode, SH_MAX_COUNT);

  if (forceOptional || minCount === undefined || minCount === 0) {
    spec.optional = true;
  }
  if (maxCount === undefined || maxCount > 1) {
    spec.array = true;
  }

  return { name, spec };
}

/**
 * Resolves `sh:path` of a property shape to a predicate IRI.
 *
 * Supports a plain predicate IRI and a blank node carrying
 * `sh:inversePath` with an IRI value.
 *
 * @throws Error when the path is missing or has any other form.
 */
private resolvePath(
  propertyNode: Term,
  enclosingShapeIri?: string,
): { iri: string; inverse: boolean } {
  const ctx = enclosingShapeIri ? ` on shape <${enclosingShapeIri}>` : "";
  const pathTerm = this.getObjectTerm(propertyNode, SH_PATH);
  if (!pathTerm) {
    throw new Error(`Property shape${ctx} is missing sh:path`);
  }
  if (pathTerm.termType === "NamedNode") {
    return { iri: pathTerm.value, inverse: false };
  }
  if (pathTerm.termType === "BlankNode") {
    const inverseIri = this.getObjectIri(pathTerm, SH_INVERSE_PATH);
    if (inverseIri) {
      return { iri: inverseIri, inverse: true };
    }
  }
  throw new Error(
    `Unsupported sh:path${ctx}: only simple predicate IRIs and sh:inversePath are supported (got ${pathTerm.termType})`,
  );
}

/**
 * Gathers the type-relevant constraints declared on a node, folding in the
 * members of `sh:and`. Later `sh:and` members override earlier values.
 *
 * @param node Property shape or constraint branch to inspect.
 * @param visiting Nodes on the current `sh:and` recursion path. Internal;
 *   callers normally omit it. A member already on the path is skipped, so a
 *   cyclic shapes graph terminates instead of overflowing the stack.
 */
private collectConstraints(
  node: Term,
  visiting: Set<string> = new Set<string>(),
): Constraints {
  const nodeKey = `${node.termType}:${node.value}`;
  visiting.add(nodeKey);

  const c: Constraints = {
    datatype: this.getObjectIri(node, SH_DATATYPE),
    nodeKind: this.getObjectIri(node, SH_NODE_KIND),
    refNode: this.getObjectIri(node, SH_NODE),
    refClass: this.getObjectIri(node, SH_CLASS),
    uniqueLang: this.getObjectBoolean(node, SH_UNIQUE_LANG),
  };

  // sh:in: only the first list member is inspected to guess the value type.
  const inListTerm = this.getObjectTerm(node, SH_IN);
  if (inListTerm) {
    const items = this.walkList(inListTerm);
    const first = items[0];
    if (first?.termType === "NamedNode") {
      c.inFirstType = "@id";
    } else if (first?.termType === "Literal") {
      const dt =
        (first as { datatype?: { value: string } }).datatype?.value ??
          XSD_STRING;
      c.inFirstType = dt;
    }
  }

  const andListTerm = this.getObjectTerm(node, SH_AND);
  if (andListTerm) {
    for (const branch of this.walkList(andListTerm)) {
      if (visiting.has(`${branch.termType}:${branch.value}`)) {
        // Cyclic sh:and reference: this node is already being processed.
        continue;
      }
      const sub = this.collectConstraints(branch, visiting);
      if (sub.datatype !== undefined) c.datatype = sub.datatype;
      if (sub.nodeKind !== undefined) c.nodeKind = sub.nodeKind;
      if (sub.refNode !== undefined) c.refNode = sub.refNode;
      if (sub.refClass !== undefined) c.refClass = sub.refClass;
      if (sub.uniqueLang !== undefined) c.uniqueLang = sub.uniqueLang;
      if (sub.inFirstType !== undefined) c.inFirstType = sub.inFirstType;
    }
  }

  // Path-scoped guard: a node reachable through two separate branches is
  // still processed each time, exactly as without the guard.
  visiting.delete(nodeKey);
  return c;
}

/** Collects the constraints of each `sh:or` member, or `[]` without `sh:or`. */
private collectOrBranches(node: Term): Constraints[] {
  const orListTerm = this.getObjectTerm(node, SH_OR);
  if (!orListTerm) return [];
  // Keep the arrow wrapper: passing the method directly would hand the map
  // index to the `visiting` parameter.
  return this.walkList(orListTerm).map((branch) =>
    this.collectConstraints(branch)
  );
}

/**
 * Collapses `sh:or` branches into one representable type.
 *
 * - all branches are datatypes: the pick from `pickWidestNumeric` if every
 *   datatype is numeric, the shared datatype if they are all identical,
 *   otherwise untyped;
 * - all branches are `sh:node` / `sh:class` references: an IRI;
 * - anything else (mixed or validation-only branches): untyped.
 */
private reduceOrBranches(branches: Constraints[]): ReducedOr {
  if (branches.length === 0) {
    return { kind: "untyped" };
  }

  const allDatatypes = branches.every(
    (b) => b.datatype && !b.refNode && !b.refClass,
  );
  const allRefs = branches.every(
    (b) => (b.refNode || b.refClass) && !b.datatype,
  );

  if (allDatatypes) {
    const dts = branches.map((b) => b.datatype!);
    const widened = this.pickWidestNumeric(dts);
    if (widened) {
      return { kind: "datatype", value: widened };
    }
    const unique = new Set(dts);
    if (unique.size === 1) {
      return { kind: "datatype", value: dts[0] };
    }
    return { kind: "untyped" };
  }

  if (allRefs) {
    return { kind: "iri" };
  }

  return { kind: "untyped" };
}

/**
 * Returns the `NUMERIC_WIDENING` entry with the lowest index among `types`,
 * or `undefined` unless every given type is listed in that table.
 * NOTE(review): presumably the table is ordered widest-first; confirm
 * against its declaration.
 */
private pickWidestNumeric(types: string[]): string | undefined {
  const indices = types.map((t) => NUMERIC_WIDENING.indexOf(t));
  if (indices.every((i) => i >= 0)) {
    return NUMERIC_WIDENING[Math.min(...indices)];
  }
  return undefined;
}

/**
 * Flattens an RDF collection into its members, in list order.
 *
 * Stops at `rdf:nil`, at a cell missing `rdf:first` or `rdf:rest`, or when
 * a cell is seen twice (cyclic list), returning what was gathered so far.
 */
private walkList(listHead: Term): Term[] {
  const items: Term[] = [];
  const visited = new Set<string>();
  let current: Term | undefined = listHead;
  while (
    current &&
    !(current.termType === "NamedNode" && current.value === RDF_NIL)
  ) {
    const key = `${current.termType}:${current.value}`;
    if (visited.has(key)) break;
    visited.add(key);
    const first = this.getObjectTerm(current, RDF_FIRST);
    if (!first) break;
    items.push(first);
    const rest = this.getObjectTerm(current, RDF_REST);
    if (!rest) break;
    current = rest;
  }
  return items;
}

/**
 * Makes a string usable as a TypeScript identifier: every character outside
 * `[A-Za-z0-9_$]` becomes `_`, and a leading digit is prefixed with `_`.
 */
private sanitizeIdentifier(value: string): string {
  let cleaned = value.replace(/[^A-Za-z0-9_$]/g, "_");
  if (cleaned.length > 0 && /^[0-9]/.test(cleaned)) {
    cleaned = `_${cleaned}`;
  }
  return cleaned;
}

/** Returns the object of the first quad matching subject and predicate. */
private getObjectTerm(
  subject: Term | string,
  predicate: string,
): Term | undefined {
  const quads = this.store.getQuads(subject, predicate, null, null);
  return quads[0]?.object;
}

/** Like `getObjectTerm`, but yields the value only if it is a named node. */
private getObjectIri(
  subject: Term | string,
  predicate: string,
): string | undefined {
  const term = this.getObjectTerm(subject, predicate);
  if (term && term.termType === "NamedNode") {
    return term.value;
  }
  return undefined;
}

/** Returns the IRIs of all named-node objects for subject and predicate. */
private getObjectIris(subject: Term | string, predicate: string): string[] {
  const quads = this.store.getQuads(subject, predicate, null, null);
  const result: string[] = [];
  for (const q of quads) {
    if (q.object.termType === "NamedNode") {
      result.push(q.object.value);
    }
  }
  return result;
}

/**
 * Reads the first matching object as a base-10 integer.
 * Returns `undefined` for non-literals and unparseable values.
 */
private getObjectInteger(
  subject: Term | string,
  predicate: string,
): number | undefined {
  const term = this.getObjectTerm(subject, predicate);
  if (term && term.termType === "Literal") {
    const parsed = parseInt(term.value, 10);
    if (!Number.isNaN(parsed)) {
      return parsed;
    }
  }
  return undefined;
}

/**
 * Reads the first matching object as a boolean. Only the lexical forms
 * "true" and "false" are recognized; anything else yields `undefined`.
 */
private getObjectBoolean(
  subject: Term | string,
  predicate: string,
): boolean | undefined {
  const term = this.getObjectTerm(subject, predicate);
  if (term && term.termType === "Literal") {
    if (term.value === "true") return true;
    if (term.value === "false") return false;
  }
  return undefined;
}

/**
 * Returns the part of an IRI after its last `#` or `/`, or the whole value
 * when it contains neither.
 */
private getSuffix(value: string): string {
  const cutoff = Math.max(value.lastIndexOf("#"), value.lastIndexOf("/"));
  if (cutoff === -1) {
    return value;
  }
  return value.substring(cutoff + 1);
}

/** Upper-cases the first character; an empty string is returned unchanged. */
private capitalize(value: string): string {
  if (value.length === 0) return value;
  return value.charAt(0).toUpperCase() + value.slice(1);
}
"age", + "name", + ], + } as const, + ); + + export const PersonSchema = { + "@type": ex.Person, + name: ex.name, + age: { + "@id": ex.age, + "@type": xsd.integer, + }, + } as const; + `; + + testWithExtras([schema], extras, script); +}); + +Deno.test("Scripts / Schema To Script / Extra namespace bracket access for non-identifier local parts", () => { + // Hyphens, dots, etc. are valid in IRI local parts but invalid in TS dot + // access. The printer must use bracket access for those. + const schema: SchemaSpec = { + name: "AdSchema", + type: ["https://ex.org/Ad-Type"], + properties: { + ref: { id: "https://ex.org/foo-bar" }, + }, + }; + + const extras: ExtraNamespace[] = [ + { iri: "https://ex.org/", prefix: "ex" }, + ]; + + const script = s` + import { createNamespace } from "ldkit"; + + export const ex = createNamespace( + { + iri: "https://ex.org/", + prefix: "ex:", + terms: [ + "Ad-Type", + "foo-bar", + ], + } as const, + ); + + export const AdSchema = { + "@type": ex["Ad-Type"], + ref: ex["foo-bar"], + } as const; + `; + + testWithExtras([schema], extras, script); +}); + +Deno.test("Scripts / Schema To Script / Extra namespaces sorted by IRI length descending", () => { + // When multiple extras share a base IRI prefix, the longest must match + // first so that `https://ex.org/sub/foo` resolves to `exsub.foo`, not + // `ex.sub/foo` (which would be a lexer error). 
+ const schema: SchemaSpec = { + name: "TheSchema", + type: [], + properties: { + a: { id: "https://ex.org/a" }, + b: { id: "https://ex.org/sub/b" }, + }, + }; + + const extras: ExtraNamespace[] = [ + { iri: "https://ex.org/", prefix: "ex" }, + { iri: "https://ex.org/sub/", prefix: "exsub" }, + ]; + + const script = s` + import { createNamespace } from "ldkit"; + + export const exsub = createNamespace( + { + iri: "https://ex.org/sub/", + prefix: "exsub:", + terms: [ + "b", + ], + } as const, + ); + + export const ex = createNamespace( + { + iri: "https://ex.org/", + prefix: "ex:", + terms: [ + "a", + ], + } as const, + ); + + export const TheSchema = { + a: ex.a, + b: exsub.b, + } as const; + `; + + testWithExtras([schema], extras, script); +}); + +Deno.test("Scripts / Schema To Script / Unused extra namespaces are dropped", () => { + // An extra namespace passed in but unreferenced by any schema should NOT + // produce a createNamespace block (would be dead code in the output). + const schema: SchemaSpec = { + name: "TheSchema", + type: [], + properties: { + name: { id: "http://schema.org/name" }, + }, + }; + + const extras: ExtraNamespace[] = [ + { iri: "https://unused.example/", prefix: "unused" }, + ]; + + const script = s` + import { schema } from "ldkit/namespaces"; + + export const TheSchema = { + name: schema.name, + } as const; + `; + + testWithExtras([schema], extras, script); +}); + +Deno.test("Scripts / Schema To Script / Extra namespace shadowing a built-in: built-in import dropped, IRIs under it fall back to literal", () => { + // When a user-defined extra namespace shares its prefix name with an + // LDkit built-in (e.g. user `schema:` for HTTPS schema.org vs LDkit's + // built-in `schema` for HTTP schema.org), the user's prefix wins the + // clean name. The corresponding built-in is NOT imported, so any IRIs + // that would have matched it render as literal strings. 
+ const schema: SchemaSpec = { + name: "TheSchema", + type: ["https://schema.org/Person"], + properties: { + name: { id: "https://schema.org/name" }, // matches user's HTTPS namespace + legacyName: { id: "http://schema.org/name" }, // would match LDkit built-in but built-in is shadowed + }, + }; + + const extras: ExtraNamespace[] = [ + { iri: "https://schema.org/", prefix: "schema" }, + ]; + + const script = s` + import { createNamespace } from "ldkit"; + + export const schema = createNamespace( + { + iri: "https://schema.org/", + prefix: "schema:", + terms: [ + "Person", + "name", + ], + } as const, + ); + + export const TheSchema = { + "@type": schema.Person, + name: schema.name, + legacyName: "http://schema.org/name", + } as const; + `; + + testWithExtras([schema], extras, script); +}); + +Deno.test("Scripts / Schema To Script / IRI not under any namespace falls back to literal", () => { + // If no extra or built-in namespace matches, the IRI is emitted as a raw + // string literal. Confirms existing behavior survives extra-namespace + // handling. + const schema: SchemaSpec = { + name: "TheSchema", + type: [], + properties: { + orphan: { id: "https://nobody.example/foo" }, + }, + }; + + const extras: ExtraNamespace[] = [ + { iri: "https://other.example/", prefix: "other" }, + ]; + + const script = s` + export const TheSchema = { + orphan: "https://nobody.example/foo", + } as const; + `; + + testWithExtras([schema], extras, script); +}); diff --git a/tests/scripts/shacl_to_package.test.ts b/tests/scripts/shacl_to_package.test.ts new file mode 100644 index 0000000..31c85a3 --- /dev/null +++ b/tests/scripts/shacl_to_package.test.ts @@ -0,0 +1,93 @@ +import { assertEquals } from "../test_deps.ts"; +import { shaclToSchema } from "../../scripts/shacl_to_schema.ts"; +import { schemaToPackage } from "../../scripts/schema_to_package.ts"; + +Deno.test("Scripts / SHACL to Package / Two namespaces split into two files", () => { + const input = ` +@prefix ex: . 
+@prefix gad: . +@prefix sh: . +@prefix xsd: . + +ex:WidgetShape a sh:NodeShape ; + sh:targetClass ex:Widget ; + sh:property [ sh:path ex:label ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . + +gad:GadgetShape a sh:NodeShape ; + sh:targetClass gad:Gadget ; + sh:property [ sh:path gad:size ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const { schemas, extraNamespaces, schemaSourcePrefixes } = shaclToSchema( + input, + { prefixAliases: { ex: "Widget" } }, + ); + const { files } = schemaToPackage(schemas, extraNamespaces, { + prefixAliases: { ex: "Widget" }, + schemaSourcePrefixes, + }); + + assertEquals( + [...files.keys()].toSorted(), + ["gad", "index", "namespaces", "widget"], + ); + + const widget = files.get("widget")!; + const gad = files.get("gad")!; + const namespaces = files.get("namespaces")!; + const index = files.get("index")!; + + if (!widget.includes("WidgetWidgetSchema")) { + throw new Error("widget.ts missing WidgetWidgetSchema:\n" + widget); + } + if (!gad.includes("GadGadgetSchema")) { + throw new Error("gad.ts missing GadGadgetSchema:\n" + gad); + } + if (!widget.includes(`from "./namespaces"`)) { + throw new Error( + "widget.ts must import its namespace const from ./namespaces:\n" + + widget, + ); + } + if (widget.includes('from "./gad"')) { + throw new Error( + "widget.ts should not import from gad (no cross-prefix imports between schema files):\n" + + widget, + ); + } + if (!namespaces.includes(`export const ex = createNamespace`)) { + throw new Error("namespaces.ts missing ex declaration:\n" + namespaces); + } + if (!namespaces.includes(`export const gad = createNamespace`)) { + throw new Error("namespaces.ts missing gad declaration:\n" + namespaces); + } + assertEquals( + index, + `export * from "./gad";\nexport * from "./namespaces";\nexport * from "./widget";\n`, + ); +}); + +Deno.test("Scripts / SHACL to Package / Schemas without source prefix land in fallback file", () => { + const input = ` +@prefix sh: 
. +@prefix xsd: . + + a sh:NodeShape ; + sh:targetClass ; + sh:property [ sh:path ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const { schemas, extraNamespaces, schemaSourcePrefixes } = shaclToSchema( + input, + ); + const { files } = schemaToPackage(schemas, extraNamespaces, { + schemaSourcePrefixes, + }); + + if (!files.has("_unknown")) { + throw new Error( + "Expected _unknown fallback file. Got: " + + [...files.keys()].join(", "), + ); + } +}); diff --git a/tests/scripts/shacl_to_schema.test.ts b/tests/scripts/shacl_to_schema.test.ts new file mode 100644 index 0000000..0e7d9bd --- /dev/null +++ b/tests/scripts/shacl_to_schema.test.ts @@ -0,0 +1,1072 @@ +import { assertEquals, assertThrows } from "../test_deps.ts"; +import { shaclToSchema } from "../../scripts/shacl_to_schema.ts"; +import { + type ExtraNamespace, + type SchemaSpec, +} from "../../scripts/schema_to_script.ts"; + +const testSchemas = (ttl: string, schemas: SchemaSpec[]) => { + const result = shaclToSchema(ttl); + assertEquals(result.schemas, schemas); +}; + +const testSchema = (ttl: string, schema: SchemaSpec) => { + return testSchemas(ttl, [schema]); +}; + +const PREFIXES = ` +@prefix ex: . +@prefix rdf: . +@prefix rdfs: . +@prefix sh: . +@prefix xsd: . +`; + +Deno.test( + "Scripts / SHACL to Schema / Mixed HTTP and HTTPS schema.org IRIs: user HTTPS wins; HTTP IRIs left raw in IR", + () => { + // Real-world case: a SHACL file declares schema: as HTTPS schema.org + // (modern W3C convention) but also references an HTTP schema.org IRI + // directly via full-URI form. The user's HTTPS prefix wins the clean + // `schema` name in the registered extras. The HTTP IRI lands in the + // schema IR as a raw string — the printer (covered separately) falls + // back to a literal-string emission because LDkit's built-in `schema` + // import (which would have matched the HTTP IRI) is shadowed. + const input = ` +@prefix schema: . +@prefix sh: . +@prefix xsd: . 
+ + a sh:NodeShape ; + sh:targetClass ; + sh:property [ + sh:path schema:name ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:maxCount 1 + ] ; + sh:property [ + sh:path ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:maxCount 1 + ] . +`; + + const result = shaclToSchema(input); + + assertEquals(result.schemas, [ + { + name: "MixedSchema", + type: ["http://example.org/Mixed"], + properties: { + name: { id: "https://schema.org/name" }, + legacy: { id: "http://schema.org/legacy" }, + }, + }, + ]); + + // Only the user's HTTPS schema.org is registered as an extra. The HTTP + // IRI has no @prefix declaration in the input, so it doesn't appear here. + assertEquals(result.extraNamespaces, [ + { iri: "https://schema.org/", prefix: "schema" }, + ]); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Project namespaces emitted as createNamespace specs", + () => { + // User-declared @prefix declarations whose IRI is not an LDkit built-in + // surface as `extraNamespaces`. Built-in IRIs (xsd, sh) and unused ones + // are filtered out. The user's prefix wins the clean name even if it + // shadows an LDkit built-in's prefix (e.g. `schema:` here shadows LDkit's + // built-in `schema` namespace, which uses HTTP schema.org). + const input = ` +@prefix ex: . +@prefix sub: . +@prefix schema: . +@prefix unused: . +@prefix sh: . +@prefix xsd: . + +ex:ItemShape a sh:NodeShape ; + sh:targetClass ex:Item ; + sh:property [ sh:path ex:label ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] ; + sh:property [ sh:path schema:dateCreated ; sh:datatype xsd:dateTime ; sh:minCount 1 ; sh:maxCount 1 ] ; + sh:property [ sh:path sub:source ; sh:nodeKind sh:IRI ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const result = shaclToSchema(input); + + // Schemas use raw IRIs in the IR; the printer applies the namespace + // prefixes downstream. 
+ assertEquals(result.schemas, [ + { + name: "ExItemSchema", + type: ["http://example.org/vocab#Item"], + properties: { + label: { id: "http://example.org/vocab#label" }, + dateCreated: { + id: "https://schema.org/dateCreated", + type: "http://www.w3.org/2001/XMLSchema#dateTime", + }, + source: { + id: "http://example.org/vocab/sub#source", + type: "@id", + }, + }, + }, + ]); + + // Three project namespaces emitted: `ex`, `sub`, `schema`. Built-in `sh` + // and `xsd` are filtered. `unused` is dropped because no IRI references + // it. `schema` keeps its clean name even though LDkit has a built-in + // namespace by the same name — IRIs under the LDkit built-in fall back + // to literal strings (not exercised here; covered by a printer test). + const expected: ExtraNamespace[] = [ + { iri: "http://example.org/vocab#", prefix: "ex" }, + { iri: "http://example.org/vocab/sub#", prefix: "sub" }, + { iri: "https://schema.org/", prefix: "schema" }, + ]; + assertEquals(result.extraNamespaces, expected); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Malformed Turtle wraps the parser error with context", + () => { + // Without the wrapper, n3's error lands as an unstructured stack trace. + // Wrapping it with a "Failed to parse Turtle" prefix gives users a clear + // signal that the input — not the converter — is the issue. + const input = ` +@prefix ex: . + +ex:PersonShape a sh:NodeShape // <- missing terminating dot +`; + + assertThrows(() => shaclToSchema(input), Error, "Failed to parse Turtle"); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Non-node sh:property values are skipped, others convert", + () => { + // Some real-world SHACL files have malformed sh:property values (typos, + // generated by buggy tools, etc.). Skipping them gracefully — instead of + // crashing the whole conversion — lets the rest of the file succeed. 
+ const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property "literal-not-a-property-shape" ; + sh:property [ sh:path ex:name ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + // The literal sh:property value is dropped (with a stderr warning); the + // valid blank-node property still converts. + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + name: { id: "http://example.org/name" }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Missing sh:path error names the enclosing shape", + () => { + // When a property shape has no sh:path, the error message must identify + // *which* shape it belongs to — otherwise debugging a 14k-line SHACL is + // a needle-in-a-haystack. + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + assertThrows(() => shaclToSchema(input), Error, "PersonShape"); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Complex sh:path error names the enclosing shape", + () => { + // Same context-in-error rule for the unsupported-path case. + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path ( ex:hop1 ex:hop2 ) ; + sh:datatype xsd:string + ] . +`; + + assertThrows(() => shaclToSchema(input), Error, "PersonShape"); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Single property with default datatype", + () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path ex:name ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:maxCount 1 + ] . 
+`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + name: { id: "http://example.org/name" }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test("Scripts / SHACL to Schema / Datatype mapping", () => { + const input = `${PREFIXES} +ex:ThingShape a sh:NodeShape ; + sh:targetClass ex:Thing ; + sh:property [ sh:path ex:age ; sh:datatype xsd:integer ; sh:minCount 1 ; sh:maxCount 1 ] ; + sh:property [ sh:path ex:active ; sh:datatype xsd:boolean ; sh:minCount 1 ; sh:maxCount 1 ] ; + sh:property [ sh:path ex:born ; sh:datatype xsd:date ; sh:minCount 1 ; sh:maxCount 1 ] ; + sh:property [ sh:path ex:created ; sh:datatype xsd:dateTime ; sh:minCount 1 ; sh:maxCount 1 ] ; + sh:property [ sh:path ex:price ; sh:datatype xsd:decimal ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const schema: SchemaSpec = { + name: "ExThingSchema", + type: ["http://example.org/Thing"], + properties: { + age: { + id: "http://example.org/age", + type: "http://www.w3.org/2001/XMLSchema#integer", + }, + active: { + id: "http://example.org/active", + type: "http://www.w3.org/2001/XMLSchema#boolean", + }, + born: { + id: "http://example.org/born", + type: "http://www.w3.org/2001/XMLSchema#date", + }, + created: { + id: "http://example.org/created", + type: "http://www.w3.org/2001/XMLSchema#dateTime", + }, + price: { + id: "http://example.org/price", + type: "http://www.w3.org/2001/XMLSchema#decimal", + }, + }, + }; + + testSchema(input, schema); +}); + +Deno.test( + "Scripts / SHACL to Schema / Optional property when minCount is missing", + () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ sh:path ex:nickname ; sh:datatype xsd:string ; sh:maxCount 1 ] . 
+`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + nickname: { + id: "http://example.org/nickname", + optional: true, + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Array when maxCount is unbounded", + () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ sh:path ex:tag ; sh:datatype xsd:string ; sh:minCount 1 ] ; + sh:property [ sh:path ex:alias ; sh:datatype xsd:string ; sh:maxCount 5 ] . +`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + tag: { + id: "http://example.org/tag", + array: true, + }, + alias: { + id: "http://example.org/alias", + optional: true, + array: true, + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test("Scripts / SHACL to Schema / IRI reference via sh:nodeKind", () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ sh:path ex:homepage ; sh:nodeKind sh:IRI ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + homepage: { + id: "http://example.org/homepage", + type: "@id", + }, + }, + }; + + testSchema(input, schema); +}); + +Deno.test("Scripts / SHACL to Schema / Multilang via rdf:langString", () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ sh:path ex:bio ; sh:datatype rdf:langString ; sh:minCount 1 ; sh:maxCount 1 ] . 
+`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + bio: { + id: "http://example.org/bio", + multilang: true, + }, + }, + }; + + testSchema(input, schema); +}); + +Deno.test("Scripts / SHACL to Schema / Nested shape via sh:node", () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path ex:address ; + sh:node ex:AddressShape ; + sh:minCount 1 ; + sh:maxCount 1 + ] . + +ex:AddressShape a sh:NodeShape ; + sh:targetClass ex:Address ; + sh:property [ sh:path ex:street ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const personSchema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + address: { + id: "http://example.org/address", + type: "@id", + }, + }, + }; + + const addressSchema: SchemaSpec = { + name: "ExAddressSchema", + type: ["http://example.org/Address"], + properties: { + street: { id: "http://example.org/street" }, + }, + }; + + testSchemas(input, [personSchema, addressSchema]); +}); + +Deno.test( + "Scripts / SHACL to Schema / Shape without targetClass uses shape IRI as type", + () => { + const input = `${PREFIXES} +ex:Memory a rdfs:Class, sh:NodeShape ; + sh:property [ sh:path ex:label ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const schema: SchemaSpec = { + name: "ExMemorySchema", + type: ["http://example.org/Memory"], + properties: { + label: { id: "http://example.org/label" }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test("Scripts / SHACL to Schema / Multiple shapes in one file", () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ sh:path ex:name ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . 
+ +ex:CompanyShape a sh:NodeShape ; + sh:targetClass ex:Company ; + sh:property [ sh:path ex:name ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const personSchema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + name: { id: "http://example.org/name" }, + }, + }; + + const companySchema: SchemaSpec = { + name: "ExCompanySchema", + type: ["http://example.org/Company"], + properties: { + name: { id: "http://example.org/name" }, + }, + }; + + testSchemas(input, [personSchema, companySchema]); +}); + +Deno.test( + "Scripts / SHACL to Schema / Realistic shape with mixed property kinds", + () => { + // Exercises a single shape combining: an rdfs:label literal property with + // default xsd:string, an xsd:dateTime literal, an IRI-kind reference, a + // nested-shape reference, and an unbounded-cardinality string array. + const input = `${PREFIXES} +@prefix schema: . + +ex:ItemShape a sh:NodeShape ; + sh:targetClass ex:Item ; + sh:property [ + sh:path rdfs:label ; + sh:datatype xsd:string ; + sh:maxCount 1 + ] ; + sh:property [ + sh:path schema:dateModified ; + sh:datatype xsd:dateTime ; + sh:maxCount 1 + ] ; + sh:property [ + sh:path ex:status ; + sh:nodeKind sh:IRI ; + sh:maxCount 1 + ] ; + sh:property [ + sh:path ex:category ; + sh:node ex:CategoryShape ; + sh:maxCount 1 + ] ; + sh:property [ + sh:path ex:tags ; + sh:datatype xsd:string + ] . + +ex:CategoryShape a sh:NodeShape ; + sh:targetClass ex:Category ; + sh:property [ sh:path rdfs:label ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . 
+`; + + const itemSchema: SchemaSpec = { + name: "ExItemSchema", + type: ["http://example.org/Item"], + properties: { + label: { + id: "http://www.w3.org/2000/01/rdf-schema#label", + optional: true, + }, + dateModified: { + id: "https://schema.org/dateModified", + type: "http://www.w3.org/2001/XMLSchema#dateTime", + optional: true, + }, + status: { + id: "http://example.org/status", + type: "@id", + optional: true, + }, + category: { + id: "http://example.org/category", + type: "@id", + optional: true, + }, + tags: { + id: "http://example.org/tags", + optional: true, + array: true, + }, + }, + }; + + const categorySchema: SchemaSpec = { + name: "ExCategorySchema", + type: ["http://example.org/Category"], + properties: { + label: { id: "http://www.w3.org/2000/01/rdf-schema#label" }, + }, + }; + + testSchemas(input, [itemSchema, categorySchema]); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / sh:or of numeric datatypes picks widest", + () => { + const input = `${PREFIXES} +ex:ProductShape a sh:NodeShape ; + sh:targetClass ex:Product ; + sh:property [ + sh:path ex:price ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:or ( + [ sh:datatype xsd:integer ] + [ sh:datatype xsd:decimal ] + [ sh:datatype xsd:double ] + ) + ] . +`; + + const schema: SchemaSpec = { + name: "ExProductSchema", + type: ["http://example.org/Product"], + properties: { + price: { + id: "http://example.org/price", + type: "http://www.w3.org/2001/XMLSchema#decimal", + optional: true, + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / sh:or of sh:node refs reduces to untyped IRI", + () => { + const input = `${PREFIXES} +ex:AdShape a sh:NodeShape ; + sh:targetClass ex:Ad ; + sh:property [ + sh:path ex:creative ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:or ( + [ sh:node ex:ImageShape ] + [ sh:node ex:VideoShape ] + ) + ] . 
+ +ex:ImageShape a sh:NodeShape ; + sh:targetClass ex:Image ; + sh:property [ sh:path ex:url ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . + +ex:VideoShape a sh:NodeShape ; + sh:targetClass ex:Video ; + sh:property [ sh:path ex:url ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const adSchema: SchemaSpec = { + name: "ExAdSchema", + type: ["http://example.org/Ad"], + properties: { + creative: { + id: "http://example.org/creative", + type: "@id", + optional: true, + }, + }, + }; + + const imageSchema: SchemaSpec = { + name: "ExImageSchema", + type: ["http://example.org/Image"], + properties: { url: { id: "http://example.org/url" } }, + }; + + const videoSchema: SchemaSpec = { + name: "ExVideoSchema", + type: ["http://example.org/Video"], + properties: { url: { id: "http://example.org/url" } }, + }; + + testSchemas(input, [adSchema, imageSchema, videoSchema]); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / sh:or of validation-only branches drops to plain default", + () => { + const input = `${PREFIXES} +ex:LinkShape a sh:NodeShape ; + sh:targetClass ex:Link ; + sh:property [ + sh:path ex:href ; + sh:minCount 1 ; + sh:maxCount 1 ; + sh:or ( + [ sh:maxLength 3 ] + [ sh:pattern "^https?://" ] + ) + ] . +`; + + const schema: SchemaSpec = { + name: "ExLinkSchema", + type: ["http://example.org/Link"], + properties: { + href: { + id: "http://example.org/href", + optional: true, + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / sh:and merges branch constraints", + () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path ex:age ; + sh:and ( + [ sh:datatype xsd:integer ] + [ sh:minCount 1 ] + ) ; + sh:maxCount 1 + ] . 
+`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + age: { + id: "http://example.org/age", + type: "http://www.w3.org/2001/XMLSchema#integer", + // sh:and merged the datatype constraint, but sh:minCount lives only + // inside the sh:and branch — top-level minCount is missing, so the + // property remains optional. (Validators would still enforce it.) + optional: true, + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test("Scripts / SHACL to Schema / sh:not is silently ignored", () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path ex:age ; + sh:datatype xsd:integer ; + sh:not [ sh:hasValue 0 ] ; + sh:minCount 1 ; + sh:maxCount 1 + ] . +`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + age: { + id: "http://example.org/age", + type: "http://www.w3.org/2001/XMLSchema#integer", + }, + }, + }; + + testSchema(input, schema); +}); + +Deno.test( + "Scripts / SHACL to Schema / sh:in with string values yields default string type", + () => { + const input = `${PREFIXES} +ex:TaskShape a sh:NodeShape ; + sh:targetClass ex:Task ; + sh:property [ + sh:path ex:status ; + sh:in ( "active" "paused" "deleted" ) ; + sh:minCount 1 ; + sh:maxCount 1 + ] . +`; + + const schema: SchemaSpec = { + name: "ExTaskSchema", + type: ["http://example.org/Task"], + properties: { + status: { id: "http://example.org/status" }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / sh:in with IRI values yields IRI reference", + () => { + const input = `${PREFIXES} +ex:TaskShape a sh:NodeShape ; + sh:targetClass ex:Task ; + sh:property [ + sh:path ex:state ; + sh:in ( ex:Active ex:Paused ex:Deleted ) ; + sh:minCount 1 ; + sh:maxCount 1 + ] . 
+`; + + const schema: SchemaSpec = { + name: "ExTaskSchema", + type: ["http://example.org/Task"], + properties: { + state: { + id: "http://example.org/state", + type: "@id", + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Self-referential sh:node falls back to untyped IRI", + () => { + // A Person shape with a `friend` property that points back at PersonShape + // would create a circular schema dependency that LDkit's printer cannot + // emit. The converter should detect the self-reference and fall back to + // an untyped IRI reference rather than throwing. + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path ex:friend ; + sh:node ex:PersonShape + ] . +`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + friend: { + id: "http://example.org/friend", + type: "@id", + optional: true, + array: true, + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Schema names with hyphens are sanitized for TS", + () => { + // SHACL local parts with hyphens (or other non-identifier chars) must not + // bleed into TypeScript const names. `Foo-Bar` becomes `Foo_Bar`. + const input = `${PREFIXES} +ex:FacebookCarouselCard-InputShape a sh:NodeShape ; + sh:targetClass ex:FacebookCarouselCard-Input ; + sh:property [ sh:path ex:asset ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . 
+`; + + const schema: SchemaSpec = { + name: "ExFacebookCarouselCard_InputSchema", + type: ["http://example.org/FacebookCarouselCard-Input"], + properties: { + asset: { id: "http://example.org/asset" }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Merging sh:nodeKind sh:IRI with sh:node yields type: @id", + () => { + // Some SHACL files declare multiple sh:property shapes on the same path, + // one with `sh:nodeKind sh:IRI` and another with `sh:node X`. Both + // produce `type: "@id"` by default; the merge must keep `type: "@id"`. + const input = `${PREFIXES} +ex:SummaryShape a sh:NodeShape ; + sh:targetClass ex:Summary ; + sh:property [ sh:path ex:campaign ; sh:nodeKind sh:IRI ; sh:maxCount 1 ] ; + sh:property [ sh:path ex:campaign ; sh:node ex:CampaignShape ; sh:maxCount 1 ] . + +ex:CampaignShape a sh:NodeShape ; + sh:targetClass ex:Campaign ; + sh:property [ sh:path ex:label ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const summarySchema: SchemaSpec = { + name: "ExSummarySchema", + type: ["http://example.org/Summary"], + properties: { + campaign: { + id: "http://example.org/campaign", + type: "@id", + optional: true, + }, + }, + }; + + const campaignSchema: SchemaSpec = { + name: "ExCampaignSchema", + type: ["http://example.org/Campaign"], + properties: { + label: { id: "http://example.org/label" }, + }, + }; + + testSchemas(input, [summarySchema, campaignSchema]); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / Multiple property shapes on same path are merged", + () => { + // SHACL semantics: each sh:property is independently applied (AND). + // The converter merges them into a single property spec to fit LDkit's + // one-slot-per-property model. + const input = `${PREFIXES} +ex:ReportShape a sh:NodeShape ; + sh:targetClass ex:Report ; + sh:property [ + sh:path ex:value ; + sh:datatype xsd:integer + ] ; + sh:property [ + sh:path ex:value ; + sh:minCount 1 ; + sh:maxCount 1 + ] . 
+`; + + const schema: SchemaSpec = { + name: "ExReportSchema", + type: ["http://example.org/Report"], + properties: { + value: { + id: "http://example.org/value", + type: "http://www.w3.org/2001/XMLSchema#integer", + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / sh:inversePath sets inverse flag", + () => { + const input = `${PREFIXES} +ex:PersonShape a sh:NodeShape ; + sh:targetClass ex:Person ; + sh:property [ + sh:path [ sh:inversePath ex:parent ] ; + sh:nodeKind sh:IRI + ] . +`; + + const schema: SchemaSpec = { + name: "ExPersonSchema", + type: ["http://example.org/Person"], + properties: { + parent: { + id: "http://example.org/parent", + type: "@id", + inverse: true, + optional: true, + array: true, + }, + }, + }; + + testSchema(input, schema); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / prefixAliases option renames matching prefix in generated names", + () => { + const input = ` +@prefix m: <https://example.com/vocab#> . +@prefix sh: <http://www.w3.org/ns/shacl#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +m:CampaignShape a sh:NodeShape ; + sh:targetClass m:Campaign ; + sh:property [ + sh:path m:name ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:maxCount 1 + ] . +`; + + const result = shaclToSchema(input, { prefixAliases: { m: "Test" } }); + + assertEquals(result.schemas, [ + { + name: "TestCampaignSchema", + type: ["https://example.com/vocab#Campaign"], + properties: { + name: { id: "https://example.com/vocab#name" }, + }, + }, + ]); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / prefixAliases empty/unset preserves capitalize-prefix default", + () => { + const input = ` +@prefix m: <https://example.com/vocab#> . +@prefix sh: <http://www.w3.org/ns/shacl#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +m:CampaignShape a sh:NodeShape ; + sh:targetClass m:Campaign ; + sh:property [ + sh:path m:name ; + sh:datatype xsd:string ; + sh:minCount 1 ; + sh:maxCount 1 + ] . 
+`; + + const result = shaclToSchema(input); + + assertEquals(result.schemas[0].name, "MCampaignSchema"); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / prefixAliases only applies to matching prefix; others unchanged", + () => { + const input = ` +@prefix m: <https://example.com/vocab#> . +@prefix ex: <http://example.org/> . +@prefix sh: <http://www.w3.org/ns/shacl#> . +@prefix xsd: <http://www.w3.org/2001/XMLSchema#> . + +m:WidgetShape a sh:NodeShape ; + sh:targetClass m:Widget ; + sh:property [ sh:path m:name ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . + +ex:GadgetShape a sh:NodeShape ; + sh:targetClass ex:Gadget ; + sh:property [ sh:path ex:label ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . +`; + + const result = shaclToSchema(input, { prefixAliases: { m: "Example" } }); + + const names = result.schemas.map((s) => s.name).sort(); + assertEquals(names, ["ExGadgetSchema", "ExampleWidgetSchema"]); + }, +); + +Deno.test( + "Scripts / SHACL to Schema / sh:node never emits schemaRef by default; emits type: @id", + () => { + const input = `${PREFIXES} +ex:OrderShape a sh:NodeShape ; + sh:targetClass ex:Order ; + sh:property [ + sh:path ex:customer ; + sh:node ex:CustomerShape ; + sh:minCount 1 ; + sh:maxCount 1 + ] ; + sh:property [ + sh:path ex:item ; + sh:class ex:Item ; + sh:maxCount 1 + ] . + +ex:CustomerShape a sh:NodeShape ; + sh:targetClass ex:Customer ; + sh:property [ sh:path ex:name ; sh:datatype xsd:string ; sh:minCount 1 ; sh:maxCount 1 ] . 
+`; + + const result = shaclToSchema(input); + const order = result.schemas.find((s) => s.name === "ExOrderSchema")!; + + assertEquals(order.properties.customer, { + id: "http://example.org/customer", + type: "@id", + }); + assertEquals(order.properties.item, { + id: "http://example.org/item", + type: "@id", + optional: true, + }); + + for (const schema of result.schemas) { + for (const [key, prop] of Object.entries(schema.properties)) { + if (prop.schemaRef !== undefined) { + throw new Error( + `Expected no schemaRef in default output, but ${schema.name}.${key} has schemaRef=${prop.schemaRef}`, + ); + } + } + } + }, +);