Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 99 additions & 1 deletion cli.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import { argv } from "node:process";
import { styleText } from "node:util";
import { readFileSync } from "node:fs";
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { Argument, Command } from "npm:commander@^13.1.0";

import { contextToSchema } from "./scripts/context_to_schema.ts";
import { shexcToSchema, shexjToSchema } from "./scripts/shex_to_schema.ts";
import { shaclToSchema } from "./scripts/shacl_to_schema.ts";
import { schemaToScript } from "./scripts/schema_to_script.ts";
import { schemaToPackage } from "./scripts/schema_to_package.ts";

const asciiArt = String.raw`
_ ____ _ _ _
Expand Down Expand Up @@ -95,6 +98,82 @@ program.command("shexj-to-schema")
}
});

// Registers the `shacl-to-schema` sub-command.
// It resolves the SHACL input via `resolve(method, input)`, converts it with
// `shaclToSchema`, and prints the script produced by `schemaToScript` to stdout.
program.command("shacl-to-schema")
  .description(
    "Convert a SHACL shapes graph from a file or URL to a LDkit schema",
  )
  .addArgument(
    new Argument("<method>", "type of input").choices([
      "url",
      "file",
      "arg",
    ]),
  )
  .argument("<input>", "input SHACL Turtle - file, URL, or string")
  .option(
    "--prefix-alias <mapping>",
    "rename a SHACL prefix in generated schema names (format: prefix=Alias). Repeatable.",
    // Collect every occurrence of the flag into one array instead of keeping
    // only the last value.
    (value: string, previous: string[]) => [...previous, value],
    [] as string[],
  )
  .action(async (method, input, opts: { prefixAlias?: string[] }) => {
    try {
      const resolvedInput = await resolve(method, input);
      // Throws on malformed `prefix=Alias` pairs; reported by the catch below.
      const prefixAliases = parsePrefixAliases(opts.prefixAlias);
      const { schemas, extraNamespaces } = shaclToSchema(resolvedInput, {
        prefixAliases,
      });
      console.log(schemaToScript(schemas, extraNamespaces));
    } catch (error: unknown) {
      // Narrow instead of asserting: a non-Error throwable has no `.message`
      // and would otherwise be printed as "undefined".
      const message = error instanceof Error ? error.message : String(error);
      // NOTE(review): the failure is only printed here; the exit status is not
      // changed in this handler.
      console.error(styleText("red", message));
    }
  });

// Registers the `shacl-to-package` sub-command.
// It resolves the SHACL input, converts it with `shaclToSchema`, splits the
// result into files with `schemaToPackage`, and writes each entry of the
// returned `files` map to `<outDir>/<base>.ts`.
program.command("shacl-to-package")
  .description(
    "Convert a SHACL shapes graph into a directory of per-namespace LDkit schema files (one .ts per prefix plus an index.ts barrel)",
  )
  .addArgument(
    new Argument("<method>", "type of input").choices([
      "url",
      "file",
      "arg",
    ]),
  )
  .argument("<input>", "input SHACL Turtle - file, URL, or string")
  .argument("<outDir>", "output directory for the generated package")
  .option(
    "--prefix-alias <mapping>",
    "rename a SHACL prefix in generated schema names AND in the per-namespace file name (format: prefix=Alias). Repeatable.",
    // Collect every occurrence of the flag into one array instead of keeping
    // only the last value.
    (value: string, previous: string[]) => [...previous, value],
    [] as string[],
  )
  .action(
    async (
      method,
      input,
      outDir,
      opts: { prefixAlias?: string[] },
    ) => {
      try {
        const resolvedInput = await resolve(method, input);
        // Throws on malformed `prefix=Alias` pairs; reported by the catch below.
        const prefixAliases = parsePrefixAliases(opts.prefixAlias);
        const { schemas, extraNamespaces, schemaSourcePrefixes } =
          shaclToSchema(resolvedInput, { prefixAliases });
        const { files } = schemaToPackage(schemas, extraNamespaces, {
          prefixAliases,
          schemaSourcePrefixes,
        });
        // `recursive: true` creates missing parent directories and does not
        // fail when the directory already exists.
        mkdirSync(outDir, { recursive: true });
        for (const [base, contents] of files) {
          writeFileSync(join(outDir, `${base}.ts`), contents);
        }
      } catch (error: unknown) {
        // Narrow instead of asserting: a non-Error throwable has no `.message`
        // and would otherwise be printed as "undefined".
        const message = error instanceof Error ? error.message : String(error);
        // NOTE(review): the failure is only printed here; the exit status is
        // not changed in this handler.
        console.error(styleText("red", message));
      }
    },
  );

// Check if no arguments were provided
if (argv.length <= 2) {
console.log(styleText("red", asciiArt));
Expand All @@ -103,6 +182,25 @@ if (argv.length <= 2) {
program.parse(argv);
}

/**
 * Turns repeated `--prefix-alias prefix=Alias` values into a lookup object.
 *
 * Each entry is split at its first `=`; everything after that belongs to the
 * alias. When the same prefix occurs more than once, the last entry wins.
 *
 * @param pairs Raw option values; `undefined` or an empty array yields `{}`.
 * @returns An object mapping each prefix to its alias.
 * @throws Error when an entry has no `=`, an empty prefix, or an empty alias.
 */
function parsePrefixAliases(
  pairs: string[] | undefined,
): Record<string, string> {
  const aliasByPrefix: Record<string, string> = {};
  for (const mapping of pairs ?? []) {
    const separatorAt = mapping.indexOf("=");
    const hasPrefix = separatorAt > 0;
    const hasAlias = separatorAt !== mapping.length - 1;
    if (!(hasPrefix && hasAlias)) {
      throw new Error(
        `Invalid --prefix-alias value "${mapping}" (expected format: prefix=Alias)`,
      );
    }
    aliasByPrefix[mapping.slice(0, separatorAt)] = mapping.slice(
      separatorAt + 1,
    );
  }
  return aliasByPrefix;
}

async function resolve(method: string, input: string): Promise<string> {
if (method === "url") {
try {
Expand Down
67 changes: 60 additions & 7 deletions docs/components/schema-generators.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
LDkit provides experimental schema generators that help you convert existing
Linked Data definitions into TypeScript schemas compatible with LDkit. These
tools are available via the LDkit CLI and support generating code directly from
[JSON-LD contexts](https://www.w3.org/TR/json-ld11/), or
[ShEx shapes](https://shex.io/).
[JSON-LD contexts](https://www.w3.org/TR/json-ld11/),
[ShEx shapes](https://shex.io/), or
[SHACL shapes](https://www.w3.org/TR/shacl/).

> ⚠️ **Note:** These generators are experimental and currently support only a
> subset of the respective technologies. Manual review and adjustments of the
Expand Down Expand Up @@ -61,14 +62,66 @@ features are the same as for ShExC.
npx ldkit shexj-to-schema url https://ldkit.io/examples/person.shex.jsonld
```

### 4. `shacl-to-schema`

Converts a [SHACL](https://www.w3.org/TR/shacl/) shapes graph (Turtle) into an
LDkit TypeScript schema.

Supported SHACL features:

- **Project namespace generation** — every `@prefix` declaration in the source
whose IRI is not an LDkit built-in is re-emitted at the top of the generated
file as a `createNamespace()` call, and IRIs under that prefix render as e.g.
`ex.totalRevenue` instead of raw IRI strings. When a user-declared prefix
shadows an LDkit built-in's name (e.g. `schema` for `https://schema.org/` vs
LDkit's built-in `http://schema.org/`), the user's prefix wins the clean
variable name; the built-in is not imported and IRIs under it fall back to
literal strings.
- `sh:NodeShape` discovery (named shapes only)
- `sh:targetClass` mapped to schema `@type` (multiple targets allowed)
- shapes that are also `rdfs:Class` use the shape IRI as `@type`
- `sh:property` shapes (named or blank node) with simple `sh:path` IRIs
- `sh:inversePath` mapped to `@inverse`
- `sh:datatype` mapped to property `@type` (XSD datatypes)
- `sh:nodeKind sh:IRI` mapped to IRI references (`ldkit.IRI`)
- `sh:node` and `sh:class` mapped to nested schema references
- `sh:datatype rdf:langString` and `sh:uniqueLang true` mapped to `@multilang`
- cardinality via `sh:minCount` / `sh:maxCount` mapped to `@optional` / `@array`
- simplified handling of `sh:and` / `sh:or` logical constraints (mirrors `shexc-to-schema`):
- `sh:and` branches are merged into the same property spec (last-wins for
conflicting fields)
- `sh:or` of numeric datatypes is reduced to the widest type
- `sh:or` of identical datatypes uses that datatype
- `sh:or` of `sh:node` / `sh:class` alternatives is reduced to an untyped IRI
reference
- `sh:or` of mixed or unrepresentable branches drops the type and the property
is marked `@optional`
- `sh:in` enumerations use the type of the first list element (no TypeScript
literal union is generated, because the runtime cannot enforce it)
- `sh:not` and validation-only constraints (`sh:minLength`, `sh:maxLength`,
`sh:pattern`, `sh:hasValue`, `sh:minInclusive`, etc.) are silently ignored,
since LDkit's schema is for querying rather than validation

Unsupported (the converter throws a clear error if encountered):

- complex `sh:path` expressions other than `sh:inversePath` (sequence,
alternative, zero-or-more)

Manual review of the generated schema is recommended, especially after `sh:or`
reduction.

```bash
npx ldkit shacl-to-schema file ./shapes.ttl
```

## Command Syntax

```bash
npx ldkit <command> <method> <input>
```

- `<command>`: One of `context-to-schema`, `shexc-to-schema`, or
`shexj-to-schema`.
- `<command>`: One of `context-to-schema`, `shexc-to-schema`, `shexj-to-schema`,
or `shacl-to-schema`.

- `<method>`: Defines how the input is provided. Possible values:

Expand Down Expand Up @@ -113,9 +166,9 @@ ldkit context-to-schema file ./person.jsonld > person.ts

## Limitations

The generators do not fully cover all features of JSON-LD or ShEx. Complex
validation rules, advanced constraints, and some specialized constructs may be
omitted or simplified.
The generators do not fully cover all features of JSON-LD, ShEx, or SHACL.
Complex validation rules, advanced constraints, and some specialized constructs
may be omitted or simplified.

Manual post-processing of the generated schemas may be necessary for production
use.
153 changes: 153 additions & 0 deletions scripts/schema_to_package.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import {
type ExtraNamespace,
type SchemaSpec,
schemaToScript,
} from "./schema_to_script.ts";

/** Optional settings accepted by `schemaToPackage`. */
export type SchemaToPackageOptions = {
// Prefix -> alias. When a prefix has an alias, the lowercased alias (instead
// of the lowercased prefix) is used as that group's file name.
prefixAliases?: Record<string, string>;
// Looked up by schema name to obtain the prefix that decides which file the
// schema is placed in.
schemaSourcePrefixes?: Map<string, string>;
};

/** Result of `schemaToPackage`. */
export type SchemaPackage = {
// Generated file contents keyed by file base name (this module adds no
// extension to the keys).
files: Map<string, string>;
};

// File name used for schemas that have no known source prefix.
const FALLBACK_FILE = "_unknown";
// Key of the shared namespaces file; also passed to `schemaToScript` as
// `extraNamespacesImportFrom`.
const NAMESPACES_FILE = "namespaces";

/**
 * Splits a flat list of schemas into a set of generated files, one per source
 * prefix, plus a shared namespaces file and an `index` barrel.
 *
 * Each schema is placed in the file named after its source prefix (looked up
 * by schema name in `options.schemaSourcePrefixes`), lowercased and replaced
 * by its alias from `options.prefixAliases` when one exists. Schemas without
 * a known prefix go to the fallback file.
 *
 * @param schemas Schemas to distribute across files.
 * @param extraNamespaces Namespaces to declare in the shared namespaces file;
 *   when empty, that file is not emitted.
 * @param options Prefix aliases and the schema-name-to-prefix lookup.
 * @returns A map from file base name to generated file contents.
 */
export function schemaToPackage(
  schemas: SchemaSpec[],
  extraNamespaces: ExtraNamespace[] = [],
  options: SchemaToPackageOptions = {},
): SchemaPackage {
  const aliases = options.prefixAliases ?? {};
  const sourcePrefixes = options.schemaSourcePrefixes ??
    new Map<string, string>();
  const fileForPrefix = (prefix: string | undefined): string => {
    if (!prefix) return FALLBACK_FILE;
    // Only consult the object's own keys: a plain `aliases[prefix]` read would
    // return inherited members for prefixes such as "constructor" or
    // "toString", and calling `.toLowerCase()` on those throws.
    const aliased = Object.prototype.hasOwnProperty.call(aliases, prefix)
      ? aliases[prefix]
      : undefined;
    return (aliased ?? prefix).toLowerCase();
  };

  // Bucket schemas by target file and remember where each schema lives.
  const groups = new Map<string, SchemaSpec[]>();
  const schemaLocations = new Map<string, string>();
  for (const schema of schemas) {
    const file = fileForPrefix(sourcePrefixes.get(schema.name));
    schemaLocations.set(schema.name, file);
    let bucket = groups.get(file);
    if (!bucket) {
      bucket = [];
      groups.set(file, bucket);
    }
    bucket.push(schema);
  }

  // Emit one script per group, in file-name order.
  const files = new Map<string, string>();
  const sortedGroups = [...groups.entries()].toSorted(([a], [b]) =>
    a.localeCompare(b)
  );
  for (const [file, fileSchemas] of sortedGroups) {
    // Pass each file only the namespaces its own schemas reference.
    const scopedExtras = filterExtras(extraNamespaces, fileSchemas);
    const contents = schemaToScript(fileSchemas, scopedExtras, {
      schemaLocations,
      currentFile: file,
      extraNamespacesImportFrom: NAMESPACES_FILE,
    });
    files.set(file, contents);
  }

  // NOTE(review): a prefix or alias that lowercases to "namespaces" or "index"
  // produces a group file under that key, which the two `set` calls below then
  // overwrite.
  if (extraNamespaces.length > 0) {
    files.set(
      NAMESPACES_FILE,
      buildNamespacesFile(
        extraNamespaces,
        // Terms are gathered across ALL schemas, not per file.
        collectGlobalNamespaceTerms(extraNamespaces, schemas),
      ),
    );
  }
  // Built last so the barrel re-exports every file added above.
  files.set("index", buildIndex(files));
  return { files };
}

/**
 * Renders the source text of the shared namespaces file.
 *
 * The output starts with an import of `createNamespace` from "ldkit",
 * followed by one exported `createNamespace(...)` declaration per entry of
 * `extras`, ordered by prefix. Each declaration lists the terms recorded for
 * its prefix in sorted order; a prefix with no recorded terms gets an empty
 * list.
 *
 * @param extras Namespaces to declare (the input array is not mutated).
 * @param termsByPrefix Terms to list for each namespace, keyed by prefix.
 * @returns The file contents, with lines joined by "\n".
 */
function buildNamespacesFile(
  extras: ExtraNamespace[],
  termsByPrefix: Map<string, Set<string>>,
): string {
  const byPrefix = extras
    .slice()
    .sort((left, right) => left.prefix.localeCompare(right.prefix));

  const declarations = byPrefix.flatMap((ns) => {
    const termLines = Array.from(termsByPrefix.get(ns.prefix) ?? [])
      .sort()
      .map((term) => ` ${JSON.stringify(term)},`);
    return [
      `export const ${ns.prefix} = createNamespace(`,
      ` {`,
      ` iri: ${JSON.stringify(ns.iri)},`,
      ` prefix: ${JSON.stringify(`${ns.prefix}:`)},`,
      ` terms: [`,
      ...termLines,
      ` ],`,
      ` } as const,`,
      `);`,
      // Blank separator line after every declaration.
      "",
    ];
  });

  return [`import { createNamespace } from "ldkit";`, "", ...declarations]
    .join("\n");
}

/**
 * Keeps only the namespaces that the given schemas reference.
 *
 * A namespace is kept when at least one IRI collected from `schemas` starts
 * with the namespace IRI. The relative order of `extras` is preserved.
 *
 * @param extras Candidate namespaces.
 * @param schemas Schemas whose IRIs decide which namespaces are kept.
 * @returns A new array containing the referenced namespaces.
 */
function filterExtras(
  extras: ExtraNamespace[],
  schemas: SchemaSpec[],
): ExtraNamespace[] {
  // Nothing to filter: skip walking the schemas entirely.
  if (extras.length === 0) return [];

  const referencedIris = Array.from(collectUsedIris(schemas));
  const kept: ExtraNamespace[] = [];
  for (const candidate of extras) {
    const isReferenced = referencedIris.some((iri) =>
      iri.startsWith(candidate.iri)
    );
    if (isReferenced) kept.push(candidate);
  }
  return kept;
}

/**
 * Gathers every IRI mentioned by the given schemas.
 *
 * For each schema this collects its `type` entries and, for each property,
 * the property `id`, its `type` when set, and all IRIs of a nested `schema`
 * (recursively).
 *
 * @param schemas Schemas to walk.
 * @returns The set of IRIs, in first-encountered order.
 */
function collectUsedIris(schemas: SchemaSpec[]): Set<string> {
  const seen = new Set<string>();

  // Depth-first walk that writes into the single shared set.
  const visit = (spec: SchemaSpec): void => {
    for (const typeIri of spec.type) seen.add(typeIri);
    for (const property of Object.values(spec.properties)) {
      seen.add(property.id);
      if (property.type) seen.add(property.type);
      if (property.schema) visit(property.schema);
    }
  };

  for (const spec of schemas) visit(spec);
  return seen;
}

/**
 * Works out which terms each namespace must declare, across all schemas.
 *
 * Every IRI collected from `schemas` is assigned to the first namespace whose
 * IRI it starts with, trying namespaces with longer IRIs first. The rest of
 * the IRI after the namespace IRI is recorded as a term under that
 * namespace's prefix. IRIs matching no namespace are ignored.
 *
 * @param extras Namespaces to match against (the input array is not mutated).
 * @param schemas Schemas whose IRIs are distributed.
 * @returns Terms keyed by namespace prefix; prefixes with no match are absent.
 */
function collectGlobalNamespaceTerms(
  extras: ExtraNamespace[],
  schemas: SchemaSpec[],
): Map<string, Set<string>> {
  const longestFirst = extras
    .slice()
    .sort((x, y) => y.iri.length - x.iri.length);
  const termsByPrefix = new Map<string, Set<string>>();

  collectUsedIris(schemas).forEach((iri) => {
    const owner = longestFirst.find((ns) => iri.startsWith(ns.iri));
    if (owner === undefined) return;

    const localName = iri.slice(owner.iri.length);
    const existing = termsByPrefix.get(owner.prefix);
    if (existing) {
      existing.add(localName);
    } else {
      termsByPrefix.set(owner.prefix, new Set([localName]));
    }
  });

  return termsByPrefix;
}

/**
 * Renders the barrel file that re-exports every generated file.
 *
 * @param files Generated files keyed by base name; an existing "index" entry
 *   is skipped so the barrel does not re-export itself.
 * @returns One `export * from "./<name>";` line per file, sorted by name and
 *   terminated by a newline (a lone newline when there is nothing to export).
 */
function buildIndex(files: Map<string, string>): string {
  const reExports: string[] = [];
  for (const name of Array.from(files.keys()).sort()) {
    if (name === "index") continue;
    reExports.push(`export * from "./${name}";`);
  }
  return `${reExports.join("\n")}\n`;
}
Loading
Loading