diff --git a/data-code-hosting-manifest-guard/package.json b/data-code-hosting-manifest-guard/package.json
new file mode 100644
index 00000000..22c04385
--- /dev/null
+++ b/data-code-hosting-manifest-guard/package.json
@@ -0,0 +1,11 @@
+{
+ "name": "data-code-hosting-manifest-guard",
+ "version": "1.0.0",
+ "private": true,
+ "type": "module",
+ "scripts": {
+ "check": "node --check src/index.js && node --check scripts/demo.js && node --check test/index.test.js",
+ "test": "node --test test/index.test.js",
+ "demo": "node scripts/demo.js"
+ }
+}
diff --git a/data-code-hosting-manifest-guard/readme.md b/data-code-hosting-manifest-guard/readme.md
new file mode 100644
index 00000000..c2fa9062
--- /dev/null
+++ b/data-code-hosting-manifest-guard/readme.md
@@ -0,0 +1,26 @@
+# Data and Code Hosting Manifest Guard
+
+This module is a focused implementation slice for SCIBASE issue #14, Scientific/Engineering Data & Code Hosting.
+
+It evaluates a scientific project repository manifest before a data/code release is published. The guard checks:
+
+- FAIR-style metadata presence, including title, DOI, license, authors, keywords, and schema version.
+- Dataset and executable artifact registration.
+- SHA-256 artifact hash locks.
+- Dataset metadata schemas and code runtime declarations.
+- Visibility, embargo release dates, and access policy shape.
+- Executable environment definitions.
+- Reproducibility commands and output digests.
+- Versioning strategy, release tag, and diffable review paths.
+
+The implementation is dependency-free and uses synthetic manifests only. It does not call external services, read private projects, mutate repositories, issue DOIs, upload files, or contact storage providers.
+
+## Commands
+
+```bash
+npm run check
+npm test
+npm run demo
+```
+
+`npm run demo` writes a JSON packet, Markdown report, and SVG summary under `reports/`.
diff --git a/data-code-hosting-manifest-guard/reports/manifest-guard-packet.json b/data-code-hosting-manifest-guard/reports/manifest-guard-packet.json
new file mode 100644
index 00000000..9829b697
--- /dev/null
+++ b/data-code-hosting-manifest-guard/reports/manifest-guard-packet.json
@@ -0,0 +1,123 @@
+{
+ "generatedAt": "2026-06-04T00:00:00.000Z",
+ "readyResult": {
+ "status": "ready_for_repository_release",
+ "summary": {
+ "artifactCount": 2,
+ "datasetCount": 1,
+ "codeCount": 1,
+ "blockerCount": 0,
+ "warningCount": 0
+ },
+ "blockers": [],
+ "warnings": [],
+ "auditDigest": "b00f635cbe73f436a63d6338a7b6fe87dad6a923c75943df5d0e2b33c704e44a"
+ },
+ "blockedResult": {
+ "status": "block_publication",
+ "summary": {
+ "artifactCount": 1,
+ "datasetCount": 1,
+ "codeCount": 0,
+ "blockerCount": 10,
+ "warningCount": 5
+ },
+ "blockers": [
+ {
+ "code": "missing_metadata",
+ "message": "Required metadata field 'authors' is missing.",
+ "evidence": "authors"
+ },
+ {
+ "code": "missing_metadata",
+ "message": "Required metadata field 'license' is missing.",
+ "evidence": "license"
+ },
+ {
+ "code": "missing_metadata",
+ "message": "Required metadata field 'doi' is missing.",
+ "evidence": "doi"
+ },
+ {
+ "code": "missing_metadata",
+ "message": "Required metadata field 'keywords' is missing.",
+ "evidence": "keywords"
+ },
+ {
+ "code": "missing_metadata",
+ "message": "Required metadata field 'schemaVersion' is missing.",
+ "evidence": "schemaVersion"
+ },
+ {
+ "code": "invalid_artifact_hash",
+ "message": "Artifact 'unhashed-export' does not contain a valid SHA-256 hash lock.",
+ "evidence": "missing"
+ },
+ {
+ "code": "embargo_without_release_date",
+ "message": "Embargoed repositories need an explicit release date.",
+ "evidence": {
+ "visibility": "embargoed"
+ }
+ },
+ {
+ "code": "missing_execution_environment",
+ "message": "Executable repository hosting needs a Dockerfile, environment.yml, notebook kernel, or equivalent runtime definition.",
+ "evidence": {
+ "kind": "docker",
+ "definition": ""
+ }
+ },
+ {
+ "code": "missing_reproducibility_commands",
+ "message": "Repository needs at least one command that reproduces or verifies the hosted outputs.",
+ "evidence": {
+ "commands": []
+ }
+ },
+ {
+ "code": "missing_versioning_policy",
+ "message": "Repository needs explicit versioning strategy and current tag metadata.",
+ "evidence": {
+ "strategy": "",
+ "currentTag": ""
+ }
+ }
+ ],
+ "warnings": [
+ {
+ "code": "missing_code_artifact",
+ "message": "No executable code, notebook, package, or model artifact is registered.",
+ "evidence": [
+ "dataset"
+ ]
+ },
+ {
+ "code": "dataset_schema_missing",
+ "message": "Dataset 'unhashed-export' does not declare a metadata schema.",
+ "evidence": "unhashed-export"
+ },
+ {
+ "code": "docker_definition_unclear",
+ "message": "Docker runtime is declared without a Dockerfile-style definition pointer.",
+ "evidence": ""
+ },
+ {
+ "code": "missing_output_digest",
+ "message": "Reproducibility command has no expected output digest.",
+ "evidence": {
+ "commands": []
+ }
+ },
+ {
+ "code": "missing_diffable_paths",
+ "message": "No diffable paths are declared for dataset/code review.",
+ "evidence": {
+ "strategy": "",
+ "currentTag": ""
+ }
+ }
+ ],
+ "auditDigest": "777f3022d220fed8225ca1f9586d09274e35b149e2cd764a753769c72c8dd951"
+ }
+}
diff --git a/data-code-hosting-manifest-guard/reports/manifest-guard-report.md b/data-code-hosting-manifest-guard/reports/manifest-guard-report.md
new file mode 100644
index 00000000..d9f7ee24
--- /dev/null
+++ b/data-code-hosting-manifest-guard/reports/manifest-guard-report.md
@@ -0,0 +1,33 @@
+# Data and Code Hosting Manifest Guard
+
+Status: block_publication
+Audit digest: 777f3022d220fed8225ca1f9586d09274e35b149e2cd764a753769c72c8dd951
+
+## Summary
+
+- Artifacts: 1
+- Dataset artifacts: 1
+- Code artifacts: 0
+- Blockers: 10
+- Warnings: 5
+
+## Blockers
+
+- missing_metadata: Required metadata field 'authors' is missing.
+- missing_metadata: Required metadata field 'license' is missing.
+- missing_metadata: Required metadata field 'doi' is missing.
+- missing_metadata: Required metadata field 'keywords' is missing.
+- missing_metadata: Required metadata field 'schemaVersion' is missing.
+- invalid_artifact_hash: Artifact 'unhashed-export' does not contain a valid SHA-256 hash lock.
+- embargo_without_release_date: Embargoed repositories need an explicit release date.
+- missing_execution_environment: Executable repository hosting needs a Dockerfile, environment.yml, notebook kernel, or equivalent runtime definition.
+- missing_reproducibility_commands: Repository needs at least one command that reproduces or verifies the hosted outputs.
+- missing_versioning_policy: Repository needs explicit versioning strategy and current tag metadata.
+
+## Warnings
+
+- missing_code_artifact: No executable code, notebook, package, or model artifact is registered.
+- dataset_schema_missing: Dataset 'unhashed-export' does not declare a metadata schema.
+- docker_definition_unclear: Docker runtime is declared without a Dockerfile-style definition pointer.
+- missing_output_digest: Reproducibility command has no expected output digest.
+- missing_diffable_paths: No diffable paths are declared for dataset/code review.
diff --git a/data-code-hosting-manifest-guard/reports/summary.svg b/data-code-hosting-manifest-guard/reports/summary.svg
new file mode 100644
index 00000000..6120056c
--- /dev/null
+++ b/data-code-hosting-manifest-guard/reports/summary.svg
@@ -0,0 +1,11 @@
+
diff --git a/data-code-hosting-manifest-guard/scripts/demo.js b/data-code-hosting-manifest-guard/scripts/demo.js
new file mode 100644
index 00000000..b41c46fd
--- /dev/null
+++ b/data-code-hosting-manifest-guard/scripts/demo.js
@@ -0,0 +1,92 @@
+import { mkdir, writeFile } from "node:fs/promises";
+import { createSampleManifest, evaluateRepositoryManifest } from "../src/index.js";
+
+const reportDir = new URL("../reports/", import.meta.url);
+
+// Escapes &, <, > and " so a value can be embedded in XML/SVG text or a double-quoted attribute.
+function escapeXml(value) {
+  return String(value)
+    .replaceAll("&", "&amp;") // ampersand first, so the entities added below are not escaped again
+    .replaceAll("<", "&lt;").replaceAll(">", "&gt;")
+    .replaceAll('"', "&quot;");
+}
+
+// Builds the Markdown report: status, audit digest, summary counts, then the blocker and warning lists.
+function renderMarkdown(result) {
+  // One bullet per finding, or a single "- None" bullet when the list is empty.
+  const bullets = (findings) => (findings.length > 0 ? findings.map(({ code, message }) => `- ${code}: ${message}`) : ["- None"]);
+  const { artifactCount, datasetCount, codeCount, blockerCount, warningCount } = result.summary;
+  const sections = [
+    "# Data and Code Hosting Manifest Guard",
+    "",
+    `Status: ${result.status}`,
+    `Audit digest: ${result.auditDigest}`,
+    "",
+    "## Summary",
+    "",
+    `- Artifacts: ${artifactCount}`,
+    `- Dataset artifacts: ${datasetCount}`,
+    `- Code artifacts: ${codeCount}`,
+    `- Blockers: ${blockerCount}`,
+    `- Warnings: ${warningCount}`,
+    "",
+    "## Blockers",
+    "",
+    ...bullets(result.blockers), "", "## Warnings", "", ...bullets(result.warnings), "" // final "" yields the trailing newline
+  ];
+  return sections.join("\n");
+}
+
+// Renders the SVG status card. NOTE(review): the markup below is a reconstruction (the original template body is absent here) — confirm layout and copy.
+function renderSvg(result) {
+  const statusColor = result.status === "ready_for_repository_release" ? "#15803d" : result.status === "needs_metadata_review" ? "#a16207" : "#b91c1c";
+  const { artifactCount, blockerCount, warningCount } = result.summary;
+  return `<svg xmlns="http://www.w3.org/2000/svg" width="720" height="200" viewBox="0 0 720 200" role="img" aria-label="Manifest guard summary">
+  <rect width="720" height="200" rx="12" fill="#f8fafc"/>
+  <rect width="720" height="12" fill="${statusColor}"/>
+  <text x="24" y="56" font-family="sans-serif" font-size="22" font-weight="700" fill="#0f172a">Data and Code Hosting Manifest Guard</text>
+  <text x="24" y="92" font-family="sans-serif" font-size="18" fill="${statusColor}">Status: ${escapeXml(result.status)}</text>
+  <text x="24" y="124" font-family="sans-serif" font-size="16" fill="#334155">Artifacts: ${escapeXml(artifactCount)}</text>
+  <text x="24" y="150" font-family="sans-serif" font-size="16" fill="#334155">Blockers: ${escapeXml(blockerCount)} | Warnings: ${escapeXml(warningCount)}</text>
+  <text x="24" y="178" font-family="monospace" font-size="12" fill="#64748b">Audit digest: ${escapeXml(result.auditDigest)}</text>
+</svg>
+`;
+}
+
+// Demo entry point: evaluates one passing and one failing synthetic manifest, then writes the reports.
+await mkdir(reportDir, { recursive: true });
+
+// Deliberately incomplete manifest used to exercise the blocker and warning paths.
+const blockedManifest = {
+  metadata: { title: "Private lab dump" },
+  artifacts: [
+    { id: "unhashed-export", type: "dataset", format: "xlsx", sha256: "missing", version: "draft", license: "custom-lab-license" }
+  ],
+  access: { visibility: "embargoed" },
+  environment: { kind: "docker", definition: "" },
+  reproducibility: { commands: [] },
+  versioning: { strategy: "", currentTag: "" }
+};
+
+const readyResult = evaluateRepositoryManifest(createSampleManifest());
+const blockedResult = evaluateRepositoryManifest(blockedManifest);
+
+// generatedAt comes from a fixed date literal, so it does not vary between runs.
+const packet = {
+  generatedAt: new Date("2026-06-04T00:00:00.000Z").toISOString(),
+  readyResult,
+  blockedResult
+};
+
+// The Markdown and SVG reports describe the blocked manifest only; the JSON packet carries both results.
+const reportFiles = [
+  ["manifest-guard-packet.json", `${JSON.stringify(packet, null, 2)}\n`],
+  ["manifest-guard-report.md", renderMarkdown(blockedResult)],
+  ["summary.svg", renderSvg(blockedResult)]
+];
+for (const [fileName, contents] of reportFiles) {
+  await writeFile(new URL(fileName, reportDir), contents);
+}
+
+const { status, summary, auditDigest } = blockedResult;
+console.log(JSON.stringify({ status, blockers: summary.blockerCount, warnings: summary.warningCount, auditDigest }, null, 2));
diff --git a/data-code-hosting-manifest-guard/src/index.js b/data-code-hosting-manifest-guard/src/index.js
new file mode 100644
index 00000000..1a03274d
--- /dev/null
+++ b/data-code-hosting-manifest-guard/src/index.js
@@ -0,0 +1,183 @@
+import { createHash } from "node:crypto";
+
+const REQUIRED_METADATA = ["title", "authors", "license", "doi", "keywords", "schemaVersion"]; // repository-level fields; each one missing or blank is a blocker
+const REQUIRED_ARTIFACT_FIELDS = ["id", "type", "format", "sha256", "version", "license"]; // per-artifact fields that must be non-blank strings
+const DATASET_TYPES = new Set(["dataset", "supplement", "instrument-output"]); // artifact types counted toward datasetCount
+const CODE_TYPES = new Set(["script", "notebook", "package", "model"]); // artifact types counted toward codeCount and checked for a runtime
+const ALLOWED_VISIBILITY = new Set(["public", "private", "institutional", "embargoed"]); // accepted access.visibility values
+const REUSABLE_LICENSES = new Set(["CC-BY-4.0", "CC0-1.0", "MIT", "Apache-2.0", "BSD-3-Clause"]); // metadata.license values that do not raise weak_reuse_license
+
+function hasText(value) { // true only for strings that contain a non-whitespace character
+  return typeof value === "string" ? value.trim() !== "" : false;
+}
+
+function hasArray(value) { // true only for arrays holding at least one element
+  return Array.isArray(value) ? value.length !== 0 : false;
+}
+
+function addFinding(collection, code, message, evidence) { // appends one { code, message, evidence } record
+  collection[collection.length] = { code, message, evidence };
+}
+
+function normalizeArtifacts(manifest) { // returns manifest.artifacts when it is an array, otherwise []
+  return Array.isArray(manifest?.artifacts) ? manifest.artifacts : []; // ?. tolerates a null or undefined manifest
+}
+
+function digestPayload(payload) { // hex-encoded SHA-256 of JSON.stringify(payload)
+  return createHash("sha256").update(JSON.stringify(payload), "utf8").digest("hex");
+}
+
+/**
+ * Evaluates a repository manifest before a data/code release and collects blockers and warnings.
+ *
+ * @param {object} [manifest] - May carry metadata, artifacts, access, environment, reproducibility,
+ *   and versioning sections; a missing manifest or section is evaluated as empty.
+ * @param {{ warningBudget?: number }} [options] - warningBudget (default 3) is how many warnings
+ *   are tolerated before a blocker-free manifest is marked "needs_metadata_review".
+ * @returns {{ status: string, summary: object, blockers: object[], warnings: object[], auditDigest: string }}
+ *   status is "block_publication" when any blocker exists; auditDigest hashes the other four fields.
+ */
+export function evaluateRepositoryManifest(manifest, options = {}) {
+  const blockers = [];
+  const warnings = [];
+  const metadata = manifest?.metadata ?? {};
+  const artifacts = normalizeArtifacts(manifest ?? {});
+  // artifact?.type keeps a null or undefined entry from throwing; it counts as neither kind.
+  const artifactTypes = artifacts.map((artifact) => artifact?.type);
+  const datasetCount = artifactTypes.filter((type) => DATASET_TYPES.has(type)).length;
+  const codeCount = artifactTypes.filter((type) => CODE_TYPES.has(type)).length;
+
+  for (const field of REQUIRED_METADATA) {
+    const value = metadata[field];
+    const present = Array.isArray(value) ? hasArray(value) : hasText(value);
+    if (!present) {
+      addFinding(blockers, "missing_metadata", `Required metadata field '${field}' is missing.`, field);
+    }
+  }
+  if (metadata.license && !REUSABLE_LICENSES.has(metadata.license)) {
+    addFinding(warnings, "weak_reuse_license", "Repository license is not in the preferred reusable-license set.", metadata.license);
+  }
+
+  if (!hasArray(artifacts)) {
+    addFinding(blockers, "missing_artifacts", "At least one data, code, notebook, model, or supplementary artifact must be registered.", "artifacts");
+  }
+  if (datasetCount === 0) {
+    addFinding(blockers, "missing_dataset_artifact", "Scientific data hosting requires at least one registered dataset-like artifact.", [...artifactTypes]);
+  }
+  if (codeCount === 0) {
+    addFinding(warnings, "missing_code_artifact", "No executable code, notebook, package, or model artifact is registered.", [...artifactTypes]);
+  }
+
+  for (const artifact of artifacts) {
+    for (const field of REQUIRED_ARTIFACT_FIELDS) {
+      if (!hasText(artifact?.[field])) {
+        addFinding(blockers, "incomplete_artifact_manifest", `Artifact '${artifact?.id ?? "unknown"}' is missing '${field}'.`, artifact);
+      }
+    }
+    if (artifact?.sha256 && !/^[a-f0-9]{64}$/i.test(artifact.sha256)) {
+      addFinding(blockers, "invalid_artifact_hash", `Artifact '${artifact.id}' does not contain a valid SHA-256 hash lock.`, artifact.sha256);
+    }
+    if (artifact?.type === "dataset" && !hasText(artifact.metadataSchema)) {
+      addFinding(warnings, "dataset_schema_missing", `Dataset '${artifact.id}' does not declare a metadata schema.`, artifact.id);
+    }
+    if (CODE_TYPES.has(artifact?.type) && !hasText(artifact.runtime)) {
+      addFinding(warnings, "runtime_missing", `Executable artifact '${artifact.id}' does not declare its runtime.`, artifact.id);
+    }
+  }
+
+  const access = manifest?.access ?? {};
+  if (!ALLOWED_VISIBILITY.has(access.visibility)) {
+    addFinding(blockers, "invalid_visibility", "Repository visibility must be public, private, institutional, or embargoed.", access.visibility);
+  }
+  if (access.visibility === "embargoed" && !hasText(access.embargoEndsAt)) {
+    addFinding(blockers, "embargo_without_release_date", "Embargoed repositories need an explicit release date.", access);
+  }
+
+  const environment = manifest?.environment ?? {};
+  if (!hasText(environment.kind) || !hasText(environment.definition)) {
+    addFinding(blockers, "missing_execution_environment", "Executable repository hosting needs a Dockerfile, environment.yml, notebook kernel, or equivalent runtime definition.", environment);
+  }
+  // String(... ?? "") lets a docker environment with a missing or non-string definition warn instead of throwing.
+  if (environment.kind === "docker" && !String(environment.definition ?? "").toLowerCase().includes("dockerfile")) {
+    addFinding(warnings, "docker_definition_unclear", "Docker runtime is declared without a Dockerfile-style definition pointer.", environment.definition);
+  }
+
+  const reproducibility = manifest?.reproducibility ?? {};
+  if (!hasArray(reproducibility.commands)) {
+    addFinding(blockers, "missing_reproducibility_commands", "Repository needs at least one command that reproduces or verifies the hosted outputs.", reproducibility);
+  }
+  if (!hasText(reproducibility.expectedOutputHash)) {
+    addFinding(warnings, "missing_output_digest", "Reproducibility command has no expected output digest.", reproducibility);
+  }
+
+  const versioning = manifest?.versioning ?? {};
+  if (!hasText(versioning.strategy) || !hasText(versioning.currentTag)) {
+    addFinding(blockers, "missing_versioning_policy", "Repository needs explicit versioning strategy and current tag metadata.", versioning);
+  }
+  if (!hasArray(versioning.diffablePaths)) {
+    addFinding(warnings, "missing_diffable_paths", "No diffable paths are declared for dataset/code review.", versioning);
+  }
+
+  // Blockers always win; otherwise the warning count is compared against the budget.
+  const warningBudget = options?.warningBudget ?? 3;
+  let status = "ready_for_repository_release";
+  if (blockers.length > 0) {
+    status = "block_publication";
+  } else if (warnings.length > warningBudget) {
+    status = "needs_metadata_review";
+  }
+  const summary = { artifactCount: artifacts.length, datasetCount, codeCount, blockerCount: blockers.length, warningCount: warnings.length };
+  const result = { status, summary, blockers, warnings };
+  // The digest is computed over the result before auditDigest itself is attached.
+  return { ...result, auditDigest: digestPayload(result) };
+}
+
+/**
+ * Builds the synthetic manifest used as the passing example; top-level keys in overrides replace whole sections.
+ */
+export function createSampleManifest(overrides = {}) {
+  const releaseTag = "v1.0.0";
+  const dataLicense = "CC-BY-4.0";
+  const dataset = {
+    id: "river-readings-2026",
+    type: "dataset",
+    format: "csv",
+    sha256: "a".repeat(64),
+    version: releaseTag,
+    license: dataLicense,
+    metadataSchema: "DataCite 4.5"
+  };
+  const notebook = {
+    id: "calibration-notebook",
+    type: "notebook",
+    format: "ipynb",
+    sha256: "b".repeat(64),
+    version: releaseTag,
+    license: "MIT",
+    runtime: "python-3.12"
+  };
+  const defaults = {
+    metadata: {
+      title: "Open river sensor calibration study",
+      authors: ["SCIBASE Synthetic Lab"],
+      license: dataLicense,
+      doi: "10.5555/scibase.synthetic.001",
+      keywords: ["hydrology", "sensor-calibration", "open-data"],
+      schemaVersion: "scibase-repository-manifest/v1"
+    },
+    artifacts: [dataset, notebook],
+    access: { visibility: "public" },
+    environment: { kind: "docker", definition: "Dockerfile" },
+    reproducibility: {
+      commands: ["python notebooks/reproduce.py"],
+      expectedOutputHash: "c".repeat(64)
+    },
+    versioning: {
+      strategy: "semantic-versioning",
+      currentTag: releaseTag,
+      diffablePaths: ["data/", "notebooks/", "metadata.json"]
+    }
+  };
+  // Shallow merge: an override such as { metadata: {...} } swaps the entire section.
+  return { ...defaults, ...overrides };
+}
diff --git a/data-code-hosting-manifest-guard/test/index.test.js b/data-code-hosting-manifest-guard/test/index.test.js
new file mode 100644
index 00000000..1c49e553
--- /dev/null
+++ b/data-code-hosting-manifest-guard/test/index.test.js
@@ -0,0 +1,84 @@
+import test from "node:test";
+import assert from "node:assert/strict";
+import { createSampleManifest, evaluateRepositoryManifest } from "../src/index.js";
+
+test("ready repository manifests pass with stable audit evidence", () => { // the unmodified sample manifest must be release-ready
+ const result = evaluateRepositoryManifest(createSampleManifest());
+
+ assert.equal(result.status, "ready_for_repository_release");
+ assert.equal(result.summary.artifactCount, 2);
+ assert.equal(result.summary.datasetCount, 1);
+ assert.equal(result.summary.codeCount, 1);
+ assert.equal(result.summary.blockerCount, 0);
+ assert.match(result.auditDigest, /^[a-f0-9]{64}$/); // checks the digest's shape (64 hex chars) only, not its value
+});
+
+test("missing FAIR metadata and artifacts block publication", () => { // only a title, an empty artifact list, and public access are supplied
+ const result = evaluateRepositoryManifest({
+ metadata: { title: "Incomplete repository" },
+ artifacts: [],
+ access: { visibility: "public" }
+ });
+
+ assert.equal(result.status, "block_publication");
+ assert.ok(result.blockers.some((finding) => finding.code === "missing_metadata" && finding.evidence === "doi")); // evidence carries the missing field name
+ assert.ok(result.blockers.some((finding) => finding.code === "missing_dataset_artifact"));
+ assert.ok(result.blockers.some((finding) => finding.code === "missing_execution_environment")); // environment section is absent entirely
+ assert.ok(result.blockers.some((finding) => finding.code === "missing_reproducibility_commands")); // reproducibility section is absent entirely
+});
+
+test("invalid hash locks and embargo metadata are caught", () => { // overrides replace the sample's artifacts and access sections wholesale
+ const manifest = createSampleManifest({
+ artifacts: [
+ {
+ id: "supplement",
+ type: "dataset",
+ format: "json",
+ sha256: "not-a-real-hash", // non-empty but not 64 hex characters
+ version: "v1.0.0",
+ license: "CC0-1.0",
+ metadataSchema: "schema.org/Dataset"
+ }
+ ],
+ access: { visibility: "embargoed" } // no embargoEndsAt is provided
+ });
+
+ const result = evaluateRepositoryManifest(manifest);
+
+ assert.equal(result.status, "block_publication");
+ assert.ok(result.blockers.some((finding) => finding.code === "invalid_artifact_hash"));
+ assert.ok(result.blockers.some((finding) => finding.code === "embargo_without_release_date"));
+});
+
+test("warning budget moves risky manifests into metadata review", () => { // manifest with warnings but no blockers
+ const manifest = createSampleManifest({
+ metadata: {
+ ...createSampleManifest().metadata,
+ license: "custom-lab-license" // not in the reusable-license set
+ },
+ artifacts: [
+ {
+ id: "river-readings-2026",
+ type: "dataset",
+ format: "csv",
+ sha256: "a".repeat(64),
+ version: "v1.0.0",
+ license: "custom-lab-license"
+ }
+ ],
+ reproducibility: {
+ commands: ["python notebooks/reproduce.py"] // expectedOutputHash deliberately omitted
+ },
+ versioning: {
+ strategy: "semantic-versioning",
+ currentTag: "v1.0.0" // diffablePaths deliberately omitted
+ }
+ });
+
+ const result = evaluateRepositoryManifest(manifest, { warningBudget: 2 }); // budget of 2 is exceeded by this manifest's warnings
+
+ assert.equal(result.status, "needs_metadata_review");
+ assert.ok(result.warnings.some((finding) => finding.code === "weak_reuse_license"));
+ assert.ok(result.warnings.some((finding) => finding.code === "missing_code_artifact"));
+ assert.ok(result.warnings.some((finding) => finding.code === "missing_output_digest"));
+});