Skip to content

Commit 287533d

Browse files
Gap Fix Pass 12: final 5 gaps (D-12/G-9/R-19/R-20/T-8) — 92/92 compliance complete
1 parent e558efb commit 287533d

12 files changed

Lines changed: 2166 additions & 0 deletions

File tree

.github/workflows/ci.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,8 @@ jobs:
303303
run: cmake --build build-fuzz --parallel
304304

305305
- name: Run fuzzers (60s each)
306+
env:
307+
ASAN_OPTIONS: allocator_may_return_null=1:detect_leaks=0
306308
run: |
307309
mkdir -p fuzz-artifacts
308310
for target in fuzz_parquet_reader fuzz_thrift_decoder fuzz_wal_reader \
@@ -313,6 +315,8 @@ jobs:
313315
mkdir -p fuzz-artifacts/$target
314316
./build-fuzz/$target \
315317
-max_total_time=60 \
318+
-rss_limit_mb=2048 \
319+
-malloc_limit_mb=2048 \
316320
-print_final_stats=1 \
317321
-artifact_prefix=fuzz-artifacts/$target/ \
318322
|| true

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ if(SIGNET_BUILD_TESTS)
314314
tests/test_encryption.cpp
315315
tests/test_audit_chain.cpp
316316
tests/test_compliance_reports.cpp
317+
tests/test_gap_pass12.cpp
317318
)
318319
endif()
319320
add_executable(signet_tests ${SIGNET_TEST_SOURCES})

examples/basic_read.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,11 @@
2020
#include <algorithm>
2121
#include <iostream>
2222

23+
// Windows <mmsystem.h> defines TIME_MS as a macro — undefine to avoid collision.
24+
#ifdef TIME_MS
25+
#undef TIME_MS
26+
#endif
27+
2328
using namespace signet::forge;
2429

2530
// Helper: convert PhysicalType enum to a human-readable name.
Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
// SPDX-License-Identifier: BUSL-1.1
2+
// Copyright 2026 Johnson Ogundeji
3+
// Change Date: January 1, 2030 | Change License: Apache-2.0
4+
// See LICENSE_COMMERCIAL for full terms.
5+
#pragma once
6+
7+
#if !defined(SIGNET_ENABLE_COMMERCIAL) || !SIGNET_ENABLE_COMMERCIAL
8+
#error "signet/ai/data_classification.hpp is a BSL 1.1 commercial module. Build with -DSIGNET_ENABLE_COMMERCIAL=ON."
9+
#endif
10+
11+
// ---------------------------------------------------------------------------
12+
// data_classification.hpp -- Formal Data Classification Ontology
13+
//
14+
// Gap G-9: Formal data classification ontology per DORA Art.8 + GDPR Art.32.
15+
//
16+
// Provides a structured, machine-readable data classification framework:
17+
// - DORA Art.8: ICT asset classification (data at rest, in transit)
18+
// - GDPR Art.9: Special categories of personal data
19+
// - GDPR Art.32: Appropriate security measures per classification
20+
// - NIST SP 800-60: Information types and security categorization
21+
//
22+
// Components:
23+
// - DataClassification: 4-tier confidentiality levels
24+
// - DataSensitivity: GDPR Art.9 special category types
25+
// - RegulatoryRegime: applicable regulatory frameworks
26+
// - DataClassificationRule: per-field classification + handling policy
27+
// - DataClassificationOntology: rule registry with validation
28+
//
29+
// Header-only. Part of the signet::forge AI module.
30+
// ---------------------------------------------------------------------------
31+
32+
#include "signet/error.hpp"
33+
34+
#include <algorithm>
35+
#include <cstdint>
36+
#include <string>
37+
#include <unordered_map>
38+
#include <vector>
39+
40+
namespace signet::forge {
41+
42+
// ---------------------------------------------------------------------------
43+
// Enumerations
44+
// ---------------------------------------------------------------------------
45+
46+
/// Confidentiality tier assigned to a field (DORA Art.8 + ISO 27001 Annex A).
///
/// Enumerator values are explicit and grow with the confidentiality level,
/// from PUBLIC (0) up to HIGHLY_RESTRICTED (3).
enum class DataClassification : int32_t {
    /// No confidentiality requirement.
    PUBLIC = 0,

    /// Business-internal; not for external sharing.
    INTERNAL = 1,

    /// Regulated data (GDPR, FCA, MiFID II).
    RESTRICTED = 2,

    /// Cryptographic keys, trading secrets, PII.
    HIGHLY_RESTRICTED = 3
};
53+
54+
/// Sensitivity category of a field, following GDPR Art.9 special categories.
///
/// Values are explicit; DataClassificationOntology::validate_handling compares
/// them with `>= PII`, so the numeric ordering is significant.
enum class DataSensitivity : int32_t {
    /// No special sensitivity.
    NEUTRAL = 0,

    /// Identifiable only with an additional key (Art.25).
    PSEUDONYMISED = 1,

    /// Irreversibly de-identified (Art.4(1)).
    ANONYMISED = 2,

    /// Personally Identifiable Information.
    PII = 3,

    /// Financial account data, trading activity.
    FINANCIAL_PII = 4,

    /// Biometric data (Art.9 special category).
    BIOMETRIC = 5,

    /// Health/genetic data (Art.9 special category).
    HEALTH = 6
};
64+
65+
/// Regulatory framework that governs a field's data.
///
/// A rule carries exactly one of these values; NONE marks data with no
/// regime attached.
enum class RegulatoryRegime : int32_t {
    /// No regulatory regime attached.
    NONE = 0,

    /// EU General Data Protection Regulation.
    GDPR = 1,

    /// Markets in Financial Instruments Directive II.
    MIFID2 = 2,

    /// Digital Operational Resilience Act.
    DORA = 3,

    /// EU Artificial Intelligence Act.
    EU_AI_ACT = 4,

    /// Sarbanes-Oxley Act.
    SOX = 5,

    /// SEC Rule 17a-4 (records retention).
    SEC_17A4 = 6,

    /// Payment Card Industry Data Security Standard.
    PCI_DSS = 7,

    /// Health Insurance Portability and Accountability Act.
    HIPAA = 8
};
77+
78+
// ---------------------------------------------------------------------------
79+
// DataClassificationRule
80+
// ---------------------------------------------------------------------------
81+
82+
/// Per-field data classification and handling policy.
83+
/// Defined at namespace scope for Apple Clang compatibility.
84+
struct DataClassificationRule {
85+
std::string field_name; ///< Column/field path (e.g., "user.email", "price")
86+
DataClassification classification = DataClassification::INTERNAL;
87+
DataSensitivity sensitivity = DataSensitivity::NEUTRAL;
88+
RegulatoryRegime regime = RegulatoryRegime::NONE;
89+
90+
// --- Retention lifecycle ---
91+
int64_t min_retention_ns = 0; ///< Minimum retention (0 = no min)
92+
int64_t max_retention_ns = INT64_C(157788000000000000); ///< Max retention (default 5y)
93+
94+
// --- Processing restrictions ---
95+
bool require_encryption = false; ///< RESTRICTED/HIGHLY_RESTRICTED → true
96+
bool allow_pseudonymisation = true;
97+
bool allow_aggregation = true;
98+
bool allow_ml_training = true; ///< PII, secrets → false
99+
bool allow_export = true; ///< HIGHLY_RESTRICTED → false
100+
bool allow_logging = true; ///< Biometric, health → false in plaintext
101+
102+
// --- Purpose limitation (GDPR Art.5(1)(b)) ---
103+
std::vector<std::string> allowed_purposes;
104+
};
105+
106+
// ---------------------------------------------------------------------------
107+
// DataClassificationOntology
108+
// ---------------------------------------------------------------------------
109+
110+
/// A named collection of data classification rules forming a formal ontology.
111+
///
112+
/// Validates field-level data handling against the registered rules.
113+
/// Supports lookup by field name and bulk validation.
114+
class DataClassificationOntology {
115+
public:
116+
/// Construct an ontology with the given identifier.
117+
explicit DataClassificationOntology(const std::string& ontology_id = "default")
118+
: ontology_id_(ontology_id) {}
119+
120+
/// Add a classification rule for a field.
121+
void add_rule(const DataClassificationRule& rule) {
122+
rules_[rule.field_name] = rule;
123+
}
124+
125+
/// Look up the classification rule for a field.
126+
/// Returns a default PUBLIC/NEUTRAL rule if the field is not registered.
127+
[[nodiscard]] DataClassificationRule lookup(const std::string& field_name) const {
128+
auto it = rules_.find(field_name);
129+
if (it != rules_.end()) return it->second;
130+
DataClassificationRule dflt;
131+
dflt.field_name = field_name;
132+
dflt.classification = DataClassification::PUBLIC;
133+
dflt.sensitivity = DataSensitivity::NEUTRAL;
134+
return dflt;
135+
}
136+
137+
/// Get all registered rules.
138+
[[nodiscard]] std::vector<DataClassificationRule> all_rules() const {
139+
std::vector<DataClassificationRule> out;
140+
out.reserve(rules_.size());
141+
for (const auto& [_, r] : rules_) out.push_back(r);
142+
return out;
143+
}
144+
145+
/// Number of registered rules.
146+
[[nodiscard]] size_t size() const { return rules_.size(); }
147+
148+
/// Ontology identifier.
149+
[[nodiscard]] const std::string& ontology_id() const { return ontology_id_; }
150+
151+
/// Validate that a field's actual handling meets classification requirements.
152+
///
153+
/// Returns an error if the field is classified above the actual sensitivity
154+
/// level (e.g., a HIGHLY_RESTRICTED field being processed without encryption).
155+
[[nodiscard]] expected<void> validate_handling(
156+
const std::string& field_name,
157+
bool is_encrypted,
158+
bool is_pseudonymised,
159+
bool purpose_is_allowed = true) const
160+
{
161+
auto rule = lookup(field_name);
162+
163+
// HIGHLY_RESTRICTED or RESTRICTED fields must be encrypted
164+
if (rule.require_encryption && !is_encrypted) {
165+
return Error{ErrorCode::INVALID_ARGUMENT,
166+
"Data classification violation: field '" + field_name +
167+
"' requires encryption (classification=" +
168+
classification_name(rule.classification) + ")"};
169+
}
170+
171+
// PII fields should be pseudonymised unless explicitly allowed
172+
if (rule.sensitivity >= DataSensitivity::PII &&
173+
!is_pseudonymised && !rule.allow_logging) {
174+
return Error{ErrorCode::INVALID_ARGUMENT,
175+
"Data classification violation: field '" + field_name +
176+
"' contains sensitive data and must be pseudonymised for logging"};
177+
}
178+
179+
// Purpose limitation check
180+
if (!purpose_is_allowed && !rule.allowed_purposes.empty()) {
181+
return Error{ErrorCode::INVALID_ARGUMENT,
182+
"Data classification violation: field '" + field_name +
183+
"' processing purpose not in allowed list (GDPR Art.5(1)(b))"};
184+
}
185+
186+
return {};
187+
}
188+
189+
/// Build a default ontology with standard financial/compliance field rules.
190+
[[nodiscard]] static DataClassificationOntology financial_default() {
191+
DataClassificationOntology ont("financial-default");
192+
193+
// Public data
194+
ont.add_rule({"symbol", DataClassification::PUBLIC,
195+
DataSensitivity::NEUTRAL, RegulatoryRegime::NONE});
196+
ont.add_rule({"timestamp", DataClassification::PUBLIC,
197+
DataSensitivity::NEUTRAL, RegulatoryRegime::NONE});
198+
199+
// Internal market data
200+
{
201+
DataClassificationRule r;
202+
r.field_name = "price";
203+
r.classification = DataClassification::INTERNAL;
204+
r.sensitivity = DataSensitivity::NEUTRAL;
205+
r.regime = RegulatoryRegime::MIFID2;
206+
r.min_retention_ns = INT64_C(157788000000000000); // 5y MiFID II
207+
ont.add_rule(r);
208+
}
209+
{
210+
DataClassificationRule r;
211+
r.field_name = "volume";
212+
r.classification = DataClassification::INTERNAL;
213+
r.sensitivity = DataSensitivity::NEUTRAL;
214+
r.regime = RegulatoryRegime::MIFID2;
215+
r.min_retention_ns = INT64_C(157788000000000000);
216+
ont.add_rule(r);
217+
}
218+
219+
// Restricted trading data
220+
{
221+
DataClassificationRule r;
222+
r.field_name = "strategy_id";
223+
r.classification = DataClassification::RESTRICTED;
224+
r.sensitivity = DataSensitivity::NEUTRAL;
225+
r.regime = RegulatoryRegime::MIFID2;
226+
r.require_encryption = true;
227+
r.allow_ml_training = false;
228+
r.min_retention_ns = INT64_C(157788000000000000);
229+
ont.add_rule(r);
230+
}
231+
232+
// Highly restricted PII
233+
{
234+
DataClassificationRule r;
235+
r.field_name = "trader_id";
236+
r.classification = DataClassification::HIGHLY_RESTRICTED;
237+
r.sensitivity = DataSensitivity::FINANCIAL_PII;
238+
r.regime = RegulatoryRegime::GDPR;
239+
r.require_encryption = true;
240+
r.allow_ml_training = false;
241+
r.allow_export = false;
242+
r.allow_logging = false;
243+
r.allowed_purposes = {"compliance-reporting", "regulatory-inquiry"};
244+
ont.add_rule(r);
245+
}
246+
247+
// Cryptographic key material
248+
{
249+
DataClassificationRule r;
250+
r.field_name = "encryption_key";
251+
r.classification = DataClassification::HIGHLY_RESTRICTED;
252+
r.sensitivity = DataSensitivity::NEUTRAL;
253+
r.regime = RegulatoryRegime::PCI_DSS;
254+
r.require_encryption = true;
255+
r.allow_pseudonymisation = false;
256+
r.allow_aggregation = false;
257+
r.allow_ml_training = false;
258+
r.allow_export = false;
259+
r.allow_logging = false;
260+
ont.add_rule(r);
261+
}
262+
263+
return ont;
264+
}
265+
266+
private:
267+
static std::string classification_name(DataClassification c) {
268+
switch (c) {
269+
case DataClassification::PUBLIC: return "PUBLIC";
270+
case DataClassification::INTERNAL: return "INTERNAL";
271+
case DataClassification::RESTRICTED: return "RESTRICTED";
272+
case DataClassification::HIGHLY_RESTRICTED: return "HIGHLY_RESTRICTED";
273+
}
274+
return "UNKNOWN";
275+
}
276+
277+
std::string ontology_id_;
278+
std::unordered_map<std::string, DataClassificationRule> rules_;
279+
};
280+
281+
} // namespace signet::forge

0 commit comments

Comments
 (0)