Skip to content

Commit 44696f5

Browse files
Audit #8: fix 32 findings across 27 files (2C/10H/12M/8L)
Comprehensive API audit pass addressing security, correctness, and enterprise-readiness gaps across core Parquet, crypto, AI/compliance, Thrift, and interop layers. 779/779 tests pass.
1 parent 373a0d2 commit 44696f5

27 files changed

Lines changed: 229 additions & 84 deletions

CMakePresets.json

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,11 @@
176176
"cacheVariables": {
177177
"CMAKE_BUILD_TYPE": "Debug",
178178
"SIGNET_BUILD_TESTS": "ON",
179+
"SIGNET_ENABLE_COMMERCIAL": "ON",
180+
"SIGNET_ENABLE_CRYPTO": "ON",
181+
"SIGNET_ENABLE_ZSTD": "ON",
182+
"SIGNET_ENABLE_LZ4": "ON",
183+
"SIGNET_ENABLE_GZIP": "ON",
179184
"CMAKE_CXX_FLAGS": "-fprofile-instr-generate -fcoverage-mapping",
180185
"CMAKE_EXE_LINKER_FLAGS": "-fprofile-instr-generate"
181186
}

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3,11 +3,11 @@
33
**Standalone C++20 Parquet library with AI-native extensions.**
44
Zero mandatory dependencies. Header-mostly. Interoperable with Arrow, DuckDB, Spark, and Polars.
55

6-
[![CI](https://github.com/SIGNETSTACK/signet-forge/actions/workflows/ci.yml/badge.svg)](https://github.com/SIGNETSTACK/signet-forge/actions)
6+
[![CI](https://github.com/SIGNETSTACK/SIGNET_FORGE/actions/workflows/ci.yml/badge.svg)](https://github.com/SIGNETSTACK/SIGNET_FORGE/actions)
77
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](LICENSE)
88
[![C++20](https://img.shields.io/badge/C%2B%2B-20-blue.svg)](https://en.cppreference.com/w/cpp/20)
99
[![Python](https://img.shields.io/badge/Python-3.10%2B-blue.svg)](python/)
10-
[![codecov](https://codecov.io/gh/SIGNETSTACK/signet-forge/graph/badge.svg)](https://codecov.io/gh/SIGNETSTACK/signet-forge)
10+
[![codecov](https://codecov.io/gh/SIGNETSTACK/SIGNET_FORGE/graph/badge.svg)](https://codecov.io/gh/SIGNETSTACK/SIGNET_FORGE)
1111

1212
[**Try it in your browser — drag & drop any .parquet file**](https://signetstack.github.io/SIGNET_FORGE/demo/) | [**API Reference**](https://signetstack.github.io/SIGNET_FORGE/)
1313

include/signet/ai/audit_chain.hpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,9 +115,10 @@ inline int64_t now_ns() {
115115
// R-5: system_clock provides UTC traceability per MiFID II RTS 25 Art.2.
116116
auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
117117
std::chrono::system_clock::now().time_since_epoch()).count();
118-
int64_t expected = last_ns.load(std::memory_order_relaxed);
118+
int64_t expected = last_ns.load(std::memory_order_acquire);
119119
while (ns <= expected) { ns = expected + 1; }
120-
while (!last_ns.compare_exchange_weak(expected, ns, std::memory_order_relaxed)) {
120+
while (!last_ns.compare_exchange_weak(expected, ns,
121+
std::memory_order_release, std::memory_order_acquire)) {
121122
if (ns <= expected) ns = expected + 1;
122123
}
123124
return ns;

include/signet/ai/column_batch.hpp

Lines changed: 15 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -177,7 +177,8 @@ class ColumnBatch {
177177
[[nodiscard]] std::span<const double> column_span(size_t col_idx) const {
178178
if (col_idx >= columns_.size())
179179
return {};
180-
return {columns_[col_idx].data(), num_rows_};
180+
return {columns_[col_idx].data(),
181+
std::min(num_rows_, columns_[col_idx].size())};
181182
}
182183

183184
// -------------------------------------------------------------------------
@@ -236,8 +237,11 @@ class ColumnBatch {
236237

237238
// CWE-190: Integer Overflow or Wraparound — check row count fits in
238239
// uint32_t before narrowing cast into the serialization header.
239-
if (num_rows_ > static_cast<size_t>(UINT32_MAX))
240-
return StreamRecord{}; // silently return empty record for oversized batch
240+
if (num_rows_ > static_cast<size_t>(UINT32_MAX)) {
241+
throw std::overflow_error(
242+
"ColumnBatch::to_stream_record: num_rows exceeds UINT32_MAX ("
243+
+ std::to_string(num_rows_) + ") — batch too large for WAL serialization");
244+
}
241245
const auto ncols = static_cast<uint32_t>(schema_.size());
242246
const auto nrows = static_cast<uint32_t>(num_rows_);
243247

@@ -250,11 +254,15 @@ class ColumnBatch {
250254
{
251255
const size_t ncols_sz = static_cast<size_t>(ncols);
252256
const size_t nrows_sz = static_cast<size_t>(nrows);
253-
if (ncols_sz > 0 && nrows_sz > SIZE_MAX / ncols_sz)
254-
return StreamRecord{}; // overflow
257+
if (ncols_sz > 0 && nrows_sz > SIZE_MAX / ncols_sz) {
258+
throw std::overflow_error(
259+
"ColumnBatch::to_stream_record: ncols*nrows overflows size_t");
260+
}
255261
const size_t cells = ncols_sz * nrows_sz;
256-
if (cells > SIZE_MAX / sizeof(double))
257-
return StreamRecord{}; // overflow
262+
if (cells > SIZE_MAX / sizeof(double)) {
263+
throw std::overflow_error(
264+
"ColumnBatch::to_stream_record: payload size overflows size_t");
265+
}
258266
payload_bytes += sizeof(double) * cells;
259267
}
260268

include/signet/ai/compliance/eu_ai_act_reporter.hpp

Lines changed: 29 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -107,6 +107,7 @@ class EUAIActReporter {
107107
std::vector<InferenceRecord> records;
108108
bool chain_ok = true;
109109
std::string chain_id;
110+
int read_errors = 0;
110111

111112
for (const auto& path : inference_log_files) {
112113
auto rdr_result = InferenceLogReader::open(path);
@@ -124,7 +125,7 @@ class EUAIActReporter {
124125
if (meta) chain_id = meta->chain_id;
125126
}
126127
auto all = rdr.read_all();
127-
if (!all) continue;
128+
if (!all) { ++read_errors; continue; }
128129
for (auto& rec : *all)
129130
if (rec.timestamp_ns >= opts.start_ns &&
130131
rec.timestamp_ns <= opts.end_ns)
@@ -142,6 +143,11 @@ class EUAIActReporter {
142143
ComplianceReport report = make_report_skeleton(
143144
ComplianceStandard::EU_AI_ACT_ART12, opts,
144145
static_cast<int64_t>(records.size()), chain_ok, chain_id);
146+
if (read_errors > 0) {
147+
report.incomplete_data = true;
148+
report.read_errors.push_back("Art.12: failed to read " + std::to_string(read_errors)
149+
+ " of " + std::to_string(inference_log_files.size()) + " inference log files");
150+
}
145151

146152
report.content = format_article12_json(records, opts, report);
147153
return report;
@@ -171,6 +177,7 @@ class EUAIActReporter {
171177
std::vector<InferenceRecord> records;
172178
bool chain_ok = true;
173179
std::string chain_id;
180+
int read_errors = 0;
174181

175182
for (const auto& path : inference_log_files) {
176183
auto rdr_result = InferenceLogReader::open(path);
@@ -187,7 +194,7 @@ class EUAIActReporter {
187194
if (meta) chain_id = meta->chain_id;
188195
}
189196
auto all = rdr.read_all();
190-
if (!all) continue;
197+
if (!all) { ++read_errors; continue; }
191198
for (auto& rec : *all)
192199
if (rec.timestamp_ns >= opts.start_ns &&
193200
rec.timestamp_ns <= opts.end_ns)
@@ -201,6 +208,11 @@ class EUAIActReporter {
201208
ComplianceReport report = make_report_skeleton(
202209
ComplianceStandard::EU_AI_ACT_ART13, opts,
203210
static_cast<int64_t>(records.size()), chain_ok, chain_id);
211+
if (read_errors > 0) {
212+
report.incomplete_data = true;
213+
report.read_errors.push_back("Art.13: failed to read " + std::to_string(read_errors)
214+
+ " of " + std::to_string(inference_log_files.size()) + " inference log files");
215+
}
204216

205217
report.content = format_article13_json(records, opts, report);
206218
return report;
@@ -234,6 +246,7 @@ class EUAIActReporter {
234246
std::vector<DecisionRecord> dec_records;
235247
bool dec_chain_ok = true;
236248
std::string dec_chain_id;
249+
int dec_read_errors = 0;
237250

238251
for (const auto& path : decision_log_files) {
239252
auto rdr_result = DecisionLogReader::open(path);
@@ -250,7 +263,7 @@ class EUAIActReporter {
250263
if (meta) dec_chain_id = meta->chain_id;
251264
}
252265
auto all = rdr.read_all();
253-
if (!all) continue;
266+
if (!all) { ++dec_read_errors; continue; }
254267
for (auto& rec : *all)
255268
if (rec.timestamp_ns >= opts.start_ns &&
256269
rec.timestamp_ns <= opts.end_ns)
@@ -261,6 +274,7 @@ class EUAIActReporter {
261274
std::vector<InferenceRecord> inf_records;
262275
bool inf_chain_ok = true;
263276
std::string inf_chain_id;
277+
int inf_read_errors = 0;
264278

265279
for (const auto& path : inference_log_files) {
266280
auto rdr_result = InferenceLogReader::open(path);
@@ -277,7 +291,7 @@ class EUAIActReporter {
277291
if (meta) inf_chain_id = meta->chain_id;
278292
}
279293
auto all = rdr.read_all();
280-
if (!all) continue;
294+
if (!all) { ++inf_read_errors; continue; }
281295
for (auto& rec : *all)
282296
if (rec.timestamp_ns >= opts.start_ns &&
283297
rec.timestamp_ns <= opts.end_ns)
@@ -309,6 +323,16 @@ class EUAIActReporter {
309323
ComplianceReport report = make_report_skeleton(
310324
ComplianceStandard::EU_AI_ACT_ART19, opts, total, chain_ok,
311325
dec_chain_id.empty() ? inf_chain_id : dec_chain_id);
326+
if (dec_read_errors > 0) {
327+
report.incomplete_data = true;
328+
report.read_errors.push_back("Art.19: failed to read " + std::to_string(dec_read_errors)
329+
+ " of " + std::to_string(decision_log_files.size()) + " decision log files");
330+
}
331+
if (inf_read_errors > 0) {
332+
report.incomplete_data = true;
333+
report.read_errors.push_back("Art.19: failed to read " + std::to_string(inf_read_errors)
334+
+ " of " + std::to_string(inference_log_files.size()) + " inference log files");
335+
}
312336

313337
report.content = format_article19_json(
314338
dec_records, inf_records, opts, report,
@@ -553,7 +577,7 @@ class EUAIActReporter {
553577
o += ind2 + "\"level\":" + sp + std::to_string(opts.risk_level) + "," + nl;
554578
const char* risk_labels[] = {"","minimal","limited","high","unacceptable"};
555579
o += ind2 + "\"label\":" + sp + "\""
556-
+ std::string(opts.risk_level <= 4 ? risk_labels[opts.risk_level] : "unknown")
580+
+ std::string((opts.risk_level >= 0 && opts.risk_level <= 4) ? risk_labels[opts.risk_level] : "unknown")
557581
+ "\"" + nl;
558582
o += ind + "}," + nl;
559583
}

include/signet/ai/decision_log.hpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -402,9 +402,12 @@ struct DecisionRecord {
402402
return true;
403403
}
404404

405+
static constexpr uint32_t MAX_STRING_LEN = 16u * 1024u * 1024u; // 16 MB
406+
405407
static inline bool read_string(const uint8_t* data, size_t size, size_t& offset, std::string& out) {
406408
uint32_t len = 0;
407409
if (!read_le32_u(data, size, offset, len)) return false;
410+
if (len > MAX_STRING_LEN) return false;
408411
if (offset + len > size) return false;
409412
out.assign(reinterpret_cast<const char*>(data + offset), len);
410413
offset += len;

include/signet/ai/feature_reader.hpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -120,8 +120,7 @@ class FeatureReader {
120120
total_rows_ = o.total_rows_;
121121
failed_file_count_= o.failed_file_count_;
122122
readers_ = std::move(o.readers_);
123-
std::lock_guard<std::mutex> lk(rg_cache_mutex_);
124-
std::lock_guard<std::mutex> lk2(o.rg_cache_mutex_);
123+
std::scoped_lock lk(rg_cache_mutex_, o.rg_cache_mutex_);
125124
rg_cache_ = std::move(o.rg_cache_);
126125
}
127126
return *this;

include/signet/ai/inference_log.hpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -337,9 +337,12 @@ struct InferenceRecord {
337337
return true;
338338
}
339339

340+
static constexpr uint32_t MAX_STRING_LEN = 16u * 1024u * 1024u; // 16 MB
341+
340342
static inline bool read_string(const uint8_t* data, size_t size, size_t& offset, std::string& out) {
341343
uint32_t len = 0;
342344
if (!read_le32_u(data, size, offset, len)) return false;
345+
if (len > MAX_STRING_LEN) return false;
343346
if (offset + len > size) return false;
344347
out.assign(reinterpret_cast<const char*>(data + offset), len);
345348
offset += len;

include/signet/ai/tensor_bridge.hpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -238,8 +238,11 @@ class TensorView {
238238
}
239239

240240
/// Total byte size of the tensor data (num_elements * element_size).
241+
/// Returns 0 if num_elements() is non-positive (empty or error shape).
241242
[[nodiscard]] size_t byte_size() const noexcept {
242-
return static_cast<size_t>(num_elements()) * element_size();
243+
const int64_t n = num_elements();
244+
if (n <= 0) return 0;
245+
return static_cast<size_t>(n) * element_size();
243246
}
244247

245248
/// Effective stride in bytes along the first dimension.

include/signet/ai/vector_type.hpp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -260,13 +260,15 @@ class VectorWriter {
260260
}
261261

262262
/// Add a batch of vectors (num_vectors vectors, each `dimension` elements, row-major).
263-
inline void add_batch(const float* data, size_t num_vectors) {
263+
/// @return true on success, false on overflow (batch rejected entirely).
264+
inline bool add_batch(const float* data, size_t num_vectors) {
264265
const size_t dim = spec_.dimension;
265-
// Overflow check: num_vectors * dim must not overflow size_t
266-
if (num_vectors > 0 && dim > 0 && num_vectors > SIZE_MAX / dim) return;
266+
if (dim == 0 || num_vectors == 0) return true;
267+
if (num_vectors > SIZE_MAX / dim) return false;
267268
for (size_t i = 0; i < num_vectors; ++i) {
268269
add(data + i * dim);
269270
}
271+
return true;
270272
}
271273

272274
/// Flush the buffered vectors and return the encoded page bytes.

0 commit comments

Comments
 (0)