Skip to content

Commit a6f5288

Browse files
Fix CI: macOS float from_chars portability + MSVC std::min macro guard
writer.hpp: add detail::parse_double/parse_float/try_parse_double helpers with platform-dispatched locale-independent float parsing — std::from_chars on GCC/MSVC, strtod_l/strtof_l with explicit C locale on Apple/POSIX. Fixes Xcode 15.4 libc++ missing float from_chars (never bare strtod). column_batch.hpp: parenthesized (std::min) to prevent Windows min macro collision (CWE-628), matching established codebase pattern.
1 parent 44696f5 commit a6f5288

2 files changed

Lines changed: 94 additions & 14 deletions

File tree

include/signet/ai/column_batch.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ class ColumnBatch {
178178
if (col_idx >= columns_.size())
179179
return {};
180180
return {columns_[col_idx].data(),
181-
std::min(num_rows_, columns_[col_idx].size())};
181+
(std::min)(num_rows_, columns_[col_idx].size())};
182182
}
183183

184184
// -------------------------------------------------------------------------

include/signet/writer.hpp

Lines changed: 93 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,95 @@
4747
#include <unordered_set>
4848
#include <vector>
4949

50+
// ---------------------------------------------------------------------------
51+
// Floating-point from_chars availability detection
52+
//
53+
// Apple Clang's libc++ (Xcode ≤ 16) does not implement std::from_chars for
54+
// float/double. GCC ≥ 11 (libstdc++) and MSVC ≥ 19.24 do.
55+
//
56+
// On Apple we fall back to strtod_l / strtof_l with an explicit "C" locale
57+
// to preserve locale-independence — critical for MiFID II financial data
58+
// where decimal separators must always be '.' regardless of LC_NUMERIC.
59+
// ---------------------------------------------------------------------------
60+
#if (defined(__GNUC__) && !defined(__clang__)) || defined(_MSC_VER)
61+
# define SIGNET_HAS_FLOAT_FROM_CHARS 1
62+
#else
63+
# define SIGNET_HAS_FLOAT_FROM_CHARS 0
64+
# if defined(__APPLE__)
65+
# include <xlocale.h> // strtod_l, strtof_l, newlocale on macOS
66+
# elif defined(__linux__) || defined(__FreeBSD__)
67+
# include <locale.h> // strtod_l, strtof_l, newlocale on Linux/BSD
68+
# endif
69+
#endif
70+
5071
namespace signet::forge {
5172

73+
// ---------------------------------------------------------------------------
74+
// detail::parse_double / detail::parse_float
75+
//
76+
// Locale-independent floating-point parsing for CSV-to-Parquet conversion.
77+
// Uses std::from_chars where available (GCC, MSVC); falls back to
78+
// strtod_l/strtof_l with C locale on platforms without float from_chars
79+
// (Apple Clang). Never uses bare strtod — that is locale-dependent and
80+
// would silently corrupt financial data in EU locales (CWE-1286).
81+
// ---------------------------------------------------------------------------
82+
namespace detail {
83+
84+
inline double parse_double(std::string_view sv) noexcept {
85+
double value = 0.0;
86+
#if SIGNET_HAS_FLOAT_FROM_CHARS
87+
std::from_chars(sv.data(), sv.data() + sv.size(), value);
88+
#elif defined(__APPLE__) || defined(__linux__) || defined(__FreeBSD__)
89+
// Thread-safe: newlocale returns a per-call handle; static ensures
90+
// one-time creation (singleton, never freed — intentional).
91+
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", (locale_t)0);
92+
std::string tmp(sv);
93+
value = strtod_l(tmp.c_str(), nullptr, c_locale);
94+
#else
95+
// Last resort for exotic platforms — document locale dependency risk
96+
std::string tmp(sv);
97+
value = std::strtod(tmp.c_str(), nullptr);
98+
#endif
99+
return value;
100+
}
101+
102+
inline float parse_float(std::string_view sv) noexcept {
103+
float value = 0.0f;
104+
#if SIGNET_HAS_FLOAT_FROM_CHARS
105+
std::from_chars(sv.data(), sv.data() + sv.size(), value);
106+
#elif defined(__APPLE__) || defined(__linux__) || defined(__FreeBSD__)
107+
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", (locale_t)0);
108+
std::string tmp(sv);
109+
value = strtof_l(tmp.c_str(), nullptr, c_locale);
110+
#else
111+
std::string tmp(sv);
112+
value = std::strtof(tmp.c_str(), nullptr);
113+
#endif
114+
return value;
115+
}
116+
117+
/// Try parsing a string_view as double; returns true on full parse success.
118+
/// Used for CSV type-detection (auto-detect DOUBLE columns).
119+
inline bool try_parse_double(std::string_view sv, double& out) noexcept {
120+
#if SIGNET_HAS_FLOAT_FROM_CHARS
121+
auto [p, ec] = std::from_chars(sv.data(), sv.data() + sv.size(), out);
122+
return ec == std::errc{} && p == sv.data() + sv.size();
123+
#elif defined(__APPLE__) || defined(__linux__) || defined(__FreeBSD__)
124+
static locale_t c_locale = newlocale(LC_ALL_MASK, "C", (locale_t)0);
125+
std::string tmp(sv);
126+
char* end = nullptr;
127+
out = strtod_l(tmp.c_str(), &end, c_locale);
128+
return end == tmp.c_str() + tmp.size();
129+
#else
130+
std::string tmp(sv);
131+
char* end = nullptr;
132+
out = std::strtod(tmp.c_str(), &end);
133+
return end == tmp.c_str() + tmp.size();
134+
#endif
135+
}
136+
137+
} // namespace detail
138+
52139
namespace detail::writer {
53140

54141
/// CRC-32 (polynomial 0xEDB88320) for page integrity (Parquet PageHeader.crc).
@@ -1088,11 +1175,10 @@ class ParquetWriter {
10881175
}
10891176
}
10901177

1091-
// Try DOUBLE (locale-independent)
1178+
// Try DOUBLE (locale-independent on all platforms)
10921179
if (all_double) {
10931180
double tmp = 0;
1094-
auto [p, ec] = std::from_chars(val.data(), val.data() + val.size(), tmp);
1095-
if (ec != std::errc{} || p != val.data() + val.size()) {
1181+
if (!detail::try_parse_double(val, tmp)) {
10961182
all_double = false;
10971183
}
10981184
}
@@ -1299,9 +1385,7 @@ class ParquetWriter {
12991385
parsed);
13001386
if (ec != std::errc{}) {
13011387
// Fallback: parse as double and truncate (locale-independent)
1302-
double d = 0;
1303-
std::from_chars(val.data(), val.data() + val.size(), d);
1304-
parsed = static_cast<int32_t>(d);
1388+
parsed = static_cast<int32_t>(detail::parse_double(val));
13051389
}
13061390
cw.write_int32(parsed);
13071391
if (bf_active) bloom_filters_[c]->insert_value(parsed);
@@ -1314,24 +1398,20 @@ class ParquetWriter {
13141398
parsed);
13151399
if (ec != std::errc{}) {
13161400
// Fallback: parse as double and truncate (locale-independent)
1317-
double d = 0;
1318-
std::from_chars(val.data(), val.data() + val.size(), d);
1319-
parsed = static_cast<int64_t>(d);
1401+
parsed = static_cast<int64_t>(detail::parse_double(val));
13201402
}
13211403
cw.write_int64(parsed);
13221404
if (bf_active) bloom_filters_[c]->insert_value(parsed);
13231405
break;
13241406
}
13251407
case PhysicalType::FLOAT: {
1326-
float f = 0;
1327-
std::from_chars(val.data(), val.data() + val.size(), f);
1408+
float f = detail::parse_float(val);
13281409
cw.write_float(f);
13291410
if (bf_active) bloom_filters_[c]->insert_value(f);
13301411
break;
13311412
}
13321413
case PhysicalType::DOUBLE: {
1333-
double d = 0;
1334-
std::from_chars(val.data(), val.data() + val.size(), d);
1414+
double d = detail::parse_double(val);
13351415
cw.write_double(d);
13361416
if (bf_active) bloom_filters_[c]->insert_value(d);
13371417
break;

0 commit comments

Comments
 (0)