4747#include < unordered_set>
4848#include < vector>
4949
50+ // ---------------------------------------------------------------------------
51+ // Floating-point from_chars availability detection
52+ //
53+ // Apple Clang's libc++ (Xcode ≤ 16) does not implement std::from_chars for
54+ // float/double. GCC ≥ 11 (libstdc++) and MSVC ≥ 19.24 do.
55+ //
56+ // On Apple we fall back to strtod_l / strtof_l with an explicit "C" locale
57+ // to preserve locale-independence — critical for MiFID II financial data
58+ // where decimal separators must always be '.' regardless of LC_NUMERIC.
59+ // ---------------------------------------------------------------------------
60+ #if (defined(__GNUC__) && !defined(__clang__)) || defined(_MSC_VER)
61+ # define SIGNET_HAS_FLOAT_FROM_CHARS 1
62+ #else
63+ # define SIGNET_HAS_FLOAT_FROM_CHARS 0
64+ # if defined(__APPLE__)
65+ # include < xlocale.h> // strtod_l, strtof_l, newlocale on macOS
66+ # elif defined(__linux__) || defined(__FreeBSD__)
67+ # include < locale.h> // strtod_l, strtof_l, newlocale on Linux/BSD
68+ # endif
69+ #endif
70+
5071namespace signet ::forge {
5172
73+ // ---------------------------------------------------------------------------
74+ // detail::parse_double / detail::parse_float
75+ //
76+ // Locale-independent floating-point parsing for CSV-to-Parquet conversion.
77+ // Uses std::from_chars where available (GCC, MSVC); falls back to
78+ // strtod_l/strtof_l with C locale on platforms without float from_chars
79+ // (Apple Clang). Never uses bare strtod — that is locale-dependent and
80+ // would silently corrupt financial data in EU locales (CWE-1286).
81+ // ---------------------------------------------------------------------------
82+ namespace detail {
83+
84+ inline double parse_double (std::string_view sv) noexcept {
85+ double value = 0.0 ;
86+ #if SIGNET_HAS_FLOAT_FROM_CHARS
87+ std::from_chars (sv.data (), sv.data () + sv.size (), value);
88+ #elif defined(__APPLE__) || defined(__linux__) || defined(__FreeBSD__)
89+ // Thread-safe: newlocale returns a per-call handle; static ensures
90+ // one-time creation (singleton, never freed — intentional).
91+ static locale_t c_locale = newlocale (LC_ALL_MASK, " C" , (locale_t )0 );
92+ std::string tmp (sv);
93+ value = strtod_l (tmp.c_str (), nullptr , c_locale);
94+ #else
95+ // Last resort for exotic platforms — document locale dependency risk
96+ std::string tmp (sv);
97+ value = std::strtod (tmp.c_str (), nullptr );
98+ #endif
99+ return value;
100+ }
101+
102+ inline float parse_float (std::string_view sv) noexcept {
103+ float value = 0 .0f ;
104+ #if SIGNET_HAS_FLOAT_FROM_CHARS
105+ std::from_chars (sv.data (), sv.data () + sv.size (), value);
106+ #elif defined(__APPLE__) || defined(__linux__) || defined(__FreeBSD__)
107+ static locale_t c_locale = newlocale (LC_ALL_MASK, " C" , (locale_t )0 );
108+ std::string tmp (sv);
109+ value = strtof_l (tmp.c_str (), nullptr , c_locale);
110+ #else
111+ std::string tmp (sv);
112+ value = std::strtof (tmp.c_str (), nullptr );
113+ #endif
114+ return value;
115+ }
116+
117+ // / Try parsing a string_view as double; returns true on full parse success.
118+ // / Used for CSV type-detection (auto-detect DOUBLE columns).
119+ inline bool try_parse_double (std::string_view sv, double & out) noexcept {
120+ #if SIGNET_HAS_FLOAT_FROM_CHARS
121+ auto [p, ec] = std::from_chars (sv.data (), sv.data () + sv.size (), out);
122+ return ec == std::errc{} && p == sv.data () + sv.size ();
123+ #elif defined(__APPLE__) || defined(__linux__) || defined(__FreeBSD__)
124+ static locale_t c_locale = newlocale (LC_ALL_MASK, " C" , (locale_t )0 );
125+ std::string tmp (sv);
126+ char * end = nullptr ;
127+ out = strtod_l (tmp.c_str (), &end, c_locale);
128+ return end == tmp.c_str () + tmp.size ();
129+ #else
130+ std::string tmp (sv);
131+ char * end = nullptr ;
132+ out = std::strtod (tmp.c_str (), &end);
133+ return end == tmp.c_str () + tmp.size ();
134+ #endif
135+ }
136+
137+ } // namespace detail
138+
52139namespace detail ::writer {
53140
54141// / CRC-32 (polynomial 0xEDB88320) for page integrity (Parquet PageHeader.crc).
@@ -1088,11 +1175,10 @@ class ParquetWriter {
10881175 }
10891176 }
10901177
1091- // Try DOUBLE (locale-independent)
1178+ // Try DOUBLE (locale-independent on all platforms )
10921179 if (all_double) {
10931180 double tmp = 0 ;
1094- auto [p, ec] = std::from_chars (val.data (), val.data () + val.size (), tmp);
1095- if (ec != std::errc{} || p != val.data () + val.size ()) {
1181+ if (!detail::try_parse_double (val, tmp)) {
10961182 all_double = false ;
10971183 }
10981184 }
@@ -1299,9 +1385,7 @@ class ParquetWriter {
12991385 parsed);
13001386 if (ec != std::errc{}) {
13011387 // Fallback: parse as double and truncate (locale-independent)
1302- double d = 0 ;
1303- std::from_chars (val.data (), val.data () + val.size (), d);
1304- parsed = static_cast <int32_t >(d);
1388+ parsed = static_cast <int32_t >(detail::parse_double (val));
13051389 }
13061390 cw.write_int32 (parsed);
13071391 if (bf_active) bloom_filters_[c]->insert_value (parsed);
@@ -1314,24 +1398,20 @@ class ParquetWriter {
13141398 parsed);
13151399 if (ec != std::errc{}) {
13161400 // Fallback: parse as double and truncate (locale-independent)
1317- double d = 0 ;
1318- std::from_chars (val.data (), val.data () + val.size (), d);
1319- parsed = static_cast <int64_t >(d);
1401+ parsed = static_cast <int64_t >(detail::parse_double (val));
13201402 }
13211403 cw.write_int64 (parsed);
13221404 if (bf_active) bloom_filters_[c]->insert_value (parsed);
13231405 break ;
13241406 }
13251407 case PhysicalType::FLOAT: {
1326- float f = 0 ;
1327- std::from_chars (val.data (), val.data () + val.size (), f);
1408+ float f = detail::parse_float (val);
13281409 cw.write_float (f);
13291410 if (bf_active) bloom_filters_[c]->insert_value (f);
13301411 break ;
13311412 }
13321413 case PhysicalType::DOUBLE: {
1333- double d = 0 ;
1334- std::from_chars (val.data (), val.data () + val.size (), d);
1414+ double d = detail::parse_double (val);
13351415 cw.write_double (d);
13361416 if (bf_active) bloom_filters_[c]->insert_value (d);
13371417 break ;
0 commit comments