Skip to content

Commit 24a11ca

Browse files
committed
Decimals are actually decimals now, fix issue with params in debug, add max_iterations to validate
1 parent 9860fed commit 24a11ca

17 files changed

Lines changed: 457 additions & 61 deletions

File tree

Cargo.lock

Lines changed: 309 additions & 32 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ ryu = "1.0.20"
7979
nohash-hasher = "0.2.0"
8080
arrow = "56"
8181
ordered-float = "5.1.0"
82+
rust_decimal = "1.41.0"
8283

8384
[patch.crates-io]
8485
#polars = { git = 'https://github.com/pola-rs/polars', rev="665722ac3f3664c589c4827208d173cc16f0ec68" }

lib/maplib/src/model.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ impl Model {
474474
deactivate_shapes: Vec<NamedNode>,
475475
dry_run: bool,
476476
serial: bool,
477+
max_iterations: Option<usize>,
477478
debug_rules: bool,
478479
) -> Result<ValidationReport, MaplibError> {
479480
let mut res = validate(
@@ -492,6 +493,7 @@ impl Model {
492493
dry_run,
493494
Some(self.prefixes.clone()),
494495
serial,
496+
max_iterations,
495497
debug_rules,
496498
)
497499
.map_err(|x| MaplibError::ShaclError(x))?;

lib/query_processing/src/aggregates.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ pub fn min(solution_mappings: &SolutionMappings, column_context: &Context) -> (E
9797
} else {
9898
expr_rdf_node_type.clone()
9999
};
100-
101100
let out_expr = col(column_context.as_str()).min();
102101

103102
(out_expr, out_rdf_node_type)

lib/query_processing/src/expressions.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -475,8 +475,8 @@ pub fn if_expression(
475475
let right_exploded = exploded.pop().unwrap();
476476
let mid_exploded = exploded.pop().unwrap();
477477

478-
let mut right_exploded_types: HashSet<_> = right_exploded.iter().map(|(t,..)|t.clone()).collect();
479-
let mut mid_exploded_types: HashSet<_> = mid_exploded.iter().map(|(t,..)|t.clone()).collect();
478+
let right_exploded_types: HashSet<_> = right_exploded.iter().map(|(t,..)|t.clone()).collect();
479+
let mid_exploded_types: HashSet<_> = mid_exploded.iter().map(|(t,..)|t.clone()).collect();
480480

481481

482482
let mut mid_exprs = vec![];

lib/representation/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ nohash-hasher.workspace = true
2727
uuid.workspace = true
2828
arrow.workspace = true
2929
memchr.workspace = true
30+
rust_decimal.workspace = true
3031

3132
[lints.rust]
3233
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(feature, values("gil-refs", "rdf-star"))'] }

lib/representation/src/base_rdf_type.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use oxrdf::{NamedNode, NamedNodeRef, NamedOrBlankNode, Term};
1010
use polars::datatypes::{DataType, Field, PlSmallStr};
1111
use spargebra::term::GroundTerm;
1212
use std::fmt::{Display, Formatter};
13-
use crate::rdf_to_polars::{default_time_unit, default_time_zone};
13+
use crate::rdf_to_polars::{default_decimal_type, default_time_unit, default_time_zone};
1414

1515
#[derive(Debug, Clone, Ord, PartialOrd, PartialEq, Eq, Hash)]
1616
pub enum BaseRDFNodeType {
@@ -215,7 +215,8 @@ fn literal_type(
215215
xsd::INT => DataType::Int32,
216216
xsd::SHORT => DataType::Int16,
217217
xsd::BYTE => DataType::Int8,
218-
xsd::DOUBLE | xsd::DECIMAL => DataType::Float64,
218+
xsd::DOUBLE => DataType::Float64,
219+
xsd::DECIMAL => default_decimal_type(),
219220
xsd::FLOAT => DataType::Float32,
220221
xsd::BOOLEAN => DataType::Boolean,
221222
rdf::LANG_STRING => {

lib/representation/src/rdf_to_polars.rs

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ use polars::prelude::{
88
as_struct, lit, AnyValue, DataType, Expr, LiteralValue, NamedFrom, PlSmallStr, Scalar, Series,
99
TimeUnit, TimeZone,
1010
};
11+
use polars_core::prelude::{Int128Chunked, IntoSeries, NewChunkedArray};
12+
use rust_decimal::Decimal;
1113
use std::ops::Deref;
1214
use std::str::FromStr;
1315
use tracing::warn;
@@ -239,11 +241,16 @@ pub fn rdf_literal_to_polars_literal_value_impl(
239241
LiteralValue::Scalar(Scalar::null(DataType::Date))
240242
}
241243
} else if datatype == xsd::DECIMAL {
242-
if let Ok(d) = f64::from_str(value) {
243-
LiteralValue::Scalar(Scalar::from(d))
244+
if let Ok(mut d) = Decimal::from_str(value) {
245+
d.rescale(default_decimal_scale() as u32);
246+
LiteralValue::Scalar(Scalar::new_decimal(
247+
d.mantissa(),
248+
default_decimal_precision(),
249+
default_decimal_scale(),
250+
))
244251
} else {
245252
warn!("Could not parse xsd:decimal {value}");
246-
LiteralValue::Scalar(Scalar::null(DataType::Float64))
253+
LiteralValue::Scalar(Scalar::null(default_decimal_type()))
247254
}
248255
} else {
249256
LiteralValue::Scalar(Scalar::from(PlSmallStr::from_string(value.to_string())))
@@ -431,6 +438,24 @@ pub fn polars_literal_values_to_series(literal_values: Vec<LiteralValue>, name:
431438
})
432439
.collect::<Vec<Option<f64>>>(),
433440
),
441+
AnyValue::Decimal(_, ..) => {
442+
let i128ch = Int128Chunked::from_iter_options(
443+
name.into(),
444+
values.into_iter().map(|x| {
445+
if let AnyValue::Decimal(v, ..) = x {
446+
Some(v)
447+
} else {
448+
None
449+
}
450+
}),
451+
);
452+
let dec = i128ch
453+
.into_decimal(default_decimal_precision(), default_decimal_scale())
454+
.unwrap();
455+
let mut ser = dec.into_series();
456+
ser.rename(PlSmallStr::from_str(name));
457+
ser
458+
}
434459
AnyValue::Datetime(_, t, tz) => Series::new(
435460
name.into(),
436461
values
@@ -515,3 +540,15 @@ pub fn default_time_unit() -> TimeUnit {
515540
pub fn default_time_zone() -> TimeZone {
516541
TimeZone::UTC
517542
}
543+
544+
pub fn default_decimal_type() -> DataType {
545+
DataType::Decimal(default_decimal_precision(), default_decimal_scale())
546+
}
547+
548+
pub fn default_decimal_precision() -> usize {
549+
38
550+
}
551+
552+
pub fn default_decimal_scale() -> usize {
553+
12
554+
}

lib/triplestore/src/sparql/debug.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ impl Triplestore {
217217
},
218218
None,
219219
&Context::new(),
220-
&None,
220+
&parameters,
221221
Pushdowns::new(),
222222
qs,
223223
qg,

lib/triplestore/src/storage/so_index.rs

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,14 @@ use polars_core::datatypes::{
66
BooleanChunked, Int16Chunked, Int8Chunked, UInt16Chunked, UInt8Chunked,
77
};
88
use polars_core::frame::DataFrame;
9-
use polars_core::prelude::{
10-
Column, Float32Chunked, Float64Chunked, Int32Chunked, Int64Chunked, IntoColumn, UInt32Chunked,
11-
UInt64Chunked,
12-
};
9+
use polars_core::prelude::{Column, DecimalChunked, Float32Chunked, Float64Chunked, Int128Chunked, Int32Chunked, Int64Chunked, IntoColumn, NewChunkedArray, UInt32Chunked, UInt64Chunked};
1310
use polars_core::series::Series;
1411
use representation::{
1512
BaseRDFNodeType, LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD, OBJECT_COL_NAME,
1613
SUBJECT_COL_NAME,
1714
};
1815
use std::collections::HashSet;
19-
use representation::rdf_to_polars::{default_time_unit, default_time_zone};
16+
use representation::rdf_to_polars::{default_decimal_precision, default_decimal_scale, default_time_unit, default_time_zone};
2017

2118
pub fn bool_chunked(c: &Column) -> &BooleanChunked {
2219
c.bool().unwrap()
@@ -66,6 +63,10 @@ pub fn datetime_chunked(c: &Column) -> &Int64Chunked {
6663
c.datetime().unwrap().physical()
6764
}
6865

66+
pub fn decimal_chunked(c: &Column) -> &Int128Chunked {
67+
c.decimal().unwrap().physical()
68+
}
69+
6970

7071
pub fn bool_vec_to_column(col_name: &str, vec: Vec<bool>) -> Column {
7172
let mut c = Series::from_iter(vec).into_column();
@@ -117,6 +118,11 @@ pub fn datetime_vec_to_column(col_name: &str, vec: Vec<i64>) -> Column {
117118
o_series.into_column()
118119
}
119120

121+
pub fn decimal_vec_to_column(col_name: &str, vec: Vec<i128>) -> Column {
122+
let o_series = Int128Chunked::from_vec(PlSmallStr::from_str(col_name), vec).into_decimal(default_decimal_precision(), default_decimal_scale()).unwrap();
123+
o_series.into_column()
124+
}
125+
120126
pub fn unwrap_ordered_float<T>(o: Option<T>) -> OrderedFloat<T> {
121127
OrderedFloat(o.unwrap())
122128
}
@@ -510,6 +516,18 @@ binary_nonlang_nonlang_index_impl!(
510516
datetime_vec_to_column
511517
);
512518

519+
binary_nonlang_nonlang_index_impl!(
520+
U32DecimalIndex,
521+
(u32, i128),
522+
u32_chunked,
523+
decimal_chunked,
524+
unwrap_t,
525+
noop_t,
526+
noop_t,
527+
u32_vec_to_column,
528+
decimal_vec_to_column
529+
);
530+
513531
binary_nonlang_lang_index_impl!(
514532
U32LangIndex,
515533
(u32, u32, u32),
@@ -533,6 +551,7 @@ pub enum SubjectObjectIndex {
533551
U32I64Index(U32I64Index),
534552
U32F32Index(U32F32Index),
535553
U32F64Index(U32F64Index),
554+
U32DecimalIndex(U32DecimalIndex),
536555
U32DateIndex(U32DateIndex),
537556
U32DateTimeIndex(U32DateTimeIndex),
538557
}
@@ -553,9 +572,11 @@ impl SubjectObjectIndex {
553572
SubjectObjectIndex::U32BoolIndex(U32BoolIndex::new())
554573
} else if object_type.is_lit_type(xsd::FLOAT) {
555574
SubjectObjectIndex::U32F32Index(U32F32Index::new())
556-
} else if object_type.is_lit_type(xsd::DOUBLE) || object_type.is_lit_type(xsd::DECIMAL)
575+
} else if object_type.is_lit_type(xsd::DOUBLE)
557576
{
558577
SubjectObjectIndex::U32F64Index(U32F64Index::new())
578+
} else if object_type.is_lit_type(xsd::DECIMAL) {
579+
SubjectObjectIndex::U32DecimalIndex(U32DecimalIndex::new())
559580
} else if object_type.is_lit_type(xsd::BYTE) {
560581
SubjectObjectIndex::U32I8Index(U32I8Index::new())
561582
} else if object_type.is_lit_type(xsd::UNSIGNED_BYTE) {
@@ -592,6 +613,7 @@ impl SubjectObjectIndex {
592613
SubjectObjectIndex::U32I64Index(i) => i.insert(df),
593614
SubjectObjectIndex::U32F32Index(i) => i.insert(df),
594615
SubjectObjectIndex::U32F64Index(i) => i.insert(df),
616+
SubjectObjectIndex::U32DecimalIndex(i) => i.insert(df),
595617
SubjectObjectIndex::U32DateIndex(i) => i.insert(df),
596618
SubjectObjectIndex::U32DateTimeIndex(i) => i.insert(df),
597619
SubjectObjectIndex::U32BoolIndex(i) => i.insert(df),
@@ -611,6 +633,7 @@ impl SubjectObjectIndex {
611633
SubjectObjectIndex::U32I64Index(i) => i.delete(df),
612634
SubjectObjectIndex::U32F32Index(i) => i.delete(df),
613635
SubjectObjectIndex::U32F64Index(i) => i.delete(df),
636+
SubjectObjectIndex::U32DecimalIndex(i) => i.delete(df),
614637
SubjectObjectIndex::U32DateIndex(i) => i.delete(df),
615638
SubjectObjectIndex::U32DateTimeIndex(i) => i.delete(df),
616639
SubjectObjectIndex::U32BoolIndex(i) => i.delete(df),

0 commit comments

Comments
 (0)