Skip to content

Commit fd12e3a

Browse files
committed
Changes to validation api
1 parent e88d2f3 commit fd12e3a

11 files changed

Lines changed: 432 additions & 162 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/maplib/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ datalog = {path = "../datalog"}
1616
cimxml_export = {path = "../cimxml_export"}
1717
query_processing = {path ="../query_processing"}
1818

19+
uuid = {workspace = true}
1920
rayon = { workspace = true }
2021
oxrdf = { workspace = true }
2122
oxiri = { workspace = true }

lib/maplib/src/model.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ mod constant_terms;
22
pub mod default;
33
pub mod errors;
44
pub mod expansion;
5+
mod shacl_report_mapping;
56

67
use crate::errors::MaplibError;
78
use crate::model::errors::MappingError;
@@ -41,6 +42,7 @@ pub struct Model {
4142
pub default_template_counter: usize,
4243
pub indexing: IndexingOptions,
4344
pub prefixes: HashMap<String, NamedNode>,
45+
pub latest_report_graph: Option<NamedGraph>,
4446
}
4547

4648
#[derive(Clone, Default)]
@@ -105,6 +107,7 @@ impl Model {
105107
default_template_counter: 0,
106108
indexing,
107109
prefixes: use_prefixes,
110+
latest_report_graph: None,
108111
})
109112
}
110113

@@ -459,6 +462,8 @@ impl Model {
459462
&mut self,
460463
data_graph: &NamedGraph,
461464
shape_graph: &NamedGraph,
465+
report_graph: Option<&NamedGraph>,
466+
inferences_graph: Option<&NamedGraph>,
462467
include_details: bool,
463468
include_conforms: bool,
464469
streaming: bool,
@@ -470,7 +475,7 @@ impl Model {
470475
dry_run: bool,
471476
serial: bool,
472477
) -> Result<ValidationReport, MaplibError> {
473-
let res = validate(
478+
let mut res = validate(
474479
&mut self.triplestore,
475480
data_graph,
476481
shape_graph,
@@ -485,8 +490,12 @@ impl Model {
485490
dry_run,
486491
Some(self.prefixes.clone()),
487492
serial,
488-
);
489-
res.map_err(|x| x.into())
493+
)
494+
.map_err(|x| MaplibError::ShaclError(x))?;
495+
if let Some(report_graph) = report_graph {
496+
self.map_validation_result_to_report_graph(&mut res, report_graph)?;
497+
}
498+
Ok(res)
490499
}
491500

492501
fn truncate_graph(&mut self, graph: &NamedGraph) {
@@ -605,6 +614,7 @@ impl Model {
605614
default_template_counter: self.default_template_counter,
606615
indexing: self.indexing.clone(),
607616
prefixes: self.prefixes.clone(),
617+
latest_report_graph: None,
608618
})
609619
}
610620

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
use super::{MapOptions, Model};
2+
use crate::errors::MaplibError;
3+
use oxrdf::vocab::xsd;
4+
use polars::datatypes::{AnyValue, DataType, PlSmallStr};
5+
use polars::frame::DataFrame;
6+
use polars::prelude::{
7+
by_name, col, lit, IdxSize, IntoColumn, IntoLazy, LazyFrame, LiteralValue, NamedFrom, Series,
8+
};
9+
use representation::dataset::NamedGraph;
10+
use representation::solution_mapping::SolutionMappings;
11+
use representation::{BaseRDFNodeType, RDFNodeState};
12+
use shacl::ValidationReport;
13+
use std::collections::HashMap;
14+
use templates::MappingColumnType;
15+
use tracing::debug;
16+
use uuid::Uuid;
17+
18+
/// IRI of the OTTR template that maps one SHACL validation result (or detail) row.
const SHACL_RESULT_TEMPLATE: &str = "https://github.com/DataTreehouse/maplib#ShaclResultTemplate";
/// IRI of the OTTR template that maps the validation report node itself.
const SHACL_REPORT_TEMPLATE: &str = "https://github.com/DataTreehouse/maplib#ShaclReportTemplate";
/// stOTTR document defining [`SHACL_REPORT_TEMPLATE`] and [`SHACL_RESULT_TEMPLATE`].
///
/// `?details` is declared exactly once, as an optional list of IRIs: the template body
/// list-expands it (`cross | ... ++?details`), and parameter variables must be unique
/// within a template signature.
const SHACL_DOC: &str = r#"
@prefix maplib: <https://github.com/DataTreehouse/maplib#>.
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix sh: <http://www.w3.org/ns/shacl#> .

maplib:ShaclReportTemplate [
    ?report,
    ?result,
    xsd:boolean ?conforms,
] :: {
    ottr:Triple(?report, rdf:type, sh:ValidationReport),
    ottr:Triple(?report, sh:conforms, ?conforms),
    cross | ottr:Triple(?report, sh:result, ++?result),
} .

maplib:ShaclResultTemplate [
    ?result,
    ?source_shape,
    ?focus_node,
    ? ?value,
    ottr:IRI ?source_constraint_component,
    ? ?message,
    ?result_severity,
    ? ?result_path,
    ?conforms,
    ? List<ottr:IRI> ?details,
] :: {
    ottr:Triple(?result, a, sh:ValidationResult),
    ottr:Triple(?result, sh:sourceShape, ?source_shape),
    ottr:Triple(?result, sh:value, ?value),
    ottr:Triple(?result, sh:sourceConstraintComponent, ?source_constraint_component),
    ottr:Triple(?result, sh:focusNode, ?focus_node),
    ottr:Triple(?result, sh:resultMessage, ?message),
    ottr:Triple(?result, sh:resultSeverity, ?result_severity),
    ottr:Triple(?result, sh:resultPath, ?result_path),
    ottr:Triple(?result, maplib:resultConforms, ?conforms),
    cross | ottr:Triple(?result, maplib:details, ++?details),
} .
"#;
60+
61+
impl Model {
62+
pub fn map_validation_result_to_report_graph(
63+
&mut self,
64+
report: &mut ValidationReport,
65+
report_graph: &NamedGraph,
66+
) -> Result<(), MaplibError> {
67+
self.add_templates_from_string(SHACL_DOC)
68+
.expect("Template should be correct");
69+
let map_options = MapOptions {
70+
graph: report_graph.clone(),
71+
validate_iris: false,
72+
};
73+
let mut result_cols = vec![];
74+
let mut offset = 0;
75+
let uuid = Uuid::new_v4().to_string();
76+
if let Some(SolutionMappings {
77+
mappings,
78+
rdf_node_types,
79+
..
80+
}) = report.concatenated_results(self.triplestore.global_cats.clone())?
81+
{
82+
debug!("Started creating results input");
83+
let (df, column_types) =
84+
create_results_input(mappings.clone().lazy(), &rdf_node_types, offset, &uuid);
85+
debug!("Finished creating results input");
86+
offset += df.height();
87+
let result_col = df.column("result").unwrap().clone();
88+
result_cols.push(result_col);
89+
self.expand(
90+
SHACL_RESULT_TEMPLATE,
91+
Some(df),
92+
Some(column_types),
93+
map_options.clone(),
94+
)?;
95+
}
96+
if result_cols.is_empty() {
97+
result_cols.push(
98+
Series::new_empty(PlSmallStr::from_str("result"), &DataType::String).into_column(),
99+
);
100+
}
101+
for mut result_col in result_cols {
102+
let result_ser = result_col.into_materialized_series();
103+
let report_df = DataFrame::new(
104+
1,
105+
vec![
106+
Series::from_any_values_and_dtype(
107+
PlSmallStr::from_str("report"),
108+
&[AnyValue::StringOwned("urn:maplib:report".into())],
109+
&DataType::String,
110+
true,
111+
)
112+
.unwrap()
113+
.into_column(),
114+
Series::from_any_values_and_dtype(
115+
PlSmallStr::from_str("result"),
116+
&[AnyValue::List(result_ser.clone())],
117+
&DataType::List(Box::new(DataType::String)),
118+
true,
119+
)
120+
.unwrap()
121+
.into_column(),
122+
Series::new(PlSmallStr::from_str("conforms"), [report.conforms]).into_column(),
123+
],
124+
)
125+
.unwrap();
126+
let mut report_types = HashMap::new();
127+
report_types.insert(
128+
"result".to_string(),
129+
MappingColumnType::Nested(Box::new(MappingColumnType::Flat(
130+
BaseRDFNodeType::IRI.into_default_input_rdf_node_state(),
131+
))),
132+
);
133+
report_types.insert(
134+
"report".to_string(),
135+
MappingColumnType::Flat(BaseRDFNodeType::IRI.into_default_input_rdf_node_state()),
136+
);
137+
report_types.insert(
138+
"conforms".to_string(),
139+
MappingColumnType::Flat(
140+
BaseRDFNodeType::Literal(xsd::BOOLEAN.into_owned())
141+
.into_default_input_rdf_node_state(),
142+
),
143+
);
144+
self.expand(
145+
SHACL_REPORT_TEMPLATE,
146+
Some(report_df),
147+
Some(report_types),
148+
map_options.clone(),
149+
)?;
150+
}
151+
if let Some(SolutionMappings {
152+
mappings,
153+
rdf_node_types,
154+
..
155+
}) = report.concatenated_details(self.triplestore.global_cats.clone())?
156+
{
157+
debug!("Started creating details input");
158+
let (details_df, details_types) =
159+
create_results_input(mappings.clone().lazy(), &rdf_node_types, offset, &uuid);
160+
debug!("Finished creating details input");
161+
162+
self.expand(
163+
SHACL_RESULT_TEMPLATE,
164+
Some(details_df),
165+
Some(details_types),
166+
map_options,
167+
)?;
168+
}
169+
report.results = None;
170+
Ok(())
171+
}
172+
}
173+
174+
fn create_results_input(
175+
mut mappings: LazyFrame,
176+
types: &HashMap<String, RDFNodeState>,
177+
offset: usize,
178+
uuid: &str,
179+
) -> (DataFrame, HashMap<String, MappingColumnType>) {
180+
let mut rdf_node_types = types.clone();
181+
if rdf_node_types.contains_key("id") {
182+
mappings = mappings
183+
.with_column(
184+
(lit(format!("urn:maplib:r{}_", uuid)) + col("id").cast(DataType::String))
185+
.alias("result"),
186+
)
187+
.drop(by_name(["id"], true, false));
188+
rdf_node_types.remove("id").unwrap();
189+
} else {
190+
mappings = mappings.with_row_index("result", Some(offset as IdxSize));
191+
mappings = mappings.with_column(
192+
(lit(format!("urn:maplib:tr{}_", uuid)) + col("result").cast(DataType::String))
193+
.alias("result"),
194+
);
195+
}
196+
rdf_node_types.insert(
197+
"result".to_string(),
198+
BaseRDFNodeType::IRI.into_default_input_rdf_node_state(),
199+
);
200+
201+
if !rdf_node_types.contains_key("result_path") {
202+
mappings = mappings.with_column(
203+
lit(LiteralValue::untyped_null())
204+
.cast(DataType::String)
205+
.alias("result_path"),
206+
);
207+
rdf_node_types.insert(
208+
"result_path".to_string(),
209+
BaseRDFNodeType::IRI.into_default_input_rdf_node_state(),
210+
);
211+
}
212+
213+
let mut column_types = HashMap::new();
214+
for (k, v) in rdf_node_types {
215+
column_types.insert(k, MappingColumnType::Flat(v));
216+
}
217+
218+
if column_types.contains_key("details") {
219+
mappings = mappings.with_column(
220+
col("details")
221+
.list()
222+
.eval(lit(format!("urn:maplib:r{}_", uuid)) + col("").cast(DataType::String)),
223+
);
224+
} else {
225+
mappings = mappings.with_column(
226+
lit(LiteralValue::untyped_null())
227+
.cast(DataType::List(Box::new(DataType::String)))
228+
.alias("details"),
229+
)
230+
}
231+
column_types.insert(
232+
"details".to_string(),
233+
MappingColumnType::Nested(Box::new(MappingColumnType::Flat(
234+
BaseRDFNodeType::IRI.into_default_input_rdf_node_state(),
235+
))),
236+
);
237+
238+
(mappings.collect().unwrap(), column_types)
239+
}

lib/representation/src/cats/image.rs

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,7 @@
11
use super::{reencode_solution_mappings, CatEncs, CatType};
22
use super::{CatReEnc, Cats};
3-
use crate::cats::LockedCats;
43
use crate::solution_mapping::{BaseCatState, EagerSolutionMappings};
5-
use crate::{BaseRDFNodeType, RDFNodeState, LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD};
4+
use crate::{BaseRDFNodeType, LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD};
65
use nohash_hasher::NoHashHasher;
76
use std::collections::{HashMap, HashSet};
87
use std::hash::BuildHasherDefault;
@@ -201,18 +200,3 @@ pub fn new_solution_mapping_cats(
201200
.collect();
202201
(new_sms, cats)
203202
}
204-
205-
pub fn set_global_cats_as_local(
206-
rdf_node_types: &mut HashMap<String, RDFNodeState>,
207-
cats: LockedCats,
208-
) {
209-
for (_, s) in rdf_node_types {
210-
for v in s.map.values_mut() {
211-
if matches!(v, BaseCatState::CategoricalNative(None)) {
212-
*v = BaseCatState::CategoricalNative(Some(cats.clone()));
213-
} else if matches!(v, BaseCatState::CategoricalNative(Some(..))) {
214-
panic!("Should never be called when locals exist")
215-
}
216-
}
217-
}
218-
}

0 commit comments

Comments
 (0)