From bc0efbfebccc1d94950176907cfb1e7ee7f6014b Mon Sep 17 00:00:00 2001 From: Eliathon Date: Thu, 5 Feb 2026 11:17:37 +0100 Subject: [PATCH 01/19] made new jelly package in lib, added to /cargo.toml --- Cargo.lock | 4 ++++ Cargo.toml | 1 + lib/jelly/Cargo.toml | 7 +++++++ lib/jelly/src/main.rs | 3 +++ 4 files changed, 15 insertions(+) create mode 100644 lib/jelly/Cargo.toml create mode 100644 lib/jelly/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index f33796e5..2b0102ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1264,6 +1264,10 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +[[package]] +name = "jelly" +version = "0.1.0" + [[package]] name = "jobserver" version = "0.1.34" diff --git a/Cargo.toml b/Cargo.toml index bf7dda3e..40ec7c57 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ members = [ "lib/cimxml_export", "lib/cimxml_import", "py_maplib", + "lib/jelly", ] [workspace.package] diff --git a/lib/jelly/Cargo.toml b/lib/jelly/Cargo.toml new file mode 100644 index 00000000..032f1f08 --- /dev/null +++ b/lib/jelly/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "jelly" +version = "0.1.0" +rust-version.workspace = true +edition.workspace = true + +[dependencies] diff --git a/lib/jelly/src/main.rs b/lib/jelly/src/main.rs new file mode 100644 index 00000000..e7a11a96 --- /dev/null +++ b/lib/jelly/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} From 8c9387ea567fb640ed4096057c48f21ff2479f76 Mon Sep 17 00:00:00 2001 From: Eliathon Date: Fri, 6 Feb 2026 01:43:29 +0100 Subject: [PATCH 02/19] resolving for jelly, jelly enum, updated format to allow jelly, generated rust functions from proto file --- Cargo.lock | 366 +++++++ lib/jelly/Cargo.toml | 2 + lib/jelly/src/eu/mod.rs | 2 + lib/jelly/src/eu/ostrzyciel/jelly/core/mod.rs | 2 + .../src/eu/ostrzyciel/jelly/core/proto/mod.rs | 2 + .../src/eu/ostrzyciel/jelly/core/proto/v1.rs | 911 ++++++++++++++++++ lib/jelly/src/eu/ostrzyciel/jelly/mod.rs | 2 + lib/jelly/src/eu/ostrzyciel/mod.rs | 2 + lib/jelly/src/lib.rs | 1 + lib/jelly/src/main.rs | 3 - lib/triplestore/Cargo.toml | 1 + lib/triplestore/src/triples_read.rs | 5 + py_maplib/maplib/__init__.pyi | 4 +- py_maplib/src/lib.rs | 2 +- 14 files changed, 1299 insertions(+), 6 deletions(-) create mode 100644 lib/jelly/src/eu/mod.rs create mode 100644 lib/jelly/src/eu/ostrzyciel/jelly/core/mod.rs create mode 100644 lib/jelly/src/eu/ostrzyciel/jelly/core/proto/mod.rs create mode 100644 lib/jelly/src/eu/ostrzyciel/jelly/core/proto/v1.rs create mode 100644 lib/jelly/src/eu/ostrzyciel/jelly/mod.rs create mode 100644 lib/jelly/src/eu/ostrzyciel/mod.rs create mode 100644 lib/jelly/src/lib.rs delete mode 100644 lib/jelly/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 2b0102ca..3249c6ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,6 +80,62 @@ dependencies = [ "libc", ] +[[package]] +name = "anstream" +version = "0.6.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" + +[[package]] +name = "anstyle-parse" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys 0.61.2", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys 0.61.2", +] + +[[package]] +name = "anyhow" +version = "1.0.100" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" + [[package]] name = "approx" version = "0.3.2" @@ -398,6 +454,12 @@ dependencies = [ "representation", ] +[[package]] +name = "colorchoice" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" + [[package]] name = "comfy-table" version = "7.2.1" @@ -608,6 +670,29 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "env_filter" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" +dependencies = [ + "log", + "regex", +] + +[[package]] +name = "env_logger" +version = "0.11.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" +dependencies = [ + "anstream", + "anstyle", + "env_filter", + "jiff", + "log", +] + [[package]] name = "equivalent" version = "1.0.2" @@ -663,6 +748,12 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "file_io" version = "0.5.0" @@ -677,6 +768,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +[[package]] +name = "fixedbitset" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" + [[package]] name = "flate2" version = "1.1.4" @@ -1249,6 +1346,12 @@ dependencies = [ "serde", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + [[package]] name = "itertools" version = "0.14.0" @@ -1267,6 +1370,53 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "jelly" version = "0.1.0" +dependencies = [ + "jelly 0.1.0 (git+https://github.com/Jelly-RDF/jelly_rs)", + "quick-protobuf", +] + +[[package]] +name = "jelly" +version = "0.1.0" +source = "git+https://github.com/Jelly-RDF/jelly_rs#a9c3ebf5c6db8d6b0c2f4b1d89bef17aa7cf6410" +dependencies = [ + "env_logger", + "log", + "paste", + "prost", + "prost-build", + "prost-types", + "sophia_api", + "sophia_inmem", + "sophia_iri", + "sophia_term", + "sophia_turtle", + "thiserror", +] + +[[package]] +name = "jiff" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde_core", +] + +[[package]] +name = "jiff-static" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] [[package]] name = "jobserver" @@ -1485,6 +1635,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "mownstr" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b33dce847b8623c1f2e473ed3a05e43d0c395e3b93fab62378b6ae94b0a1c42c" + +[[package]] +name = "multimap" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" + [[package]] name = "ndarray" version = "0.16.1" @@ -1622,6 +1784,12 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + [[package]] name = "openssl-probe" version = "0.1.6" @@ -1784,6 +1952,17 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "petgraph" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" +dependencies = [ + "fixedbitset", + "hashbrown 0.15.5", + "indexmap", +] + [[package]] name = "phf" version = "0.12.1" @@ -2408,6 +2587,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro2" version = "1.0.101" @@ -2417,6 +2606,57 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-build" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" +dependencies = [ + "heck", + "itertools", + "log", + "multimap", + "petgraph", + "prettyplease", + "prost", + "prost-types", + "regex", + "syn", + "tempfile", +] + +[[package]] +name = "prost-derive" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "prost-types" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" +dependencies = [ + "prost", +] + [[package]] name = "psm" version = "0.1.27" @@ -2541,6 +2781,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "quick-protobuf" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d6da84cc204722a989e01ba2f6e1e276e190f22263d0cb6ce8526fcdb0d2e1f" +dependencies = [ + "byteorder", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -2852,6 +3101,12 @@ dependencies = [ "web-sys", ] +[[package]] +name = "resiter" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbc95d56eb1865f69288945759cc0879d60ee68168dce676730275804ad2b276" + [[package]] name = "ring" version = "0.17.14" @@ -2866,6 +3121,23 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rio_api" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61d0c76ddf8b00cbb4d2c5932d067d49245c2f1f651809bde3cf265033ddb1af" + +[[package]] +name = "rio_turtle" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6f351b77353c7c896f0cd5ced2a25a7e95b5360cb68d1d7c16682ee096d7f40" +dependencies = [ + "oxilangtag", + "oxiri", + "rio_api", +] + [[package]] name = "rmp" version = "0.8.14" @@ -3213,6 +3485,80 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "sophia_api" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "103a4138290bec38b9b10e0682b613173a102bca9fd2a74b3db25346e22599a3" +dependencies = [ + "lazy_static", + "mownstr", + "regex", + "resiter", + "serde", + "sophia_iri", + "thiserror", +] + +[[package]] +name = "sophia_inmem" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebacba4fa7baed53f89844a5c9e5962d6232a449d5b450b9de72bb67f0203332" +dependencies = [ + "sophia_api", + "thiserror", +] + +[[package]] +name = "sophia_iri" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7675ff44ad920ac07fde1b61ff20d3c832d8cb65395416906df90b76631ea95f" +dependencies = [ + "lazy_static", + "oxiri", + "regex", + "serde", + "thiserror", +] + +[[package]] +name = "sophia_rio" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a2938da8eeb8645ff616e64ac99af8099772c3e22a955ae5669ceac5372c34" +dependencies = [ + "rio_api", + "sophia_api", + "sophia_iri", +] + +[[package]] +name = "sophia_term" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f4c42480d50d14ac7128ad738d28b68368938cb6f507c9505f68875fd0e4db" +dependencies = [ + "lazy_static", + "sophia_api", +] + +[[package]] +name = "sophia_turtle" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9ff316c00bed741ba431b8533b2ce08e089ca031c742d9b6cccdf01b7f6ef2d" +dependencies = [ + "lazy_static", + "oxiri", + "regex", + "rio_turtle", + "sophia_api", + "sophia_iri", + "sophia_rio", +] + [[package]] name = "sparesults" version = "0.3.2" @@ -3363,6 +3709,19 @@ version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" +[[package]] +name = "tempfile" +version = "3.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +dependencies = [ + "fastrand", + "getrandom 0.3.3", + "once_cell", + "rustix", + "windows-sys 0.61.2", +] + [[package]] name = "templates" version = "0.1.0" @@ -3621,6 +3980,7 @@ dependencies = [ "file_io", "fts", "itoa", + "jelly 0.1.0 (git+https://github.com/Jelly-RDF/jelly_rs)", "memmap2", "oxjsonld", "oxrdf", @@ -3728,6 +4088,12 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "utils" version = "0.1.0" diff --git a/lib/jelly/Cargo.toml b/lib/jelly/Cargo.toml index 032f1f08..19c12d98 100644 --- a/lib/jelly/Cargo.toml +++ b/lib/jelly/Cargo.toml @@ -5,3 +5,5 @@ rust-version.workspace = true edition.workspace = true [dependencies] +quick-protobuf = "0.8.1" +jelly = { git = "https://github.com/Jelly-RDF/jelly_rs" } diff --git a/lib/jelly/src/eu/mod.rs b/lib/jelly/src/eu/mod.rs new file mode 100644 index 00000000..69ecf387 --- /dev/null +++ b/lib/jelly/src/eu/mod.rs @@ -0,0 +1,2 @@ +// Automatically generated mod.rs +pub mod ostrzyciel; diff --git a/lib/jelly/src/eu/ostrzyciel/jelly/core/mod.rs b/lib/jelly/src/eu/ostrzyciel/jelly/core/mod.rs new file mode 100644 index 00000000..5e388963 --- /dev/null +++ b/lib/jelly/src/eu/ostrzyciel/jelly/core/mod.rs @@ -0,0 +1,2 @@ +// Automatically generated mod.rs +pub mod proto; diff --git a/lib/jelly/src/eu/ostrzyciel/jelly/core/proto/mod.rs b/lib/jelly/src/eu/ostrzyciel/jelly/core/proto/mod.rs new file mode 100644 index 00000000..1afc9b55 --- /dev/null +++ b/lib/jelly/src/eu/ostrzyciel/jelly/core/proto/mod.rs @@ -0,0 +1,2 @@ +// Automatically generated mod.rs +pub mod v1; diff --git a/lib/jelly/src/eu/ostrzyciel/jelly/core/proto/v1.rs b/lib/jelly/src/eu/ostrzyciel/jelly/core/proto/v1.rs new file mode 100644 index 00000000..6390db12 --- /dev/null +++ b/lib/jelly/src/eu/ostrzyciel/jelly/core/proto/v1.rs @@ -0,0 +1,911 @@ +// Automatically generated rust module for 'rdf.proto' file + +#![allow(non_snake_case)] +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(unused_imports)] +#![allow(unknown_lints)] +#![allow(clippy::all)] +#![cfg_attr(rustfmt, rustfmt_skip)] + + +use std::borrow::Cow; +use std::collections::HashMap; +type KVMap = HashMap; +use quick_protobuf::{MessageInfo, MessageRead, MessageWrite, BytesReader, Writer, WriterBackend, Result}; +use quick_protobuf::sizeofs::*; +use super::super::super::super::super::super::*; + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum PhysicalStreamType { + PHYSICAL_STREAM_TYPE_UNSPECIFIED = 0, + PHYSICAL_STREAM_TYPE_TRIPLES = 1, + PHYSICAL_STREAM_TYPE_QUADS = 2, + PHYSICAL_STREAM_TYPE_GRAPHS = 3, +} + +impl Default for PhysicalStreamType { + fn default() -> Self { + PhysicalStreamType::PHYSICAL_STREAM_TYPE_UNSPECIFIED + } +} + +impl From for PhysicalStreamType { + fn from(i: i32) -> Self { + match i { + 0 => PhysicalStreamType::PHYSICAL_STREAM_TYPE_UNSPECIFIED, + 1 => PhysicalStreamType::PHYSICAL_STREAM_TYPE_TRIPLES, + 2 => PhysicalStreamType::PHYSICAL_STREAM_TYPE_QUADS, + 3 => PhysicalStreamType::PHYSICAL_STREAM_TYPE_GRAPHS, + _ => Self::default(), + } + } +} + +impl<'a> From<&'a str> for PhysicalStreamType { + fn from(s: &'a str) -> Self { + match s { + "PHYSICAL_STREAM_TYPE_UNSPECIFIED" => PhysicalStreamType::PHYSICAL_STREAM_TYPE_UNSPECIFIED, + "PHYSICAL_STREAM_TYPE_TRIPLES" => PhysicalStreamType::PHYSICAL_STREAM_TYPE_TRIPLES, + "PHYSICAL_STREAM_TYPE_QUADS" => PhysicalStreamType::PHYSICAL_STREAM_TYPE_QUADS, + "PHYSICAL_STREAM_TYPE_GRAPHS" => PhysicalStreamType::PHYSICAL_STREAM_TYPE_GRAPHS, + _ => Self::default(), + } + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum LogicalStreamType { + LOGICAL_STREAM_TYPE_UNSPECIFIED = 0, + LOGICAL_STREAM_TYPE_FLAT_TRIPLES = 1, + LOGICAL_STREAM_TYPE_FLAT_QUADS = 2, + LOGICAL_STREAM_TYPE_GRAPHS = 3, + LOGICAL_STREAM_TYPE_DATASETS = 4, + LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS = 13, + LOGICAL_STREAM_TYPE_NAMED_GRAPHS = 14, + LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS = 114, +} + +impl Default for LogicalStreamType { + fn default() -> Self { + LogicalStreamType::LOGICAL_STREAM_TYPE_UNSPECIFIED + } +} + +impl From for LogicalStreamType { + fn from(i: i32) -> Self { + match i { + 0 => LogicalStreamType::LOGICAL_STREAM_TYPE_UNSPECIFIED, + 1 => LogicalStreamType::LOGICAL_STREAM_TYPE_FLAT_TRIPLES, + 2 => LogicalStreamType::LOGICAL_STREAM_TYPE_FLAT_QUADS, + 3 => LogicalStreamType::LOGICAL_STREAM_TYPE_GRAPHS, + 4 => LogicalStreamType::LOGICAL_STREAM_TYPE_DATASETS, + 13 => LogicalStreamType::LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS, + 14 => LogicalStreamType::LOGICAL_STREAM_TYPE_NAMED_GRAPHS, + 114 => LogicalStreamType::LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS, + _ => Self::default(), + } + } +} + +impl<'a> From<&'a str> for LogicalStreamType { + fn from(s: &'a str) -> Self { + match s { + "LOGICAL_STREAM_TYPE_UNSPECIFIED" => LogicalStreamType::LOGICAL_STREAM_TYPE_UNSPECIFIED, + "LOGICAL_STREAM_TYPE_FLAT_TRIPLES" => LogicalStreamType::LOGICAL_STREAM_TYPE_FLAT_TRIPLES, + "LOGICAL_STREAM_TYPE_FLAT_QUADS" => LogicalStreamType::LOGICAL_STREAM_TYPE_FLAT_QUADS, + "LOGICAL_STREAM_TYPE_GRAPHS" => LogicalStreamType::LOGICAL_STREAM_TYPE_GRAPHS, + "LOGICAL_STREAM_TYPE_DATASETS" => LogicalStreamType::LOGICAL_STREAM_TYPE_DATASETS, + "LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS" => LogicalStreamType::LOGICAL_STREAM_TYPE_SUBJECT_GRAPHS, + "LOGICAL_STREAM_TYPE_NAMED_GRAPHS" => LogicalStreamType::LOGICAL_STREAM_TYPE_NAMED_GRAPHS, + "LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS" => LogicalStreamType::LOGICAL_STREAM_TYPE_TIMESTAMPED_NAMED_GRAPHS, + _ => Self::default(), + } + } +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfIri { + pub prefix_id: u32, + pub name_id: u32, +} + +impl<'a> MessageRead<'a> for RdfIri { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(8) => msg.prefix_id = r.read_uint32(bytes)?, + Ok(16) => msg.name_id = r.read_uint32(bytes)?, + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl MessageWrite for RdfIri { + fn get_size(&self) -> usize { + 0 + + if self.prefix_id == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.prefix_id) as u64) } + + if self.name_id == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.name_id) as u64) } + } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + if self.prefix_id != 0u32 { w.write_with_tag(8, |w| w.write_uint32(*&self.prefix_id))?; } + if self.name_id != 0u32 { w.write_with_tag(16, |w| w.write_uint32(*&self.name_id))?; } + Ok(()) + } +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfLiteral<'a> { + pub lex: Cow<'a, str>, + pub literalKind: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind<'a>, +} + +impl<'a> MessageRead<'a> for RdfLiteral<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => msg.lex = r.read_string(bytes).map(Cow::Borrowed)?, + Ok(18) => msg.literalKind = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind::langtag(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(24) => msg.literalKind = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind::datatype(r.read_uint32(bytes)?), + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfLiteral<'a> { + fn get_size(&self) -> usize { + 0 + + if self.lex == "" { 0 } else { 1 + sizeof_len((&self.lex).len()) } + + match self.literalKind { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind::langtag(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind::datatype(ref m) => 1 + sizeof_varint(*(m) as u64), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind::None => 0, + } } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + if self.lex != "" { w.write_with_tag(10, |w| w.write_string(&**&self.lex))?; } + match self.literalKind { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind::langtag(ref m) => { w.write_with_tag(18, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind::datatype(ref m) => { w.write_with_tag(24, |w| w.write_uint32(*m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind::None => {}, + } Ok(()) + } +} + +pub mod mod_RdfLiteral { + +use super::*; + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfliteralKind<'a> { + langtag(Cow<'a, str>), + datatype(u32), + None, +} + +impl<'a> Default for OneOfliteralKind<'a> { + fn default() -> Self { + OneOfliteralKind::None + } +} + +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfDefaultGraph { } + +impl<'a> MessageRead<'a> for RdfDefaultGraph { + fn from_reader(r: &mut BytesReader, _: &[u8]) -> Result { + r.read_to_end(); + Ok(Self::default()) + } +} + +impl MessageWrite for RdfDefaultGraph { } + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfTriple<'a> { + pub subject: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject<'a>, + pub predicate: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate<'a>, + pub object: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject<'a>, +} + +impl<'a> MessageRead<'a> for RdfTriple<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => msg.subject = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_iri(r.read_message::(bytes)?), + Ok(18) => msg.subject = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_bnode(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(26) => msg.subject = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_literal(r.read_message::(bytes)?), + Ok(34) => msg.subject = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_triple_term(Box::new(r.read_message::(bytes)?)), + Ok(42) => msg.predicate = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_iri(r.read_message::(bytes)?), + Ok(50) => msg.predicate = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_bnode(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(58) => msg.predicate = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_literal(r.read_message::(bytes)?), + Ok(66) => msg.predicate = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_triple_term(Box::new(r.read_message::(bytes)?)), + Ok(74) => msg.object = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_iri(r.read_message::(bytes)?), + Ok(82) => msg.object = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_bnode(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(90) => msg.object = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_literal(r.read_message::(bytes)?), + Ok(98) => msg.object = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_triple_term(Box::new(r.read_message::(bytes)?)), + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfTriple<'a> { + fn get_size(&self) -> usize { + 0 + + match self.subject { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_iri(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_bnode(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_literal(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_triple_term(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::None => 0, + } + match self.predicate { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_iri(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_bnode(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_literal(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_triple_term(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::None => 0, + } + match self.object { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_iri(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_bnode(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_literal(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_triple_term(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::None => 0, + } } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + match self.subject { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_iri(ref m) => { w.write_with_tag(10, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_bnode(ref m) => { w.write_with_tag(18, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_literal(ref m) => { w.write_with_tag(26, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::s_triple_term(ref m) => { w.write_with_tag(34, |w| w.write_message(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfsubject::None => {}, + } match self.predicate { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_iri(ref m) => { w.write_with_tag(42, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_bnode(ref m) => { w.write_with_tag(50, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_literal(ref m) => { w.write_with_tag(58, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::p_triple_term(ref m) => { w.write_with_tag(66, |w| w.write_message(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfpredicate::None => {}, + } match self.object { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_iri(ref m) => { w.write_with_tag(74, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_bnode(ref m) => { w.write_with_tag(82, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_literal(ref m) => { w.write_with_tag(90, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::o_triple_term(ref m) => { w.write_with_tag(98, |w| w.write_message(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::OneOfobject::None => {}, + } Ok(()) + } +} + +pub mod mod_RdfTriple { + +use super::*; + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfsubject<'a> { + s_iri(eu::ostrzyciel::jelly::core::proto::v1::RdfIri), + s_bnode(Cow<'a, str>), + s_literal(eu::ostrzyciel::jelly::core::proto::v1::RdfLiteral<'a>), + s_triple_term(Box>), + None, +} + +impl<'a> Default for OneOfsubject<'a> { + fn default() -> Self { + OneOfsubject::None + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfpredicate<'a> { + p_iri(eu::ostrzyciel::jelly::core::proto::v1::RdfIri), + p_bnode(Cow<'a, str>), + p_literal(eu::ostrzyciel::jelly::core::proto::v1::RdfLiteral<'a>), + p_triple_term(Box>), + None, +} + +impl<'a> Default for OneOfpredicate<'a> { + fn default() -> Self { + OneOfpredicate::None + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfobject<'a> { + o_iri(eu::ostrzyciel::jelly::core::proto::v1::RdfIri), + o_bnode(Cow<'a, str>), + o_literal(eu::ostrzyciel::jelly::core::proto::v1::RdfLiteral<'a>), + o_triple_term(Box>), + None, +} + +impl<'a> Default for OneOfobject<'a> { + fn default() -> Self { + OneOfobject::None + } +} + +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfQuad<'a> { + pub subject: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject<'a>, + pub predicate: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate<'a>, + pub object: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject<'a>, + pub graph: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph<'a>, +} + +impl<'a> MessageRead<'a> for RdfQuad<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => msg.subject = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_iri(r.read_message::(bytes)?), + Ok(18) => msg.subject = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_bnode(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(26) => msg.subject = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_literal(r.read_message::(bytes)?), + Ok(34) => msg.subject = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_triple_term(r.read_message::(bytes)?), + Ok(42) => msg.predicate = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_iri(r.read_message::(bytes)?), + Ok(50) => msg.predicate = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_bnode(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(58) => msg.predicate = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_literal(r.read_message::(bytes)?), + Ok(66) => msg.predicate = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_triple_term(r.read_message::(bytes)?), + Ok(74) => msg.object = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_iri(r.read_message::(bytes)?), + Ok(82) => msg.object = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_bnode(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(90) => msg.object = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_literal(r.read_message::(bytes)?), + Ok(98) => msg.object = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_triple_term(r.read_message::(bytes)?), + Ok(106) => msg.graph = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_iri(r.read_message::(bytes)?), + Ok(114) => msg.graph = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_bnode(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(122) => msg.graph = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_default_graph(r.read_message::(bytes)?), + Ok(130) => msg.graph = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_literal(r.read_message::(bytes)?), + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfQuad<'a> { + fn get_size(&self) -> usize { + 0 + + match self.subject { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_iri(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_bnode(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_literal(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_triple_term(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::None => 0, + } + match self.predicate { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_iri(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_bnode(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_literal(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_triple_term(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::None => 0, + } + match self.object { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_iri(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_bnode(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_literal(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_triple_term(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::None => 0, + } + match self.graph { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_iri(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_bnode(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_default_graph(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_literal(ref m) => 2 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::None => 0, + } } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + match self.subject { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_iri(ref m) => { w.write_with_tag(10, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_bnode(ref m) => { w.write_with_tag(18, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_literal(ref m) => { w.write_with_tag(26, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::s_triple_term(ref m) => { w.write_with_tag(34, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfsubject::None => {}, + } match self.predicate { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_iri(ref m) => { w.write_with_tag(42, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_bnode(ref m) => { w.write_with_tag(50, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_literal(ref m) => { w.write_with_tag(58, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::p_triple_term(ref m) => { w.write_with_tag(66, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfpredicate::None => {}, + } match self.object { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_iri(ref m) => { w.write_with_tag(74, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_bnode(ref m) => { w.write_with_tag(82, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_literal(ref m) => { w.write_with_tag(90, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::o_triple_term(ref m) => { w.write_with_tag(98, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfobject::None => {}, + } match self.graph { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_iri(ref m) => { w.write_with_tag(106, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_bnode(ref m) => { w.write_with_tag(114, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_default_graph(ref m) => { w.write_with_tag(122, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::g_literal(ref m) => { w.write_with_tag(130, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfQuad::OneOfgraph::None => {}, + } Ok(()) + } +} + +pub mod mod_RdfQuad { + +use super::*; + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfsubject<'a> { + s_iri(eu::ostrzyciel::jelly::core::proto::v1::RdfIri), + s_bnode(Cow<'a, str>), + s_literal(eu::ostrzyciel::jelly::core::proto::v1::RdfLiteral<'a>), + s_triple_term(eu::ostrzyciel::jelly::core::proto::v1::RdfTriple<'a>), + None, +} + +impl<'a> Default for OneOfsubject<'a> { + fn default() -> Self { + OneOfsubject::None + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfpredicate<'a> { + p_iri(eu::ostrzyciel::jelly::core::proto::v1::RdfIri), + p_bnode(Cow<'a, str>), + p_literal(eu::ostrzyciel::jelly::core::proto::v1::RdfLiteral<'a>), + p_triple_term(eu::ostrzyciel::jelly::core::proto::v1::RdfTriple<'a>), + None, +} + +impl<'a> Default for OneOfpredicate<'a> { + fn default() -> Self { + OneOfpredicate::None + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfobject<'a> { + o_iri(eu::ostrzyciel::jelly::core::proto::v1::RdfIri), + o_bnode(Cow<'a, str>), + o_literal(eu::ostrzyciel::jelly::core::proto::v1::RdfLiteral<'a>), + o_triple_term(eu::ostrzyciel::jelly::core::proto::v1::RdfTriple<'a>), + None, +} + +impl<'a> Default for OneOfobject<'a> { + fn default() -> Self { + OneOfobject::None + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfgraph<'a> { + g_iri(eu::ostrzyciel::jelly::core::proto::v1::RdfIri), + g_bnode(Cow<'a, str>), + g_default_graph(eu::ostrzyciel::jelly::core::proto::v1::RdfDefaultGraph), + g_literal(eu::ostrzyciel::jelly::core::proto::v1::RdfLiteral<'a>), + None, +} + +impl<'a> Default for OneOfgraph<'a> { + fn default() -> Self { + OneOfgraph::None + } +} + +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfGraphStart<'a> { + pub graph: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph<'a>, +} + +impl<'a> MessageRead<'a> for RdfGraphStart<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => msg.graph = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_iri(r.read_message::(bytes)?), + Ok(18) => msg.graph = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_bnode(r.read_string(bytes).map(Cow::Borrowed)?), + Ok(26) => msg.graph = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_default_graph(r.read_message::(bytes)?), + Ok(34) => msg.graph = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_literal(r.read_message::(bytes)?), + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfGraphStart<'a> { + fn get_size(&self) -> usize { + 0 + + match self.graph { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_iri(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_bnode(ref m) => 1 + sizeof_len((m).len()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_default_graph(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_literal(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::None => 0, + } } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + match self.graph { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_iri(ref m) => { w.write_with_tag(10, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_bnode(ref m) => { w.write_with_tag(18, |w| w.write_string(&**m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_default_graph(ref m) => { w.write_with_tag(26, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::g_literal(ref m) => { w.write_with_tag(34, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfGraphStart::OneOfgraph::None => {}, + } Ok(()) + } +} + +pub mod mod_RdfGraphStart { + +use super::*; + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfgraph<'a> { + g_iri(eu::ostrzyciel::jelly::core::proto::v1::RdfIri), + g_bnode(Cow<'a, str>), + g_default_graph(eu::ostrzyciel::jelly::core::proto::v1::RdfDefaultGraph), + g_literal(eu::ostrzyciel::jelly::core::proto::v1::RdfLiteral<'a>), + None, +} + +impl<'a> Default for OneOfgraph<'a> { + fn default() -> Self { + OneOfgraph::None + } +} + +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfGraphEnd { } + +impl<'a> MessageRead<'a> for RdfGraphEnd { + fn from_reader(r: &mut BytesReader, _: &[u8]) -> Result { + r.read_to_end(); + Ok(Self::default()) + } +} + +impl MessageWrite for RdfGraphEnd { } + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfNamespaceDeclaration<'a> { + pub name: Cow<'a, str>, + pub value: Option, +} + +impl<'a> MessageRead<'a> for RdfNamespaceDeclaration<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => msg.name = r.read_string(bytes).map(Cow::Borrowed)?, + Ok(18) => msg.value = Some(r.read_message::(bytes)?), + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfNamespaceDeclaration<'a> { + fn get_size(&self) -> usize { + 0 + + if self.name == "" { 0 } else { 1 + sizeof_len((&self.name).len()) } + + self.value.as_ref().map_or(0, |m| 1 + sizeof_len((m).get_size())) + } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + if self.name != "" { w.write_with_tag(10, |w| w.write_string(&**&self.name))?; } + if let Some(ref s) = self.value { w.write_with_tag(18, |w| w.write_message(s))?; } + Ok(()) + } +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfNameEntry<'a> { + pub id: u32, + pub value: Cow<'a, str>, +} + +impl<'a> MessageRead<'a> for RdfNameEntry<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(8) => msg.id = r.read_uint32(bytes)?, + Ok(18) => msg.value = r.read_string(bytes).map(Cow::Borrowed)?, + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfNameEntry<'a> { + fn get_size(&self) -> usize { + 0 + + if self.id == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.id) as u64) } + + if self.value == "" { 0 } else { 1 + sizeof_len((&self.value).len()) } + } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + if self.id != 0u32 { w.write_with_tag(8, |w| w.write_uint32(*&self.id))?; } + if self.value != "" { w.write_with_tag(18, |w| w.write_string(&**&self.value))?; } + Ok(()) + } +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfPrefixEntry<'a> { + pub id: u32, + pub value: Cow<'a, str>, +} + +impl<'a> MessageRead<'a> for RdfPrefixEntry<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(8) => msg.id = r.read_uint32(bytes)?, + Ok(18) => msg.value = r.read_string(bytes).map(Cow::Borrowed)?, + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfPrefixEntry<'a> { + fn get_size(&self) -> usize { + 0 + + if self.id == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.id) as u64) } + + if self.value == "" { 0 } else { 1 + sizeof_len((&self.value).len()) } + } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + if self.id != 0u32 { w.write_with_tag(8, |w| w.write_uint32(*&self.id))?; } + if self.value != "" { w.write_with_tag(18, |w| w.write_string(&**&self.value))?; } + Ok(()) + } +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfDatatypeEntry<'a> { + pub id: u32, + pub value: Cow<'a, str>, +} + +impl<'a> MessageRead<'a> for RdfDatatypeEntry<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(8) => msg.id = r.read_uint32(bytes)?, + Ok(18) => msg.value = r.read_string(bytes).map(Cow::Borrowed)?, + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfDatatypeEntry<'a> { + fn get_size(&self) -> usize { + 0 + + if self.id == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.id) as u64) } + + if self.value == "" { 0 } else { 1 + sizeof_len((&self.value).len()) } + } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + if self.id != 0u32 { w.write_with_tag(8, |w| w.write_uint32(*&self.id))?; } + if self.value != "" { w.write_with_tag(18, |w| w.write_string(&**&self.value))?; } + Ok(()) + } +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfStreamOptions<'a> { + pub stream_name: Cow<'a, str>, + pub physical_type: eu::ostrzyciel::jelly::core::proto::v1::PhysicalStreamType, + pub generalized_statements: bool, + pub rdf_star: bool, + pub max_name_table_size: u32, + pub max_prefix_table_size: u32, + pub max_datatype_table_size: u32, + pub logical_type: eu::ostrzyciel::jelly::core::proto::v1::LogicalStreamType, + pub version: u32, +} + +impl<'a> MessageRead<'a> for RdfStreamOptions<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => msg.stream_name = r.read_string(bytes).map(Cow::Borrowed)?, + Ok(16) => msg.physical_type = r.read_enum(bytes)?, + Ok(24) => msg.generalized_statements = r.read_bool(bytes)?, + Ok(32) => msg.rdf_star = r.read_bool(bytes)?, + Ok(72) => msg.max_name_table_size = r.read_uint32(bytes)?, + Ok(80) => msg.max_prefix_table_size = r.read_uint32(bytes)?, + Ok(88) => msg.max_datatype_table_size = r.read_uint32(bytes)?, + Ok(112) => msg.logical_type = r.read_enum(bytes)?, + Ok(120) => msg.version = r.read_uint32(bytes)?, + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfStreamOptions<'a> { + fn get_size(&self) -> usize { + 0 + + if self.stream_name == "" { 0 } else { 1 + sizeof_len((&self.stream_name).len()) } + + if self.physical_type == eu::ostrzyciel::jelly::core::proto::v1::PhysicalStreamType::PHYSICAL_STREAM_TYPE_UNSPECIFIED { 0 } else { 1 + sizeof_varint(*(&self.physical_type) as u64) } + + if self.generalized_statements == false { 0 } else { 1 + sizeof_varint(*(&self.generalized_statements) as u64) } + + if self.rdf_star == false { 0 } else { 1 + sizeof_varint(*(&self.rdf_star) as u64) } + + if self.max_name_table_size == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.max_name_table_size) as u64) } + + if self.max_prefix_table_size == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.max_prefix_table_size) as u64) } + + if self.max_datatype_table_size == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.max_datatype_table_size) as u64) } + + if self.logical_type == eu::ostrzyciel::jelly::core::proto::v1::LogicalStreamType::LOGICAL_STREAM_TYPE_UNSPECIFIED { 0 } else { 1 + sizeof_varint(*(&self.logical_type) as u64) } + + if self.version == 0u32 { 0 } else { 1 + sizeof_varint(*(&self.version) as u64) } + } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + if self.stream_name != "" { w.write_with_tag(10, |w| w.write_string(&**&self.stream_name))?; } + if self.physical_type != eu::ostrzyciel::jelly::core::proto::v1::PhysicalStreamType::PHYSICAL_STREAM_TYPE_UNSPECIFIED { w.write_with_tag(16, |w| w.write_enum(*&self.physical_type as i32))?; } + if self.generalized_statements != false { w.write_with_tag(24, |w| w.write_bool(*&self.generalized_statements))?; } + if self.rdf_star != false { w.write_with_tag(32, |w| w.write_bool(*&self.rdf_star))?; } + if self.max_name_table_size != 0u32 { w.write_with_tag(72, |w| w.write_uint32(*&self.max_name_table_size))?; } + if self.max_prefix_table_size != 0u32 { w.write_with_tag(80, |w| w.write_uint32(*&self.max_prefix_table_size))?; } + if self.max_datatype_table_size != 0u32 { w.write_with_tag(88, |w| w.write_uint32(*&self.max_datatype_table_size))?; } + if self.logical_type != eu::ostrzyciel::jelly::core::proto::v1::LogicalStreamType::LOGICAL_STREAM_TYPE_UNSPECIFIED { w.write_with_tag(112, |w| w.write_enum(*&self.logical_type as i32))?; } + if self.version != 0u32 { w.write_with_tag(120, |w| w.write_uint32(*&self.version))?; } + Ok(()) + } +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfStreamRow<'a> { + pub row: eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow<'a>, +} + +impl<'a> MessageRead<'a> for RdfStreamRow<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::options(r.read_message::(bytes)?), + Ok(18) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::triple(r.read_message::(bytes)?), + Ok(26) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::quad(r.read_message::(bytes)?), + Ok(34) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::graph_start(r.read_message::(bytes)?), + Ok(42) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::graph_end(r.read_message::(bytes)?), + Ok(50) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::namespace(r.read_message::(bytes)?), + Ok(74) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::name(r.read_message::(bytes)?), + Ok(82) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::prefix(r.read_message::(bytes)?), + Ok(90) => msg.row = eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::datatype(r.read_message::(bytes)?), + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfStreamRow<'a> { + fn get_size(&self) -> usize { + 0 + + match self.row { + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::options(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::triple(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::quad(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::graph_start(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::graph_end(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::namespace(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::name(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::prefix(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::datatype(ref m) => 1 + sizeof_len((m).get_size()), + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::None => 0, + } } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + match self.row { eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::options(ref m) => { w.write_with_tag(10, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::triple(ref m) => { w.write_with_tag(18, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::quad(ref m) => { w.write_with_tag(26, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::graph_start(ref m) => { w.write_with_tag(34, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::graph_end(ref m) => { w.write_with_tag(42, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::namespace(ref m) => { w.write_with_tag(50, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::name(ref m) => { w.write_with_tag(74, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::prefix(ref m) => { w.write_with_tag(82, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::datatype(ref m) => { w.write_with_tag(90, |w| w.write_message(m))? }, + eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow::None => {}, + } Ok(()) + } +} + +pub mod mod_RdfStreamRow { + +use super::*; + +#[derive(Debug, PartialEq, Clone)] +pub enum OneOfrow<'a> { + options(eu::ostrzyciel::jelly::core::proto::v1::RdfStreamOptions<'a>), + triple(eu::ostrzyciel::jelly::core::proto::v1::RdfTriple<'a>), + quad(eu::ostrzyciel::jelly::core::proto::v1::RdfQuad<'a>), + graph_start(eu::ostrzyciel::jelly::core::proto::v1::RdfGraphStart<'a>), + graph_end(eu::ostrzyciel::jelly::core::proto::v1::RdfGraphEnd), + namespace(eu::ostrzyciel::jelly::core::proto::v1::RdfNamespaceDeclaration<'a>), + name(eu::ostrzyciel::jelly::core::proto::v1::RdfNameEntry<'a>), + prefix(eu::ostrzyciel::jelly::core::proto::v1::RdfPrefixEntry<'a>), + datatype(eu::ostrzyciel::jelly::core::proto::v1::RdfDatatypeEntry<'a>), + None, +} + +impl<'a> Default for OneOfrow<'a> { + fn default() -> Self { + OneOfrow::None + } +} + +} + +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Debug, Default, PartialEq, Clone)] +pub struct RdfStreamFrame<'a> { + pub rows: Vec>, + pub metadata: KVMap, Cow<'a, [u8]>>, +} + +impl<'a> MessageRead<'a> for RdfStreamFrame<'a> { + fn from_reader(r: &mut BytesReader, bytes: &'a [u8]) -> Result { + let mut msg = Self::default(); + while !r.is_eof() { + match r.next_tag(bytes) { + Ok(10) => msg.rows.push(r.read_message::(bytes)?), + Ok(122) => { + let (key, value) = r.read_map(bytes, |r, bytes| Ok(r.read_string(bytes).map(Cow::Borrowed)?), |r, bytes| Ok(r.read_bytes(bytes).map(Cow::Borrowed)?))?; + msg.metadata.insert(key, value); + } + Ok(t) => { r.read_unknown(bytes, t)?; } + Err(e) => return Err(e), + } + } + Ok(msg) + } +} + +impl<'a> MessageWrite for RdfStreamFrame<'a> { + fn get_size(&self) -> usize { + 0 + + self.rows.iter().map(|s| 1 + sizeof_len((s).get_size())).sum::() + + self.metadata.iter().map(|(k, v)| 1 + sizeof_len(2 + sizeof_len((k).len()) + sizeof_len((v).len()))).sum::() + } + + fn write_message(&self, w: &mut Writer) -> Result<()> { + for s in &self.rows { w.write_with_tag(10, |w| w.write_message(s))?; } + for (k, v) in self.metadata.iter() { w.write_with_tag(122, |w| w.write_map(2 + sizeof_len((k).len()) + sizeof_len((v).len()), 10, |w| w.write_string(&**k), 18, |w| w.write_bytes(&**v)))?; } + Ok(()) + } +} + diff --git a/lib/jelly/src/eu/ostrzyciel/jelly/mod.rs b/lib/jelly/src/eu/ostrzyciel/jelly/mod.rs new file mode 100644 index 00000000..7ccad216 --- /dev/null +++ b/lib/jelly/src/eu/ostrzyciel/jelly/mod.rs @@ -0,0 +1,2 @@ +// Automatically generated mod.rs +pub mod core; diff --git a/lib/jelly/src/eu/ostrzyciel/mod.rs b/lib/jelly/src/eu/ostrzyciel/mod.rs new file mode 100644 index 00000000..26e85b21 --- /dev/null +++ b/lib/jelly/src/eu/ostrzyciel/mod.rs @@ -0,0 +1,2 @@ +// Automatically generated mod.rs +pub mod jelly; diff --git a/lib/jelly/src/lib.rs b/lib/jelly/src/lib.rs new file mode 100644 index 00000000..0ab1871b --- /dev/null +++ b/lib/jelly/src/lib.rs @@ -0,0 +1 @@ +fn main () {} \ No newline at end of file diff --git a/lib/jelly/src/main.rs b/lib/jelly/src/main.rs deleted file mode 100644 index e7a11a96..00000000 --- a/lib/jelly/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - println!("Hello, world!"); -} diff --git a/lib/triplestore/Cargo.toml b/lib/triplestore/Cargo.toml index c2b09ac5..44dff42c 100644 --- a/lib/triplestore/Cargo.toml +++ b/lib/triplestore/Cargo.toml @@ -15,6 +15,7 @@ file_io = { path = "../file_io" } spargebra = { path = "../spargebra" } fts = {path ="../fts"} cimxml_import = {path = "../cimxml_import"} +jelly = { git = "https://github.com/Jelly-RDF/jelly_rs" } rayon.workspace = true sprs = { workspace = true, features = ["multi_thread"] } diff --git a/lib/triplestore/src/triples_read.rs b/lib/triplestore/src/triples_read.rs index bd8ba065..4f7fd1ca 100644 --- a/lib/triplestore/src/triples_read.rs +++ b/lib/triplestore/src/triples_read.rs @@ -33,12 +33,15 @@ use std::path::Path; use std::time::Instant; use tracing::{debug, instrument}; +use jelly::*; + type MapType = HashMap, Vec)>>; #[derive(Eq, PartialEq, Debug, Clone)] pub enum ExtendedRdfFormat { Normal(RdfFormat), CIMXML, + Jelly, } impl Triplestore { @@ -74,6 +77,8 @@ impl Triplestore { ExtendedRdfFormat::Normal(RdfFormat::JsonLd { profile: JsonLdProfileSet::empty(), }) + } else if path.extension() == Some("jelly".as_ref()) { + ExtendedRdfFormat::Jelly } else { todo!("Have not implemented file format {:?}", path); }; diff --git a/py_maplib/maplib/__init__.pyi b/py_maplib/maplib/__init__.pyi index 352b8753..4c545990 100644 --- a/py_maplib/maplib/__init__.pyi +++ b/py_maplib/maplib/__init__.pyi @@ -802,7 +802,7 @@ class Model: def read( self, file_path: Union[str, Path], - format: LiteralType["ntriples", "turtle", "rdf/xml", "cim/xml", "json-ld"] = None, + format: LiteralType["ntriples", "turtle", "rdf/xml", "cim/xml", "json-ld", "jelly"] = None, base_iri: str = None, transient: bool = False, parallel: bool = None, @@ -849,7 +849,7 @@ class Model: def reads( self, s: str, - format: LiteralType["ntriples", "turtle", "rdf/xml", "cim/xml", "json-ld"], + format: LiteralType["ntriples", "turtle", "rdf/xml", "cim/xml", "json-ld", "jelly"], base_iri: str = None, transient: bool = False, parallel: bool = None, diff --git a/py_maplib/src/lib.rs b/py_maplib/src/lib.rs index ddc2c6ea..548d4bda 100644 --- a/py_maplib/src/lib.rs +++ b/py_maplib/src/lib.rs @@ -1725,7 +1725,7 @@ fn resolve_normal_format(format: &str) -> Result { fn resolve_format(format: &str) -> Result { match format.to_lowercase().as_str() { - "cim" | "cim/xml" | "cimxml" => Ok(ExtendedRdfFormat::CIMXML), + "cim" | "cim/xml" | "cimxml" | "jelly" => Ok(ExtendedRdfFormat::CIMXML), f => match resolve_normal_format(format) { Ok(o) => Ok(ExtendedRdfFormat::Normal(o)), Err(e) => Err(e), From e0dcb21e9c281209f09098a74620d767cb089b98 Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Sun, 8 Feb 2026 02:49:38 +0100 Subject: [PATCH 03/19] triples_write extendedrdfformat --- lib/triplestore/src/triples_write.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/triplestore/src/triples_write.rs b/lib/triplestore/src/triples_write.rs index 3f952f5f..8354a0eb 100644 --- a/lib/triplestore/src/triples_write.rs +++ b/lib/triplestore/src/triples_write.rs @@ -17,6 +17,7 @@ use representation::{ use std::collections::HashMap; use std::io::Write; use tracing::warn; +use crate::triples_read::ExtendedRdfFormat; mod fast_ntriples; mod pretty_turtle; @@ -28,12 +29,12 @@ impl Triplestore { pub fn write_triples( &mut self, buf: &mut W, - format: RdfFormat, + format: ExtendedRdfFormat, graph: &NamedGraph, prefixes: &HashMap, ) -> Result<(), TriplestoreError> { self.check_graph_exists(graph)?; - if RdfFormat::NTriples == format { + if ExtendedRdfFormat::Normal(RdfFormat::NTriples) == format { let n_threads = POOL.current_num_threads(); for (predicate, df_map) in self.graph_triples_map.get(graph).unwrap() { let predicate_string = predicate.to_string(); @@ -138,10 +139,10 @@ impl Triplestore { } } } - } else if RdfFormat::Turtle == format { + } else if ExtendedRdfFormat::Normal(RdfFormat::Turtle) == format { self.write_pretty_turtle(buf, graph, prefixes)?; - } else { - let mut writer = RdfSerializer::from_format(format).for_writer(buf); + } else if let ExtendedRdfFormat::Normal(rdf_format) = format { + let mut writer = RdfSerializer::from_format(rdf_format).for_writer(buf); for (predicate, df_map) in self.graph_triples_map.get(graph).unwrap() { for ((subject_type, object_type), tt) in df_map { From 6bda06f0b8b5855b8bf532a9f7d512e0546fb234 Mon Sep 17 00:00:00 2001 From: Eliathon Date: Fri, 13 Feb 2026 12:44:57 +0100 Subject: [PATCH 04/19] jelly writing, started making tests --- .gitignore | 2 +- Cargo.lock | 356 +-------------------------- lib/jelly/Cargo.toml | 2 +- lib/jelly/src/lib.rs | 230 ++++++++++++++++- lib/maplib/src/model.rs | 2 +- lib/triplestore/Cargo.toml | 2 +- lib/triplestore/src/errors.rs | 2 + lib/triplestore/src/triples_read.rs | 3 + lib/triplestore/src/triples_write.rs | 18 ++ py_maplib/src/lib.rs | 11 +- py_maplib/tests/test_jelly.py | 31 +++ 11 files changed, 295 insertions(+), 364 deletions(-) create mode 100644 py_maplib/tests/test_jelly.py diff --git a/.gitignore b/.gitignore index 6ad4f6b0..24d79044 100644 --- a/.gitignore +++ b/.gitignore @@ -3,7 +3,7 @@ /old_fun __pycache__ .idea - +.venv # Generated by Cargo # will have compiled files and executables diff --git a/Cargo.lock b/Cargo.lock index 3249c6ce..6f2ed063 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,62 +80,6 @@ dependencies = [ "libc", ] -[[package]] -name = "anstream" -version = "0.6.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" - -[[package]] -name = "anstyle-parse" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" -dependencies = [ - "anstyle", - "once_cell_polyfill", - "windows-sys 0.61.2", -] - -[[package]] -name = "anyhow" -version = "1.0.100" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" - [[package]] name = "approx" version = "0.3.2" @@ -454,12 +398,6 @@ dependencies = [ "representation", ] -[[package]] -name = "colorchoice" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" - [[package]] name = "comfy-table" version = "7.2.1" @@ -670,29 +608,6 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" -[[package]] -name = "env_filter" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" -dependencies = [ - "log", - "regex", -] - -[[package]] -name = "env_logger" -version = "0.11.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" -dependencies = [ - "anstream", - "anstyle", - "env_filter", - "jiff", - "log", -] - [[package]] name = "equivalent" version = "1.0.2" @@ -748,12 +663,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" -[[package]] -name = "fastrand" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" - [[package]] name = "file_io" version = "0.5.0" @@ -768,12 +677,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" -[[package]] -name = "fixedbitset" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" - [[package]] name = "flate2" version = "1.1.4" @@ -1346,12 +1249,6 @@ dependencies = [ "serde", ] -[[package]] -name = "is_terminal_polyfill" -version = "1.70.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" - [[package]] name = "itertools" version = "0.14.0" @@ -1371,53 +1268,10 @@ checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" name = "jelly" version = "0.1.0" dependencies = [ - "jelly 0.1.0 (git+https://github.com/Jelly-RDF/jelly_rs)", + "oxrdf", "quick-protobuf", ] -[[package]] -name = "jelly" -version = "0.1.0" -source = "git+https://github.com/Jelly-RDF/jelly_rs#a9c3ebf5c6db8d6b0c2f4b1d89bef17aa7cf6410" -dependencies = [ - "env_logger", - "log", - "paste", - "prost", - "prost-build", - "prost-types", - "sophia_api", - "sophia_inmem", - "sophia_iri", - "sophia_term", - "sophia_turtle", - "thiserror", -] - -[[package]] -name = "jiff" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e67e8da4c49d6d9909fe03361f9b620f58898859f5c7aded68351e85e71ecf50" -dependencies = [ - "jiff-static", - "log", - "portable-atomic", - "portable-atomic-util", - "serde_core", -] - -[[package]] -name = "jiff-static" -version = "0.2.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0c84ee7f197eca9a86c6fd6cb771e55eb991632f15f2bc3ca6ec838929e6e78" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "jobserver" version = "0.1.34" @@ -1635,18 +1489,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "mownstr" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b33dce847b8623c1f2e473ed3a05e43d0c395e3b93fab62378b6ae94b0a1c42c" - -[[package]] -name = "multimap" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" - [[package]] name = "ndarray" version = "0.16.1" @@ -1784,12 +1626,6 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" -[[package]] -name = "once_cell_polyfill" -version = "1.70.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" - [[package]] name = "openssl-probe" version = "0.1.6" @@ -1952,17 +1788,6 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" -[[package]] -name = "petgraph" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8701b58ea97060d5e5b155d383a69952a60943f0e6dfe30b04c287beb0b27455" -dependencies = [ - "fixedbitset", - "hashbrown 0.15.5", - "indexmap", -] - [[package]] name = "phf" version = "0.12.1" @@ -2587,16 +2412,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "prettyplease" -version = "0.2.37" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" -dependencies = [ - "proc-macro2", - "syn", -] - [[package]] name = "proc-macro2" version = "1.0.101" @@ -2606,57 +2421,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "prost" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2ea70524a2f82d518bce41317d0fae74151505651af45faf1ffbd6fd33f0568" -dependencies = [ - "bytes", - "prost-derive", -] - -[[package]] -name = "prost-build" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "343d3bd7056eda839b03204e68deff7d1b13aba7af2b2fd16890697274262ee7" -dependencies = [ - "heck", - "itertools", - "log", - "multimap", - "petgraph", - "prettyplease", - "prost", - "prost-types", - "regex", - "syn", - "tempfile", -] - -[[package]] -name = "prost-derive" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c6023962132f4b30eb4c172c91ce92d933da334c59c23cddee82358ddafb0b" -dependencies = [ - "anyhow", - "itertools", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "prost-types" -version = "0.14.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8991c4cbdb8bc5b11f0b074ffe286c30e523de90fee5ba8132f1399f23cb3dd7" -dependencies = [ - "prost", -] - [[package]] name = "psm" version = "0.1.27" @@ -3101,12 +2865,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "resiter" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbc95d56eb1865f69288945759cc0879d60ee68168dce676730275804ad2b276" - [[package]] name = "ring" version = "0.17.14" @@ -3121,23 +2879,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rio_api" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61d0c76ddf8b00cbb4d2c5932d067d49245c2f1f651809bde3cf265033ddb1af" - -[[package]] -name = "rio_turtle" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6f351b77353c7c896f0cd5ced2a25a7e95b5360cb68d1d7c16682ee096d7f40" -dependencies = [ - "oxilangtag", - "oxiri", - "rio_api", -] - [[package]] name = "rmp" version = "0.8.14" @@ -3485,80 +3226,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "sophia_api" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "103a4138290bec38b9b10e0682b613173a102bca9fd2a74b3db25346e22599a3" -dependencies = [ - "lazy_static", - "mownstr", - "regex", - "resiter", - "serde", - "sophia_iri", - "thiserror", -] - -[[package]] -name = "sophia_inmem" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebacba4fa7baed53f89844a5c9e5962d6232a449d5b450b9de72bb67f0203332" -dependencies = [ - "sophia_api", - "thiserror", -] - -[[package]] -name = "sophia_iri" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7675ff44ad920ac07fde1b61ff20d3c832d8cb65395416906df90b76631ea95f" -dependencies = [ - "lazy_static", - "oxiri", - "regex", - "serde", - "thiserror", -] - -[[package]] -name = "sophia_rio" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57a2938da8eeb8645ff616e64ac99af8099772c3e22a955ae5669ceac5372c34" -dependencies = [ - "rio_api", - "sophia_api", - "sophia_iri", -] - -[[package]] -name = "sophia_term" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51f4c42480d50d14ac7128ad738d28b68368938cb6f507c9505f68875fd0e4db" -dependencies = [ - "lazy_static", - "sophia_api", -] - -[[package]] -name = "sophia_turtle" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9ff316c00bed741ba431b8533b2ce08e089ca031c742d9b6cccdf01b7f6ef2d" -dependencies = [ - "lazy_static", - "oxiri", - "regex", - "rio_turtle", - "sophia_api", - "sophia_iri", - "sophia_rio", -] - [[package]] name = "sparesults" version = "0.3.2" @@ -3709,19 +3376,6 @@ version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df7f62577c25e07834649fc3b39fafdc597c0a3527dc1c60129201ccfcbaa50c" -[[package]] -name = "tempfile" -version = "3.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" -dependencies = [ - "fastrand", - "getrandom 0.3.3", - "once_cell", - "rustix", - "windows-sys 0.61.2", -] - [[package]] name = "templates" version = "0.1.0" @@ -3980,7 +3634,7 @@ dependencies = [ "file_io", "fts", "itoa", - "jelly 0.1.0 (git+https://github.com/Jelly-RDF/jelly_rs)", + "jelly", "memmap2", "oxjsonld", "oxrdf", @@ -4088,12 +3742,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" -[[package]] -name = "utf8parse" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" - [[package]] name = "utils" version = "0.1.0" diff --git a/lib/jelly/Cargo.toml b/lib/jelly/Cargo.toml index 19c12d98..44ebebb6 100644 --- a/lib/jelly/Cargo.toml +++ b/lib/jelly/Cargo.toml @@ -5,5 +5,5 @@ rust-version.workspace = true edition.workspace = true [dependencies] +oxrdf = { version = "0.3.2" } quick-protobuf = "0.8.1" -jelly = { git = "https://github.com/Jelly-RDF/jelly_rs" } diff --git a/lib/jelly/src/lib.rs b/lib/jelly/src/lib.rs index 0ab1871b..87e03cfd 100644 --- a/lib/jelly/src/lib.rs +++ b/lib/jelly/src/lib.rs @@ -1 +1,229 @@ -fn main () {} \ No newline at end of file +mod eu; + +use std::borrow::Cow; +use std::collections::HashMap; +use std::io::Write; + +use quick_protobuf::{MessageWrite, Writer}; + +use eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind; +use eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow; +use eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::{OneOfobject, OneOfpredicate, OneOfsubject}; +use eu::ostrzyciel::jelly::core::proto::v1::*; + +use oxrdf::{NamedOrBlankNode, Term, Triple}; + +const JELLY_FRAME_SIZE: usize = 1024; + +struct JellyEncoder { + prefix_table: HashMap, + next_prefix_id: u32, + name_table: HashMap, + next_name_id: u32, + datatype_table: HashMap, + next_datatype_id: u32, + pending_rows: Vec>, +} + +impl JellyEncoder { + fn new() -> Self { + Self { + prefix_table: HashMap::new(), + next_prefix_id: 1, + name_table: HashMap::new(), + next_name_id: 1, + datatype_table: HashMap::new(), + next_datatype_id: 1, + pending_rows: Vec::new(), + } + } + + fn get_or_insert_prefix(&mut self, prefix: &str) -> u32 { + if let Some(&id) = self.prefix_table.get(prefix) { + return id; + } + let id = self.next_prefix_id; + self.next_prefix_id += 1; + self.prefix_table.insert(prefix.to_string(), id); + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::prefix(RdfPrefixEntry { + id, + value: Cow::Owned(prefix.to_string()), + }), + }); + id + } + + fn get_or_insert_name(&mut self, name: &str) -> u32 { + if let Some(&id) = self.name_table.get(name) { + return id; + } + let id = self.next_name_id; + self.next_name_id += 1; + self.name_table.insert(name.to_string(), id); + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::name(RdfNameEntry { + id, + value: Cow::Owned(name.to_string()), + }), + }); + id + } + + fn get_or_insert_datatype(&mut self, dt_iri: &str) -> u32 { + if let Some(&id) = self.datatype_table.get(dt_iri) { + return id; + } + let id = self.next_datatype_id; + self.next_datatype_id += 1; + self.datatype_table.insert(dt_iri.to_string(), id); + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::datatype(RdfDatatypeEntry { + id, + value: Cow::Owned(dt_iri.to_string()), + }), + }); + id + } + + fn encode_iri(&mut self, iri: &str) -> RdfIri { + let (prefix, local) = split_iri(iri); + let prefix_id = self.get_or_insert_prefix(prefix); + let name_id = self.get_or_insert_name(local); + RdfIri { prefix_id, name_id } + } + + fn take_pending(&mut self) -> Vec> { + std::mem::take(&mut self.pending_rows) + } + + fn encode_triple(&mut self, triple: &Triple) -> RdfStreamRow<'static> { + let subject = match &triple.subject { + NamedOrBlankNode::NamedNode(nn) => { + OneOfsubject::s_iri(self.encode_iri(nn.as_str())) + } + NamedOrBlankNode::BlankNode(bn) => { + OneOfsubject::s_bnode(Cow::Owned(bn.as_str().to_string())) + } + #[allow(unreachable_patterns)] + _ => OneOfsubject::None, + }; + + let predicate = OneOfpredicate::p_iri( + self.encode_iri(triple.predicate.as_str()), + ); + + let object = match &triple.object { + Term::NamedNode(nn) => { + OneOfobject::o_iri(self.encode_iri(nn.as_str())) + } + Term::BlankNode(bn) => { + OneOfobject::o_bnode(Cow::Owned(bn.as_str().to_string())) + } + Term::Literal(lit) => { + let literal_kind = if let Some(lang) = lit.language() { + OneOfliteralKind::langtag(Cow::Owned(lang.to_string())) + } else { + let dt = lit.datatype().as_str(); + if dt == "http://www.w3.org/2001/XMLSchema#string" { + OneOfliteralKind::None + } else { + let dt_id = self.get_or_insert_datatype(dt); + OneOfliteralKind::datatype(dt_id) + } + }; + OneOfobject::o_literal(RdfLiteral { + lex: Cow::Owned(lit.value().to_string()), + literalKind: literal_kind, + }) + } + #[allow(unreachable_patterns)] + _ => OneOfobject::None, + }; + + RdfStreamRow { + row: OneOfrow::triple(RdfTriple { + subject, + predicate, + object, + }), + } + } +} + +fn split_iri(iri: &str) -> (&str, &str) { + if let Some(pos) = iri.rfind('#') { + (&iri[..=pos], &iri[pos + 1..]) + } else if let Some(pos) = iri.rfind('/') { + (&iri[..=pos], &iri[pos + 1..]) + } else { + ("", iri) + } +} + +fn write_delimit_frame( + buf: &mut W, + frame: RdfStreamFrame, +) -> std::io::Result<()> { + let size = frame.get_size(); + let mut temp = Vec::with_capacity(size + 10); + { + let mut writer = Writer::new(&mut temp); + writer + .write_varint(size as u64) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; + frame + .write_message(&mut writer) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; + } + buf.write_all(&temp) +} + +pub fn write_jelly( + buf: &mut W, + triples: &[Triple], +) -> std::io::Result<()> { + let mut encoder = JellyEncoder::new(); + + let options_frame = RdfStreamFrame { + rows: vec![RdfStreamRow { + row: OneOfrow::options(RdfStreamOptions { + physical_type: PhysicalStreamType::PHYSICAL_STREAM_TYPE_TRIPLES, + logical_type: LogicalStreamType::LOGICAL_STREAM_TYPE_FLAT_TRIPLES, + version: 1, + max_name_table_size: 0, + max_prefix_table_size: 0, + max_datatype_table_size: 0, + ..Default::default() + }), + }], + metadata: Default::default(), + }; + write_delimit_frame(buf, options_frame)?; + + let mut current_rows: Vec> = Vec::new(); + + for t in triples { + let triple_row = encoder.encode_triple(t); + current_rows.extend(encoder.take_pending()); + current_rows.push(triple_row); + + if current_rows.len() >= JELLY_FRAME_SIZE { + let frame = RdfStreamFrame { + rows: std::mem::take(&mut current_rows), + metadata: Default::default(), + }; + write_delimit_frame(buf, frame)?; + } + } + + if !current_rows.is_empty() { + let frame = RdfStreamFrame { + rows: current_rows, + metadata: Default::default(), + }; + write_delimit_frame(buf, frame)?; + } + + Ok(()) +} \ No newline at end of file diff --git a/lib/maplib/src/model.rs b/lib/maplib/src/model.rs index 594228ba..b7036eac 100644 --- a/lib/maplib/src/model.rs +++ b/lib/maplib/src/model.rs @@ -355,7 +355,7 @@ impl Model { &mut self, buffer: &mut W, graph: &NamedGraph, - rdf_format: RdfFormat, + rdf_format: ExtendedRdfFormat, prefixes: Option<&HashMap>, ) -> Result<(), MaplibError> { let mut use_prefixes = self.prefixes.clone(); diff --git a/lib/triplestore/Cargo.toml b/lib/triplestore/Cargo.toml index 44dff42c..ca12d9f4 100644 --- a/lib/triplestore/Cargo.toml +++ b/lib/triplestore/Cargo.toml @@ -15,7 +15,7 @@ file_io = { path = "../file_io" } spargebra = { path = "../spargebra" } fts = {path ="../fts"} cimxml_import = {path = "../cimxml_import"} -jelly = { git = "https://github.com/Jelly-RDF/jelly_rs" } +jelly = {path = "../jelly" } rayon.workspace = true sprs = { workspace = true, features = ["multi_thread"] } diff --git a/lib/triplestore/src/errors.rs b/lib/triplestore/src/errors.rs index 270d3ac6..a1b5885b 100644 --- a/lib/triplestore/src/errors.rs +++ b/lib/triplestore/src/errors.rs @@ -7,6 +7,8 @@ use thiserror::Error; #[derive(Error, Debug)] pub enum TriplestoreError { + #[error("Error writing Jelly {0}")] + WriteJellyError(String), #[error("Error writing NTriples {0}")] WriteNTriplesError(String), #[error("Path {0} does not exist")] diff --git a/lib/triplestore/src/triples_read.rs b/lib/triplestore/src/triples_read.rs index 4f7fd1ca..143be4f2 100644 --- a/lib/triplestore/src/triples_read.rs +++ b/lib/triplestore/src/triples_read.rs @@ -199,6 +199,9 @@ impl Triplestore { let use_format = match rdf_format { ExtendedRdfFormat::Normal(n) => n, ExtendedRdfFormat::CIMXML => RdfFormat::RdfXml, + ExtendedRdfFormat::Jelly => RdfFormat::JsonLd { + profile: JsonLdProfileSet::empty(), + }, }; let mut parser = RdfParser::from(use_format.clone()); if !checked { diff --git a/lib/triplestore/src/triples_write.rs b/lib/triplestore/src/triples_write.rs index 8354a0eb..3b9e81eb 100644 --- a/lib/triplestore/src/triples_write.rs +++ b/lib/triplestore/src/triples_write.rs @@ -18,6 +18,7 @@ use std::collections::HashMap; use std::io::Write; use tracing::warn; use crate::triples_read::ExtendedRdfFormat; +use jelly::*; mod fast_ntriples; mod pretty_turtle; @@ -161,6 +162,23 @@ impl Triplestore { } } writer.finish().unwrap(); + } else if ExtendedRdfFormat::Jelly == format { + let mut all_triples = Vec::new(); + for (predicate, df_map) in self.graph_triples_map.get(graph).unwrap() { + for ((subject_type, object_type), tt) in df_map { + for (lf, _) in tt.get_lazy_frames(&None, &None)? { + all_triples.extend(global_df_as_triples( + lf.collect().unwrap(), + subject_type.clone(), + object_type.clone(), + predicate, + self.global_cats.clone(), + )); + } + } + } + write_jelly(buf, &all_triples) + .map_err(|e| TriplestoreError::WriteJellyError(e.to_string()))?; } Ok(()) } diff --git a/py_maplib/src/lib.rs b/py_maplib/src/lib.rs index 548d4bda..0951d0ec 100644 --- a/py_maplib/src/lib.rs +++ b/py_maplib/src/lib.rs @@ -1388,9 +1388,9 @@ fn write_triples_mutex( prefixes: Option>, ) -> PyResult<()> { let format = if let Some(format) = format { - resolve_normal_format(&format).map_err(PyMaplibError::from)? + resolve_format(&format).map_err(PyMaplibError::from)? } else { - RdfFormat::NTriples + ExtendedRdfFormat::Normal(RdfFormat::NTriples) }; let path_buf = PathBuf::from(file_path); let mut actual_file = File::create(path_buf.as_path()) @@ -1471,9 +1471,9 @@ fn writes_mutex( prefixes: Option>, ) -> PyResult { let format = if let Some(format) = format { - resolve_normal_format(&format).map_err(PyMaplibError::from)? + resolve_format(&format).map_err(PyMaplibError::from)? } else { - RdfFormat::NTriples + ExtendedRdfFormat::Normal(RdfFormat::NTriples) }; let mut out = vec![]; let graph = parse_optional_named_node(graph)?; @@ -1725,7 +1725,8 @@ fn resolve_normal_format(format: &str) -> Result { fn resolve_format(format: &str) -> Result { match format.to_lowercase().as_str() { - "cim" | "cim/xml" | "cimxml" | "jelly" => Ok(ExtendedRdfFormat::CIMXML), + "cim" | "cim/xml" | "cimxml" => Ok(ExtendedRdfFormat::CIMXML), + "jelly" => Ok(ExtendedRdfFormat::Jelly), f => match resolve_normal_format(format) { Ok(o) => Ok(ExtendedRdfFormat::Normal(o)), Err(e) => Err(e), diff --git a/py_maplib/tests/test_jelly.py b/py_maplib/tests/test_jelly.py new file mode 100644 index 00000000..9ac8352d --- /dev/null +++ b/py_maplib/tests/test_jelly.py @@ -0,0 +1,31 @@ +import polars as pl +import pathlib +from maplib import Model + +pl.Config.set_fmt_str_lengths(300) + + +PATH_HERE = pathlib.Path(__file__).parent +TESTDATA_PATH = PATH_HERE / "testdata" + +def test_write_jelly(): + m = Model() + m.read(TESTDATA_PATH / "read_lists.ttl") + + filename = TESTDATA_PATH / "output.jelly" + m.write(filename, format="jelly") + + m2 = Model() + m2.read(filename, format="jelly") + + query = """ + SELECT ?s ?p ?o WHERE { + ?s ?p ?o . + } ORDER BY ?s ?p ?o + """ + original = m.query(query).df + read_back = m2.query(query).df + + assert original.frame_equal(read_back), ( + f"Read back mismatch: \nOriginal:\n{original}\nRead back:\n{read_back}" + ) \ No newline at end of file From f92fca4037f57cae4c9bad5663941a3a8e320036 Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Fri, 13 Feb 2026 13:56:45 +0100 Subject: [PATCH 05/19] Refactor --- Cargo.lock | 10 +----- Cargo.toml | 2 +- lib/jelly/Cargo.toml | 9 ----- lib/triplestore/Cargo.toml | 2 +- .../src/lib.rs => triplestore/src/jelly.rs} | 36 +++++++++---------- .../src => triplestore/src/jelly}/eu/mod.rs | 0 .../jelly}/eu/ostrzyciel/jelly/core/mod.rs | 0 .../eu/ostrzyciel/jelly/core/proto/mod.rs | 0 .../eu/ostrzyciel/jelly/core/proto/v1.rs | 0 .../src/jelly}/eu/ostrzyciel/jelly/mod.rs | 0 .../src/jelly}/eu/ostrzyciel/mod.rs | 0 lib/triplestore/src/lib.rs | 1 + lib/triplestore/src/triples_read.rs | 2 +- lib/triplestore/src/triples_write.rs | 2 +- 14 files changed, 23 insertions(+), 41 deletions(-) delete mode 100644 lib/jelly/Cargo.toml rename lib/{jelly/src/lib.rs => triplestore/src/jelly.rs} (92%) rename lib/{jelly/src => triplestore/src/jelly}/eu/mod.rs (100%) rename lib/{jelly/src => triplestore/src/jelly}/eu/ostrzyciel/jelly/core/mod.rs (100%) rename lib/{jelly/src => triplestore/src/jelly}/eu/ostrzyciel/jelly/core/proto/mod.rs (100%) rename lib/{jelly/src => triplestore/src/jelly}/eu/ostrzyciel/jelly/core/proto/v1.rs (100%) rename lib/{jelly/src => triplestore/src/jelly}/eu/ostrzyciel/jelly/mod.rs (100%) rename lib/{jelly/src => triplestore/src/jelly}/eu/ostrzyciel/mod.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 6f2ed063..c0d6004b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1264,14 +1264,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" -[[package]] -name = "jelly" -version = "0.1.0" -dependencies = [ - "oxrdf", - "quick-protobuf", -] - [[package]] name = "jobserver" version = "0.1.34" @@ -3634,7 +3626,6 @@ dependencies = [ "file_io", "fts", "itoa", - "jelly", "memmap2", "oxjsonld", "oxrdf", @@ -3644,6 +3635,7 @@ dependencies = [ "polars-core", "pyo3", "query_processing", + "quick-protobuf", "rayon", "representation", "ryu", diff --git a/Cargo.toml b/Cargo.toml index 40ec7c57..8691d80b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,7 +15,6 @@ members = [ "lib/cimxml_export", "lib/cimxml_import", "py_maplib", - "lib/jelly", ] [workspace.package] @@ -72,6 +71,7 @@ itoa = "1.0.15" ryu = "1.0.20" #dev-dependencies nohash-hasher = "0.2.0" +quick-protobuf = "0.8.1" [patch.crates-io] #polars = { git = 'https://github.com/pola-rs/polars', rev="665722ac3f3664c589c4827208d173cc16f0ec68" } diff --git a/lib/jelly/Cargo.toml b/lib/jelly/Cargo.toml deleted file mode 100644 index 44ebebb6..00000000 --- a/lib/jelly/Cargo.toml +++ /dev/null @@ -1,9 +0,0 @@ -[package] -name = "jelly" -version = "0.1.0" -rust-version.workspace = true -edition.workspace = true - -[dependencies] -oxrdf = { version = "0.3.2" } -quick-protobuf = "0.8.1" diff --git a/lib/triplestore/Cargo.toml b/lib/triplestore/Cargo.toml index ca12d9f4..9a9b3b85 100644 --- a/lib/triplestore/Cargo.toml +++ b/lib/triplestore/Cargo.toml @@ -15,7 +15,6 @@ file_io = { path = "../file_io" } spargebra = { path = "../spargebra" } fts = {path ="../fts"} cimxml_import = {path = "../cimxml_import"} -jelly = {path = "../jelly" } rayon.workspace = true sprs = { workspace = true, features = ["multi_thread"] } @@ -37,6 +36,7 @@ tracing.workspace = true aho-corasick.workspace = true simd-json.workspace = true serde_json.workspace = true +quick-protobuf.workspace = true pyo3 = { workspace = true, optional = true } diff --git a/lib/jelly/src/lib.rs b/lib/triplestore/src/jelly.rs similarity index 92% rename from lib/jelly/src/lib.rs rename to lib/triplestore/src/jelly.rs index 87e03cfd..b329fdf1 100644 --- a/lib/jelly/src/lib.rs +++ b/lib/triplestore/src/jelly.rs @@ -1,17 +1,15 @@ mod eu; - use std::borrow::Cow; use std::collections::HashMap; use std::io::Write; use quick_protobuf::{MessageWrite, Writer}; -use eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind; -use eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow; -use eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::{OneOfobject, OneOfpredicate, OneOfsubject}; -use eu::ostrzyciel::jelly::core::proto::v1::*; - use oxrdf::{NamedOrBlankNode, Term, Triple}; +use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow; +use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::{LogicalStreamType, PhysicalStreamType, RdfDatatypeEntry, RdfIri, RdfLiteral, RdfNameEntry, RdfPrefixEntry, RdfStreamFrame, RdfStreamOptions, RdfStreamRow, RdfTriple}; +use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind; +use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::{OneOfobject, OneOfpredicate, OneOfsubject}; const JELLY_FRAME_SIZE: usize = 1024; @@ -37,7 +35,7 @@ impl JellyEncoder { pending_rows: Vec::new(), } } - + fn get_or_insert_prefix(&mut self, prefix: &str) -> u32 { if let Some(&id) = self.prefix_table.get(prefix) { return id; @@ -53,7 +51,7 @@ impl JellyEncoder { }); id } - + fn get_or_insert_name(&mut self, name: &str) -> u32 { if let Some(&id) = self.name_table.get(name) { return id; @@ -69,7 +67,7 @@ impl JellyEncoder { }); id } - + fn get_or_insert_datatype(&mut self, dt_iri: &str) -> u32 { if let Some(&id) = self.datatype_table.get(dt_iri) { return id; @@ -85,18 +83,18 @@ impl JellyEncoder { }); id } - + fn encode_iri(&mut self, iri: &str) -> RdfIri { let (prefix, local) = split_iri(iri); let prefix_id = self.get_or_insert_prefix(prefix); let name_id = self.get_or_insert_name(local); RdfIri { prefix_id, name_id } } - + fn take_pending(&mut self) -> Vec> { std::mem::take(&mut self.pending_rows) } - + fn encode_triple(&mut self, triple: &Triple) -> RdfStreamRow<'static> { let subject = match &triple.subject { NamedOrBlankNode::NamedNode(nn) => { @@ -108,11 +106,11 @@ impl JellyEncoder { #[allow(unreachable_patterns)] _ => OneOfsubject::None, }; - + let predicate = OneOfpredicate::p_iri( self.encode_iri(triple.predicate.as_str()), ); - + let object = match &triple.object { Term::NamedNode(nn) => { OneOfobject::o_iri(self.encode_iri(nn.as_str())) @@ -202,21 +200,21 @@ pub fn write_jelly( write_delimit_frame(buf, options_frame)?; let mut current_rows: Vec> = Vec::new(); - + for t in triples { let triple_row = encoder.encode_triple(t); current_rows.extend(encoder.take_pending()); current_rows.push(triple_row); - + if current_rows.len() >= JELLY_FRAME_SIZE { let frame = RdfStreamFrame { rows: std::mem::take(&mut current_rows), metadata: Default::default(), - }; + }; write_delimit_frame(buf, frame)?; } } - + if !current_rows.is_empty() { let frame = RdfStreamFrame { rows: current_rows, @@ -224,6 +222,6 @@ pub fn write_jelly( }; write_delimit_frame(buf, frame)?; } - + Ok(()) } \ No newline at end of file diff --git a/lib/jelly/src/eu/mod.rs b/lib/triplestore/src/jelly/eu/mod.rs similarity index 100% rename from lib/jelly/src/eu/mod.rs rename to lib/triplestore/src/jelly/eu/mod.rs diff --git a/lib/jelly/src/eu/ostrzyciel/jelly/core/mod.rs b/lib/triplestore/src/jelly/eu/ostrzyciel/jelly/core/mod.rs similarity index 100% rename from lib/jelly/src/eu/ostrzyciel/jelly/core/mod.rs rename to lib/triplestore/src/jelly/eu/ostrzyciel/jelly/core/mod.rs diff --git a/lib/jelly/src/eu/ostrzyciel/jelly/core/proto/mod.rs b/lib/triplestore/src/jelly/eu/ostrzyciel/jelly/core/proto/mod.rs similarity index 100% rename from lib/jelly/src/eu/ostrzyciel/jelly/core/proto/mod.rs rename to lib/triplestore/src/jelly/eu/ostrzyciel/jelly/core/proto/mod.rs diff --git a/lib/jelly/src/eu/ostrzyciel/jelly/core/proto/v1.rs b/lib/triplestore/src/jelly/eu/ostrzyciel/jelly/core/proto/v1.rs similarity index 100% rename from lib/jelly/src/eu/ostrzyciel/jelly/core/proto/v1.rs rename to lib/triplestore/src/jelly/eu/ostrzyciel/jelly/core/proto/v1.rs diff --git a/lib/jelly/src/eu/ostrzyciel/jelly/mod.rs b/lib/triplestore/src/jelly/eu/ostrzyciel/jelly/mod.rs similarity index 100% rename from lib/jelly/src/eu/ostrzyciel/jelly/mod.rs rename to lib/triplestore/src/jelly/eu/ostrzyciel/jelly/mod.rs diff --git a/lib/jelly/src/eu/ostrzyciel/mod.rs b/lib/triplestore/src/jelly/eu/ostrzyciel/mod.rs similarity index 100% rename from lib/jelly/src/eu/ostrzyciel/mod.rs rename to lib/triplestore/src/jelly/eu/ostrzyciel/mod.rs diff --git a/lib/triplestore/src/lib.rs b/lib/triplestore/src/lib.rs index 3a4e157f..a1b3a67a 100644 --- a/lib/triplestore/src/lib.rs +++ b/lib/triplestore/src/lib.rs @@ -12,6 +12,7 @@ pub mod sparql; mod storage; pub mod triples_read; pub mod triples_write; +mod jelly; use crate::errors::TriplestoreError; use crate::storage::{repeated_from_last_row_expr, Triples}; diff --git a/lib/triplestore/src/triples_read.rs b/lib/triplestore/src/triples_read.rs index 143be4f2..4834c183 100644 --- a/lib/triplestore/src/triples_read.rs +++ b/lib/triplestore/src/triples_read.rs @@ -33,7 +33,7 @@ use std::path::Path; use std::time::Instant; use tracing::{debug, instrument}; -use jelly::*; +use crate::jelly::*; type MapType = HashMap, Vec)>>; diff --git a/lib/triplestore/src/triples_write.rs b/lib/triplestore/src/triples_write.rs index 3b9e81eb..ceb48d0f 100644 --- a/lib/triplestore/src/triples_write.rs +++ b/lib/triplestore/src/triples_write.rs @@ -18,7 +18,7 @@ use std::collections::HashMap; use std::io::Write; use tracing::warn; use crate::triples_read::ExtendedRdfFormat; -use jelly::*; +use crate::jelly::*; mod fast_ntriples; mod pretty_turtle; From 718cd01f3b556a35b89619b240f56bfd69eb6f6f Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Mon, 16 Feb 2026 05:59:31 +0100 Subject: [PATCH 06/19] Seems to be correct.. but must be tested --- Cargo.toml | 2 +- lib/maplib/src/model.rs | 1 - lib/representation/src/cats/maps/in_memory.rs | 27 +- lib/representation/src/formatting.rs | 43 ++ lib/representation/src/iri_split.rs | 25 + lib/representation/src/lib.rs | 1 + lib/triplestore/src/errors.rs | 2 + lib/triplestore/src/jelly.rs | 546 ++++++++++++------ lib/triplestore/src/triples_write.rs | 36 +- py_maplib/tests/test_jelly.py | 28 +- 10 files changed, 491 insertions(+), 220 deletions(-) create mode 100644 lib/representation/src/iri_split.rs diff --git a/Cargo.toml b/Cargo.toml index 8691d80b..d9a1722a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,7 +71,7 @@ itoa = "1.0.15" ryu = "1.0.20" #dev-dependencies nohash-hasher = "0.2.0" -quick-protobuf = "0.8.1" +quick-protobuf = { version = "0.8.1" } [patch.crates-io] #polars = { git = 'https://github.com/pola-rs/polars', rev="665722ac3f3664c589c4827208d173cc16f0ec68" } diff --git a/lib/maplib/src/model.rs b/lib/maplib/src/model.rs index b7036eac..ccdb72d9 100644 --- a/lib/maplib/src/model.rs +++ b/lib/maplib/src/model.rs @@ -9,7 +9,6 @@ use cimxml_export::export::{cim_xml_write, FullModelDetails}; use datalog::inference::{infer, InferenceResult}; use datalog::parser::parse_datalog_ruleset; use oxrdf::NamedNode; -use oxrdfio::RdfFormat; use polars::prelude::DataFrame; use representation::solution_mapping::EagerSolutionMappings; use representation::RDFNodeState; diff --git a/lib/representation/src/cats/maps/in_memory.rs b/lib/representation/src/cats/maps/in_memory.rs index d7594802..61010c14 100644 --- a/lib/representation/src/cats/maps/in_memory.rs +++ b/lib/representation/src/cats/maps/in_memory.rs @@ -7,6 +7,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::{Display, Formatter}; use std::hash::BuildHasherDefault; use std::sync::Arc; +use crate::iri_split::split_iri; #[derive(Debug, Clone, Ord, Eq, PartialEq, PartialOrd)] pub struct PrefixCompressedString { @@ -591,29 +592,3 @@ impl CatMapsInMemory { } } } - -pub fn split_iri(iri: &str) -> (&str, &str) { - // Apache 2 / MIT The Rust Project Contributors - #[inline] - fn rsplit_once_inclusive_l( - this: &str, - delimiter: P, - ) -> Option<(&'_ str, &'_ str)> - where - for<'a> P::Searcher<'a>: std::str::pattern::ReverseSearcher<'a>, - { - let (_, end) = std::str::pattern::ReverseSearcher::next_match_back( - &mut delimiter.into_searcher(this), - )?; - // SAFETY: `Searcher` is known to return valid indices. - unsafe { Some((this.get_unchecked(..end), this.get_unchecked(end..))) } - } - - const DELIMITERS: &[char] = &['/', '#', ':']; - - let (prefix, suffix) = match rsplit_once_inclusive_l(iri, DELIMITERS) { - Some(pair) => pair, - None => ("", iri), - }; - (prefix, suffix) -} diff --git a/lib/representation/src/formatting.rs b/lib/representation/src/formatting.rs index 9194a761..7a9008f2 100644 --- a/lib/representation/src/formatting.rs +++ b/lib/representation/src/formatting.rs @@ -5,6 +5,7 @@ use crate::polars_to_rdf::{ use crate::solution_mapping::BaseCatState; use crate::{BaseRDFNodeType, RDFNodeState, LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD}; use oxrdf::vocab::{rdf, xsd}; +use oxrdf::{NamedNode, NamedNodeRef}; use polars::datatypes::{DataType, Field}; use polars::prelude::{as_struct, coalesce, col, lit, Expr, IntoColumn, LazyFrame, LiteralValue}; use std::collections::HashMap; @@ -86,6 +87,48 @@ pub fn base_expression_to_formatted( }; expr.alias(name) } + +pub fn base_literal_expression_to_string( + expr: Expr, + base_type: &BaseRDFNodeType, + base_state: &BaseCatState, + global_cats: LockedCats, +) -> Vec { + let base_literal_datatype = if let BaseRDFNodeType::Literal(l) = base_type { + l.as_ref() + } else { + unreachable!("Should only be called with literal") + }; + let mut exprs = vec![]; + if base_literal_datatype == xsd::DATE_TIME { + exprs.push(expr.map( + |x| { + let dt = x.dtype(); + let tz = if let DataType::Datetime(_, tz) = dt { + tz + } else { + panic!() + }; + Ok(datetime_column_to_strings(&x, tz).into_column()) + }, + |_, f| Ok(Field::new(f.name().clone(), DataType::String)), + )) + } else if base_literal_datatype == xsd::DATE_TIME_STAMP { + exprs.push(expr.dt().strftime(XSD_DATETIME_WITH_TZ_FORMAT)) + } else if base_literal_datatype == xsd::DATE { + exprs.push(expr.dt().strftime(XSD_DATE_WITHOUT_TZ_FORMAT)) + } else if base_literal_datatype == rdf::LANG_STRING { + exprs.push(expr + .clone() + .struct_() + .field_by_name(LANG_STRING_VALUE_FIELD)); + exprs.push(expr.struct_().field_by_name(LANG_STRING_LANG_FIELD)); + } else { + exprs.push(maybe_decode_expr(expr, base_type, base_state, global_cats).cast(DataType::String)) + }; + exprs +} + pub fn expression_to_formatted( expr: Expr, name: &str, diff --git a/lib/representation/src/iri_split.rs b/lib/representation/src/iri_split.rs new file mode 100644 index 00000000..09670baa --- /dev/null +++ b/lib/representation/src/iri_split.rs @@ -0,0 +1,25 @@ +pub fn split_iri(iri: &str) -> (&str, &str) { + // Apache 2 / MIT The Rust Project Contributors + #[inline] + fn rsplit_once_inclusive_l( + this: &str, + delimiter: P, + ) -> Option<(&'_ str, &'_ str)> + where + for<'a> P::Searcher<'a>: std::str::pattern::ReverseSearcher<'a>, + { + let (_, end) = std::str::pattern::ReverseSearcher::next_match_back( + &mut delimiter.into_searcher(this), + )?; + // SAFETY: `Searcher` is known to return valid indices. + unsafe { Some((this.get_unchecked(..end), this.get_unchecked(end..))) } + } + + const DELIMITERS: &[char] = &['/', '#', ':']; + + let (prefix, suffix) = match rsplit_once_inclusive_l(iri, DELIMITERS) { + Some(pair) => pair, + None => ("", iri), + }; + (prefix, suffix) +} \ No newline at end of file diff --git a/lib/representation/src/lib.rs b/lib/representation/src/lib.rs index 1f4fbe56..2831ac32 100644 --- a/lib/representation/src/lib.rs +++ b/lib/representation/src/lib.rs @@ -21,6 +21,7 @@ pub mod python; mod rdf_state; mod rdf_type; pub mod subtypes; +pub mod iri_split; pub use base_rdf_type::*; pub use rdf_state::*; diff --git a/lib/triplestore/src/errors.rs b/lib/triplestore/src/errors.rs index a1b5885b..adb43124 100644 --- a/lib/triplestore/src/errors.rs +++ b/lib/triplestore/src/errors.rs @@ -55,6 +55,8 @@ pub enum TriplestoreError { InvalidPrefixIRI(String), #[error("Please add the document string corresponding to the url to known_contexts: {0}")] MissingContext(String), + #[error("Error flushing buffer when writing triples: {0}")] + FlushError(String), } impl From> for TriplestoreError { diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index b329fdf1..f3f4b16b 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -1,22 +1,42 @@ mod eu; use std::borrow::Cow; -use std::collections::HashMap; +use std::cmp; +use std::collections::{HashMap, HashSet}; use std::io::Write; -use quick_protobuf::{MessageWrite, Writer}; +use quick_protobuf::{serialize_into_vec, BytesWriter, MessageWrite, Writer}; -use oxrdf::{NamedOrBlankNode, Term, Triple}; -use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow; -use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::{LogicalStreamType, PhysicalStreamType, RdfDatatypeEntry, RdfIri, RdfLiteral, RdfNameEntry, RdfPrefixEntry, RdfStreamFrame, RdfStreamOptions, RdfStreamRow, RdfTriple}; +use crate::errors::TriplestoreError; use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind; -use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::{OneOfobject, OneOfpredicate, OneOfsubject}; +use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow; +use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfTriple::{ + OneOfobject, OneOfpredicate, OneOfsubject, +}; +use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::{ + LogicalStreamType, PhysicalStreamType, RdfDatatypeEntry, RdfIri, RdfLiteral, RdfNameEntry, + RdfPrefixEntry, RdfStreamFrame, RdfStreamOptions, RdfStreamRow, RdfTriple, +}; +use oxrdf::NamedNode; +use polars::polars_utils::parma::raw::Key; +use polars::polars_utils::pl_serialize::serialize_into_writer; +use polars::prelude::{col, IntoLazy}; +use polars_core::datatypes::UInt32Chunked; +use polars_core::frame::DataFrame; +use polars_core::prelude::{Column, LhsNumOps}; +use polars_core::POOL; +use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; +use representation::cats::LockedCats; +use representation::formatting::base_literal_expression_to_string; +use representation::iri_split::split_iri; +use representation::{BaseRDFNodeType, OBJECT_COL_NAME, SUBJECT_COL_NAME}; const JELLY_FRAME_SIZE: usize = 1024; -struct JellyEncoder { - prefix_table: HashMap, +pub struct JellyEncoder { + prefix_table: HashMap, + prefix_lookup: HashMap, next_prefix_id: u32, - name_table: HashMap, + name_table: HashMap, next_name_id: u32, datatype_table: HashMap, next_datatype_id: u32, @@ -24,9 +44,10 @@ struct JellyEncoder { } impl JellyEncoder { - fn new() -> Self { + pub(crate) fn new() -> Self { Self { prefix_table: HashMap::new(), + prefix_lookup: HashMap::new(), next_prefix_id: 1, name_table: HashMap::new(), next_name_id: 1, @@ -36,29 +57,300 @@ impl JellyEncoder { } } - fn get_or_insert_prefix(&mut self, prefix: &str) -> u32 { - if let Some(&id) = self.prefix_table.get(prefix) { - return id; + pub fn write_options(&mut self, buf: &mut W) -> Result<(), TriplestoreError> { + let options_frame = RdfStreamFrame { + rows: vec![RdfStreamRow { + row: OneOfrow::options(RdfStreamOptions { + physical_type: PhysicalStreamType::PHYSICAL_STREAM_TYPE_TRIPLES, + logical_type: LogicalStreamType::LOGICAL_STREAM_TYPE_FLAT_TRIPLES, + version: 1, + max_name_table_size: 0, + max_prefix_table_size: 0, + max_datatype_table_size: 0, + ..Default::default() + }), + }], + metadata: Default::default(), + }; + let v = serialize_into_vec(&options_frame).map_err(|x| { + TriplestoreError::WriteJellyError(format!("Error serializing options frame: {}", x)) + })?; + buf.write(&v).map_err(|x| { + TriplestoreError::WriteJellyError(format!("Error writing options to buffer: {}", x)) + })?; + Ok(()) + } + + pub fn write_jelly( + &mut self, + buf: &mut W, + df: DataFrame, + predicate: &NamedNode, + predicate_cat: u32, + subject_type: &BaseRDFNodeType, + object_type: &BaseRDFNodeType, + global_cats: LockedCats, + ) -> Result<(), TriplestoreError> { + self.maybe_prepare_new_names_prefixes( + df.column(SUBJECT_COL_NAME).unwrap(), + subject_type, + global_cats.clone(), + ); + self.maybe_prepare_new_names_prefixes( + df.column(OBJECT_COL_NAME).unwrap(), + object_type, + global_cats.clone(), + ); + let (pre, suf) = split_iri(predicate.as_str()); + let pre_u32 = self.get_or_insert_prefix(predicate_cat, pre); + let name_u32 = self.get_or_insert_name(predicate_cat, suf); + let predicate = OneOfpredicate::p_iri(RdfIri { + prefix_id: pre_u32, + name_id: name_u32, + }); + + let subject_u32s = df.column(SUBJECT_COL_NAME).unwrap().u32().unwrap(); + + let subjects = if subject_type.is_iri() { + self.create_iri_subjects(subject_u32s) + } else { + create_blank_subjects(subject_u32s) + }; + + //Todo: push datatype row and predicate row. + if let BaseRDFNodeType::Literal(t) = object_type { + let mut exprs = base_literal_expression_to_string( + col(OBJECT_COL_NAME), + object_type, + &object_type.default_stored_cat_state(), + global_cats, + ); + let mut lf = df.clone().lazy().select([col(OBJECT_COL_NAME)]); + let mut new_exprs = Vec::with_capacity(exprs.len()); + for (i, e) in exprs.into_iter().enumerate() { + new_exprs.push(e.alias(format!("{i}"))); + } + lf = lf.with_columns(new_exprs); + let df = lf.collect().unwrap(); + + if object_type.is_lang_string() { + for ((subject, o_lex), o_lang) in subjects + .into_iter() + .zip(df.column("0").unwrap().str().unwrap()) + .zip(df.column("1").unwrap().str().unwrap()) + { + let o_lex = o_lex.unwrap(); + let object = OneOfobject::o_literal(RdfLiteral { + lex: Cow::Owned(o_lex.to_string()), + literalKind: OneOfliteralKind::langtag(Cow::Owned( + o_lang.unwrap().to_string(), + )), + }); + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::triple(RdfTriple { + subject, + predicate: predicate.clone(), + object, + }), + }); + } + } else { + let dt_o = if let Some(dt_o) = self.datatype_table.get(t.as_str()) { + *dt_o + } else { + let dt_o = self.next_datatype_id; + self.datatype_table.insert(t.as_str().to_string(), dt_o); + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::datatype(RdfDatatypeEntry { + id: dt_o, + value: Cow::Owned(t.as_str().to_string()), + }), + }); + self.next_datatype_id += 1; + dt_o + }; + for (subject, o) in subjects + .into_iter() + .zip(df.column("0").unwrap().str().unwrap()) + { + let o = o.unwrap(); + let object = OneOfobject::o_literal(RdfLiteral { + lex: Cow::Owned(o.to_string()), + literalKind: OneOfliteralKind::datatype(dt_o), + }); + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::triple(RdfTriple { + subject, + predicate: predicate.clone(), + object, + }), + }); + } + } + } else { + // subject and object are both either blank or iri: u32 cols.. + let object_u32s = df.column(OBJECT_COL_NAME).unwrap().u32().unwrap(); + let objects = if object_type.is_iri() { + self.create_iri_objects(object_u32s) + } else { + create_blank_objects(object_u32s) + }; + for (subject, object) in subjects.into_iter().zip(objects.into_iter()) { + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::triple(RdfTriple { + subject, + predicate: predicate.clone(), + object, + }), + }); + } + }; + self.write_rows(buf, false)?; + Ok(()) + } + + fn create_iri(&self, u: &u32) -> RdfIri { + RdfIri { + prefix_id: *self.prefix_table.get(u).unwrap(), + name_id: *self.name_table.get(u).unwrap(), } - let id = self.next_prefix_id; - self.next_prefix_id += 1; - self.prefix_table.insert(prefix.to_string(), id); - self.pending_rows.push(RdfStreamRow { - row: OneOfrow::prefix(RdfPrefixEntry { - id, - value: Cow::Owned(prefix.to_string()), - }), + } + + fn create_iri_subjects(&self, u32s: &UInt32Chunked) -> Vec> { + u32s.iter() + .map(|x| OneOfsubject::s_iri(self.create_iri(&x.unwrap()))) + .collect() + } + + fn create_iri_objects(&self, u32s: &UInt32Chunked) -> Vec> { + u32s.iter() + .map(|x| OneOfobject::o_iri(self.create_iri(&x.unwrap()))) + .collect() + } + + pub fn write_rows(&mut self, buf: &mut W, all: bool) -> Result<(), TriplestoreError> { + if !all && self.pending_rows.len() < JELLY_FRAME_SIZE { + return Ok(()); + } + let mut segments = Vec::new(); + let threads = POOL.current_num_threads(); + let threads = cmp::max(threads, 1); + let frames_per_thread = self.pending_rows.len().div_ceil(JELLY_FRAME_SIZE); + let mut pending_iter = self.pending_rows.drain(..); + 'outer: for _ in 0..(threads - 1) { + let mut seg = Vec::with_capacity(frames_per_thread * JELLY_FRAME_SIZE); + for _ in 0..(JELLY_FRAME_SIZE * frames_per_thread) { + if let Some(n) = pending_iter.next() { + seg.push(n) + } + } + if !seg.is_empty() { + segments.push(seg); + } else { + break 'outer; + } + } + if all { + let seg: Vec<_> = pending_iter.collect(); + if !seg.is_empty() { + segments.push(seg); + } + } else { + let mut seg = Vec::with_capacity(frames_per_thread * JELLY_FRAME_SIZE); + let mut frame = Vec::with_capacity(JELLY_FRAME_SIZE); + let mut i = 0; + loop { + if let Some(n) = pending_iter.next() { + frame.push(n); + i += 1; + } else { + break; + } + if i > 0 && i % JELLY_FRAME_SIZE == 0 { + seg.extend(frame.drain(..)); + } + } + if !seg.is_empty() { + segments.push(seg); + } + self.pending_rows = frame.into_iter().chain(pending_iter).collect(); + } + let mut segments_buffers: Vec<(_, Vec)> = + segments.into_iter().map(|x| (x, Vec::new())).collect(); + + let buffers: Result, TriplestoreError> = POOL.install(|| { + segments_buffers + .into_iter() + .map(|(mut rows, mut buffer)| { + let mut rows_iter = rows.drain(..); + loop { + let mut rows = Vec::with_capacity(JELLY_FRAME_SIZE); + for _ in 0..JELLY_FRAME_SIZE { + if let Some(n) = rows_iter.next() { + rows.push(n); + } else { + break; + } + } + if !rows.is_empty() { + let frame = RdfStreamFrame { + rows, + metadata: HashMap::new(), + }; + let v = serialize_into_vec(&frame).map_err(|x| { + TriplestoreError::WriteJellyError(format!( + "Error serializing to vec: {}", + x + )) + })?; + buffer.extend(v); + } else { + break; + } + } + Ok(buffer) + }) + .collect() }); - id + let buffers = buffers?; + for part in buffers { + buf.write(&part).map_err(|x| { + TriplestoreError::WriteJellyError(format!("Error writing partial buffer {}", x)) + })?; + } + Ok(()) + } + + fn get_or_insert_prefix(&mut self, cat: u32, prefix: &str) -> u32 { + if let Some(p) = self.prefix_table.get(&cat) { + *p + } else { + let prefix_u = if let Some(prefix_u) = self.prefix_lookup.get(prefix) { + *prefix_u + } else { + let prefix_u = self.next_prefix_id; + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::prefix(RdfPrefixEntry { + id: prefix_u, + value: Cow::Owned(prefix.to_string()), + }), + }); + self.prefix_lookup.insert(prefix.to_string(), prefix_u); + self.next_prefix_id += 1; + prefix_u + }; + self.prefix_table.insert(cat, prefix_u); + prefix_u + } } - fn get_or_insert_name(&mut self, name: &str) -> u32 { - if let Some(&id) = self.name_table.get(name) { + fn get_or_insert_name(&mut self, cat: u32, name: &str) -> u32 { + if let Some(&id) = self.name_table.get(&cat) { return id; } let id = self.next_name_id; self.next_name_id += 1; - self.name_table.insert(name.to_string(), id); + self.name_table.insert(cat, id); self.pending_rows.push(RdfStreamRow { row: OneOfrow::name(RdfNameEntry { id, @@ -68,160 +360,78 @@ impl JellyEncoder { id } - fn get_or_insert_datatype(&mut self, dt_iri: &str) -> u32 { - if let Some(&id) = self.datatype_table.get(dt_iri) { - return id; + fn maybe_prepare_new_names_prefixes( + &mut self, + c: &Column, + t: &BaseRDFNodeType, + global_cats: LockedCats, + ) { + let read_cats = global_cats.read().unwrap(); + let mut seen_iri_u32s = Vec::new(); + let mut seen_iri_out_u32s = Vec::new(); + match t { + BaseRDFNodeType::IRI => { + for u in c.u32().unwrap() { + let u = u.unwrap(); + if !self.name_table.contains_key(&u) { + self.name_table.insert(u, self.next_name_id); + seen_iri_u32s.push(u); + seen_iri_out_u32s.push(u); + self.next_name_id += 1; + } + } + } + _ => {} } - let id = self.next_datatype_id; - self.next_datatype_id += 1; - self.datatype_table.insert(dt_iri.to_string(), id); - self.pending_rows.push(RdfStreamRow { - row: OneOfrow::datatype(RdfDatatypeEntry { - id, - value: Cow::Owned(dt_iri.to_string()), - }), - }); - id - } - - fn encode_iri(&mut self, iri: &str) -> RdfIri { - let (prefix, local) = split_iri(iri); - let prefix_id = self.get_or_insert_prefix(prefix); - let name_id = self.get_or_insert_name(local); - RdfIri { prefix_id, name_id } - } - - fn take_pending(&mut self) -> Vec> { - std::mem::take(&mut self.pending_rows) - } + if !seen_iri_u32s.is_empty() { + //Deduplication in order to avoid duplicate names + let seen_iri_out_u32s_set: HashSet<_> = seen_iri_out_u32s.into_iter().collect(); + let seen_iri_out_u32s: Vec<_> = seen_iri_out_u32s_set.into_iter().collect(); + let nns = read_cats.decode_iri_u32s(&seen_iri_u32s, None); + let (pres, sufs): (Vec<_>, Vec<_>) = nns + .par_iter() + .map(|nn| { + let (pre, suf) = split_iri(nn.as_str()); + (pre, suf) + }) + .unzip(); - fn encode_triple(&mut self, triple: &Triple) -> RdfStreamRow<'static> { - let subject = match &triple.subject { - NamedOrBlankNode::NamedNode(nn) => { - OneOfsubject::s_iri(self.encode_iri(nn.as_str())) - } - NamedOrBlankNode::BlankNode(bn) => { - OneOfsubject::s_bnode(Cow::Owned(bn.as_str().to_string())) + for (new_u, suf) in seen_iri_out_u32s.iter().zip(sufs) { + self.pending_rows.push(RdfStreamRow { + row: OneOfrow::name(RdfNameEntry { + id: *new_u, + value: Cow::Owned(suf.to_string()), + }), + }); } - #[allow(unreachable_patterns)] - _ => OneOfsubject::None, - }; - - let predicate = OneOfpredicate::p_iri( - self.encode_iri(triple.predicate.as_str()), - ); - let object = match &triple.object { - Term::NamedNode(nn) => { - OneOfobject::o_iri(self.encode_iri(nn.as_str())) + for (prefix, u) in pres.iter().zip(seen_iri_u32s) { + self.get_or_insert_prefix(u, *prefix); } - Term::BlankNode(bn) => { - OneOfobject::o_bnode(Cow::Owned(bn.as_str().to_string())) - } - Term::Literal(lit) => { - let literal_kind = if let Some(lang) = lit.language() { - OneOfliteralKind::langtag(Cow::Owned(lang.to_string())) - } else { - let dt = lit.datatype().as_str(); - if dt == "http://www.w3.org/2001/XMLSchema#string" { - OneOfliteralKind::None - } else { - let dt_id = self.get_or_insert_datatype(dt); - OneOfliteralKind::datatype(dt_id) - } - }; - OneOfobject::o_literal(RdfLiteral { - lex: Cow::Owned(lit.value().to_string()), - literalKind: literal_kind, - }) - } - #[allow(unreachable_patterns)] - _ => OneOfobject::None, - }; - - RdfStreamRow { - row: OneOfrow::triple(RdfTriple { - subject, - predicate, - object, - }), } + println!("PRefixes {:?}", self.pending_rows); } } -fn split_iri(iri: &str) -> (&str, &str) { - if let Some(pos) = iri.rfind('#') { - (&iri[..=pos], &iri[pos + 1..]) - } else if let Some(pos) = iri.rfind('/') { - (&iri[..=pos], &iri[pos + 1..]) - } else { - ("", iri) - } +fn create_blank_subject(u: u32) -> OneOfsubject<'static> { + OneOfsubject::s_bnode(create_blank_cow(u)) } -fn write_delimit_frame( - buf: &mut W, - frame: RdfStreamFrame, -) -> std::io::Result<()> { - let size = frame.get_size(); - let mut temp = Vec::with_capacity(size + 10); - { - let mut writer = Writer::new(&mut temp); - writer - .write_varint(size as u64) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; - frame - .write_message(&mut writer) - .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?; - } - buf.write_all(&temp) +fn create_blank_object(u: u32) -> OneOfobject<'static> { + OneOfobject::o_bnode(create_blank_cow(u)) } -pub fn write_jelly( - buf: &mut W, - triples: &[Triple], -) -> std::io::Result<()> { - let mut encoder = JellyEncoder::new(); - - let options_frame = RdfStreamFrame { - rows: vec![RdfStreamRow { - row: OneOfrow::options(RdfStreamOptions { - physical_type: PhysicalStreamType::PHYSICAL_STREAM_TYPE_TRIPLES, - logical_type: LogicalStreamType::LOGICAL_STREAM_TYPE_FLAT_TRIPLES, - version: 1, - max_name_table_size: 0, - max_prefix_table_size: 0, - max_datatype_table_size: 0, - ..Default::default() - }), - }], - metadata: Default::default(), - }; - write_delimit_frame(buf, options_frame)?; - - let mut current_rows: Vec> = Vec::new(); - - for t in triples { - let triple_row = encoder.encode_triple(t); - current_rows.extend(encoder.take_pending()); - current_rows.push(triple_row); - - if current_rows.len() >= JELLY_FRAME_SIZE { - let frame = RdfStreamFrame { - rows: std::mem::take(&mut current_rows), - metadata: Default::default(), - }; - write_delimit_frame(buf, frame)?; - } - } - - if !current_rows.is_empty() { - let frame = RdfStreamFrame { - rows: current_rows, - metadata: Default::default(), - }; - write_delimit_frame(buf, frame)?; - } +fn create_blank_cow(u: u32) -> Cow<'static, str> { + Cow::Owned(format!("b{}", u)) +} +fn create_blank_subjects(u32s: &UInt32Chunked) -> Vec> { + u32s.iter() + .map(|x| create_blank_subject(x.unwrap())) + .collect() +} - Ok(()) -} \ No newline at end of file +fn create_blank_objects(u32s: &UInt32Chunked) -> Vec> { + u32s.iter() + .map(|x| create_blank_object(x.unwrap())) + .collect() +} diff --git a/lib/triplestore/src/triples_write.rs b/lib/triplestore/src/triples_write.rs index ceb48d0f..d1fe7664 100644 --- a/lib/triplestore/src/triples_write.rs +++ b/lib/triplestore/src/triples_write.rs @@ -1,7 +1,7 @@ use super::Triplestore; use crate::errors::TriplestoreError; use oxrdf::NamedNode; -use oxrdfio::{RdfFormat, RdfSerializer}; +use oxrdfio::{RdfFormat, RdfSerializer, WriterQuadSerializer}; use polars::prelude::{by_name, col, IntoLazy}; use polars_core::datatypes::DataType; use polars_core::frame::DataFrame; @@ -15,7 +15,7 @@ use representation::{ LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD, OBJECT_COL_NAME, SUBJECT_COL_NAME, }; use std::collections::HashMap; -use std::io::Write; +use std::io::{BufWriter, Write}; use tracing::warn; use crate::triples_read::ExtendedRdfFormat; use crate::jelly::*; @@ -143,7 +143,8 @@ impl Triplestore { } else if ExtendedRdfFormat::Normal(RdfFormat::Turtle) == format { self.write_pretty_turtle(buf, graph, prefixes)?; } else if let ExtendedRdfFormat::Normal(rdf_format) = format { - let mut writer = RdfSerializer::from_format(rdf_format).for_writer(buf); + let mut buffered = BufWriter::new(buf); + let mut writer = RdfSerializer::from_format(rdf_format).for_writer(&mut buffered); for (predicate, df_map) in self.graph_triples_map.get(graph).unwrap() { for ((subject_type, object_type), tt) in df_map { @@ -162,23 +163,38 @@ impl Triplestore { } } writer.finish().unwrap(); + buffered.flush().map_err(|x|TriplestoreError::FlushError(x.to_string()))?; } else if ExtendedRdfFormat::Jelly == format { - let mut all_triples = Vec::new(); + let mut buffered = BufWriter::new(buf); + let mut jelly_encoder = JellyEncoder::new(); + // Single roundtrip to cat map + let all_predicates:Vec<_> = self.graph_triples_map.get(graph).unwrap().keys().map(|x| { + x.as_str() + }).collect(); + let all_predicates_u32 = self.global_cats.read()?.encode_iri_slice(&all_predicates); + let all_predicates_u32_map: HashMap<_,_> = all_predicates.into_iter().zip(all_predicates_u32.into_iter()).map(|(x,y)|{ + (x.to_string(), y.unwrap()) + }).collect(); + jelly_encoder.write_options(&mut buffered)?; + for (predicate, df_map) in self.graph_triples_map.get(graph).unwrap() { + let pred_u32 = all_predicates_u32_map.get(predicate.as_str()).unwrap(); for ((subject_type, object_type), tt) in df_map { for (lf, _) in tt.get_lazy_frames(&None, &None)? { - all_triples.extend(global_df_as_triples( + jelly_encoder.write_jelly( + &mut buffered, lf.collect().unwrap(), - subject_type.clone(), - object_type.clone(), predicate, + *pred_u32, + subject_type, + object_type, self.global_cats.clone(), - )); + ) .map_err(|e| TriplestoreError::WriteJellyError(e.to_string()))?; } } } - write_jelly(buf, &all_triples) - .map_err(|e| TriplestoreError::WriteJellyError(e.to_string()))?; + jelly_encoder.write_rows(&mut buffered, true)?; + buffered.flush().map_err(|x|TriplestoreError::FlushError(x.to_string()))?; } Ok(()) } diff --git a/py_maplib/tests/test_jelly.py b/py_maplib/tests/test_jelly.py index 9ac8352d..cc2b49fb 100644 --- a/py_maplib/tests/test_jelly.py +++ b/py_maplib/tests/test_jelly.py @@ -15,17 +15,17 @@ def test_write_jelly(): filename = TESTDATA_PATH / "output.jelly" m.write(filename, format="jelly") - m2 = Model() - m2.read(filename, format="jelly") - - query = """ - SELECT ?s ?p ?o WHERE { - ?s ?p ?o . - } ORDER BY ?s ?p ?o - """ - original = m.query(query).df - read_back = m2.query(query).df - - assert original.frame_equal(read_back), ( - f"Read back mismatch: \nOriginal:\n{original}\nRead back:\n{read_back}" - ) \ No newline at end of file + # m2 = Model() + # m2.read(filename, format="jelly") + # + # query = """ + # SELECT ?s ?p ?o WHERE { + # ?s ?p ?o . + # } ORDER BY ?s ?p ?o + # """ + # original = m.query(query).df + # read_back = m2.query(query).df + # + # assert original.frame_equal(read_back), ( + # f"Read back mismatch: \nOriginal:\n{original}\nRead back:\n{read_back}" + # ) \ No newline at end of file From c630c4ac378172139b08ffbca17afe04340373c3 Mon Sep 17 00:00:00 2001 From: Eliathon Date: Mon, 23 Feb 2026 15:08:25 +0100 Subject: [PATCH 07/19] id bug --- lib/triplestore/src/jelly.rs | 9 +++--- py_maplib/tests/test_jelly.py | 13 +++++++-- py_maplib/tests/testdata/output.jelly | 40 +++++++++++++++++++++++++++ py_maplib/tests/testdata/sunspots.ttl | 16 +++++++++++ 4 files changed, 72 insertions(+), 6 deletions(-) create mode 100644 py_maplib/tests/testdata/output.jelly create mode 100644 py_maplib/tests/testdata/sunspots.ttl diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index f3f4b16b..15dd5c47 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -64,9 +64,9 @@ impl JellyEncoder { physical_type: PhysicalStreamType::PHYSICAL_STREAM_TYPE_TRIPLES, logical_type: LogicalStreamType::LOGICAL_STREAM_TYPE_FLAT_TRIPLES, version: 1, - max_name_table_size: 0, - max_prefix_table_size: 0, - max_datatype_table_size: 0, + max_name_table_size: 4096, + max_prefix_table_size: 4096, + max_datatype_table_size: 4096, ..Default::default() }), }], @@ -397,9 +397,10 @@ impl JellyEncoder { .unzip(); for (new_u, suf) in seen_iri_out_u32s.iter().zip(sufs) { + let jelly_id = *self.name_table.get(new_u).unwrap(); self.pending_rows.push(RdfStreamRow { row: OneOfrow::name(RdfNameEntry { - id: *new_u, + id: jelly_id, value: Cow::Owned(suf.to_string()), }), }); diff --git a/py_maplib/tests/test_jelly.py b/py_maplib/tests/test_jelly.py index cc2b49fb..9c7c3069 100644 --- a/py_maplib/tests/test_jelly.py +++ b/py_maplib/tests/test_jelly.py @@ -2,6 +2,8 @@ import pathlib from maplib import Model +from rdflib import Graph + pl.Config.set_fmt_str_lengths(300) @@ -10,7 +12,7 @@ def test_write_jelly(): m = Model() - m.read(TESTDATA_PATH / "read_lists.ttl") + m.read(TESTDATA_PATH / "sunspots.ttl") filename = TESTDATA_PATH / "output.jelly" m.write(filename, format="jelly") @@ -28,4 +30,11 @@ def test_write_jelly(): # # assert original.frame_equal(read_back), ( # f"Read back mismatch: \nOriginal:\n{original}\nRead back:\n{read_back}" - # ) \ No newline at end of file + # ) + + g = Graph() + g.parse(filename, format="jelly") + + print("Triples from Jelly file:") + for s, p, o in g: + print(f"{s} {p} {o}") \ No newline at end of file diff --git a/py_maplib/tests/testdata/output.jelly b/py_maplib/tests/testdata/output.jelly new file mode 100644 index 00000000..c6d86309 --- /dev/null +++ b/py_maplib/tests/testdata/output.jelly @@ -0,0 +1,40 @@ + + +H€ P€ X€ px· + J Instant +"R http://www.w3.org/2006/time# +1R/+http://www.w3.org/1999/02/22-rdf-syntax-ns# + +Jtype +b0*J + +J7536 +*R($http://example.org/data/Observation/ +J Observation + Rhttp://www.w3.org/ns/sosa/ + +*J +J +resultTime +4Z2.http://www.w3.org/2001/XMLSchema#dateTimeStamp +-+ +*Z +2017-03-31T12:00:00+00:00 +JphenomenonTime + +*Rb0 +JhasSimpleResult +.Z,(http://www.w3.org/2001/XMLSchema#integer + +*Z +66 +JinXSDDateTimeStamp ++)b0*Z +2017-03-31T11:51:42+00:00 +J  sunspotNumber +"R http://example.org/data/Sun# +J +observedProperty + +* +J \ No newline at end of file diff --git a/py_maplib/tests/testdata/sunspots.ttl b/py_maplib/tests/testdata/sunspots.ttl new file mode 100644 index 00000000..c7960411 --- /dev/null +++ b/py_maplib/tests/testdata/sunspots.ttl @@ -0,0 +1,16 @@ +@prefix rdf: . +@prefix time: . +@prefix sosa: . +@prefix xsd: . +@base . + +# The result of an observation of the sunspot number is available a few minutes +# after the phenomenon time, due to the light travel duration. + + rdf:type sosa:Observation ; + sosa:observedProperty ; + sosa:hasSimpleResult 66 ; + sosa:phenomenonTime [ + rdf:type time:Instant ; + time:inXSDDateTimeStamp "2017-03-31T11:51:42+00:00"^^xsd:dateTimeStamp ] ; + sosa:resultTime "2017-03-31T12:00:00+00:00"^^xsd:dateTimeStamp . \ No newline at end of file From b36f18bb34f8ebe966f7da5dc200c14cdef89785 Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Wed, 25 Feb 2026 08:17:56 +0100 Subject: [PATCH 08/19] First pass jelly read --- lib/cimxml_import/src/lib.rs | 1 - lib/representation/src/cats.rs | 2 +- lib/representation/src/cats/maps/in_memory.rs | 16 +- lib/representation/src/formatting.rs | 13 +- lib/representation/src/iri_split.rs | 4 +- lib/representation/src/lib.rs | 2 +- lib/shacl/src/lib.rs | 7 +- lib/triplestore/src/errors.rs | 2 + lib/triplestore/src/jelly.rs | 386 +++++++++++++++++- lib/triplestore/src/lib.rs | 6 +- lib/triplestore/src/storage.rs | 20 +- lib/triplestore/src/triples_read.rs | 4 + lib/triplestore/src/triples_write.rs | 50 ++- .../src/triples_write/pretty_turtle.rs | 18 +- py_maplib/tests/test_jelly.py | 4 +- py_maplib/tests/testdata/output.jelly | 64 +-- 16 files changed, 495 insertions(+), 104 deletions(-) diff --git a/lib/cimxml_import/src/lib.rs b/lib/cimxml_import/src/lib.rs index 56448070..aba300fb 100644 --- a/lib/cimxml_import/src/lib.rs +++ b/lib/cimxml_import/src/lib.rs @@ -1,4 +1,3 @@ - use oxrdf::{GraphName, Subject, Term}; use std::collections::HashMap; type MapType = HashMap, Vec)>>; diff --git a/lib/representation/src/cats.rs b/lib/representation/src/cats.rs index b8c9f4f3..e05ec61e 100644 --- a/lib/representation/src/cats.rs +++ b/lib/representation/src/cats.rs @@ -205,7 +205,7 @@ impl Cats { self.cat_map.get(&ct).unwrap() } - pub(crate) fn from_map(cat_map: HashMap) -> Self { + pub fn from_map(cat_map: HashMap) -> Self { let mut cats = Cats { cat_map, iri_counter: 0, diff --git a/lib/representation/src/cats/maps/in_memory.rs b/lib/representation/src/cats/maps/in_memory.rs index 61010c14..385a3f85 100644 --- a/lib/representation/src/cats/maps/in_memory.rs +++ b/lib/representation/src/cats/maps/in_memory.rs @@ -1,4 +1,5 @@ use crate::cats::CatReEnc; +use crate::iri_split::split_iri; use crate::BaseRDFNodeType; use nohash_hasher::NoHashHasher; use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; @@ -7,12 +8,11 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::{Display, Formatter}; use std::hash::BuildHasherDefault; use std::sync::Arc; -use crate::iri_split::split_iri; #[derive(Debug, Clone, Ord, Eq, PartialEq, PartialOrd)] pub struct PrefixCompressedString { - prefix: Arc, - suffix: Arc, + pub prefix: Arc, + pub suffix: Arc, } impl Display for PrefixCompressedString { @@ -117,6 +117,16 @@ impl PrefixCompressedCatMapsInMemory { self.encode_new_str(&s, u) } + pub fn encode_new_prefix_suffix_str(&mut self, pre: Cow, suf: String, u: u32) { + let arc_pre = self.encode_or_add_new_prefix_str(pre.as_ref()); + let compr = PrefixCompressedString { + prefix: arc_pre, + suffix: Arc::new(suf), + }; + self.map.insert(compr.clone(), u); + self.rev_map.insert(u, compr); + } + fn encode_new_str(&mut self, s: &str, u: u32) { let (pre, suf) = split_iri(&s); let arc_pre = self.encode_or_add_new_prefix_str(pre); diff --git a/lib/representation/src/formatting.rs b/lib/representation/src/formatting.rs index 7a9008f2..cbd2519a 100644 --- a/lib/representation/src/formatting.rs +++ b/lib/representation/src/formatting.rs @@ -118,13 +118,16 @@ pub fn base_literal_expression_to_string( } else if base_literal_datatype == xsd::DATE { exprs.push(expr.dt().strftime(XSD_DATE_WITHOUT_TZ_FORMAT)) } else if base_literal_datatype == rdf::LANG_STRING { - exprs.push(expr - .clone() + exprs.push( + expr.clone() .struct_() - .field_by_name(LANG_STRING_VALUE_FIELD)); - exprs.push(expr.struct_().field_by_name(LANG_STRING_LANG_FIELD)); + .field_by_name(LANG_STRING_VALUE_FIELD), + ); + exprs.push(expr.struct_().field_by_name(LANG_STRING_LANG_FIELD)); } else { - exprs.push(maybe_decode_expr(expr, base_type, base_state, global_cats).cast(DataType::String)) + exprs.push( + maybe_decode_expr(expr, base_type, base_state, global_cats).cast(DataType::String), + ) }; exprs } diff --git a/lib/representation/src/iri_split.rs b/lib/representation/src/iri_split.rs index 09670baa..9f0cf474 100644 --- a/lib/representation/src/iri_split.rs +++ b/lib/representation/src/iri_split.rs @@ -6,7 +6,7 @@ pub fn split_iri(iri: &str) -> (&str, &str) { delimiter: P, ) -> Option<(&'_ str, &'_ str)> where - for<'a> P::Searcher<'a>: std::str::pattern::ReverseSearcher<'a>, + for<'a> P::Searcher<'a>: std::str::pattern::ReverseSearcher<'a>, { let (_, end) = std::str::pattern::ReverseSearcher::next_match_back( &mut delimiter.into_searcher(this), @@ -22,4 +22,4 @@ pub fn split_iri(iri: &str) -> (&str, &str) { None => ("", iri), }; (prefix, suffix) -} \ No newline at end of file +} diff --git a/lib/representation/src/lib.rs b/lib/representation/src/lib.rs index 2831ac32..1e64aaf4 100644 --- a/lib/representation/src/lib.rs +++ b/lib/representation/src/lib.rs @@ -15,13 +15,13 @@ pub mod dataset; pub mod debug; pub mod errors; pub mod formatting; +pub mod iri_split; pub mod literals; pub mod prefixes; pub mod python; mod rdf_state; mod rdf_type; pub mod subtypes; -pub mod iri_split; pub use base_rdf_type::*; pub use rdf_state::*; diff --git a/lib/shacl/src/lib.rs b/lib/shacl/src/lib.rs index 463d1017..f88b241e 100644 --- a/lib/shacl/src/lib.rs +++ b/lib/shacl/src/lib.rs @@ -15,9 +15,7 @@ use representation::cats::LockedCats; use representation::dataset::NamedGraph; #[derive(Clone)] -pub struct ShaclInferenceResult { - -} +pub struct ShaclInferenceResult {} impl Display for ShaclInferenceResult { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -85,7 +83,6 @@ pub fn validate( unimplemented!("Contact Data Treehouse to try") } - pub fn infer_shacl( _triplestore: &mut Triplestore, _data_graph: &NamedGraph, @@ -98,4 +95,4 @@ pub fn infer_shacl( _debug_no_results: bool, ) -> Result { unimplemented!("Contact Data Treehouse to try") -} \ No newline at end of file +} diff --git a/lib/triplestore/src/errors.rs b/lib/triplestore/src/errors.rs index adb43124..54ee64fb 100644 --- a/lib/triplestore/src/errors.rs +++ b/lib/triplestore/src/errors.rs @@ -9,6 +9,8 @@ use thiserror::Error; pub enum TriplestoreError { #[error("Error writing Jelly {0}")] WriteJellyError(String), + #[error("Error reading Jelly {0}")] + ReadJellyError(String), #[error("Error writing NTriples {0}")] WriteNTriplesError(String), #[error("Path {0} does not exist")] diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index 15dd5c47..93cc3305 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -1,11 +1,12 @@ mod eu; +use quick_protobuf::{serialize_into_vec, BytesReader, MessageWrite, Writer}; use std::borrow::Cow; use std::cmp; use std::collections::{HashMap, HashSet}; use std::io::Write; +use std::sync::Arc; -use quick_protobuf::{serialize_into_vec, BytesWriter, MessageWrite, Writer}; - +use super::{TriplesToAdd, Triplestore}; use crate::errors::TriplestoreError; use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind; use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfStreamRow::OneOfrow; @@ -16,21 +17,389 @@ use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::{ LogicalStreamType, PhysicalStreamType, RdfDatatypeEntry, RdfIri, RdfLiteral, RdfNameEntry, RdfPrefixEntry, RdfStreamFrame, RdfStreamOptions, RdfStreamRow, RdfTriple, }; +use oxrdf::vocab::{rdf, xsd}; use oxrdf::NamedNode; -use polars::polars_utils::parma::raw::Key; -use polars::polars_utils::pl_serialize::serialize_into_writer; -use polars::prelude::{col, IntoLazy}; +use polars::prelude::{as_struct, col, IntoLazy, LiteralValue, PlSmallStr}; use polars_core::datatypes::UInt32Chunked; use polars_core::frame::DataFrame; -use polars_core::prelude::{Column, LhsNumOps}; +use polars_core::prelude::{Column, IntoColumn, LhsNumOps, Scalar}; use polars_core::POOL; use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; -use representation::cats::LockedCats; +use representation::cats::maps::in_memory::{ + CatMapsInMemory, PrefixCompressedCatMapsInMemory, PrefixCompressedString, + UncompressedCatMapsInMemory, +}; +use representation::cats::maps::CatMaps; +use representation::cats::{CatEncs, CatType, Cats, LockedCats}; +use representation::dataset::NamedGraph; use representation::formatting::base_literal_expression_to_string; use representation::iri_split::split_iri; -use representation::{BaseRDFNodeType, OBJECT_COL_NAME, SUBJECT_COL_NAME}; +use representation::rdf_to_polars::{ + polars_literal_values_to_series, rdf_literal_to_polars_literal_value_impl, +}; +use representation::solution_mapping::BaseCatState; +use representation::{ + BaseRDFNodeType, LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD, OBJECT_COL_NAME, + SUBJECT_COL_NAME, +}; const JELLY_FRAME_SIZE: usize = 1024; +const LANG_STRING_U32: u32 = u32::MAX - 1; +const IRI_U32: u32 = 0; +const BLANK_U32: u32 = u32::MAX; +const STRING_U32: u32 = u32::MAX - 2; + +impl Triplestore { + pub fn parse_jelly( + &mut self, + slice: &[u8], + graph: &NamedGraph, + triples_batch_size: Option, + ) -> Result<(), TriplestoreError> { + // we can build a bytes reader directly out of the bytes + let mut reader = BytesReader::from_bytes(slice); + let options: RdfStreamFrame = reader.read_message(slice).map_err(|x| { + TriplestoreError::ReadJellyError(format!("Error reading initial options: {}", x)) + })?; + let mut prefix_map: HashMap> = Default::default(); + let mut name_map: HashMap> = Default::default(); + let mut iri_map: HashMap<(u32, u32), u32> = HashMap::new(); + let mut iri_rev_map: HashMap = HashMap::new(); + let mut blank_map: HashMap = HashMap::new(); + let mut datatype_map: HashMap = Default::default(); + let mut predicate_map: HashMap< + u32, + HashMap, Vec, Vec)>>, + > = Default::default(); + while !reader.is_eof() { + let frame: RdfStreamFrame = reader.read_message(slice).map_err(|x| { + TriplestoreError::ReadJellyError(format!("Error reading row: {}", x)) + })?; + for r in frame.rows { + match r.row { + OneOfrow::options(_) => {} + OneOfrow::triple(t) => { + let pred = match t.predicate { + OneOfpredicate::p_iri(i) => (i.prefix_id, i.name_id), + p => { + unimplemented!("Predicate {:?}", p) + } + }; + let pred_iri_u32 = if let Some(pi) = iri_map.get(&pred) { + *pi + } else { + let pi = iri_map.len() as u32; + iri_map.insert(pred.clone(), pi); + iri_rev_map.insert(pi, pred); + pi + }; + let subject_type_map = + if let Some(sm) = predicate_map.get_mut(&pred_iri_u32) { + sm + } else { + predicate_map.insert(pred_iri_u32, Default::default()); + predicate_map.get_mut(&pred_iri_u32).unwrap() + }; + let (subject, object_map) = match t.subject { + OneOfsubject::s_iri(i) => { + let om = if let Some(om) = subject_type_map.get_mut(&true) { + om + } else { + subject_type_map.insert(true, Default::default()); + subject_type_map.get_mut(&true).unwrap() + }; + let k = (i.prefix_id, i.name_id); + let iri_id = if let Some(iri_id) = iri_map.get(&k) { + *iri_id + } else { + let v = iri_map.len() as u32; + iri_map.insert(k, v); + v + }; + (LiteralValue::Scalar(Scalar::from(iri_id)), om) + } + OneOfsubject::s_bnode(b) => { + let om = if let Some(om) = subject_type_map.get_mut(&true) { + om + } else { + subject_type_map.insert(false, Default::default()); + subject_type_map.get_mut(&false).unwrap() + }; + let blank_id = if let Some(u) = blank_map.get(b.as_ref()) { + *u + } else { + let u = blank_map.len() as u32; + blank_map.insert(b.into_owned(), u); + u + }; + (LiteralValue::Scalar(Scalar::from(blank_id)), om) + } + OneOfsubject::s_literal(_) => { + unreachable!() + } + OneOfsubject::s_triple_term(_) => { + unimplemented!() + } + OneOfsubject::None => { + unreachable!() + } + }; + let (object, lang_tag, (subj_vec, obj_vec, lang_tag_vec)) = match t.object { + OneOfobject::o_iri(i) => { + let vecs = if let Some(vecs) = object_map.get_mut(&IRI_U32) { + vecs + } else { + object_map.insert(IRI_U32, Default::default()); + object_map.get_mut(&IRI_U32).unwrap() + }; + let k = (i.prefix_id, i.name_id); + let iri_id = if let Some(iri_id) = iri_map.get(&k) { + *iri_id + } else { + let v = iri_map.len() as u32; + iri_map.insert(k, v); + v + }; + (LiteralValue::Scalar(Scalar::from(iri_id)), None, vecs) + } + OneOfobject::o_bnode(b) => { + let om = if let Some(om) = object_map.get_mut(&BLANK_U32) { + om + } else { + object_map.insert(BLANK_U32, Default::default()); + object_map.get_mut(&BLANK_U32).unwrap() + }; + let blank_id = if let Some(u) = blank_map.get(b.as_ref()) { + *u + } else { + let u = blank_map.len() as u32; + blank_map.insert(b.into_owned(), u); + u + }; + (LiteralValue::Scalar(Scalar::from(blank_id)), None, om) + } + OneOfobject::o_literal(l) => { + let value; + let mut lang_tag = None; + let dt_id = match &l.literalKind { + OneOfliteralKind::langtag(t) => { + lang_tag = Some(LiteralValue::Scalar(Scalar::from( + PlSmallStr::from_string(t.to_string()), + ))); + value = LiteralValue::Scalar(Scalar::from( + PlSmallStr::from_string(l.lex.to_string()), + )); + + LANG_STRING_U32 + } + OneOfliteralKind::datatype(t) => { + let dt = datatype_map.get(t).unwrap(); + value = rdf_literal_to_polars_literal_value_impl( + l.lex.as_str(), + dt.as_ref(), + ); + *t + } + OneOfliteralKind::None => { + value = LiteralValue::Scalar(Scalar::from( + PlSmallStr::from_string(l.lex.to_string()), + )); + STRING_U32 + } + }; + let vecs = if let Some(vecs) = object_map.get_mut(&dt_id) { + vecs + } else { + object_map.insert(dt_id, Default::default()); + object_map.get_mut(&dt_id).unwrap() + }; + (value, lang_tag, vecs) + } + OneOfobject::o_triple_term(_) => { + unimplemented!() + } + OneOfobject::None => { + unimplemented!() + } + }; + subj_vec.push(subject); + obj_vec.push(object); + if let Some(lang_tag) = lang_tag { + lang_tag_vec.push(lang_tag); + } + } + OneOfrow::quad(_) => { + unimplemented!() + } + OneOfrow::graph_start(_) => { + unimplemented!() + } + OneOfrow::graph_end(_) => { + unimplemented!() + } + OneOfrow::namespace(_) => { + unimplemented!() + } + OneOfrow::name(n) => { + name_map.insert(n.id, Arc::new(n.value.to_string())); + } + OneOfrow::prefix(p) => { + prefix_map.insert(p.id, Arc::new(p.value.into_owned())); + } + OneOfrow::datatype(dt) => { + datatype_map.insert(dt.id, NamedNode::new(dt.value.as_str()).unwrap()); + } + OneOfrow::None => { + unimplemented!() + } + } + } + } + let mut iri_cat_enc = PrefixCompressedCatMapsInMemory::new_empty(); + + for ((pre, suf), u) in iri_map { + let prefix = prefix_map.get(&pre).unwrap(); + let suffix = name_map.get(&suf).unwrap(); + let (pre, suf) = split_iri(suffix.as_str()); + if pre.is_empty() { + let (pre, suf) = split_iri(prefix.as_str()); + if suf.is_empty() { + iri_cat_enc.encode_new_prefix_suffix_str( + Cow::Borrowed(prefix), + suffix.to_string(), + u, + ); + } else { + iri_cat_enc.encode_new_prefix_suffix_str( + Cow::Borrowed(pre), + format!("{}{}", suf, suffix), + u, + ); + } + } else { + iri_cat_enc.encode_new_prefix_suffix_str( + Cow::Owned(format!("{}{}", prefix, pre)), + suf.to_string(), + u, + ); + } + } + let iri_cat_enc = CatEncs { + maps: CatMaps::InMemory(CatMapsInMemory::Compressed(iri_cat_enc)), + }; + + let mut blank_cat_enc = UncompressedCatMapsInMemory::new_empty(); + for u in blank_map.values() { + let uuid = uuid::Uuid::new_v4().to_string(); + blank_cat_enc.encode_new_string(uuid, *u); + } + let blank_cat_enc = CatEncs { + maps: CatMaps::InMemory(CatMapsInMemory::Uncompressed(blank_cat_enc)), + }; + let maps = + HashMap::from_iter([(CatType::IRI, iri_cat_enc), (CatType::Blank, blank_cat_enc)]); + let local = LockedCats::new(Cats::from_map(maps)); + + let mut triples_to_add = Vec::new(); + for (p, m) in predicate_map { + let (pre, suf) = iri_rev_map.get(&p).unwrap(); + let predicate = NamedNode::new(format!( + "{}{}", + prefix_map.get(pre).unwrap(), + name_map.get(suf).unwrap() + )) + .unwrap(); + for (subject_is_iri, om) in m { + let subject_type = if subject_is_iri { + BaseRDFNodeType::IRI + } else { + BaseRDFNodeType::BlankNode + }; + for (dt, (subject_vec, object_vec, lang_tag_vec)) in om { + let object_type = if dt == IRI_U32 { + BaseRDFNodeType::IRI + } else if dt == BLANK_U32 { + BaseRDFNodeType::BlankNode + } else if dt == LANG_STRING_U32 { + BaseRDFNodeType::Literal(rdf::LANG_STRING.into_owned()) + } else if dt == STRING_U32 { + BaseRDFNodeType::Literal(xsd::STRING.into_owned()) + } else { + let literal_iri = datatype_map.get(&dt).unwrap(); + BaseRDFNodeType::Literal(literal_iri.clone()) + }; + let subject_ser = + polars_literal_values_to_series(subject_vec, SUBJECT_COL_NAME); + let object_ser = match &object_type { + BaseRDFNodeType::IRI | BaseRDFNodeType::BlankNode => { + polars_literal_values_to_series(object_vec, OBJECT_COL_NAME) + } + BaseRDFNodeType::Literal(l) => { + if l.as_ref() == rdf::LANG_STRING { + let lex_ser = polars_literal_values_to_series( + object_vec, + LANG_STRING_VALUE_FIELD, + ); + let lang_ser = polars_literal_values_to_series( + lang_tag_vec, + LANG_STRING_LANG_FIELD, + ); + let mut df = DataFrame::new(vec![ + lex_ser.into_column(), + lang_ser.into_column(), + ]) + .unwrap(); + df = df + .lazy() + .with_column( + as_struct(vec![ + col(LANG_STRING_VALUE_FIELD), + col(LANG_STRING_LANG_FIELD), + ]) + .alias(OBJECT_COL_NAME), + ) + .select([col(OBJECT_COL_NAME)]) + .collect() + .unwrap(); + df.column(OBJECT_COL_NAME) + .unwrap() + .as_materialized_series() + .clone() + } else { + polars_literal_values_to_series(object_vec, OBJECT_COL_NAME) + } + } + BaseRDFNodeType::None => { + unreachable!() + } + }; + let df = + DataFrame::new(vec![subject_ser.into_column(), object_ser.into_column()]) + .unwrap(); + let object_cat_state = if object_type.is_iri() || object_type.is_blank_node() { + BaseCatState::CategoricalNative(false, Some(local.clone())) + } else { + object_type.default_input_cat_state() + }; + let trips = TriplesToAdd { + df, + subject_type: subject_type.clone(), + object_type, + predicate: Some(predicate.clone()), + graph: Default::default(), + subject_cat_state: BaseCatState::CategoricalNative(false, None), + object_cat_state, + predicate_cat_state: None, + }; + triples_to_add.push(trips); + } + } + } + self.add_triples_vec(triples_to_add, false)?; + + Ok(()) + } +} pub struct JellyEncoder { prefix_table: HashMap, @@ -410,7 +779,6 @@ impl JellyEncoder { self.get_or_insert_prefix(u, *prefix); } } - println!("PRefixes {:?}", self.pending_rows); } } diff --git a/lib/triplestore/src/lib.rs b/lib/triplestore/src/lib.rs index a1b3a67a..d212054b 100644 --- a/lib/triplestore/src/lib.rs +++ b/lib/triplestore/src/lib.rs @@ -4,6 +4,7 @@ extern crate core; pub mod cats; mod dblf; pub mod errors; +mod jelly; mod map_json; pub mod native_parquet_write; pub mod query_solutions; @@ -12,7 +13,6 @@ pub mod sparql; mod storage; pub mod triples_read; pub mod triples_write; -mod jelly; use crate::errors::TriplestoreError; use crate::storage::{repeated_from_last_row_expr, Triples}; @@ -399,7 +399,7 @@ impl Triplestore { transient: bool, ) -> Result, TriplestoreError> { let prepare_triples_now = Instant::now(); - let dfs_to_add = prepare_add_triples_par( + let cat_triples_to_add = prepare_add_triples_par( ts, self.global_cats.clone(), self.storage_folder.as_ref().map(|x| x.as_ref()), @@ -409,7 +409,7 @@ impl Triplestore { prepare_triples_now.elapsed().as_secs_f32() ); let add_triples_now = Instant::now(); - let new_triples = self.add_local_cat_triples(dfs_to_add, transient)?; + let new_triples = self.add_local_cat_triples(cat_triples_to_add, transient)?; trace!( "Adding triples df took {} seconds", add_triples_now.elapsed().as_secs_f32() diff --git a/lib/triplestore/src/storage.rs b/lib/triplestore/src/storage.rs index df1fc874..98ae4b97 100644 --- a/lib/triplestore/src/storage.rs +++ b/lib/triplestore/src/storage.rs @@ -7,8 +7,8 @@ use crate::IndexingOptions; use oxrdf::vocab::{rdf, xsd}; use oxrdf::{NamedNode, Subject, Term}; use polars::prelude::{ - as_struct, col, concat, lit, Expr, IdxSize, IntoLazy, JoinArgs, JoinType, - LazyFrame, MaintainOrderJoin, PlSmallStr, UnionArgs, + as_struct, col, concat, lit, Expr, IdxSize, IntoLazy, JoinArgs, JoinType, LazyFrame, + MaintainOrderJoin, PlSmallStr, UnionArgs, }; use polars_core::datatypes::AnyValue; use polars_core::frame::DataFrame; @@ -374,11 +374,12 @@ impl TriplesSegment { // At this point the true range is between from_i and to_i let height = to_i.saturating_sub(from_i); if height > 0 { - let lf = self - .get_subject_sort_lazy_frame()?; + let lf = self.get_subject_sort_lazy_frame()?; let lf_subj = lf.clone().select([col(SUBJECT_COL_NAME)]); let subjects_start = global_cats.read()?.decode_of_type( - &lf_subj.clone().slice(from_i as i64, (OFFSET_STEP * 2) as u32) + &lf_subj + .clone() + .slice(from_i as i64, (OFFSET_STEP * 2) as u32) .collect() .unwrap() .column(SUBJECT_COL_NAME) @@ -390,7 +391,8 @@ impl TriplesSegment { to_i = to_i.saturating_sub(OFFSET_STEP * 2); // The to_i may be exactly at the sparse index, so without + 1 we may miss it. let subjects_end = global_cats.read()?.decode_of_type( - &lf_subj.slice(to_i as i64, (OFFSET_STEP * 2 + 1) as u32) + &lf_subj + .slice(to_i as i64, (OFFSET_STEP * 2 + 1) as u32) .collect() .unwrap() .column(SUBJECT_COL_NAME) @@ -399,7 +401,6 @@ impl TriplesSegment { subject_type, ); - // case exact: // from = "c" // ["a", "b", "c"] @@ -438,10 +439,7 @@ impl TriplesSegment { } //let r2 = global_cats.read()?.decode_of_type(lf.clone().collect().unwrap().column(SUBJECT_COL_NAME).unwrap().as_materialized_series(), &BaseRDFNodeType::BlankNode); - let ret = lf.slice( - from_i as i64, - height as u32 - ).collect().unwrap(); + let ret = lf.slice(from_i as i64, height as u32).collect().unwrap(); //let r = global_cats.read()?.decode_of_type(ret.column(SUBJECT_COL_NAME).unwrap().as_materialized_series(), &BaseRDFNodeType::BlankNode); //assert!(from <= r.str().unwrap().first().unwrap(),"from {} to {} ret r {} from_i {}, to_i {} r2 {}", from, to, r, from_i, to_i, r2); //assert!(r.str().unwrap().last().unwrap() <= to,"from {} to {} ret r {}", from, to, r); diff --git a/lib/triplestore/src/triples_read.rs b/lib/triplestore/src/triples_read.rs index 4834c183..771f3cb4 100644 --- a/lib/triplestore/src/triples_read.rs +++ b/lib/triplestore/src/triples_read.rs @@ -151,6 +151,10 @@ impl Triplestore { known_contexts: HashMap, ) -> Result<(), TriplestoreError> { let start_quadproc_now = Instant::now(); + if let ExtendedRdfFormat::Jelly = &rdf_format { + return self.parse_jelly(slice, graph, triples_batch_size); + } + let parallel = if let Some(parallel) = parallel { parallel } else { diff --git a/lib/triplestore/src/triples_write.rs b/lib/triplestore/src/triples_write.rs index d1fe7664..19e95cff 100644 --- a/lib/triplestore/src/triples_write.rs +++ b/lib/triplestore/src/triples_write.rs @@ -1,5 +1,7 @@ use super::Triplestore; use crate::errors::TriplestoreError; +use crate::jelly::*; +use crate::triples_read::ExtendedRdfFormat; use oxrdf::NamedNode; use oxrdfio::{RdfFormat, RdfSerializer, WriterQuadSerializer}; use polars::prelude::{by_name, col, IntoLazy}; @@ -17,8 +19,6 @@ use representation::{ use std::collections::HashMap; use std::io::{BufWriter, Write}; use tracing::warn; -use crate::triples_read::ExtendedRdfFormat; -use crate::jelly::*; mod fast_ntriples; mod pretty_turtle; @@ -163,38 +163,50 @@ impl Triplestore { } } writer.finish().unwrap(); - buffered.flush().map_err(|x|TriplestoreError::FlushError(x.to_string()))?; + buffered + .flush() + .map_err(|x| TriplestoreError::FlushError(x.to_string()))?; } else if ExtendedRdfFormat::Jelly == format { let mut buffered = BufWriter::new(buf); let mut jelly_encoder = JellyEncoder::new(); // Single roundtrip to cat map - let all_predicates:Vec<_> = self.graph_triples_map.get(graph).unwrap().keys().map(|x| { - x.as_str() - }).collect(); + let all_predicates: Vec<_> = self + .graph_triples_map + .get(graph) + .unwrap() + .keys() + .map(|x| x.as_str()) + .collect(); let all_predicates_u32 = self.global_cats.read()?.encode_iri_slice(&all_predicates); - let all_predicates_u32_map: HashMap<_,_> = all_predicates.into_iter().zip(all_predicates_u32.into_iter()).map(|(x,y)|{ - (x.to_string(), y.unwrap()) - }).collect(); + let all_predicates_u32_map: HashMap<_, _> = all_predicates + .into_iter() + .zip(all_predicates_u32.into_iter()) + .map(|(x, y)| (x.to_string(), y.unwrap())) + .collect(); jelly_encoder.write_options(&mut buffered)?; for (predicate, df_map) in self.graph_triples_map.get(graph).unwrap() { let pred_u32 = all_predicates_u32_map.get(predicate.as_str()).unwrap(); for ((subject_type, object_type), tt) in df_map { for (lf, _) in tt.get_lazy_frames(&None, &None)? { - jelly_encoder.write_jelly( - &mut buffered, - lf.collect().unwrap(), - predicate, - *pred_u32, - subject_type, - object_type, - self.global_cats.clone(), - ) .map_err(|e| TriplestoreError::WriteJellyError(e.to_string()))?; + jelly_encoder + .write_jelly( + &mut buffered, + lf.collect().unwrap(), + predicate, + *pred_u32, + subject_type, + object_type, + self.global_cats.clone(), + ) + .map_err(|e| TriplestoreError::WriteJellyError(e.to_string()))?; } } } jelly_encoder.write_rows(&mut buffered, true)?; - buffered.flush().map_err(|x|TriplestoreError::FlushError(x.to_string()))?; + buffered + .flush() + .map_err(|x| TriplestoreError::FlushError(x.to_string()))?; } Ok(()) } diff --git a/lib/triplestore/src/triples_write/pretty_turtle.rs b/lib/triplestore/src/triples_write/pretty_turtle.rs index 494117d8..225db7f5 100644 --- a/lib/triplestore/src/triples_write/pretty_turtle.rs +++ b/lib/triplestore/src/triples_write/pretty_turtle.rs @@ -7,6 +7,7 @@ use oxrdf::vocab::rdf; use oxrdf::{BlankNode, NamedNode, NamedNodeRef, Term, TermRef, Variable}; use polars::prelude::{col, concat, LazyFrame, UnionArgs}; use polars_core::frame::DataFrame; +use polars_core::prelude::BooleanChunked; use polars_core::POOL; use rayon::iter::{IntoParallelIterator, ParallelIterator}; use representation::cats::LockedCats; @@ -19,7 +20,6 @@ use spargebra::Query; use std::collections::{BTreeMap, HashMap, HashSet}; use std::io::Write; use std::sync::Arc; -use polars_core::prelude::BooleanChunked; const STRIDE: usize = 20_000; @@ -423,12 +423,10 @@ impl Triplestore { let mut thread_strings = Vec::with_capacity(n_threads); for _ in 0..n_threads { let mut start_string = if let Some(last_string) = last_string.take() { - if let Some((next_string)) = triples - .get_next_different_subject( - self.global_cats.clone(), - last_string.as_str(), - )? - { + if let Some((next_string)) = triples.get_next_different_subject( + self.global_cats.clone(), + last_string.as_str(), + )? { assert!(next_string > last_string); next_string } else { @@ -437,9 +435,7 @@ impl Triplestore { } } else { assert!(!found_first); - if let Some(start_string) = - triples.get_first_subject_string()? - { + if let Some(start_string) = triples.get_first_subject_string()? { found_first = true; start_string } else { @@ -449,7 +445,7 @@ impl Triplestore { }; let end_string = triples.get_next_different_approximately_n_distance_away( &start_string, - STRIDE / n_threads + STRIDE / n_threads, )?; let end_string = if let Some(end_string) = end_string { if start_string == end_string { diff --git a/py_maplib/tests/test_jelly.py b/py_maplib/tests/test_jelly.py index 9c7c3069..4d827661 100644 --- a/py_maplib/tests/test_jelly.py +++ b/py_maplib/tests/test_jelly.py @@ -37,4 +37,6 @@ def test_write_jelly(): print("Triples from Jelly file:") for s, p, o in g: - print(f"{s} {p} {o}") \ No newline at end of file + print(f"{s} {p} {o}") + + m.read(filename) \ No newline at end of file diff --git a/py_maplib/tests/testdata/output.jelly b/py_maplib/tests/testdata/output.jelly index c6d86309..46ad5d57 100644 --- a/py_maplib/tests/testdata/output.jelly +++ b/py_maplib/tests/testdata/output.jelly @@ -1,40 +1,40 @@   H€ P€ X€ px· - J Instant -"R http://www.w3.org/2006/time# -1R/+http://www.w3.org/1999/02/22-rdf-syntax-ns# -Jtype -b0*J - -J7536 -*R($http://example.org/data/Observation/ -J Observation - Rhttp://www.w3.org/ns/sosa/ - -*J -J +J7536 +*R($http://example.org/data/Observation/ + Rhttp://www.w3.org/ns/sosa/ +JhasSimpleResult +.Z,(http://www.w3.org/2001/XMLSchema#integer + +*Z +66 +J resultTime -4Z2.http://www.w3.org/2001/XMLSchema#dateTimeStamp +4Z2.http://www.w3.org/2001/XMLSchema#dateTimeStamp -+ -*Z -2017-03-31T12:00:00+00:00 -JphenomenonTime - -*Rb0 -JhasSimpleResult -.Z,(http://www.w3.org/2001/XMLSchema#integer - -*Z -66 -JinXSDDateTimeStamp -+)b0*Z -2017-03-31T11:51:42+00:00 -J  sunspotNumber +*Z +2017-03-31T12:00:00+00:00 + J Instant +"R http://www.w3.org/2006/time# +1R/+http://www.w3.org/1999/02/22-rdf-syntax-ns# + +Jtype +b0*J +J Observation + +*J +J sunspotNumber "R http://example.org/data/Sun# -J -observedProperty +JobservedProperty  -* -J \ No newline at end of file +*J +J inXSDDateTimeStamp ++)b0* Z +2017-03-31T11:51:42+00:00 +J +phenomenonTime + +* +Rb0 \ No newline at end of file From e42687b0a3abd75ec7bfe2f3af8de50960826221 Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Wed, 25 Feb 2026 13:53:15 +0100 Subject: [PATCH 09/19] add_triples resolve predicate and test --- py_maplib/src/lib.rs | 19 +++++++++-- py_maplib/tests/.gitignore | 3 +- py_maplib/tests/test_resolve_predicate.py | 31 ++++++++++++++++++ py_maplib/tests/testdata/output.jelly | 40 ----------------------- 4 files changed, 50 insertions(+), 43 deletions(-) create mode 100644 py_maplib/tests/test_resolve_predicate.py delete mode 100644 py_maplib/tests/testdata/output.jelly diff --git a/py_maplib/src/lib.rs b/py_maplib/src/lib.rs index 0951d0ec..073c88b4 100644 --- a/py_maplib/src/lib.rs +++ b/py_maplib/src/lib.rs @@ -11,7 +11,7 @@ use tracing_subscriber::{filter, prelude::*}; use crate::shacl::PyValidationReport; use maplib::errors::MaplibError; -use maplib::model::{MapOptions, Model as InnerModel}; +use maplib::model::{MapOptions, Model as InnerModel, Model}; use chrono::Utc; use cimxml_export::export::FullModelDetails; @@ -1045,7 +1045,7 @@ fn map_triples_mutex( let options = MapOptions::from_args(named_graph, validate_iris); let types = map_types(types); let predicate = if let Some(predicate) = predicate { - Some(NamedNode::new(predicate).map_err(|x| PyMaplibError::from(MaplibError::from(x)))?) + Some(resolve_predicate(&predicate, &inner.prefixes)?) } else { None }; @@ -1055,6 +1055,21 @@ fn map_triples_mutex( Ok(None) } +fn resolve_predicate( + predicate: &str, + prefixes: &HashMap, +) -> PyResult { + if let Some((prefix, suffix)) = predicate.split_once(':') { + if let Some(prefix_iri) = prefixes.get(prefix) { + let pre_and_suf = format!("{}{}", prefix_iri.as_str(), suffix); + if let Ok(nn) = NamedNode::new(&pre_and_suf) { + return Ok(nn); + } + } + } + NamedNode::new(predicate).map_err(|x| PyMaplibError::from(MaplibError::from(x)).into()) +} + fn map_default_mutex( inner: &mut MutexGuard, df: DataFrame, diff --git a/py_maplib/tests/.gitignore b/py_maplib/tests/.gitignore index 4dcf6293..e3547d40 100644 --- a/py_maplib/tests/.gitignore +++ b/py_maplib/tests/.gitignore @@ -8,4 +8,5 @@ testdata/*.parquet bench200_000.nt tmp out.nt -create*.nt \ No newline at end of file +create*.nt +output.jelly \ No newline at end of file diff --git a/py_maplib/tests/test_resolve_predicate.py b/py_maplib/tests/test_resolve_predicate.py new file mode 100644 index 00000000..f263c8b1 --- /dev/null +++ b/py_maplib/tests/test_resolve_predicate.py @@ -0,0 +1,31 @@ +import polars as pl +import pathlib +from maplib import Model + +pl.Config.set_fmt_str_lengths(300) + + +PATH_HERE = pathlib.Path(__file__).parent +TESTDATA_PATH = PATH_HERE / "testdata" + +def test_resolve_predicate(): + m = Model() + m.add_prefixes({"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#"}) + m.add_prefixes({"ex": "https://example.net/"}) + + df = pl.DataFrame({ + "subject": ["http://example.net/subject1"], + "object": ["http://example.net/object1"], + }) + + m.map_triples(df, predicate="rdf:type") + + result = m.query(""" + SELECT ?s ?o WHERE { + ?s ?o . + } + """) + + assert result.height == 1 + assert result["s"][0] == "" + assert result["o"][0] == "" \ No newline at end of file diff --git a/py_maplib/tests/testdata/output.jelly b/py_maplib/tests/testdata/output.jelly deleted file mode 100644 index 46ad5d57..00000000 --- a/py_maplib/tests/testdata/output.jelly +++ /dev/null @@ -1,40 +0,0 @@ - - -H€ P€ X€ px· - -J7536 -*R($http://example.org/data/Observation/ - Rhttp://www.w3.org/ns/sosa/ -JhasSimpleResult -.Z,(http://www.w3.org/2001/XMLSchema#integer - -*Z -66 -J -resultTime -4Z2.http://www.w3.org/2001/XMLSchema#dateTimeStamp --+ -*Z -2017-03-31T12:00:00+00:00 - J Instant -"R http://www.w3.org/2006/time# -1R/+http://www.w3.org/1999/02/22-rdf-syntax-ns# - -Jtype -b0*J -J Observation - -*J -J sunspotNumber -"R http://example.org/data/Sun# -JobservedProperty - -*J -J inXSDDateTimeStamp -+)b0* Z -2017-03-31T11:51:42+00:00 -J -phenomenonTime - -* -Rb0 \ No newline at end of file From 35e6ab86516d66943029f13d84a25482f2609c63 Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Wed, 25 Feb 2026 15:00:07 +0100 Subject: [PATCH 10/19] jelly reading test --- lib/triplestore/src/jelly.rs | 2 +- py_maplib/tests/.gitignore | 3 +- py_maplib/tests/test_jelly.py | 48 +++++++++++++++++++---------- py_maplib/tests/testdata/output.csv | 8 +++++ 4 files changed, 43 insertions(+), 18 deletions(-) create mode 100644 py_maplib/tests/testdata/output.csv diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index 93cc3305..dfb498c7 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -387,7 +387,7 @@ impl Triplestore { object_type, predicate: Some(predicate.clone()), graph: Default::default(), - subject_cat_state: BaseCatState::CategoricalNative(false, None), + subject_cat_state: BaseCatState::CategoricalNative(false, Some(local.clone())), object_cat_state, predicate_cat_state: None, }; diff --git a/py_maplib/tests/.gitignore b/py_maplib/tests/.gitignore index e3547d40..5f7990ec 100644 --- a/py_maplib/tests/.gitignore +++ b/py_maplib/tests/.gitignore @@ -9,4 +9,5 @@ bench200_000.nt tmp out.nt create*.nt -output.jelly \ No newline at end of file +output.jelly +output.csv \ No newline at end of file diff --git a/py_maplib/tests/test_jelly.py b/py_maplib/tests/test_jelly.py index 4d827661..3b701221 100644 --- a/py_maplib/tests/test_jelly.py +++ b/py_maplib/tests/test_jelly.py @@ -1,5 +1,9 @@ import polars as pl import pathlib + +from polars import read_csv +from polars.testing import assert_frame_equal + from maplib import Model from rdflib import Graph @@ -16,21 +20,6 @@ def test_write_jelly(): filename = TESTDATA_PATH / "output.jelly" m.write(filename, format="jelly") - - # m2 = Model() - # m2.read(filename, format="jelly") - # - # query = """ - # SELECT ?s ?p ?o WHERE { - # ?s ?p ?o . - # } ORDER BY ?s ?p ?o - # """ - # original = m.query(query).df - # read_back = m2.query(query).df - # - # assert original.frame_equal(read_back), ( - # f"Read back mismatch: \nOriginal:\n{original}\nRead back:\n{read_back}" - # ) g = Graph() g.parse(filename, format="jelly") @@ -39,4 +28,31 @@ def test_write_jelly(): for s, p, o in g: print(f"{s} {p} {o}") - m.read(filename) \ No newline at end of file +def test_read_jelly(): + m = Model() + if not (TESTDATA_PATH / "output.jelly").exists(): + test_write_jelly() + + filename = TESTDATA_PATH / "output.jelly" + + m.read(filename, format="jelly") + + df = m.query( + """ + SELECT ?s ?p ?o WHERE { + ?s ?p ?o . + } ORDER BY ?s ?p ?o + """ + ) + + df.write_csv(TESTDATA_PATH / "output.csv") + read_csv(TESTDATA_PATH / "output.csv") + + expected = read_csv(TESTDATA_PATH / "output.csv") + + print("\nDataFrame from Jelly file:") + print(df) + print("Expected DataFrame:") + print(expected) + + assert_frame_equal(df, expected) \ No newline at end of file diff --git a/py_maplib/tests/testdata/output.csv b/py_maplib/tests/testdata/output.csv new file mode 100644 index 00000000..b0c07eb7 --- /dev/null +++ b/py_maplib/tests/testdata/output.csv @@ -0,0 +1,8 @@ +s,p,o +,, +,,"""66""^^" +,, +,,_:994b9aff-6122-4413-a4f2-b6e40bc68af8 +,,"""2017-03-31T12:00:00+00:00""^^" +_:994b9aff-6122-4413-a4f2-b6e40bc68af8,, +_:994b9aff-6122-4413-a4f2-b6e40bc68af8,,"""2017-03-31T11:51:42+00:00""^^" From 9c6f7098ec67892770e8aac99f99f4a8dc038fa3 Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Sun, 1 Mar 2026 22:52:03 +0100 Subject: [PATCH 11/19] added benchmarking test --- py_maplib/tests/test_jelly.py | 6 ++++ py_maplib/tests/test_jelly_benchmark.py | 38 +++++++++++++++++++++++++ py_maplib/tests/testdata/output.csv | 8 ------ 3 files changed, 44 insertions(+), 8 deletions(-) create mode 100644 py_maplib/tests/test_jelly_benchmark.py delete mode 100644 py_maplib/tests/testdata/output.csv diff --git a/py_maplib/tests/test_jelly.py b/py_maplib/tests/test_jelly.py index 3b701221..e443962c 100644 --- a/py_maplib/tests/test_jelly.py +++ b/py_maplib/tests/test_jelly.py @@ -1,5 +1,7 @@ import polars as pl import pathlib +import pytest +import os from polars import read_csv from polars.testing import assert_frame_equal @@ -14,6 +16,7 @@ PATH_HERE = pathlib.Path(__file__).parent TESTDATA_PATH = PATH_HERE / "testdata" +@pytest.mark.skip def test_write_jelly(): m = Model() m.read(TESTDATA_PATH / "sunspots.ttl") @@ -30,6 +33,9 @@ def test_write_jelly(): def test_read_jelly(): m = Model() + if (TESTDATA_PATH / "output.jelly").exists(): + os.remove(TESTDATA_PATH / "output.jelly") + if not (TESTDATA_PATH / "output.jelly").exists(): test_write_jelly() diff --git a/py_maplib/tests/test_jelly_benchmark.py b/py_maplib/tests/test_jelly_benchmark.py new file mode 100644 index 00000000..53bbfc22 --- /dev/null +++ b/py_maplib/tests/test_jelly_benchmark.py @@ -0,0 +1,38 @@ +import pathlib +import time +import os + +from maplib import Model + +PATH_HERE = pathlib.Path(__file__).parent +TESTDATA_PATH = PATH_HERE / "testdata" + +def test_jelly_benchmark(): + m = Model() + m.read(TESTDATA_PATH / "sunspots.ttl") + filename = TESTDATA_PATH / "output.jelly" + if filename.exists(): + os.remove(filename) + + start_time_total = time.perf_counter() + start_time_write = time.perf_counter() + m.write(filename, format="jelly") + end_time_write = time.perf_counter() + start_time_read = time.perf_counter() + m.read(filename, format="jelly") + end_time_read = time.perf_counter() + end_time_total = time.perf_counter() + + df = m.query( + """ + SELECT ?s ?p ?o WHERE { + ?s ?p ?o . + } + """ + ) + + print(f"\nAmount of triples: {df.height}") + + print(f"\nWrite time: {end_time_write - start_time_write:.4f} seconds") + print(f"Read time: {end_time_read - start_time_read:.4f} seconds") + print(f"Total time: {end_time_total - start_time_total:.4f} seconds") diff --git a/py_maplib/tests/testdata/output.csv b/py_maplib/tests/testdata/output.csv deleted file mode 100644 index b0c07eb7..00000000 --- a/py_maplib/tests/testdata/output.csv +++ /dev/null @@ -1,8 +0,0 @@ -s,p,o -,, -,,"""66""^^" -,, -,,_:994b9aff-6122-4413-a4f2-b6e40bc68af8 -,,"""2017-03-31T12:00:00+00:00""^^" -_:994b9aff-6122-4413-a4f2-b6e40bc68af8,, -_:994b9aff-6122-4413-a4f2-b6e40bc68af8,,"""2017-03-31T11:51:42+00:00""^^" From ef54c2158c79ecadb4f1f744e30803e9af4abe68 Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Tue, 3 Mar 2026 12:52:01 +0100 Subject: [PATCH 12/19] return added to __init__.pyi insert --- py_maplib/maplib/__init__.pyi | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/py_maplib/maplib/__init__.pyi b/py_maplib/maplib/__init__.pyi index 4c545990..e08556ec 100644 --- a/py_maplib/maplib/__init__.pyi +++ b/py_maplib/maplib/__init__.pyi @@ -1,5 +1,7 @@ from pathlib import Path from typing import Union, List, Dict, Optional, Callable, Tuple, Literal as LiteralType + +import polars from polars import DataFrame from datetime import datetime, date from maplib.maplib import rdf @@ -732,7 +734,7 @@ class Model: include_transient: bool = True, max_rows: int = None, debug: bool = False, - ): + ) -> Dict[str, polars.DataFrame]: """ Insert the results of a Construct query in the graph. Useful for being able to use the same query for inspecting what will be inserted and actually inserting. From 83021c85239cc6a2347912e93c76cd3dc0f6c1ce Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Wed, 4 Mar 2026 20:29:17 +0100 Subject: [PATCH 13/19] removed every instance of unwrap() from jelly.rs --- lib/representation/src/cats/maps/in_memory.rs | 3 +- lib/triplestore/src/jelly.rs | 306 ++++++++++++++---- 2 files changed, 247 insertions(+), 62 deletions(-) diff --git a/lib/representation/src/cats/maps/in_memory.rs b/lib/representation/src/cats/maps/in_memory.rs index 385a3f85..0678cd70 100644 --- a/lib/representation/src/cats/maps/in_memory.rs +++ b/lib/representation/src/cats/maps/in_memory.rs @@ -8,6 +8,7 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::{Display, Formatter}; use std::hash::BuildHasherDefault; use std::sync::Arc; +use std::time::Instant; #[derive(Debug, Clone, Ord, Eq, PartialEq, PartialOrd)] pub struct PrefixCompressedString { @@ -363,7 +364,7 @@ impl UncompressedCatMapsInMemory { } pub fn counter(&self) -> u32 { - self.rev_map.keys().max().unwrap().clone() + 1 + self.rev_map.keys().max().cloned().unwrap_or(0) + 1 } pub fn decode_batch(&self, v: &[Option]) -> Vec>> { diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index dfb498c7..0a633439 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -98,7 +98,7 @@ impl Triplestore { sm } else { predicate_map.insert(pred_iri_u32, Default::default()); - predicate_map.get_mut(&pred_iri_u32).unwrap() + predicate_map.get_mut(&pred_iri_u32).expect("Just inserted") }; let (subject, object_map) = match t.subject { OneOfsubject::s_iri(i) => { @@ -106,7 +106,7 @@ impl Triplestore { om } else { subject_type_map.insert(true, Default::default()); - subject_type_map.get_mut(&true).unwrap() + subject_type_map.get_mut(&true).expect("Just inserted") }; let k = (i.prefix_id, i.name_id); let iri_id = if let Some(iri_id) = iri_map.get(&k) { @@ -123,7 +123,7 @@ impl Triplestore { om } else { subject_type_map.insert(false, Default::default()); - subject_type_map.get_mut(&false).unwrap() + subject_type_map.get_mut(&false).expect("Just inserted") }; let blank_id = if let Some(u) = blank_map.get(b.as_ref()) { *u @@ -150,7 +150,7 @@ impl Triplestore { vecs } else { object_map.insert(IRI_U32, Default::default()); - object_map.get_mut(&IRI_U32).unwrap() + object_map.get_mut(&IRI_U32).expect("Just inserted") }; let k = (i.prefix_id, i.name_id); let iri_id = if let Some(iri_id) = iri_map.get(&k) { @@ -167,7 +167,7 @@ impl Triplestore { om } else { object_map.insert(BLANK_U32, Default::default()); - object_map.get_mut(&BLANK_U32).unwrap() + object_map.get_mut(&BLANK_U32).expect("Just inserted") }; let blank_id = if let Some(u) = blank_map.get(b.as_ref()) { *u @@ -193,7 +193,14 @@ impl Triplestore { LANG_STRING_U32 } OneOfliteralKind::datatype(t) => { - let dt = datatype_map.get(t).unwrap(); + let dt = match datatype_map.get(t) { + Some(dt) => dt, + None => { + return Err(TriplestoreError::ReadJellyError( + format!("Datatype id {} not found", t).into(), + )) + } + }; value = rdf_literal_to_polars_literal_value_impl( l.lex.as_str(), dt.as_ref(), @@ -211,7 +218,7 @@ impl Triplestore { vecs } else { object_map.insert(dt_id, Default::default()); - object_map.get_mut(&dt_id).unwrap() + object_map.get_mut(&dt_id).expect("Just inserted") }; (value, lang_tag, vecs) } @@ -247,7 +254,13 @@ impl Triplestore { prefix_map.insert(p.id, Arc::new(p.value.into_owned())); } OneOfrow::datatype(dt) => { - datatype_map.insert(dt.id, NamedNode::new(dt.value.as_str()).unwrap()); + let nn = NamedNode::new(dt.value.as_str()).map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Invalid datatype IRI {}: {}", + dt.value, e + )) + })?; + datatype_map.insert(dt.id, nn); } OneOfrow::None => { unimplemented!() @@ -258,8 +271,22 @@ impl Triplestore { let mut iri_cat_enc = PrefixCompressedCatMapsInMemory::new_empty(); for ((pre, suf), u) in iri_map { - let prefix = prefix_map.get(&pre).unwrap(); - let suffix = name_map.get(&suf).unwrap(); + let prefix = match prefix_map.get(&pre) { + Some(p) => p, + None => { + return Err(TriplestoreError::ReadJellyError( + format!("Prefix id {} not found", pre).into(), + )) + } + }; + let suffix = match name_map.get(&suf) { + Some(s) => s, + None => { + return Err(TriplestoreError::ReadJellyError( + format!("Name id {} not found", suf).into(), + )) + } + }; let (pre, suf) = split_iri(suffix.as_str()); if pre.is_empty() { let (pre, suf) = split_iri(prefix.as_str()); @@ -302,13 +329,28 @@ impl Triplestore { let mut triples_to_add = Vec::new(); for (p, m) in predicate_map { - let (pre, suf) = iri_rev_map.get(&p).unwrap(); - let predicate = NamedNode::new(format!( - "{}{}", - prefix_map.get(pre).unwrap(), - name_map.get(suf).unwrap() - )) - .unwrap(); + let (pre, suf) = iri_rev_map.get(&p).ok_or_else(|| { + TriplestoreError::ReadJellyError(format!( + "Missing IRI reverse mapping for predicate {}", + p + )) + })?; + + let prefix = prefix_map.get(pre).ok_or_else(|| { + TriplestoreError::ReadJellyError(format!("Missing prefix for id {}", pre)) + })?; + + let name = name_map.get(suf).ok_or_else(|| { + TriplestoreError::ReadJellyError(format!("Missing name for id {}", suf)) + })?; + + let predicate = NamedNode::new(format!("{}{}", prefix, name)).map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Invalid predicate IRI {}{}: {}", + prefix, name, e + )) + })?; + for (subject_is_iri, om) in m { let subject_type = if subject_is_iri { BaseRDFNodeType::IRI @@ -325,7 +367,14 @@ impl Triplestore { } else if dt == STRING_U32 { BaseRDFNodeType::Literal(xsd::STRING.into_owned()) } else { - let literal_iri = datatype_map.get(&dt).unwrap(); + let literal_iri = match datatype_map.get(&dt) { + Some(dt) => dt, + None => { + return Err(TriplestoreError::ReadJellyError( + format!("Datatype id {} not found", dt).into(), + )) + } + }; BaseRDFNodeType::Literal(literal_iri.clone()) }; let subject_ser = @@ -348,7 +397,12 @@ impl Triplestore { lex_ser.into_column(), lang_ser.into_column(), ]) - .unwrap(); + .map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Error creating DataFrame for language tagged string: {}", + e + )) + })?; df = df .lazy() .with_column( @@ -360,9 +414,19 @@ impl Triplestore { ) .select([col(OBJECT_COL_NAME)]) .collect() - .unwrap(); + .map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Error structuring DataFrame for language tagged string: {}", + e + )) + })?; df.column(OBJECT_COL_NAME) - .unwrap() + .map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Missing column {}: {}", + OBJECT_COL_NAME, e + )) + })? .as_materialized_series() .clone() } else { @@ -375,7 +439,12 @@ impl Triplestore { }; let df = DataFrame::new(vec![subject_ser.into_column(), object_ser.into_column()]) - .unwrap(); + .map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Error creating DataFrame for triple: {}", + e + )) + })?; let object_cat_state = if object_type.is_iri() || object_type.is_blank_node() { BaseCatState::CategoricalNative(false, Some(local.clone())) } else { @@ -387,7 +456,10 @@ impl Triplestore { object_type, predicate: Some(predicate.clone()), graph: Default::default(), - subject_cat_state: BaseCatState::CategoricalNative(false, Some(local.clone())), + subject_cat_state: BaseCatState::CategoricalNative( + false, + Some(local.clone()), + ), object_cat_state, predicate_cat_state: None, }; @@ -461,15 +533,19 @@ impl JellyEncoder { global_cats: LockedCats, ) -> Result<(), TriplestoreError> { self.maybe_prepare_new_names_prefixes( - df.column(SUBJECT_COL_NAME).unwrap(), + df.column(SUBJECT_COL_NAME).map_err(|e| { + TriplestoreError::WriteJellyError(format!("Missing subject column: {}", e)) + })?, subject_type, global_cats.clone(), - ); + )?; self.maybe_prepare_new_names_prefixes( - df.column(OBJECT_COL_NAME).unwrap(), + df.column(OBJECT_COL_NAME).map_err(|e| { + TriplestoreError::WriteJellyError(format!("Missing object column: {}", e)) + })?, object_type, global_cats.clone(), - ); + )?; let (pre, suf) = split_iri(predicate.as_str()); let pre_u32 = self.get_or_insert_prefix(predicate_cat, pre); let name_u32 = self.get_or_insert_name(predicate_cat, suf); @@ -478,12 +554,20 @@ impl JellyEncoder { name_id: name_u32, }); - let subject_u32s = df.column(SUBJECT_COL_NAME).unwrap().u32().unwrap(); + let subject_u32s = df + .column(SUBJECT_COL_NAME) + .map_err(|e| { + TriplestoreError::WriteJellyError(format!("Missing subject column: {}", e)) + })? + .u32() + .map_err(|e| { + TriplestoreError::WriteJellyError(format!("Subject column is not u32: {}", e)) + })?; let subjects = if subject_type.is_iri() { - self.create_iri_subjects(subject_u32s) + self.create_iri_subjects(subject_u32s)? } else { - create_blank_subjects(subject_u32s) + create_blank_subjects(subject_u32s)? }; //Todo: push datatype row and predicate row. @@ -500,19 +584,54 @@ impl JellyEncoder { new_exprs.push(e.alias(format!("{i}"))); } lf = lf.with_columns(new_exprs); - let df = lf.collect().unwrap(); + let df = lf.collect().map_err(|e| { + TriplestoreError::WriteJellyError(format!( + "Error evaluating literal expressions for object column: {}", + e + )) + })?; if object_type.is_lang_string() { for ((subject, o_lex), o_lang) in subjects .into_iter() - .zip(df.column("0").unwrap().str().unwrap()) - .zip(df.column("1").unwrap().str().unwrap()) + .zip(df.column("0").map_err(|e| { + TriplestoreError::WriteJellyError(format!( + "Missing object column after evaluating literal expressions: {}", + e + )) + })?.str().map_err(|e| { + TriplestoreError::WriteJellyError(format!( + "Object column is not string after evaluating literal expressions: {}", + e + )) + })?) + .zip(df.column("1").map_err(|e| { + TriplestoreError::WriteJellyError(format!( + "Missing language tag column after evaluating literal expressions: {}", + e + )) + })?.str().map_err(|e| { + TriplestoreError::WriteJellyError(format!( + "Language tag column is not string after evaluating literal expressions: {}", + e + )) + })?) { - let o_lex = o_lex.unwrap(); + let o_lex = match o_lex { + Some(o) => o, + None => return Err(TriplestoreError::WriteJellyError( + "Missing lexical form for language tagged literal".into(), + )), + }; + let o_lang = o_lang.ok_or_else(|| { + TriplestoreError::WriteJellyError( + "Missing language tag for language tagged literal".into(), + ) + })?; let object = OneOfobject::o_literal(RdfLiteral { lex: Cow::Owned(o_lex.to_string()), literalKind: OneOfliteralKind::langtag(Cow::Owned( - o_lang.unwrap().to_string(), + o_lang.to_string(), )), }); self.pending_rows.push(RdfStreamRow { @@ -538,11 +657,25 @@ impl JellyEncoder { self.next_datatype_id += 1; dt_o }; - for (subject, o) in subjects - .into_iter() - .zip(df.column("0").unwrap().str().unwrap()) - { - let o = o.unwrap(); + for (subject, o) in subjects.into_iter().zip( + df.column("0") + .map_err(|e| { + TriplestoreError::WriteJellyError(format!( + "Missing object column after evaluating literal expressions: {}", + e + )) + })? + .str() + .map_err(|e| { + TriplestoreError::WriteJellyError(format!( + "Object column is not string after evaluating literal expressions: {}", + e + )) + })?, + ) { + let o = o.ok_or_else(|| { + TriplestoreError::WriteJellyError("Missing lexical form for literal".into()) + })?; let object = OneOfobject::o_literal(RdfLiteral { lex: Cow::Owned(o.to_string()), literalKind: OneOfliteralKind::datatype(dt_o), @@ -558,11 +691,19 @@ impl JellyEncoder { } } else { // subject and object are both either blank or iri: u32 cols.. - let object_u32s = df.column(OBJECT_COL_NAME).unwrap().u32().unwrap(); + let object_u32s = df + .column(OBJECT_COL_NAME) + .map_err(|e| { + TriplestoreError::WriteJellyError(format!("Missing object column: {}", e)) + })? + .u32() + .map_err(|e| { + TriplestoreError::WriteJellyError(format!("Object column is not u32: {}", e)) + })?; let objects = if object_type.is_iri() { - self.create_iri_objects(object_u32s) + self.create_iri_objects(object_u32s)? } else { - create_blank_objects(object_u32s) + create_blank_objects(object_u32s)? }; for (subject, object) in subjects.into_iter().zip(objects.into_iter()) { self.pending_rows.push(RdfStreamRow { @@ -578,22 +719,42 @@ impl JellyEncoder { Ok(()) } - fn create_iri(&self, u: &u32) -> RdfIri { - RdfIri { - prefix_id: *self.prefix_table.get(u).unwrap(), - name_id: *self.name_table.get(u).unwrap(), - } + fn create_iri(&self, u: &u32) -> Result { + Ok(RdfIri { + prefix_id: *self.prefix_table.get(u).ok_or_else(|| { + TriplestoreError::WriteJellyError(format!("Prefix id not found for u32 {}", u)) + })?, + name_id: *self.name_table.get(u).ok_or_else(|| { + TriplestoreError::WriteJellyError(format!("Name id not found for u32 {}", u)) + })?, + }) } - fn create_iri_subjects(&self, u32s: &UInt32Chunked) -> Vec> { + fn create_iri_subjects( + &self, + u32s: &UInt32Chunked, + ) -> Result>, TriplestoreError> { u32s.iter() - .map(|x| OneOfsubject::s_iri(self.create_iri(&x.unwrap()))) + .map(|x| { + let u = x.ok_or_else(|| { + TriplestoreError::WriteJellyError("Null value in subject column".into()) + })?; + Ok(OneOfsubject::s_iri(self.create_iri(&u)?)) + }) .collect() } - fn create_iri_objects(&self, u32s: &UInt32Chunked) -> Vec> { + fn create_iri_objects( + &self, + u32s: &UInt32Chunked, + ) -> Result>, TriplestoreError> { u32s.iter() - .map(|x| OneOfobject::o_iri(self.create_iri(&x.unwrap()))) + .map(|x| { + let u = x.ok_or_else(|| { + TriplestoreError::WriteJellyError("Null value in object column".into()) + })?; + Ok(OneOfobject::o_iri(self.create_iri(&u)?)) + }) .collect() } @@ -734,14 +895,19 @@ impl JellyEncoder { c: &Column, t: &BaseRDFNodeType, global_cats: LockedCats, - ) { - let read_cats = global_cats.read().unwrap(); + ) -> Result<(), TriplestoreError> { + let read_cats = global_cats.read().map_err(|e| { + TriplestoreError::WriteJellyError(format!( + "Error acquiring read lock on global categories: {}", + e + )) + })?; let mut seen_iri_u32s = Vec::new(); let mut seen_iri_out_u32s = Vec::new(); match t { BaseRDFNodeType::IRI => { - for u in c.u32().unwrap() { - let u = u.unwrap(); + for u in c.u32().expect("IRI column should be u32") { + let u = u.expect("Null value in IRI column"); if !self.name_table.contains_key(&u) { self.name_table.insert(u, self.next_name_id); seen_iri_u32s.push(u); @@ -766,7 +932,10 @@ impl JellyEncoder { .unzip(); for (new_u, suf) in seen_iri_out_u32s.iter().zip(sufs) { - let jelly_id = *self.name_table.get(new_u).unwrap(); + let jelly_id = *self + .name_table + .get(new_u) + .expect("Just inserted name id should be present"); self.pending_rows.push(RdfStreamRow { row: OneOfrow::name(RdfNameEntry { id: jelly_id, @@ -779,6 +948,7 @@ impl JellyEncoder { self.get_or_insert_prefix(u, *prefix); } } + Ok(()) } } @@ -793,14 +963,28 @@ fn create_blank_object(u: u32) -> OneOfobject<'static> { fn create_blank_cow(u: u32) -> Cow<'static, str> { Cow::Owned(format!("b{}", u)) } -fn create_blank_subjects(u32s: &UInt32Chunked) -> Vec> { +fn create_blank_subjects( + u32s: &UInt32Chunked, +) -> Result>, TriplestoreError> { u32s.iter() - .map(|x| create_blank_subject(x.unwrap())) + .map(|x| { + let u = x.ok_or_else(|| { + TriplestoreError::WriteJellyError("Null value in subject column".into()) + })?; + Ok(create_blank_subject(u)) + }) .collect() } -fn create_blank_objects(u32s: &UInt32Chunked) -> Vec> { +fn create_blank_objects( + u32s: &UInt32Chunked, +) -> Result>, TriplestoreError> { u32s.iter() - .map(|x| create_blank_object(x.unwrap())) + .map(|x| { + let u = x.ok_or_else(|| { + TriplestoreError::WriteJellyError("Null value in object column".into()) + })?; + Ok(create_blank_object(u)) + }) .collect() } From 7760427298ab7faea9802eeb40f9da5d98c0af4d Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Fri, 6 Mar 2026 13:23:53 +0100 Subject: [PATCH 14/19] dashmap --- Cargo.lock | 16 +++++ Cargo.toml | 2 + lib/representation/Cargo.toml | 1 + lib/representation/src/cats/maps/in_memory.rs | 51 +++++++++------ lib/representation/src/cats/re_encode.rs | 5 +- lib/triplestore/src/jelly.rs | 64 +++++++++++++++++-- 6 files changed, 110 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0d6004b..054187a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -549,6 +549,21 @@ dependencies = [ "typenum", ] +[[package]] +name = "dashmap" +version = "7.0.0-rc2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a1e35a65fe0538a60167f0ada6e195ad5d477f6ddae273943596d4a1a5730b" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "equivalent", + "hashbrown 0.15.5", + "lock_api", + "parking_lot_core", + "rayon", +] + [[package]] name = "datalog" version = "0.1.0" @@ -2802,6 +2817,7 @@ version = "0.6.10" dependencies = [ "chrono", "chrono-tz", + "dashmap", "nohash-hasher", "oxrdf", "oxsdatatypes", diff --git a/Cargo.toml b/Cargo.toml index d9a1722a..0ca979ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,8 @@ tracing-log = "0.2" itoa = "1.0.15" ryu = "1.0.20" +dashmap = { version = "7.0.0-rc2", features = ["rayon"] } + #dev-dependencies nohash-hasher = "0.2.0" quick-protobuf = { version = "0.8.1" } diff --git a/lib/representation/Cargo.toml b/lib/representation/Cargo.toml index 7eeccd90..1b7731b9 100644 --- a/lib/representation/Cargo.toml +++ b/lib/representation/Cargo.toml @@ -22,5 +22,6 @@ oxsdatatypes.workspace = true rayon.workspace = true nohash-hasher.workspace = true uuid.workspace = true +dashmap.workspace = true [lints.rust] unexpected_cfgs = { level = "warn", check-cfg = ['cfg(feature, values("gil-refs", "rdf-star"))'] } diff --git a/lib/representation/src/cats/maps/in_memory.rs b/lib/representation/src/cats/maps/in_memory.rs index 0678cd70..53acd8f8 100644 --- a/lib/representation/src/cats/maps/in_memory.rs +++ b/lib/representation/src/cats/maps/in_memory.rs @@ -1,6 +1,7 @@ use crate::cats::CatReEnc; use crate::iri_split::split_iri; use crate::BaseRDFNodeType; +use dashmap::DashMap; use nohash_hasher::NoHashHasher; use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; use std::borrow::Cow; @@ -73,7 +74,7 @@ impl PrefixCompressedCatMapsInMemory { new_maps.encode_new_prefix_compressed_string(s.clone(), *c); *c += 1; } - let remap: HashMap<_, _, BuildHasherDefault>> = + let remap: DashMap<_, _, BuildHasherDefault>> = remap.into_iter().collect(); ( new_maps, @@ -251,9 +252,10 @@ impl PrefixCompressedCatMapsInMemory { } pub fn merge(&mut self, other: &PrefixCompressedCatMapsInMemory, c: &mut u32) -> CatReEnc { + let remap_insert_now = Instant::now(); let (remap, insert): (Vec<_>, Vec<_>) = other .map - .iter() + .par_iter() .map(|(s, u)| { if let Some(e) = self.map.get(s) { (Some((*u, *e)), None) @@ -262,24 +264,28 @@ impl PrefixCompressedCatMapsInMemory { } }) .unzip(); + println!("remap_insert_now took: {:?}", remap_insert_now.elapsed()); + + let mut remap: DashMap<_, _, BuildHasherDefault>> = remap + .into_par_iter() + .filter(|x| x.is_some()) + .map(|x| x.unwrap()) + .collect(); + let mut numbered_insert = Vec::new(); - let mut new_remap = Vec::new(); for k in insert { if let Some((s, u)) = k { numbered_insert.push((s, *c)); - new_remap.push((*u, *c)); + remap.insert(*u, *c); *c += 1; } } for (s, u) in numbered_insert { self.encode_new_prefix_compressed_string(s.clone(), u); } - let remap: HashMap<_, _, BuildHasherDefault>> = remap - .into_iter() - .filter(|x| x.is_some()) - .map(|x| x.unwrap()) - .chain(new_remap.into_iter()) - .collect(); + + remap.extend(remap.into_iter()); + let reenc = CatReEnc { cat_map: Arc::new(remap), }; @@ -312,7 +318,7 @@ impl UncompressedCatMapsInMemory { new_maps.encode_new_arc_string(s.clone(), *c); *c += 1; } - let remap: HashMap<_, _, BuildHasherDefault>> = + let remap: DashMap<_, _, BuildHasherDefault>> = remap.into_iter().collect(); ( new_maps, @@ -427,7 +433,7 @@ impl UncompressedCatMapsInMemory { pub fn merge(&mut self, other: &UncompressedCatMapsInMemory, c: &mut u32) -> CatReEnc { let (remap, insert): (Vec<_>, Vec<_>) = other .map - .iter() + .par_iter() .map(|(s, u)| { if let Some(e) = self.map.get(s) { (Some((*u, *e)), None) @@ -436,24 +442,27 @@ impl UncompressedCatMapsInMemory { } }) .unzip(); + + let mut remap: DashMap<_, _, BuildHasherDefault>> = remap + .into_par_iter() + .filter(|x| x.is_some()) + .map(|x| x.unwrap()) + .collect(); + let mut numbered_insert = Vec::new(); - let mut new_remap = Vec::new(); for k in insert { if let Some((s, u)) = k { numbered_insert.push((s, *c)); - new_remap.push((*u, *c)); + remap.insert(*u, *c); *c += 1; } } for (s, u) in numbered_insert { - self.encode_new_arc_string(s, u); + self.encode_new_arc_string(s.clone(), u); } - let remap: HashMap<_, _, BuildHasherDefault>> = remap - .into_iter() - .filter(|x| x.is_some()) - .map(|x| x.unwrap()) - .chain(new_remap.into_iter()) - .collect(); + + remap.extend(remap.into_iter()); + let reenc = CatReEnc { cat_map: Arc::new(remap), }; diff --git a/lib/representation/src/cats/re_encode.rs b/lib/representation/src/cats/re_encode.rs index 856906de..f1b35ad0 100644 --- a/lib/representation/src/cats/re_encode.rs +++ b/lib/representation/src/cats/re_encode.rs @@ -14,10 +14,11 @@ use std::collections::HashMap; use std::hash::BuildHasherDefault; use std::path::Path; use std::sync::Arc; +use dashmap::DashMap; #[derive(Debug, Clone)] pub struct CatReEnc { - pub cat_map: Arc>>>, + pub cat_map: Arc>>>, } impl CatReEnc { @@ -80,7 +81,7 @@ impl Cats { .filter(|x| x.is_some()) .map(|x| x.unwrap()) .collect(); - let renc_map: HashMap<_, _, BuildHasherDefault>> = + let renc_map: DashMap<_, _, BuildHasherDefault>> = rencs.into_iter().flatten().map(|x| x).collect(); let cat_re_enc = CatReEnc { cat_map: Arc::new(renc_map), diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index 0a633439..23250196 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -327,6 +327,20 @@ impl Triplestore { HashMap::from_iter([(CatType::IRI, iri_cat_enc), (CatType::Blank, blank_cat_enc)]); let local = LockedCats::new(Cats::from_map(maps)); + let local_uuid = local.read().expect("Could not read LockedCat").uuid.clone(); + let mut reencs_map = { + let mut global = self.global_cats.write().map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Could not acquire write lock on global cats: {}", + e + )) + })?; + global.merge(vec![local.clone()], None) + }; + let reencs = reencs_map.remove(&local_uuid).unwrap_or_default(); + let iri_reenc = reencs.get(&CatType::IRI); + let blank_reenc = reencs.get(&CatType::Blank); + let mut triples_to_add = Vec::new(); for (p, m) in predicate_map { let (pre, suf) = iri_rev_map.get(&p).ok_or_else(|| { @@ -437,7 +451,7 @@ impl Triplestore { unreachable!() } }; - let df = + let mut df = DataFrame::new(vec![subject_ser.into_column(), object_ser.into_column()]) .map_err(|e| { TriplestoreError::ReadJellyError(format!( @@ -445,8 +459,49 @@ impl Triplestore { e )) })?; + + let sub_reenc = if subject_is_iri { + &iri_reenc + } else { + &blank_reenc + }; + if let Some(reenc) = sub_reenc { + df = reenc + .clone() + .clone() + .re_encode(df.lazy(), SUBJECT_COL_NAME, false) + .collect() + .map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Error remapping subject column: {}", + e + )) + })?; + } + + if object_type.is_iri() || object_type.is_blank_node() { + let obj_reenc = if object_type.is_iri() { + &iri_reenc + } else { + &blank_reenc + }; + if let Some(reenc) = obj_reenc { + df = reenc + .clone() + .clone() + .re_encode(df.lazy(), OBJECT_COL_NAME, false) + .collect() + .map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Error remapping object column: {}", + e + )) + })?; + } + } + let object_cat_state = if object_type.is_iri() || object_type.is_blank_node() { - BaseCatState::CategoricalNative(false, Some(local.clone())) + BaseCatState::CategoricalNative(false, None) } else { object_type.default_input_cat_state() }; @@ -456,10 +511,7 @@ impl Triplestore { object_type, predicate: Some(predicate.clone()), graph: Default::default(), - subject_cat_state: BaseCatState::CategoricalNative( - false, - Some(local.clone()), - ), + subject_cat_state: BaseCatState::CategoricalNative(false, None), object_cat_state, predicate_cat_state: None, }; From 93decaf9983f5d906da4bcc3266a65ba107c2f5a Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 6 Mar 2026 14:05:28 +0100 Subject: [PATCH 15/19] Fix Dashmap, add more parallelism in multiple places --- Cargo.lock | 1 + lib/query_processing/Cargo.toml | 1 + lib/query_processing/src/cats.rs | 5 +++-- lib/representation/src/cats/image.rs | 10 ++++++---- lib/representation/src/cats/maps/in_memory.rs | 4 ---- lib/triplestore/src/jelly.rs | 14 +++++--------- 6 files changed, 16 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 054187a7..43918296 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2542,6 +2542,7 @@ dependencies = [ name = "query_processing" version = "0.3.12" dependencies = [ + "dashmap", "oxrdf", "polars", "rayon", diff --git a/lib/query_processing/Cargo.toml b/lib/query_processing/Cargo.toml index 95912635..7657740d 100644 --- a/lib/query_processing/Cargo.toml +++ b/lib/query_processing/Cargo.toml @@ -38,3 +38,4 @@ tracing.workspace = true uuid.workspace = true thiserror.workspace = true rayon.workspace = true +dashmap.workspace = true \ No newline at end of file diff --git a/lib/query_processing/src/cats.rs b/lib/query_processing/src/cats.rs index d1b72cf1..aa3b54dd 100644 --- a/lib/query_processing/src/cats.rs +++ b/lib/query_processing/src/cats.rs @@ -6,6 +6,7 @@ use std::collections::{HashMap, HashSet}; use std::hash::BuildHasherDefault; use std::ops::Deref; use std::sync::Arc; +use dashmap::DashMap; pub fn create_compatible_cats( expressions: Vec>, @@ -59,9 +60,9 @@ pub fn create_compatible_cats( None } else { let mut renc_map = - HashMap::with_capacity_and_hasher(2, BuildHasherDefault::default()); + DashMap::with_capacity_and_hasher(2, BuildHasherDefault::default()); for renc in iri_renc { - renc_map.extend(renc.cat_map.iter().map(|(x, y)| (*x, *y))) + renc_map.extend(renc.cat_map.iter().map(|x| (*x.key(), *x.value()))) } Some(CatReEnc { cat_map: Arc::new(renc_map), diff --git a/lib/representation/src/cats/image.rs b/lib/representation/src/cats/image.rs index e9e66fda..06fe8e68 100644 --- a/lib/representation/src/cats/image.rs +++ b/lib/representation/src/cats/image.rs @@ -9,6 +9,8 @@ use std::hash::BuildHasherDefault; use std::ops::Deref; use std::path::Path; use std::sync::Arc; +use dashmap::DashMap; +use rayon::iter::{IntoParallelRefIterator, ParallelExtend, ParallelIterator}; impl Cats { pub fn mappings_cat_image(&self, sms: &Vec<&EagerSolutionMappings>) -> Cats { @@ -116,7 +118,7 @@ impl Cats { let remap = self.merge(local_cats, path); let mut concat_reenc: HashMap< String, - HashMap>>>, + HashMap>>>, > = HashMap::new(); for (uuid, reenc) in remap { for (ct, cat_reenc) in reenc { @@ -124,7 +126,7 @@ impl Cats { let bt_map = if let Some(bt_map) = concat_reenc.get_mut(&uuid) { bt_map } else { - concat_reenc.insert(uuid.clone(), HashMap::new()); + concat_reenc.insert(uuid.clone(), HashMap::default()); concat_reenc.get_mut(&uuid).unwrap() }; @@ -133,11 +135,11 @@ impl Cats { } else { bt_map.insert( bt.clone(), - HashMap::with_capacity_and_hasher(2, BuildHasherDefault::default()), + DashMap::with_capacity_and_hasher(2, BuildHasherDefault::default()), ); bt_map.get_mut(&bt).unwrap() }; - e_reenc.extend(cat_reenc.cat_map.iter()); + e_reenc.par_extend(cat_reenc.cat_map.par_iter().map(|x|(*x.key(),*x.value()))); } } let mut concat_reenc_cats = HashMap::new(); diff --git a/lib/representation/src/cats/maps/in_memory.rs b/lib/representation/src/cats/maps/in_memory.rs index 53acd8f8..4adff8e3 100644 --- a/lib/representation/src/cats/maps/in_memory.rs +++ b/lib/representation/src/cats/maps/in_memory.rs @@ -284,8 +284,6 @@ impl PrefixCompressedCatMapsInMemory { self.encode_new_prefix_compressed_string(s.clone(), u); } - remap.extend(remap.into_iter()); - let reenc = CatReEnc { cat_map: Arc::new(remap), }; @@ -461,8 +459,6 @@ impl UncompressedCatMapsInMemory { self.encode_new_arc_string(s.clone(), u); } - remap.extend(remap.into_iter()); - let reenc = CatReEnc { cat_map: Arc::new(remap), }; diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index 23250196..ceffabf7 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -461,14 +461,12 @@ impl Triplestore { })?; let sub_reenc = if subject_is_iri { - &iri_reenc + iri_reenc } else { - &blank_reenc + blank_reenc }; if let Some(reenc) = sub_reenc { df = reenc - .clone() - .clone() .re_encode(df.lazy(), SUBJECT_COL_NAME, false) .collect() .map_err(|e| { @@ -481,14 +479,12 @@ impl Triplestore { if object_type.is_iri() || object_type.is_blank_node() { let obj_reenc = if object_type.is_iri() { - &iri_reenc + iri_reenc } else { - &blank_reenc + blank_reenc }; if let Some(reenc) = obj_reenc { - df = reenc - .clone() - .clone() + df = reenc.clone() .re_encode(df.lazy(), OBJECT_COL_NAME, false) .collect() .map_err(|e| { From ad0150e522ba1b8e47eeb6cee281ca3ac8555d6b Mon Sep 17 00:00:00 2001 From: Magnus Bakken <10287813+magbak@users.noreply.github.com> Date: Fri, 6 Mar 2026 17:37:20 +0100 Subject: [PATCH 16/19] =?UTF-8?q?Small=20fixes=C2=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/representation/src/cats/maps/in_memory.rs | 4 -- lib/triplestore/src/jelly.rs | 65 ++++++++++--------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/lib/representation/src/cats/maps/in_memory.rs b/lib/representation/src/cats/maps/in_memory.rs index 4adff8e3..77ccf1da 100644 --- a/lib/representation/src/cats/maps/in_memory.rs +++ b/lib/representation/src/cats/maps/in_memory.rs @@ -9,7 +9,6 @@ use std::collections::{BTreeMap, HashMap, HashSet}; use std::fmt::{Display, Formatter}; use std::hash::BuildHasherDefault; use std::sync::Arc; -use std::time::Instant; #[derive(Debug, Clone, Ord, Eq, PartialEq, PartialOrd)] pub struct PrefixCompressedString { @@ -252,7 +251,6 @@ impl PrefixCompressedCatMapsInMemory { } pub fn merge(&mut self, other: &PrefixCompressedCatMapsInMemory, c: &mut u32) -> CatReEnc { - let remap_insert_now = Instant::now(); let (remap, insert): (Vec<_>, Vec<_>) = other .map .par_iter() @@ -264,8 +262,6 @@ impl PrefixCompressedCatMapsInMemory { } }) .unzip(); - println!("remap_insert_now took: {:?}", remap_insert_now.elapsed()); - let mut remap: DashMap<_, _, BuildHasherDefault>> = remap .into_par_iter() .filter(|x| x.is_some()) diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index ceffabf7..cd7981e9 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -1,11 +1,11 @@ mod eu; -use quick_protobuf::{serialize_into_vec, BytesReader, MessageWrite, Writer}; +use quick_protobuf::{serialize_into_vec, BytesReader, Writer}; use std::borrow::Cow; use std::cmp; use std::collections::{HashMap, HashSet}; use std::io::Write; use std::sync::Arc; - +use std::time::Instant; use super::{TriplesToAdd, Triplestore}; use crate::errors::TriplestoreError; use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind; @@ -22,9 +22,10 @@ use oxrdf::NamedNode; use polars::prelude::{as_struct, col, IntoLazy, LiteralValue, PlSmallStr}; use polars_core::datatypes::UInt32Chunked; use polars_core::frame::DataFrame; -use polars_core::prelude::{Column, IntoColumn, LhsNumOps, Scalar}; +use polars_core::prelude::{Column, IntoColumn, Scalar}; use polars_core::POOL; -use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; +use tracing::trace; use representation::cats::maps::in_memory::{ CatMapsInMemory, PrefixCompressedCatMapsInMemory, PrefixCompressedString, UncompressedCatMapsInMemory, @@ -44,10 +45,11 @@ use representation::{ }; const JELLY_FRAME_SIZE: usize = 1024; -const LANG_STRING_U32: u32 = u32::MAX - 1; -const IRI_U32: u32 = 0; -const BLANK_U32: u32 = u32::MAX; +const IRI_U32: u32 = u32::MAX; +const BLANK_U32: u32 = u32::MAX -1 ; const STRING_U32: u32 = u32::MAX - 2; +const LANG_STRING_U32: u32 = u32::MAX - 3; + impl Triplestore { pub fn parse_jelly( @@ -90,9 +92,12 @@ impl Triplestore { } else { let pi = iri_map.len() as u32; iri_map.insert(pred.clone(), pi); - iri_rev_map.insert(pi, pred); pi }; + if !iri_rev_map.contains_key(&pred_iri_u32) { + iri_rev_map.insert(pred_iri_u32, pred); + } + let subject_type_map = if let Some(sm) = predicate_map.get_mut(&pred_iri_u32) { sm @@ -189,7 +194,6 @@ impl Triplestore { value = LiteralValue::Scalar(Scalar::from( PlSmallStr::from_string(l.lex.to_string()), )); - LANG_STRING_U32 } OneOfliteralKind::datatype(t) => { @@ -465,37 +469,34 @@ impl Triplestore { } else { blank_reenc }; + let mut lf = df.lazy(); if let Some(reenc) = sub_reenc { - df = reenc - .re_encode(df.lazy(), SUBJECT_COL_NAME, false) - .collect() - .map_err(|e| { - TriplestoreError::ReadJellyError(format!( - "Error remapping subject column: {}", - e - )) - })?; + lf = reenc.clone() + .re_encode(lf, SUBJECT_COL_NAME, false); } - if object_type.is_iri() || object_type.is_blank_node() { - let obj_reenc = if object_type.is_iri() { + let obj_reenc = if object_type.is_iri() || object_type.is_blank_node() { + if object_type.is_iri() { iri_reenc } else { blank_reenc - }; - if let Some(reenc) = obj_reenc { - df = reenc.clone() - .re_encode(df.lazy(), OBJECT_COL_NAME, false) - .collect() - .map_err(|e| { - TriplestoreError::ReadJellyError(format!( - "Error remapping object column: {}", - e - )) - })?; } + } else { + None + }; + + if let Some(reenc) = obj_reenc { + lf = reenc.clone().re_encode(lf, OBJECT_COL_NAME, false); } + df = lf.collect() + .map_err(|e| { + TriplestoreError::ReadJellyError(format!( + "Error remapping: {}", + e + )) + })?; + let object_cat_state = if object_type.is_iri() || object_type.is_blank_node() { BaseCatState::CategoricalNative(false, None) } else { @@ -515,7 +516,9 @@ impl Triplestore { } } } + let start_add_triples_vec = Instant::now(); self.add_triples_vec(triples_to_add, false)?; + trace!("Adding triples vec took {}", start_add_triples_vec.elapsed().as_secs_f32()); Ok(()) } From c54008085651e3a40af093e37307b82750ee203e Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Wed, 11 Mar 2026 14:19:42 +0100 Subject: [PATCH 17/19] parse_jelly sends to correct graph --- lib/triplestore/src/jelly.rs | 2 +- py_maplib/tests/test_jelly.py | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index cd7981e9..568d9906 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -507,7 +507,7 @@ impl Triplestore { subject_type: subject_type.clone(), object_type, predicate: Some(predicate.clone()), - graph: Default::default(), + graph: graph.clone(), subject_cat_state: BaseCatState::CategoricalNative(false, None), object_cat_state, predicate_cat_state: None, diff --git a/py_maplib/tests/test_jelly.py b/py_maplib/tests/test_jelly.py index e443962c..01c76c29 100644 --- a/py_maplib/tests/test_jelly.py +++ b/py_maplib/tests/test_jelly.py @@ -61,4 +61,27 @@ def test_read_jelly(): print("Expected DataFrame:") print(expected) - assert_frame_equal(df, expected) \ No newline at end of file + assert_frame_equal(df, expected) + +def test_jelly_correct_graph(): + m = Model() + m.read(TESTDATA_PATH / "sunspots.ttl") + filename = TESTDATA_PATH / "output.jelly" + m.write(filename, format="jelly") + + m2 = Model() + m2.read(filename, format="jelly", graph="http://example.net/mygraph") + + df = m2.query(""" + SELECT * WHERE { + GRAPH { + ?a ?b ?c . + } + } + """) + assert df.height > 0 + + df2 = m2.query(""" + SELECT ?s ?p ?o WHERE { ?s ?p ?o . } ORDER BY ?s ?p ?o + """) + assert df2.height == 0 \ No newline at end of file From 821993ede7f74da4127204d6b4f82926458e9478 Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Fri, 13 Mar 2026 10:22:34 +0100 Subject: [PATCH 18/19] renc_map par_extend and iri_id bloomfilter --- Cargo.lock | 18 +++++++++-- Cargo.toml | 1 + lib/query_processing/src/cats.rs | 3 +- lib/triplestore/Cargo.toml | 1 + lib/triplestore/src/jelly.rs | 55 +++++++++++++++++++------------- 5 files changed, 53 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 43918296..e684a0a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -678,6 +678,19 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" +[[package]] +name = "fastbloom" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef975e30683b2d965054bb0a836f8973857c4ebf6acf274fe46617cd285060d8" +dependencies = [ + "foldhash 0.2.0", + "libm", + "portable-atomic", + "rand", + "siphasher", +] + [[package]] name = "file_io" version = "0.5.0" @@ -2388,9 +2401,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" @@ -3640,6 +3653,7 @@ version = "0.5.0" dependencies = [ "aho-corasick", "cimxml_import", + "fastbloom", "file_io", "fts", "itoa", diff --git a/Cargo.toml b/Cargo.toml index 0ca979ab..a678fcf7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,6 +70,7 @@ tracing-log = "0.2" itoa = "1.0.15" ryu = "1.0.20" dashmap = { version = "7.0.0-rc2", features = ["rayon"] } +fastbloom = "0.17.0" #dev-dependencies nohash-hasher = "0.2.0" diff --git a/lib/query_processing/src/cats.rs b/lib/query_processing/src/cats.rs index aa3b54dd..2a5cad94 100644 --- a/lib/query_processing/src/cats.rs +++ b/lib/query_processing/src/cats.rs @@ -7,6 +7,7 @@ use std::hash::BuildHasherDefault; use std::ops::Deref; use std::sync::Arc; use dashmap::DashMap; +use rayon::iter::{IntoParallelRefIterator, ParallelExtend, ParallelIterator}; pub fn create_compatible_cats( expressions: Vec>, @@ -62,7 +63,7 @@ pub fn create_compatible_cats( let mut renc_map = DashMap::with_capacity_and_hasher(2, BuildHasherDefault::default()); for renc in iri_renc { - renc_map.extend(renc.cat_map.iter().map(|x| (*x.key(), *x.value()))) + renc_map.par_extend(renc.cat_map.par_iter().map(|x| (*x.key(), *x.value()))) } Some(CatReEnc { cat_map: Arc::new(renc_map), diff --git a/lib/triplestore/Cargo.toml b/lib/triplestore/Cargo.toml index 9a9b3b85..de90745f 100644 --- a/lib/triplestore/Cargo.toml +++ b/lib/triplestore/Cargo.toml @@ -37,6 +37,7 @@ aho-corasick.workspace = true simd-json.workspace = true serde_json.workspace = true quick-protobuf.workspace = true +fastbloom.workspace = true pyo3 = { workspace = true, optional = true } diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index 568d9906..5e2c9290 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -1,11 +1,4 @@ mod eu; -use quick_protobuf::{serialize_into_vec, BytesReader, Writer}; -use std::borrow::Cow; -use std::cmp; -use std::collections::{HashMap, HashSet}; -use std::io::Write; -use std::sync::Arc; -use std::time::Instant; use super::{TriplesToAdd, Triplestore}; use crate::errors::TriplestoreError; use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::mod_RdfLiteral::OneOfliteralKind; @@ -17,6 +10,7 @@ use crate::jelly::eu::ostrzyciel::jelly::core::proto::v1::{ LogicalStreamType, PhysicalStreamType, RdfDatatypeEntry, RdfIri, RdfLiteral, RdfNameEntry, RdfPrefixEntry, RdfStreamFrame, RdfStreamOptions, RdfStreamRow, RdfTriple, }; +use fastbloom::BloomFilter; use oxrdf::vocab::{rdf, xsd}; use oxrdf::NamedNode; use polars::prelude::{as_struct, col, IntoLazy, LiteralValue, PlSmallStr}; @@ -24,8 +18,8 @@ use polars_core::datatypes::UInt32Chunked; use polars_core::frame::DataFrame; use polars_core::prelude::{Column, IntoColumn, Scalar}; use polars_core::POOL; +use quick_protobuf::{serialize_into_vec, BytesReader, Writer}; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; -use tracing::trace; use representation::cats::maps::in_memory::{ CatMapsInMemory, PrefixCompressedCatMapsInMemory, PrefixCompressedString, UncompressedCatMapsInMemory, @@ -43,14 +37,20 @@ use representation::{ BaseRDFNodeType, LANG_STRING_LANG_FIELD, LANG_STRING_VALUE_FIELD, OBJECT_COL_NAME, SUBJECT_COL_NAME, }; +use std::borrow::Cow; +use std::cmp; +use std::collections::{HashMap, HashSet}; +use std::io::Write; +use std::sync::Arc; +use std::time::Instant; +use tracing::trace; const JELLY_FRAME_SIZE: usize = 1024; const IRI_U32: u32 = u32::MAX; -const BLANK_U32: u32 = u32::MAX -1 ; +const BLANK_U32: u32 = u32::MAX - 1; const STRING_U32: u32 = u32::MAX - 2; const LANG_STRING_U32: u32 = u32::MAX - 3; - impl Triplestore { pub fn parse_jelly( &mut self, @@ -65,6 +65,7 @@ impl Triplestore { })?; let mut prefix_map: HashMap> = Default::default(); let mut name_map: HashMap> = Default::default(); + let mut b_filter = BloomFilter::with_false_pos(0.001).expected_items(1000000); let mut iri_map: HashMap<(u32, u32), u32> = HashMap::new(); let mut iri_rev_map: HashMap = HashMap::new(); let mut blank_map: HashMap = HashMap::new(); @@ -73,6 +74,7 @@ impl Triplestore { u32, HashMap, Vec, Vec)>>, > = Default::default(); + let mut filter_total = std::time::Duration::ZERO; while !reader.is_eof() { let frame: RdfStreamFrame = reader.read_message(slice).map_err(|x| { TriplestoreError::ReadJellyError(format!("Error reading row: {}", x)) @@ -158,13 +160,23 @@ impl Triplestore { object_map.get_mut(&IRI_U32).expect("Just inserted") }; let k = (i.prefix_id, i.name_id); - let iri_id = if let Some(iri_id) = iri_map.get(&k) { - *iri_id + let filer_now = Instant::now(); + let iri_id = if b_filter.contains(&k) { + if let Some(iri_id) = iri_map.get(&k) { + *iri_id + } else { + let v = iri_map.len() as u32; + iri_map.insert(k, v); + b_filter.insert(&k); + v + } } else { let v = iri_map.len() as u32; iri_map.insert(k, v); + b_filter.insert(&k); v }; + filter_total += filer_now.elapsed(); (LiteralValue::Scalar(Scalar::from(iri_id)), None, vecs) } OneOfobject::o_bnode(b) => { @@ -315,6 +327,7 @@ impl Triplestore { ); } } + println!("filter total: {}", filter_total.as_secs_f32()); let iri_cat_enc = CatEncs { maps: CatMaps::InMemory(CatMapsInMemory::Compressed(iri_cat_enc)), }; @@ -471,8 +484,7 @@ impl Triplestore { }; let mut lf = df.lazy(); if let Some(reenc) = sub_reenc { - lf = reenc.clone() - .re_encode(lf, SUBJECT_COL_NAME, false); + lf = reenc.clone().re_encode(lf, SUBJECT_COL_NAME, false); } let obj_reenc = if object_type.is_iri() || object_type.is_blank_node() { @@ -489,13 +501,9 @@ impl Triplestore { lf = reenc.clone().re_encode(lf, OBJECT_COL_NAME, false); } - df = lf.collect() - .map_err(|e| { - TriplestoreError::ReadJellyError(format!( - "Error remapping: {}", - e - )) - })?; + df = lf.collect().map_err(|e| { + TriplestoreError::ReadJellyError(format!("Error remapping: {}", e)) + })?; let object_cat_state = if object_type.is_iri() || object_type.is_blank_node() { BaseCatState::CategoricalNative(false, None) @@ -518,7 +526,10 @@ impl Triplestore { } let start_add_triples_vec = Instant::now(); self.add_triples_vec(triples_to_add, false)?; - trace!("Adding triples vec took {}", start_add_triples_vec.elapsed().as_secs_f32()); + trace!( + "Adding triples vec took {}", + start_add_triples_vec.elapsed().as_secs_f32() + ); Ok(()) } From 95d592a38028dafb6b4c6b9d1a470b7c28dfa4a9 Mon Sep 17 00:00:00 2001 From: Sindre Novi Date: Fri, 13 Mar 2026 10:25:41 +0100 Subject: [PATCH 19/19] removed filter timing --- lib/triplestore/src/jelly.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/triplestore/src/jelly.rs b/lib/triplestore/src/jelly.rs index 5e2c9290..e7c8f71d 100644 --- a/lib/triplestore/src/jelly.rs +++ b/lib/triplestore/src/jelly.rs @@ -74,7 +74,6 @@ impl Triplestore { u32, HashMap, Vec, Vec)>>, > = Default::default(); - let mut filter_total = std::time::Duration::ZERO; while !reader.is_eof() { let frame: RdfStreamFrame = reader.read_message(slice).map_err(|x| { TriplestoreError::ReadJellyError(format!("Error reading row: {}", x)) @@ -160,7 +159,6 @@ impl Triplestore { object_map.get_mut(&IRI_U32).expect("Just inserted") }; let k = (i.prefix_id, i.name_id); - let filer_now = Instant::now(); let iri_id = if b_filter.contains(&k) { if let Some(iri_id) = iri_map.get(&k) { *iri_id @@ -176,7 +174,6 @@ impl Triplestore { b_filter.insert(&k); v }; - filter_total += filer_now.elapsed(); (LiteralValue::Scalar(Scalar::from(iri_id)), None, vecs) } OneOfobject::o_bnode(b) => { @@ -327,7 +324,6 @@ impl Triplestore { ); } } - println!("filter total: {}", filter_total.as_secs_f32()); let iri_cat_enc = CatEncs { maps: CatMaps::InMemory(CatMapsInMemory::Compressed(iri_cat_enc)), };