Skip to content

Commit 0201870

Browse files
authored
Merge pull request #642 from anusii/tony/628_turtleToTripleMap
Tony/628_turtleToTripleMap
2 parents e1f3633 + 74479a0 commit 0201870

3 files changed

Lines changed: 255 additions & 18 deletions

File tree

example/pubspec.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ dependencies:
1414
intl: any
1515
markdown_tooltip: ^0.0.10
1616
rdflib: ^0.2.12
17-
solidpod: ^0.12.0
18-
solidui: ^0.3.22
17+
solidpod: ^0.12.5
18+
solidui: ^0.3.43
1919
universal_io: ^2.3.1
2020
window_manager: ^0.5.1
2121

lib/src/solid/utils/rdf.dart

Lines changed: 252 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/// Utilities for working on Turtle (Terse RDF Triple Language) formatted strings.
22
///
3-
/// Copyright (C) 2024, Software Innovation Institute, ANU.
3+
/// Copyright (C) 2024-2026, Software Innovation Institute, ANU.
44
///
55
/// Licensed under the MIT License (the "License").
66
///
@@ -24,47 +24,280 @@
2424
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2525
// SOFTWARE.
2626
///
27-
/// Authors: Dawei Chen
27+
/// Authors: Dawei Chen, Tony Chen
2828
2929
library;
3030

31+
import 'package:petitparser/petitparser.dart';
3132
import 'package:rdflib/rdflib.dart';
3233

3334
import 'package:solidpod/src/solid/constants/common.dart';
3435
import 'package:solidpod/src/solid/constants/schema.dart';
3536

37+
// Sentinel characters used to mark string literal boundaries during parsing.
38+
39+
const String _literalStart = '\u0002';
40+
const String _literalEnd = '\u0003';
41+
42+
// Parser definition that wraps every STRING literal with the sentinel
43+
// characters so literals remain distinguishable from prefixed names when the
44+
// parse tree is walked.
45+
//
46+
// The overridden method names use the SCREAMING_SNAKE_CASE convention defined
47+
// by the W3C Turtle grammar and inherited from rdflib's [EvaluatorDefinition],
48+
// so the `non_constant_identifier_names` lint is intentionally ignored.
49+
50+
// ignore_for_file: non_constant_identifier_names
51+
class _LiteralPreservingEvaluator extends EvaluatorDefinition {
  // Each of the four string-literal productions is overridden in the same
  // way: the value produced by the inherited rule is bracketed with
  // [_literalStart] and [_literalEnd].

  @override
  Parser STRING_LITERAL_QUOTE() {
    final inherited = super.STRING_LITERAL_QUOTE();
    return inherited.map((token) => '$_literalStart$token$_literalEnd');
  }

  @override
  Parser STRING_LITERAL_SINGLE_QUOTE() {
    final inherited = super.STRING_LITERAL_SINGLE_QUOTE();
    return inherited.map((token) => '$_literalStart$token$_literalEnd');
  }

  @override
  Parser STRING_LITERAL_LONG_QUOTE() {
    final inherited = super.STRING_LITERAL_LONG_QUOTE();
    return inherited.map((token) => '$_literalStart$token$_literalEnd');
  }

  @override
  Parser STRING_LITERAL_LONG_SINGLE_QUOTE() {
    final inherited = super.STRING_LITERAL_LONG_SINGLE_QUOTE();
    return inherited.map((token) => '$_literalStart$token$_literalEnd');
  }
}
72+
73+
// Build the literal-preserving Turtle parser once and reuse it for every call
74+
// to [turtleToTripleMap].
75+
76+
final Parser _turtleParser = _LiteralPreservingEvaluator().build();
77+
3678
/// Parse the Turtle string into triples stored in a map:
3779
/// {subject: {predicate: object(s)}}
3880
/// - subject: URIRef String
3981
/// - predicate: URIRef String
4082
/// - object: dynamic
83+
4184
Map<String, Map<String, dynamic>> turtleToTripleMap(String turtleStr) {
42-
final g = Graph();
43-
g.parseTurtle(turtleStr);
85+
final preprocessed = _preprocessLongLiterals(turtleStr);
86+
final stripped = _stripComments(preprocessed);
87+
88+
final result = _turtleParser.parse(stripped);
89+
if (result is! Success || result.value is! List) {
90+
return <String, Map<String, dynamic>>{};
91+
}
92+
93+
final ast = result.value as List;
94+
final prefixes = <String, String>{};
95+
String? baseIri;
4496
final tripleMap = <String, Map<String, dynamic>>{};
45-
for (final t in g.triples) {
46-
final sub = t.sub.value as String;
47-
final pre = t.pre.value as String;
48-
final obj = t.obj.value;
49-
if (tripleMap.containsKey(sub)) {
50-
if (tripleMap[sub]!.containsKey(pre)) {
51-
final objs = tripleMap[sub]![pre]!;
52-
tripleMap[sub]![pre] = objs is List ? objs + [obj] : [objs, obj];
53-
} else {
54-
tripleMap[sub]![pre] = obj;
97+
98+
for (final statement in ast) {
99+
if (statement is! List || statement.isEmpty) continue;
100+
101+
final head = statement[0];
102+
103+
// Prefix and base directives are matched case-insensitively so that
104+
// both `@prefix` / `@base` and the SPARQL `PREFIX` / `BASE` keywords
105+
// are recognised.
106+
107+
if (head is String) {
108+
final keyword = head.toLowerCase();
109+
if (keyword == '@prefix' || keyword == 'prefix') {
110+
if (statement.length >= 3) {
111+
final ns = statement[1] as String;
112+
prefixes[ns] = _stripAngleBrackets(statement[2] as String);
113+
}
114+
continue;
115+
}
116+
if (keyword == '@base' || keyword == 'base') {
117+
if (statement.length >= 2) {
118+
baseIri = _stripAngleBrackets(statement[1] as String);
119+
}
120+
continue;
121+
}
122+
}
123+
124+
if (head is List && head.length >= 2) {
125+
final subjectRaw = head[0];
126+
final predicateObjectList = head[1];
127+
if (subjectRaw is! String || predicateObjectList is! List) continue;
128+
129+
final subject = _expandIri(subjectRaw, prefixes, baseIri);
130+
final subjectEntry =
131+
tripleMap.putIfAbsent(subject, () => <String, dynamic>{});
132+
133+
for (final predicateObject in predicateObjectList) {
134+
if (predicateObject is! List || predicateObject.length < 2) continue;
135+
136+
final predicateRaw = predicateObject[0];
137+
final objectList = predicateObject[1];
138+
if (predicateRaw is! String || objectList is! List) continue;
139+
140+
final predicate = _expandPredicate(predicateRaw, prefixes, baseIri);
141+
142+
for (final obj in objectList) {
143+
final value = _convertObject(obj, prefixes, baseIri);
144+
if (value == null) continue;
145+
146+
if (subjectEntry.containsKey(predicate)) {
147+
final existing = subjectEntry[predicate];
148+
subjectEntry[predicate] =
149+
existing is List ? existing + [value] : [existing, value];
150+
} else {
151+
subjectEntry[predicate] = value;
152+
}
153+
}
55154
}
56-
} else {
57-
tripleMap[sub] = {pre: obj};
58155
}
59156
}
157+
60158
return tripleMap;
61159
}
62160

161+
// Map a predicate token to its full IRI. The Turtle keyword `a` stands for
// rdf:type; every other token is handed to [_expandIri].

String _expandPredicate(
  String raw,
  Map<String, String> prefixes,
  String? base,
) =>
    raw.trim() == 'a'
        ? 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
        : _expandIri(raw, prefixes, base);
173+
174+
// Turn an IRI-like token into the string used as a map key.
//
// - `<...>`      : the angle brackets are removed.
// - `_:label`    : blank nodes are returned unchanged.
// - `:local`     : prefixed with the `:` namespace, falling back to [base]
//                  and then to the empty string.
// - `ns:local`   : `ns:` is replaced by its namespace when [prefixes] has an
//                  entry for it (keys include the trailing colon).
// - anything else is returned trimmed but otherwise untouched.

String _expandIri(String raw, Map<String, String> prefixes, String? base) {
  final token = raw.trim();

  final isIriRef = token.startsWith('<') && token.endsWith('>');
  if (isIriRef) return token.substring(1, token.length - 1);

  if (token.startsWith('_:')) return token;

  final split = token.indexOf(':');

  // A leading colon means the empty prefix.
  if (split == 0) {
    final namespace = prefixes[':'] ?? base ?? '';
    return '$namespace${token.substring(1)}';
  }

  if (split > 0) {
    final namespace = prefixes[token.substring(0, split + 1)];
    if (namespace != null) {
      return '$namespace${token.substring(split + 1)}';
    }
  }

  return token;
}
205+
206+
// Turn one parsed object token into the value stored in the triple map.
//
// - Non-string tokens are stringified with `toString()`.
// - Sentinel-wrapped string literals yield only the text between
//   [_literalStart] and [_literalEnd]; anything after the closing sentinel
//   (such as a language tag or datatype) is dropped.
// - `<...>` IRIs lose their angle brackets.
// - Blank nodes, `true` / `false` and numeric tokens are returned in their
//   original lexical form.
// - Any other token containing `:` is expanded with [_expandIri].

dynamic _convertObject(
  dynamic obj,
  Map<String, String> prefixes,
  String? base,
) {
  if (obj is! String) return obj.toString();

  if (obj.startsWith(_literalStart)) {
    final from = _literalStart.length;
    final to = obj.indexOf(_literalEnd);
    // A missing end sentinel means the rest of the token is the literal.
    return to < 0 ? obj.substring(from) : obj.substring(from, to);
  }

  final token = obj.trim();

  if (token.startsWith('<') && token.endsWith('>')) {
    return token.substring(1, token.length - 1);
  }

  final isBlankNode = token.startsWith('_:');
  final isBoolean = token == 'true' || token == 'false';
  if (isBlankNode || isBoolean || double.tryParse(token) != null) {
    return token;
  }

  return token.contains(':') ? _expandIri(token, prefixes, base) : token;
}
252+
253+
// Return [token] without its enclosing `<` and `>`. Tokens that are not
// wrapped in angle brackets are returned unchanged.

String _stripAngleBrackets(String token) {
  final wrapped =
      token.length >= 2 && token.startsWith('<') && token.endsWith('>');
  return wrapped ? token.substring(1, token.length - 1) : token;
}
259+
260+
// Remove Turtle comments while leaving `#` characters inside string literals
// and `<...>` IRIs untouched.
//
// A line whose first non-blank character is `#` is dropped entirely. On any
// other line, a `#` that follows whitespace and sits outside a quoted literal
// or IRI starts a comment: it, the whitespace run before it, and the rest of
// the line are removed. Every kept line is written back followed by `\n`.
//
// Literal and IRI state is tracked per line, so long literals are expected to
// have been collapsed onto one line before this function is called.

String _stripComments(String content) {
  final buffer = StringBuffer();
  for (final line in content.split('\n')) {
    if (line.trimLeft().startsWith('#')) continue;
    final hash = _commentStart(line);
    buffer
      ..write(hash < 0 ? line : line.substring(0, hash).trimRight())
      ..write('\n');
  }
  return buffer.toString();
}

// Return the index of the `#` that opens a comment on [line], or -1 when the
// line has no comment.

int _commentStart(String line) {
  String? quote; // Delimiter of the literal currently open, if any.
  var inIri = false;

  for (var i = 0; i < line.length; i++) {
    final ch = line[i];

    // A backslash escapes the next character (e.g. `\"` inside a literal or
    // `\'` in a local name), so that character can never open or close
    // anything.
    if (ch == '\\') {
      i++;
      continue;
    }

    if (quote != null) {
      if (ch == quote) quote = null;
    } else if (inIri) {
      if (ch == '>') inIri = false;
    } else if (ch == '"' || ch == "'") {
      quote = ch;
    } else if (ch == '<') {
      inIri = true;
    } else if (ch == '#' && (i == 0 || line[i - 1].trim().isEmpty)) {
      return i;
    }
  }
  return -1;
}
277+
278+
// Collapse every long literal (`"""..."""` and `'''...'''`) into a
// single-line, double-quoted literal so the line-based grammar can match it.
// NOTE(review): the original comment says this mirrors the preprocessing in
// rdflib's [Graph.parseTurtle]; confirm against that package.
//
// Inside the literal body:
// - line feeds and carriage returns become the escapes `\n` and `\r`, since
//   neither may appear raw in a short literal;
// - each `"` that is not already escaped is escaped, because the result is
//   always delimited by double quotes.
//
// Both delimiter styles are handled in the same pass, whichever comes first
// in the text, so a document may mix them freely.

String _preprocessLongLiterals(String content) {
  // Group 1 is the opening delimiter; `\1` requires the same one to close.
  final longLiteral = RegExp(r'''("{3}|'{3})(.*?)\1''', dotAll: true);
  // A quote together with the run of backslashes directly before it.
  final quoteWithSlashes = RegExp(r'(\\*)"');

  return content.replaceAllMapped(longLiteral, (match) {
    final inner = match
        .group(2)!
        .replaceAll('\r', r'\r')
        .replaceAll('\n', r'\n')
        .replaceAllMapped(quoteWithSlashes, (q) {
      final slashes = q.group(1)!;
      // An odd number of backslashes means the quote is already escaped.
      return slashes.length.isEven ? '$slashes\\"' : q.group(0)!;
    });
    return '"$inner"';
  });
}
294+
63295
/// Generate Turtle string from triples stored in a map:
64296
/// {subject: {predicate: {object}}}
65297
/// - subject: URIRef String
66298
/// - predicate: URIRef String
67299
/// - object: {dynamic}
300+
68301
String tripleMapToTurtle(
69302
Map<URIRef, Map<URIRef, dynamic>> triples, {
70303
Map<String, Namespace>? bindNamespaces,
@@ -121,6 +354,7 @@ String tripleMapToTurtle(
121354
// predicate. Also the function extract() can be removed when we have properly
122355
// defined our namespaces
123356
/// Parse TTL content into a map {subject: {predicate: {objects}}}
357+
124358
Map<String, dynamic> parseTTLMap(String ttlContent) {
125359
final g = Graph();
126360
g.parseTurtle(ttlContent);
@@ -145,6 +379,7 @@ Map<String, dynamic> parseTTLMap(String ttlContent) {
145379
}
146380

147381
/// Parse ACL content into a map {subject: {predicate: object}}
382+
148383
Map<String, dynamic> parseACL(String aclContent) {
149384
final g = Graph();
150385
g.parseTurtle(aclContent);
@@ -176,6 +411,7 @@ Map<String, dynamic> parseACL(String aclContent) {
176411
}
177412

178413
/// Generate permission log file content
414+
179415
String genPermLogTTLStr(String resourceUrl) => tripleMapToTurtle(
180416
{
181417
URIRef(resourceUrl): {

pubspec.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ dependencies:
3030
mime: ^2.0.0
3131
package_info_plus: ^9.0.0
3232
path: ^1.9.1
33+
petitparser: ^6.1.0
3334
pointycastle: ^4.0.0
3435
rdflib: ^0.2.12
3536
solid_auth: ^0.1.29

0 commit comments

Comments
 (0)