/// Utilities for Turtle (Terse RDF Triple Language) formatted strings.
22///
3- /// Copyright (C) 2024, Software Innovation Institute, ANU.
3+ /// Copyright (C) 2024-2026 , Software Innovation Institute, ANU.
44///
55/// Licensed under the MIT License (the "License").
66///
2424// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2525// SOFTWARE.
2626///
27- /// Authors: Dawei Chen
27+ /// Authors: Dawei Chen, Tony Chen
2828
2929library ;
3030
31+ import 'package:petitparser/petitparser.dart' ;
3132import 'package:rdflib/rdflib.dart' ;
3233
3334import 'package:solidpod/src/solid/constants/common.dart' ;
3435import 'package:solidpod/src/solid/constants/schema.dart' ;
3536
37+ // Sentinel characters used to mark string literal boundaries during parsing.
38+
39+ const String _literalStart = '\u 0002' ;
40+ const String _literalEnd = '\u 0003' ;
41+
42+ // Parser definition that wraps every STRING literal with the sentinel
43+ // characters so literals remain distinguishable from prefixed names when the
44+ // parse tree is walked.
45+ //
46+ // The overridden method names use the SCREAMING_SNAKE_CASE convention defined
47+ // by the W3C Turtle grammar and inherited from rdflib's [EvaluatorDefinition],
48+ // so the `non_constant_identifier_names` lint is intentionally ignored.
49+
50+ // ignore_for_file: non_constant_identifier_names
class _LiteralPreservingEvaluator extends EvaluatorDefinition {
  // Shared transformation for all four string-literal productions: the text
  // produced by [literal] is emitted between the start and end sentinels.
  static Parser _tagged(Parser literal) =>
      literal.map((text) => '$_literalStart$text$_literalEnd');

  // Double-quoted literal.
  @override
  Parser STRING_LITERAL_QUOTE() => _tagged(super.STRING_LITERAL_QUOTE());

  // Single-quoted literal.
  @override
  Parser STRING_LITERAL_SINGLE_QUOTE() =>
      _tagged(super.STRING_LITERAL_SINGLE_QUOTE());

  // Triple-double-quoted (long) literal.
  @override
  Parser STRING_LITERAL_LONG_QUOTE() =>
      _tagged(super.STRING_LITERAL_LONG_QUOTE());

  // Triple-single-quoted (long) literal.
  @override
  Parser STRING_LITERAL_LONG_SINGLE_QUOTE() =>
      _tagged(super.STRING_LITERAL_LONG_SINGLE_QUOTE());
}
72+
73+ // Build the literal-preserving Turtle parser once and reuse it for every call
74+ // to [turtleToTripleMap].
75+
76+ final Parser _turtleParser = _LiteralPreservingEvaluator ().build ();
77+
/// Parse the Turtle string into triples stored in a map:
/// {subject: {predicate: object(s)}}
/// - subject: URIRef String
/// - predicate: URIRef String
/// - object: dynamic
///
/// A predicate that occurs once for a subject maps to a single value; a
/// predicate that occurs several times maps to a list of its values in the
/// order they are encountered. An empty map is returned when [turtleStr]
/// cannot be parsed.

Map<String, Map<String, dynamic>> turtleToTripleMap(String turtleStr) {
  // Long literals are collapsed first so that comment stripping and the
  // parser only ever see single-line literals.
  final preprocessed = _preprocessLongLiterals(turtleStr);
  final stripped = _stripComments(preprocessed);

  final result = _turtleParser.parse(stripped);
  if (result is! Success || result.value is! List) {
    return <String, Map<String, dynamic>>{};
  }

  final ast = result.value as List;
  final prefixes = <String, String>{};
  String? baseIri;
  final tripleMap = <String, Map<String, dynamic>>{};

  for (final statement in ast) {
    if (statement is! List || statement.isEmpty) continue;

    final head = statement[0];

    // Prefix and base directives are matched case-insensitively so that
    // both `@prefix` / `@base` and the SPARQL `PREFIX` / `BASE` keywords
    // are recognised.

    if (head is String) {
      final keyword = head.toLowerCase();
      if (keyword == '@prefix' || keyword == 'prefix') {
        if (statement.length >= 3) {
          // Type-check instead of casting so that a malformed directive is
          // skipped like every other unexpected node, rather than throwing.
          final ns = statement[1];
          final iri = statement[2];
          if (ns is String && iri is String) {
            prefixes[ns] = _stripAngleBrackets(iri);
          }
        }
        continue;
      }
      if (keyword == '@base' || keyword == 'base') {
        if (statement.length >= 2) {
          final iri = statement[1];
          if (iri is String) {
            baseIri = _stripAngleBrackets(iri);
          }
        }
        continue;
      }
    }

    // A triples statement: [subject, [[predicate, [objects...]], ...]].

    if (head is List && head.length >= 2) {
      final subjectRaw = head[0];
      final predicateObjectList = head[1];
      if (subjectRaw is! String || predicateObjectList is! List) continue;

      final subject = _expandIri(subjectRaw, prefixes, baseIri);
      final subjectEntry =
          tripleMap.putIfAbsent(subject, () => <String, dynamic>{});

      for (final predicateObject in predicateObjectList) {
        if (predicateObject is! List || predicateObject.length < 2) continue;

        final predicateRaw = predicateObject[0];
        final objectList = predicateObject[1];
        if (predicateRaw is! String || objectList is! List) continue;

        final predicate = _expandPredicate(predicateRaw, prefixes, baseIri);

        for (final obj in objectList) {
          final value = _convertObject(obj, prefixes, baseIri);
          if (value == null) continue;

          // First value is stored bare; later values promote the entry to a
          // list and are appended to it.
          if (subjectEntry.containsKey(predicate)) {
            final existing = subjectEntry[predicate];
            subjectEntry[predicate] =
                existing is List ? existing + [value] : [existing, value];
          } else {
            subjectEntry[predicate] = value;
          }
        }
      }
    }
  }

  return tripleMap;
}
62160
// Map a predicate token to its full IRI. The bare keyword `a` is the Turtle
// shorthand for rdf:type; every other token is expanded like any other IRI.

String _expandPredicate(
  String raw,
  Map<String, String> prefixes,
  String? base,
) =>
    raw.trim() == 'a'
        ? 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
        : _expandIri(raw, prefixes, base);
173+
// Turn an IRI-like token into the string used as a map key:
//  - `<iri>`       -> the text between the angle brackets, unchanged;
//  - `_:label`     -> returned as is;
//  - `:local`      -> the `:` prefix (or, failing that, the base, or '')
//                     followed by the local name;
//  - `pfx:local`   -> the namespace bound to `pfx:` followed by the local
//                     name, when that prefix is known;
//  - anything else -> the trimmed token itself.

String _expandIri(String raw, Map<String, String> prefixes, String? base) {
  final token = raw.trim();

  final isIriRef = token.startsWith('<') && token.endsWith('>');
  if (isIriRef) return token.substring(1, token.length - 1);

  if (token.startsWith('_:')) return token;

  if (token.startsWith(':')) {
    final namespace = prefixes[':'] ?? base ?? '';
    return '$namespace${token.substring(1)}';
  }

  // Prefix keys are stored with their trailing colon, so the lookup key
  // includes it.
  final split = token.indexOf(':');
  if (split > 0) {
    final namespace = prefixes[token.substring(0, split + 1)];
    if (namespace != null) {
      return '$namespace${token.substring(split + 1)}';
    }
  }

  return token;
}
205+
// Convert one parsed object token into the value stored in the triple map.
//
// Non-string nodes are stringified. A sentinel-wrapped string literal yields
// only the text between the sentinels, so anything following the closing
// sentinel (such as a language tag or datatype) is dropped. IRI references
// lose their angle brackets; blank node labels, booleans and numbers keep
// their lexical form; any other token containing a colon is expanded as a
// prefixed name.

dynamic _convertObject(
  dynamic obj,
  Map<String, String> prefixes,
  String? base,
) {
  if (obj is! String) return obj.toString();

  if (obj.startsWith(_literalStart)) {
    final close = obj.indexOf(_literalEnd);
    // A missing end sentinel means "take everything after the start".
    return close < 0
        ? obj.substring(_literalStart.length)
        : obj.substring(_literalStart.length, close);
  }

  final token = obj.trim();

  final isIriRef = token.startsWith('<') && token.endsWith('>');
  if (isIriRef) return token.substring(1, token.length - 1);

  // Blank nodes, booleans and numeric literals are passed through verbatim.
  final keepVerbatim = token.startsWith('_:') ||
      token == 'true' ||
      token == 'false' ||
      double.tryParse(token) != null;
  if (keepVerbatim) return token;

  return token.contains(':') ? _expandIri(token, prefixes, base) : token;
}
252+
// Return [token] without its enclosing `<` and `>`; a token that is not
// wrapped in angle brackets is returned unchanged.

String _stripAngleBrackets(String token) {
  final bracketed =
      token.length >= 2 && token.startsWith('<') && token.endsWith('>');
  return bracketed ? token.substring(1, token.length - 1) : token;
}
259+
// Remove Turtle comments line by line.
//
// A line whose first non-blank character is `#` is dropped entirely. On any
// other line, an inline comment (a `#` with whitespace on both sides) is
// removed together with the whitespace in front of it, up to the end of the
// line. A `#` inside a quoted string literal or inside `<...>` is never
// treated as a comment. Every kept line is terminated with '\n'.
//
// Quote tracking is per line, so multi-line literals must already have been
// collapsed onto one line before this is called.

String _stripComments(String content) {
  final buffer = StringBuffer();
  for (final line in content.split('\n')) {
    if (line.trimLeft().startsWith('#')) continue;
    buffer
      ..write(_stripInlineComment(line))
      ..write('\n');
  }
  return buffer.toString();
}

// Return [line] with its trailing inline comment removed, or [line] itself
// when it has none.

String _stripInlineComment(String line) {
  bool isSpace(String ch) => ch.trim().isEmpty;

  // The quote character of the literal currently being scanned, if any.
  String? openQuote;

  for (var i = 0; i < line.length; i++) {
    final ch = line[i];

    if (openQuote != null) {
      if (ch == r'\') {
        // Skip the escaped character so `\"` does not close the literal.
        i++;
      } else if (ch == openQuote) {
        openQuote = null;
      }
      continue;
    }

    if (ch == '"' || ch == "'") {
      openQuote = ch;
      continue;
    }

    if (ch == '<') {
      // Jump over an IRI reference so quotes or `#` inside it are ignored.
      final close = line.indexOf('>', i + 1);
      if (close > i) i = close;
      continue;
    }

    if (ch != '#') continue;

    final spaceBefore = i > 0 && isSpace(line[i - 1]);
    final spaceAfter = i + 1 < line.length && isSpace(line[i + 1]);
    if (spaceBefore && spaceAfter) {
      // Also drop the run of whitespace that precedes the comment.
      var cut = i;
      while (cut > 0 && isSpace(line[cut - 1])) {
        cut--;
      }
      return line.substring(0, cut);
    }
  }

  return line;
}
277+
// Collapse every `"""..."""` and `'''...'''` long literal into a single-line
// double-quoted literal so the line-based grammar can match it.
//
// Both delimiter styles are handled in one left-to-right pass, so a document
// may mix them, and a `'''` that appears inside a `"""` literal (or the
// reverse) is treated as content. Inside each literal, line feeds become the
// two-character escape `\n` and bare double quotes become `\"`; sequences
// that are already backslash-escaped are left untouched so they are not
// escaped twice.

String _preprocessLongLiterals(String content) {
  final longLiteral = RegExp(
    r'"""(.*?)"""|' r"'''(.*?)'''",
    dotAll: true,
  );
  // Matches an existing escape pair first, otherwise a bare double quote.
  final escapeOrQuote = RegExp(r'\\.|"');

  return content.replaceAllMapped(longLiteral, (match) {
    // Exactly one of the two groups participates in any given match.
    final inner = (match.group(1) ?? match.group(2) ?? '')
        .replaceAll('\n', r'\n')
        .replaceAllMapped(
          escapeOrQuote,
          (m) => m[0] == '"' ? r'\"' : m[0]!,
        );
    return '"$inner"';
  });
}
294+
63295/// Generate Turtle string from triples stored in a map:
64296/// {subject: {predicate: {object}}}
65297/// - subject: URIRef String
66298/// - predicate: URIRef String
67299/// - object: {dynamic}
300+
68301String tripleMapToTurtle (
69302 Map <URIRef , Map <URIRef , dynamic >> triples, {
70303 Map <String , Namespace >? bindNamespaces,
@@ -121,6 +354,7 @@ String tripleMapToTurtle(
121354// predicate. Also the function extract() can be removed when we have properly
122355// defined our namespaces
123356/// Parse TTL content into a map {subject: {predicate: {objects}}}
357+
124358Map <String , dynamic > parseTTLMap (String ttlContent) {
125359 final g = Graph ();
126360 g.parseTurtle (ttlContent);
@@ -145,6 +379,7 @@ Map<String, dynamic> parseTTLMap(String ttlContent) {
145379}
146380
147381/// Parse ACL content into a map {subject: {predicate: object}}
382+
148383Map <String , dynamic > parseACL (String aclContent) {
149384 final g = Graph ();
150385 g.parseTurtle (aclContent);
@@ -176,6 +411,7 @@ Map<String, dynamic> parseACL(String aclContent) {
176411}
177412
178413/// Generate permission log file content
414+
179415String genPermLogTTLStr (String resourceUrl) => tripleMapToTurtle (
180416 {
181417 URIRef (resourceUrl): {
0 commit comments