Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion lib/solidpod.dart
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,15 @@ export 'src/solid/constants/solid_constants.dart';
// Legacy exports for backward compatibility (deprecated, use SolidConstants instead)

export 'src/solid/constants/common.dart'
show foaf, terms, ResourceStatus, permStr, agentStr, whatIsWebID, demoWebID;
show
foaf,
terms,
ResourceStatus,
WebIdStatus,
permStr,
agentStr,
whatIsWebID,
demoWebID;
export 'src/solid/constants/schema.dart' show appsTerms;
export 'src/solid/constants/path_type.dart' show PathType;

Expand Down Expand Up @@ -217,13 +225,24 @@ export 'src/solid/shared_resources.dart';
export 'src/solid/api/rest_api.dart'
show
checkResourceStatus,
checkWebIdProfile,
createResource,
deleteResource,
getResource,
getResourcesInContainer,
initialStructureTest,
updateFileByQuery;

/// WebID validation pipeline.

export 'src/solid/utils/webid_validator.dart'
show
WebIdCheckResult,
WebIdCheckStatus,
isValidIpv4,
looksLikeIpv4Attempt,
validateWebId;

/// Function to get the latest log entries

export 'src/solid/api/common_permission.dart'
Expand Down
83 changes: 83 additions & 0 deletions lib/src/solid/api/rest_api.dart
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,89 @@ Future<ResourceStatus> checkWebIdExists(String webIdUrl) async {
}
}

/// RDF media types that qualify a successful response as a plausible Solid
/// WebID profile document.
///
/// Every entry is a bare, lower-case media type without parameters. Any
/// media type that is not listed here (`text/html` being the common case) is
/// treated as "not a profile", so an ordinary website that answers 200 for
/// unknown paths cannot pass itself off as a WebID.
const Set<String> _rdfProfileMimeTypes = {
  'text/turtle',
  'application/ld+json',
  'application/n-triples',
  'application/n-quads',
  'application/rdf+xml',
  'application/trig',
  'text/n3',
};

/// Returns whether [contentType] names one of the RDF media types listed in
/// [_rdfProfileMimeTypes].
///
/// Only the media type itself is compared. Everything from the first `;`
/// onwards (parameters such as `charset=utf-8`) is discarded, and what
/// remains is trimmed and lower-cased before the lookup. Discarding the
/// parameters this way also tolerates optional whitespace in front of the
/// `;`, which the HTTP media-type grammar permits, for example
/// `text/turtle ; charset=utf-8`.
///
/// Returns false for an empty value or a value with no media type before the
/// first `;`.
bool _isRdfProfileContentType(String contentType) {
  // `split` always yields at least one element, so `first` is safe even for
  // an empty header value.
  final mediaType = contentType.split(';').first.trim().toLowerCase();
  return _rdfProfileMimeTypes.contains(mediaType);
}

/// Validate that [webIdUrl] points to a real Solid WebID *profile document*,
/// not just any HTTP resource that happens to return 200.
///
/// A plain existence check (status code only) is unsafe because many ordinary
/// websites return a 200 HTML page for arbitrary paths — for example a
/// WordPress site's catch-all "soft 404" — which would otherwise be
/// indistinguishable from a real WebID. To guard against this the request
/// content-negotiates for the usual RDF serialisations and the URL is
/// accepted only when the response carries an RDF `Content-Type`.
///
/// Returns:
///  * [WebIdStatus.notExist] when the response status is 404;
///  * [WebIdStatus.unknown] for any other status that is not 200 or 204;
///  * [WebIdStatus.valid] when the status is 200/204 and the `Content-Type`
///    is an RDF media type;
///  * [WebIdStatus.notProfile] when the status is 200/204 but the
///    `Content-Type` is missing or not RDF.
///
/// Throws a [FormatException] if [webIdUrl] cannot be parsed as a URI. This
/// function does not catch network exceptions either; callers should wrap it
/// in a try/catch when a host-resolution or connectivity error needs to be
/// surfaced separately from a "not a profile" outcome.
Future<WebIdStatus> checkWebIdProfile(String webIdUrl) async {
  // HTTP requests never carry the URL fragment, but stripping it explicitly
  // keeps the request URL and any debug logs honest.
  final uri = Uri.parse(webIdUrl).removeFragment();

  final response = await http.get(
    uri,
    headers: const <String, String>{
      // Solid servers content-negotiate on `Accept`. List the common RDF
      // serialisations in preference order; the trailing `*/*;q=0.1` allows
      // us to still inspect the response if the server ignores `Accept`.
      'Accept': 'text/turtle, '
          'application/ld+json;q=0.95, '
          'application/n-triples;q=0.9, '
          'application/rdf+xml;q=0.85, '
          'application/n-quads;q=0.8, '
          'application/trig;q=0.75, '
          'text/n3;q=0.7, '
          '*/*;q=0.1',
    },
  );

  final statusCode = response.statusCode;

  if (statusCode == 404) {
    return WebIdStatus.notExist;
  }

  if (statusCode != 200 && statusCode != 204) {
    debugPrint(
      'checkWebIdProfile: unexpected status\n'
      'URL: $uri\n'
      'Status: $statusCode\n'
      'Body: ${response.body}',
    );
    return WebIdStatus.unknown;
  }

  // A missing header is treated the same as a non-RDF one.
  final contentType = response.headers['content-type'] ?? '';
  if (_isRdfProfileContentType(contentType)) {
    return WebIdStatus.valid;
  }

  // Report the status actually received: this branch is reached for 204 as
  // well as 200, so a hard-coded "200" would be misleading.
  debugPrint(
    'checkWebIdProfile: $statusCode but non-RDF content type '
    '"$contentType" for $uri',
  );
  return WebIdStatus.notProfile;
}

/// Given a WebID check if their POD is initialised using the Solidpod
/// directory structure
Future<bool> checkPodInitialised(String webIdUrl) async {
Expand Down
22 changes: 22 additions & 0 deletions lib/src/solid/constants/common.dart
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,28 @@ enum ResourceStatus {
forbidden,
}

/// Result of checking whether a URL serves a Solid WebID profile document.
///
/// A successful status code alone is not enough evidence: plenty of ordinary
/// websites answer 200 with an HTML page for any unmatched path (SPA
/// catch-alls, soft 404s, etc.), and such a response must not be mistaken for
/// a WebID.
enum WebIdStatus {
  /// The response status was 200/204 and its `Content-Type` is an RDF media
  /// type, so the URL is very likely a genuine WebID profile document.
  valid,

  /// The response status was 200/204 but its `Content-Type` is not an RDF
  /// media type (typically `text/html` from a regular website). The URL is
  /// reachable but is *not* a WebID profile.
  notProfile,

  /// The response status was 404.
  notExist,

  /// Any other response status (e.g. 403, 5xx); the check was inconclusive.
  unknown,
}

/// Types of the content of resources
enum ResourceContentType {
/// Detect the MIME type automatically at runtime
Expand Down
175 changes: 175 additions & 0 deletions lib/src/solid/utils/webid_validator.dart
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/// Pure-Dart validation pipeline for candidate Solid WebID URLs.
///
/// Copyright (C) 2026, Software Innovation Institute, ANU.
///
/// Licensed under the MIT License (the "License").
///
/// License: https://choosealicense.com/licenses/mit/.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
///
/// Authors: Tony Chen

library;

import 'package:solidpod/src/solid/api/rest_api.dart' show checkWebIdProfile;
import 'package:solidpod/src/solid/constants/common.dart' show WebIdStatus;

/// Pattern for a non-empty string consisting only of ASCII decimal digits
/// and dots.
///
/// A host of that shape is almost certainly meant to be an IPv4 literal
/// rather than a domain name, so it should be validated as IPv4 before any
/// network call is dispatched.

final RegExp _digitsAndDotsOnly = RegExp(r'^[0-9.]+$');

/// Reports whether [host] reads like an attempt to type an IPv4 literal,
/// meaning it is non-empty and contains nothing but decimal digits and dots.
///
/// The test is deliberately loose. Malformed inputs such as `192`,
/// `192.168`, `192.168.1`, `1.2.3.4.5` and `256.0.0.1` all return true, so
/// that callers can follow up with [isValidIpv4] and reject them.

bool looksLikeIpv4Attempt(String host) =>
    host.isNotEmpty && _digitsAndDotsOnly.hasMatch(host);

/// Returns whether [host] is a syntactically valid dotted-decimal IPv4
/// address.
///
/// A valid address has exactly four dot-separated octets. Each octet must be
/// one to three ASCII decimal digits whose value lies in the range 0..255.
/// Leading zeros are accepted (`010.0.0.1`). Signs, hexadecimal prefixes and
/// whitespace are not: `+1.2.3.4`, `-0.0.0.0`, `0x1.2.3.4` and ` 1.2.3.4`
/// all return false.

bool isValidIpv4(String host) {
  final octets = host.split('.');
  if (octets.length != 4) return false;

  for (final octet in octets) {
    if (octet.isEmpty || octet.length > 3) return false;

    // Check the characters explicitly instead of relying on `int.tryParse`,
    // which also accepts a leading sign and a `0x` prefix and would therefore
    // let values such as `+1` or `0x1` through as octets.
    final digitsOnly = octet.codeUnits.every(
      (unit) => unit >= 0x30 && unit <= 0x39,
    );
    if (!digitsOnly) return false;

    // Safe to parse: the octet is one to three decimal digits.
    if (int.parse(octet) > 255) return false;
  }
  return true;
}

/// The possible outcomes of running [validateWebId] on a candidate URL.

enum WebIdCheckStatus {
  /// The URL was confirmed as a Solid WebID profile document.

  valid,

  /// The input is not a syntactically absolute URL.

  notAbsoluteUrl,

  /// The host consists only of digits and dots, so it looks like an IPv4
  /// literal, but it is not a well-formed one (e.g. `192`, `192.168.1`,
  /// `1.2.3.4.5`, `256.0.0.1`).

  invalidIpv4,

  /// The request failed with an exception before a response was obtained
  /// (for example a DNS lookup or network failure).

  unreachable,

  /// The URL is reachable and responded 200/204, but the response is not an
  /// RDF profile document (typically a `text/html` page from a regular
  /// website).

  notProfile,

  /// The URL responded with 404.

  notExist,

  /// The URL responded with some other HTTP status (e.g. 403, 5xx), so the
  /// check was inconclusive.

  unknown,
}

/// The value returned by [validateWebId]: a status plus the context needed
/// to explain it.

class WebIdCheckResult {
  /// Creates a result with the given [status].
  ///
  /// [host] defaults to the empty string and [error] to null.

  const WebIdCheckResult(this.status, {this.host = '', this.error});

  /// Which outcome was reached; [WebIdCheckStatus.valid] signals success and
  /// every other value is a categorised failure.

  final WebIdCheckStatus status;

  /// Host component of the WebID URL, or the empty string when no host was
  /// recorded (for instance when the URL could not be parsed).

  final String host;

  /// The exception behind a network failure, when there is one. Only set for
  /// [WebIdCheckStatus.unreachable].

  final Object? error;

  /// Shorthand for checking that [status] is [WebIdCheckStatus.valid].

  bool get isValid => WebIdCheckStatus.valid == status;
}

/// Validates a candidate [webId] URL and reports the outcome.
///
/// The validation pipeline is:
///   1. Reject input that cannot be parsed as a URI, or that is not
///      syntactically absolute once its fragment is ignored. Any fragment is
///      allowed (`#me`, `#this`, `#i`, ...), not only `#me`.
///   2. Reject hosts that look like an IPv4 attempt but are malformed.
///   3. Query [checkWebIdProfile] to confirm the URL points to a real Solid
///      WebID profile document, distinguishing "not a profile", "not found",
///      and "unknown" responses from genuine success.
///   4. Map any [Exception] thrown by that query to
///      [WebIdCheckStatus.unreachable] so the caller can surface a clear,
///      actionable message.
///
/// Malformed input never throws; it is reported as
/// [WebIdCheckStatus.notAbsoluteUrl].

Future<WebIdCheckResult> validateWebId(String webId) async {
  // `Uri.tryParse` rather than `Uri.parse`: malformed input must produce a
  // result, not a thrown `FormatException`.

  final uri = Uri.tryParse(webId);

  // `Uri.isAbsolute` is false for every URI that carries a fragment, and
  // WebIDs routinely do, so the fragment is removed before the check.

  if (uri == null || !uri.removeFragment().isAbsolute) {
    return const WebIdCheckResult(WebIdCheckStatus.notAbsoluteUrl);
  }

  final host = uri.host;

  if (looksLikeIpv4Attempt(host) && !isValidIpv4(host)) {
    return WebIdCheckResult(WebIdCheckStatus.invalidIpv4, host: host);
  }

  WebIdStatus status;
  try {
    status = await checkWebIdProfile(webId);
  } on Exception catch (e) {
    return WebIdCheckResult(
      WebIdCheckStatus.unreachable,
      host: host,
      error: e,
    );
  }

  switch (status) {
    case WebIdStatus.valid:
      return WebIdCheckResult(WebIdCheckStatus.valid, host: host);
    case WebIdStatus.notProfile:
      return WebIdCheckResult(WebIdCheckStatus.notProfile, host: host);
    case WebIdStatus.notExist:
      return WebIdCheckResult(WebIdCheckStatus.notExist, host: host);
    case WebIdStatus.unknown:
      return WebIdCheckResult(WebIdCheckStatus.unknown, host: host);
  }
}
Loading