containing
s in snapshots)
- let segments = this._readAloud.state!.segments;
- if (!segments
- || !segments.some(s => getContainingBlock(
- closestElement((s.position as RangeRef).range.startContainer)!
- ) === block)) {
+ if (!this._readAloud.getSegmentForBlock(block)) {
return;
}
@@ -1736,19 +1764,31 @@ abstract class DOMView {
protected _handleReadAloudJumpButtonClick() {
if (!this._readAloudJumpButtonBlock || !this._readAloud.state) return;
- let blockRange = this._iframeDocument.createRange();
- blockRange.selectNodeContents(this._readAloudJumpButtonBlock);
+ let segment = this._readAloud.getSegmentForBlock(this._readAloudJumpButtonBlock);
+ if (!segment) return;
- // Immediately move the highlight to the target block
- let blockSelector = this.toSelector(blockRange);
- if (blockSelector) {
- this.setSpotlight(SpotlightKey.ReadAloudActiveSegment, blockSelector, null);
+ // Match the immediate spotlight to the user's highlight granularity,
+ // so we don't show a wrong-granularity flash before the manager overrides
+ // with a new highlight.
+ let state = this._readAloud.state;
+ let useSegmentSpotlight = state.segmentGranularity === 'sentence'
+ && state.highlightGranularity !== 'paragraph'
+ && isSelector(segment.sourcePosition);
+ let immediateSelector: Selector | null;
+ if (useSegmentSpotlight) {
+ immediateSelector = segment.sourcePosition as Selector;
+ }
+ else {
+ let blockRange = this._iframeDocument.createRange();
+ blockRange.selectNodeContents(this._readAloudJumpButtonBlock);
+ immediateSelector = this.toSelector(blockRange);
+ }
+ if (immediateSelector) {
+ this.setSpotlight(SpotlightKey.ReadAloudActiveSegment, immediateSelector, null);
}
-
- blockRange.collapse(true);
this._options.onSetReadAloudState({
- targetPosition: { range: new PersistentRange(blockRange) },
+ targetPosition: segment.position,
});
}
@@ -1845,6 +1885,8 @@ abstract class DOMView {
if (this._iframeDocument) {
this._iframeDocument.documentElement.style.width = width + 'px';
this._iframeDocument.documentElement.style.height = height + 'px';
+ // Immediately reposition annotations
+ this._handleViewUpdate();
}
}
@@ -1876,26 +1918,6 @@ abstract class DOMView {
this._readAloud.setPositionLocked(true);
}
- getSerializableReadAloudPosition(position: Position): Selector | null {
- if ('range' in position) {
- return this.toSelector(position.range.toRange());
- }
- if (!isSelector(position)) {
- return null;
- }
- return position;
- }
-
- isReadAloudPositionTooFar(savedPosition: Position, _viewState: Record): boolean {
- let range = this.toDisplayedRange(savedPosition as Selector);
- if (!range) {
- // Can't resolve the selector - not in a displayed root
- return true;
- }
- let rect = getBoundingPageRect(range);
- return !isPageRectVisible(rect, this._iframeWindow, this._iframeWindow.innerHeight * 3);
- }
-
protected _handleScrollCapture(event: Event) {
// The annotation layer is positioned at the top-left of the document, so it moves along with the content when
// the document is scrolled. But scrollable sub-frames (e.g. elements with overflow: auto) don't have their own
@@ -2165,27 +2187,6 @@ abstract class DOMView {
return this._readAloud.hasTarget;
}
- addAnnotationFromReadAloudSegments(segments: ReadAloudSegment[], init: NewAnnotation): Annotation | null {
- let annotation = this._readAloud.getAnnotationFromSegments(segments, init);
- if (annotation) {
- return this._options.onAddAnnotation(annotation);
- }
- return null;
- }
-
- computeReadAloudRepositionIndex(position: Position, segments: ReadAloudSegment[]): number | null {
- return this._readAloud.computeRepositionIndex(position, segments);
- }
-
- getReadAloudRanges(granularity: ReadAloudGranularity): Range[] {
- let rootRanges = this._getRoots(true).map((root) => {
- let range = this._iframeDocument.createRange();
- range.selectNodeContents(root);
- return range;
- });
- return rootRanges.flatMap(rootRange => this._readAloud.getRanges(rootRange, granularity));
- }
-
// ***
// Public methods to control the view from the outside
// ***
@@ -2315,6 +2316,7 @@ export type DOMViewOptions = {
penConnected?: boolean;
penActive?: boolean;
penExclusive?: boolean;
+ readAloudState: ReadAloudStateSnapshot;
readAloudVoices: Map,
onSetOutline: (outline: OutlineItem[]) => void;
onChangeViewState: (state: State, primary?: boolean) => void;
@@ -2343,7 +2345,6 @@ export type DOMViewOptions = {
onKeyDown: (event: KeyboardEvent) => void;
onEPUBEncrypted: () => void;
onFocusAnnotation: (annotation: WADMAnnotation) => void;
- onSetHiddenAnnotations: (ids: string[]) => void;
onBackdropTap?: (event: PointerEvent) => void;
getLocalizedString?: (name: string) => string;
data: Data & {
diff --git a/src/dom/common/lib/range.ts b/src/dom/common/lib/range.ts
index b299e1f13..919a78e9f 100644
--- a/src/dom/common/lib/range.ts
+++ b/src/dom/common/lib/range.ts
@@ -1,6 +1,5 @@
-import { getSentenceBoundaries } from "sentencex-ts";
import { isFirefox, isWin } from "../../../common/lib/utilities";
-import { closestElement, getLang, iterateWalker } from "./nodes";
+import { closestElement } from "./nodes";
import { getBoundingRect, isPageRectVisible, rectsIntersect } from "./rect";
/**
@@ -173,225 +172,6 @@ export function splitRangeToTextNodes(range: Range): Range[] {
return ranges;
}
-export function splitRangeToSentences(range: Range, { keepWhitespace = false } = {}): Range[] {
- let walker = createRangeWalker(range, NodeFilter.SHOW_TEXT);
-
- let textParts: {
- node: Text;
- globalStart: number;
- globalEnd: number;
- localStart: number;
- localEnd: number;
- }[] = [];
-
- let text = '';
- let globalOffset = 0;
-
- for (let node of iterateWalker(walker)) {
- let nodeValue = node.nodeValue!;
-
- let startInNode = node === range.startContainer
- ? range.startOffset
- : 0;
-
- let endInNode = node === range.endContainer
- ? range.endOffset
- : nodeValue.length;
-
- if (endInNode > startInNode) {
- let nodeValueWithinRange = nodeValue.slice(startInNode, endInNode);
- textParts.push({
- node: node as Text,
- globalStart: globalOffset,
- globalEnd: globalOffset + nodeValueWithinRange.length,
- localStart: startInNode,
- localEnd: endInNode,
- });
- text += nodeValueWithinRange;
- globalOffset += nodeValueWithinRange.length;
- }
- }
-
- // Normalize all whitespace to space characters, because Range#toString()
- // returns invisible newlines in the HTML, and the segmenter will treat
- // those as meaningful.
- text = text.replace(/\s/g, ' ');
-
- let lang = getLang(range.commonAncestorContainer) || 'en';
- let boundaries = getSentenceBoundaries(lang, text);
-
- let outputRanges: Range[] = [];
- for (let boundary of boundaries) {
- let sentStart = boundary.startIndex;
- let sentEnd = boundary.endIndex;
-
- if (!keepWhitespace) {
- // Trim leading/trailing whitespace within the segment
- let segment = text.slice(sentStart, sentEnd);
- let leading = (segment.match(/^\s*/)?.[0].length) ?? 0;
- let trailing = (segment.match(/\s*$/)?.[0].length) ?? 0;
- sentStart += leading;
- sentEnd -= trailing;
- // Skip segments that are only whitespace after trimming
- if (sentEnd <= sentStart) {
- continue;
- }
- }
-
- let startNode: Text | null = null;
- let startOffsetInNode = 0;
- for (let textPart of textParts) {
- if (textPart.globalStart <= sentStart && sentStart < textPart.globalEnd) {
- startNode = textPart.node;
- startOffsetInNode = textPart.localStart + (sentStart - textPart.globalStart);
- break;
- }
- }
-
- let endNode: Text | null = null;
- let endOffsetInNode = 0;
- for (let textPart of textParts) {
- if (textPart.globalStart < sentEnd && sentEnd <= textPart.globalEnd) {
- endNode = textPart.node;
- endOffsetInNode = textPart.localStart + (sentEnd - textPart.globalStart);
- break;
- }
- }
-
- if (!startNode || !endNode) continue;
-
- let sentenceRange = range.commonAncestorContainer.ownerDocument!.createRange();
- sentenceRange.setStart(startNode, startOffsetInNode);
- sentenceRange.setEnd(endNode, endOffsetInNode);
- outputRanges.push(sentenceRange);
- }
- return outputRanges;
-}
-
-export function splitRanges(
- ranges: Range[],
- splitAtRange: Range
-): { ranges: Range[]; startIndex: number; endIndex: number } | null {
- let newRanges: Range[] = [];
- let startIndex = -1;
- let endIndex = -1;
-
- let lastStartToStart: number | null = null;
- let lastStartToEnd: number | null = null;
- let lastEndToStart: number | null = null;
- let lastEndToEnd: number | null = null;
-
- for (let range of ranges) {
- if (startIndex !== -1 && endIndex !== -1) {
- newRanges.push(range);
- continue;
- }
- // If these ranges aren't comparable, we can't split
- if (range.commonAncestorContainer.getRootNode() !== splitAtRange.commonAncestorContainer.getRootNode()) {
- newRanges.push(range);
- continue;
- }
-
- let splitRanges: Range[] = [];
- let containedStart = false;
- let containedEnd = false;
- let splitIndex = -1;
-
- if (startIndex === -1) {
- let startToStart = range.compareBoundaryPoints(Range.START_TO_START, splitAtRange);
- let startToEnd = range.compareBoundaryPoints(Range.START_TO_END, splitAtRange);
- if (
- // If the start point of splitAtRange is somewhere within range,
- // or it was somewhere between the last range and this range
- (startToStart <= 0 || lastStartToStart === -1 && startToStart === 1)
- && (startToEnd >= 0 || lastStartToEnd === -1 && startToEnd === 1)
- ) {
- containedStart = true;
- }
- lastStartToStart = startToStart;
- lastStartToEnd = startToEnd;
- }
-
- if (endIndex === -1) {
- let endToStart = range.compareBoundaryPoints(Range.END_TO_START, splitAtRange);
- let endToEnd = range.compareBoundaryPoints(Range.END_TO_END, splitAtRange);
- if (
- // If the end point of splitAtRange is somewhere within range,
- // or it was somewhere between the last range and this range
- (endToStart <= 0 || lastEndToStart === -1 && endToStart === 1)
- && (endToEnd >= 0 || lastEndToEnd === -1 && endToEnd === 1)
- ) {
- containedEnd = true;
- }
- lastEndToStart = endToStart;
- lastEndToEnd = endToEnd;
- }
-
- if (containedStart) {
- let before = range.cloneRange();
- before.setEnd(splitAtRange.startContainer, splitAtRange.startOffset);
- if (!before.collapsed) splitRanges.push(before);
- }
-
- if (containedStart || containedEnd) {
- let middle = range.cloneRange();
- let start = (containedStart ? splitAtRange : range).startContainer;
- let startOffset = (containedStart ? splitAtRange : range).startOffset;
- let end = (containedEnd ? splitAtRange : range).endContainer;
- let endOffset = (containedEnd ? splitAtRange : range).endOffset;
-
- middle.setStart(start, startOffset);
- middle.setEnd(end, endOffset);
-
- if (!middle.collapsed) {
- splitRanges.push(middle);
- splitIndex = splitRanges.length - 1;
- }
- }
- else if (!range.collapsed) {
- splitRanges.push(range);
- splitIndex = splitRanges.length - 1;
- }
-
- if (containedEnd) {
- let after = range.cloneRange();
- after.setStart(splitAtRange.endContainer, splitAtRange.endOffset);
- if (!after.collapsed) {
- splitRanges.push(after);
- // When splitAtRange is collapsed and falls in a gap between
- // ranges, both containedStart and containedEnd are true for
- // the range after the gap. The 'middle' portion (from
- // splitAt.start to splitAt.end) is also collapsed and gets
- // skipped, leaving splitIndex at -1. Without this fix,
- // startIndex would be newRanges.length + (-1), incorrectly
- // pointing to the previous range. Point to 'after' instead,
- // which is the first range at/after the split point.
- if (containedStart && splitIndex === -1) {
- splitIndex = splitRanges.length - 1;
- }
- }
- }
-
- if (containedStart) {
- startIndex = newRanges.length + splitIndex;
- }
- if (containedEnd) {
- endIndex = newRanges.length + splitIndex + 1;
- }
- newRanges.push(...splitRanges);
- }
-
- if (startIndex === -1 || endIndex === -1) {
- return null;
- }
-
- return {
- ranges: newRanges,
- startIndex,
- endIndex
- };
-}
-
/**
* Create a single range spanning all the positions included in the set of input ranges. For
* example, if rangeA goes from nodeA at offset 5 to nodeB at offset 2 and rangeB goes from nodeC
diff --git a/src/dom/common/lib/read-aloud.ts b/src/dom/common/lib/read-aloud.ts
index 16b46cb05..ef092358f 100644
--- a/src/dom/common/lib/read-aloud.ts
+++ b/src/dom/common/lib/read-aloud.ts
@@ -1,30 +1,17 @@
import {
- NewAnnotation,
- Position,
ReadAloudGranularity,
ReadAloudSegment,
- RangeRef,
- WADMAnnotation, ReadAloudStateSnapshot, ReadAloudStateDelta,
+ ReadAloudStateSnapshot,
+ ReadAloudStateDelta,
+ Position,
} from "../../../common/types";
-import { exceedsSegmentMaxLength, splitTextToChunks } from "../../../common/read-aloud/segment-split";
import { isSelector, Selector } from "./selector";
import DOMView, { SpotlightKey } from "../dom-view";
-import {
- createRangeWalker, getBoundingPageRect,
- makeRangeSpanning,
- PersistentRange, splitRanges,
- splitRangeToSentences,
- splitRangeToTextNodes,
-} from "./range";
-import {
- isPageRectFullyVisible,
- isPageRectVisible,
- isErrorRect,
-} from "./rect";
-import { getContainingBlock, closestElement, iterateWalker } from "./nodes";
+import { getBoundingPageRect } from "./range";
+import { isPageRectVisible } from "./rect";
+import { closestElement } from "./nodes";
import { debounceUntilScrollFinishes } from "../../../common/lib/utilities";
import { getBaseLanguage } from '../../../common/read-aloud/lang';
-import EPUBView from '../../epub/epub-view';
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export class ReadAloud> {
@@ -34,6 +21,15 @@ export class ReadAloud> {
scrolling = false;
+ /**
+ * Cache from base-view block elements to their paragraph-start segment,
+ * used by the jump button. Populated lazily by getSegmentForBlock(),
+ * and invalidated when segments change.
+ */
+ private _blockSegmentCache = new Map();
+
+ private _lastCachedSegments: ReadAloudSegment[] | null = null;
+
private _view: View;
constructor(view: View) {
@@ -60,77 +56,55 @@ export class ReadAloud> {
}
// After resuming playback, re-lock position if the current segment is visible
- if (state.active && previousState?.paused && !state.paused && state.activeSegment?.position) {
- let { range } = state.activeSegment.position as RangeRef;
- if (isPageRectVisible(getBoundingPageRect(range), this._view.iframeWindow)) {
+ if (state.active && previousState?.paused && !state.paused) {
+ let range = this._resolveActiveSegmentRange(state);
+ if (range && isPageRectVisible(getBoundingPageRect(range), this._view.iframeWindow)) {
this.positionLocked = true;
}
}
+ // Highlight and scroll to active segment
if (state.activeSegment?.position) {
- let { range } = state.activeSegment.position as RangeRef;
- let segments = state.segments!;
- // Highlight the whole paragraph
- let firstRangeInParagraph: PersistentRange | null = null;
- for (let i = segments.indexOf(state.activeSegment); i >= 0; i--) {
- firstRangeInParagraph = (segments[i].position as RangeRef).range;
- if (segments[i].anchor === 'paragraphStart') {
- break;
- }
- }
- let lastRangeInParagraph: PersistentRange | null = null;
- for (let i = segments.indexOf(state.activeSegment) + 1; i < segments.length; i++) {
- if (segments[i].anchor === 'paragraphStart') {
- break;
- }
- lastRangeInParagraph = (segments[i].position as RangeRef).range;
- }
- range = range.clone();
- if (firstRangeInParagraph) {
- range.startContainer = firstRangeInParagraph.startContainer;
- range.startOffset = firstRangeInParagraph.startOffset;
- }
- if (lastRangeInParagraph) {
- range.endContainer = lastRangeInParagraph.endContainer;
- range.endOffset = lastRangeInParagraph.endOffset;
+ let segmentSelector = this._resolveSegmentSelector(state);
+ if (!segmentSelector) return null;
+
+ let segmentChanged = state.activeSegment !== previousState?.activeSegment;
+
+ // Navigate first so the section is mounted (important for EPUB),
+ // then set spotlights
+ if (segmentChanged && !state.annotationPopup && this.positionLocked) {
+ this.scrolling = true;
+
+ let startSelector = this._collapseToStart(segmentSelector);
+ this._view.navigateToSelector(startSelector || segmentSelector, {
+ ifNeeded: true,
+ visibilityMargin: -this._view.iframeWindow.innerHeight / 4,
+ block: 'center',
+ behavior: 'smooth'
+ });
+
+ debounceUntilScrollFinishes(this._view.iframeDocument).then(() => {
+ this.scrolling = false;
+ });
}
- let selector = this._view.toSelector(range.toRange());
- if (selector) {
- this._view.setSpotlight(SpotlightKey.ReadAloudActiveSegment, selector, null);
-
- // After a sentence skip, briefly highlight the active sentence segment
- if (state.lastSkipGranularity === 'sentence' && state.activeSegment) {
- let sentenceRange = (state.activeSegment.position as RangeRef).range;
- let sentenceSelector = this._view.toSelector(sentenceRange.toRange());
- this._view.setSpotlight(SpotlightKey.ReadAloudActiveSentence, sentenceSelector, 2000);
- }
- else {
- this._view.setSpotlight(SpotlightKey.ReadAloudActiveSentence, null);
- }
-
- // If the Read Aloud annotation popup isn't open and position is locked, navigate to the current segment
- if (!state.annotationPopup && this.positionLocked) {
- setTimeout(() => {
- this.scrolling = true;
-
- // Navigate to the start of the segment if possible
- let startRange = range.toRange();
- startRange.collapse(true);
- let startSelector = this._view.toSelector(startRange);
-
- this._view.navigateToSelector(startSelector || selector, {
- ifNeeded: true,
- visibilityMargin: -this._view.iframeWindow.innerHeight / 4, // Scroll early, scroll not quite as often
- block: 'center',
- behavior: 'smooth'
- });
-
- debounceUntilScrollFinishes(this._view.iframeDocument).then(() => {
- this.scrolling = false;
- });
- });
- }
+ // The primary highlight tracks the user's chosen granularity; it falls
+ // back to a coarser level when finer-grained data isn't available
+ // (e.g. paragraph-granularity segments have no sentence/word data)
+ let primarySelector = this._resolvePrimarySelector(state, segmentSelector);
+ this._view.setSpotlight(SpotlightKey.ReadAloudActiveSegment, primarySelector, null);
+
+ // After a skip whose granularity differs from the primary highlight,
+ // briefly flash the unit at the skip granularity so it's clear what
+ // the skip moved by. Only retrigger when the active segment changes
+ // so word-level updates don't keep resetting the spotlight.
+ if (segmentChanged) {
+ let spotlightSelector = this._resolveSkipSpotlightSelector(state, segmentSelector);
+ this._view.setSpotlight(
+ SpotlightKey.ReadAloudActiveSentence,
+ spotlightSelector,
+ spotlightSelector ? 2000 : null,
+ );
}
}
@@ -140,101 +114,7 @@ export class ReadAloud> {
};
}
- if (!state.active || !state.segmentGranularity) {
- return null;
- }
-
- if (state.segments !== null && state.segmentGranularity === previousState?.segmentGranularity) {
- return null;
- }
-
- let ranges = this._view.getReadAloudRanges(state.segmentGranularity);
-
- let targetRange: Range | null = null;
- let targetIsSelection = false;
- if (!this._view.iframeDocument.getSelection()!.isCollapsed) {
- targetRange = this._view.iframeDocument.getSelection()!.getRangeAt(0);
- this._view.iframeDocument.getSelection()!.collapseToStart();
- targetIsSelection = true;
- }
- else if (state.targetPosition) {
- targetRange = this._view.toDisplayedRange(state.targetPosition as Selector);
- }
-
- let backwardStopIndex: number | null = null;
- let forwardStopIndex: number | null = null;
- if (targetRange) {
- let split = splitRanges(ranges, targetRange);
- if (split) {
- ranges = split.ranges;
- backwardStopIndex = split.startIndex;
- if (targetIsSelection) {
- forwardStopIndex = split.endIndex;
- }
- }
- else {
- ranges = this.getRanges(targetRange, state.segmentGranularity);
- }
- }
- else {
- backwardStopIndex = ranges.findIndex(
- range => isPageRectFullyVisible(getBoundingPageRect(range), this._view.iframeWindow)
- );
- if (backwardStopIndex === -1) {
- backwardStopIndex = ranges.findIndex(
- range => isPageRectVisible(getBoundingPageRect(range), this._view.iframeWindow)
- );
- }
- if (backwardStopIndex === -1) {
- backwardStopIndex = ranges.findIndex(
- range => isPageRectVisible(getBoundingPageRect(range), this._view.iframeWindow,
- this._view.iframeWindow.innerWidth)
- );
- }
- if (backwardStopIndex === -1) {
- backwardStopIndex = ranges.findIndex((range) => {
- let rect = range.getBoundingClientRect();
- return !isErrorRect(rect) && rect.x >= 0;
- });
- }
- if (backwardStopIndex === -1) {
- backwardStopIndex = null;
- }
- }
-
- let lastContainingBlock: Element | null = null;
- let segments: ReadAloudSegment[] = ranges
- .map((range) => {
- let text = range.toString().trim().replace(/\s+/g, ' ');
- if (!text) return null;
- let containingBlock = getContainingBlock(closestElement(range.commonAncestorContainer)!);
- let differentContainingBlock = containingBlock !== lastContainingBlock;
- lastContainingBlock = containingBlock;
- return {
- text,
- position: {
- range: new PersistentRange(range)
- },
- granularity: state.segmentGranularity!,
- anchor: differentContainingBlock ? 'paragraphStart' : null,
- } satisfies ReadAloudSegment;
- })
- .filter((segment, i) => {
- if (segment) {
- return true;
- }
- if (backwardStopIndex !== null && backwardStopIndex > i) backwardStopIndex--;
- if (forwardStopIndex !== null && forwardStopIndex > i) forwardStopIndex--;
- return false;
- }) as ReadAloudSegment[];
- let lang = state.lang || this._view.lang;
-
- return {
- segments,
- backwardStopIndex,
- forwardStopIndex,
- lang,
- };
+ return null;
}
setPositionLocked(locked: boolean) {
@@ -247,144 +127,160 @@ export class ReadAloud> {
return !!this._view.iframeDocument.getSelection() && !this._view.iframeDocument.getSelection()!.isCollapsed;
}
- getAnnotationFromSegments(segments: ReadAloudSegment[], init: NewAnnotation): NewAnnotation | null {
- if (!segments.length) {
- return null;
+ private _positionToSelector(position: Position | null | undefined): Selector | null {
+ if (!position) return null;
+
+ if (isSelector(position)) {
+ return position as Selector;
}
- let range = makeRangeSpanning(
- segments.map(s => (s.position as RangeRef).range.toRange()),
- true,
- this._view.iframeDocument,
- );
- let annotation = this._view.getAnnotationFromRange(range, 'highlight');
- if (annotation) {
- annotation = {
- ...annotation,
- ...init,
- };
- return annotation;
+
+ // SDTPosition or other non-Selector: try resolving through the view
+ let range = this._view.toDisplayedRange(position);
+ if (range) {
+ return this._view.toSelector(range);
}
+
return null;
}
+ private _resolveSegmentSelector(state: ReadAloudStateSnapshot): Selector | null {
+ let seg = state.activeSegment;
+ if (!seg) return null;
+ // Prefer source position (works in base views), fall back to SDT position (works in SDTView)
+ return this._positionToSelector(seg.sourcePosition)
+ || this._positionToSelector(seg.position);
+ }
+
+ private _resolveParagraphSelector(state: ReadAloudStateSnapshot): Selector | null {
+ let seg = state.activeSegment;
+ if (!seg) return null;
+ return this._positionToSelector(seg.paragraphSourcePosition);
+ }
+
/**
- * Given a target position and existing segments, find the segment index
- * to reposition to. Returns null if the position can't be resolved.
+ * Resolve the primary highlight for the user's chosen granularity, using a
+ * coarser level when the segments are coarser (e.g. paragraph-granularity audio)
+ * and the segment selector when a word/paragraph position can't be resolved.
*/
- computeRepositionIndex(position: Position, segments: ReadAloudSegment[]): number | null {
- let targetRange;
- if (isSelector(position)) {
- targetRange = this._view.toDisplayedRange(position as Selector);
+ private _resolvePrimarySelector(
+ state: ReadAloudStateSnapshot,
+ segmentSelector: Selector,
+ ): Selector | null {
+ switch (this._effectivePrimaryGranularity(state)) {
+ case 'word':
+ return this._positionToSelector(state.activeWordSourcePosition) ?? segmentSelector;
+ case 'sentence':
+ return segmentSelector;
+ case 'paragraph':
+ default:
+ return this._resolveParagraphSelector(state) ?? segmentSelector;
+ }
- else if ('range' in position) {
- targetRange = (position as RangeRef).range.toRange();
+ }
+
+ /**
+ * Resolve the brief flash highlight that should appear after a skip
+ * whose granularity isn't already shown by the primary highlight.
+ * Returns null when the skip granularity matches the primary, or there's
+ * no recent skip to acknowledge.
+ */
+ private _resolveSkipSpotlightSelector(
+ state: ReadAloudStateSnapshot,
+ segmentSelector: Selector,
+ ): Selector | null {
+ if (!state.lastSkipGranularity || !state.activeSegment) {
+ return null;
}
- if (!targetRange) {
+ if (state.lastSkipGranularity === this._effectivePrimaryGranularity(state)) {
return null;
}
- for (let i = 0; i < segments.length; i++) {
- let segmentRange = (segments[i].position as RangeRef).range.toRange();
- // Find the first segment whose end is at or past the target start
- if (EPUBView.compareBoundaryPoints(Range.START_TO_END, segmentRange, targetRange) >= 0) {
- return i;
- }
+ switch (state.lastSkipGranularity) {
+ case 'sentence':
+ return segmentSelector;
+ case 'paragraph':
+ return this._resolveParagraphSelector(state);
+ default:
+ return null;
}
- return null;
}
- getRanges(rootRange: Range, granularity: ReadAloudGranularity): Range[] {
- // https://searchfox.org/mozilla-central/rev/b4412cedce6e2900f5553cbdc43c3fa49c4b9adb/toolkit/components/narrate/Narrator.sys.mjs#54-82
- let matches = new Set();
- let filter = (node: Node) => {
- if (matches.has(node.parentNode)) {
- // Reject sub-trees of accepted nodes.
- return NodeFilter.FILTER_REJECT;
- }
- if (!/\S/.test(node.textContent!)) {
- // Reject nodes with no text.
- return NodeFilter.FILTER_REJECT;
- }
- for (let c = node.firstChild; c; c = c.nextSibling) {
- if (c.nodeType == c.TEXT_NODE && /\S/.test(c.textContent!)) {
- // If node has a non-empty text child accept it.
- matches.add(node);
- return NodeFilter.FILTER_ACCEPT;
- }
- }
- return NodeFilter.FILTER_SKIP;
- };
-
- let walker = createRangeWalker(rootRange, NodeFilter.SHOW_ELEMENT, filter);
- let segmentRanges = [...iterateWalker(walker)].map((el) => {
- let range = this._view.iframeDocument.createRange();
- range.selectNodeContents(el);
- return range;
- });
-
- // If there weren't any element children, just use the whole root range
- if (!segmentRanges.length) {
- segmentRanges = [rootRange];
- }
-
- if (granularity === 'sentence') {
- segmentRanges = segmentRanges.flatMap(range => splitRangeToSentences(range));
+ private _effectivePrimaryGranularity(state: ReadAloudStateSnapshot): ReadAloudGranularity {
+ if (state.highlightGranularity === 'word' && state.segmentGranularity === 'sentence') {
+ return 'word';
}
- else if (granularity === 'paragraph') {
- // Split each paragraph into first sentence + rest of paragraph
- segmentRanges = segmentRanges.flatMap((range) => {
- let sentences = splitRangeToSentences(range);
- if (sentences.length <= 1) {
- return sentences;
- }
- let firstRange = sentences[0];
- let restRange = makeRangeSpanning(sentences.slice(1), true, this._view.iframeDocument);
- return [firstRange, restRange];
- });
+ if (state.highlightGranularity === 'sentence' && state.segmentGranularity === 'sentence') {
+ return 'sentence';
}
+ return 'paragraph';
+ }
- // Enforce max byte length per segment
- segmentRanges = segmentRanges.flatMap((segmentRange) => {
- if (!exceedsSegmentMaxLength(segmentRange.toString())) {
- return [segmentRange];
- }
+ private _collapseToStart(selector: Selector): Selector | null {
+ let range = this._view.toDisplayedRange(selector);
+ if (!range) return null;
+ range.collapse(true);
+ return this._view.toSelector(range);
+ }
- let textNodeRanges = splitRangeToTextNodes(segmentRange);
- let fullText = '';
- let parts: { range: Range; start: number; end: number }[] = [];
- for (let textNodeRange of textNodeRanges) {
- let text = textNodeRange.toString();
- parts.push({ range: textNodeRange, start: fullText.length, end: fullText.length + text.length });
- fullText += text;
- }
+ private _resolveActiveSegmentRange(state: ReadAloudStateSnapshot): Range | null {
+ let selector = this._resolveSegmentSelector(state);
+ if (!selector) return null;
+ return this._view.toDisplayedRange(selector);
+ }
- let chunks = splitTextToChunks(fullText);
- if (chunks.length <= 1) {
- return [segmentRange];
- }
+ /**
+ * Resolve the paragraph-start segment for a hovered block, or null if the
+ * block doesn't contain any read-aloud text. Used by the jump button.
+ *
+ * The cache is populated lazily by scanning all segments on a miss, so
+ * blocks become resolvable as sections mount, without needing the host
+ * view to invalidate anything explicitly.
+ */
+ getSegmentForBlock(block: Element): ReadAloudSegment | null {
+ // Drop stale entries when the segment list itself changes (e.g. on a
+ // segmentGranularity change); section mounts only change which positions
+ // resolve. A missing list is normalized to null so it compares as unchanged.
+ if ((this.state?.segments ?? null) !== this._lastCachedSegments) {
+ this._blockSegmentCache = new Map();
+ this._lastCachedSegments = this.state?.segments ?? null;
+ }
+ if (this._blockSegmentCache.has(block)) {
+ return this._blockSegmentCache.get(block) ?? null;
+ }
+ this._populateBlockSegmentCache();
+ // If population didn't add this block, mark it as a known miss so we
+ // don't re-scan on every subsequent hover of the same non-segment block.
+ if (!this._blockSegmentCache.has(block)) {
+ this._blockSegmentCache.set(block, null);
+ }
+ return this._blockSegmentCache.get(block) ?? null;
+ }
- let doc = segmentRange.commonAncestorContainer.ownerDocument!;
- let result: Range[] = [];
- for (let [chunkStart, chunkEnd] of chunks) {
- let startPart = parts.find(p => p.start <= chunkStart && chunkStart < p.end);
- let endPart = parts.find(p => p.start < chunkEnd && chunkEnd <= p.end);
- if (!startPart || !endPart) continue;
-
- let partRange = doc.createRange();
- partRange.setStart(
- startPart.range.startContainer,
- startPart.range.startOffset + (chunkStart - startPart.start)
- );
- partRange.setEnd(
- endPart.range.startContainer,
- endPart.range.startOffset + (chunkEnd - endPart.start)
- );
- if (!partRange.collapsed) {
- result.push(partRange);
- }
+ /**
+ * Walk the segment list once and add any newly resolvable blocks to the
+ * cache. Each leaf block is mapped to its paragraph's first segment, so
+ * the jump button still works when a paragraph spans multiple sub-blocks.
+ */
+ private _populateBlockSegmentCache() {
+ let segments = this.state?.segments;
+ if (!segments) return;
+
+ let currentParagraphStart: ReadAloudSegment | null = null;
+ for (let s of segments) {
+ if (s.anchor === 'paragraphStart') {
+ currentParagraphStart = s;
}
- return result.length ? result : [segmentRange];
- });
-
- return segmentRanges;
+ if (!currentParagraphStart) continue;
+
+ let pos = s.sourcePosition ?? s.position;
+ let selector = this._positionToSelector(pos);
+ if (!selector) continue;
+ let range = this._view.toDisplayedRange(selector);
+ if (!range) continue;
+ let el = closestElement(range.startContainer);
+ if (!el) continue;
+ let block = this._view.getReadAloudBlock(el);
+ if (block && !this._blockSegmentCache.has(block)) {
+ this._blockSegmentCache.set(block, currentParagraphStart);
+ }
+ }
}
}
diff --git a/src/dom/epub/epub-view.ts b/src/dom/epub/epub-view.ts
index 1965739f3..24dfa4439 100644
--- a/src/dom/epub/epub-view.ts
+++ b/src/dom/epub/epub-view.ts
@@ -9,8 +9,10 @@ import {
OutlineItem,
OverlayPopupParams,
ViewStats,
+ Position,
WADMAnnotation
} from "../../common/types";
+import type { StructuredDocumentText } from '../../../structured-document-text/schema';
import Epub, { Book, EpubCFI, NavItem } from "epubjs";
import {
getStartElement,
@@ -18,7 +20,7 @@ import {
PersistentRange,
splitRangeToTextNodes
} from "../common/lib/range";
-import { FragmentSelector, FragmentSelectorConformsTo, isFragment, Selector } from "../common/lib/selector";
+import { FragmentSelector, FragmentSelectorConformsTo, isFragment, isSelector, Selector } from "../common/lib/selector";
import { EPUBFindProcessor } from "./find";
import DOMView, {
DOMViewOptions,
@@ -445,7 +447,9 @@ class EPUBView extends DOMView {
};
}
- override toDisplayedRange(selector: Selector): Range | null {
+ override toDisplayedRange(position: Position): Range | null {
+ if (!isSelector(position)) return null;
+ let selector = position;
switch (selector.type) {
case 'FragmentSelector': {
if (selector.conformsTo !== FragmentSelectorConformsTo.EPUB3) {
@@ -1430,6 +1434,36 @@ class EPUBView extends DOMView {
this.flow.navigateToLastPage();
}
+	// Navigation location for the SDT block that is currently visible, or
+	// null when no block can be resolved.
+	getSDTLocation(sdtData: StructuredDocumentText): NavLocation | null {
+		let visibleIndex = this.getVisibleBlockIndex(sdtData);
+		if (visibleIndex === null) {
+			return null;
+		}
+		return { href: '#sdt-' + visibleIndex };
+	}
+
+	// Index into `sdtData.content` of the top-level block whose anchor path
+	// prefixes the flow's start CFI, or null when nothing matches.
+	getVisibleBlockIndex(sdtData: StructuredDocumentText | null): number | null {
+		let pageStartCFI = this.flow.startCFI?.toString(true);
+		if (!pageStartCFI || !sdtData) {
+			return null;
+		}
+		// Scan from the end so the last matching block wins.
+		let index = sdtData.content.length;
+		while (index-- > 0) {
+			let candidate = sdtData.content[index];
+			let anchor = candidate.anchor;
+			// Artifacts and blocks without a selectorMap anchor cannot match.
+			if (!candidate.artifact && anchor && 'selectorMap' in anchor
+					&& cfiStartsWithSelectorMap(pageStartCFI, anchor.selectorMap)) {
+				return index;
+			}
+		}
+		return null;
+	}
+
+	// Jump to the top-level SDT block at `blockIndex` through its anchor CFI.
+	// Does nothing when the block is missing or its anchor has no selectorMap.
+	navigateToSDTBlock(sdtData: StructuredDocumentText, blockIndex: number) {
+		let anchor = sdtData.content[blockIndex]?.anchor;
+		if (!anchor || !('selectorMap' in anchor)) {
+			return;
+		}
+		let wrappedCFI = 'epubcfi(' + anchor.selectorMap + ')';
+		this.navigate({ pageNumber: wrappedCFI }, { skipHistory: true, behavior: 'instant' });
+	}
+
canNavigateToPreviousPage() {
return this.flow.canNavigateToPreviousPage();
}
@@ -1572,6 +1606,18 @@ class EPUBView extends DOMView {
}
}
+// Returns true when `cfi` (assertion-free, still wrapped in `epubcfi(...)`)
+// begins with the path in `selectorMap` and that path ends on a step
+// boundary, so `/4/2` does not match a CFI that continues as `/4/20`.
+function cfiStartsWithSelectorMap(cfi: string, selectorMap: string): boolean {
+	let wrappedPath = 'epubcfi(' + selectorMap;
+	if (!cfi.startsWith(wrappedPath)) {
+		return false;
+	}
+	// Allowed right after the prefix: '/' (deeper step), ':' (offset), ','
+	// (CFI range) and ')' (exact match). An empty string means the CFI ended
+	// exactly at the prefix.
+	let following = cfi.charAt(wrappedPath.length);
+	return following === '' || '/:,)'.includes(following);
+}
+
type FlowMode = 'paginated' | 'scrolled';
export const enum SpreadMode {
diff --git a/src/dom/sdt/lib/create-position-mapper.ts b/src/dom/sdt/lib/create-position-mapper.ts
new file mode 100644
index 000000000..a6d4b8348
--- /dev/null
+++ b/src/dom/sdt/lib/create-position-mapper.ts
@@ -0,0 +1,30 @@
+import type { StructuredDocumentText } from '../../../../structured-document-text/schema';
+import { PositionIndex, type PositionMapper } from './position-index';
+import { PDFPositionMapper } from './pdf-position-mapper';
+import { SnapshotPositionMapper } from './snapshot-position-mapper';
+import { EPUBPositionMapper } from './epub-position-mapper';
+
+/** Processor types for which `createPositionMapperForType` has a mapper implementation. */
+export type ProcessorType = 'pdf' | 'epub' | 'snapshot';
+
+/**
+ * Build a position mapper for a loaded SDT document. The mapper
+ * implementation is chosen from `sdt.metadata.processor.type`.
+ */
+export function createPositionMapper(sdt: StructuredDocumentText): PositionMapper {
+	let positionIndex = new PositionIndex(sdt);
+	let processorType = sdt.metadata.processor.type as ProcessorType;
+	return createPositionMapperForType(processorType, positionIndex);
+}
+
+/**
+ * Build a position mapper of the given type over an index that starts out
+ * empty (the index is constructed with `null` instead of a document).
+ */
+export function createEmptyPositionMapper(type: ProcessorType): PositionMapper {
+	return createPositionMapperForType(type, new PositionIndex(null));
+}
+
+/**
+ * Instantiate the mapper class that corresponds to `type`, wrapping `index`.
+ * Throws for any type outside `ProcessorType`.
+ */
+function createPositionMapperForType(type: ProcessorType, index: PositionIndex): PositionMapper {
+	if (type === 'pdf') {
+		return new PDFPositionMapper(index);
+	}
+	if (type === 'epub') {
+		return new EPUBPositionMapper(index);
+	}
+	if (type === 'snapshot') {
+		return new SnapshotPositionMapper(index);
+	}
+	throw new Error(`Unsupported processor type: ${type}`);
+}
diff --git a/src/dom/sdt/lib/epub-position-mapper.ts b/src/dom/sdt/lib/epub-position-mapper.ts
new file mode 100644
index 000000000..2f73aea56
--- /dev/null
+++ b/src/dom/sdt/lib/epub-position-mapper.ts
@@ -0,0 +1,237 @@
+import type { DomAnchor } from '../../../../structured-document-text/schema';
+import type { AnnotationType, Position, SDTPosition } from '../../../common/types';
+import { isFragment, isTextPosition, type Selector } from '../../common/lib/selector';
+import {
+ expandSelectorMap,
+ resolveSelectorMap,
+ resolveSelectorMapRange,
+} from '../../../../structured-document-text/src/dom/epub/decode';
+import type { PositionIndex, PositionMapper, TextSpanEntry } from './position-index';
+
+/** A text span paired with the data used to match it against a CFI. */
+interface PathEntry {
+ /** The indexed text span this record describes. */
+ entry: TextSpanEntry;
+ /** Path expanded from the block and text-node selector maps of `entry`. */
+ path: string;
+
+ /** Absolute character start within the block (cumulative across prior text nodes). */
+ absoluteStart: number;
+}
+
+/**
+ * Maps between SDT positions and EPUB source positions (CFI fragment
+ * selectors), using the selector maps stored on SDT anchors.
+ */
+export class EPUBPositionMapper implements PositionMapper {
+	readonly index: PositionIndex;
+
+	/**
+	 * All entries with their expanded CFI paths, for text-level matching.
+	 * Entries without a valid path are omitted.
+	 */
+	private readonly _pathEntries: PathEntry[];
+
+	/**
+	 * Entries grouped by block-level CFI path for block-level fallback.
+	 * Key is the block anchor's selectorMap.
+	 */
+	private readonly _blockPathIndex: Map<string, TextSpanEntry[]>;
+
+	/** Number of `index.entries` already absorbed into the caches above. */
+	private _processedEntryCount = 0;
+
+	constructor(index: PositionIndex) {
+		this.index = index;
+		this._pathEntries = [];
+		this._blockPathIndex = new Map();
+		this.refresh();
+	}
+
+	// Bring the cached indexes up to date with `index.entries`. Streaming
+	// consumers append entries via index.appendContent(), then call this so
+	// later sourceToSDTPosition() lookups can find newly-loaded blocks.
+	refresh(): void {
+		for (let i = this._processedEntryCount; i < this.index.entries.length; i++) {
+			let entry = this.index.entries[i];
+			let blockAnchor = entry.blockAnchor as DomAnchor | null;
+			if (!blockAnchor) continue;
+
+			let path = this._getExpandedPath(entry);
+			if (path) {
+				let absoluteStart = this.index.computeAbsoluteCharOffset(
+					entry.blockRefPath, entry.textIndex, 0
+				);
+				this._pathEntries.push({ entry, path, absoluteStart });
+			}
+
+			// Block-level index (still useful for block-level CFI matching even
+			// when the entry has no text-node anchor, as in image alt text).
+			let blockPath = blockAnchor.selectorMap;
+			let list = this._blockPathIndex.get(blockPath);
+			if (!list) {
+				list = [];
+				this._blockPathIndex.set(blockPath, list);
+			}
+			if (!list.length || list[list.length - 1] !== entry) {
+				list.push(entry);
+			}
+		}
+		this._processedEntryCount = this.index.entries.length;
+	}
+
+	/**
+	 * Convert an SDT range to a source position. Returns null when either
+	 * endpoint has no index entry or no expanded path.
+	 */
+	sdtToSourcePosition(sdtPos: SDTPosition): Position | null {
+		let { startBlockRefPath, startTextIndex, startCharOffset,
+			endBlockRefPath, endTextIndex, endCharOffset } = sdtPos;
+		let startEntry = this.index.findEntry(startBlockRefPath, startTextIndex);
+		let endEntry = (startBlockRefPath === endBlockRefPath && startTextIndex === endTextIndex)
+			? startEntry
+			: this.index.findEntry(endBlockRefPath, endTextIndex);
+		if (!startEntry || !endEntry) return null;
+
+		let startPath = this._getExpandedPath(startEntry);
+		let endPath = this._getExpandedPath(endEntry);
+		if (!startPath || !endPath) return null;
+
+		if (startPath === endPath) {
+			// Both endpoints share one path. If they are different entries,
+			// re-express the end offset relative to the start entry's text.
+			let adjustedEndOffset = endCharOffset;
+			if (startEntry !== endEntry) {
+				adjustedEndOffset = this._cumulativeOffsetInPath(
+					endEntry, endCharOffset, startEntry, startPath);
+			}
+			let deltaMap = (startEntry.textNode.anchor as DomAnchor | undefined)?.deltaMap;
+			return resolveSelectorMap(startPath, startCharOffset, adjustedEndOffset, deltaMap);
+		}
+
+		let startDeltaMap = (startEntry.textNode.anchor as DomAnchor | undefined)?.deltaMap;
+		let endDeltaMap = (endEntry.textNode.anchor as DomAnchor | undefined)?.deltaMap;
+		return resolveSelectorMapRange(
+			startPath, startCharOffset,
+			endPath, endCharOffset,
+			startDeltaMap, endDeltaMap,
+		);
+	}
+
+	/**
+	 * Offset of `charOffset` within `entry`, measured from the start of
+	 * `origin`: sums the lengths of the entries from `origin` up to (not
+	 * including) `entry` that are in `entry`'s block and expand to `path`.
+	 * Falls back to `charOffset` when `entry` is not reached.
+	 */
+	private _cumulativeOffsetInPath(
+		entry: TextSpanEntry, charOffset: number,
+		origin: TextSpanEntry, path: string,
+	): number {
+		let cumulative = 0;
+		let started = false;
+		for (let e of this.index.entries) {
+			if (e === origin) started = true;
+			if (!started) continue;
+			if (e.blockRefPath !== entry.blockRefPath) continue;
+			if (this._getExpandedPath(e) !== path) continue;
+			if (e === entry) return cumulative + charOffset;
+			cumulative += e.charLength;
+		}
+		return charOffset;
+	}
+
+	/** This mapper returns annotation positions unchanged for every type. */
+	transformAnnotationPosition(position: Position, _type: AnnotationType): Position {
+		return position;
+	}
+
+	/**
+	 * Convert a source position to an SDT range. Only fragment selectors are
+	 * handled; a text-position refinement, when present, supplies character
+	 * offsets. Text-level paths are tried first, then block-level paths.
+	 * Returns null when nothing matches.
+	 */
+	sourceToSDTPosition(position: Position): SDTPosition | null {
+		let selector = position as Selector;
+		if (!isFragment(selector)) return null;
+
+		let cfiValue = selector.value;
+		let startOffset: number | null = null;
+		let endOffset: number | null = null;
+		if (selector.refinedBy && isTextPosition(selector.refinedBy)) {
+			startOffset = selector.refinedBy.start;
+			endOffset = selector.refinedBy.end;
+		}
+
+		// Try matching at the text-node level using pre-computed paths
+		for (let { entry, path, absoluteStart } of this._pathEntries) {
+			// Step-aligned match, so a path ending in `/2` is not found
+			// inside a CFI that continues as `/20`.
+			if (!cfiContainsPath(cfiValue, path)) continue;
+
+			if (startOffset === null || endOffset === null) {
+				return {
+					startBlockRefPath: entry.blockRefPath,
+					startTextIndex: entry.textIndex,
+					startCharOffset: 0,
+					endBlockRefPath: entry.blockRefPath,
+					endTextIndex: entry.textIndex,
+					endCharOffset: entry.charLength,
+				};
+			}
+
+			let absoluteEnd = absoluteStart + entry.charLength;
+			if (startOffset < absoluteEnd && endOffset > absoluteStart) {
+				// Clip the requested range to this entry's text.
+				let localStart = Math.max(0, startOffset - absoluteStart);
+				let localEnd = Math.min(entry.charLength, endOffset - absoluteStart);
+				return {
+					startBlockRefPath: entry.blockRefPath,
+					startTextIndex: entry.textIndex,
+					startCharOffset: localStart,
+					endBlockRefPath: entry.blockRefPath,
+					endTextIndex: entry.textIndex,
+					endCharOffset: localEnd,
+				};
+			}
+		}
+
+		// Try matching at the block level
+		for (let [blockPath, blockEntries] of this._blockPathIndex) {
+			if (!cfiContainsPath(cfiValue, blockPath)) continue;
+			if (!blockEntries.length) continue;
+
+			if (startOffset === null || endOffset === null) {
+				let first = blockEntries[0];
+				let last = blockEntries[blockEntries.length - 1];
+				return {
+					startBlockRefPath: first.blockRefPath,
+					startTextIndex: first.textIndex,
+					startCharOffset: 0,
+					endBlockRefPath: last.blockRefPath,
+					endTextIndex: last.textIndex,
+					endCharOffset: last.charLength,
+				};
+			}
+
+			// cumulativeOffsets[i] is where entry i starts within the block.
+			let cumulativeOffsets = [0];
+			for (let i = 0; i < blockEntries.length; i++) {
+				cumulativeOffsets.push(cumulativeOffsets[i] + blockEntries[i].charLength);
+			}
+
+			let startResult = null;
+			let endResult = null;
+			let startLocalOffset = 0;
+			let endLocalOffset = 0;
+
+			for (let i = 0; i < blockEntries.length; i++) {
+				let cumulativeStart = cumulativeOffsets[i];
+				let cumulativeEnd = cumulativeOffsets[i + 1];
+				if (!startResult && startOffset < cumulativeEnd) {
+					startResult = blockEntries[i];
+					startLocalOffset = Math.max(0, startOffset - cumulativeStart);
+				}
+				if (endOffset > cumulativeStart && endOffset <= cumulativeEnd) {
+					endResult = blockEntries[i];
+					endLocalOffset = endOffset - cumulativeStart;
+				}
+			}
+
+			if (startResult && endResult) {
+				return {
+					startBlockRefPath: startResult.blockRefPath,
+					startTextIndex: startResult.textIndex,
+					startCharOffset: startLocalOffset,
+					endBlockRefPath: endResult.blockRefPath,
+					endTextIndex: endResult.textIndex,
+					endCharOffset: endLocalOffset,
+				};
+			}
+		}
+
+		return null;
+	}
+
+	/**
+	 * Expanded path for `entry`, built from its block and text-node selector
+	 * maps, or null when either anchor is missing.
+	 */
+	private _getExpandedPath(entry: TextSpanEntry): string | null {
+		let blockAnchor = entry.blockAnchor as DomAnchor | null;
+		if (!blockAnchor) return null;
+		// Without a text-node anchor (e.g., synthetic alt-text entries) the
+		// block path doesn't reach a text node, so it can't carry character
+		// offsets in a CFI. Bail out instead of producing an invalid path.
+		let textAnchor = entry.textNode.anchor as DomAnchor | undefined;
+		if (!textAnchor) return null;
+		return expandSelectorMap(blockAnchor.selectorMap, textAnchor.selectorMap);
+	}
+}
+
+/**
+ * True when `path` occurs in `cfi` without being cut off in the middle of a
+ * numeric step: `/4/2` found inside `/4/20` is rejected, while `/4/2/1`,
+ * `/4/2:5`, `/4/2,` and `/4/2)` are accepted. The trailing check applies only
+ * when `path` itself ends in a digit. Only the trailing edge is checked; this
+ * assumes paths begin with a `/` step separator (TODO confirm against
+ * `expandSelectorMap`).
+ */
+function cfiContainsPath(cfi: string, path: string): boolean {
+	let pathEndsInDigit = isAsciiDigit(path.charAt(path.length - 1));
+	let from = 0;
+	for (;;) {
+		let at = cfi.indexOf(path, from);
+		if (at === -1) return false;
+		if (!pathEndsInDigit || !isAsciiDigit(cfi.charAt(at + path.length))) {
+			return true;
+		}
+		// Misaligned hit; a later occurrence may still be aligned.
+		from = at + 1;
+	}
+}
+
+/** True for a single character in '0'..'9'; false for the empty string. */
+function isAsciiDigit(char: string): boolean {
+	return char >= '0' && char <= '9';
+}
diff --git a/src/dom/sdt/lib/pdf-position-mapper.ts b/src/dom/sdt/lib/pdf-position-mapper.ts
new file mode 100644
index 000000000..53127c72c
--- /dev/null
+++ b/src/dom/sdt/lib/pdf-position-mapper.ts
@@ -0,0 +1,397 @@
+import type { PdfAnchor } from '../../../../structured-document-text/schema';
+import type { Position, PDFPosition, SDTPosition, AnnotationType } from '../../../common/types';
+import { PDF_NOTE_DIMENSIONS } from '../../../common/defines';
+import { parseTextMap, buildRunData } from '../../../../structured-document-text/src/pdf';
+import type { PositionIndex, PositionMapper, TextSpanEntry } from './position-index';
+
+// Position data for one text run item. The mappers in this file consume one
+// item per non-whitespace character of a text node.
+interface RunDatum {
+ // Rect as four numbers; indexes 0/2 are used as X and 1/3 as Y bounds.
+ rect: number[];
+ // Page the rect is on.
+ pageIndex: number;
+}
+
+/**
+ * Cached per-entry data for fast lookups.
+ */
+interface EntryCache {
+	/** The indexed text span this cache record belongs to. */
+	entry: TextSpanEntry;
+
+	/** Per-character position data, or null if only block-level rects are available. */
+	runData: RunDatum[] | null;
+
+	/** Pages this entry appears on (page indexes). */
+	pages: Set<number>;
+}
+
+/**
+ * Maps between SDT positions and PDF positions (page index plus rects),
+ * using the `textMap` / `pageRects` data stored on SDT anchors.
+ */
+export class PDFPositionMapper implements PositionMapper {
+	readonly index: PositionIndex;
+
+	/** Entries grouped by page index for fast spatial lookups. */
+	private _pageIndex: Map<number, EntryCache[]>;
+
+	/** All cached entries in document order. */
+	private _cache: EntryCache[];
+
+	constructor(index: PositionIndex) {
+		this.index = index;
+		this._cache = [];
+		this._pageIndex = new Map();
+		this.refresh();
+	}
+
+	/**
+	 * Absorb any `index.entries` added since the last call. Every entry gets
+	 * exactly one `_cache` slot, so `_cache.length` doubles as the count of
+	 * entries already processed.
+	 */
+	refresh() {
+		for (let i = this._cache.length; i < this.index.entries.length; i++) {
+			let entry = this.index.entries[i];
+			let textAnchor = entry.textNode.anchor as PdfAnchor | undefined;
+			let blockAnchor = entry.blockAnchor as PdfAnchor | null;
+			// Prefer the text node's own anchor data; fall back to the block's.
+			let textMap = textAnchor?.textMap || blockAnchor?.textMap;
+			let anchorPageRects = textAnchor?.pageRects || blockAnchor?.pageRects;
+
+			let runData: RunDatum[] | null = null;
+			let pages = new Set<number>();
+
+			if (textMap) {
+				runData = buildRunData(parseTextMap(textMap));
+				for (let rd of runData) {
+					pages.add(rd.pageIndex);
+				}
+			}
+			else if (anchorPageRects) {
+				for (let pr of anchorPageRects) {
+					pages.add(pr[0]);
+				}
+			}
+
+			let cached: EntryCache = { entry, runData, pages };
+			this._cache.push(cached);
+
+			for (let page of pages) {
+				let list = this._pageIndex.get(page);
+				if (!list) {
+					list = [];
+					this._pageIndex.set(page, list);
+				}
+				list.push(cached);
+			}
+		}
+	}
+
+	/**
+	 * Convert an SDT range to a PDF position by collecting the rects of every
+	 * covered character (or whole-anchor rects when no run data exists) and
+	 * merging them into line rects. Only the first page, plus the second as
+	 * `nextPageRects`, is reported. Returns null when no rects are found.
+	 */
+	sdtToSourcePosition(sdtPos: SDTPosition): Position | null {
+		let { startBlockRefPath, startTextIndex, startCharOffset,
+			endBlockRefPath, endTextIndex, endCharOffset } = sdtPos;
+		let rectsByPage = new Map<number, number[][]>();
+		let inRange = false;
+
+		for (let { entry, runData } of this._cache) {
+			let isStart = entry.blockRefPath === startBlockRefPath && entry.textIndex === startTextIndex;
+			let isEnd = entry.blockRefPath === endBlockRefPath && entry.textIndex === endTextIndex;
+
+			if (isStart) inRange = true;
+			if (!inRange) continue;
+
+			if (runData) {
+				if (!runData.length) {
+					if (isEnd) break;
+					continue;
+				}
+
+				let charStart = isStart ? startCharOffset : 0;
+				let charEnd = isEnd ? endCharOffset : entry.charLength;
+
+				// Run data has one item per non-whitespace character, so
+				// advance `runIdx` only past non-whitespace text.
+				let runIdx = 0;
+				for (let ci = 0; ci < entry.textNode.text.length && runIdx < runData.length; ci++) {
+					if (isWhitespace(entry.textNode.text[ci])) continue;
+					let rd = runData[runIdx];
+					runIdx++;
+					if (ci >= charStart && ci < charEnd) {
+						let pageRects = rectsByPage.get(rd.pageIndex);
+						if (!pageRects) {
+							pageRects = [];
+							rectsByPage.set(rd.pageIndex, pageRects);
+						}
+						pageRects.push(rd.rect);
+					}
+				}
+			}
+			else {
+				let anchorPageRects = (entry.textNode.anchor as PdfAnchor | undefined)?.pageRects;
+				if (anchorPageRects) {
+					for (let pr of anchorPageRects) {
+						let pageRects = rectsByPage.get(pr[0]);
+						if (!pageRects) {
+							pageRects = [];
+							rectsByPage.set(pr[0], pageRects);
+						}
+						pageRects.push([pr[1], pr[2], pr[3], pr[4]]);
+					}
+				}
+			}
+
+			if (isEnd) break;
+		}
+
+		if (!rectsByPage.size) return null;
+
+		let pages = [...rectsByPage.keys()].sort((a, b) => a - b);
+		let pageIndex = pages[0];
+		let rects = mergeLineRects(rectsByPage.get(pageIndex)!);
+
+		let result: PDFPosition = { pageIndex, rects };
+		if (pages.length > 1) {
+			result.nextPageRects = mergeLineRects(rectsByPage.get(pages[1])!);
+		}
+		return result;
+	}
+
+	/**
+	 * For `note` annotations, replace the position with a single
+	 * `PDF_NOTE_DIMENSIONS`-sized square placed at the maximum `rect[2]` and
+	 * minimum `rect[1]` of the input rects. Other types are returned as-is.
+	 */
+	transformAnnotationPosition(position: Position, type: AnnotationType): Position {
+		if (type !== 'note') {
+			return position;
+		}
+
+		// Move note into a rect at the top-right
+		let pos = position as PDFPosition;
+		if (!pos.rects?.length) return pos;
+		let right = -Infinity;
+		let top = Infinity;
+		for (let rect of pos.rects) {
+			right = Math.max(right, rect[2]);
+			top = Math.min(top, rect[1]);
+		}
+		return {
+			pageIndex: pos.pageIndex,
+			rects: [[
+				right - PDF_NOTE_DIMENSIONS,
+				top,
+				right,
+				top + PDF_NOTE_DIMENSIONS,
+			]],
+		};
+	}
+
+	/**
+	 * Convert a PDF position to an SDT range: the first and last characters
+	 * (in document order) whose rects fall inside the position's rects.
+	 * Falls back to the nearest block on the page when nothing matches, and
+	 * returns null when the position has no page index or rects.
+	 */
+	sourceToSDTPosition(position: Position): SDTPosition | null {
+		let pos = position as PDFPosition;
+		if (pos.pageIndex === undefined || !pos.rects?.length) return null;
+
+		let allTargets: { pageIndex: number; rects: number[][] }[] = [
+			{ pageIndex: pos.pageIndex, rects: pos.rects },
+		];
+		if (pos.nextPageRects) {
+			allTargets.push({ pageIndex: pos.pageIndex + 1, rects: pos.nextPageRects });
+		}
+
+		// Only check entries on the target pages
+		let candidates = new Set<EntryCache>();
+		for (let { pageIndex } of allTargets) {
+			for (let c of this._pageIndex.get(pageIndex) ?? []) {
+				candidates.add(c);
+			}
+		}
+
+		let startResult: { blockRefPath: string; textIndex: number; charOffset: number } | null = null;
+		let endResult: { blockRefPath: string; textIndex: number; charOffset: number } | null = null;
+		let hadMatch = false;
+		// Walk in document order and stop at the first entry after a match
+		// that is off the target pages or does not match itself, so the
+		// result covers one contiguous run of entries.
+		for (let cached of this._cache) {
+			if (!candidates.has(cached)) {
+				if (hadMatch) break;
+				continue;
+			}
+
+			let { entry, runData } = cached;
+			let matchedInThisEntry = false;
+
+			if (runData?.length) {
+				let runIdx = 0;
+				for (let ci = 0; ci < entry.textNode.text.length && runIdx < runData.length; ci++) {
+					if (isWhitespace(entry.textNode.text[ci])) continue;
+					let rd = runData[runIdx];
+					runIdx++;
+					for (let target of allTargets) {
+						if (rd.pageIndex !== target.pageIndex) continue;
+						for (let targetRect of target.rects) {
+							if (charRectInLineRect(rd.rect, targetRect)) {
+								matchedInThisEntry = true;
+								if (!startResult) {
+									startResult = {
+										blockRefPath: entry.blockRefPath,
+										textIndex: entry.textIndex,
+										charOffset: ci,
+									};
+								}
+								endResult = {
+									blockRefPath: entry.blockRefPath,
+									textIndex: entry.textIndex,
+									charOffset: ci + 1,
+								};
+							}
+						}
+					}
+				}
+			}
+			else {
+				let anchorPageRects = (entry.textNode.anchor as PdfAnchor | undefined)?.pageRects;
+				if (anchorPageRects) {
+					for (let pr of anchorPageRects) {
+						let blockRect = [pr[1], pr[2], pr[3], pr[4]];
+						for (let target of allTargets) {
+							if (pr[0] !== target.pageIndex) continue;
+							for (let targetRect of target.rects) {
+								if (rectsOverlap(blockRect, targetRect)) {
+									matchedInThisEntry = true;
+									if (!startResult) {
+										startResult = {
+											blockRefPath: entry.blockRefPath,
+											textIndex: entry.textIndex,
+											charOffset: 0,
+										};
+									}
+									endResult = {
+										blockRefPath: entry.blockRefPath,
+										textIndex: entry.textIndex,
+										charOffset: entry.charLength,
+									};
+								}
+							}
+						}
+					}
+				}
+				else {
+					// No per-text-node positioning info (typically a whitespace
+					// span). Skip without breaking, so a run of matches can span
+					// across these "transparent" entries.
+					continue;
+				}
+			}
+
+			if (hadMatch && !matchedInThisEntry) break;
+			if (matchedInThisEntry) hadMatch = true;
+		}
+
+		if (!startResult || !endResult) {
+			return this._findNearestBlock(pos);
+		}
+		return {
+			startBlockRefPath: startResult.blockRefPath,
+			startTextIndex: startResult.textIndex,
+			startCharOffset: startResult.charOffset,
+			endBlockRefPath: endResult.blockRefPath,
+			endTextIndex: endResult.textIndex,
+			endCharOffset: endResult.charOffset,
+		};
+	}
+
+	/**
+	 * Find the block whose vertical center is closest to the position's
+	 * vertical center on the same page.
+	 */
+	private _findNearestBlock(pos: PDFPosition): SDTPosition | null {
+		// Callers have already verified that `pos.rects` is non-empty.
+		let targetRect = pos.rects![0];
+		let targetY = (targetRect[1] + targetRect[3]) / 2;
+		let pageEntries = this._pageIndex.get(pos.pageIndex);
+		if (!pageEntries) return null;
+
+		// Vertical extent on this page per block, keyed by blockRefPath.
+		let seenBlocks = new Map<string, { minY: number; maxY: number; entry: TextSpanEntry }>();
+
+		for (let { entry, runData } of pageEntries) {
+			let blockMinY = Infinity;
+			let blockMaxY = -Infinity;
+
+			if (runData) {
+				for (let rd of runData) {
+					if (rd.pageIndex === pos.pageIndex) {
+						blockMinY = Math.min(blockMinY, rd.rect[1]);
+						blockMaxY = Math.max(blockMaxY, rd.rect[3]);
+					}
+				}
+			}
+			else {
+				let anchorPageRects = (entry.textNode.anchor as PdfAnchor | undefined)?.pageRects
+					|| (entry.blockAnchor as PdfAnchor | null)?.pageRects;
+				if (anchorPageRects) {
+					for (let pr of anchorPageRects) {
+						if (pr[0] === pos.pageIndex) {
+							blockMinY = Math.min(blockMinY, pr[2]);
+							blockMaxY = Math.max(blockMaxY, pr[4]);
+						}
+					}
+				}
+			}
+
+			// Nothing of this entry lies on the target page.
+			if (blockMinY === Infinity) continue;
+
+			let existing = seenBlocks.get(entry.blockRefPath);
+			if (existing) {
+				existing.minY = Math.min(existing.minY, blockMinY);
+				existing.maxY = Math.max(existing.maxY, blockMaxY);
+			}
+			else {
+				seenBlocks.set(entry.blockRefPath, { minY: blockMinY, maxY: blockMaxY, entry });
+			}
+		}
+
+		let bestEntry: TextSpanEntry | null = null;
+		let bestDist = Infinity;
+		for (let [, block] of seenBlocks) {
+			let dist = Math.abs((block.minY + block.maxY) / 2 - targetY);
+			if (dist < bestDist) {
+				bestDist = dist;
+				bestEntry = block.entry;
+			}
+		}
+
+		if (!bestEntry) return null;
+		// Return the whole block, from its first text span to its last.
+		let blockEntries = this.index.getBlockEntries(bestEntry.blockRefPath);
+		let first = blockEntries[0];
+		let last = blockEntries[blockEntries.length - 1];
+		return {
+			startBlockRefPath: first.blockRefPath,
+			startTextIndex: first.textIndex,
+			startCharOffset: 0,
+			endBlockRefPath: last.blockRefPath,
+			endTextIndex: last.textIndex,
+			endCharOffset: last.charLength,
+		};
+	}
+}
+
+/**
+ * True when `char` is one of: space, tab, line feed, carriage return,
+ * no-break space (U+00A0) or zero-width space (U+200B).
+ */
+function isWhitespace(char: string): boolean {
+	switch (char) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\r':
+		case '\u00A0':
+		case '\u200B':
+			return true;
+		default:
+			return false;
+	}
+}
+
+/**
+ * True when the center point of `charRect` lies inside `lineRect`, edges
+ * included. Both rects are read as [x1, y1, x2, y2].
+ */
+function charRectInLineRect(charRect: number[], lineRect: number[]): boolean {
+	let midX = (charRect[0] + charRect[2]) / 2;
+	let midY = (charRect[1] + charRect[3]) / 2;
+	let [minX, minY, maxX, maxY] = lineRect;
+	let insideX = minX <= midX && midX <= maxX;
+	let insideY = minY <= midY && midY <= maxY;
+	return insideX && insideY;
+}
+
+/**
+ * True when rects `a` and `b` (read as [x1, y1, x2, y2]) share interior
+ * area. Rects that only touch along an edge do not count as overlapping.
+ */
+function rectsOverlap(a: number[], b: number[]): boolean {
+	let overlapsOnX = a[0] < b[2] && b[0] < a[2];
+	let overlapsOnY = a[1] < b[3] && b[1] < a[3];
+	return overlapsOnX && overlapsOnY;
+}
+
+/**
+ * Merge rects that sit on the same line into one bounding rect per line.
+ * Rects are sorted by `rect[1]`, then `rect[0]`; a rect joins the current
+ * merged rect when it spans that rect's vertical midpoint, otherwise it
+ * starts a new one. The input array and its rects are not modified.
+ */
+function mergeLineRects(rects: number[][]): number[][] {
+	if (rects.length === 0) {
+		return [];
+	}
+	let ordered = rects.slice().sort((a, b) => a[1] - b[1] || a[0] - b[0]);
+	let merged: number[][] = [];
+	for (let rect of ordered) {
+		let current = merged[merged.length - 1];
+		if (current) {
+			let currentMidY = (current[1] + current[3]) / 2;
+			if (rect[1] <= currentMidY && currentMidY <= rect[3]) {
+				// Same line: grow the current rect to cover this one.
+				current[0] = Math.min(current[0], rect[0]);
+				current[1] = Math.min(current[1], rect[1]);
+				current[2] = Math.max(current[2], rect[2]);
+				current[3] = Math.max(current[3], rect[3]);
+				continue;
+			}
+		}
+		merged.push(rect.slice());
+	}
+	return merged;
+}
diff --git a/src/dom/sdt/lib/position-index.ts b/src/dom/sdt/lib/position-index.ts
new file mode 100644
index 000000000..23814129c
--- /dev/null
+++ b/src/dom/sdt/lib/position-index.ts
@@ -0,0 +1,154 @@
+import type {
+ StructuredDocumentText,
+ ContentBlockNode,
+ TextNode,
+ ListItemNode,
+ TableRowNode,
+ Anchor,
+} from '../../../../structured-document-text/schema';
+import type { AnnotationType, Position, SDTPosition } from '../../../common/types';
+import { isContentBlockNode, isTextNodeArray } from './utilities';
+
+/**
+ * A single text span entry in the position index.
+ */
+export interface TextSpanEntry {
+ /** Dotted path of the containing block (or list item) in the SDT content tree. */
+ blockRefPath: string;
+ /** Position of `textNode` within its container's text-node array. */
+ textIndex: number;
+ /** The SDT text node this entry was built from. */
+ textNode: TextNode;
+ /** Anchor of the containing block or list item, or null when it has none. */
+ blockAnchor: Anchor | null;
+ /** `textNode.text.length` at the time the entry was indexed. */
+ charLength: number;
+}
+
+/**
+ * Converts between SDT DOM positions and source-format positions.
+ */
+export interface PositionMapper {
+ /** The text-span index this mapper reads from. */
+ readonly index: PositionIndex;
+
+ /** Map an SDT range to a source-format position, or null when it cannot be mapped. */
+ sdtToSourcePosition(sdtPos: SDTPosition): Position | null;
+
+ /** Map a source-format position to an SDT range, or null when it cannot be mapped. */
+ sourceToSDTPosition(position: Position): SDTPosition | null;
+
+ /** Adjust a position for an annotation of the given type; may return it unchanged. */
+ transformAnnotationPosition(position: Position, type: AnnotationType): Position;
+
+ // Required by streaming consumers after PositionIndex.appendContent.
+ refresh(): void;
+}
+
+/**
+ * Generic index of SDT text spans.
+ *
+ * For streaming SDT, pass null to start empty, then call appendContent()
+ * per chunk with the chunk's global content-index offset.
+ */
+export class PositionIndex {
+	private _entries: TextSpanEntry[] = [];
+
+	/**
+	 * The same entries as `_entries`, grouped by `blockRefPath` in insertion
+	 * order, so per-block lookups do not have to scan the whole index.
+	 */
+	private _entriesByBlock = new Map<string, TextSpanEntry[]>();
+
+	constructor(sdt: StructuredDocumentText | null) {
+		if (sdt) {
+			this._buildIndex(sdt.content, 0);
+		}
+	}
+
+	/** All indexed text spans, in the order they were added. */
+	get entries(): readonly TextSpanEntry[] {
+		return this._entries;
+	}
+
+	/**
+	 * Index additional top-level blocks. `baseIndex` is the content index of
+	 * `content[0]`, used to build each block's ref path.
+	 */
+	appendContent(content: ContentBlockNode[], baseIndex: number) {
+		this._buildIndex(content, baseIndex);
+	}
+
+	/** First entry with the given block path and text index, or null. */
+	findEntry(blockRefPath: string, textIndex: number): TextSpanEntry | null {
+		let blockEntries = this._entriesByBlock.get(blockRefPath);
+		return blockEntries?.find(e => e.textIndex === textIndex) ?? null;
+	}
+
+	/** A new array holding every entry of the given block, in index order. */
+	getBlockEntries(blockRefPath: string): TextSpanEntry[] {
+		return (this._entriesByBlock.get(blockRefPath) ?? []).slice();
+	}
+
+	/**
+	 * Compute the absolute character offset of a position within a block,
+	 * accumulating across prior text nodes. Returns `charOffset` unchanged
+	 * when the block has no entry with `textIndex`.
+	 */
+	computeAbsoluteCharOffset(blockRefPath: string, textIndex: number, charOffset: number): number {
+		let cumulative = 0;
+		for (let entry of this._entriesByBlock.get(blockRefPath) ?? []) {
+			if (entry.textIndex === textIndex) {
+				return cumulative + charOffset;
+			}
+			cumulative += entry.charLength;
+		}
+		return charOffset;
+	}
+
+	/** Walk each non-artifact top-level block, using its content index as ref path. */
+	private _buildIndex(content: ContentBlockNode[], baseIndex: number) {
+		for (let [i, block] of content.entries()) {
+			if (block.artifact) continue;
+			this._walkBlock(block, String(baseIndex + i));
+		}
+	}
+
+	/**
+	 * Index a block: text content is added directly; lists, tables and other
+	 * containers are descended into, extending `refPath` with child indexes.
+	 */
+	private _walkBlock(block: ContentBlockNode, refPath: string) {
+		let content = block.content;
+		if (!content || content.length === 0) return;
+
+		if (isTextNodeArray(content)) {
+			this._addTextEntries(content, refPath, block.anchor || null);
+			return;
+		}
+
+		switch (block.type) {
+			case 'list':
+				for (let [i, item] of (block.content as ListItemNode[]).entries()) {
+					this._walkListItem(item, `${refPath}.${i}`);
+				}
+				break;
+			case 'table':
+				// Cell blocks are addressed as row.cell.block under the table.
+				for (let [i, row] of (block.content as TableRowNode[]).entries()) {
+					for (let [j, cell] of row.content.entries()) {
+						for (let [k, cellBlock] of cell.content.entries()) {
+							this._walkBlock(cellBlock, `${refPath}.${i}.${j}.${k}`);
+						}
+					}
+				}
+				break;
+			default:
+				for (let [i, child] of content.entries()) {
+					if (isContentBlockNode(child)) {
+						this._walkBlock(child, `${refPath}.${i}`);
+					}
+				}
+				break;
+		}
+	}
+
+	/** Index a list item; empty and artifact items are skipped. */
+	private _walkListItem(item: ListItemNode, refPath: string) {
+		if (!item.content || item.content.length === 0) return;
+		if (item.artifact) return;
+
+		if (isTextNodeArray(item.content)) {
+			this._addTextEntries(item.content, refPath, item.anchor || null);
+		}
+		else {
+			for (let [i, child] of (item.content as ContentBlockNode[]).entries()) {
+				this._walkBlock(child, `${refPath}.${i}`);
+			}
+		}
+	}
+
+	/** Append one entry per text node to both the flat list and the per-block group. */
+	private _addTextEntries(textNodes: TextNode[], refPath: string, blockAnchor: Anchor | null) {
+		let blockEntries = this._entriesByBlock.get(refPath);
+		if (!blockEntries) {
+			blockEntries = [];
+			this._entriesByBlock.set(refPath, blockEntries);
+		}
+		for (let [i, textNode] of textNodes.entries()) {
+			let entry: TextSpanEntry = {
+				blockRefPath: refPath,
+				textIndex: i,
+				textNode,
+				blockAnchor,
+				charLength: textNode.text.length,
+			};
+			this._entries.push(entry);
+			blockEntries.push(entry);
+		}
+	}
+}
diff --git a/src/dom/sdt/lib/renderer.ts b/src/dom/sdt/lib/renderer.ts
new file mode 100644
index 000000000..29d96368e
--- /dev/null
+++ b/src/dom/sdt/lib/renderer.ts
@@ -0,0 +1,237 @@
+import type {
+ StructuredDocumentText,
+ ContentBlockNode,
+ TextNode,
+ ListNode,
+ ListItemNode,
+ TableNode,
+ BlockquoteNode,
+ RefPath,
+} from '../../../../structured-document-text/schema';
+import { isTextNodeArray } from './utilities';
+
+/**
+ * Convert a RefPath array to a dotted string for use as element ID / data attribute.
+ * The segments are joined with '.' in order.
+ */
+function refPathToString(ref: RefPath): string {
+ return ref.join('.');
+}
+
+/**
+ * Render a SDT document to semantic HTML.
+ *
+ * Top-level blocks flagged as `artifact` are skipped.
+ * Each block element gets `data-ref-path` encoding its path into the SDT content tree.
+ * Rendered blocks and list items also get an `id` of the form `sdt-<refPath>` for internal linking.
+ * Each inline text span gets `data-text-index` identifying which TextNode it came from.
+ * Blocks, list items and text spans with backRefs list them in `data-back-refs`.
+ * Text nodes with a target URL or refs get wrapped in `<a>` links to the target.
+ */
+export function renderSDT(sdt: StructuredDocumentText, doc: Document): HTMLElement {
+ let container = doc.createElement('article');
+ container.id = 'sdt-content';
+ for (let [i, block] of sdt.content.entries()) {
+ if (block.artifact) continue;
+ // The ref path of a top-level block is its index in `sdt.content`.
+ let el = renderBlock(doc, block, String(i));
+ if (el) {
+ container.append(el);
+ }
+ }
+ return container;
+}
+
+// Render one SDT block to an element, choosing the tag from `block.type`.
+// Returns null for block types that have no case below. The element is
+// tagged with `data-ref-path` and an `id` of `sdt-<refPath>`.
+function renderBlock(doc: Document, block: ContentBlockNode, refPath: string): HTMLElement | null {
+ let el: HTMLElement;
+ switch (block.type) {
+ case 'paragraph':
+ el = doc.createElement('p');
+ el.append(renderTextNodes(doc, block.content, refPath));
+ break;
+ case 'heading':
+ // Every heading is rendered as <h2>.
+ el = doc.createElement('h2');
+ el.append(renderTextNodes(doc, block.content, refPath));
+ break;
+ case 'math':
+ el = doc.createElement('div');
+ el.className = 'sdt-math';
+ el.append(renderTextNodes(doc, block.content, refPath));
+ break;
+ case 'image':
+ // Only the image's text content (if any) is rendered, inside a <figure>.
+ el = doc.createElement('figure');
+ el.className = 'sdt-image';
+ if (block.content.length) {
+ el.append(renderTextNodes(doc, block.content, refPath));
+ }
+ break;
+ case 'caption':
+ el = doc.createElement('figcaption');
+ el.append(renderTextNodes(doc, block.content, refPath));
+ break;
+ case 'note':
+ el = doc.createElement('aside');
+ el.className = 'sdt-note';
+ el.append(renderTextNodes(doc, block.content, refPath));
+ break;
+ case 'preformatted':
+ el = doc.createElement('pre');
+ el.append(renderTextNodes(doc, block.content, refPath));
+ break;
+ case 'blockquote':
+ el = renderBlockquote(doc, block, refPath);
+ break;
+ case 'list':
+ el = renderList(doc, block, refPath);
+ break;
+ case 'table':
+ el = renderTable(doc, block, refPath);
+ break;
+ default:
+ return null;
+ }
+ // Attributes shared by every rendered block.
+ el.dataset.refPath = refPath;
+ el.id = 'sdt-' + refPath;
+ if ('reference' in block && block.reference) {
+ el.classList.add('sdt-reference');
+ }
+ if (block.backRefs?.length) {
+ // Space-separated list of dotted ref paths.
+ el.dataset.backRefs = block.backRefs.map(refPathToString).join(' ');
+ }
+ return el;
+}
+
+// Render a blockquote and its child blocks. Children without a `type` are
+// skipped, as are children that `renderBlock` returns null for.
+function renderBlockquote(doc: Document, block: BlockquoteNode, refPath: string): HTMLElement {
+	let quote = doc.createElement('blockquote');
+	for (let [childIndex, child] of block.content.entries()) {
+		if (!child.type) {
+			continue;
+		}
+		let rendered = renderBlock(doc, child, `${refPath}.${childIndex}`);
+		if (rendered) {
+			quote.append(rendered);
+		}
+	}
+	return quote;
+}
+
+// Render a list as <ol> or <ul> with one <li> per item. For ordered lists an
+// explicit numeric `startIndex` other than 1 is written to the `start`
+// attribute, including 0, which a truthiness check would drop.
+function renderList(doc: Document, block: ListNode, refPath: string): HTMLElement {
+	let el = doc.createElement(block.ordered ? 'ol' : 'ul');
+	let startIndex = block.startIndex;
+	if (block.ordered && typeof startIndex === 'number' && startIndex !== 1) {
+		(el as HTMLOListElement).start = startIndex;
+	}
+	for (let [i, item] of block.content.entries()) {
+		let li = renderListItem(doc, item, `${refPath}.${i}`);
+		el.append(li);
+	}
+	return el;
+}
+
+// Render one list item as <li>, tagged with `data-ref-path`, an `id` of
+// `sdt-<refPath>`, and reference / back-ref markers when present. Content is
+// either inline text nodes or nested blocks.
+function renderListItem(doc: Document, item: ListItemNode, refPath: string): HTMLElement {
+	let li = doc.createElement('li');
+	li.dataset.refPath = refPath;
+	li.id = 'sdt-' + refPath;
+	if (item.reference) {
+		li.classList.add('sdt-reference');
+	}
+	if (item.backRefs?.length) {
+		li.dataset.backRefs = item.backRefs.map(refPathToString).join(' ');
+	}
+
+	let content = item.content;
+	if (content.length !== 0) {
+		if (isTextNodeArray(content)) {
+			li.append(renderTextNodes(doc, content, refPath));
+		}
+		else {
+			for (let [childIndex, child] of content.entries()) {
+				let rendered = renderBlock(doc, child, `${refPath}.${childIndex}`);
+				if (rendered) li.append(rendered);
+			}
+		}
+	}
+	return li;
+}
+
+// Render a table block. Content is either a flat array of text nodes, which
+// is wrapped in a single-cell table, or rows of cells that each contain
+// blocks. An empty table renders as a bare <table>.
+function renderTable(doc: Document, block: TableNode, refPath: string): HTMLElement {
+ let table = doc.createElement('table');
+ let content = block.content;
+ if (content.length === 0) return table;
+ if (isTextNodeArray(content)) {
+ // Text-only table: one row with one cell holding all the text.
+ let td = doc.createElement('td');
+ td.append(renderTextNodes(doc, content, refPath));
+ let tr = doc.createElement('tr');
+ tr.append(td);
+ let tbody = doc.createElement('tbody');
+ tbody.append(tr);
+ table.append(tbody);
+ }
+ else {
+ let tbody = doc.createElement('tbody');
+ for (let [i, row] of content.entries()) {
+ let tr = doc.createElement('tr');
+ tr.dataset.refPath = `${refPath}.${i}`;
+ for (let [j, cell] of row.content.entries()) {
+ let td = doc.createElement(cell.header ? 'th' : 'td');
+ td.dataset.refPath = `${refPath}.${i}.${j}`;
+ // Spans of 1 (or unset) are the default, so only larger values are written.
+ if (cell.colspan && cell.colspan > 1) td.colSpan = cell.colspan;
+ if (cell.rowspan && cell.rowspan > 1) td.rowSpan = cell.rowspan;
+ for (let [k, child] of cell.content.entries()) {
+ // Cell blocks are addressed as row.cell.block under the table's path.
+ let childEl = renderBlock(doc, child, `${refPath}.${i}.${j}.${k}`);
+ if (childEl) td.append(childEl);
+ }
+ tr.append(td);
+ }
+ tbody.append(tr);
+ }
+ table.append(tbody);
+ }
+ return table;
+}
+
+// Render text nodes in order into one fragment, passing each node's array
+// position along as its text index.
+function renderTextNodes(doc: Document, textNodes: TextNode[], parentRefPath: string): DocumentFragment {
+	let fragment = doc.createDocumentFragment();
+	for (let position = 0; position < textNodes.length; position++) {
+		let rendered = renderTextNode(doc, textNodes[position], parentRefPath, position);
+		fragment.append(rendered);
+	}
+	return fragment;
+}
+
+// Render one SDT text node: the text, wrapped in style elements, then in an
+// optional link, then in a <span> carrying `data-text-index`.
+// `_parentRefPath` is accepted but not used here.
+function renderTextNode(doc: Document, textNode: TextNode, _parentRefPath: string, index: number): Node {
+ // Wrap `node` in a new element with the given tag.
+ function wrapIn(node: Node, tagName: string): HTMLElement {
+ let wrapper = doc.createElement(tagName);
+ wrapper.append(node);
+ return wrapper;
+ }
+
+ let text = textNode.text;
+ let node: Node = doc.createTextNode(text);
+
+ // Apply inline styles by wrapping in elements (innermost first)
+ let style = textNode.style;
+ if (style) {
+ if (style.monospace) node = wrapIn(node, 'code');
+ if (style.sub) node = wrapIn(node, 'sub');
+ if (style.sup) node = wrapIn(node, 'sup');
+ if (style.italic) node = wrapIn(node, 'em');
+ if (style.bold) node = wrapIn(node, 'strong');
+ }
+
+ // Wrap in link — external URL or internal ref
+ if (textNode.target?.url) {
+ let a = doc.createElement('a');
+ // NOTE(review): the URL is assigned as-is; confirm that unsafe schemes
+ // such as `javascript:` are rejected upstream or by the link click handler.
+ a.href = textNode.target.url;
+ a.append(node);
+ node = a;
+ }
+ else if (textNode.refs?.length) {
+ let a = doc.createElement('a');
+ // The href points at the first ref; all refs go in `data-refs` when there are several.
+ a.href = '#sdt-' + refPathToString(textNode.refs[0]);
+ a.className = 'sdt-ref';
+ if (textNode.refs.length > 1) {
+ a.dataset.refs = textNode.refs.map(refPathToString).join(' ');
+ }
+ a.append(node);
+ node = a;
+ }
+
+ // Wrap in a span with data attributes for position tracking
+ let span = doc.createElement('span');
+ span.dataset.textIndex = String(index);
+ if (textNode.backRefs?.length) {
+ span.dataset.backRefs = textNode.backRefs.map(refPathToString).join(' ');
+ }
+ span.append(node);
+ return span;
+}
diff --git a/src/dom/sdt/lib/snapshot-position-mapper.ts b/src/dom/sdt/lib/snapshot-position-mapper.ts
new file mode 100644
index 000000000..6edd6f007
--- /dev/null
+++ b/src/dom/sdt/lib/snapshot-position-mapper.ts
@@ -0,0 +1,214 @@
+import type { DomAnchor } from '../../../../structured-document-text/schema';
+import type { AnnotationType, Position, SDTPosition } from '../../../common/types';
+import { isCss, isTextPosition, type Selector } from '../../common/lib/selector';
+import { expandSelectorMap, parseSelectorMap, resolveSelectorMap } from '../../../../structured-document-text/src/dom/snapshot/decode';
+import type { PositionIndex, PositionMapper, TextSpanEntry } from './position-index';
+
+interface SelectorEntry {
+ entry: TextSpanEntry; // the position-index entry this record was built from
+
+ /** The CSS selector string (without offset). */
+ selector: string;
+
+ /** Character offset of this entry within the selector's text. */
+ offset: number;
+}
+
+export class SnapshotPositionMapper implements PositionMapper {
+ readonly index: PositionIndex; // source of the entries cached in the two maps below
+
+ /** Entries grouped by their CSS selector string for O(1) lookup. */
+ private _selectorIndex: Map<string, SelectorEntry[]>;
+
+ /** Entries grouped by their block-level selector for block fallback. */
+ private _blockSelectorIndex: Map<string, TextSpanEntry[]>;
+
+ /** Number of `index.entries` already absorbed into the caches above. */
+ private _processedEntryCount = 0;
+
+ constructor(index: PositionIndex) {
+     this.index = index;
+     this._selectorIndex = new Map();
+     this._blockSelectorIndex = new Map();
+     this.refresh(); // absorb the entries already present in the index
+ }
+
+ // Bring the cached indexes up to date with `index.entries`. Streaming
+ // consumers append entries via index.appendContent(), then call this so
+ // later sourceToSDTPosition() lookups can find newly-loaded blocks.
+ refresh(): void {
+ for (let i = this._processedEntryCount; i < this.index.entries.length; i++) { // resume after the entries handled by earlier calls
+ let entry = this.index.entries[i];
+ let blockAnchor = entry.blockAnchor as DomAnchor | null;
+ if (!blockAnchor) continue; // entries without a block anchor are not indexed
+
+ let textAnchor = entry.textNode.anchor as DomAnchor | undefined;
+ let expandedMap: string;
+ if (textAnchor) {
+ expandedMap = expandSelectorMap(blockAnchor.selectorMap, textAnchor.selectorMap);
+ }
+ else {
+ expandedMap = blockAnchor.selectorMap; // no text-level anchor: fall back to the block's own selector map
+ }
+
+ let { selector, offset } = parseSelectorMap(expandedMap);
+
+ let list = this._selectorIndex.get(selector);
+ if (!list) {
+ list = [];
+ this._selectorIndex.set(selector, list);
+ }
+ list.push({ entry, selector, offset });
+
+ // Block-level index, keyed by the block anchor's unexpanded selector map
+ let blockSelector = blockAnchor.selectorMap;
+ let blockList = this._blockSelectorIndex.get(blockSelector);
+ if (!blockList) {
+ blockList = [];
+ this._blockSelectorIndex.set(blockSelector, blockList);
+ }
+ if (!blockList.length || blockList[blockList.length - 1].blockRefPath !== entry.blockRefPath // skip an entry that repeats the previous one's
+ || blockList[blockList.length - 1].textIndex !== entry.textIndex) { // block ref path and text index
+ blockList.push(entry);
+ }
+ }
+ this._processedEntryCount = this.index.entries.length; // everything up to here is now cached
+ }
+
+ sdtToSourcePosition(sdtPos: SDTPosition): Position | null { // SDT coordinates -> source position; null when the start entry or its block anchor is missing
+ let { startBlockRefPath, startTextIndex, startCharOffset,
+ endBlockRefPath, endTextIndex, endCharOffset } = sdtPos;
+ let startEntry = this.index.findEntry(startBlockRefPath, startTextIndex);
+ if (!startEntry) return null;
+
+ let blockAnchor = startEntry.blockAnchor as DomAnchor | null;
+ let textAnchor = startEntry.textNode.anchor as DomAnchor | undefined;
+ if (!blockAnchor) return null;
+
+ let selectorMap: string; // always derived from the START entry's anchors
+ if (textAnchor) {
+ selectorMap = expandSelectorMap(blockAnchor.selectorMap, textAnchor.selectorMap);
+ }
+ else {
+ selectorMap = blockAnchor.selectorMap;
+ }
+
+ // When both endpoints are in the same text node, the offsets are already
+ // relative to the selectorMap's element
+ if (startBlockRefPath === endBlockRefPath && startTextIndex === endTextIndex) {
+ return resolveSelectorMap(selectorMap, startCharOffset, endCharOffset, textAnchor?.deltaMap);
+ }
+
+ let startAbsOffset = this.index.computeAbsoluteCharOffset(startBlockRefPath, startTextIndex, startCharOffset);
+ let endAbsOffset = this.index.computeAbsoluteCharOffset(endBlockRefPath, endTextIndex, endCharOffset);
+
+ if (startBlockRefPath === endBlockRefPath) { // same block, different text nodes: use the absolute offsets
+ return resolveSelectorMap(selectorMap, startAbsOffset, endAbsOffset, textAnchor?.deltaMap);
+ }
+
+ return resolveSelectorMap(selectorMap, startCharOffset, endCharOffset, textAnchor?.deltaMap); // NOTE(review): endpoints in different blocks still resolve against the start entry's selector with per-node offsets — confirm this is intended
+ }
+
+ transformAnnotationPosition(position: Position, _type: AnnotationType): Position {
+ return position; // identity: the position is returned unchanged, whatever the annotation type
+ }
+
+ sourceToSDTPosition(position: Position): SDTPosition | null { // source CSS selector -> SDT coordinates, or null when nothing matches
+ let selector = position as Selector;
+ if (!isCss(selector)) return null; // only CSS selectors can be looked up
+
+ let selectorValue = selector.value;
+ let startOffset: number | null = null;
+ let endOffset: number | null = null;
+ if (selector.refinedBy && isTextPosition(selector.refinedBy)) { // offsets stay null unless a text-position refinement is present
+ startOffset = selector.refinedBy.start;
+ endOffset = selector.refinedBy.end;
+ }
+
+ // Try matching at the text-node level
+ let entries = this._selectorIndex.get(selectorValue);
+ if (entries) {
+ for (let { entry, offset: entryOffset } of entries) {
+ if (startOffset === null || endOffset === null) { // no offsets: the first entry is returned in full
+ return {
+ startBlockRefPath: entry.blockRefPath,
+ startTextIndex: entry.textIndex,
+ startCharOffset: 0,
+ endBlockRefPath: entry.blockRefPath,
+ endTextIndex: entry.textIndex,
+ endCharOffset: entry.charLength,
+ };
+ }
+
+ let entryStart = entryOffset;
+ let entryEnd = entryOffset + entry.charLength;
+
+ if (startOffset < entryEnd && endOffset > entryStart) { // first overlapping entry wins; the range is clipped to that entry
+ let localStart = Math.max(0, startOffset - entryStart);
+ let localEnd = Math.min(entry.charLength, endOffset - entryStart);
+ return {
+ startBlockRefPath: entry.blockRefPath,
+ startTextIndex: entry.textIndex,
+ startCharOffset: localStart,
+ endBlockRefPath: entry.blockRefPath,
+ endTextIndex: entry.textIndex,
+ endCharOffset: localEnd,
+ };
+ }
+ }
+ }
+
+ // Try matching at the block level
+ let blockEntries = this._blockSelectorIndex.get(selectorValue);
+ if (blockEntries?.length) {
+ let cumulativeOffsets = [0]; // prefix sums: entry i covers [cumulativeOffsets[i], cumulativeOffsets[i + 1])
+ for (let i = 0; i < blockEntries.length; i++) {
+ cumulativeOffsets.push(cumulativeOffsets[i] + blockEntries[i].charLength);
+ }
+
+ if (startOffset === null || endOffset === null) { // no offsets: span the whole block, first entry to last
+ let first = blockEntries[0];
+ let last = blockEntries[blockEntries.length - 1];
+ return {
+ startBlockRefPath: first.blockRefPath,
+ startTextIndex: first.textIndex,
+ startCharOffset: 0,
+ endBlockRefPath: last.blockRefPath,
+ endTextIndex: last.textIndex,
+ endCharOffset: last.charLength,
+ };
+ }
+
+ let startEntry = null;
+ let endEntry = null;
+ let startLocalOffset = 0;
+ let endLocalOffset = 0;
+
+ for (let i = 0; i < blockEntries.length; i++) {
+ let cumulativeStart = cumulativeOffsets[i];
+ let cumulativeEnd = cumulativeOffsets[i + 1];
+ if (!startEntry && startOffset < cumulativeEnd) { // first entry whose end lies beyond the start offset
+ startEntry = blockEntries[i];
+ startLocalOffset = Math.max(0, startOffset - cumulativeStart);
+ }
+ if (endOffset > cumulativeStart && endOffset <= cumulativeEnd) { // NOTE(review): an endOffset of 0 or past the block's total length matches no entry, so the method returns null — confirm
+ endEntry = blockEntries[i];
+ endLocalOffset = endOffset - cumulativeStart;
+ }
+ }
+
+ if (startEntry && endEntry) {
+ return {
+ startBlockRefPath: startEntry.blockRefPath,
+ startTextIndex: startEntry.textIndex,
+ startCharOffset: startLocalOffset,
+ endBlockRefPath: endEntry.blockRefPath,
+ endTextIndex: endEntry.textIndex,
+ endCharOffset: endLocalOffset,
+ };
+ }
+ }
+
+ return null;
+ }
+}
diff --git a/src/dom/sdt/lib/utilities.ts b/src/dom/sdt/lib/utilities.ts
new file mode 100644
index 000000000..60d43414d
--- /dev/null
+++ b/src/dom/sdt/lib/utilities.ts
@@ -0,0 +1,9 @@
+import { ContentBlockNode, ListItemNode, TableRowNode, TextNode } from '../../../../structured-document-text/schema';
+
+export function isTextNodeArray(content: unknown[]): content is TextNode[] { // true when the first element is an object with a `text` property
+    return content.length > 0 && typeof content[0] === 'object' && content[0] !== null && 'text' in content[0]; // `in` throws on primitives and null, so rule those out first
+}
+
+export function isContentBlockNode(node: ContentBlockNode | ListItemNode | TableRowNode): node is ContentBlockNode {
+    return !(node.type === 'listitem' || node.type === 'tablerow'); // anything that is neither a list item nor a table row
+}
diff --git a/src/dom/sdt/sdt-view.ts b/src/dom/sdt/sdt-view.ts
new file mode 100644
index 000000000..f3cc8d596
--- /dev/null
+++ b/src/dom/sdt/sdt-view.ts
@@ -0,0 +1,564 @@
+import {
+ AnnotationType,
+ WADMAnnotation,
+ FindState,
+ isSDTPosition,
+ NavLocation,
+ NewAnnotation,
+ Position,
+ ViewStats,
+ OutlineItem,
+ SDTPosition,
+} from "../../common/types";
+import {
+ getBoundingPageRect,
+ getInnerText,
+ getStartElement,
+} from "../common/lib/range";
+import {
+ Selector,
+} from "../common/lib/selector";
+import DOMView, {
+ DOMViewState,
+ NavigateOptions,
+} from "../common/dom-view";
+import {
+ closestElement,
+ getVisibleTextNodes,
+} from "../common/lib/nodes";
+import DefaultFindProcessor, { createSearchContext } from "../common/lib/find";
+import { isPageRectVisible } from "../common/lib/rect";
+import { scrollIntoView } from "../common/lib/scroll-into-view";
+import { isSafari } from "../../common/lib/utilities";
+import { renderSDT } from "./lib/renderer";
+import { type PositionMapper } from "./lib/position-index";
+import { createPositionMapper } from "./lib/create-position-mapper";
+import sdtSCSS from './stylesheets/sdt.scss';
+import type { StructuredDocumentText } from '../../../structured-document-text/schema';
+
+export interface SDTViewData {
+ sdt: StructuredDocumentText; // the document that the view renders into its iframe
+ getSourceAnnotationMeta: (position: Position) => { sortIndex: string; pageLabel: string } | null; // a null result makes getAnnotationFromRange() return null
+ syncBaseView: (blockIndex: number) => void; // invoked from _updateViewState() with the result of getVisibleBlockIndex()
+}
+
+class SDTView extends DOMView {
+ protected _find: DefaultFindProcessor | null = null;
+
+ private _sdt!: StructuredDocumentText;
+
+ private _positionMapper!: PositionMapper;
+
+ private get _searchContext() { // built on first access from the body's visible text nodes
+ let searchContext = createSearchContext(getVisibleTextNodes(this._iframeDocument.body));
+ Object.defineProperty(this, '_searchContext', { value: searchContext }); // cache: an own property now shadows this getter
+ return searchContext;
+ }
+
+ protected override async _getSrcDoc() {
+ return ''; // empty srcdoc: _handleViewCreated() appends the rendered content to the body
+ }
+
+ override getData(): SDTViewData {
+ return this._options.data; // the data object supplied through the view options
+ }
+
+ override get lang(): string {
+     let props = this._sdt.metadata?.source?.properties as Record<string, unknown> | undefined;
+     let lang = props?.language || props?.Language || props?.['dc:language']; // first non-empty of the three property spellings
+     if (typeof lang === 'string' && lang) {
+         return lang.split('-')[0]; // keep only the part before the first '-', e.g. "en-US" -> "en"
+     }
+     return 'en'; // default when no usable language property exists
+ }
+
+ protected override async _handleViewCreated(viewState: Partial<DOMViewState>) {
+     this._sdt = this._options.data.sdt;
+
+     // Render SDT content into the iframe body
+     let content = renderSDT(this._sdt, this._iframeDocument);
+     this._iframeDocument.body.append(content);
+
+     // Build position index for source-format annotation mapping
+     this._positionMapper = this._createPositionMapper();
+
+     // Inject SDT stylesheet
+     let style = this._iframeDocument.createElement('style');
+     style.textContent = sdtSCSS;
+     this._iframeDocument.head.append(style);
+
+     await super._handleViewCreated(viewState);
+
+     this._setScale(viewState.scale ?? 1); // saved scale, or 1 when the view state has none
+
+     // Build outline
+     this._initOutline();
+
+     if (this._options.location) {
+         this.navigate(this._options.location, { behavior: 'instant' }); // go to the requested location without animation
+     }
+ }
+
+ getVisibleBlockIndex(): number | null { // index of the top-level block nearest the top of the viewport, or null
+     let blocks = this._iframeDocument.querySelectorAll('#sdt-content > [data-ref-path]');
+     let bestRefPath: string | null = null;
+     let bestDist = Infinity;
+     for (let block of blocks) {
+         let rect = block.getBoundingClientRect();
+         if (rect.bottom < 0) continue; // entirely above the viewport
+         let dist = Math.abs(rect.top);
+         if (dist < bestDist) {
+             bestDist = dist;
+             bestRefPath = (block as HTMLElement).dataset.refPath ?? null;
+         }
+         if (rect.top > 200) break; // more than 200px below the top edge: stop scanning
+     }
+     let index = bestRefPath ? parseInt(bestRefPath.split('.')[0], 10) : NaN; // first dotted component, parsed as decimal
+     return Number.isNaN(index) ? null : index; // a missing or non-numeric ref path yields null, never NaN
+ }
+
+ private _createPositionMapper(): PositionMapper {
+ return createPositionMapper(this._sdt); // mapper built from the loaded SDT
+ }
+
+ private _initOutline() {
+     let items = this._sdt.catalog?.outline;
+     if (!items?.length) return; // no outline entries: nothing to publish
+     this._options.onSetOutline(this._convertOutline(items));
+ }
+
+ private _convertOutline(items: StructuredDocumentText['catalog']['outline']): OutlineItem[] {
+ if (!items) return []; // missing outline level: no items
+ return items.map((item) => {
+ let location: NavLocation = {};
+ if (item.ref) {
+ location.href = '#sdt-' + item.ref.join('.'); // in-document anchor built from the dotted ref path
+ }
+ return {
+ title: item.title,
+ location,
+ items: item.children ? this._convertOutline(item.children) : undefined, // recurse into nested entries
+ };
+ });
+ }
+
+ // Annotation methods
+
+ /**
+  * Build a new annotation of the given type (and optional color) from a DOM range.
+  * Returns null if the range is empty or cannot be mapped to a source position.
+  */
+ override getAnnotationFromRange(range: Range, type: AnnotationType, color?: string): NewAnnotation | null {
+     // A collapsed range selects nothing
+     if (range.collapsed) return null;
+
+     // Whitespace-only text is rejected
+     let text = getInnerText(range);
+     if (text.trim().length === 0) return null;
+
+     // The range must translate to a source-format selector...
+     let position = this.toSelector(range);
+     if (!position) return null;
+
+     // ...for which the host supplies a sort index and page label
+     let meta = this._options.data.getSourceAnnotationMeta(position);
+     if (!meta) return null;
+
+     return {
+         type,
+         color,
+         sortIndex: meta.sortIndex,
+         position,
+         text,
+         pageLabel: meta.pageLabel,
+     };
+ }
+
+ protected override _finalizeAnnotation(annotation: NewAnnotation): NewAnnotation {
+     let mapped = this._positionMapper.transformAnnotationPosition(annotation.position, annotation.type);
+     // The mapper handed back the same position: nothing to rewrite
+     if (mapped === annotation.position) {
+         return annotation;
+     }
+     // Position changed, so look up its sort index too (keeping the old one when no metadata is returned)
+     let sortIndex = this._options.data.getSourceAnnotationMeta(mapped)?.sortIndex ?? annotation.sortIndex;
+     return { ...annotation, position: mapped as Selector, sortIndex };
+ }
+
+ override toSelector(range: Range): Selector | null {
+     let mapper = this._positionMapper;
+     if (!mapper) {
+         // The position mapper has not been created yet
+         return null;
+     }
+
+     // DOM range -> SDT text-node coordinates -> source-format position
+     let sdtPosition = this._resolveRangeToSDT(range);
+     if (!sdtPosition) return null;
+     return mapper.sdtToSourcePosition(sdtPosition) as Selector | null;
+ }
+
+ override toDisplayedRange(position: Position): Range | null {
+     let mapper = this._positionMapper;
+     if (!mapper) {
+         return null;
+     }
+
+     // SDT positions are used as they are; source-format positions are first
+     // translated to SDT coordinates through the mapper
+     let sdtPosition = isSDTPosition(position)
+         ? position
+         : mapper.sourceToSDTPosition(position);
+
+     if (!sdtPosition) return null;
+     return this._createDOMRange(sdtPosition);
+ }
+
+ override getSelectionPosition(): SDTPosition | null {
+     let selection = this._iframeDocument.getSelection();
+     // Only a non-collapsed selection holding at least one range has a position
+     return selection && !selection.isCollapsed && selection.rangeCount ? this._resolveRangeToSDT(selection.getRangeAt(0)) : null;
+ }
+
+ protected override _getAnnotationDisplayedRange(annotation: Partial<WADMAnnotation> & Pick<WADMAnnotation, 'type' | 'position'>): Range | null {
+     let range = this.toDisplayedRange(annotation.position);
+     if (!range) return null; // position cannot be displayed
+     if (annotation.type === 'note') {
+         let block = closestElement(range.commonAncestorContainer)?.closest('[data-ref-path]');
+         if (block) {
+             range = this._iframeDocument.createRange(); // notes are widened to the contents of their enclosing block
+             range.selectNodeContents(block);
+         }
+     }
+     return range;
+ }
+
+ /**
+  * Translate a DOM Range into SDT coordinates: block ref path, text index and
+  * character offset for each endpoint. Null if either endpoint cannot be mapped.
+  */
+ private _resolveRangeToSDT(range: Range): {
+     startBlockRefPath: string; startTextIndex: number; startCharOffset: number;
+     endBlockRefPath: string; endTextIndex: number; endCharOffset: number;
+ } | null {
+     let { startContainer, startOffset, endContainer, endOffset } = range;
+     let from = this._domPositionToSDT(startContainer, startOffset, false);
+     let to = this._domPositionToSDT(endContainer, endOffset, true);
+     // Both endpoints must resolve
+     if (!from || !to) return null;
+
+     return {
+         startBlockRefPath: from.blockRefPath, startTextIndex: from.textIndex, startCharOffset: from.charOffset,
+         endBlockRefPath: to.blockRefPath, endTextIndex: to.textIndex, endCharOffset: to.charOffset,
+     };
+ }
+
+ /**
+  * Map a single DOM position (node + offset) to SDT text node coordinates.
+  */
+ private _domPositionToSDT(node: Node, offset: number, isEnd = false): { // isEnd selects the end-of-range fallbacks below
+ blockRefPath: string;
+ textIndex: number;
+ charOffset: number;
+ } | null {
+ // Walk up to find the text span (has data-text-index)
+ let textSpan: HTMLElement | null = null;
+ let current: Node | null = node;
+ while (current && current !== this._iframeDocument.body) { // the search stops at the body
+ if (current.nodeType === Node.ELEMENT_NODE
+ && (current as HTMLElement).dataset.textIndex !== undefined) {
+ textSpan = current as HTMLElement;
+ break;
+ }
+ current = current.parentNode;
+ }
+
+ // If we didn't find a text span (e.g., position is at an element boundary
+ // between blocks), resolve to the nearest text span
+ if (!textSpan && node.nodeType === Node.ELEMENT_NODE) {
+ let el = node as HTMLElement;
+ if (isEnd && offset > 0) {
+ // End position: find the last text span in the preceding content
+ let child = el.childNodes[offset - 1];
+ if (child) {
+ let spans = (child.nodeType === Node.ELEMENT_NODE ? child as HTMLElement : el) // NOTE(review): for a non-element child this searches all of `el`, so the last span may lie after the position — confirm
+ .querySelectorAll('[data-text-index]');
+ if (spans?.length) {
+ textSpan = spans[spans.length - 1] as HTMLElement;
+ }
+ }
+ }
+ if (!textSpan) {
+ // Start position or fallback: find the first text span in the following content
+ let child = el.childNodes[offset] || el.childNodes[el.childNodes.length - 1]; // offset past the last child: use the last child
+ if (child) {
+ textSpan = (child.nodeType === Node.ELEMENT_NODE ? child as HTMLElement : el)
+ .querySelector('[data-text-index]') as HTMLElement | null;
+ }
+ }
+ }
+
+ if (!textSpan) return null;
+
+ // Walk up further to find the block (has data-ref-path)
+ let blockEl: HTMLElement | null = textSpan.parentElement;
+ while (blockEl && !blockEl.dataset.refPath) {
+ blockEl = blockEl.parentElement;
+ }
+ if (!blockEl) return null;
+
+ // Compute character offset within this text span.
+ let charOffset: number;
+ if (textSpan.contains(node)) {
+ let charRange = this._iframeDocument.createRange(); // measure the text between the span start and the position
+ charRange.setStart(textSpan, 0);
+ charRange.setEnd(node, offset);
+ charOffset = charRange.toString().length;
+ }
+ else {
+ // Position was resolved to a different span — use start or end
+ charOffset = isEnd ? (textSpan.textContent?.length ?? 0) : 0;
+ }
+
+ return {
+ blockRefPath: blockEl.dataset.refPath!,
+ textIndex: parseInt(textSpan.dataset.textIndex!), // data-text-index is written with String(index) by the renderer
+ charOffset,
+ };
+ }
+
+ /**
+  * Build a DOM Range spanning the given SDT start/end coordinates; null if either end has no DOM counterpart.
+  */
+ private _createDOMRange(pos: {
+     startBlockRefPath: string; startTextIndex: number; startCharOffset: number;
+     endBlockRefPath: string; endTextIndex: number; endCharOffset: number;
+ }): Range | null {
+     let domStart = this._sdtPositionToDOM(pos.startBlockRefPath, pos.startTextIndex, pos.startCharOffset);
+     let domEnd = this._sdtPositionToDOM(pos.endBlockRefPath, pos.endTextIndex, pos.endCharOffset);
+     if (!(domStart && domEnd)) return null;
+
+     let domRange = this._iframeDocument.createRange();
+     domRange.setStart(domStart.node, domStart.offset);
+     domRange.setEnd(domEnd.node, domEnd.offset);
+     return domRange;
+ }
+
+ /**
+  * Map SDT coordinates to a DOM text node + offset. Returns null when the block
+  * or text span is not in the document, or the span contains no text node.
+  */
+ private _sdtPositionToDOM(blockRefPath: string, textIndex: number, charOffset: number): {
+     node: Node; offset: number;
+ } | null {
+     let blockEl = this._iframeDocument.querySelector(`[data-ref-path="${blockRefPath}"]`);
+     if (!blockEl) return null;
+
+     let textSpan = blockEl.querySelector(`[data-text-index="${textIndex}"]`);
+     if (!textSpan) return null;
+
+     // Walk text nodes within the span to find the right offset
+     let walker = this._iframeDocument.createTreeWalker(textSpan, NodeFilter.SHOW_TEXT);
+     let remaining = charOffset;
+     let lastText: Node | null = null;
+     let textNode: Node | null;
+     while ((textNode = walker.nextNode())) {
+         let len = textNode.textContent!.length;
+         if (remaining <= len) {
+             return { node: textNode, offset: remaining };
+         }
+         remaining -= len;
+         lastText = textNode;
+     }
+
+     // Fallback: charOffset runs past the span's text, so clamp to the end of the last text node visited.
+     // (textSpan.lastChild may be an element, where a character count is not a valid boundary offset.)
+     return lastText ? { node: lastText, offset: lastText.textContent?.length ?? 0 } : null;
+ }
+
+ protected override _getHistoryLocation(): NavLocation | null {
+ return { scrollCoords: [this._iframeWindow.scrollX, this._iframeWindow.scrollY] }; // history entries record the current scroll offsets
+ }
+
+ override navigate(location: NavLocation, options: NavigateOptions = {}) {
+ if (location.href?.startsWith('#sdt-')) { // in-document SDT anchor
+ let el = this._iframeDocument.getElementById(location.href.slice(1)); // the element id is the href minus its leading '#'
+ if (el) {
+ scrollIntoView(el, {
+ behavior: options.behavior ?? 'smooth',
+ block: options.block ?? 'start',
+ });
+ }
+ return; // an unknown anchor is ignored rather than passed to the base class
+ }
+ if (location.scrollYPercent !== undefined) {
+ this._iframeWindow.scrollTo({
+ top: location.scrollYPercent / 100 // percentage of the maximum scrollable distance
+ * (this._iframeDocument.body.scrollHeight - this._iframeDocument.documentElement.clientHeight),
+ behavior: options.behavior as ScrollBehavior ?? 'instant',
+ });
+ return;
+ }
+ if (location.scrollCoords) {
+ this._iframeWindow.scrollTo(...location.scrollCoords); // restore stored scroll offsets
+ return;
+ }
+ super.navigate(location, options); // every other location kind is handled by the base class
+ }
+
+ override navigateToSelector(selector: Selector, options: NavigateOptions = {}) {
+     let target = this.toDisplayedRange(selector);
+     if (!target) return;
+
+     let pageRect = getBoundingPageRect(target);
+     if (!pageRect) return;
+     // With `ifNeeded`, leave the scroll position alone when the target is already visible
+     if (options.ifNeeded && isPageRectVisible(pageRect, this._iframeWindow)) return;
+
+     scrollIntoView(target, {
+         behavior: options.behavior ?? 'smooth',
+         block: options.block ?? 'center',
+     });
+ }
+
+ protected override _handleScroll(event: Event) {
+ super._handleScroll(event);
+ this._updateViewState(); // re-sync the visible block after every scroll event
+ }
+
+ protected override _updateViewState() {
+     let visibleIndex = this.getVisibleBlockIndex();
+     // Nothing to report when no block could be identified
+     if (visibleIndex === null) return;
+     this._options.data.syncBaseView(visibleIndex);
+ }
+
+ protected override _updateViewStats() {
+     let canCopy = this._selectedAnnotationIDs.length > 0 || !(this._iframeWindow.getSelection()?.isCollapsed ?? true); // selected annotations or a non-empty text selection
+     this._options.onChangeViewStats({
+         canCopy,
+         canZoomIn: this.scale === undefined || this.scale < this.MAX_SCALE,
+         canZoomOut: this.scale === undefined || this.scale > this.MIN_SCALE,
+         canZoomReset: this.scale !== undefined && this.scale !== 1,
+         canNavigateBack: this._history.canNavigateBack,
+         canNavigateForward: this._history.canNavigateForward,
+         appearance: this.appearance,
+     });
+ }
+
+ protected override _handleInternalLinkClick(link: HTMLAnchorElement): void {
+     let href = link.getAttribute('href') ?? '';
+     // Only in-document SDT anchors are handled here
+     if (href.startsWith('#sdt-')) this.navigate({ href }, { behavior: 'smooth', block: 'center' });
+ }
+
+ protected override _setScale(scale: number) {
+     this.scale = scale;
+     let cssScale = scale.toFixed(3);
+     if (!CSS.supports('scale', cssScale)) return; // without support for `scale`, only the field above is updated
+     // Expose the scale to the stylesheet through the --scale custom property
+     this._iframeDocument.documentElement.style.setProperty('--scale', cssScale);
+     if (isSafari) {
+         this._iframeCoordScaleFactor = scale;
+     }
+ }
+
+ override getReadAloudBlock(element: Element): Element | null {
+ return element.closest('[data-ref-path]'); // the element itself or its nearest ancestor carrying a ref path
+ }
+
+ protected override _getRoots(): HTMLElement[] {
+ return [this._iframeDocument.body]; // the iframe body is the only root
+ }
+
+ async setFindState(state: FindState) { // apply a new find state: stop, restart, or adjust the current search
+ let previousState = this._findState;
+ this._findState = state;
+ if (!state.active && previousState && previousState.active !== state.active) { // the search was just deactivated
+ if (this._find) {
+ this._find = null;
+ this._handleViewUpdate();
+ }
+ }
+ else if (state.active) {
+ if (!this._find // start a fresh search when none exists or any search parameter changed
+ || !previousState
+ || previousState.query !== state.query
+ || previousState.caseSensitive !== state.caseSensitive
+ || previousState.entireWord !== state.entireWord
+ || previousState.active !== state.active) {
+ this._find = new DefaultFindProcessor({
+ findState: { ...state },
+ onSetFindState: (result) => {
+ this._options.onSetFindState({
+ ...state,
+ result: {
+ total: result.total ?? 0,
+ index: result.index ?? 0,
+ snippets: result.snippets ?? [],
+ annotation: ( // highlight annotation for the current match, when it maps to a source position
+ result.range
+ && this.getAnnotationFromRange(result.range.toRange(), 'highlight')
+ ) ?? undefined,
+ currentSnippet: result.snippets?.[result.index ?? 0] ?? '',
+ currentPageLabel: null,
+ },
+ });
+ if (result.range) {
+ this._a11yVirtualCursorTarget = getStartElement(result.range);
+ }
+ },
+ });
+ await this._find.run(
+ this._searchContext,
+ this._lastSelectionRange ?? undefined,
+ );
+ this.findNext(); // move to the first result
+ }
+ else {
+ if (previousState && previousState.highlightAll !== state.highlightAll) { // only the highlight-all flag was toggled
+ this._find.findState.highlightAll = state.highlightAll;
+ this._renderAnnotations();
+ }
+ if (previousState && state.index !== null && previousState.index !== state.index) { // a different result index was requested
+ this._find.position = state.index;
+ let result = this._find.getResults()[state.index];
+ if (result) {
+ scrollIntoView(result.range.toRange(), { block: 'center' });
+ }
+ this._renderAnnotations();
+ }
+ }
+ }
+ }
+
+ findNext() {
+     if (!this._find) return;
+     // Advance to the next match and bring it into view, if there is one
+     let match = this._find.next();
+     if (match) {
+         scrollIntoView(match.range.toRange(), { block: 'center' });
+     }
+     this._renderAnnotations();
+ }
+
+ findPrevious() {
+     if (!this._find) return;
+     // Step back to the previous match and bring it into view, if there is one
+     let match = this._find.prev();
+     if (match) {
+         scrollIntoView(match.range.toRange(), { block: 'center' });
+     }
+     this._renderAnnotations();
+ }
+
+ override async print() {
+ this._iframeWindow.print(); // delegate to the iframe window's own print()
+ }
+
+ setSidebarOpen(_sidebarOpen: boolean) {
+ // Ignore: this view does nothing with the sidebar state
+ }
+}
+
+export default SDTView;
diff --git a/src/dom/sdt/stylesheets/sdt.scss b/src/dom/sdt/stylesheets/sdt.scss
new file mode 100644
index 000000000..e34930830
--- /dev/null
+++ b/src/dom/sdt/stylesheets/sdt.scss
@@ -0,0 +1,165 @@
+:root {
+ &:not(.use-original-font) {
+ font-family: var(--content-font-family, "Georgia", serif);
+ }
+ text-align: justify;
+ text-rendering: optimizeLegibility;
+
+ // Dynamic leading (from readium-css)
+ --content-line-height-compensation: 1;
+ --content-line-height: calc(
+ (1em + (2ex - 1ch) - ((1rem - 16px) * 0.1667))
+ * var(--content-line-height-compensation)
+ * var(--content-line-height-adjust, 1.2)
+ );
+ --content-word-spacing: calc(var(--content-word-spacing-adjust, 0) * 1%);
+ --content-letter-spacing: calc(var(--content-letter-spacing-adjust, 0) * 1em);
+
+ font-size: 1.1rem;
+ background-color: var(--background-color);
+ color: var(--text-color);
+
+ --link-color: #0000ee;
+ --visited-link-color: #551a8b;
+
+ &[data-color-scheme="dark"] {
+ --link-color: #63caff;
+ --visited-link-color: #0099e5;
+ }
+}
+
+body {
+ margin-inline: auto;
+ padding: 3rem;
+ overflow-wrap: break-word;
+
+ :root[data-page-width="narrow"] & {
+ max-inline-size: 650px;
+ }
+
+ :root[data-page-width="normal"] & {
+ max-inline-size: 800px;
+ }
+
+ :root[data-page-width="full"] & {
+ max-inline-size: 100%;
+ }
+
+ :root.hyphenate & {
+ hyphens: auto;
+ }
+
+ &, * {
+ line-height: var(--content-line-height);
+ word-spacing: var(--content-word-spacing);
+ letter-spacing: var(--content-letter-spacing);
+ }
+}
+
+::selection {
+ background-color: var(--selection-color);
+}
+
+:link {
+ color: var(--link-color);
+}
+
+:visited {
+ color: var(--visited-link-color);
+}
+
+// Internal cross-reference links (citations, footnotes)
+a.sdt-ref {
+ color: var(--link-color);
+ text-decoration: none;
+ cursor: pointer;
+
+ &:hover {
+ text-decoration: underline;
+ }
+}
+
+h1, h2, h3, h4, h5, h6 {
+ text-align: start;
+ text-wrap: balance;
+ hyphens: none !important;
+}
+
+// Tables
+table {
+ border-collapse: collapse;
+ width: 100%;
+ margin-block: 1em;
+
+ th, td {
+ border: 1px solid var(--text-color, #333);
+ padding: 0.4em 0.6em;
+ text-align: start;
+ }
+
+ th {
+ font-weight: bold;
+ background: rgba(128, 128, 128, 0.1);
+ }
+}
+
+// Block types
+.sdt-math {
+ font-family: serif; // TODO
+ text-align: center;
+ margin-block: 0.5em;
+}
+
+.sdt-image {
+ text-align: center;
+ color: #666;
+ font-style: italic;
+ margin-block: 1em;
+}
+
+.sdt-note {
+ font-size: 0.9em;
+ color: #555;
+ border-inline-start: 3px solid #ccc;
+ padding-inline-start: 1em;
+ margin-block: 0.5em;
+
+ :root[data-color-scheme="dark"] & {
+ color: #aaa;
+ border-color: #555;
+ }
+}
+
+.sdt-reference {
+ font-size: 0.9em;
+}
+
+figcaption {
+ font-size: 0.9em;
+ text-align: center;
+ margin-block: 0.5em;
+ color: #555;
+
+ :root[data-color-scheme="dark"] & {
+ color: #aaa;
+ }
+}
+
+pre {
+ overflow-x: auto;
+ font-size: 0.85em;
+ background: rgba(128, 128, 128, 0.05);
+ padding: 1em;
+ border-radius: 4px;
+}
+
+blockquote {
+ border-inline-start: 3px solid #ccc;
+ padding-inline-start: 1em;
+ margin-inline-start: 0;
+ font-style: italic;
+
+ :root[data-color-scheme="dark"] & {
+ border-color: #555;
+ }
+}
diff --git a/src/dom/snapshot/reading-mode/index.ts b/src/dom/snapshot/reading-mode/index.ts
deleted file mode 100644
index 73b90385f..000000000
--- a/src/dom/snapshot/reading-mode/index.ts
+++ /dev/null
@@ -1,177 +0,0 @@
-import readingModeSCSS from '../stylesheets/reading-mode.scss';
-import { Readability } from "@abejellinek/readability-keep-nodes";
-import { iterateWalker } from "../../common/lib/nodes";
-import { enumerate } from "../../common/lib/collection";
-import { NodeMapping } from "./node-mapping";
-
-export class ReadingMode {
- private readonly _doc: Document;
-
- private readonly _mapping = new NodeMapping();
-
- private readonly _preFragment: DocumentFragment;
-
- private readonly _originalStyleSheets = new Map;
-
- private readonly _style: HTMLStyleElement;
-
- private _enabled = false;
-
- constructor(doc: Document) {
- this._doc = doc;
- this._preFragment = doc.createDocumentFragment();
- this._style = doc.createElement('style');
- this._style.textContent = readingModeSCSS;
-
- for (let styleSheet of [...this._doc.styleSheets, ...this._doc.adoptedStyleSheets]) {
- if (styleSheet.disabled) {
- continue;
- }
- this._originalStyleSheets.set(styleSheet, styleSheet.ownerNode);
- }
- }
-
- get enabled() {
- return this._enabled;
- }
-
- set enabled(enabled: boolean) {
- if (enabled === this._enabled) {
- return;
- }
- if (enabled) {
- this._enable();
- }
- else {
- this._disable();
- }
- this._enabled = enabled;
- }
-
- get preBody(): HTMLBodyElement {
- if (this._enabled) {
- return this._preFragment.firstElementChild as HTMLBodyElement;
- }
- else {
- return this._doc.body as HTMLBodyElement;
- }
- }
-
- mapNodeToFocus(node: Node) {
- if (!this._enabled) {
- throw new Error('Not enabled');
- }
- let mappedNode = this._mapping.getByPre(node);
- if (!mappedNode || !this._doc.body.contains(mappedNode)) {
- return null;
- }
- return mappedNode;
- }
-
- mapRangeToFocus(range: Range) {
- let startContainer = this.mapNodeToFocus(range.startContainer);
- let endContainer = this.mapNodeToFocus(range.endContainer);
- if (!startContainer || !endContainer) {
- return null;
- }
- let newRange = this._doc.createRange();
- newRange.setStart(startContainer, range.startOffset);
- newRange.setEnd(endContainer, range.endOffset);
- return newRange;
- }
-
- mapNodeFromFocus(node: Node) {
- if (!this._enabled) {
- throw new Error('Not enabled');
- }
- let mappedNode = this._mapping.getByPost(node);
- if (!mappedNode || !this._preFragment.contains(mappedNode)) {
- return null;
- }
- return mappedNode;
- }
-
- mapRangeFromFocus(range: Range) {
- let startContainer = this.mapNodeFromFocus(range.startContainer);
- let endContainer = this.mapNodeFromFocus(range.endContainer);
- if (!startContainer || !endContainer) {
- return null;
- }
- let newRange = this._doc.createRange();
- newRange.setStart(startContainer, range.startOffset);
- newRange.setEnd(endContainer, range.endOffset);
- return newRange;
- }
-
- private _enable() {
- let initMapping = () => {
- let clonedDoc = this._doc.cloneNode(true) as Document;
-
- let originalNodes = [...iterateWalker(this._doc.createTreeWalker(this._doc.body, NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT))];
- for (let [i, mappedNode] of enumerate(iterateWalker(clonedDoc.createTreeWalker(clonedDoc.body, NodeFilter.SHOW_TEXT | NodeFilter.SHOW_ELEMENT)))) {
- this._mapping.setByPre(originalNodes[i], mappedNode);
- }
-
- let fragmentBody = this._doc.createElement('body');
- for (let child of [...this._doc.body.childNodes]) {
- if (child.nodeType === Node.ELEMENT_NODE && (child as Element).id === 'annotation-overlay') {
- continue;
- }
- fragmentBody.append(child);
- }
- this._preFragment.replaceChildren(fragmentBody);
-
- return clonedDoc;
- };
-
- let clonedDoc = initMapping();
- let readability = new Readability(clonedDoc, {
- serializer: node => node,
- reload: () => {
- this._disable();
- clonedDoc = initMapping();
- return clonedDoc;
- },
- });
- Object.defineProperty(readability, '_setNodeTag', {
- value: (node: Node, _newTagName: string) => {
- // We don't really care about the element changes Readability wants to make
- // (mostly h1 -> h2), and letting it make them would break our mappings
- return node;
- }
- });
- Object.defineProperty(readability, '_fixRelativeUris', {
- value: () => {
- // Leave links alone - we've already handled them
- }
- });
- let root = readability.parse()?.content;
- if (!root) {
- throw new Error('Readability failed');
- }
- this._doc.body.prepend(root);
-
- for (let [styleSheet, ownerNode] of this._originalStyleSheets) {
- styleSheet.disabled = true;
- ownerNode?.remove();
- }
- this._doc.head.append(this._style);
- }
-
- private _disable() {
- this._doc.body.replaceChildren(
- ...this._preFragment.firstElementChild!.childNodes,
- this._doc.body.querySelector(':scope > #annotation-overlay')!,
- );
-
- for (let [styleSheet, ownerNode] of this._originalStyleSheets) {
- styleSheet.disabled = false;
- if (ownerNode) {
- this._doc.head.append(ownerNode);
- }
- }
- this._style.remove();
- this._mapping.clear();
- this._preFragment.replaceChildren();
- }
-}
diff --git a/src/dom/snapshot/reading-mode/node-mapping.ts b/src/dom/snapshot/reading-mode/node-mapping.ts
deleted file mode 100644
index c4c76cc05..000000000
--- a/src/dom/snapshot/reading-mode/node-mapping.ts
+++ /dev/null
@@ -1,79 +0,0 @@
-export class NodeMapping {
- private readonly _preToPost = new Map();
-
- private readonly _postToPre = new Map();
-
- get size() {
- return this._preToPost.size;
- }
-
- [Symbol.iterator](): MapIterator<[Node, Node]> {
- return this._preToPost[Symbol.iterator]();
- }
-
- clear(): void {
- this._preToPost.clear();
- this._postToPre.clear();
- }
-
- deleteByPre(preKey: Node): boolean {
- if (!this._preToPost.has(preKey)) {
- return false;
- }
- let postKey = this._preToPost.get(preKey);
- this._preToPost.delete(preKey);
- this._postToPre.delete(postKey!);
- return true;
- }
-
- deleteByPost(postKey: Node): boolean {
- if (!this._postToPre.has(postKey)) {
- return false;
- }
- let preKey = this._postToPre.get(postKey);
- this._preToPost.delete(preKey!);
- this._postToPre.delete(postKey);
- return true;
- }
-
- entries(): MapIterator<[Node, Node]> {
- return this._preToPost.entries();
- }
-
- forEach(callbackfn: (value: Node, key: Node, map: Map) => void, thisArg?: any): void {
- this._preToPost.forEach(callbackfn, thisArg);
- }
-
- getByPre(preKey: Node): Node | undefined {
- return this._preToPost.get(preKey);
- }
-
- getByPost(postKey: Node): Node | undefined {
- return this._postToPre.get(postKey);
- }
-
- hasByPre(preKey: Node): boolean {
- return this._preToPost.has(preKey);
- }
-
- hasByPost(postKey: Node): boolean {
- return this._postToPre.has(postKey);
- }
-
- preKeys(): MapIterator {
- return this._preToPost.keys();
- }
-
- postKeys(): MapIterator {
- return this._postToPre.keys();
- }
-
- setByPre(preKey: Node, postKey: Node): this {
- if (preKey.getRootNode() === postKey.getRootNode()) {
- throw new Error('Nodes are in same root');
- }
- this._preToPost.set(preKey, postKey);
- this._postToPre.set(postKey, preKey);
- return this;
- }
-}
diff --git a/src/dom/snapshot/snapshot-view.ts b/src/dom/snapshot/snapshot-view.ts
index 193daa6dc..13ea50413 100644
--- a/src/dom/snapshot/snapshot-view.ts
+++ b/src/dom/snapshot/snapshot-view.ts
@@ -6,17 +6,16 @@ import {
NewAnnotation,
ViewStats,
OutlineItem,
- ReadAloudGranularity
+ Position,
} from "../../common/types";
import {
getBoundingPageRect,
getInnerText,
getStartElement,
- moveRangeEndsIntoTextNodes,
- PersistentRange
} from "../common/lib/range";
import {
CssSelector,
+ isSelector,
textPositionFromRange,
Selector,
textPositionToRange
@@ -39,16 +38,14 @@ import { isPageRectVisible } from "../common/lib/rect";
import { debounceUntilScrollFinishes, isSafari } from "../../common/lib/utilities";
import { scrollIntoView } from "../common/lib/scroll-into-view";
import { SORT_INDEX_LENGTH, SORT_INDEX_LENGTH_OLD } from "./defines";
-import { ReadingMode } from "./reading-mode";
import { detectLang } from '../../common/lib/detect-lang';
+import type { StructuredDocumentText } from '../../../structured-document-text/schema';
class SnapshotView extends DOMView {
protected _find: DefaultFindProcessor | null = null;
private _isDynamicThemeSupported = true;
- protected _readingMode!: ReadingMode;
-
private get _searchContext() {
let searchContext = createSearchContext(getVisibleTextNodes(this._iframeDocument.body));
Object.defineProperty(this, '_searchContext', { value: searchContext });
@@ -156,8 +153,6 @@ class SnapshotView extends DOMView {
}
}
- this._readingMode = new ReadingMode(this._iframeDocument);
-
this._iframeDocument.addEventListener('visibilitychange', this._handleVisibilityChange.bind(this));
return super._handleIFrameLoaded();
@@ -188,8 +183,6 @@ class SnapshotView extends DOMView {
try {
// Update old sortIndexes (determined based on length)
- // We used to count characters from , which was volatile and led
- // to unnecessarily large sortIndexes. Now we count from .
if (!this._options.readOnly) {
this._options.onUpdateAnnotations(this._annotations
.filter(a => !a.readOnly && a.sortIndex && a.sortIndex.length === SORT_INDEX_LENGTH_OLD)
@@ -224,9 +217,7 @@ class SnapshotView extends DOMView {
private _initOutline() {
let bodyFontSize = parseFloat(getComputedStyle(this._iframeDocument.body).fontSize);
let flatOutline: (OutlineItem & { level: number })[] = [];
- // Create a flat outline array from the headings on the page
for (let heading of this._iframeDocument.body.querySelectorAll('h1, h2, h3, h4, h5, h6') as NodeListOf) {
- // If the site uses semantic HTML, we can try to skip probably-irrelevant headings
if (heading.closest('aside, nav, footer, template, [hidden]')) {
continue;
}
@@ -253,7 +244,6 @@ class SnapshotView extends DOMView {
level
});
}
- // For each heading, move subsequent headings with deeper levels into its items array
let outline = [];
let stack: (OutlineItem & { level: number })[] = [];
for (let item of flatOutline) {
@@ -276,7 +266,6 @@ class SnapshotView extends DOMView {
return null;
}
let text = type == 'highlight' || type == 'underline' ? getInnerText(range).trim() : undefined;
- // If this annotation type wants text, but we didn't get any, abort
if (text === '') {
return null;
}
@@ -306,14 +295,10 @@ class SnapshotView extends DOMView {
}
count += node.nodeValue!.trim().length;
}
- // If we never terminated, just return 0
return 0;
};
- let mappedRange = this._readingMode.enabled ? this._readingMode.mapRangeFromFocus(range) : range;
- let count = mappedRange
- ? getCount(this._readingMode.preBody, mappedRange.startContainer, mappedRange.startOffset)
- : 0;
+ let count = getCount(this._iframeDocument.body, range.startContainer, range.startOffset);
let countString = String(count).padStart(SORT_INDEX_LENGTH, '0');
if (countString.length > SORT_INDEX_LENGTH) {
countString = countString.substring(0, SORT_INDEX_LENGTH);
@@ -321,21 +306,56 @@ class SnapshotView extends DOMView {
return countString;
}
- toSelector(range: Range): Selector | null {
- if (this._readingMode.enabled) {
- let newRange = this._readingMode.mapRangeFromFocus(range);
- if (!newRange) {
- return null;
+	// Snapshot locations are expressed purely as a vertical scroll percentage,
+	// so the SDT data itself is not consulted.
+	getSDTLocation(_sdtData: StructuredDocumentText): NavLocation | null {
+		let scrollYPercent = this._getScrollYPercent();
+		return { scrollYPercent };
+	}
+
+	// Index of the first top-level, non-artifact SDT block that has a
+	// `selectorMap` anchor and whose element's bottom edge lies below the top
+	// of the viewport. Returns null when there is no SDT content or no block
+	// qualifies.
+	getVisibleBlockIndex(sdtData: StructuredDocumentText | null): number | null {
+		let blocks = sdtData?.content;
+		if (!blocks?.length) return null;
+		for (let [index, block] of blocks.entries()) {
+			let anchor = block.anchor;
+			if (block.artifact || !anchor || !('selectorMap' in anchor)) {
+				continue;
+			}
+			let el: Element | null;
+			try {
+				el = this._iframeDocument.body.querySelector(anchor.selectorMap);
+			}
+			catch {
+				// querySelector() rejected the selector; skip this block
+				continue;
+			}
+			if (el && el.getBoundingClientRect().bottom > 0) {
+				return index;
+			}
+		}
+		return null;
+	}
+	/**
+	 * Scroll the element anchored by the top-level SDT block at `blockIndex`
+	 * to the top of the view.
+	 *
+	 * Does nothing when the index is out of range, the block has no
+	 * `selectorMap` anchor, or the selector doesn't resolve to an element.
+	 */
+	navigateToSDTBlock(sdtData: StructuredDocumentText, blockIndex: number) {
+		// blockIndex is caller-supplied and may be stale or out of range, so
+		// don't assume the lookup yields a block
+		let anchor = sdtData.content[blockIndex]?.anchor;
+		if (!anchor || !('selectorMap' in anchor)) return;
+		let el: Element | null;
+		try {
+			el = this._iframeDocument.body.querySelector(anchor.selectorMap);
+		}
+		catch (e) {
+			// querySelector() throws on a malformed selector. getVisibleBlockIndex()
+			// tolerates those, so navigation shouldn't blow up on them either.
+			console.warn('Unable to resolve SDT block selector', anchor.selectorMap, e);
+			return;
+		}
+		el?.scrollIntoView({ behavior: 'instant', block: 'start' });
+	}
+
+	// Vertical scroll offset as a percentage of the scrollable height
+	// (body scroll height minus the viewport's client height).
+	private _getScrollYPercent(): number {
+		let scrollableHeight = this._iframeDocument.body.scrollHeight
+			- this._iframeDocument.documentElement.clientHeight;
+		// Floor the divisor at 1 so we never divide by zero or a negative number
+		let divisor = Math.max(1, scrollableHeight);
+		return this._iframeWindow.scrollY / divisor * 100;
+	}
+
+	// Sort index and page label for an annotation at `position`, or null when
+	// the selector can't be resolved to a displayed range. Snapshots always
+	// report an empty page label.
+	getAnnotationMeta(position: Selector): { sortIndex: string; pageLabel: string } | null {
+		let range = this.toDisplayedRange(position);
+		return range
+			? { sortIndex: this._getSortIndex(range), pageLabel: '' }
+			: null;
+	}
+
+ toSelector(range: Range): Selector | null {
let doc = range.commonAncestorContainer.ownerDocument;
if (!doc) return null;
let targetNode;
- // In most cases, the range will wrap a single child of the
- // commonAncestorContainer. Build a selector targeting that element,
- // not the container.
if (range.startContainer === range.endContainer
&& range.startOffset == range.endOffset - 1
&& range.startContainer.nodeType == Node.ELEMENT_NODE) {
@@ -354,8 +374,6 @@ class SnapshotView extends DOMView {
type: 'CssSelector',
value: targetElementQuery
};
- // If the user has highlighted the full text content of the element, no need to add a
- // TextPositionSelector.
if (range.toString().trim() !== (targetElement.textContent || '').trim()) {
selector.refinedBy = textPositionFromRange(range, targetElement) || undefined;
}
@@ -366,15 +384,17 @@ class SnapshotView extends DOMView {
}
}
- toDisplayedRange(selector: Selector): Range | null {
+ toDisplayedRange(position: Position): Range | null {
+ if (!isSelector(position)) return null;
+ let selector = position;
switch (selector.type) {
case 'CssSelector': {
if (selector.refinedBy && selector.refinedBy.type != 'TextPositionSelector') {
throw new Error('CssSelectors can only be refined by TextPositionSelectors');
}
- let root = this._readingMode.preBody.querySelector(selector.value);
+ let root = this._iframeDocument.body.querySelector(selector.value);
if (!root) {
- console.error(`Unable to locate selector root for selector '${selector.value}' (reading mode: ${this._readingMode.enabled})`);
+ console.error(`Unable to locate selector root for selector '${selector.value}'`);
return null;
}
let range;
@@ -385,19 +405,9 @@ class SnapshotView extends DOMView {
range = this._iframeDocument.createRange();
range.selectNodeContents(root);
}
- if (this._readingMode.enabled) {
- let newRange = this._readingMode.mapRangeToFocus(range);
- if (!newRange) {
- newRange = this._readingMode.mapRangeToFocus(moveRangeEndsIntoTextNodes(range));
- }
- if (!newRange) {
- return null;
- }
- range = newRange;
- }
- if (!range.getClientRects().length) {
+ if (!range?.getClientRects().length) {
try {
- range.selectNode(range.commonAncestorContainer);
+ range?.selectNode(range.commonAncestorContainer);
}
catch (e) {
return null;
@@ -423,10 +433,7 @@ class SnapshotView extends DOMView {
navigateToSelector(selector: Selector, options: NavigateOptions = {}) {
let range = this.toDisplayedRange(selector);
if (!range) {
- // Suppress log when failure is likely just due to reading mode
- if (!this._readingMode.enabled) {
- console.warn('Unable to resolve selector to range', selector);
- }
+ console.warn('Unable to resolve selector to range', selector);
return;
}
@@ -442,8 +449,6 @@ class SnapshotView extends DOMView {
if (elem) {
elem.scrollIntoView(options);
- // Remember which node was navigated to for screen readers to place
- // virtual cursor on it later. Used for navigating between sections in the outline.
debounceUntilScrollFinishes(this._iframeDocument).then(() => {
this._a11yVirtualCursorTarget = elem;
});
@@ -453,17 +458,15 @@ class SnapshotView extends DOMView {
}
protected override _updateViewState() {
- let scale = Math.round(this.scale * 1000) / 1000; // Three decimal places
+ let scale = Math.round(this.scale * 1000) / 1000;
let scrollYPercent = this._iframeWindow.scrollY
/ (this._iframeDocument.body.scrollHeight - this._iframeDocument.documentElement.clientHeight)
* 100;
- // The calculation above shouldn't ever yield NaN, but just to be safe:
if (isNaN(scrollYPercent)) {
scrollYPercent = 0;
}
- // Keep it within [0, 100]
scrollYPercent = Math.max(0, Math.min(100, scrollYPercent));
- scrollYPercent = Math.round(scrollYPercent * 10) / 10; // One decimal place
+ scrollYPercent = Math.round(scrollYPercent * 10) / 10;
let viewState: SnapshotViewState = {
scale,
scrollYPercent,
@@ -481,7 +484,6 @@ class SnapshotView extends DOMView {
canNavigateBack: this._history.canNavigateBack,
canNavigateForward: this._history.canNavigateForward,
appearance: this.appearance,
- readingModeEnabled: this._readingMode.enabled,
};
this._options.onChangeViewStats(viewStats);
}
@@ -492,17 +494,13 @@ class SnapshotView extends DOMView {
protected override _updateColorScheme() {
super._updateColorScheme();
- if (this._isDynamicThemeSupported || this._readingMode.enabled) {
- // Pages with a reasonable amount of CSS: Use Dark Reader
+ if (this._isDynamicThemeSupported) {
this._iframeDocument.body.classList.remove('force-static-theme');
if (!('DarkReader' in this._iframeWindow)) {
let url = this._getSnapshotLocation() || 'about:blank';
- // Dark Reader gets the page location by accessing the global property 'location'
- // Horrifying, but it works
this._iframeWindow.eval(`{ let location = new URL(${JSON.stringify(url)}); ${darkReaderJS} }`);
}
let DarkReader = this._iframeWindow.DarkReader!;
- // Stock light theme: Just let the page use its default styles
if (this._themeColorScheme === 'light' && this._theme.id === 'light') {
DarkReader.disable();
}
@@ -515,14 +513,12 @@ class SnapshotView extends DOMView {
lightSchemeTextColor: this._theme.foreground,
}, {
invert: [
- // Invert Mediawiki equations
'.mw-invert'
]
} satisfies Partial as DynamicThemeFix);
}
}
else {
- // Pages with a *lot* of CSS: Use static theme
if ('DarkReader' in this._iframeWindow) {
this._iframeWindow.DarkReader!.disable();
}
@@ -561,8 +557,6 @@ class SnapshotView extends DOMView {
// Setters that get called once there are changes in reader._state
// ***
- // Unlike annotation, selection and overlay popups, find popup open state is determined
- // with .open property. All popup properties are preserved even when it's closed
async setFindState(state: FindState) {
let previousState = this._findState;
this._findState = state;
@@ -599,7 +593,6 @@ class SnapshotView extends DOMView {
}
});
if (result.range) {
- // Record the result that screen readers should focus on after search popup is closed
this._a11yVirtualCursorTarget = getStartElement(result.range);
}
},
@@ -650,33 +643,6 @@ class SnapshotView extends DOMView {
}
}
- override getReadAloudRanges(granularity: ReadAloudGranularity): Range[] {
- if (this._readingMode.enabled) {
- return super.getReadAloudRanges(granularity);
- }
-
- let segmentsWithReadingModeEnabled = this._keepSelection(() => {
- try {
- this._readingMode.enabled = true;
- return super.getReadAloudRanges(granularity).map((range) => {
- let mappedRange = this._readingMode.mapRangeFromFocus(range);
- if (!mappedRange) return null;
- return new PersistentRange(mappedRange);
- }).filter(Boolean) as PersistentRange[];
- }
- finally {
- this._readingMode.enabled = false;
- }
- });
- this._handleViewUpdate(false);
-
- if (segmentsWithReadingModeEnabled.length) {
- return segmentsWithReadingModeEnabled.map(r => r.toRange());
- }
-
- return super.getReadAloudRanges(granularity);
- }
-
protected _setScale(scale: number) {
this.scale = scale;
@@ -684,7 +650,6 @@ class SnapshotView extends DOMView {
if (CSS.supports('scale', scaleString)) {
this._iframeDocument.documentElement.style.setProperty('--scale', scaleString);
if (isSafari) {
- // Scaling doesn't affect getClientRects() in Safari
this._iframeCoordScaleFactor = scale;
}
}
@@ -694,9 +659,6 @@ class SnapshotView extends DOMView {
return;
}
- // Calculate the default root font size, then multiply by scale.
- // Can't just set font-size to an em value -- the page itself might set a font-size on , and we need to
- // scale relative to that.
this._iframeDocument.documentElement.style.fontSize = '';
let defaultSize = parseFloat(getComputedStyle(this._iframeDocument.documentElement).fontSize);
this._iframeDocument.documentElement.style.fontSize = (defaultSize * scale) + 'px';
@@ -735,32 +697,6 @@ class SnapshotView extends DOMView {
setSidebarOpen(_sidebarOpen: boolean) {
// Ignore
}
-
- setReadingModeEnabled(enabled: boolean) {
- this._readingMode.enabled = enabled;
- // Hide inaccessible annotations
- if (enabled) {
- this._options.onSetHiddenAnnotations(
- this._annotations
- .filter(a => !this.toDisplayedRange(a.position))
- .map(a => a.id)
- );
- }
- else {
- this._options.onSetHiddenAnnotations([]);
- }
- // Reinitialize outline to remove inaccessible sections
- this._initOutline();
- // Reset Read Aloud segments, since ranges will no longer be valid
- if (this._readAloud.state?.active && this._readAloud.state.segments !== null) {
- this._options.onSetReadAloudState({ segments: null });
- }
- // Wait a frame due to layout not updating synchronously after
- // is replaced in Firefox
- requestAnimationFrame(() => {
- this._handleViewUpdate();
- });
- }
}
export interface SnapshotViewState extends DOMViewState {
diff --git a/src/dom/snapshot/stylesheets/reading-mode.scss b/src/dom/snapshot/stylesheets/reading-mode.scss
deleted file mode 100644
index a2ccdbd54..000000000
--- a/src/dom/snapshot/stylesheets/reading-mode.scss
+++ /dev/null
@@ -1,75 +0,0 @@
-:root {
- &:not(.use-original-font) {
- font-family: var(--content-font-family, "Georgia", serif);
- }
- text-align: justify;
- text-rendering: optimizeLegibility;
-
- // https://readium.org/readium-css/docs/CSS08-defaults.html#dynamic-leading-line-height
- --content-line-height-compensation: 1;
- --content-line-height: calc(
- (1em + (2ex - 1ch) - ((1rem - 16px) * 0.1667))
- * var(--content-line-height-compensation)
- * var(--content-line-height-adjust, 1.2)
- );
- --content-word-spacing: calc(var(--content-word-spacing-adjust, 0) * 1%);
- --content-letter-spacing: calc(var(--content-letter-spacing-adjust, 0) * 1em);
-
- font-size: 1.1rem;
- font-family: Georgia, serif;
- background-color: var(--background-color);
- color: var(--text-color);
-
- --link-color: #0000ee;
- --visited-link-color: #551a8b;
-
- &[data-color-scheme="dark"] {
- --link-color: #63caff;
- --visited-link-color: #0099e5;
- }
-}
-
-body {
- margin-inline: auto;
- padding: 3rem;
-
- :root[data-page-width="narrow"] & {
- max-inline-size: 650px;
- }
-
- :root[data-page-width="normal"] & {
- max-inline-size: 800px;
- }
-
- :root[data-page-width="full"] & {
- max-inline-size: 100%;
- }
-
- :root.hyphenate & {
- hyphens: auto;
- }
-
- &, * {
- line-height: var(--content-line-height);
- word-spacing: var(--content-word-spacing);
- letter-spacing: var(--content-letter-spacing);
- }
-}
-
-img, svg {
- max-width: 100%;
-}
-
-:link {
- color: var(--link-color);
-}
-
-:visited {
- color: var(--visited-link-color);
-}
-
-h1, h2, h3, h4, h5, h6 {
- text-align: start;
- text-wrap: balance;
- hyphens: none !important;
-}
diff --git a/src/index.dev.js b/src/index.dev.js
index ed7aab71b..40b2ba921 100644
--- a/src/index.dev.js
+++ b/src/index.dev.js
@@ -1,4 +1,5 @@
import Reader from './common/reader';
+import { generateSDT } from './worker-client.dev';
import pdf from '../demo/pdf';
import epub from '../demo/epub';
import snapshot from '../demo/snapshot';
@@ -10,6 +11,47 @@ import brandFTL from '../locales/en-US/brand.ftl';
// eslint-disable-next-line no-process-env
const ZOTERO_API_KEY = process.env.ZOTERO_API_KEY;
+const READ_ALOUD_CACHE = 'zotero-read-aloud';
+
+// Synthetic cache key for one voice/text pair. The host is never fetched;
+// the URL only serves as a key into the Cache API.
+function readAloudCacheURL(voiceId, text) {
+	let query = new URLSearchParams({ voice: voiceId, text });
+	return 'https://read-aloud.zotero.invalid/audio?' + query.toString();
+}
+
+// Look up cached audio plus its word timestamps for a voice/text pair.
+// Resolves to `{ audio, timestamps }`, or null on a cache miss or any error
+// (errors are logged, never thrown).
+async function loadCachedReadAloudAudio(voiceId, text) {
+	let audio;
+	let timestamps;
+	try {
+		let cache = await caches.open(READ_ALOUD_CACHE);
+		let audioKey = readAloudCacheURL(voiceId, text);
+		let timestampsKey = audioKey + '&meta=timestamps';
+		let [audioResponse, timestampsResponse] = await Promise.all(
+			[audioKey, timestampsKey].map(key => cache.match(key))
+		);
+		// Both entries must be present. A lone audio entry means an older
+		// format -- treat it as a miss so we re-fetch and rewrite paired.
+		if (!(audioResponse && timestampsResponse)) {
+			return null;
+		}
+		timestamps = await timestampsResponse.json();
+		audio = await audioResponse.blob();
+	}
+	catch (e) {
+		console.error(e);
+		return null;
+	}
+	// A stored JSON `null` means "no timestamps"; surface it as undefined
+	return { audio, timestamps: timestamps ?? undefined };
+}
+
+// Write the audio and its timestamps to the cache as a pair of entries.
+// Missing timestamps are stored as JSON `null` so the pair is still complete.
+// Best-effort: failures are logged and swallowed.
+async function storeCachedReadAloudAudio(voiceId, text, audio, timestamps) {
+	try {
+		let cache = await caches.open(READ_ALOUD_CACHE);
+		let audioKey = readAloudCacheURL(voiceId, text);
+		let audioWrite = cache.put(audioKey, new Response(audio));
+		let timestampsWrite = cache.put(
+			audioKey + '&meta=timestamps',
+			Response.json(timestamps ?? null)
+		);
+		await Promise.all([audioWrite, timestampsWrite]);
+	}
+	catch (e) {
+		console.error(e);
+	}
+}
+
window.dev = true;
async function createReader() {
@@ -30,20 +72,37 @@ async function createReader() {
demo = snapshot;
}
+ // Set ?lastReadAloudPosition=base64(JSON) to simulate a persisted Read Aloud position
+ let savedPositionParam = urlParams.get('lastReadAloudPosition');
+ let savedPosition = null;
+ if (savedPositionParam) {
+ try {
+ savedPosition = JSON.parse(atob(savedPositionParam));
+ }
+ catch (e) {
+ console.warn('Failed to parse lastReadAloudPosition param', e);
+ }
+ }
+ let primaryViewState = savedPosition
+ ? { ...demo.state, lastReadAloudPosition: savedPosition }
+ : demo.state;
// Default to Standard without showing first-run
let readAloudVoices = { en: { tierVoices: { standard: {} } } };
+
let res = await fetch(demo.fileName);
+
let reader = new Reader({
type,
ftl: [zoteroFTL, readerFTL, brandFTL],
readOnly: false,
+ getSDT: password => generateSDT(type, demo.fileName, password),
data: {
buf: new Uint8Array(await res.arrayBuffer()),
url: new URL('/', window.location).toString()
},
// rtl: true,
annotations: demo.annotations,
- primaryViewState: demo.state,
+ primaryViewState,
sidebarWidth: 240,
sidebarView: 'annotations', //thumbnails, outline
bottomPlaceholderHeight: null,
@@ -68,6 +127,11 @@ async function createReader() {
},
onChangeViewState: function (state, primary) {
console.log('Set state', state, primary);
+ // Stash the latest persisted Read Aloud position so we can
+ // inspect/reuse it from the test harness.
+ if (primary) {
+ window._lastPersistedReadAloudPosition = state.lastReadAloudPosition ?? null;
+ }
},
onOpenTagsPopup(annotationID, left, top) {
alert(`Opening Zotero tagbox popup for id: ${annotationID}, left: ${left}, top: ${top}`);
@@ -229,25 +293,15 @@ async function createReader() {
},
async getAudio(segment, voice) {
- let cacheURL = 'https://read-aloud.zotero.invalid/audio?'
- + new URLSearchParams({ voice: voice.id, text: segment.text });
- let cache;
- try {
- cache = await caches.open('zotero-read-aloud');
- let cached = await cache.match(cacheURL);
- if (cached) {
- return { audio: await cached.blob() };
- }
- }
- catch (e) {
- console.error(e);
- }
+ let cached = await loadCachedReadAloudAudio(voice.id, segment.text);
+ if (cached) return cached;
let url;
let fetchOptions;
if (segment === 'sample') {
let params = new URLSearchParams();
params.set('voice', voice.id);
+ params.set('timestamps', '1');
url = 'https://api.zotero.org/tts/sample?' + params;
fetchOptions = {
headers: {
@@ -266,6 +320,7 @@ async function createReader() {
body: JSON.stringify({
voice: voice.id,
text: segment.text,
+ timestamps: 1,
}),
};
}
@@ -294,14 +349,30 @@ async function createReader() {
};
}
- let audio = await response.blob();
- try {
- await cache?.put(cacheURL, new Response(audio));
+ // Either we got a redirect-followed audio response, or a JSON envelope with
+ // `audioURL` and `timestamps` for word-level highlighting.
+ let audio;
+ let timestamps;
+ if (response.headers.get('Content-Type')?.includes('application/json')) {
+ let json = await response.json();
+ timestamps = json.timestamps;
+ try {
+ let audioResponse = await fetch(json.audioURL);
+ if (!audioResponse.ok) {
+ return { audio: null, error: 'unknown' };
+ }
+ audio = await audioResponse.blob();
+ }
+ catch {
+ return { audio: null, error: 'network' };
+ }
}
- catch (e) {
- console.error(e);
+ else {
+ audio = await response.blob();
}
- return { audio };
+
+ await storeCachedReadAloudAudio(voice.id, segment.text, audio, timestamps);
+ return { audio, timestamps };
},
},
onLogIn() {
diff --git a/src/pdf/pdf-view.js b/src/pdf/pdf-view.js
index 25e94ba49..73b7809a0 100644
--- a/src/pdf/pdf-view.js
+++ b/src/pdf/pdf-view.js
@@ -68,13 +68,6 @@ import { adjustTextAnnotationPosition } from './lib/text-annotation';
import { applyTransformationMatrixToInkPosition, eraseInk, smoothPath } from './lib/path';
import { History } from '../common/lib/history';
import { FindState, PDFFindController } from './pdf-find-controller';
-import {
- buildReadAloudSegments,
- buildReadAloudSegmentsFromRanges,
- getReadAloudSelectionBounds,
- splitReadAloudSegmentsBySelection
-} from './read-aloud-segments';
-import { detectLang } from '../common/lib/detect-lang';
class PDFView {
constructor(options) {
@@ -347,6 +340,8 @@ class PDFView {
onClick: () => this._handleReadAloudJumpButtonClick(),
});
this._readAloudJumpButtonParagraph = null;
+ this._readAloudJumpButtonMatch = null;
+ this._readAloudParagraphIndex = [];
this._autoScroll = new AutoScroll({
container: this._iframeWindow.document.getElementById('viewerContainer')
@@ -459,37 +454,6 @@ class PDFView {
this._findController.setDocument(this._iframeWindow.PDFViewerApplication.pdfDocument);
}
- async _initReadAloudSegments() {
- if (this._readAloudSegmentsPromise) {
- return this._readAloudSegmentsPromise;
- }
- let resolvePromise;
- this._readAloudSegmentsPromise = new Promise(r => (resolvePromise = r));
- let allParagraphs = [];
- let allSentences = [];
- let { pagesCount } = this._iframeWindow.PDFViewerApplication.pdfViewer;
- for (let pageIndex = 0; pageIndex < pagesCount; pageIndex++) {
- let pageData = await this._iframeWindow.PDFViewerApplication.pdfDocument.getPageData({ pageIndex });
- let chars = pageData.chars;
- if (!chars.length) {
- continue;
- }
- let { paragraphs, sentences } = buildReadAloudSegments(chars, pageIndex);
- let paragraphOffset = allParagraphs.length;
- for (let sentence of sentences) {
- sentence.paragraphIndex += paragraphOffset;
- }
- allParagraphs.push(...paragraphs);
- allSentences.push(...sentences);
- }
- this._readAloudSegments = {
- paragraphs: allParagraphs,
- sentences: allSentences
- };
- resolvePromise();
- return allParagraphs;
- }
-
async _setState(state, skipScroll) {
if (Number.isInteger(state.scrollMode)) {
this._iframeWindow.PDFViewerApplication.pdfViewer.scrollMode = state.scrollMode;
@@ -683,6 +647,131 @@ class PDFView {
delete this._pdfPages[pageIndex];
}
+	/**
+	 * Build the annotation metadata for `position`: its sort index and the
+	 * label of the page it is on (page 0 when `position.pageIndex` is nullish).
+	 * Used by the SDT overlay to produce source-format annotation metadata.
+	 */
+	getAnnotationMeta(position) {
+		let pageIndex = position.pageIndex ?? 0;
+		let sortIndex = getSortIndex(this._pdfPages, position);
+		let pageLabel = this._getPageLabel(pageIndex, true);
+		return { sortIndex, pageLabel };
+	}
+
+	// Location of the first visible SDT block as a `#sdt-<index>` href, or
+	// null when getVisibleBlockIndex() finds none.
+	getSDTLocation(sdtData) {
+		let blockIndex = this.getVisibleBlockIndex(sdtData);
+		if (blockIndex === null) {
+			return null;
+		}
+		return { href: '#sdt-' + blockIndex };
+	}
+
+	// Top-level SDT block index for the first non-artifact block whose
+	// rect overlaps (or is below) the current viewport, or null.
+	getVisibleBlockIndex(sdtData) {
+		let pdfViewer = this._iframeWindow?.PDFViewerApplication?.pdfViewer;
+		let viewerContainer = this._iframeWindow?.document?.getElementById('viewerContainer');
+		let pages = sdtData?.catalog?.pages;
+		if (!pdfViewer || !viewerContainer || !pages || !sdtData?.content) {
+			return null;
+		}
+		// Viewport as [left, top, right, bottom] in the container's scroll coordinates
+		let { scrollLeft, scrollTop, clientWidth, clientHeight } = viewerContainer;
+		let visibleRect = [scrollLeft, scrollTop, scrollLeft + clientWidth, scrollTop + clientHeight];
+
+		// Cover the page(s) visible in the viewport, in ascending page order.
+		// Fall back to the current page when _getVisiblePages reports nothing
+		// (e.g., before first paint).
+		let pageIndices = pdfViewer._getVisiblePages().views
+			.map(view => view.id - 1)
+			.sort((a, b) => a - b);
+		if (!pageIndices.length) {
+			let currentPageIndex = pdfViewer.currentPageNumber - 1;
+			if (!(currentPageIndex >= 0)) return null;
+			pageIndices = [currentPageIndex];
+		}
+
+		for (let pageIndex of pageIndices) {
+			let ranges = pages[pageIndex]?.contentRanges;
+			if (!ranges?.length) continue;
+			for (let range of ranges) {
+				// Each range is read as [[firstBlock, ...], [lastBlock, ...]];
+				// a missing end collapses the range to its first block
+				let firstBlock = range[0]?.[0];
+				if (firstBlock === undefined) continue;
+				let lastBlock = range[1]?.[0] ?? firstBlock;
+				for (let i = firstBlock; i <= lastBlock; i++) {
+					let block = sdtData.content[i];
+					if (block && !block.artifact && this._blockIntersectsRect(block, visibleRect)) {
+						return i;
+					}
+				}
+			}
+		}
+		return null;
+	}
+
+	// Does any of `block`'s anchor rects, projected into viewer-container
+	// coords, overlap `viewRect` (also in viewer-container coords)? If the
+	// block has no spatial info, treat it as a match so blocks without anchor
+	// data can still anchor a starting point.
+	_blockIntersectsRect(block, viewRect) {
+		let pageRects = block.anchor?.pageRects;
+		if (!pageRects?.length) return true;
+		return pageRects.some((pageRect) => {
+			let projected;
+			try {
+				// Each entry is read as [pageIndex, x1, y1, x2, y2]
+				projected = this.getPositionBoundingViewRect({
+					pageIndex: pageRect[0],
+					rects: [[pageRect[1], pageRect[2], pageRect[3], pageRect[4]]],
+				});
+			}
+			catch {
+				// Projection failed for this rect; try the next one
+				return false;
+			}
+			return quickIntersectRect(projected, viewRect);
+		});
+	}
+
+	// Current text selection as a PDFPosition, or null. Mirrors the position
+	// shape used by highlight annotations: the rects from the first selected
+	// page, plus nextPageRects when the selection spans onto the next page.
+	getSelectionPosition() {
+		if (!this._selectionRanges?.length || this._selectionRanges[0].collapsed) return null;
+		// Order by the page index stored on each range's `position` -- the same
+		// object the result is built from below. Comparing a top-level
+		// `pageIndex` yields NaN whenever that property is absent, which would
+		// leave the ranges in their incoming order.
+		// NOTE(review): confirm selection ranges carry no top-level pageIndex.
+		let ranges = this._selectionRanges
+			.slice()
+			.sort((a, b) => a.position.pageIndex - b.position.pageIndex)
+			.slice(0, 2);
+		let position = { ...ranges[0].position };
+		if (ranges.length === 2) {
+			position.nextPageRects = ranges[1].position.rects;
+		}
+		return position;
+	}
+
+	// Drop both the view's own selection ranges and the iframe's native
+	// DOM selection.
+	clearSelection() {
+		this._setSelectionRanges();
+		let domSelection = this._iframeWindow.getSelection();
+		if (domSelection) {
+			domSelection.removeAllRanges();
+		}
+	}
+
+	// Jump (instantly, without a history entry) to the first page whose
+	// content ranges include the top-level SDT block `blockIndex`. Does
+	// nothing when the SDT has no page catalog or no range contains the block.
+	navigateToSDTBlock(sdtData, blockIndex) {
+		let pages = sdtData?.catalog?.pages;
+		if (!pages) return;
+		for (let [pageIdx, page] of pages.entries()) {
+			if (!page?.contentRanges) continue;
+			for (let range of page.contentRanges) {
+				// Read range bounds as leniently as getVisibleBlockIndex() does:
+				// a range without a start is skipped, and a missing end
+				// collapses the range to its start block.
+				let startBlock = range?.[0]?.[0];
+				if (startBlock === undefined) continue;
+				let endBlock = range[1]?.[0] ?? startBlock;
+				if (blockIndex >= startBlock && blockIndex <= endBlock) {
+					this.navigate({ pageIndex: pageIdx }, { skipHistory: true, behavior: 'instant' });
+					return;
+				}
+			}
+		}
+	}
+
_getPageLabel(pageIndex, usePrevAnnotation) {
let pageLabel = this._pageLabels[pageIndex] || (pageIndex + 1).toString()/* || '-'*/;
if (usePrevAnnotation) {
@@ -1047,13 +1136,19 @@ class PDFView {
let previousState = this._readAloudState;
this._readAloudState = state;
+ if (state.segments !== previousState?.segments) {
+ this._buildReadAloudParagraphIndex(state.segments);
+ }
+
if (state.active && !previousState?.active) {
this._readAloudPositionLocked = true;
}
+ let activePosition = state.activeSegment?.sourcePosition;
+
if (state.active && previousState?.paused && !state.paused
- && state.activeSegment?.position
- && this._isPositionInViewBounds(state.activeSegment.position)) {
+ && activePosition
+ && this._isPositionInViewBounds(activePosition)) {
this._readAloudPositionLocked = true;
}
@@ -1066,22 +1161,30 @@ class PDFView {
return;
}
- if (state.activeSegment?.position) {
- // Highlight the whole paragraph containing the active segment (matching dom-view behavior)
- this._readAloudHighlightedPosition = this._getReadAloudParagraphPosition(state)
- || state.activeSegment.position;
-
- // After a sentence skip, briefly highlight the active sentence segment
- clearTimeout(this._readAloudSentenceTimeout);
- if (state.lastSkipGranularity === 'sentence') {
- this._readAloudSentenceHighlightedPosition = state.activeSegment.position;
- this._readAloudSentenceTimeout = setTimeout(() => {
+ if (activePosition?.pageIndex !== undefined) {
+ // The primary highlight tracks the user's chosen granularity; it falls
+ // back to a coarser level when finer-grained data isn't available
+ // (e.g., paragraph-granularity segments have no sentence/word data).
+ this._readAloudHighlightedPosition = this._resolveReadAloudPrimaryPosition(state, activePosition);
+
+ // After a skip whose granularity differs from the primary highlight,
+ // briefly flash the unit at the skip granularity so it's clear what
+ // the skip moved by. Only retrigger when the active segment changes
+ // so word-level updates don't keep resetting the timeout.
+ let segmentChanged = state.activeSegment !== previousState?.activeSegment;
+ if (segmentChanged) {
+ clearTimeout(this._readAloudSentenceTimeout);
+ let highlightSelector = this._resolveReadAloudSkipHighlightPosition(state, activePosition);
+ if (highlightSelector) {
+ this._readAloudSentenceHighlightedPosition = highlightSelector;
+ this._readAloudSentenceTimeout = setTimeout(() => {
+ this._readAloudSentenceHighlightedPosition = null;
+ this._render();
+ }, 2000);
+ }
+ else {
this._readAloudSentenceHighlightedPosition = null;
- this._render();
- }, 2000);
- }
- else {
- this._readAloudSentenceHighlightedPosition = null;
+ }
}
this._render();
@@ -1089,7 +1192,7 @@ class PDFView {
if (!state.annotationPopup && this._readAloudPositionLocked) {
setTimeout(() => {
this._readAloudScrolling = true;
- this.navigateToPosition(state.activeSegment.position, {
+ this.navigateToPosition(activePosition, {
ifNeeded: true,
visibilityMargin: -this._iframeWindow.innerHeight / 4,
block: 'center',
@@ -1108,208 +1211,74 @@ class PDFView {
});
}
}
+ }
- await this._initReadAloudSegments();
-
- if (!state.lang) {
- let textSample = this._readAloudSegments.paragraphs
- .slice(0, 25)
- .map(p => p.text)
- .join('\n');
- this._options.onSetReadAloudState({
- lang: detectLang(textSample) || 'en',
- });
- return;
- }
-
- if (!state.active || !state.segmentGranularity) {
- return;
- }
-
- if (state.segments !== null && state.segmentGranularity === previousState?.segmentGranularity) {
- return;
- }
-
- let segments = state.segmentGranularity === 'sentence'
- ? this._readAloudSegments.sentences
- : this._readAloudSegments.paragraphs;
-
- let backwardStopIndex = null;
- let forwardStopIndex = null;
-
- let selectionInfo = getReadAloudSelectionBounds(this._selectionRanges);
- if (selectionInfo) {
- await this._ensureBasicPageData(selectionInfo.start.pageIndex);
- if (selectionInfo.end.pageIndex !== selectionInfo.start.pageIndex) {
- await this._ensureBasicPageData(selectionInfo.end.pageIndex);
- }
-
- this._setSelectionRanges();
-
- let split = splitReadAloudSegmentsBySelection(
- segments,
- selectionInfo.start,
- selectionInfo.end,
- pageIndex => this._pdfPages[pageIndex]?.chars
- );
-
- if (split) {
- segments = split.segments;
- backwardStopIndex = split.startIndex;
- forwardStopIndex = split.endIndex;
- }
- else if (selectionInfo.selectionRanges.length) {
- let selectionSegments = { paragraphs: [], sentences: [] };
-
- for (let selectionRange of selectionInfo.selectionRanges) {
- let { pageIndex } = selectionRange.position;
- await this._ensureBasicPageData(pageIndex);
- let page = this._pdfPages[pageIndex];
- if (!page?.chars?.length) {
- continue;
- }
- let { chars } = page;
- let start = Math.min(selectionRange.anchorOffset, selectionRange.headOffset);
- let end = Math.max(selectionRange.anchorOffset, selectionRange.headOffset);
-
- let { paragraphs, sentences } = buildReadAloudSegmentsFromRanges(
- chars, pageIndex, [[start, end - 1]]
- );
-
- let paragraphOffset = selectionSegments.paragraphs.length;
- for (let sentence of sentences) {
- sentence.paragraphIndex += paragraphOffset;
- }
+	/**
+	 * Whether there is a non-collapsed selection, judged by the first
+	 * selection range only.
+	 *
+	 * Always a boolean: the bare `length && …` form evaluated to the number
+	 * 0 when there were no selection ranges.
+	 */
+	get hasReadAloudTarget() {
+		return this._selectionRanges.length > 0 && !this._selectionRanges[0].collapsed;
+	}
- selectionSegments.paragraphs.push(...paragraphs);
- selectionSegments.sentences.push(...sentences);
- }
+	/**
+	 * Set the `_readAloudPositionLocked` flag.
+	 *
+	 * While the flag is set, the Read Aloud state-update code in this class
+	 * navigates the view to the active position (unless an annotation popup
+	 * is open).
+	 */
+	lockPositionToReadAloud() {
+		this._readAloudPositionLocked = true;
+	}
- segments = state.segmentGranularity === 'sentence'
- ? selectionSegments.sentences
- : selectionSegments.paragraphs;
- if (segments.length) {
- backwardStopIndex = 0;
- forwardStopIndex = segments.length;
- }
+ /**
+ * Resolve the primary Read Aloud highlight position for the user's chosen
+ * granularity, falling back coarser when finer-grained data isn't available
+ */
+ _resolveReadAloudPrimaryPosition(state, activePosition) {
+ switch (this._effectiveReadAloudPrimaryGranularity(state)) {
+ case 'word': {
+ let wordPosition = state.activeWordSourcePosition;
+ return wordPosition?.pageIndex !== undefined ? wordPosition : null;
}
- }
- else if (state.targetPosition) {
- for (let i = 0; i < segments.length; i++) {
- let segment = segments[i];
- if (segment.position.pageIndex === state.targetPosition.pageIndex
- && intersectAnnotationWithPoint(segment.position, state.targetPosition)) {
- backwardStopIndex = i;
- break;
- }
- }
- }
- else {
- let objects = segments.map((object, index) => ({ index, object }));
- let visibleObjects = this._getVisibleObjects(objects);
- if (visibleObjects.length) {
- backwardStopIndex = visibleObjects[0].index;
+ case 'sentence':
+ return activePosition;
+ case 'paragraph':
+ default: {
+ let paragraphPosition = state.activeSegment?.paragraphSourcePosition;
+ return paragraphPosition?.pageIndex !== undefined ? paragraphPosition : activePosition;
}
}
-
- this._options.onSetReadAloudState({
- segments,
- backwardStopIndex,
- forwardStopIndex,
- });
}
- _getReadAloudParagraphPosition(state) {
- if (!state.activeSegment?.position) {
+ /**
+ * Resolve the brief flash highlight position that should appear after a
+ * skip whose granularity isn't already shown by the primary highlight.
+ * Returns null when the skip granularity matches the primary or there's
+ * no recent skip to acknowledge.
+ */
+ _resolveReadAloudSkipHighlightPosition(state, activePosition) {
+ if (!state.lastSkipGranularity) {
return null;
}
-
- let pageIndex = state.activeSegment.position.pageIndex;
- let paragraphPosition = null;
-
- if (state.segmentGranularity === 'sentence') {
- let paragraphIndex = state.activeSegment.paragraphIndex;
- let paragraph = Number.isInteger(paragraphIndex)
- ? this._readAloudSegments?.paragraphs?.[paragraphIndex]
- : null;
- if (paragraph?.position?.rects?.length && paragraph.position.pageIndex === pageIndex) {
- paragraphPosition = paragraph.position;
- }
- }
- else if (state.segmentGranularity === 'paragraph') {
- paragraphPosition = state.activeSegment.position;
- }
-
- if (paragraphPosition) {
- return paragraphPosition;
- }
-
- let segments = state.segments || [];
- let activeIndex = segments.indexOf(state.activeSegment);
- if (activeIndex === -1) {
+ if (state.lastSkipGranularity === this._effectiveReadAloudPrimaryGranularity(state)) {
return null;
}
-
- // Find paragraph boundaries using anchor === 'paragraphStart'
- let paragraphStartIndex = activeIndex;
- for (let i = activeIndex; i >= 0; i--) {
- paragraphStartIndex = i;
- if (segments[i].anchor === 'paragraphStart') {
- break;
- }
- }
- let paragraphEndIndex = activeIndex;
- for (let i = activeIndex + 1; i < segments.length; i++) {
- if (segments[i].anchor === 'paragraphStart') {
- break;
- }
- paragraphEndIndex = i;
- }
-
- // Combine positions of all segments in the paragraph
- let paragraphRects = [];
- for (let i = paragraphStartIndex; i <= paragraphEndIndex; i++) {
- let seg = segments[i];
- if (seg.position.pageIndex === pageIndex && seg.position.rects) {
- paragraphRects.push(...seg.position.rects);
- }
- }
-
- if (!paragraphRects.length) {
- return null;
+ if (state.lastSkipGranularity === 'sentence') {
+ return activePosition;
}
-
- return { pageIndex, rects: paragraphRects };
- }
-
- computeReadAloudRepositionIndex(position, segments) {
- for (let i = 0; i < segments.length; i++) {
- let segment = segments[i];
- if (segment.position.pageIndex === position.pageIndex
- && intersectAnnotationWithPoint(segment.position, position)) {
- return i;
- }
+ if (state.lastSkipGranularity === 'paragraph') {
+ let paragraphPosition = state.activeSegment?.paragraphSourcePosition;
+ return paragraphPosition ?? null;
}
return null;
}
- get hasReadAloudTarget() {
- return this._selectionRanges.length && !this._selectionRanges[0].collapsed;
- }
-
- lockPositionToReadAloud() {
- this._readAloudPositionLocked = true;
- }
-
- getSerializableReadAloudPosition(position) {
- return position;
+	/**
+	 * Granularity actually used for the primary highlight.
+	 *
+	 * The requested `highlightGranularity` ('word' or 'sentence') is honored
+	 * only when segments are sentence-granular; every other combination
+	 * yields 'paragraph'.
+	 */
+	_effectiveReadAloudPrimaryGranularity(state) {
+		if (state.segmentGranularity !== 'sentence') {
+			return 'paragraph';
+		}
+		let requested = state.highlightGranularity;
+		return requested === 'word' || requested === 'sentence' ? requested : 'paragraph';
 	}
- isReadAloudPositionTooFar(savedPosition, viewState) {
- if (savedPosition.pageIndex === undefined) {
- return false;
- }
- return Math.abs(viewState.pageIndex - savedPosition.pageIndex) > 2;
+	/**
+	 * Whether `position` lies within 5 pages of the viewer's current page.
+	 *
+	 * Returns true when that can't be determined: `position` has no numeric
+	 * `pageIndex`, or the viewer reports no current page number.
+	 */
+	isPositionNearView(position) {
+		const MAX_PAGE_DISTANCE = 5;
+		let pageIndex = position?.pageIndex;
+		if (typeof pageIndex !== 'number') {
+			return true;
+		}
+		let currentPageNumber = this._iframeWindow?.PDFViewerApplication?.pdfViewer?.currentPageNumber;
+		if (!currentPageNumber) {
+			return true;
+		}
+		// currentPageNumber is converted to a 0-based index before comparing
+		let currentPageIndex = currentPageNumber - 1;
+		return Math.abs(pageIndex - currentPageIndex) <= MAX_PAGE_DISTANCE;
 	}
_isPositionInViewBounds(position) {
@@ -1329,48 +1298,6 @@ class PDFView {
return quickIntersectRect(rect, visibleRect);
}
- addAnnotationFromReadAloudSegments(segments, init) {
- if (!segments.length) {
- return undefined;
- }
- let firstSegment = segments[0];
- let rects = [];
- let nextPageRects = [];
- let texts = [];
- let pageIndex = firstSegment.position.pageIndex;
- let nextPageIndex = pageIndex + 1;
-
- for (let segment of segments) {
- texts.push(segment.text);
- if (segment.position.pageIndex === pageIndex) {
- rects.push(...segment.position.rects);
- }
- else if (segment.position.pageIndex === nextPageIndex) {
- nextPageRects.push(...segment.position.rects);
- }
- else {
- break;
- }
- }
-
- let position = {
- pageIndex,
- rects,
- };
- if (nextPageRects.length) {
- position.nextPageRects = nextPageRects;
- }
-
- let annotation = {
- pageLabel: this._getPageLabel(pageIndex, true),
- sortIndex: getSortIndex(this._pdfPages, position),
- position,
- text: texts.join(' '), // TODO: Is this always right?
- ...init,
- };
- return this._onAddAnnotation(annotation);
- }
-
setFindState(state) {
if (!state.active && this._findState.active !== state.active) {
this._findController.onClose();
@@ -1577,27 +1504,132 @@ class PDFView {
}
}
- _updateReadAloudJumpButton(position) {
- if (!this._readAloudState?.popupOpen || !this._readAloudSegments?.paragraphs || !position) {
+	/**
+	 * Rebuild `this._readAloudParagraphIndex` from a flat list of segments.
+	 *
+	 * A paragraph is a run of consecutive segments that ends right before the
+	 * next segment whose `anchor` is 'paragraphStart'. The rects of the run's
+	 * `sourcePosition`s are bucketed by page and then by column, and each
+	 * bucket yields one entry { segment, pageIndex, rect }: `segment` is the
+	 * first segment of the paragraph and `rect` is the bucket's bounding rect.
+	 */
+	_buildReadAloudParagraphIndex(segments) {
+		let index = [];
+		this._readAloudParagraphIndex = index;
+		if (!segments) return;
+
+		let start = 0;
+		while (start < segments.length) {
+			// The paragraph is segments[start] .. segments[next - 1]
+			let next = start + 1;
+			while (next < segments.length && segments[next].anchor !== 'paragraphStart') {
+				next++;
+			}
+			let firstSegment = segments[start];
+
+			// Bucket the paragraph's rects by page, skipping segments that
+			// have no rects or no page index
+			let rectsByPage = new Map();
+			for (let k = start; k < next; k++) {
+				let source = segments[k].sourcePosition;
+				if (!source?.rects || source.pageIndex === undefined) continue;
+				let pageRects = rectsByPage.get(source.pageIndex);
+				if (!pageRects) {
+					pageRects = [];
+					rectsByPage.set(source.pageIndex, pageRects);
+				}
+				pageRects.push(...source.rects);
+			}
+
+			for (let [pageIndex, pageRects] of rectsByPage) {
+				// A rect joins the first column whose x-range it overlaps and
+				// widens that range; otherwise it opens a new column
+				let columns = [];
+				for (let rect of pageRects) {
+					let column = columns.find(c => rect[0] < c.maxX && rect[2] > c.minX);
+					if (column) {
+						column.rects.push(rect);
+						column.minX = Math.min(column.minX, rect[0]);
+						column.maxX = Math.max(column.maxX, rect[2]);
+					}
+					else {
+						columns.push({ rects: [rect], minX: rect[0], maxX: rect[2] });
+					}
+				}
+
+				for (let column of columns) {
+					index.push({
+						segment: firstSegment,
+						pageIndex,
+						rect: getPositionBoundingRect({ rects: column.rects }),
+					});
+				}
+			}
+
+			start = next;
+		}
+
+		// Fold neighbouring entries that sit on the same visual line into a
+		// single hit target.
+		// Example: the heading "1. Introduction" is split into the
+		// sentence-level segments "1." and "Introduction", which the column
+		// clustering above turns into two entries with a gap in between.
+		// Only entries no taller than one line are folded, so that the
+		// per-column entries of a multi-line paragraph spanning two columns
+		// of one page stay separate.
+		const SINGLE_LINE_MAX_HEIGHT = 40;
+		// Walk backwards so that removing entry i doesn't shift the entries
+		// still to be visited
+		for (let i = index.length - 1; i > 0; i--) {
+			let later = index[i];
+			let earlier = index[i - 1];
+			if (later.pageIndex !== earlier.pageIndex) continue;
+
+			let laterHeight = later.rect[3] - later.rect[1];
+			let earlierHeight = earlier.rect[3] - earlier.rect[1];
+			if (laterHeight > SINGLE_LINE_MAX_HEIGHT
+					|| earlierHeight > SINGLE_LINE_MAX_HEIGHT) continue;
+
+			// Require a vertical overlap of at least half the taller entry
+			let sharedHeight = Math.min(later.rect[3], earlier.rect[3])
+				- Math.max(later.rect[1], earlier.rect[1]);
+			if (sharedHeight < Math.max(laterHeight, earlierHeight) * 0.5) continue;
+
+			// The earlier entry absorbs the later one's rect
+			earlier.rect = [
+				Math.min(earlier.rect[0], later.rect[0]),
+				Math.min(earlier.rect[1], later.rect[1]),
+				Math.max(earlier.rect[2], later.rect[2]),
+				Math.max(earlier.rect[3], later.rect[3]),
+			];
+			index.splice(i, 1);
+		}
+	}
+
+ _updateReadAloudJumpButton(position, event) {
+ if (!this._readAloudState?.popupOpen) {
+ return;
+ }
+
+ if (event && this._readAloudJumpButton.iconContainsPoint(event.clientX, event.clientY)) {
return;
}
- let paragraph = null;
- for (let p of this._readAloudSegments.paragraphs) {
- if (p.position.pageIndex !== position.pageIndex) continue;
- if (intersectAnnotationWithPoint(p.position, position)) {
- paragraph = p;
+ if (!position) {
+ return;
+ }
+
+ let match = null;
+ for (let entry of this._readAloudParagraphIndex) {
+ if (entry.pageIndex !== position.pageIndex) continue;
+ if (intersectAnnotationWithPoint({ pageIndex: entry.pageIndex, rects: [entry.rect] }, position)) {
+ match = entry;
break;
}
}
- if (!paragraph || paragraph === this._readAloudJumpButtonParagraph) {
+ if (!match) {
return;
}
+ if (match === this._readAloudJumpButtonMatch) {
+ return;
+ }
+ this._readAloudJumpButtonMatch = match;
+ this._readAloudJumpButtonParagraph = match.segment;
- this._readAloudJumpButtonParagraph = paragraph;
- let paraRect = getPositionBoundingRect(paragraph.position);
- let clientRect = this.getClientRect(paraRect, paragraph.position.pageIndex);
+ let clientRect = this.getClientRect(match.rect, match.pageIndex);
let container = this._iframeWindow.document.getElementById('viewerContainer');
let containerRect = container.getBoundingClientRect();
@@ -1613,6 +1645,7 @@ class PDFView {
_hideReadAloudJumpButton() {
this._readAloudJumpButton.hide();
this._readAloudJumpButtonParagraph = null;
+ this._readAloudJumpButtonMatch = null;
}
_handleReadAloudJumpButtonClick() {
@@ -1620,15 +1653,17 @@ class PDFView {
let paragraph = this._readAloudJumpButtonParagraph;
- // Immediately move the highlight to the target paragraph
- this._readAloudHighlightedPosition = paragraph.position;
+ // Match the immediate spotlight to the user's highlight granularity,
+ // so we don't show a wrong-granularity flash before the manager overrides
+ // with a new highlight.
+ let granularity = this._effectiveReadAloudPrimaryGranularity(this._readAloudState);
+ this._readAloudHighlightedPosition = granularity === 'paragraph'
+ ? (paragraph.paragraphSourcePosition || paragraph.sourcePosition)
+ : paragraph.sourcePosition;
this._render();
this._options.onSetReadAloudState({
- targetPosition: {
- pageIndex: paragraph.position.pageIndex,
- rects: paragraph.position.rects,
- },
+ targetPosition: paragraph.position,
});
}
diff --git a/src/pdf/read-aloud-segments.js b/src/pdf/read-aloud-segments.js
deleted file mode 100644
index 0095466e5..000000000
--- a/src/pdf/read-aloud-segments.js
+++ /dev/null
@@ -1,555 +0,0 @@
-import { detectLang } from '../common/lib/detect-lang';
-import { splitTextToChunks } from '../common/read-aloud/segment-split';
-import { getRangeRects } from './lib/utilities';
-import { getTextFromChars } from './selection';
-
-let trimText = (s) => s.replace(/^ +| +$/g, '');
-let joinWithSpace = (a, b) => {
- if (!a) return b;
- if (!b) return a;
- return a + ((a.at(-1) !== ' ' && !/[\p{P}]/u.test(b[0] || '')) ? ' ' : '') + b;
-};
-
-let computeBoundingRect = (rects) => {
- let minX = Infinity;
- let minY = Infinity;
- let maxX = -Infinity;
- let maxY = -Infinity;
- for (let r of rects) {
- if (!r) continue;
- let [x1, y1, x2, y2] = r;
- if (x1 < minX) minX = x1;
- if (y1 < minY) minY = y1;
- if (x2 > maxX) maxX = x2;
- if (y2 > maxY) maxY = y2;
- }
-
- return [minX, minY, maxX, maxY];
-};
-
-function paragraphsFromChars(chars) {
- let paragraphs = [];
- if (!chars || !chars.length) return paragraphs;
-
- let lines = [];
- let explicitBreaks = new Set();
-
- let lineStart = 0;
-
- let pushLine = (endIdx) => {
- let parts = getRangeRects(chars, lineStart, endIdx) || [];
- let rect = computeBoundingRect(parts);
- lines.push({ start: lineStart, end: endIdx, rect });
- lineStart = endIdx + 1;
- };
-
- for (let i = 0; i < chars.length; i++) {
- let ch = chars[i];
- if (!ch) continue;
-
- if (ch.paragraphBreakAfter) {
- explicitBreaks.add(i);
- }
-
- let isLineEnd = ch.lineBreakAfter || ch.paragraphBreakAfter || i === chars.length - 1;
-
- if (isLineEnd) {
- pushLine(i);
- }
- }
-
- if (!lines.length) {
- return paragraphs;
- }
-
- // First pass: decide paragraph breaks **between lines**
- let breaksBetweenLines = new Array(Math.max(0, lines.length - 1)).fill(false);
- const INDENT_EPS = 10;
-
- for (let li = 1; li < lines.length; li++) {
- let prev = lines[li - 1];
- let cur = lines[li];
-
- // Explicit paragraph break right after the previous line
- if (explicitBreaks.has(prev.end)) {
- breaksBetweenLines[li - 1] = true;
- continue;
- }
-
- if (cur.rect[0] > prev.rect[0] + INDENT_EPS) {
- breaksBetweenLines[li - 1] = true;
- }
- }
-
- // Group lines into initial paragraphs (by line indices)
- let paragraphLineGroups = [];
- let paraStartLine = 0;
- for (let li = 1; li < lines.length; li++) {
- if (breaksBetweenLines[li - 1]) {
- paragraphLineGroups.push([paraStartLine, li - 1]);
- paraStartLine = li;
- }
- }
- paragraphLineGroups.push([paraStartLine, lines.length - 1]);
-
- // Second pass: join single‑line paragraphs with previous
- // when the first characters share the same font
- if (paragraphLineGroups.length <= 1) {
- for (let [ls, le] of paragraphLineGroups) {
- paragraphs.push([lines[ls].start, lines[le].end]);
- }
- return paragraphs;
- }
-
- let mergedLineGroups = [];
- let [prevLs, prevLe] = paragraphLineGroups[0];
- let prevFont = chars[lines[prevLs].start].fontName;
-
- for (let idx = 1; idx < paragraphLineGroups.length; idx++) {
- let [curLs, curLe] = paragraphLineGroups[idx];
-
- let isSingleLine = curLs === curLe;
- let curFont = chars[lines[curLs].start].fontName;
-
- if (isSingleLine && prevFont && curFont && prevFont === curFont) {
- // Merge: extend previous paragraph to include current paragraph's lines
- prevLe = curLe;
- }
- else {
- mergedLineGroups.push([prevLs, prevLe]);
- [prevLs, prevLe] = [curLs, curLe];
- prevFont = curFont;
- }
- }
- mergedLineGroups.push([prevLs, prevLe]);
-
- // Final step: convert merged line groups to character index ranges
- for (let [ls, le] of mergedLineGroups) {
- paragraphs.push([lines[ls].start, lines[le].end]);
- }
-
- return paragraphs;
-}
-
-function buildSegmenterText(chars) {
- let textParts = [];
- let textLength = 0;
- let charIndexByTextIndex = [];
-
- for (let i = 0; i < chars.length; i++) {
- let ch = chars[i];
- if (!ch || ch.ignorable) {
- continue;
- }
-
- // Map all code units in ch.c to this char index for robust offset mapping.
- for (let j = 0; j < ch.c.length; j++) {
- charIndexByTextIndex[textLength + j] = i;
- }
- textParts.push(ch.c);
- textLength += ch.c.length;
-
- if (ch.spaceAfter || ch.lineBreakAfter || ch.paragraphBreakAfter) {
- textParts.push(' ');
- textLength += 1;
- }
- }
-
- // Normalize all whitespace to space characters
- let text = textParts.join('');
- text = text.replace(/\s/g, ' ');
-
- return { text, charIndexByTextIndex };
-}
-
-function trimSegmentSpaces(segmentText) {
- let start = 0;
- let end = segmentText.length;
- while (start < end && segmentText[start] === ' ') start++;
- while (end > start && segmentText[end - 1] === ' ') end--;
- return { start, end };
-}
-
-function findCharIndex(charIndexByTextIndex, start, end, forward) {
- let i = forward ? start : end - 1;
- let step = forward ? 1 : -1;
- let stop = forward ? end : start - 1;
- for (; i !== stop; i += step) {
- if (charIndexByTextIndex[i] !== undefined) {
- return charIndexByTextIndex[i];
- }
- }
- return null;
-}
-
-function textRangeToCharRange(charIndexByTextIndex, start, end) {
- let startChar = findCharIndex(charIndexByTextIndex, start, end, true);
- if (startChar === null) {
- return null;
- }
-
- let endChar = findCharIndex(charIndexByTextIndex, start, end, false);
- if (endChar === null) {
- return null;
- }
-
- return [startChar, endChar];
-}
-
-function sentencesFromSegmenterText(text, charIndexByTextIndex, lang) {
- if (!text) {
- return [];
- }
-
- if (!('Segmenter' in Intl)) {
- return [];
- }
-
- let segmenter = new Intl.Segmenter(lang || undefined, { granularity: 'sentence' });
- let segments = [...segmenter.segment(text)];
-
- let out = [];
- for (let segment of segments) {
- let sentStart = segment.index;
- let sentEnd = sentStart + segment.segment.length;
-
- let trimmed = trimSegmentSpaces(segment.segment);
- sentStart += trimmed.start;
- sentEnd = sentStart + (trimmed.end - trimmed.start);
-
- if (sentEnd <= sentStart) {
- continue;
- }
-
- let segmentText = text.slice(sentStart, sentEnd);
- if (!segmentText) {
- continue;
- }
-
- // Enforce max byte length per segment
- let chunks = splitTextToChunks(segmentText);
- for (let [chunkStart, chunkEnd] of chunks) {
- let absStart = sentStart + chunkStart;
- let absEnd = sentStart + chunkEnd;
-
- let chunkTrimmed = trimSegmentSpaces(segmentText.slice(chunkStart, chunkEnd));
- absStart += chunkTrimmed.start;
- absEnd = absStart + (chunkTrimmed.end - chunkTrimmed.start);
-
- if (absEnd <= absStart) {
- continue;
- }
-
- let charRange = textRangeToCharRange(charIndexByTextIndex, absStart, absEnd);
- if (!charRange) {
- continue;
- }
-
- let chunkText = text.slice(absStart, absEnd);
- if (!chunkText) {
- continue;
- }
-
- out.push({
- text: chunkText,
- ranges: [charRange]
- });
- }
- }
-
- return out;
-}
-
-export function buildReadAloudSegmentsFromRanges(chars, pageIndex, paragraphRanges) {
- if (!chars || !chars.length || !paragraphRanges || !paragraphRanges.length) {
- return { paragraphs: [], sentences: [] };
- }
-
- let paragraphs = [];
- let sentences = [];
-
- for (let [start, end] of paragraphRanges) {
- if (start === null || end === null || start > end) continue;
-
- let paraChars = chars.slice(start, end + 1);
- let { text, charIndexByTextIndex } = buildSegmenterText(paraChars);
- let paragraphText = trimText(text);
- let paragraphLang = detectLang(paragraphText) || undefined;
- let rawSentences = sentencesFromSegmenterText(text, charIndexByTextIndex, paragraphLang);
-
- let paraRects = getRangeRects(chars, start, end) || [];
- let paraText = '';
-
- // Track first sentence in this paragraph
- let isFirstSentenceInParagraph = true;
-
- for (let s of rawSentences) {
- if (!s.text) continue;
-
- let rects = [];
- let sentenceStart = null;
- let sentenceEnd = null;
- for (let [localStart, localEndInc] of s.ranges) {
- if (localStart === null || localEndInc === null) continue;
-
- let ss = Math.max(0, start + localStart);
- let ee = Math.min(start + localEndInc, chars.length - 1);
- if (ee < ss) continue;
-
- if (sentenceStart === null || ss < sentenceStart) sentenceStart = ss;
- if (sentenceEnd === null || ee > sentenceEnd) sentenceEnd = ee;
-
- let part = getRangeRects(chars, ss, ee);
- if (part && part.length) rects = rects.concat(part);
- }
-
- if (!rects.length || sentenceStart === null || sentenceEnd === null) continue;
-
- let sentence = {
- text: s.text,
- position: { pageIndex, rects },
- paragraphIndex: paragraphs.length,
- granularity: 'sentence',
- offsetStart: sentenceStart,
- offsetEnd: sentenceEnd
- };
-
- // Mark the first sentence of each paragraph
- if (isFirstSentenceInParagraph) {
- sentence.anchor = 'paragraphStart';
- isFirstSentenceInParagraph = false;
- }
-
- sentences.push(sentence);
- paraText = joinWithSpace(paraText, s.text);
- }
-
- if (paraRects.length && paraText) {
- paragraphs.push({
- anchor: 'paragraphStart',
- text: paraText,
- position: { pageIndex, rects: paraRects },
- granularity: 'paragraph',
- offsetStart: start,
- offsetEnd: end
- });
- }
- }
-
- return { paragraphs, sentences };
-}
-
-export function buildReadAloudSegments(chars, pageIndex) {
- if (!chars || !chars.length) {
- return { paragraphs: [], sentences: [] };
- }
- let paragraphRanges = paragraphsFromChars(chars);
- return buildReadAloudSegmentsFromRanges(chars, pageIndex, paragraphRanges);
-}
-
-export function getReadAloudSelectionBounds(selectionRanges) {
- if (!selectionRanges?.length || selectionRanges[0].collapsed) {
- return null;
- }
-
- let sortedRanges = [...selectionRanges];
- sortedRanges.sort((a, b) => {
- const pa = a.position.pageIndex;
- const pb = b.position.pageIndex;
- if (pa !== pb) {
- return pa - pb;
- }
- const aMin = Math.min(a.anchorOffset, a.headOffset);
- const bMin = Math.min(b.anchorOffset, b.headOffset);
- return aMin - bMin;
- });
-
- let startRange = sortedRanges[0];
- let endRange = sortedRanges[sortedRanges.length - 1];
- let startOffset = Math.min(startRange.anchorOffset, startRange.headOffset);
- let endOffset = Math.max(endRange.anchorOffset, endRange.headOffset) - 1;
- if (endOffset < startOffset) {
- return null;
- }
-
- return {
- selectionRanges: sortedRanges,
- start: {
- pageIndex: startRange.position.pageIndex,
- offset: startOffset
- },
- end: {
- pageIndex: endRange.position.pageIndex,
- offset: endOffset
- }
- };
-}
-
-export function buildReadAloudSegmentPart(chars, segment, pageIndex, offsetStart, offsetEnd, anchor) {
- if (offsetStart > offsetEnd || !chars?.length) {
- return null;
- }
-
- let start = Math.max(0, Math.min(offsetStart, chars.length - 1));
- let end = Math.max(0, Math.min(offsetEnd, chars.length - 1));
- if (start > end) {
- return null;
- }
-
- let rects = getRangeRects(chars, start, end);
- if (!rects?.length) {
- return null;
- }
-
- let text = getTextFromChars(chars.slice(start, end + 1));
- if (!text) {
- return null;
- }
-
- let next = {
- text,
- position: { pageIndex, rects },
- granularity: segment.granularity,
- anchor: anchor || null,
- offsetStart: start,
- offsetEnd: end
- };
- if (segment.paragraphIndex !== undefined) {
- next.paragraphIndex = segment.paragraphIndex;
- }
- return next;
-}
-
-export function splitReadAloudSegmentsBySelection(segments, selectionStart, selectionEnd, getCharsForPage) {
- let comparePos = (a, b) => {
- if (a.pageIndex !== b.pageIndex) {
- return a.pageIndex - b.pageIndex;
- }
- return a.offset - b.offset;
- };
-
- let hasOffsets = segments.every(segment =>
- Number.isInteger(segment.offsetStart) && Number.isInteger(segment.offsetEnd)
- );
- if (!hasOffsets) {
- return null;
- }
-
- let startIndex = segments.findIndex(segment => {
- let segEnd = { pageIndex: segment.position.pageIndex, offset: segment.offsetEnd };
- return comparePos(selectionStart, segEnd) <= 0;
- });
- if (startIndex === -1) {
- return null;
- }
-
- let endIndex = segments.findIndex(segment => {
- let segStart = { pageIndex: segment.position.pageIndex, offset: segment.offsetStart };
- return comparePos(selectionEnd, segStart) < 0;
- });
- if (endIndex === -1) {
- endIndex = segments.length;
- }
- if (startIndex >= endIndex) {
- return null;
- }
-
- let newSegments = [];
- let splitStartIndex = null;
- let splitEndIndex = null;
-
- for (let i = 0; i < segments.length; i++) {
- let segment = segments[i];
- if (i < startIndex || i >= endIndex) {
- newSegments.push(segment);
- continue;
- }
-
- let pageIndex = segment.position.pageIndex;
- let segStart = { pageIndex, offset: segment.offsetStart };
- let segEnd = { pageIndex, offset: segment.offsetEnd };
-
- let startWithin = i === startIndex
- && comparePos(selectionStart, segStart) > 0
- && comparePos(selectionStart, segEnd) <= 0;
- let endWithin = i === endIndex - 1
- && comparePos(selectionEnd, segStart) >= 0
- && comparePos(selectionEnd, segEnd) < 0;
-
- if (!startWithin && !endWithin) {
- if (i === startIndex) {
- splitStartIndex = newSegments.length;
- }
- newSegments.push(segment);
- if (i === endIndex - 1) {
- splitEndIndex = newSegments.length;
- }
- continue;
- }
-
- let chars = getCharsForPage?.(pageIndex);
- let middleAnchor = segment.anchor || null;
- if (startWithin) {
- let before = buildReadAloudSegmentPart(
- chars,
- segment,
- pageIndex,
- segment.offsetStart,
- selectionStart.offset - 1,
- middleAnchor
- );
- if (before) {
- newSegments.push(before);
- middleAnchor = null;
- }
- }
-
- let middleStart = startWithin ? selectionStart.offset : segment.offsetStart;
- let middleEnd = endWithin ? selectionEnd.offset : segment.offsetEnd;
- let middle = buildReadAloudSegmentPart(
- chars,
- segment,
- pageIndex,
- middleStart,
- middleEnd,
- middleAnchor
- );
- if (middle) {
- if (i === startIndex) {
- splitStartIndex = newSegments.length;
- }
- newSegments.push(middle);
- if (i === endIndex - 1 && endWithin) {
- splitEndIndex = newSegments.length;
- }
- }
-
- if (endWithin) {
- let after = buildReadAloudSegmentPart(
- chars,
- segment,
- pageIndex,
- selectionEnd.offset + 1,
- segment.offsetEnd,
- null
- );
- if (after) {
- newSegments.push(after);
- }
- }
-
- if (i === endIndex - 1 && !endWithin) {
- splitEndIndex = newSegments.length;
- }
- }
-
- if (splitStartIndex === null || splitEndIndex === null) {
- return null;
- }
-
- return {
- segments: newSegments,
- startIndex: splitStartIndex,
- endIndex: splitEndIndex
- };
-}
diff --git a/src/worker-client.dev.js b/src/worker-client.dev.js
new file mode 100644
index 000000000..649df345c
--- /dev/null
+++ b/src/worker-client.dev.js
@@ -0,0 +1,111 @@
+// Content type sent to the document worker for each `type` accepted by
+// generateSDT()
+const CONTENT_TYPES = {
+	pdf: 'application/pdf',
+	epub: 'application/epub+zip',
+	snapshot: 'text/html',
+};
+
+// Dummy hash (32 zeros), sent as `sourceHash` with every request made by
+// generateSDT()
+const DEV_SOURCE_HASH = '0'.repeat(32);
+
+// Served by webpack-dev-server from ../document-worker/build/ (see devServer.static
+// in webpack.config.js). If document-worker hasn't been built, the worker fails
+// to load and generateSDT() resolves to null.
+const DOCUMENT_WORKER_BASE = 'document-worker/';
+
+// Shared Worker instance, created on first use by initDocumentWorker() and
+// reset to null when the worker raises an 'error' event
+let documentWorker = null;
+// Set when the worker raises an 'error' event; no new worker is created afterwards
+let documentWorkerFailed = false;
+// ID of the most recent request; incremented before each new request
+let lastWorkerPromiseID = 0;
+// id -> { resolve, reject }.
+let workerPromises = {};
+
+/**
+ * Reject every pending worker request with `error` and remove it from
+ * `workerPromises`.
+ */
+function failAllPending(error) {
+	for (let [id, { reject }] of Object.entries(workerPromises)) {
+		reject(error);
+		delete workerPromises[id];
+	}
+}
+
+/**
+ * Fetch a file below DOCUMENT_WORKER_BASE and return its bytes.
+ *
+ * Throws on a non-OK response so that an error page (e.g. a 404 when
+ * document-worker hasn't been built) is never handed to the worker as if it
+ * were the requested asset.
+ */
+async function fetchDocumentWorkerAsset(relativePath) {
+	let res = await fetch(DOCUMENT_WORKER_BASE + relativePath);
+	if (!res.ok) {
+		throw new Error(`HTTP ${res.status} for ${res.url}`);
+	}
+	return new Uint8Array(await res.arrayBuffer());
+}
+
+/**
+ * Return the shared document worker, creating it and attaching its
+ * listeners on first use.
+ *
+ * Returns null once the worker has raised an 'error' event; from then on no
+ * new worker is created.
+ */
+function initDocumentWorker() {
+	if (documentWorker || documentWorkerFailed) {
+		return documentWorker;
+	}
+	// The listeners use this local reference: the module-level
+	// `documentWorker` is reset to null on error, possibly while one of the
+	// asset fetches below is still in flight.
+	let worker = new Worker(DOCUMENT_WORKER_BASE + 'worker.js');
+	documentWorker = worker;
+	worker.addEventListener('error', (event) => {
+		console.warn(`Document worker failed to load from ${DOCUMENT_WORKER_BASE}:`, event.message || event);
+		documentWorkerFailed = true;
+		failAllPending(new Error('Document worker unavailable'));
+		documentWorker = null;
+	});
+	worker.addEventListener('message', async (event) => {
+		let message = event.data;
+		// Reply to one of our own requests: settle the matching promise
+		if (message.responseID) {
+			let pending = workerPromises[message.responseID];
+			if (!pending) return;
+			delete workerPromises[message.responseID];
+			if ('error' in message) {
+				pending.reject(new Error(JSON.stringify(message.error)));
+			}
+			else {
+				pending.resolve(message.data);
+			}
+			return;
+		}
+		// Request coming from the worker. It is always answered; `data` is
+		// null when the action is unknown or the fetch fails.
+		if (message.id) {
+			let respData = null;
+			try {
+				switch (message.action) {
+					case 'FetchBuiltInCMap':
+						respData = {
+							isCompressed: true,
+							cMapData: await fetchDocumentWorkerAsset('cmaps/' + message.data + '.bcmap'),
+						};
+						break;
+					case 'FetchStandardFontData':
+						respData = await fetchDocumentWorkerAsset('standard_fonts/' + message.data);
+						break;
+					case 'FetchWasm':
+						respData = await fetchDocumentWorkerAsset('wasm/' + message.data);
+						break;
+					case 'FetchData':
+						respData = await fetchDocumentWorkerAsset(message.data);
+						break;
+				}
+			}
+			catch (e) {
+				console.warn(`Document worker ${message.action} failed:`, e);
+			}
+			worker.postMessage({ responseID: message.id, data: respData });
+		}
+	});
+	return worker;
+}
+
+/**
+ * Send a request to the document worker.
+ *
+ * @param action Action name placed in the message
+ * @param data Payload placed in the message
+ * @param transfer Optional list of transferables for postMessage()
+ * @returns Promise settled when the worker answers with a matching
+ *   `responseID`; rejected right away when the worker is unavailable or the
+ *   message can't be posted
+ */
+function queryDocumentWorker(action, data, transfer) {
+	let worker = initDocumentWorker();
+	if (!worker) {
+		return Promise.reject(new Error('Document worker unavailable'));
+	}
+	return new Promise((resolve, reject) => {
+		let id = ++lastWorkerPromiseID;
+		workerPromises[id] = { resolve, reject };
+		try {
+			worker.postMessage({ id, action, data }, transfer || []);
+		}
+		catch (e) {
+			// postMessage() can throw synchronously (e.g. when the payload
+			// can't be cloned). Drop the entry so it doesn't stay in
+			// `workerPromises` forever.
+			delete workerPromises[id];
+			reject(e);
+		}
+	});
+}
+
+/**
+ * Fetch a document and ask the document worker for its structured document
+ * text (SDT).
+ *
+ * @param type Key of CONTENT_TYPES ('pdf', 'epub' or 'snapshot')
+ * @param fileName URL passed to fetch()
+ * @param password Forwarded to the worker unchanged
+ * @returns The worker's response data, or null when `type` is unknown or any
+ *   step fails (the failure is logged with console.warn)
+ */
+export async function generateSDT(type, fileName, password) {
+	let contentType = CONTENT_TYPES[type];
+	if (!contentType) return null;
+	try {
+		let res = await fetch(fileName);
+		// fetch() resolves for HTTP error statuses too; don't send an error
+		// page to the worker as if it were the document
+		if (!res.ok) {
+			throw new Error(`HTTP ${res.status} while fetching ${fileName}`);
+		}
+		let buf = await res.arrayBuffer();
+		return await queryDocumentWorker(
+			'getStructuredDocumentText',
+			{ buf, contentType, password, sourceHash: DEV_SOURCE_HASH },
+			[buf]
+		);
+	}
+	catch (e) {
+		console.warn('Failed to generate SDT:', e);
+		return null;
+	}
+}
+
diff --git a/structured-document-text b/structured-document-text
new file mode 160000
index 000000000..2449d33b0
--- /dev/null
+++ b/structured-document-text
@@ -0,0 +1 @@
+Subproject commit 2449d33b089fe7f3654e88aa9ffd0c700ba0e8c6
diff --git a/webpack.config.js b/webpack.config.js
index 53596cf69..96193a31c 100644
--- a/webpack.config.js
+++ b/webpack.config.js
@@ -131,7 +131,7 @@ function generateReaderConfig(build) {
patterns: [
{ from: 'demo/epub/demo.epub', to: './' },
{ from: 'demo/pdf/demo.pdf', to: './' },
- { from: 'demo/snapshot/demo.html', to: './' }
+ { from: 'demo/snapshot/demo.html', to: './' },
],
options: {
@@ -142,10 +142,16 @@ function generateReaderConfig(build) {
}),
);
config.devServer = {
- static: {
- directory: path.resolve(__dirname, 'build/'),
- watch: true,
- },
+ static: [
+ {
+ directory: path.resolve(__dirname, 'build/'),
+ watch: true,
+ },
+ {
+ directory: path.resolve(__dirname, '../document-worker/build/'),
+ publicPath: '/dev/document-worker',
+ },
+ ],
devMiddleware: {
writeToDisk: true,
},