diff --git a/pyproject.toml b/pyproject.toml index cbcdb85d..c631c77c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ "Topic :: Database :: Front-Ends" ] -dependencies = [ "psutil", "termcolor", "argcomplete", "pyyaml", "rdflib", "requests-sse", "tqdm>=4.60.0" ] +dependencies = [ "psutil", "termcolor", "argcomplete", "pyyaml", "rdflib", "requests-sse", "tqdm>=4.60.0", "textual>=8.0", "rich" ] [project.optional-dependencies] dev = [ "ruff", "pre-commit" ] diff --git a/src/qlever/commands/benchmark_queries.py b/src/qlever/commands/benchmark_queries.py index 9cb43f34..737be31b 100644 --- a/src/qlever/commands/benchmark_queries.py +++ b/src/qlever/commands/benchmark_queries.py @@ -20,37 +20,12 @@ from qlever.command import QleverCommand from qlever.commands.clear_cache import ClearCacheCommand from qlever.commands.ui import dict_to_yaml -from qlever.containerize import Containerize from qlever.log import log, mute_log -from qlever.util import run_command, run_curl_command - - -def pretty_printed_query( - query: str, show_prefixes: bool, system: str = "docker" -) -> str: - """ - Pretty-print a SPARQL query using the sparql-formatter Docker image. - Optionally strips PREFIX declarations from the output. - Argument `system` can either be docker or podman. - """ - if system not in Containerize.supported_systems(): - system = "docker" - remove_prefixes_cmd = " | sed '/^PREFIX /Id'" if not show_prefixes else "" - pretty_print_query_cmd = ( - f"echo {shlex.quote(query)}" - f" | {system} run -i --rm docker.io/sparqling/sparql-formatter" - f"{remove_prefixes_cmd} | grep -v '^$'" - ) - try: - query_pretty_printed = run_command( - pretty_print_query_cmd, return_output=True - ) - return query_pretty_printed.rstrip() - except Exception as e: - log.debug( - f"Failed to pretty-print query, returning original query: {e}" - ) - return query.rstrip() +from qlever.util import ( + pretty_printed_query, + run_command, + run_curl_command, +) def sparql_query_type(query: str) -> str: @@ -1005,7 +980,8 @@ def execute(self, args) -> bool: colored( pretty_printed_query( query, args.show_prefixes, args.system - ), + ) + or query, "cyan", ) ) @@ -1107,7 +1083,8 @@ def execute(self, args) -> bool: description=description, query=pretty_printed_query( query, args.show_prefixes, args.system - ), + ) + or query, client_time=time_seconds, result=query_results, result_size=result_length, @@ -1166,7 +1143,8 @@ def execute(self, args) -> bool: colored( pretty_printed_query( query, args.show_prefixes, args.system - ), + ) + or query, "cyan", ) ) diff --git a/src/qlever/commands/monitor_queries.py b/src/qlever/commands/monitor_queries.py new file mode 100644 index 00000000..61c1d001 --- /dev/null +++ b/src/qlever/commands/monitor_queries.py @@ -0,0 +1,118 @@ +from __future__ import annotations + +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.monitor.app import MonitorQueriesApp + +# `LiveLogReader` ingests new log lines every 0.2s, so a faster screen +# refresh would only re-render the ticking duration; 0.2s is the floor. +REFRESH_MIN_S = 0.2 +REFRESH_MAX_S = 2.0 + + +class MonitorQueriesCommand(QleverCommand): + """ + Class for executing the `monitor-queries-tui` command. + """ + + def __init__(self): + pass + + def description(self) -> str: + return ( + "Show the currently active and historically all the active queries " + "on the server (interactive TUI)" + ) + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str, list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port", "timeout"], + "runtime": ["system"], + } + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--sparql-endpoint", + type=str, + help="URL of the SPARQL endpoint (default = {host_name}:{port})", + ) + subparser.add_argument( + "--metrics-log", + type=Path, + help=( + "QLever's `metrics-log.jsonl` log file " + "(default = {name}.metrics-log.jsonl)" + ), + ) + subparser.add_argument( + "--slow-threshold", + type=int, + default=None, + help="Duration in seconds above which a query (active or" + " completed) is counted as slow in the metrics" + " (default = server timeout - 10s)", + ) + subparser.add_argument( + "--refresh", + type=float, + default=REFRESH_MIN_S, + help="Live view screen refresh interval in seconds, between" + f" {REFRESH_MIN_S} and {REFRESH_MAX_S}" + f" (default = {REFRESH_MIN_S})", + ) + + def execute(self, args) -> bool: + if not args.metrics_log: + args.metrics_log = Path.cwd() / f"{args.name}.metrics-log.jsonl" + show_msg = ( + f"Reading server logs from {args.metrics_log} to display the " + "currently active queries on the server" + ) + self.show(show_msg, only_show=args.show) + if args.show: + return True + + if not args.metrics_log.is_file(): + log.error(f"Log file not found: {args.metrics_log}") + return False + + timeout_s = 30 + if args.slow_threshold is None: + try: + timeout_s = int(args.timeout.rstrip("s")) + except ValueError: + log.error( + f"Could not parse server timeout {args.timeout!r};" + " pass --slow-threshold explicitly" + ) + return False + args.slow_threshold = max(1, timeout_s - 10) + + if args.refresh < REFRESH_MIN_S or args.refresh > REFRESH_MAX_S: + log.error( + f"--refresh must be between {REFRESH_MIN_S} and" + f" {REFRESH_MAX_S} seconds" + ) + return False + + sparql_endpoint = ( + args.sparql_endpoint + if args.sparql_endpoint + else f"{args.host_name}:{args.port}" + ) + + MonitorQueriesApp( + log_file=args.metrics_log, + sparql_endpoint=sparql_endpoint, + timeout=timeout_s, + slow_threshold=args.slow_threshold, + refresh_interval=args.refresh, + system=args.system, + ).run() + return True diff --git a/src/qlever/monitor/__init__.py b/src/qlever/monitor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py new file mode 100644 index 00000000..b4f4cd85 --- /dev/null +++ b/src/qlever/monitor/app.py @@ -0,0 +1,241 @@ +from __future__ import annotations + +import time +from pathlib import Path + +from textual import work +from textual.app import App +from textual.binding import Binding +from textual.css.query import NoMatches +from textual.worker import get_current_worker + +from qlever.monitor.live_data import ( + LIVE_HORIZON_MS, + LiveLogReader, + LiveState, + current_ms, + find_active_queries, + load_completed_history, +) +from qlever.monitor.log_reader import read_first_timestamp +from qlever.monitor.util import clipboard_install_hint, copy_text +from qlever.monitor.views.historic import HistoricScreen +from qlever.monitor.views.live import LiveScreen +from qlever.monitor.widgets.sparql_pane import SparqlPane, SparqlScroll +from qlever.util import pretty_printed_query + + +class MonitorQueriesApp(App): + """ + Textual app for the interactive `monitor-queries` TUI + + The app has two tabs: + 1. Live view which tails an active `qlever-server` metrics.log file to + show currently-active queries on the server plus rolling metrics + 2. Historic view which parses the log file in a given time window for + active queries and metrics. + + In both tabs, the user has the option of selecting a particular query and + view the full SPARQL query with syntax highlighting plus copy it. + """ + + CSS_PATH = "monitor.tcss" + + SCREENS = {"live": LiveScreen, "historic": HistoricScreen} + + BINDINGS = [ + ("q", "quit", "Quit/Exit"), + Binding("t", "cycle_themes", "Change theme", key_display="T/t"), + Binding("T", "cycle_themes(-1)", "Previous theme", show=False), + ("y", "copy_query", "Copy SPARQL"), + ("p", "pretty_print", "Pretty print"), + ("c", "clear_query", "Clear SPARQL"), + Binding( + "shift+up", + "scroll_sparql_up", + "Scroll SPARQL", + key_display="⇧ ↑↓", + ), + Binding( + "shift+down", "scroll_sparql_down", "Scroll SPARQL", show=False + ), + ] + + def __init__( + self, + log_file: Path, + sparql_endpoint: str, + timeout: int, + slow_threshold: int, + refresh_interval: float, + system: str = "docker", + ) -> None: + super().__init__() + self.log_file = log_file + self.sparql_endpoint = sparql_endpoint + self.timeout = timeout + self.window_pad_ms = 2000 * timeout + self.slow_threshold = slow_threshold + self.refresh_interval = refresh_interval + self.system = system + self.live_state = LiveState() + self.log_start_ms = None + + def set_log_start_ms(self) -> None: + """Read the log's first timestamp and cache it on the app. + + Re-reads only while the value is still None (empty log at boot); + once set, the value is immutable and subsequent calls no-op. + """ + if self.log_start_ms is not None: + return + with self.log_file.open("rb") as log_stream: + self.log_start_ms = read_first_timestamp( + log_stream, self.log_file.stat().st_size + ) + + def on_mount(self) -> None: + """Boot the live engine, then open the Live screen.""" + # The ansi theme renders an unreadable Footer (its colors are + # ansi_default, which flattens to black). Drop it so it is + # offered neither in the theme cycle nor the command palette. + self.unregister_theme("textual-ansi") + self.boot_time_ms = current_ms() + state, cut_offset, _ = find_active_queries( + self.log_file, self.window_pad_ms + ) + self.live_state = state + self.cut_offset = cut_offset + self.set_log_start_ms() + self.tail_live_log() + self.load_metrics_history() + self.push_screen("live") + + @work(thread=True, exclusive=True, group="tail_live_log") + def tail_live_log(self) -> None: + """Poll the log forever, dispatching new events into LiveState.""" + tailer = LiveLogReader( + self.log_file, + self.live_state, + self.cut_offset, + self.window_pad_ms, + ) + worker = get_current_worker() + with self.log_file.open("rb") as log_stream: + while not worker.is_cancelled: + tailer.poll(log_stream) + time.sleep(tailer.poll_interval) + + @work(thread=True, group="load_metrics_history") + def load_metrics_history(self) -> None: + """Prepend the hour before cut_offset into completed history so metrics aren't empty at boot.""" + load_completed_history( + self.log_file, + self.live_state, + self.cut_offset, + self.window_pad_ms, + ) + # Backfill is done; record how far back metrics have data. + log_start_or_boot = self.log_start_ms or self.boot_time_ms + self.live_state.metrics_known_from_ms = max( + log_start_or_boot, self.boot_time_ms - LIVE_HORIZON_MS + ) + + def action_swap_screen(self) -> None: + """Toggle between Live and Historic (bound to Tab on each screen).""" + target = "historic" if isinstance(self.screen, LiveScreen) else "live" + if target == "historic": + self.set_log_start_ms() + if self.log_start_ms is None: + self.notify( + "Log is empty - nothing to show in Historic yet", + severity="warning", + ) + return + self.switch_screen(target) + + def action_copy_query(self) -> None: + """Copy the displayed query's SPARQL to the system clipboard.""" + pane = self.screen.query_one(SparqlPane) + if pane.displayed_text is None: + self.notify("No query selected", severity="warning") + return + result = copy_text(pane.displayed_text) + if result is True: + self.notify("SPARQL copied to clipboard") + elif result is None: + self.notify( + f"No clipboard tool found: {clipboard_install_hint()}", + severity="warning", + ) + else: + self.notify("Clipboard tool failed", severity="error") + + def action_pretty_print(self) -> None: + """Toggle pretty-printed SPARQL in the pane for the selected query.""" + pane = self.screen.query_one(SparqlPane) + if pane.content is None: + self.notify("No query selected", severity="warning") + return + if pane.show_pretty: + pane.show_pretty = False + return + pane.show_pretty = True + # A cached result means the formatter already ran for this query. + if pane.pretty_text is None: + self.pretty_print_worker(pane, pane.content.sparql_text) + + @work(thread=True, exclusive=True, group="pretty_print") + def pretty_print_worker(self, pane: SparqlPane, raw: str) -> None: + """Run the blocking sparql-formatter off the UI thread.""" + result = pretty_printed_query( + raw, show_prefixes=True, system=self.system + ) + self.call_from_thread(self.apply_pretty_result, pane, raw, result) + + def apply_pretty_result( + self, pane: SparqlPane, raw: str, result: str | None + ) -> None: + """Apply the formatter output, unless the selection moved on.""" + if pane.content is None or pane.content.sparql_text != raw: + return + if result is None: + pane.show_pretty = False + self.notify("Could not pretty-print this query", severity="error") + return + pane.pretty_text = result + + def action_clear_query(self) -> None: + """Drop the displayed query, restoring the empty-state hint.""" + pane = self.screen.query_one(SparqlPane) + pane.content = None + + def action_scroll_sparql_up(self) -> None: + """Scroll the overflowing SPARQL pane up one line.""" + self.screen.query_one(SparqlScroll).scroll_up() + + def action_scroll_sparql_down(self) -> None: + """Scroll the overflowing SPARQL pane down one line.""" + self.screen.query_one(SparqlScroll).scroll_down() + + def check_action( + self, action: str, parameters: tuple[object, ...] + ) -> bool | None: + """Show the scroll bindings only when the query overflows the pane. + + Returns False (hidden) rather than None (grayed) so the footer + entry disappears entirely until there is something to scroll. + """ + if action in ("scroll_sparql_up", "scroll_sparql_down"): + try: + scroll = self.screen.query_one(SparqlScroll) + except NoMatches: + return False + return scroll.max_scroll_y > 0 + return True + + def action_cycle_themes(self, direction: int = 1) -> None: + """Step through themes; `direction` is +1 for `t`, -1 for `T`.""" + themes = list(self.available_themes) + selected_theme_idx = themes.index(self.theme) + self.theme = themes[(selected_theme_idx + direction) % len(themes)] diff --git a/src/qlever/monitor/historic_data.py b/src/qlever/monitor/historic_data.py new file mode 100644 index 00000000..776e387a --- /dev/null +++ b/src/qlever/monitor/historic_data.py @@ -0,0 +1,231 @@ +"""Data layer for the Historic screen. + +Reads one time window of the log into a list of `LoggedQuery`, +filters it by display mode, and maps the survivors into the +`models.py` dataclasses the screen renders. Metrics are computed per +mode at render time. A window change reruns the scan; a mode change +reuses the scanned list. +""" + +from dataclasses import replace +from pathlib import Path +from typing import NamedTuple + +from qlever.monitor.log_reader import ( + CompletedQuery, + load_sparql_at, + offset_for_ts, + pair_start_end_events, + scan_range, +) +from qlever.monitor.metrics import metrics_for_queries +from qlever.monitor.models import ( + ControlsState, + HistoricQueryRow, + MetricsCounts, +) + +# Used as the duration of orphaned queries. Negative so they sort last. +DURATION_UNKNOWN = -1 + + +class LoggedQuery(NamedTuple): + """One SPARQL query observed in the log over the current window. + + `end_ms` is `None` when the query started but has not ended yet, + either because it is still running or the server crashed before + writing the end event. `status` carries the raw end status, or + `"running"` for a still-open survivor. `start_line_offset` is the + byte offset of the start line, used to read the query's text only + once it is about to be displayed. + """ + + start_ms: int + end_ms: int | None + status: str + start_line_offset: int + + +def read_window( + log_path: Path, + window_start_ms: int, + window_end_ms: int, + pad_ms: int, + log_end_ms: int, + now_ms: int, +) -> list[LoggedQuery]: + """Scan one time window of the log into the queries overlapping it. + + The byte range scanned is the window padded by `pad_ms` on each + side so both events of a query straddling the window edge are + recovered. Pairs that lie entirely inside the pad are dropped, so + the result holds only rows a mode predicate could keep. The query + text is not read here; `load_query_details_for_rows` reads it for + the visible rows. Metrics are computed later, per mode, in + `render_window`. + + A still-open query is `"running"` only if its start is within + `pad_ms` of `log_end_ms` and the log itself is fresh + (`now_ms - log_end_ms <= pad_ms`); otherwise `"orphaned"`. + """ + with log_path.open("rb") as log_stream: + file_size = log_path.stat().st_size + lo_offset = offset_for_ts( + log_stream, window_start_ms - pad_ms, file_size + ) + hi_bound = offset_for_ts(log_stream, window_end_ms + pad_ms, file_size) + events = scan_range(log_stream, lo_offset, hi_bound) + completed, still_open = pair_start_end_events(events) + + queries = [] + for pair in completed: + if pair.start_ms > window_end_ms or pair.end_ms < window_start_ms: + continue + queries.append( + LoggedQuery( + start_ms=pair.start_ms, + end_ms=pair.end_ms, + status=pair.status, + start_line_offset=pair.start_line_offset, + ) + ) + log_is_fresh = now_ms - log_end_ms <= pad_ms + running_cutoff_ms = log_end_ms - pad_ms + for start_ms, start_line_offset in still_open.values(): + if start_ms > window_end_ms: + continue + status = ( + "running" + if log_is_fresh and start_ms >= running_cutoff_ms + else "orphaned" + ) + queries.append( + LoggedQuery( + start_ms=start_ms, + end_ms=None, + status=status, + start_line_offset=start_line_offset, + ) + ) + + return queries + + +def filter_queries( + queries: list[LoggedQuery], + mode: str, + window_start_ms: int, + window_end_ms: int, +) -> list[LoggedQuery]: + """Select queries from the scanned window for the given mode. + + The given list is already the ACTIVE set (every query overlaps the + window), so ACTIVE is the fallthrough that returns it unchanged. + STARTS narrows to queries that began inside the window; ENDS + narrows to queries that finished inside it. Still-running queries + (`end_ms is None`) can satisfy STARTS but never ENDS. + """ + if mode == "STARTS": + return [ + query + for query in queries + if window_start_ms <= query.start_ms <= window_end_ms + ] + if mode == "ENDS": + return [ + query + for query in queries + if query.end_ms is not None + and window_start_ms <= query.end_ms <= window_end_ms + ] + return queries + + +def display_duration_ms(query: LoggedQuery, log_end_ms: int) -> int: + """Duration to show for a query row; DURATION_UNKNOWN for orphans.""" + if query.status == "orphaned": + return DURATION_UNKNOWN + return (query.end_ms or log_end_ms) - query.start_ms + + +def render_window( + queries: list[LoggedQuery], + controls: ControlsState, + slow_threshold_ms: int, + log_end_ms: int, +) -> tuple[list[HistoricQueryRow], MetricsCounts]: + """Map the scanned window into the rows and metrics the UI consumes. + + Runs `filter_queries` for the current mode and translates each + surviving `LoggedQuery` into a `HistoricQueryRow`. `duration_ms` + is measured against `log_end_ms` for still-running queries, + against the recorded `end_ms` for completed ones, and reported as + DURATION_UNKNOWN for crash orphans. The rows carry empty text; + `load_query_details_for_rows` fills it for the visible slice. + Metrics are computed over the completed queries in the selected + mode, so they match the rows on screen, and labelled with the + current window size. + """ + selected = filter_queries( + queries, controls.mode, controls.start_ms, controls.end_ms + ) + historic_rows = [ + HistoricQueryRow( + qid="", + start_line_offset=query.start_line_offset, + started_at_ms=query.start_ms, + duration_ms=display_duration_ms(query, log_end_ms), + status=query.status, + sparql="", + client_ip="", + ) + for query in selected + ] + completed = [ + CompletedQuery( + start_ms=query.start_ms, + end_ms=query.end_ms, + duration_ms=query.end_ms - query.start_ms, + status=query.status, + start_line_offset=query.start_line_offset, + ) + for query in selected + if query.end_ms is not None + ] + snapshot = metrics_for_queries(completed, slow_threshold_ms) + metrics = MetricsCounts(label=controls.window_size, **snapshot._asdict()) + return historic_rows, metrics + + +def load_query_details_for_rows( + log_path: Path, + rows: list[HistoricQueryRow], + query_details_cache: dict[int, tuple[str, str, str]], +) -> list[HistoricQueryRow]: + """Fill the deferred start-line fields on the given rows. + + Each row's `qid`, `client_ip`, and SPARQL text live on its start + line, which `read_window` did not read. Offsets already in + `query_details_cache` are reused; the rest are read in one pass, + opening the file only when something is missing. The cache is + scoped to one window so a sort or mode change repaints from memory. + Returns filled copies; the input rows are left unchanged. + """ + missing = [ + row.start_line_offset + for row in rows + if row.start_line_offset not in query_details_cache + ] + if missing: + with log_path.open("rb") as log_stream: + for offset in missing: + query_details_cache[offset] = load_sparql_at( + log_stream, offset + ) + filled_rows = [] + for row in rows: + qid, client_ip, sparql = query_details_cache[row.start_line_offset] + filled_rows.append( + replace(row, qid=qid, client_ip=client_ip, sparql=sparql) + ) + return filled_rows diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py new file mode 100644 index 00000000..0d4c727d --- /dev/null +++ b/src/qlever/monitor/live_data.py @@ -0,0 +1,421 @@ +"""Data layer for the Live screen. + +Owns the in-memory state the Live screen renders: the queries currently +running on the server and the rolling 1h history of completed queries +that feeds the 5m, 15m, and 1h metric rows. The tailer is the only +steady-state writer; the screen's refresh callbacks read under the same +lock and return the frozen `models.py` dataclasses the widgets consume. +""" + +import json +import threading +import time +from collections import deque +from collections.abc import Callable +from dataclasses import dataclass, field +from pathlib import Path +from typing import BinaryIO + +from qlever.monitor.log_reader import ( + STATUS_SET, + CompletedQuery, + load_sparql_at, + offset_for_ts, + pair_start_end_events, + read_last_timestamp, + scan_range, +) +from qlever.monitor.metrics import ( + EMPTY_FIELDS, + MetricsSnapshot, + metrics_for_ranges, +) +from qlever.monitor.models import LiveQueryRow, MetricsCounts + +LIVE_METRIC_WINDOWS_MS = [5 * 60_000, 15 * 60_000, 60 * 60_000] +LIVE_HORIZON_MS = LIVE_METRIC_WINDOWS_MS[2] +LIVE_METRIC_LABELS = ["last 5m", "last 15m", "last 1h"] +LOG_IDLE_THRESHOLD_MS = 10_000 +PING_INTERVAL_S = 5.0 +PING_FAILS_TO_UNREACHABLE = 3 + + +def current_ms() -> int: + """Wall-clock now in epoch milliseconds.""" + return time.time_ns() // 1_000_000 + + +def is_log_fresh(state: "LiveState", now_ms: int) -> bool: + """True iff the log produced a line within LOG_IDLE_THRESHOLD_MS. + + The live screen's reachability state machine uses this as the + cheap evidence stream; while it's True we skip pinging the server. + """ + last_ms = state.latest_event_ms + return last_ms is not None and now_ms - last_ms <= LOG_IDLE_THRESHOLD_MS + + +class CompletedQueries: + """Rolling 1h history of completed queries. + + Storage is a deque ordered by insertion, which is the tailer's + forward read order. Wall-clock is monotonic in steady state so + older entries sit at the left and trimming is one popleft loop. + """ + + def __init__(self) -> None: + self.entries = deque() + + def add(self, query: CompletedQuery) -> None: + """Append a completed query to the history. + + Caller writes in tailer order, which is wall-clock order in + steady state; entries remain sorted by end_ms for trimming. + """ + self.entries.append(query) + + def drop_older_than(self, cutoff_ms: int) -> None: + """Discard completions that ended before cutoff_ms. + + Called on each tailer tick with cutoff_ms = now_ms - 1h so the + history stays bounded even during quiet periods. + """ + while self.entries and self.entries[0].end_ms < cutoff_ms: + self.entries.popleft() + + def metrics_for_windows( + self, + now_ms: int, + windows_ms: list[int], + slow_threshold_ms: int, + data_start_ms: int, + ) -> list[MetricsSnapshot | None]: + """Compute one snapshot per window in a single deque walk. + + Each window is the interval [now_ms - width, now_ms]. + data_start_ms is the oldest timestamp the caller has loaded + complete data for; a window starting before that gets None + instead of a snapshot, so the widget can render the row as + not-yet-ready. Returns snapshots in the same order as windows_ms. + """ + ranges = [(now_ms - width, now_ms) for width in windows_ms] + snapshots = metrics_for_ranges(self.entries, ranges, slow_threshold_ms) + return [ + None if lo_ms < data_start_ms else snapshot + for (lo_ms, _), snapshot in zip(ranges, snapshots) + ] + + +@dataclass +class ActiveQuery: + """A running or just-finished query on the Live screen. + + end_ms is None while the query runs; seen is set once a repaint has + rendered the query, so one too fast to catch while running still + shows for a single paint before being dropped. + """ + + start_ms: int + end_ms: int | None + client_ip: str + sparql: str + seen: bool = False + + +@dataclass +class LiveState: + """Shared in-memory state for the Live screen. + + The tailer is the steady-state writer; the boot-time loader prepends + historical completions once. UI refresh callbacks read under the + same lock and build metric rows on the fly. + """ + + completed: CompletedQueries = field(default_factory=CompletedQueries) + active: dict[str, ActiveQuery] = field(default_factory=dict) + lock: threading.Lock = field(default_factory=threading.Lock) + metrics_known_from_ms: int | None = None + latest_event_ms: int | None = None + + +def find_active_queries( + log_path: Path, + window_pad_ms: int, +) -> tuple[LiveState, int, int]: + """Scan the log's tail to determine queries still running. + + Reads [eof_ts - window_pad_ms, EOF], pairs start/end events, and + records the unmatched starts as the active set. The completed + pairs found in this window are discarded; backfill rediscovers + them. Loads SPARQL text for each survivor by stored offset. + + Returns (state, cut_offset, eof_ts). cut_offset is where the tailer + starts reading forward; eof_ts is the timestamp of the log's last + complete line, or 0 when the log holds none. + """ + state = LiveState() + with log_path.open("rb") as log_stream: + file_size = log_path.stat().st_size + eof_ts = read_last_timestamp(log_stream, file_size) + if eof_ts is None: + return (state, file_size, 0) + + state.latest_event_ms = eof_ts + lo_offset = offset_for_ts( + log_stream, eof_ts - window_pad_ms, file_size + ) + events = scan_range(log_stream, lo_offset, file_size) + _, still_open = pair_start_end_events(events) + + for qid, (start_ms, start_line_offset) in still_open.items(): + _, client_ip, sparql = load_sparql_at( + log_stream, start_line_offset + ) + state.active[qid] = ActiveQuery( + start_ms=start_ms, + end_ms=None, + client_ip=client_ip, + sparql=sparql, + ) + + return (state, file_size, eof_ts) + + +def load_completed_history( + log_path: Path, + state: LiveState, + cut_offset: int, + window_pad_ms: int, + now_ms: Callable[[], int] = current_ms, +) -> None: + """Scan the hour before cut_offset and seed completed history. + + One-shot: callers spawn this on a daemon thread at startup if they + want it off the main loop. After it returns, the tailer is the only + writer of state.completed. + + The scan starts window_pad_ms before the hour boundary so a query + that began before the hour but ended inside it still has its start + line in range and can be paired. Pairs that ended before the hour + are then dropped, matching the deque's end_ms retention. + """ + with log_path.open("rb") as log_stream: + oldest_wanted_ms = now_ms() - LIVE_HORIZON_MS + scan_start_offset = offset_for_ts( + log_stream, oldest_wanted_ms - window_pad_ms, cut_offset + ) + events = scan_range(log_stream, scan_start_offset, cut_offset) + paired, _ = pair_start_end_events(events) + older_completions = [ + query for query in paired if query.end_ms >= oldest_wanted_ms + ] + + with state.lock: + tailer_entries = state.completed.entries + state.completed.entries = deque(older_completions) + state.completed.entries.extend(tailer_entries) + + +class LiveLogReader: + """Tail cursor and per-poll logic for the live query log. + + Sole writer of LiveState in steady state. Each poll() reads any + whole lines appended since the last cursor, dispatches them into + active / completed under the state lock, and trims stale entries. + The run loop is owned by the caller. + """ + + def __init__( + self, + log_path: Path, + state: LiveState, + cut_offset: int, + window_pad_ms: int, + poll_interval: float = 0.2, + now_ms: Callable[[], int] = current_ms, + ) -> None: + self.log_path = log_path + self.state = state + self.cursor = cut_offset + self.window_pad_ms = window_pad_ms + self.poll_interval = poll_interval + self.now_ms = now_ms + + def poll(self, log_stream: BinaryIO) -> None: + """Process any whole lines appended since the last poll.""" + log_stream.seek(self.cursor) + for line in log_stream: + if not line.endswith(b"\n"): + break + self.cursor += len(line) + self.handle_line(line) + self.evict_stale() + + def evict_stale(self) -> None: + """Drop old completions and finished or stale active entries. + + An active entry leaves once a repaint has shown it and it has + finished, or once its start is older than the 2t safety horizon + (a missing end event, since no real query lives that long). + """ + now = self.now_ms() + completed_cutoff = now - LIVE_HORIZON_MS + active_cutoff = now - self.window_pad_ms + with self.state.lock: + self.state.completed.drop_older_than(completed_cutoff) + to_remove = [ + qid + for qid, entry in self.state.active.items() + if (entry.end_ms is not None and entry.seen) + or entry.start_ms < active_cutoff + ] + for qid in to_remove: + del self.state.active[qid] + + def handle_line(self, line: bytes) -> None: + """Parse one whole line and dispatch it into LiveState.""" + try: + obj = json.loads(line) + except (ValueError, TypeError): + return + if not isinstance(obj, dict): + return + ts_ms = obj.get("ts-ms") + event = obj.get("event") + qid = obj.get("qid") + if not isinstance(ts_ms, int) or not isinstance(qid, str): + return + + if event == "start": + sparql = obj.get("query") + if not isinstance(sparql, str): + sparql = "" + client_ip = obj.get("client-ip", "") + with self.state.lock: + self.state.active[qid] = ActiveQuery( + start_ms=ts_ms, + end_ms=None, + client_ip=client_ip, + sparql=sparql, + ) + self.state.latest_event_ms = max( + self.state.latest_event_ms or 0, ts_ms + ) + return + + if event == "end": + status = obj.get("status") + if status not in STATUS_SET: + return + with self.state.lock: + self.state.latest_event_ms = max( + self.state.latest_event_ms or 0, ts_ms + ) + entry = self.state.active.get(qid) + if entry is None: + return + entry.end_ms = ts_ms + self.state.completed.add( + CompletedQuery( + start_ms=entry.start_ms, + end_ms=ts_ms, + duration_ms=ts_ms - entry.start_ms, + status=status, + start_line_offset=None, + ) + ) + + +def get_live_query_rows(state: LiveState, now_ms: int) -> list[LiveQueryRow]: + """Snapshot the active set as table rows, marking each one shown. + + Skips entries that have finished and already appeared in a repaint + (they linger only until the tailer reaps them), and flips seen on + every returned entry. A query that finished before any paint could + catch it running is therefore still shown exactly once. The caller + chooses the clock: the Live screen passes display_clock_ms() so + freeze-on-unreachable freezes durations too. + """ + rows = [] + with state.lock: + for qid, entry in state.active.items(): + if entry.end_ms is not None and entry.seen: + continue + entry.seen = True + duration_ms = ( + entry.end_ms - entry.start_ms + if entry.end_ms is not None + else now_ms - entry.start_ms + ) + rows.append( + LiveQueryRow( + qid=qid, + started_at_ms=entry.start_ms, + duration_ms=duration_ms, + sparql=entry.sparql, + client_ip=entry.client_ip, + ) + ) + return rows + + +def discard_finished_backlog(state: LiveState) -> None: + """Mark finished-but-unshown active entries as seen. + + Called on screen resume so the queries that finished while the user + was away are dropped instead of all flashing at once. Running + entries are left to show. + """ + with state.lock: + for entry in state.active.values(): + if entry.end_ms is not None: + entry.seen = True + + +def format_eta(ms: int) -> str: + """Coarse countdown for a not-ready metric row; sub-minute reads "<1m".""" + if ms < 60_000: + return "<1m" + return f"{ms // 60_000}m" + + +def get_live_metrics( + state: LiveState, + slow_threshold_ms: int, + now_ms: int, +) -> list[MetricsCounts]: + """Three rolling-window metric rows for the Live screen. + + Takes a short-locked deque copy so the metric scan and percentile + work runs lock-free while the tailer keeps appending. A window + whose start predates metrics_known_from_ms renders as an all-None + row so the widget shows "..." until coverage reaches that range. + """ + with state.lock: + history = CompletedQueries() + history.entries = deque(state.completed.entries) + coverage_start_ms = state.metrics_known_from_ms + snapshots = history.metrics_for_windows( + now_ms=now_ms, + windows_ms=LIVE_METRIC_WINDOWS_MS, + slow_threshold_ms=slow_threshold_ms, + data_start_ms=coverage_start_ms or now_ms, + ) + rows = [] + for label, width_ms, snap in zip( + LIVE_METRIC_LABELS, LIVE_METRIC_WINDOWS_MS, snapshots + ): + # Show "ready in Nm" only once we know when coverage arrives; + # during boot ramp coverage_start_ms is None and rows just read "…". + message = None + if snap is None and coverage_start_ms is not None: + eta_ms = coverage_start_ms + width_ms - now_ms + message = f"ready in {format_eta(eta_ms)}" + rows.append( + MetricsCounts( + label=label, + **(snap._asdict() if snap is not None else EMPTY_FIELDS), + not_ready_message=message, + ) + ) + return rows diff --git a/src/qlever/monitor/log_reader.py b/src/qlever/monitor/log_reader.py new file mode 100644 index 00000000..1e413b8e --- /dev/null +++ b/src/qlever/monitor/log_reader.py @@ -0,0 +1,358 @@ +"""Pure primitives for reading the query metrics log""" + +import json +from collections.abc import Iterator +from typing import BinaryIO, NamedTuple + +TS_PREFIX = b'{"ts-ms":' +EVENT_KEY = b'"event":"' +QID_KEY = b'"qid":"' +STATUS_KEY = b'"status":"' + +STATUS_SET = frozenset({"ok", "failed", "cancelled", "timeout", "unknown"}) + + +class CompletedQuery(NamedTuple): + """One start event paired with its matching end event. + + start_line_offset is the byte offset of the start line in the log, + kept so load_sparql_at can fetch the SPARQL text later. None when + the pair was built without scanning the file (the live tailer), + since those completions feed metrics only and never need their + SPARQL text re-read. + """ + + start_ms: int + end_ms: int + duration_ms: int + status: str + start_line_offset: int | None + + +def slice_string_value(line_bytes: bytes, key: bytes) -> str | None: + """Return the string value following `key`, up to the next quote. + + `key` includes the value's opening quote (for example b'"qid":"'). + Returns None if the key or its closing quote is absent. + """ + start = line_bytes.find(key) + if start == -1: + return None + start += len(key) + end = line_bytes.find(b'"', start) + if end == -1: + return None + return line_bytes[start:end].decode() + + +def peek_ts_ms(line_bytes: bytes) -> int | None: + """Read only the leading ts-ms timestamp from a log line. + + The producer always writes ts-ms first, so the integer sits between + the fixed prefix and the next comma. Lets navigation compare + timestamps without parsing the rest of the line. Returns None if the + prefix is absent or the value is not an integer. + """ + if not line_bytes.startswith(TS_PREFIX): + return None + comma = line_bytes.find(b",", len(TS_PREFIX)) + if comma == -1: + return None + try: + return int(line_bytes[len(TS_PREFIX) : comma]) + except ValueError: + return None + + +def parse_line( + line_bytes: bytes, +) -> tuple[int, str, str, str | None] | None: + """Byte-slice one log line into (ts_ms, event, qid, status). + + The fast path: no json.loads, the query blob is never scanned. + `status` is None on start lines. Returns None on any sanity-check + miss so the caller can fall back to parse_line_fallback. Never + raises. + """ + ts_ms = peek_ts_ms(line_bytes) + if ts_ms is None: + return None + + event = slice_string_value(line_bytes, EVENT_KEY) + if event not in ("start", "end"): + return None + + qid = slice_string_value(line_bytes, QID_KEY) + if qid is None: + return None + + if event == "start": + return (ts_ms, event, qid, None) + + status = slice_string_value(line_bytes, STATUS_KEY) + if status not in STATUS_SET: + return None + return (ts_ms, event, qid, status) + + +def parse_line_fallback( + line_bytes: bytes, +) -> tuple[int, str, str, str | None] | None: + """Real json.loads for a line parse_line rejected. + + Same 4-tuple shape as parse_line so callers treat both paths + alike. Returns None on a malformed line or a contract miss; never + raises. + """ + try: + obj = json.loads(line_bytes) + except (ValueError, TypeError): + return None + if not isinstance(obj, dict): + return None + + ts_ms = obj.get("ts-ms") + event = obj.get("event") + qid = obj.get("qid") + if not isinstance(ts_ms, int): + return None + if event not in ("start", "end"): + return None + if not isinstance(qid, str): + return None + + if event == "start": + return (ts_ms, event, qid, None) + + status = obj.get("status") + if status not in STATUS_SET: + return None + return (ts_ms, event, qid, status) + + +def next_whole_line( + log_stream: BinaryIO, probe: int, file_size: int +) -> tuple[int, int] | None: + """Find the first complete line starting at or after `probe`. + + A raw probe usually lands mid-line, so we align to the start of the + next whole line. Returns (line_start, ts_ms), or None when `probe` + falls in the file's trailing partial line or past EOF. + """ + if probe >= file_size: + return None + if probe == 0: + line_start = 0 + log_stream.seek(0) + else: + log_stream.seek(probe) + # Discard the partial line the probe landed in; the next read + # then starts on a whole line. + log_stream.readline() + line_start = log_stream.tell() + + line = log_stream.readline() + # An empty read or a line with no terminating newline is the file's + # trailing partial line, not a complete record. + if not line.endswith(b"\n"): + return None + ts_ms = peek_ts_ms(line) + if ts_ms is None: + return None + return (line_start, ts_ms) + + +def read_first_timestamp(log_stream: BinaryIO, file_size: int) -> int | None: + """Return the ts_ms of the first complete line, or None. + + Scans forward from the start of the file, skipping malformed + lines, and returns the first ts_ms that parses. + """ + if file_size == 0: + return None + log_stream.seek(0) + while True: + line = log_stream.readline() + # No newline means EOF or a partial line: nothing complete left. + if not line.endswith(b"\n"): + return None + ts_ms = peek_ts_ms(line) + if ts_ms is not None: + return ts_ms + + +def read_last_timestamp(log_stream: BinaryIO, file_size: int) -> int | None: + """Return the ts_ms of the last complete line, or None. + + Reads a 32KB tail buffer and walks its lines backward, returning + the first ts_ms that parses. A trailing partial line is skipped by + its missing newline; a leading mid-line fragment is skipped because + it does not start with the ts_ms prefix. Doubles the buffer and + retries if nothing parses, capped at the whole file. + """ + if file_size == 0: + return None + tail_bytes = 32 * 1024 + while True: + probe = max(0, file_size - tail_bytes) + log_stream.seek(probe) + for line in reversed(log_stream.readlines()): + if not line.endswith(b"\n"): + continue + ts_ms = peek_ts_ms(line) + if ts_ms is not None: + return ts_ms + if probe == 0: + return None + tail_bytes *= 2 + + +GALLOP_START = 128 * 1024 + + +def offset_for_ts(log_stream: BinaryIO, target_ms: int, file_size: int) -> int: + """Find where to start reading so a forward read sees every line + at or after target_ms. + + The log is only roughly time-ordered, so this aims a little early + on purpose rather than risk skipping lines; the caller ignores the + few extra older lines. Returns 0 if target_ms is at or before the + first line, and a spot near the end if it is past the last line. + """ + first = next_whole_line(log_stream, 0, file_size) + if first is None: + return 0 + first_start, first_ts = first + if first_ts >= target_ms: + return 0 + + # Gallop backward from the end, doubling the step until a probed + # line is old enough (ts <= target). before_target brackets the + # search from the old side, after_target from the new side. + before_target = first_start + after_target = file_size + step = GALLOP_START + while True: + probe = file_size - step + if probe <= before_target: + break + found = next_whole_line(log_stream, probe, file_size) + if found is None: + step *= 2 + continue + start, ts = found + if ts <= target_ms: + before_target = start + break + after_target = start + step *= 2 + + # Narrow the bracket. Bias every step toward the older side so the + # result never sits past the first matching line. + while after_target - before_target > 1: + mid = (before_target + after_target) // 2 + found = next_whole_line(log_stream, mid, file_size) + if found is None: + after_target = mid + continue + start, ts = found + # The probe sat inside one long line spanning the bracket edge, + # so it gave no usable interior line. Tighten the new side to + # keep the search making progress. + if start <= before_target or start >= after_target: + after_target = mid + continue + if ts < target_ms: + before_target = start + else: + after_target = start + return before_target + + +def scan_range( + log_stream: BinaryIO, lo_offset: int, hi_bound: int +) -> Iterator: + """Yield (parsed, line_offset) for whole lines in [lo_offset, hi_bound]. + + parsed is the (ts_ms, event, qid, status) tuple. lo_offset must be + line-aligned; it always comes from offset_for_ts. The line straddling + hi_bound is included; a trailing line without a newline is left for a + later read. Malformed lines are skipped. + """ + log_stream.seek(lo_offset) + offset = lo_offset + for line in log_stream: + if offset > hi_bound: + return + if not line.endswith(b"\n"): + return + parsed = parse_line(line) or parse_line_fallback(line) + if parsed is not None: + yield (parsed, offset) + offset += len(line) + + +def pair_start_end_events( + events: Iterator, +) -> tuple[list[CompletedQuery], dict[str, tuple[int, int]]]: + """Pair start and end events from a scan into completed queries. + + Walks the events once. Each end pops its matching start by qid into + completed_queries; whatever remains unmatched is still_open. Unmatched + ends are dropped (their start was outside the scanned range). + + events: yields ((ts_ms, event, qid, status), line_offset) from + scan_range. + + Returns (completed_queries, still_open). still_open maps qid to + (start_ms, start_line_offset) for queries with no end event seen yet. + """ + completed_queries = [] + still_open = {} + for (ts_ms, event, qid, status), line_offset in events: + if event == "start": + still_open[qid] = (ts_ms, line_offset) + continue + # event == "end" + matched_start = still_open.pop(qid, None) + if matched_start is None: + continue + start_ms, start_line_offset = matched_start + completed_queries.append( + CompletedQuery( + start_ms=start_ms, + end_ms=ts_ms, + duration_ms=ts_ms - start_ms, + status=status, + start_line_offset=start_line_offset, + ) + ) + return (completed_queries, still_open) + + +def extract_qid_ip_query(line_bytes: bytes) -> tuple[str, str, str]: + """Return (qid, client_ip, query) from a start line, or ("", "", ""). + + All three fields come from one json.loads. The caller has already + validated this is a start line via parse_line, so qid and query are + guaranteed present strings. client-ip falls back to "" so log lines + written before the field existed still produce usable rows. + """ + try: + obj = json.loads(line_bytes) + except (ValueError, TypeError): + return ("", "", "") + return (obj["qid"], obj.get("client-ip", ""), obj["query"]) + + +def load_sparql_at( + log_stream: BinaryIO, line_offset: int +) -> tuple[str, str, str]: + """Return (qid, client_ip, sparql) for the start line at line_offset. + + Used by callers that have only the offset, not the line bytes: + find_active_queries on survivors (qid already known, ignored), and + Historic on displayed rows (needs all three for the SparqlPane). + """ + log_stream.seek(line_offset) + return extract_qid_ip_query(log_stream.readline()) diff --git a/src/qlever/monitor/metrics.py b/src/qlever/monitor/metrics.py new file mode 100644 index 00000000..1b3981a4 --- /dev/null +++ b/src/qlever/monitor/metrics.py @@ -0,0 +1,114 @@ +"""Summarises a group of finished queries into one metrics row. + +The Live and Historic screens both display these counts and timings. +Keeping the math here lets both engines share it without importing +each other. +""" + +import statistics +from collections.abc import Collection, Iterable +from typing import NamedTuple + +from qlever.monitor.log_reader import CompletedQuery + + +class MetricsSnapshot(NamedTuple): + """One metrics row: counts and timings for a group of finished queries. + + Every finished query has exactly one status, so the status counts + always add up to seen: + seen == ok + failed + timeout + cancelled + unknown + slow is counted separately, since a query can be both ok and slow. + p50 and p95 are the median and 95th-percentile run times in + milliseconds, or None when there are no queries to measure. + """ + + seen: int + ok: int + failed: int + timeout: int + cancelled: int + unknown: int + slow: int + p50: int | None + p95: int | None + + +# All-None counts, spread into MetricsCounts when a row has no data yet. +EMPTY_FIELDS = dict.fromkeys(MetricsSnapshot._fields) + + +def percentiles(durations_ms: list[int]) -> tuple[int | None, int | None]: + """Return the median (p50) and 95th-percentile (p95) run times. + + Both are in milliseconds. Returns (None, None) when the list is + empty. + """ + if not durations_ms: + return (None, None) + if len(durations_ms) == 1: + only = durations_ms[0] + return (only, only) + cuts = statistics.quantiles(durations_ms, n=100) + return (round(cuts[49]), round(cuts[94])) + + +def build_snapshot( + counts: dict[str, int], + slow: int, + durations_ms: list[int], +) -> MetricsSnapshot: + """Build one MetricsSnapshot from already-counted query results. + + counts holds the per-status totals, slow is how many queries ran + longer than the slow threshold, and durations_ms is every query's + run time (its length is the total seen). + """ + p50, p95 = percentiles(durations_ms) + return MetricsSnapshot( + seen=len(durations_ms), + ok=counts["ok"], + failed=counts["failed"], + timeout=counts["timeout"], + cancelled=counts["cancelled"], + unknown=counts["unknown"], + slow=slow, + p50=p50, + p95=p95, + ) + + +def metrics_for_queries( + completed: Iterable[CompletedQuery], + slow_threshold_ms: int, +) -> MetricsSnapshot: + """Compute the metrics snapshot for a set of completed queries.""" + counts = {"ok": 0, "failed": 0, "timeout": 0, "cancelled": 0, "unknown": 0} + slow = 0 + durations = [] + for entry in completed: + counts[entry.status] += 1 + if entry.duration_ms >= slow_threshold_ms: + slow += 1 + durations.append(entry.duration_ms) + return build_snapshot(counts, slow, durations) + + +def metrics_for_ranges( + completed: Collection[CompletedQuery], + ranges: list[tuple[int, int]], + slow_threshold_ms: int, +) -> list[MetricsSnapshot]: + """Summarise completed queries into one snapshot per time range. + + Each range is an absolute (lo_ms, hi_ms) interval; a completed + query counts toward a range when its end_ms falls inside it. + Returns one snapshot per range, in the order the ranges were given. + """ + return [ + metrics_for_queries( + [entry for entry in completed if lo_ms <= entry.end_ms <= hi_ms], + slow_threshold_ms, + ) + for lo_ms, hi_ms in ranges + ] diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py new file mode 100644 index 00000000..9bdddec3 --- /dev/null +++ b/src/qlever/monitor/models.py @@ -0,0 +1,96 @@ +"""Shared data models for the monitor-queries TUI. + +These frozen dataclasses are the contract between the data layer and the +UI. Widgets render them; the data adapters produce them. Neither side +imports the other: both depend on this module. +""" + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class LiveSubtitle: + """Subtitle line shown under the Live HeaderRow. + + state is one of: + 'checking' boot, no evidence yet + 'reachable' server confirmed alive (log fresh or ping ok) + 'pinging' was reachable, log went quiet, silently rechecking; + renders the same as reachable + 'unreachable' three consecutive pings failed + """ + + endpoint: str + state: str + n_active: int | None + + +@dataclass(frozen=True) +class MetricsCounts: + label: str + seen: int | None + ok: int | None + failed: int | None + timeout: int | None + cancelled: int | None + unknown: int | None + p50: int | None + p95: int | None + slow: int | None + not_ready_message: str | None = None + + +@dataclass(frozen=True) +class LiveQueryRow: + qid: str + started_at_ms: int + duration_ms: int + sparql: str + client_ip: str = "" + + +@dataclass(frozen=True) +class HistoricQueryRow: + qid: str + start_line_offset: int + started_at_ms: int + duration_ms: int + status: str + sparql: str + client_ip: str = "" + + +@dataclass(frozen=True) +class SparqlContent: + """What the SparqlPane renders for the row under the table cursor. + + `status` is filled at the screen seam: Live passes None (active + queries have no terminal status), Historic passes the real status. + """ + + qid: str + started_at_ms: int + status: str | None + sparql_text: str + client_ip: str = "" + + +@dataclass(frozen=True) +class TimelineBounds: + """The full log span and the slice the window currently covers. + + The Timeline maps these epochs (ms) to bar positions at render. + """ + + log_start_ms: int + log_end_ms: int + window_start_ms: int + window_end_ms: int + + +@dataclass(frozen=True) +class ControlsState: + window_size: str + mode: str + start_ms: int + end_ms: int diff --git a/src/qlever/monitor/monitor.tcss b/src/qlever/monitor/monitor.tcss new file mode 100644 index 00000000..bf698543 --- /dev/null +++ b/src/qlever/monitor/monitor.tcss @@ -0,0 +1,179 @@ +/* HeaderRow */ + +HeaderRow { + height: 1; + background: $primary; + color: $text; +} + +.slot-left { + width: auto; + min-width: 15; +} + +.slot-center { + width: 1fr; + text-align: center; + text-style: bold; +} + +.slot-right { + width: auto; + min-width: 15; +} + +/* NavPill */ + +NavPill { + padding: 0 1; + background: $accent; + color: $text; + text-style: bold; + text-align: center; +} + +NavPill:hover { + background: $accent-lighten-2; +} + +/* LiveStatusRow */ + +LiveStatusRow { + height: 1; + width: 100%; + text-align: center; + text-style: italic; + margin-bottom: 1; + color: $text; +} + + +/* MetricsRow */ + +MetricsRow { + height: 3; + margin-bottom: 1; + margin-left: 1; +} + +LiveQueryTable.stale, MetricsRow.stale { + color: $text-muted; +} + +/* Timeline (Historic only) */ + +Timeline { + height: 2; + margin-bottom: 1; + margin-left: 1; + margin-right: 1; +} + +/* Historic's MetricsRow is a single line; the global rule above + reserves 3 rows for Live's rolling windows. */ + +HistoricScreen MetricsRow { + height: auto; +} + +/* query tables (Live + Historic) */ + +QueryTable { + height: 1fr; +} + +QueryTable > .datatable--cursor { + background: $secondary; + color: $text; +} + +/* table status line, flush under the table */ + +#table-status { + height: 1; + margin-bottom: 1; + margin-left: 1; +} + +/* SparqlPane */ + +SparqlPane { + height: 1fr; + margin-bottom: 1; + margin-left: 1; + margin-right: 1; +} + +#sparql-header { + height: 1; + margin-bottom: 1; +} + +#sparql-scroll { + height: 1fr; +} + +#sparql-body { + width: 1fr; + height: auto; +} + +/* HistoricControlsRow */ + +HistoricControlsRow { + height: 1; + margin-bottom: 1; + width: 100%; +} + +HistoricControlsRow > WindowStepper, +HistoricControlsRow > ModePicker { + width: 1fr; + height: 1; + align-horizontal: center; +} + +HistoricControlsRow > SelectedWindow { + width: 1fr; + height: 1; + text-align: center; +} + +WindowStepper > Static, +ModePicker > Static { + width: auto; +} + +.stepper-caption, +.picker-caption { + text-style: bold; + margin-right: 1; +} + +.stepper-arrow { + padding: 0 1; +} + +#window-size { + padding: 0 1; + text-style: bold; +} + +.picker-bracket { + padding: 0 1; +} + +.picker-segment { + padding: 0 1; + color: $text-muted; +} + +.picker-sep { + color: $text-muted; +} + +.picker-segment.-selected { + background: $accent; + color: $text; + text-style: bold; +} \ No newline at end of file diff --git a/src/qlever/monitor/util.py b/src/qlever/monitor/util.py new file mode 100644 index 00000000..09354f3b --- /dev/null +++ b/src/qlever/monitor/util.py @@ -0,0 +1,88 @@ +"""Small shared helpers for the monitor-queries TUI widgets.""" + +from __future__ import annotations + +import os +import platform +import shutil +import subprocess +from datetime import datetime + + +def format_timestamp(ms: int) -> str: + """Render an epoch (ms) as a full local date and time.""" + return datetime.fromtimestamp(ms / 1000).strftime("%Y-%m-%d %H:%M:%S") + + +def format_clock(ms: int) -> str: + """Render an epoch (ms) as a local wall-clock time.""" + return datetime.fromtimestamp(ms / 1000).strftime("%H:%M:%S") + + +def truncate(text: str, max_len: int) -> str: + """Trim text to max_len with an ellipsis.""" + if len(text) <= max_len: + return text + return text[: max_len - 1] + "…" + + +def clipboard_commands() -> list[list[str]]: + """Pick clipboard CLIs available on this platform, in priority order.""" + system = platform.system() + if system == "Darwin": + return [["pbcopy"]] + if system != "Linux": + return [] + + # On Wayland, never fall through to xclip/xsel: they write to the + # XWayland selection, which Wayland-native apps don't read. + on_wayland = bool(os.environ.get("WAYLAND_DISPLAY")) + if on_wayland and shutil.which("wl-copy"): + # Force text/plain so wl-copy doesn't tag SPARQL starting with + # `PREFIX foo: ` as a URI-ish MIME type. + return [["wl-copy", "--type", "text/plain"]] + + cmds = [] + if shutil.which("xclip"): + cmds.append(["xclip", "-selection", "clipboard", "-t", "UTF8_STRING"]) + if shutil.which("xsel"): + cmds.append(["xsel", "--clipboard", "--input"]) + return cmds + + +def clipboard_install_hint() -> str: + """Platform-specific suggestion for installing a clipboard tool.""" + system = platform.system() + if system == "Darwin": + return "pbcopy should already be available on macOS" + if system != "Linux": + return f"no clipboard tool support for {system}" + if os.environ.get("WAYLAND_DISPLAY"): + return "install wl-clipboard (provides wl-copy)" + return "install xclip or xsel" + + +def copy_text(text: str) -> bool | None: + """Copy text to the system clipboard. + + Returns True on success, False if a tool ran but failed, None if no + clipboard tool is available on this system. + """ + cmds = clipboard_commands() + if not cmds: + return None + payload = text.encode("utf-8") + for cmd in cmds: + try: + proc = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + proc.communicate(input=payload, timeout=2) + except (subprocess.TimeoutExpired, OSError): + continue + if proc.returncode == 0: + return True + return False diff --git a/src/qlever/monitor/views/__init__.py b/src/qlever/monitor/views/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py new file mode 100644 index 00000000..a0ad2fe4 --- /dev/null +++ b/src/qlever/monitor/views/historic.py @@ -0,0 +1,458 @@ +from __future__ import annotations + +from textual import work +from textual.app import ComposeResult +from textual.binding import Binding +from textual.screen import Screen +from textual.widgets import Footer, Static + +from qlever.monitor.historic_data import ( + load_query_details_for_rows, + read_window, + render_window, +) +from qlever.monitor.live_data import current_ms +from qlever.monitor.metrics import EMPTY_FIELDS +from qlever.monitor.models import ( + ControlsState, + HistoricQueryRow, + MetricsCounts, + SparqlContent, + TimelineBounds, +) +from qlever.monitor.widgets.controls_row import HistoricControlsRow +from qlever.monitor.widgets.header_row import HeaderRow +from qlever.monitor.widgets.metrics_row import MetricsRow +from qlever.monitor.widgets.mode_picker import MODES, ModePicker +from qlever.monitor.widgets.nav_pill import NavPill +from qlever.monitor.widgets.query_table import HistoricQueryTable +from qlever.monitor.widgets.selected_window import SelectedWindow +from qlever.monitor.widgets.sparql_pane import SparqlPane +from qlever.monitor.widgets.timeline import Timeline +from qlever.monitor.widgets.window_stepper import ( + WindowStepper, + available_presets, + preset_ms, +) + +TITLE = "QLever monitor-queries: Historic" + +MODE_PHRASES = { + "ACTIVE": "were active during", + "STARTS": "started in", + "ENDS": "ended in", +} + +SORT_COLUMNS = ["Started", "Duration", "Status"] + +SORT_KEYS = { + "Started": lambda row: row.started_at_ms, + "Duration": lambda row: row.duration_ms, + "Status": lambda row: row.status, +} + +# Each column's (descending, ascending) wording for the status line. +SORT_PHRASES = { + "Started": ("newest first", "oldest first"), + "Duration": ("longest first", "shortest first"), + "Status": ("Z to A", "A to Z"), +} + +# Sort the whole window but paint at most this many rows, so a huge +# window does not freeze the table. +MAX_VISIBLE_ROWS = 1000 + +# Collapse a fast window scrub into a single scan once the user settles. +RESCAN_DEBOUNCE_S = 0.1 + + +class HistoricScreen(Screen, inherit_bindings=False): + """Historic view: shows active queries parsed from the log over a time window.""" + + BINDINGS = [ + Binding("tab", "app.swap_screen", " ComposeResult: + self.log_start_ms = self.app.log_start_ms + self.log_end_ms = self.read_log_end() + # Presets longer than the log span are pointless; `all` covers it. + self.available_presets = available_presets( + self.log_end_ms - self.log_start_ms + ) + self.window_size = self.available_presets[0] + self.mode = "ACTIVE" + self.sort_column = "Duration" + self.sort_reverse = True + width = preset_ms(self.window_size) + self.window_start_ms = ( + self.log_start_ms if width is None else self.log_end_ms - width + ) + self.window_end_ms = self.log_end_ms + self.window_queries = None + self.all_rows = [] + self.query_details_cache = {} + self.rescan_timer = None + self.cached_window = None + controls = ControlsState( + window_size=self.window_size, + mode=self.mode, + start_ms=self.window_start_ms, + end_ms=self.window_end_ms, + ) + bounds = TimelineBounds( + log_start_ms=self.log_start_ms, + log_end_ms=self.log_end_ms, + window_start_ms=self.window_start_ms, + window_end_ms=self.window_end_ms, + ) + + yield HeaderRow( + left=NavPill("< Live", target="live"), + center=Static(TITLE), + ) + yield HistoricControlsRow(controls) + yield Timeline(bounds) + yield MetricsRow( + [MetricsCounts(label=self.window_size, **EMPTY_FIELDS)], + self.app.slow_threshold, + ) + yield HistoricQueryTable([]) + yield Static("", id="table-status") + yield SparqlPane() + yield Footer(show_command_palette=False) + + def on_screen_resume(self) -> None: + """Catch up on log growth, then push state and rescan.""" + self.log_end_ms = self.read_log_end() + self.available_presets = available_presets( + self.log_end_ms - self.log_start_ms + ) + self.clamp_window() + self.refresh_view(rescan=True) + + def read_log_end(self) -> int: + """Return the freshest log timestamp the tailer has seen.""" + state = self.app.live_state + with state.lock: + return state.latest_event_ms + + def show_loading_state(self) -> None: + """Blank the table and metrics row while a rescan is in flight.""" + self.query_one(HistoricQueryTable).set_rows([]) + self.query_one(MetricsRow).rows = [ + MetricsCounts( + label=self.window_size, + **EMPTY_FIELDS, + not_ready_message="loading…", + ) + ] + self.query_one("#table-status", Static).update("Loading window…") + + def refresh_view(self, rescan: bool) -> None: + """Rebuild the controls/timeline widgets and kick the data refresh. + + The single update path: every key/click action mutates the + screen's window fields and then calls this. `rescan=True` reads + a fresh window from the log; `rescan=False` reuses the cached + scan and only re-filters by the current mode. + """ + controls = ControlsState( + window_size=self.window_size, + mode=self.mode, + start_ms=self.window_start_ms, + end_ms=self.window_end_ms, + ) + bounds = TimelineBounds( + log_start_ms=self.log_start_ms, + log_end_ms=self.log_end_ms, + window_start_ms=self.window_start_ms, + window_end_ms=self.window_end_ms, + ) + self.query_one(WindowStepper).window_size = self.window_size + self.query_one(ModePicker).selected = self.mode + self.query_one(SelectedWindow).state = controls + self.query_one(Timeline).bounds = bounds + if rescan: + if (self.window_start_ms, self.window_end_ms) == self.cached_window: + return + self.show_loading_state() + self.schedule_rescan() + else: + self.refresh_data(rescan=False) + + def schedule_rescan(self) -> None: + """Collapse a fast window scrub into one scan of where the user lands.""" + if self.rescan_timer is not None: + self.rescan_timer.stop() + self.rescan_timer = self.set_timer( + RESCAN_DEBOUNCE_S, lambda: self.refresh_data(rescan=True) + ) + + @work(thread=True, exclusive=True, group="refresh_data") + def refresh_data(self, rescan: bool) -> None: + """Scan and/or re-filter the window, push rows + metrics + status. + + On `rescan` the log is read into a fresh list of window queries + cached on the screen and the query-details cache is emptied, + since a new window is a new set of queries. Render always runs, + so a mode change pays only an in-memory filter. The visible + rows' text is read here, off the UI thread, reusing the cache + across sorts and mode changes within the window. + """ + if rescan: + self.window_queries = read_window( + self.app.log_file, + self.window_start_ms, + self.window_end_ms, + self.app.window_pad_ms, + self.log_end_ms, + current_ms(), + ) + self.query_details_cache = {} + self.cached_window = (self.window_start_ms, self.window_end_ms) + controls = ControlsState( + window_size=self.window_size, + mode=self.mode, + start_ms=self.window_start_ms, + end_ms=self.window_end_ms, + ) + rows, metrics = render_window( + self.window_queries, + controls, + self.app.slow_threshold * 1000, + self.log_end_ms, + ) + self.all_rows = rows + visible_rows = load_query_details_for_rows( + self.app.log_file, self.visible_rows(), self.query_details_cache + ) + self.app.call_from_thread( + self.apply_window_result, visible_rows, metrics + ) + + def apply_window_result( + self, + rows: list[HistoricQueryRow], + metrics: MetricsCounts, + ) -> None: + """Push fresh rows, metrics, and status line into the widgets.""" + self.query_one(HistoricQueryTable).set_rows(rows) + self.query_one(MetricsRow).rows = [metrics] + self.query_one("#table-status", Static).update( + self.status_text(len(self.all_rows)) + ) + self.refresh_sort_indicator() + + def sorted_rows( + self, rows: list[HistoricQueryRow] + ) -> list[HistoricQueryRow]: + """Order rows by the active sort column and direction.""" + return sorted( + rows, + key=SORT_KEYS[self.sort_column], + reverse=self.sort_reverse, + ) + + def visible_rows(self) -> list[HistoricQueryRow]: + """The sorted window capped to the rows the table will paint.""" + return self.sorted_rows(self.all_rows)[:MAX_VISIBLE_ROWS] + + def sort_phrase(self) -> str: + """Describe the active sort and direction for the status line.""" + descending, ascending = SORT_PHRASES[self.sort_column] + direction = descending if self.sort_reverse else ascending + return f"{self.sort_column}, {direction}" + + def status_text(self, total: int) -> str: + """Status line describing the window mode, row count, and sort.""" + phrase = MODE_PHRASES[self.mode] + if total > MAX_VISIBLE_ROWS: + count = f"top {MAX_VISIBLE_ROWS:,} of {total:,}" + else: + count = f"{total:,}" + return ( + f"Showing {count} queries that {phrase} the time window " + f"(sorted by {self.sort_phrase()})" + ) + + def refresh_sort_indicator(self) -> None: + """Point the table's header arrow at the active sort column.""" + self.query_one(HistoricQueryTable).set_sort_indicator( + SORT_COLUMNS.index(self.sort_column), self.sort_reverse + ) + + def clamp_window_start(self, start: int, width: int) -> int: + """Keep a window of `width` fully inside the log span.""" + return min(max(start, self.log_start_ms), self.log_end_ms - width) + + def center_window_at(self, ms: int) -> None: + """Center the window on `ms`; no-op when `all` (unbounded).""" + width = preset_ms(self.window_size) + if width is None: + return + self.window_start_ms = self.clamp_window_start(ms - width // 2, width) + self.window_end_ms = self.window_start_ms + width + self.refresh_view(rescan=True) + + def clamp_window(self) -> None: + """Fit the window into the log span, anchored to its right edge. + + `all` spans the whole log; any fixed size keeps the current + `window_end_ms` put and walks `window_start_ms` left, clamped + so the window stays inside the log span. + """ + width = preset_ms(self.window_size) + if width is None: + self.window_start_ms = self.log_start_ms + self.window_end_ms = self.log_end_ms + return + start = self.clamp_window_start(self.window_end_ms - width, width) + self.window_start_ms = start + self.window_end_ms = start + width + + def step_window(self, direction: int) -> None: + """Move the window size one preset in `direction` (wraps).""" + presets = self.available_presets + index = presets.index(self.window_size) + self.window_size = presets[(index + direction) % len(presets)] + self.clamp_window() + self.refresh_view(rescan=True) + + def set_mode(self, mode: str) -> None: + """Select an exact match mode, then refresh the view (no rescan).""" + self.mode = mode + self.refresh_view(rescan=False) + + def action_cycle_window(self) -> None: + """Step to the next window-size preset (wraps).""" + self.step_window(1) + + def action_cycle_mode(self) -> None: + """Step to the next match mode (wraps).""" + index = MODES.index(self.mode) + self.set_mode(MODES[(index + 1) % len(MODES)]) + + def cycle_sort_column(self, direction: int) -> None: + """Move the sort one column in `direction` (wraps).""" + index = SORT_COLUMNS.index(self.sort_column) + self.sort_column = SORT_COLUMNS[ + (index + direction) % len(SORT_COLUMNS) + ] + self.refresh_data(rescan=False) + + def action_sort_next_column(self) -> None: + """Sort by the next column (wraps).""" + self.cycle_sort_column(1) + + def action_sort_prev_column(self) -> None: + """Sort by the previous column (wraps).""" + self.cycle_sort_column(-1) + + def action_invert_sort(self) -> None: + """Flip the sort direction.""" + self.sort_reverse = not self.sort_reverse + self.refresh_data(rescan=False) + + def shift_window(self, direction: int) -> None: + """Move the window by its own width; clamp; no-op when `all`.""" + width = preset_ms(self.window_size) + if width is None: + return + start = self.window_start_ms + direction * width + self.window_start_ms = self.clamp_window_start(start, width) + self.window_end_ms = self.window_start_ms + width + self.refresh_view(rescan=True) + + def action_shift_earlier(self) -> None: + """Shift the window one width toward the log start.""" + self.shift_window(-1) + + def action_shift_later(self) -> None: + """Shift the window one width toward the log end.""" + self.shift_window(1) + + def action_snap_start(self) -> None: + """Snap the window to the log start.""" + width = preset_ms(self.window_size) + self.window_start_ms = self.log_start_ms + self.window_end_ms = ( + self.log_end_ms if width is None else self.log_start_ms + width + ) + self.refresh_view(rescan=True) + + def action_snap_end(self) -> None: + """Snap the window to the log end.""" + width = preset_ms(self.window_size) + self.window_end_ms = self.log_end_ms + self.window_start_ms = ( + self.log_start_ms if width is None else self.log_end_ms - width + ) + self.refresh_view(rescan=True) + + def on_nav_pill_clicked(self, message: NavPill.Clicked) -> None: + """Switch to the screen named by the clicked pill.""" + self.app.switch_screen(message.target) + + def on_window_stepper_stepped( + self, message: WindowStepper.Stepped + ) -> None: + """Resize the window when a stepper arrow is clicked.""" + self.step_window(message.direction) + + def on_mode_picker_selected(self, message: ModePicker.Selected) -> None: + """Switch the match mode when a segment is clicked.""" + self.set_mode(message.mode) + + def on_resize(self) -> None: + """Re-evaluate the conditional scroll bindings after a resize.""" + self.call_after_refresh(self.refresh_bindings) + + def on_timeline_recentered(self, message: Timeline.Recentered) -> None: + """Recenter the window on the clicked timeline position.""" + self.center_window_at(message.center_ms) + + def on_data_table_header_selected( + self, message: HistoricQueryTable.HeaderSelected + ) -> None: + """Sort by the clicked column; click the active one again to invert.""" + if message.column_index >= len(SORT_COLUMNS): + return + column = SORT_COLUMNS[message.column_index] + if column == self.sort_column: + self.sort_reverse = not self.sort_reverse + else: + self.sort_column = column + self.refresh_data(rescan=False) + + def on_data_table_row_selected( + self, message: HistoricQueryTable.RowSelected + ) -> None: + """Show the selected finished query's SPARQL in the pane.""" + row = message.data_table.query_rows[message.cursor_row] + self.query_one(SparqlPane).content = SparqlContent( + qid=row.qid, + started_at_ms=row.started_at_ms, + status=row.status, + sparql_text=row.sparql, + client_ip=row.client_ip, + ) diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py new file mode 100644 index 00000000..c4f672d1 --- /dev/null +++ b/src/qlever/monitor/views/live.py @@ -0,0 +1,258 @@ +from __future__ import annotations + +from textual import work +from textual.app import ComposeResult +from textual.binding import Binding +from textual.reactive import reactive +from textual.screen import Screen +from textual.widgets import Footer, Static + +from qlever.monitor.live_data import ( + PING_FAILS_TO_UNREACHABLE, + PING_INTERVAL_S, + current_ms, + discard_finished_backlog, + get_live_metrics, + get_live_query_rows, + is_log_fresh, +) +from qlever.monitor.models import LiveSubtitle, SparqlContent +from qlever.monitor.widgets.header_row import HeaderRow +from qlever.monitor.widgets.metrics_row import MetricsRow +from qlever.monitor.widgets.nav_pill import NavPill +from qlever.monitor.widgets.query_table import LiveQueryTable +from qlever.monitor.widgets.sparql_pane import SparqlPane +from qlever.monitor.widgets.status_row import LiveStatusRow +from qlever.util import is_qlever_server_alive + +TITLE = "QLever monitor-queries: Live" + + +class LiveScreen(Screen, inherit_bindings=False): + """Live view: shows currently active queries tailed from the server log.""" + + BINDINGS = [ + Binding("tab", "app.swap_screen", "Historic>", priority=True), + Binding("f", "toggle_freeze", "Freeze/Unfreeze"), + ] + + frozen = reactive(False, init=False) + + def compose(self) -> ComposeResult: + yield HeaderRow( + center=Static(TITLE), + right=NavPill("Historic >", target="historic"), + ) + state = self.app.live_state + slow_ms = self.app.slow_threshold * 1000 + rows = sorted( + get_live_query_rows(state, current_ms()), + key=lambda row: row.duration_ms, + reverse=True, + ) + + self.liveness = ( + "reachable" if is_log_fresh(state, current_ms()) else "checking" + ) + self.consecutive_ping_fails = 0 + self.ping_timer = None + + yield LiveStatusRow( + LiveSubtitle( + endpoint=self.app.sparql_endpoint, + state=self.liveness, + n_active=len(rows), + ) + ) + yield MetricsRow( + get_live_metrics(state, slow_ms, current_ms()), + self.app.slow_threshold, + ) + yield LiveQueryTable(rows) + yield Static("", id="table-status") + yield SparqlPane() + yield Footer() + + def on_mount(self) -> None: + """Start periodic refreshes; kick off boot pings if the log is stale.""" + self.table_timer = self.set_interval( + self.app.refresh_interval, self.refresh_table + ) + self.metrics_timer = self.set_interval(2.0, self.refresh_metrics) + if self.liveness == "checking": + self.start_pinging(initial=True) + + def on_screen_suspend(self) -> None: + """Pause periodic UI work while Live isn't the active screen.""" + self.table_timer.pause() + self.metrics_timer.pause() + if self.ping_timer is not None: + self.ping_timer.pause() + + def on_screen_resume(self) -> None: + """Resume periodic UI work; recheck server unless already reachable.""" + # Drop the backlog of queries that finished while suspended. + discard_finished_backlog(self.app.live_state) + self.table_timer.resume() + self.update_liveness_visuals() + if self.ping_timer is not None: + self.ping_timer.resume() + if self.liveness != "reachable": + self.ping_server() + + def start_pinging(self, initial: bool) -> None: + """Enter the ping cycle when the log goes quiet or at a cold boot. + + `initial` distinguishes boot's "checking" subtitle from the + invisible "pinging" recheck that follows a reachable period. + Fires one ping immediately so the first verification doesn't + wait the full interval. + """ + self.liveness = "checking" if initial else "pinging" + self.consecutive_ping_fails = 0 + if self.ping_timer is None: + self.ping_timer = self.set_interval( + PING_INTERVAL_S, self.ping_server + ) + self.ping_server() + self.refresh_subtitle() + + def mark_reachable(self) -> None: + """Confirm the server is alive and tear down the ping cycle.""" + self.liveness = "reachable" + self.consecutive_ping_fails = 0 + if self.ping_timer is not None: + self.ping_timer.stop() + self.ping_timer = None + self.refresh_subtitle() + self.update_liveness_visuals() + + def update_liveness_visuals(self) -> None: + """Sync the dim style and metric freeze with self.liveness.""" + stale = self.liveness == "unreachable" + self.query_one(LiveQueryTable).set_class(stale, "stale") + self.query_one(MetricsRow).set_class(stale, "stale") + if stale: + self.metrics_timer.pause() + else: + self.metrics_timer.resume() + + @work(thread=True, exclusive=True, group="ping_server") + def ping_server(self) -> None: + """Curl the server's /ping off the UI thread.""" + ok = is_qlever_server_alive(self.app.sparql_endpoint, max_time=2) + self.app.call_from_thread(self.apply_ping_result, ok) + + def apply_ping_result(self, ok: bool) -> None: + """Advance the state machine using one ping outcome.""" + if self.liveness == "reachable": + return + if is_log_fresh(self.app.live_state, current_ms()): + self.mark_reachable() + return + if ok: + self.mark_reachable() + return + self.consecutive_ping_fails += 1 + if self.consecutive_ping_fails >= PING_FAILS_TO_UNREACHABLE: + self.liveness = "unreachable" + self.refresh_subtitle() + self.update_liveness_visuals() + + def refresh_subtitle(self) -> None: + """Rebuild the subtitle to match the table currently on screen.""" + rows = self.query_one(LiveQueryTable).query_rows + self.query_one(LiveStatusRow).subtitle = LiveSubtitle( + endpoint=self.app.sparql_endpoint, + state=self.liveness, + n_active=len(rows), + ) + + def update_liveness_from_log(self) -> None: + """Re-evaluate the reachability state from log freshness alone. + + Runs on each refresh_table tick. A fresh log line is enough + evidence to flip back to reachable from any non-reachable state; + a long quiet log moves us from reachable into pinging. + """ + log_fresh = is_log_fresh(self.app.live_state, current_ms()) + if self.liveness == "reachable" and not log_fresh: + self.start_pinging(initial=False) + elif self.liveness != "reachable" and log_fresh: + self.mark_reachable() + + def display_clock_ms(self) -> int: + """The clock used for live duration math. + + When unreachable, freeze at the last log timestamp so durations + stop ticking on stale rows; otherwise trust real time. + """ + state = self.app.live_state + if ( + self.liveness == "unreachable" + and state.latest_event_ms is not None + ): + return state.latest_event_ms + return current_ms() + + def refresh_table(self) -> None: + """Push the active rows sorted by duration; no-op when frozen.""" + self.update_liveness_from_log() + if self.frozen: + return + state = self.app.live_state + rows = sorted( + get_live_query_rows(state, self.display_clock_ms()), + key=lambda row: row.duration_ms, + reverse=True, + ) + self.query_one(LiveQueryTable).set_rows(rows) + self.refresh_subtitle() + + def refresh_metrics(self) -> None: + """Push fresh metric snapshots into MetricsRow; no-op when frozen.""" + if self.frozen: + return + state = self.app.live_state + slow_ms = self.app.slow_threshold * 1000 + self.query_one(MetricsRow).rows = get_live_metrics( + state, slow_ms, current_ms() + ) + + def watch_frozen(self, frozen: bool) -> None: + """Reflect the frozen state in the table status line.""" + status = "Frozen - press f to resume" if frozen else "" + self.query_one("#table-status", Static).update(status) + + def action_toggle_freeze(self) -> None: + """Toggle the frozen state of the live view.""" + self.frozen = not self.frozen + + def on_resize(self) -> None: + """Re-evaluate the conditional scroll bindings after a resize.""" + self.call_after_refresh(self.refresh_bindings) + + def on_nav_pill_clicked(self, message: NavPill.Clicked) -> None: + """Switch to the screen named by the clicked pill. + + Routes through `action_swap_screen` when the pill points at the + other tab so the empty-log guard for Historic entry stays in one + place; any other target falls through to a plain screen switch. + """ + if message.target == "historic": + self.app.action_swap_screen() + else: + self.app.switch_screen(message.target) + + def on_data_table_row_selected( + self, message: LiveQueryTable.RowSelected + ) -> None: + """Show the selected active query's SPARQL in the pane.""" + row = message.data_table.query_rows[message.cursor_row] + self.query_one(SparqlPane).content = SparqlContent( + qid=row.qid, + started_at_ms=row.started_at_ms, + status=None, + sparql_text=row.sparql, + client_ip=row.client_ip, + ) diff --git a/src/qlever/monitor/widgets/__init__.py b/src/qlever/monitor/widgets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/qlever/monitor/widgets/controls_row.py b/src/qlever/monitor/widgets/controls_row.py new file mode 100644 index 00000000..b00e7f34 --- /dev/null +++ b/src/qlever/monitor/widgets/controls_row.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Horizontal + +from qlever.monitor.models import ControlsState +from qlever.monitor.widgets.mode_picker import ModePicker +from qlever.monitor.widgets.selected_window import SelectedWindow +from qlever.monitor.widgets.window_stepper import WindowStepper + + +class HistoricControlsRow(Horizontal): + """One-row control strip under the HeaderRow on the Historic screen. + + Holds the window stepper, mode picker, and read-only selected range. + Distributes the snapshot to its children; never reads data itself. + """ + + can_focus = False + + def __init__(self, state: ControlsState) -> None: + """Hold the controls snapshot to distribute once mounted.""" + super().__init__() + self.state = state + + def compose(self) -> ComposeResult: + yield WindowStepper(self.state.window_size) + yield ModePicker(self.state.mode) + yield SelectedWindow(self.state) diff --git a/src/qlever/monitor/widgets/header_row.py b/src/qlever/monitor/widgets/header_row.py new file mode 100644 index 00000000..670e04c2 --- /dev/null +++ b/src/qlever/monitor/widgets/header_row.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Horizontal +from textual.widget import Widget +from textual.widgets import Static + + +class HeaderRow(Horizontal): + """One-row header layout with three slots: left, center, right. + + The row is screen-agnostic. Each screen decides what goes in each slot. + Empty slots become an empty Static so the three-slot layout stays stable. + """ + + def __init__( + self, + left: Widget | None = None, + center: Widget | None = None, + right: Widget | None = None, + ) -> None: + """Build a row with the given widgets in the left, center, right slots.""" + super().__init__() + self.left = left if left is not None else Static("") + self.center = center if center is not None else Static("") + self.right = right if right is not None else Static("") + self.left.add_class("slot-left") + self.center.add_class("slot-center") + self.right.add_class("slot-right") + + def compose(self) -> ComposeResult: + yield self.left + yield self.center + yield self.right diff --git a/src/qlever/monitor/widgets/metrics_row.py b/src/qlever/monitor/widgets/metrics_row.py new file mode 100644 index 00000000..3797791b --- /dev/null +++ b/src/qlever/monitor/widgets/metrics_row.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Vertical +from textual.reactive import reactive +from textual.widgets import Static + +from qlever.monitor.models import MetricsCounts + +METRIC_COLORS = { + "ok": "$success", + "failed": "$error", + "timeout": "$error", + "cancelled": "$warning", + "unknown": "$warning", + "slow": "$warning", +} + +# (MetricsCounts field, kind) in render order; the field name is the label. +COLUMNS = [ + ("seen", "count"), + ("ok", "count"), + ("failed", "count"), + ("timeout", "count"), + ("cancelled", "count"), + ("unknown", "count"), + ("p50", "ms"), + ("p95", "ms"), + ("slow", "count"), +] + + +def color_tag(label: str, value: int | None) -> str | None: + """Return the theme color for a metric, or None if it should render neutral.""" + if value is None or value == 0: + return None + return METRIC_COLORS.get(label) + + +def format_count(n: int | None) -> str: + """Render a count, or an ellipsis if it is not yet computed.""" + if n is None: + return "…" + return f"{n:,}" + + +def format_ms(ms: int | None) -> str: + """Render a duration in seconds, ellipsis if not computed. + + Smart precision: 2 decimals under 1 s, 1 decimal otherwise. + """ + if ms is None: + return "…" + seconds = ms / 1000 + return f"{seconds:.2f}s" if seconds < 1 else f"{seconds:.1f}s" + + +def format_value(value: int | None, kind: str) -> str: + """Render a raw cell value according to its column kind.""" + if kind == "count": + return format_count(value) + return format_ms(value) + + +def column_widths(rows: list[MetricsCounts]) -> list[int]: + """Width of each column, sized to its own widest value across all rows. + + Per-column (not one shared width) so every column uses the least + horizontal space while still aligning across the rolling-window rows. + """ + widths = [] + for name, kind in COLUMNS: + cells = (format_value(getattr(r, name), kind) for r in rows) + widths.append(max((len(c) for c in cells), default=1)) + return widths + + +def column_label(name: str, slow_threshold_s: int) -> str: + """Display label for a metric column; slow carries its threshold.""" + if name == "slow": + return f">{slow_threshold_s}s" + return name + + +def render_cell( + name: str, label: str, value: int | None, width: int, kind: str +) -> str: + """Render one bold-label, right-aligned value pair with optional color. + + `name` is the metric field and drives the color lookup; `label` is + the text shown, which can differ from it (the slow column shows its + threshold). + """ + text = format_value(value, kind).rjust(width) + color = color_tag(name, value) + if color: + return f"[b]{label}[/] [{color}]{text}[/]" + return f"[b]{label}[/] {text}" + + +def format_row( + row: MetricsCounts, widths: list[int], slow_threshold_s: int +) -> str: + """Format one rolling-window row as a single line with markup.""" + if row.not_ready_message: + return f"[bold]{row.label:<8}[/] │ [dim]{row.not_ready_message}[/]" + cells = [ + render_cell( + name, + column_label(name, slow_threshold_s), + getattr(row, name), + width, + kind, + ) + for (name, kind), width in zip(COLUMNS, widths) + ] + return f"[bold]{row.label:<8}[/] │ " + " · ".join(cells) + + +class MetricsRow(Vertical): + """Stack of rolling-window metric lines (5m, 15m, 1h on Live).""" + + can_focus = False + + rows = reactive(list, init=False) + + def __init__( + self, rows: list[MetricsCounts], slow_threshold_s: int + ) -> None: + """Render each row in `rows` as one formatted text line.""" + super().__init__() + self.slow_threshold_s = slow_threshold_s + self.set_reactive(MetricsRow.rows, rows) + + def compose(self) -> ComposeResult: + widths = column_widths(self.rows) + for row in self.rows: + yield Static(format_row(row, widths, self.slow_threshold_s)) + + def watch_rows(self, rows: list[MetricsCounts]) -> None: + """Repaint each line static when the row data changes.""" + widths = column_widths(rows) + for line, row in zip(self.query(Static), rows): + line.update(format_row(row, widths, self.slow_threshold_s)) diff --git a/src/qlever/monitor/widgets/mode_picker.py b/src/qlever/monitor/widgets/mode_picker.py new file mode 100644 index 00000000..3fa3ae4b --- /dev/null +++ b/src/qlever/monitor/widgets/mode_picker.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from textual import events +from textual.app import ComposeResult +from textual.containers import Horizontal +from textual.message import Message +from textual.reactive import reactive +from textual.widgets import Static + +MODES = ("STARTS", "ACTIVE", "ENDS") + + +class ModePicker(Horizontal): + """Segmented mode control: MODE [ STARTS │ ACTIVE │ ENDS ]. + + Each mode is its own Static so a segment can be made independently + clickable without restructuring the widget. The selected segment + carries the `-selected` state class for styling. + """ + + can_focus = False + + selected = reactive("", init=False) + + class Selected(Message): + """Posted when a segment is clicked; the screen sets that mode.""" + + def __init__(self, mode: str) -> None: + super().__init__() + self.mode = mode + + def __init__(self, selected: str) -> None: + """Hold the selected mode to paint once mounted.""" + super().__init__() + self.set_reactive(ModePicker.selected, selected) + + def compose(self) -> ComposeResult: + caption = Static("MODE", classes="picker-caption") + caption.tooltip = ( + "Which queries in the window to show - STARTS began in it, " + "ACTIVE ran during it, ENDS finished in it." + ) + yield caption + yield Static("[", classes="picker-bracket") + for index, mode in enumerate(MODES): + if index > 0: + yield Static("│", classes="picker-sep") + classes = "picker-segment" + if mode == self.selected: + classes += " -selected" + yield Static(mode, id=f"mode-{mode.lower()}", classes=classes) + yield Static("]", classes="picker-bracket") + + def watch_selected(self, value: str) -> None: + """Move the -selected state class onto the chosen segment.""" + for mode in MODES: + segment = self.query_one(f"#mode-{mode.lower()}", Static) + segment.set_class(mode == value, "-selected") + + def on_click(self, event: events.Click) -> None: + """Translate a segment click into a Selected message.""" + clicked = event.widget.id if event.widget else None + if clicked and clicked.startswith("mode-"): + mode = clicked.removeprefix("mode-").upper() + if mode in MODES: + self.post_message(self.Selected(mode)) diff --git a/src/qlever/monitor/widgets/nav_pill.py b/src/qlever/monitor/widgets/nav_pill.py new file mode 100644 index 00000000..91e406f6 --- /dev/null +++ b/src/qlever/monitor/widgets/nav_pill.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from textual.message import Message +from textual.widgets import Static + + +class NavPill(Static): + """Clickable, non-focusable pill that requests a screen switch.""" + + can_focus = False + + class Clicked(Message): + """Posted on click so an ancestor decides what the click does. + + Bubbles up the DOM, letting the widget stay unaware of the app + or screen registry. + """ + + def __init__(self, target: str) -> None: + super().__init__() + self.target = target + + def __init__(self, label: str, target: str) -> None: + """Render `label` as the pill; `target` names the screen to switch to.""" + super().__init__(label) + self.target = target + + def on_click(self) -> None: + """Announce the click; the handler lives on the parent screen.""" + self.post_message(self.Clicked(self.target)) diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py new file mode 100644 index 00000000..c1e9f0f8 --- /dev/null +++ b/src/qlever/monitor/widgets/query_table.py @@ -0,0 +1,212 @@ +from __future__ import annotations + +from rich.text import Text +from textual.widgets import DataTable + +from qlever.monitor.models import HistoricQueryRow, LiveQueryRow +from qlever.monitor.util import format_clock + +SPARQL_WIDTH = 280 + + +def oneline(text: str) -> str: + """Collapse all runs of whitespace (incl. newlines) to single spaces.""" + return " ".join(text.split()) + + +def truncate(text: str, width: int) -> str: + """Clip `text` to `width` characters, ending with an ellipsis if cut.""" + if len(text) <= width: + return text + return text[: width - 1] + "…" + + +def format_duration(ms: int) -> str: + """Render a duration in seconds to one decimal, or '?' when unknown.""" + if ms < 0: + return "?" + return f"{ms / 1000:.1f}s" + + +class QueryTable(DataTable): + """Common behaviour for the Live and Historic query tables. + + Each subclass defines its own columns, gives each row a key, and + renders a row to its cells. The base uses those to fill the table + and to keep the cursor on the same row when the rows are replaced. + """ + + def __init__( + self, rows: list[LiveQueryRow] | list[HistoricQueryRow] + ) -> None: + """Hold the rows to paint once the table is mounted.""" + super().__init__(cursor_type="row") + self.query_rows = rows + + def row_key(self, row: LiveQueryRow | HistoricQueryRow) -> str: + """The DataTable row key identifying this query.""" + raise NotImplementedError + + def render_row(self, row: LiveQueryRow | HistoricQueryRow) -> tuple: + """Render one query to its display cells.""" + raise NotImplementedError + + def fill_rows(self) -> None: + """Add one keyed DataTable row per entry in self.query_rows.""" + for row in self.query_rows: + self.add_row(*self.render_row(row), key=self.row_key(row)) + + def key_under_cursor(self) -> str | int | None: + """The key of the row under the cursor, or None when none is valid.""" + index = self.cursor_row + if self.query_rows and 0 <= index < len(self.query_rows): + return self.row_key(self.query_rows[index]) + return None + + def restore_cursor( + self, key: str | int | None, fallback_index: int + ) -> None: + """Move the cursor back to key, or clamp to fallback_index if it is gone.""" + if not self.query_rows: + return + if key is not None: + for index, row in enumerate(self.query_rows): + if self.row_key(row) == key: + self.move_cursor(row=index) + return + self.move_cursor(row=min(fallback_index, len(self.query_rows) - 1)) + + def set_rows( + self, rows: list[LiveQueryRow] | list[HistoricQueryRow] + ) -> None: + """Replace the rows by a full rebuild, preserving the cursor by key.""" + key = self.key_under_cursor() + fallback_index = self.cursor_row + self.query_rows = rows + self.clear(columns=False) + self.fill_rows() + self.restore_cursor(key, fallback_index) + + +class LiveQueryTable(QueryTable): + """Active queries on the Live screen; the only focusable widget.""" + + def __init__(self, rows: list[LiveQueryRow]) -> None: + """Track the cells and column keys the incremental diff compares.""" + super().__init__(rows) + # qid -> the cell tuple currently shown for that row. + self.shown_cells = {} + # Column keys in display order, recorded when columns are added. + self.column_keys = () + + def on_mount(self) -> None: + """Add the columns and one table row per active query.""" + self.column_keys = ( + self.add_column("Started"), + self.add_column("Duration", width=8), + self.add_column("SPARQL"), + ) + self.fill_rows() + + def render_row(self, row: LiveQueryRow) -> tuple: + """Render one active query to its Started/Duration/SPARQL cells.""" + return ( + format_clock(row.started_at_ms), + Text(format_duration(row.duration_ms), justify="right"), + truncate(oneline(row.sparql), SPARQL_WIDTH), + ) + + def row_key(self, row: LiveQueryRow) -> str: + """Key an active query by its qid, unique among concurrent queries.""" + return row.qid + + def fill_rows(self) -> None: + """Fill the table and remember each row's cells for the diff.""" + self.shown_cells = {} + for row in self.query_rows: + cells = self.render_row(row) + self.add_row(*cells, key=row.qid) + self.shown_cells[row.qid] = cells + + def set_rows(self, rows: list[LiveQueryRow]) -> None: + """Bring the table to the new sorted rows by an incremental diff. + + Removes gone rows, updates only changed cells in place, and + appends new rows at the bottom. A steady tick touches neither + scroll nor highlight. Falls back to a full rebuild if the sorted + order cannot be reproduced by append-at-bottom alone. + """ + new_cells = {row.qid: self.render_row(row) for row in rows} + new_order = [row.qid for row in rows] + old_order = [row.qid for row in self.query_rows] + new_set, old_set = set(new_order), set(old_order) + + survivors = [qid for qid in old_order if qid in new_set] + additions = [qid for qid in new_order if qid not in old_set] + if survivors + additions != new_order: + super().set_rows(rows) + return + + cursor_key = self.key_under_cursor() + old_cursor_row = self.cursor_row + layout_changed = bool(additions) or len(survivors) != len(old_order) + + for qid in old_order: + if qid not in new_set: + self.remove_row(qid) + for qid in survivors: + old_row, new_row = self.shown_cells[qid], new_cells[qid] + for index, column_key in enumerate(self.column_keys): + if str(old_row[index]) != str(new_row[index]): + self.update_cell(qid, column_key, new_row[index]) + for qid in additions: + self.add_row(*new_cells[qid], key=qid) + + self.query_rows = rows + self.shown_cells = new_cells + if layout_changed and cursor_key is not None: + self.restore_cursor(cursor_key, old_cursor_row) + + +class HistoricQueryTable(QueryTable): + """Finished queries in the current window on the Historic screen.""" + + # Wide enough to fit a sortable header plus its sort arrow. + SORTABLE_COLUMN_WIDTH = 10 + + def on_mount(self) -> None: + """Add the columns and one table row per finished query.""" + self.add_column("Started", width=self.SORTABLE_COLUMN_WIDTH) + self.add_column("Duration", width=self.SORTABLE_COLUMN_WIDTH) + self.add_column("Status", width=self.SORTABLE_COLUMN_WIDTH) + self.add_column("SPARQL") + self.fill_rows() + + def render_row(self, row: HistoricQueryRow) -> tuple: + """Render one finished query to Started/Duration/Status/SPARQL cells.""" + return ( + format_clock(row.started_at_ms), + Text(format_duration(row.duration_ms), justify="right"), + row.status, + truncate(oneline(row.sparql), SPARQL_WIDTH), + ) + + def row_key(self, row: HistoricQueryRow) -> str: + """Key a finished query by its start line offset, a stable identity. + + A window can hold the same qid twice (the server reuses a qid + after its end), but every start line sits at one byte offset. + """ + return str(row.start_line_offset) + + def set_sort_indicator(self, column_index: int, descending: bool) -> None: + """Show a sort arrow on one column header and clear the rest.""" + arrow = " ▼" if descending else " ▲" + for index, column in enumerate(self.ordered_columns): + base = column.label.plain.rstrip(" ▲▼") + label = base + arrow if index == column_index else base + column.label = Text(label) + # Invalidate the render cache so the new labels paint, the way + # update_cell does; Textual has no public column relabel. + self._update_count += 1 + self.refresh() diff --git a/src/qlever/monitor/widgets/selected_window.py b/src/qlever/monitor/widgets/selected_window.py new file mode 100644 index 00000000..4e9b804b --- /dev/null +++ b/src/qlever/monitor/widgets/selected_window.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from textual.reactive import reactive +from textual.widgets import Static + +from qlever.monitor.models import ControlsState +from qlever.monitor.util import format_clock + + +def format_selected_window(state: ControlsState) -> str: + """Read-only summary of the current window's range and width.""" + start = format_clock(state.start_ms) + end = format_clock(state.end_ms) + return f"[b]SELECTED[/b] {start} → {end} ([b]{state.window_size}[/b])" + + +class SelectedWindow(Static): + """Read-only window range under the HeaderRow on the Historic screen.""" + + can_focus = False + + state = reactive(None, init=False) + + def __init__(self, state: ControlsState) -> None: + """Render the selected window range computed from `state`.""" + super().__init__(format_selected_window(state)) + self.set_reactive(SelectedWindow.state, state) + + def watch_state(self, value: ControlsState) -> None: + """Repaint the range summary when the window state changes.""" + self.update(format_selected_window(value)) diff --git a/src/qlever/monitor/widgets/sparql_pane.py b/src/qlever/monitor/widgets/sparql_pane.py new file mode 100644 index 00000000..0955f0f0 --- /dev/null +++ b/src/qlever/monitor/widgets/sparql_pane.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +from rich.syntax import Syntax +from textual.app import ComposeResult +from textual.containers import Vertical, VerticalScroll +from textual.reactive import reactive +from textual.widgets import Static + +from qlever.monitor.models import SparqlContent +from qlever.monitor.util import format_timestamp + +LIGHT_SYNTAX_THEME = "friendly" +DARK_SYNTAX_THEME = "monokai" + + +def format_header(content: SparqlContent) -> str: + """One-line query identity shown above the SPARQL body.""" + parts = [ + f"[b]Server Query ID:[/b] {content.qid}", + f"[b]Client IP:[/b] {content.client_ip or '-'}", + f"[b]Started at:[/b] {format_timestamp(content.started_at_ms)}", + ] + if content.status is not None: + parts.append(f"[b]Status:[/b] {content.status}") + return " │ ".join(parts) + + +class SparqlBody(Static): + """Syntax-highlighted SPARQL; theme follows the active app theme. + + The Syntax object is rebuilt in render() rather than stored, so a + theme switch (which triggers a repaint) picks the matching variant + without any explicit subscription. + """ + + code = reactive(None, layout=True) + + def render(self): + if self.code is None: + return "" + theme = ( + DARK_SYNTAX_THEME + if self.app.current_theme.dark + else LIGHT_SYNTAX_THEME + ) + return Syntax(self.code, "sparql", theme=theme, word_wrap=True) + + +class SparqlScroll(VerticalScroll): + """Scroll region for an overflowing query; never takes focus.""" + + can_focus = False + + +class SparqlPane(Vertical): + """Identity line plus full SPARQL for the row under the table cursor. + + Shared by Live and Historic. The screen builds the SparqlContent and + fills `status` (Live: None, Historic: real status); the pane never + reads stub data itself. + """ + + can_focus = False + + content = reactive(None, init=False) + show_pretty = reactive(False, init=False) + pretty_text = reactive(None, init=False) + + def compose(self) -> ComposeResult: + yield Static(id="sparql-header") + with SparqlScroll(id="sparql-scroll"): + yield SparqlBody(id="sparql-body") + + def on_mount(self) -> None: + """Paint the initial (empty) state once children are mounted.""" + self.refresh_content(self.content) + + def watch_content(self, content: SparqlContent | None) -> None: + """Reset pretty-print state for the newly selected query, then paint.""" + self.show_pretty = False + self.pretty_text = None + self.refresh_content(content) + + def watch_show_pretty(self, show_pretty: bool) -> None: + """Repaint with raw or pretty text when the toggle flips.""" + self.refresh_content(self.content) + + def watch_pretty_text(self, pretty_text: str | None) -> None: + """Repaint once the pretty-printed text has been computed.""" + self.refresh_content(self.content) + + @property + def displayed_text(self) -> str | None: + """The SPARQL string currently on screen, or None if empty. + + Pretty-printed text only once the formatter has produced it; + the raw query otherwise. + """ + if self.content is None: + return None + if self.show_pretty and self.pretty_text is not None: + return self.pretty_text + return self.content.sparql_text + + def refresh_content(self, content: SparqlContent | None) -> None: + """Push the current content into the header and body widgets.""" + header = self.query_one("#sparql-header", Static) + body = self.query_one("#sparql-body", SparqlBody) + # Body height changes here; re-evaluate the conditional scroll + # bindings once layout has settled. + self.call_after_refresh(self.refresh_bindings) + if content is None: + header.update( + "[dim]Press Enter or double-click a row to view its " + "full SPARQL query and details.[/dim]" + ) + body.code = None + return + header.update(format_header(content)) + body.code = self.displayed_text diff --git a/src/qlever/monitor/widgets/status_row.py b/src/qlever/monitor/widgets/status_row.py new file mode 100644 index 00000000..dbefee10 --- /dev/null +++ b/src/qlever/monitor/widgets/status_row.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from textual.reactive import Reactive +from textual.widgets import Static + +from qlever.monitor.models import LiveSubtitle + + +def format_subtitle(subtitle: LiveSubtitle) -> str: + """Build the Live subtitle text from current state and server endpoint.""" + if subtitle.state == "checking": + return f"[$warning]Checking[/] server at [b]{subtitle.endpoint}[/]…" + if subtitle.state == "unreachable": + return ( + f"[$error]Can't reach server[/] at [b]{subtitle.endpoint}[/]" + f" · retrying every 5s" + ) + if subtitle.n_active is None: + return f"[$success]Server active[/] at [b]{subtitle.endpoint}[/]" + return ( + f"[$success]Server active[/] at [b]{subtitle.endpoint}[/]" + f" · [b $success]{subtitle.n_active}[/] active queries" + ) + + +class LiveStatusRow(Static): + """One-line subtitle under the HeaderRow on the Live screen.""" + + can_focus = False + + subtitle = Reactive(None, init=False) + + def __init__(self, subtitle: LiveSubtitle) -> None: + """Render the subtitle text computed from `subtitle` and `endpoint`.""" + super().__init__(format_subtitle(subtitle)) + self.set_reactive(LiveStatusRow.subtitle, subtitle) + + def watch_subtitle(self, subtitle: LiveSubtitle) -> None: + self.update(format_subtitle(subtitle)) diff --git a/src/qlever/monitor/widgets/timeline.py b/src/qlever/monitor/widgets/timeline.py new file mode 100644 index 00000000..fd13f918 --- /dev/null +++ b/src/qlever/monitor/widgets/timeline.py @@ -0,0 +1,207 @@ +from __future__ import annotations + +from datetime import datetime +from math import ceil + +from textual import events +from textual.message import Message +from textual.reactive import reactive +from textual.widgets import Static + +from qlever.monitor.models import TimelineBounds +from qlever.monitor.util import format_timestamp + +# Width of a full "YYYY-MM-DD HH:MM:SS" stamp, and the gap that +# separates the edge labels from the bar. +STAMP_W = 19 +GUTTER = " " + +# Minimum blank columns kept between two adjacent tick labels; the +# actual spacing is this plus the label width (derived, not fixed, +# because multi-day labels are wider than HH:MM ones). +MIN_TICK_GAP = 4 + +DAY_MS = 24 * 60 * 60 * 1000 + + +def tick_pattern(span_ms: int) -> str: + """strftime for interior ticks; add the date once the span crosses days. + + Edge labels always carry the full date, so within a single day the + ticks can stay the compact HH:MM form. + """ + if span_ms >= DAY_MS: + return "%m-%d %H:%M" + return "%H:%M" + + +def format_tick(ms: int, pattern: str) -> str: + """Render an epoch (ms) using a tick pattern from `tick_pattern`.""" + return datetime.fromtimestamp(ms / 1000).strftime(pattern) + + +def span_fraction(ts_ms: int, bounds: TimelineBounds) -> float: + """Where `ts_ms` sits in the log span, as a 0..1 fraction. + + A zero-length span collapses everything to the start. + """ + span = bounds.log_end_ms - bounds.log_start_ms + if span <= 0: + return 0.0 + return (ts_ms - bounds.log_start_ms) / span + + +def fraction_to_column(fraction: float, bar_width: int) -> int: + """Map a 0..1 position along the log span to a bar column index.""" + clamped = min(max(fraction, 0.0), 1.0) + return round(clamped * (bar_width - 1)) + + +def column_to_ms(col: int, bar_width: int, bounds: TimelineBounds) -> int: + """Epoch (ms) at a bar column; inverse of fraction_to_column. + + Clamped to the log span so an edge click maps to an edge time. + """ + if bar_width <= 1: + return bounds.log_start_ms + fraction = min(max(col / (bar_width - 1), 0.0), 1.0) + span = bounds.log_end_ms - bounds.log_start_ms + return bounds.log_start_ms + round(fraction * span) + + +def window_width_cells(bounds: TimelineBounds, bar_width: int) -> int: + """Cells the window block should occupy, based on its duration. + + Width depends only on how long the window is relative to the log + span, never on where it sits. Floors at one cell so a tiny window + is still visible. + """ + log_span = bounds.log_end_ms - bounds.log_start_ms + if log_span <= 0: + return 1 + window_span = bounds.window_end_ms - bounds.window_start_ms + fraction = window_span / log_span + return max(1, ceil(fraction * bar_width)) + + +def render_track(bounds: TimelineBounds, bar_width: int) -> str: + """The bar line: full-span track with the window span marked. + + `████` marks the window; `├`/`┤` cap the log edges, but only when + the window does not already sit on that edge (the window block + is its own edge marker). + """ + chars = ["─"] * bar_width + width = window_width_cells(bounds, bar_width) + start = fraction_to_column( + span_fraction(bounds.window_start_ms, bounds), bar_width + ) + end = start + width - 1 + # Pin the block to the last cell when it overruns the bar, or when + # the window actually reaches the log end but rounding fell short. + if end > bar_width - 1 or bounds.window_end_ms >= bounds.log_end_ms: + end = bar_width - 1 + start = end - width + 1 + for col in range(start, end + 1): + chars[col] = "█" + if chars[0] != "█": + chars[0] = "├" + if chars[-1] != "█": + chars[-1] = "┤" + return "".join(chars) + + +def render_ticks(bounds: TimelineBounds, bar_width: int) -> str: + """A blank bar-width line with evenly spaced time labels. + + Tick spacing is derived from the label width so wider multi-day + labels simply yield fewer ticks instead of overlapping. + """ + span = bounds.log_end_ms - bounds.log_start_ms + # Every label uses one pattern, so all labels share a width. + pattern = tick_pattern(span) + label_width = len(format_tick(bounds.log_start_ms, pattern)) + # A tick owns its label plus a blank gap; fewer fit when wider. + step = label_width + MIN_TICK_GAP + tick_count = max(1, bar_width // step) + cells = [" "] * bar_width + for i in range(1, tick_count + 1): + # Interior positions only; the edges hold the Log start/end labels. + fraction = i / (tick_count + 1) + center = fraction_to_column(fraction, bar_width) + # Time at this position: that same fraction into the log span. + label = format_tick( + bounds.log_start_ms + round(fraction * span), pattern + ) + # Center the label on its column, clamped to stay on the bar. + start = min(max(center - len(label) // 2, 0), bar_width - len(label)) + for offset, char in enumerate(label): + cells[start + offset] = char + return "".join(cells) + + +class Timeline(Static): + """Two-row map of the log span with the current window marked. + + Row 1: edge timestamps around a bar whose `████` block is the + selected window. Row 2: `Log start` / tick labels / `Log end`. + Geometry is recomputed each paint because the bar width is only + known after layout. + """ + + can_focus = False + + class Recentered(Message): + """Posted when the bar is clicked; carries the clicked time.""" + + def __init__(self, center_ms: int) -> None: + super().__init__() + self.center_ms = center_ms + + bounds = reactive(None, init=False) + + def __init__(self, bounds: TimelineBounds) -> None: + """Hold the span/window snapshot to draw at render time.""" + super().__init__() + self.set_reactive(Timeline.bounds, bounds) + + def watch_bounds(self) -> None: + """Redraw the bar when the span or window changes.""" + self.refresh() + + def on_resize(self) -> None: + """Repaint so the bar tracks the new width.""" + self.refresh() + + def on_click(self, event: events.Click) -> None: + """Recenter the window on the clicked bar column. + + Clicks on the edge stamps or gutter (outside the bar) are + ignored; the screen decides whether `all` makes it a no-op. + """ + offset = STAMP_W + len(GUTTER) + bar_width = self.size.width - 2 * offset + if bar_width < 2: + return + col = event.x - offset + if col < 0 or col >= bar_width: + return + self.post_message( + self.Recentered(column_to_ms(col, bar_width, self.bounds)) + ) + + def render(self): + bar_width = self.size.width - 2 * (STAMP_W + len(GUTTER)) + if bar_width < 2: + return "" + top = ( + f"{format_timestamp(self.bounds.log_start_ms)}{GUTTER}" + f"{render_track(self.bounds, bar_width)}{GUTTER}" + f"{format_timestamp(self.bounds.log_end_ms)}" + ) + bottom = ( + f"{'Log start'.ljust(STAMP_W)}{GUTTER}" + f"{render_ticks(self.bounds, bar_width)}{GUTTER}" + f"{'Log end'.rjust(STAMP_W)}" + ) + return f"{top}\n{bottom}" diff --git a/src/qlever/monitor/widgets/window_stepper.py b/src/qlever/monitor/widgets/window_stepper.py new file mode 100644 index 00000000..e72458aa --- /dev/null +++ b/src/qlever/monitor/widgets/window_stepper.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from textual import events +from textual.app import ComposeResult +from textual.containers import Horizontal +from textual.message import Message +from textual.reactive import reactive +from textual.widgets import Static + +WINDOW_PRESETS = ("5m", "15m", "30m", "1h", "6h", "12h", "24h", "all") + +UNIT_MS = {"m": 60_000, "h": 3_600_000} + + +def preset_ms(preset: str) -> int | None: + """Width of a window preset in ms; None for the unbounded `all`.""" + if preset == "all": + return None + return int(preset[:-1]) * UNIT_MS[preset[-1]] + + +def available_presets(span_ms: int) -> tuple[str, ...]: + """Presets that fit within the log span; `all` is always offered.""" + fits = tuple( + p for p in WINDOW_PRESETS if p != "all" and preset_ms(p) <= span_ms + ) + return fits + ("all",) + + +class WindowStepper(Horizontal): + """Window-size stepper: WINDOW ◄ 15m ►. + + The arrows are separate Statics so each can be made independently + clickable without restructuring the widget. + """ + + can_focus = False + + window_size = reactive("", init=False) + + class Stepped(Message): + """Posted when an arrow is clicked; the screen resizes the window. + + `direction` is -1 for the left arrow, +1 for the right arrow. + """ + + def __init__(self, direction: int) -> None: + super().__init__() + self.direction = direction + + def __init__(self, window_size: str) -> None: + """Hold the current window size to paint once mounted.""" + super().__init__() + self.set_reactive(WindowStepper.window_size, window_size) + + def compose(self) -> ComposeResult: + caption = Static("WINDOW", classes="stepper-caption") + caption.tooltip = ( + "Width of the log time window whose queries are shown. " + "Press w or click the arrows to resize." + ) + yield caption + yield Static("◄", id="window-prev", classes="stepper-arrow") + yield Static(self.window_size, id="window-size") + yield Static("►", id="window-next", classes="stepper-arrow") + + def watch_window_size(self, value: str) -> None: + """Repaint the size label when the window size changes.""" + self.query_one("#window-size", Static).update(value) + + def on_click(self, event: events.Click) -> None: + """Translate an arrow click into a Stepped message.""" + clicked = event.widget.id if event.widget else None + if clicked == "window-prev": + self.post_message(self.Stepped(-1)) + elif clicked == "window-next": + self.post_message(self.Stepped(1)) diff --git a/src/qlever/util.py b/src/qlever/util.py index 4685b3c8..aaf3bebf 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -147,15 +147,47 @@ def run_curl_command( return result.stdout -def is_qlever_server_alive(endpoint_url: str) -> bool: +def pretty_printed_query( + query: str, show_prefixes: bool, system: str = "docker" +) -> str | None: """ - Helper function that checks if a QLever server is running on the given - endpoint. Return `True` if the server is alive, `False` otherwise. + Pretty-print a SPARQL query using the sparql-formatter Docker image. + Optionally strips PREFIX declarations from the output. Argument + `system` can either be docker or podman. Returns None if the query + could not be pretty-printed. """ + from qlever.containerize import Containerize + if system not in Containerize.supported_systems(): + system = "docker" + remove_prefixes_cmd = " | sed '/^PREFIX /Id'" if not show_prefixes else "" + pretty_print_query_cmd = ( + f"echo {shlex.quote(query)}" + f" | {system} run -i --rm docker.io/sparqling/sparql-formatter" + f"{remove_prefixes_cmd} | grep -v '^$'" + ) + try: + query_pretty_printed = run_command( + pretty_print_query_cmd, return_output=True + ) + return query_pretty_printed.rstrip() + except Exception as e: + log.debug(f"Failed to pretty-print query, returning None: {e}") + return None + + +def is_qlever_server_alive( + endpoint_url: str, max_time: int | None = None +) -> bool: + """Check if a QLever server is running on the given endpoint. + + `max_time` (seconds) caps the curl invocation when set; default is + unbounded so existing callers behave as before. + """ message = "from the `qlever` CLI" + max_time_flag = f"--max-time {max_time} " if max_time is not None else "" curl_cmd = ( - f"curl -s {endpoint_url}/ping" + f"curl -s {max_time_flag}{endpoint_url}/ping" f" --data-urlencode msg={shlex.quote(message)}" ) log.debug(curl_cmd) diff --git a/test/qlever/conftest.py b/test/qlever/conftest.py index 9f825cb1..a10f6d42 100644 --- a/test/qlever/conftest.py +++ b/test/qlever/conftest.py @@ -3,6 +3,37 @@ import pytest +@pytest.fixture +def write_log(tmp_path): + """Write bytes to a fresh log file and return its Path.""" + + def make(data): + path = tmp_path / "q.log" + path.write_bytes(data) + return path + + return make + + +@pytest.fixture +def open_log(write_log): + """Write bytes to a log file and hand back an open (handle, size). + + Closes every handle it produced when the test finishes. + """ + handles = [] + + def make(data): + path = write_log(data) + handle = path.open("rb") + handles.append(handle) + return handle, path.stat().st_size + + yield make + for handle in handles: + handle.close() + + @pytest.fixture def mock_command(monkeypatch): def _mock(module_name: str, function_name: str, override=None): diff --git a/test/qlever/test_historic_data.py b/test/qlever/test_historic_data.py new file mode 100644 index 00000000..67b3dc28 --- /dev/null +++ b/test/qlever/test_historic_data.py @@ -0,0 +1,456 @@ +"""Tests for read_window, filter_queries, render_window, and detail loading.""" + +from qlever.monitor.historic_data import ( + DURATION_UNKNOWN, + LoggedQuery, + filter_queries, + load_query_details_for_rows, + read_window, + render_window, +) +from qlever.monitor.models import ControlsState, HistoricQueryRow + +PAD_MS = 10_000 +SLOW_MS = 60_000 + + +def start_line(ts_ms, qid, query="SELECT 1"): + return ( + f'{{"ts-ms":{ts_ms},"event":"start","qid":"{qid}",' + f'"client-ip":"x","query":"{query}"}}\n' + ).encode() + + +def end_line(ts_ms, qid, status="ok"): + return ( + f'{{"ts-ms":{ts_ms},"event":"end","qid":"{qid}",' + f'"status":"{status}"}}\n' + ).encode() + + +def test_read_window_empty_log_returns_no_queries(write_log): + path = write_log(b"") + queries = read_window(path, 0, 1000, PAD_MS, log_end_ms=0, now_ms=0) + assert queries == [] + + +def test_read_window_keeps_pair_entirely_inside_window(write_log): + path = write_log( + start_line(1_000_000, "q1", "SELECT a") + end_line(1_000_500, "q1") + ) + queries = read_window( + path, + 900_000, + 1_100_000, + PAD_MS, + log_end_ms=1_000_500, + now_ms=1_000_500, + ) + assert queries == [ + LoggedQuery( + start_ms=1_000_000, + end_ms=1_000_500, + status="ok", + start_line_offset=0, + ) + ] + + +def test_read_window_drops_pair_inside_pad_before_window(write_log): + path = write_log( + start_line(900_000, "pad", "SELECT pad") + + end_line(900_500, "pad") + + start_line(1_000_000, "q1", "SELECT a") + + end_line(1_000_500, "q1") + ) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_000_500, + now_ms=1_000_500, + ) + starts = [query.start_ms for query in queries] + assert starts == [1_000_000] + + +def test_read_window_keeps_pair_straddling_window_start(write_log): + path = write_log( + start_line(995_000, "edge", "SELECT edge") + + end_line(1_005_000, "edge") + ) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_005_000, + now_ms=1_005_000, + ) + assert len(queries) == 1 + assert queries[0].start_ms == 995_000 + assert queries[0].end_ms == 1_005_000 + + +def test_read_window_keeps_pair_straddling_window_end(write_log): + path = write_log( + start_line(1_095_000, "edge", "SELECT edge") + + end_line(1_105_000, "edge") + ) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_105_000, + now_ms=1_105_000, + ) + assert len(queries) == 1 + assert queries[0].start_ms == 1_095_000 + + +def test_read_window_includes_still_open_with_running_status(write_log): + path = write_log(start_line(1_050_000, "live", "SELECT live")) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_050_000, + now_ms=1_050_000, + ) + assert queries == [ + LoggedQuery( + start_ms=1_050_000, + end_ms=None, + status="running", + start_line_offset=0, + ) + ] + + +def test_read_window_marks_still_open_as_orphaned_when_log_advanced_past_pad( + write_log, +): + path = write_log(start_line(1_050_000, "ghost", "SELECT ghost")) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_200_000, + now_ms=1_200_000, + ) + assert queries == [ + LoggedQuery( + start_ms=1_050_000, + end_ms=None, + status="orphaned", + start_line_offset=0, + ) + ] + + +def test_read_window_marks_still_open_as_orphaned_when_log_is_stale(write_log): + """A still-open query is orphaned once the log has been silent past 2t. + + The start is within pad of log_end_ms (gate (a) passes), but wall + clock has moved past log_end_ms by more than pad (gate (b) fails), + so the server must be dead and the survivor cannot be running. + """ + path = write_log(start_line(1_050_000, "ghost", "SELECT ghost")) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_050_000, + now_ms=1_050_000 + PAD_MS + 1, + ) + [query] = queries + assert query.status == "orphaned" + + +def test_read_window_drops_still_open_started_after_window(write_log): + path = write_log(start_line(1_200_000, "later", "SELECT later")) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_200_000, + now_ms=1_200_000, + ) + assert queries == [] + + +def test_read_window_keeps_all_overlapping_pairs(write_log): + path = write_log( + start_line(995_000, "edge_start", "SELECT a") + + end_line(1_005_000, "edge_start") + + start_line(1_050_000, "inside", "SELECT b") + + end_line(1_060_000, "inside") + + start_line(1_095_000, "edge_end", "SELECT c") + + end_line(1_105_000, "edge_end") + ) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_105_000, + now_ms=1_105_000, + ) + assert {query.start_ms for query in queries} == { + 995_000, + 1_050_000, + 1_095_000, + } + + +def test_read_window_carries_real_start_line_offset(write_log): + first = start_line(1_010_000, "q1", "SELECT a") + end_line(1_020_000, "q1") + path = write_log( + first + + start_line(1_050_000, "q2", "SELECT b") + + end_line(1_060_000, "q2") + ) + queries = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + log_end_ms=1_060_000, + now_ms=1_060_000, + ) + offsets = {query.start_ms: query.start_line_offset for query in queries} + assert offsets[1_010_000] == 0 + assert offsets[1_050_000] == len(first) + + +def make_query(start_ms, end_ms, status="ok", start_line_offset=0): + return LoggedQuery( + start_ms=start_ms, + end_ms=end_ms, + status=status, + start_line_offset=start_line_offset, + ) + + +def test_filter_queries_active_returns_list_unchanged(): + queries = [ + make_query(100, 200), + make_query(300, None, status="running"), + ] + assert filter_queries(queries, "ACTIVE", 0, 1000) is queries + + +def test_filter_queries_starts_keeps_only_starts_in_window(): + queries = [ + make_query(50, 150), + make_query(200, 400), + make_query(1100, 1200), + ] + starts = [ + query.start_ms for query in filter_queries(queries, "STARTS", 100, 1000) + ] + assert starts == [200] + + +def test_filter_queries_starts_includes_running_queries(): + queries = [make_query(500, None, status="running")] + starts = [ + query.start_ms for query in filter_queries(queries, "STARTS", 100, 1000) + ] + assert starts == [500] + + +def test_filter_queries_ends_excludes_running_queries(): + queries = [ + make_query(200, 500), + make_query(300, None, status="running"), + ] + ends = [ + query.end_ms for query in filter_queries(queries, "ENDS", 100, 1000) + ] + assert ends == [500] + + +def test_filter_queries_ends_drops_ends_outside_window(): + queries = [ + make_query(50, 90), + make_query(200, 500), + make_query(800, 1200), + ] + ends = [ + query.end_ms for query in filter_queries(queries, "ENDS", 100, 1000) + ] + assert ends == [500] + + +def make_controls(mode="ACTIVE"): + return ControlsState( + window_size="15m", + mode=mode, + start_ms=1_000_000, + end_ms=2_000_000, + ) + + +def test_render_window_uses_recorded_end_for_completed_query(): + queries = [make_query(1_100_000, 1_150_000)] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 + ) + assert rows[0].duration_ms == 50_000 + assert rows[0].status == "ok" + + +def test_render_window_uses_log_end_for_running_query(): + queries = [make_query(1_100_000, None, status="running")] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 + ) + assert rows[0].duration_ms == 1_400_000 + assert rows[0].status == "running" + + +def test_render_window_marks_orphan_duration_as_unknown(): + queries = [make_query(1_100_000, None, status="orphaned")] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 + ) + assert rows[0].duration_ms == DURATION_UNKNOWN + assert rows[0].status == "orphaned" + + +def test_render_window_passes_window_size_as_metric_label(): + queries = [make_query(100 + index, 200 + index) for index in range(7)] + controls = ControlsState( + window_size="1h", + mode="ACTIVE", + start_ms=0, + end_ms=1_000_000, + ) + _, metrics = render_window(queries, controls, SLOW_MS, log_end_ms=1_000_000) + assert metrics.label == "1h" + assert metrics.seen == 7 + assert metrics.ok == 7 + + +def test_render_window_carries_start_line_offset_onto_row(): + queries = [make_query(1_100_000, 1_200_000, start_line_offset=512)] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 + ) + assert rows[0].start_line_offset == 512 + + +def test_render_window_leaves_text_empty(): + queries = [make_query(1_100_000, 1_200_000)] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 + ) + assert rows[0].qid == "" + assert rows[0].sparql == "" + assert rows[0].client_ip == "" + + +def test_render_window_respects_mode_filter(): + queries = [ + make_query(1_100_000, 1_200_000), + make_query(1_500_000, None, status="running"), + ] + rows, _ = render_window( + queries, make_controls(mode="ENDS"), SLOW_MS, log_end_ms=2_500_000 + ) + assert [row.started_at_ms for row in rows] == [1_100_000] + + +# Queries used by the mode-aware metric tests. A is fully inside the +# window, B starts inside but ends after it, C starts before but ends +# inside. So the metric count must change with the selected mode. +def mode_metric_queries(): + return [ + make_query(1_100_000, 1_900_000), + make_query(1_500_000, 2_200_000), + make_query(800_000, 1_300_000), + ] + + +def test_render_window_metrics_active_counts_all_overlapping(): + _, metrics = render_window( + mode_metric_queries(), + make_controls("ACTIVE"), + SLOW_MS, + log_end_ms=2_200_000, + ) + assert metrics.seen == 3 + + +def test_render_window_metrics_starts_counts_started_in_window(): + _, metrics = render_window( + mode_metric_queries(), + make_controls("STARTS"), + SLOW_MS, + log_end_ms=2_200_000, + ) + assert metrics.seen == 2 + + +def test_render_window_metrics_ends_counts_ended_in_window(): + _, metrics = render_window( + mode_metric_queries(), + make_controls("ENDS"), + SLOW_MS, + log_end_ms=2_200_000, + ) + assert metrics.seen == 2 + + +def test_render_window_metrics_exclude_running_and_orphaned(): + queries = [ + make_query(1_100_000, 1_900_000), + make_query(1_500_000, None, status="running"), + make_query(1_200_000, None, status="orphaned"), + ] + _, metrics = render_window( + queries, make_controls("ACTIVE"), SLOW_MS, log_end_ms=2_500_000 + ) + assert metrics.seen == 1 + + +def make_row(start_line_offset): + return HistoricQueryRow( + qid="", + start_line_offset=start_line_offset, + started_at_ms=1_000_000, + duration_ms=0, + status="ok", + sparql="", + client_ip="", + ) + + +def test_load_query_details_fills_text_from_offsets(write_log): + first = start_line(1_000_000, "q1", "SELECT a") + path = write_log(first + start_line(1_050_000, "q2", "SELECT b")) + cache = {} + rows = [make_row(0), make_row(len(first))] + filled = load_query_details_for_rows(path, rows, cache) + assert [(row.qid, row.sparql) for row in filled] == [ + ("q1", "SELECT a"), + ("q2", "SELECT b"), + ] + assert filled[0].client_ip == "x" + assert cache[0] == ("q1", "x", "SELECT a") + + +def test_load_query_details_reuses_cache_without_reading(write_log): + path = write_log(start_line(1_000_000, "q1", "SELECT a")) + cache = {0: ("cached-qid", "cached-ip", "CACHED")} + [filled] = load_query_details_for_rows(path, [make_row(0)], cache) + assert filled.qid == "cached-qid" + assert filled.client_ip == "cached-ip" + assert filled.sparql == "CACHED" diff --git a/test/qlever/test_live_data.py b/test/qlever/test_live_data.py new file mode 100644 index 00000000..852c6f5d --- /dev/null +++ b/test/qlever/test_live_data.py @@ -0,0 +1,632 @@ +"""Tests for the Live data layer: state, tailer, boot, metrics.""" + +import threading +import time + +from qlever.monitor.live_data import ( + LIVE_HORIZON_MS, + ActiveQuery, + CompletedQueries, + LiveLogReader, + LiveState, + discard_finished_backlog, + find_active_queries, + get_live_metrics, + get_live_query_rows, + load_completed_history, +) +from qlever.monitor.log_reader import CompletedQuery +from qlever.monitor.metrics import MetricsSnapshot, percentiles + +NOW_MS = 1_700_000_000_000 +MIN_MS = 60_000 +WINDOW_5M = 5 * MIN_MS +WINDOW_15M = 15 * MIN_MS + + +def make_completed(end_ms, status="ok", duration_ms=100): + """Build a CompletedQuery with a self-consistent start_ms.""" + return CompletedQuery( + start_ms=end_ms - duration_ms, + end_ms=end_ms, + duration_ms=duration_ms, + status=status, + start_line_offset=0, + ) + + +def test_empty_history_returns_empty_snapshot(): + history = CompletedQueries() + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - WINDOW_5M, + ) + assert snap == MetricsSnapshot( + seen=0, + ok=0, + failed=0, + timeout=0, + cancelled=0, + unknown=0, + slow=0, + p50=None, + p95=None, + ) + + +def test_status_counts_split_correctly(): + history = CompletedQueries() + for status in ("ok", "failed", "timeout", "cancelled", "unknown"): + history.add(make_completed(NOW_MS - MIN_MS, status=status)) + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - WINDOW_5M, + ) + assert snap.seen == 5 + assert snap.ok == 1 + assert snap.failed == 1 + assert snap.timeout == 1 + assert snap.cancelled == 1 + assert snap.unknown == 1 + + +def test_window_excludes_old_entries(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - 2 * MIN_MS)) + history.add(make_completed(NOW_MS - WINDOW_5M)) + history.add(make_completed(NOW_MS - 10 * MIN_MS)) + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - WINDOW_5M, + ) + assert snap.seen == 2 + + +def test_partial_coverage_returns_none(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - 2 * MIN_MS)) + snaps = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M, WINDOW_15M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - 10 * MIN_MS, + ) + assert snaps[0] is not None + assert snaps[0].seen == 1 + assert snaps[1] is None + + +def test_slow_threshold_independent_of_status(): + history = CompletedQueries() + history.add( + make_completed(NOW_MS - MIN_MS, status="ok", duration_ms=12_000) + ) + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - WINDOW_5M, + ) + assert snap.ok == 1 + assert snap.slow == 1 + + +def test_drop_older_than_pops_left(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - 30 * MIN_MS)) + history.add(make_completed(NOW_MS - 20 * MIN_MS)) + history.add(make_completed(NOW_MS - MIN_MS)) + history.drop_older_than(NOW_MS - 15 * MIN_MS) + assert len(history.entries) == 1 + assert history.entries[0].end_ms == NOW_MS - MIN_MS + + +def test_percentiles_with_known_values(): + durations = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000] + p50, p95 = percentiles(durations) + assert 450 <= p50 <= 600 + assert 900 <= p95 <= 1100 + + +def test_data_start_equal_to_cutoff_returns_snapshot(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - 2 * MIN_MS)) + cutoff = NOW_MS - WINDOW_5M + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=cutoff, + ) + assert snap is not None + assert snap.seen == 1 + + +def start_line(ts_ms, qid, query="SELECT 1"): + return ( + f'{{"ts-ms":{ts_ms},"event":"start","qid":"{qid}",' + f'"client-ip":"x","query":"{query}"}}\n' + ).encode() + + +def end_line(ts_ms, qid, status="ok"): + return ( + f'{{"ts-ms":{ts_ms},"event":"end","qid":"{qid}",' + f'"status":"{status}"}}\n' + ).encode() + + +def test_find_active_queries_picks_up_unmatched_starts(write_log): + path = write_log( + start_line(1000, "q1", "SELECT a") + + start_line(2000, "q2", "SELECT b") + + end_line(3000, "q1") + ) + state, cut_offset, eof_ts = find_active_queries(path, window_pad_ms=10_000) + assert list(state.active) == ["q2"] + assert state.active["q2"] == ActiveQuery( + start_ms=2000, end_ms=None, client_ip="x", sparql="SELECT b" + ) + assert cut_offset == path.stat().st_size + assert eof_ts == 3000 + + +def test_find_active_queries_loads_sparql_for_each_survivor(write_log): + path = write_log( + start_line(1000, "a", "SELECT alpha") + + start_line(2000, "b", "SELECT beta") + ) + state, _, _ = find_active_queries(path, window_pad_ms=10_000) + assert state.active["a"] == ActiveQuery( + start_ms=1000, end_ms=None, client_ip="x", sparql="SELECT alpha" + ) + assert state.active["b"] == ActiveQuery( + start_ms=2000, end_ms=None, client_ip="x", sparql="SELECT beta" + ) + + +def test_find_active_queries_empty_log_returns_empty_state(write_log): + path = write_log(b"") + state, cut_offset, eof_ts = find_active_queries(path, window_pad_ms=10_000) + assert state.active == {} + assert cut_offset == 0 + assert eof_ts == 0 + + +def test_find_active_queries_seeds_latest_event_ms_from_eof(write_log): + path = write_log( + start_line(1000, "q1") + end_line(2000, "q1") + start_line(3000, "q2") + ) + state, _, eof_ts = find_active_queries(path, window_pad_ms=10_000) + assert state.latest_event_ms == eof_ts == 3000 + + +def test_find_active_queries_empty_log_leaves_latest_event_ms_none(write_log): + path = write_log(b"") + state, _, _ = find_active_queries(path, window_pad_ms=10_000) + assert state.latest_event_ms is None + + +def make_reader(path, state, cut_offset=0, window_pad_ms=10_000, now_ms=3000): + """Build a LiveLogReader with a frozen clock for poll tests.""" + return LiveLogReader( + log_path=path, + state=state, + cut_offset=cut_offset, + window_pad_ms=window_pad_ms, + now_ms=lambda: now_ms, + ) + + +def test_poll_pairs_a_clean_start_and_end(write_log): + path = write_log(start_line(1000, "q1", "SELECT a") + end_line(2000, "q1")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + # The end event records the completion but the query stays in active + # (end_ms set, still unseen) until a repaint has shown it. + entry = state.active["q1"] + assert entry.end_ms == 2000 + assert entry.seen is False + [done] = state.completed.entries + assert done.start_ms == 1000 + assert done.end_ms == 2000 + assert done.duration_ms == 1000 + assert done.status == "ok" + assert done.start_line_offset is None + + +def test_poll_keeps_start_in_active_until_end_arrives(write_log): + path = write_log(start_line(1000, "q1", "SELECT a")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.active == { + "q1": ActiveQuery( + start_ms=1000, end_ms=None, client_ip="x", sparql="SELECT a" + ) + } + assert len(state.completed.entries) == 0 + + +def test_poll_advances_cursor_so_second_poll_sees_new_lines(write_log): + first = start_line(1000, "q1") + end_line(2000, "q1") + path = write_log(first) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert len(state.completed.entries) == 1 + + path.write_bytes( + first + start_line(3000, "q2") + end_line(4000, "q2", "failed") + ) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert len(state.completed.entries) == 2 + assert state.completed.entries[-1].status == "failed" + + +def test_poll_partial_trailing_line_is_left_for_next_poll(write_log): + full = start_line(1000, "q1") + end_line(2000, "q1") + truncated = full[:-5] + path = write_log(truncated) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert "q1" in state.active + assert len(state.completed.entries) == 0 + + path.write_bytes(full) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.active["q1"].end_ms == 2000 + assert len(state.completed.entries) == 1 + + +def test_poll_skips_malformed_line_and_continues(write_log): + path = write_log( + b"not json at all\n" + start_line(1000, "q1") + end_line(2000, "q1") + ) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert len(state.completed.entries) == 1 + + +def test_poll_drops_end_without_matching_start(write_log): + path = write_log(end_line(2000, "ghost")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.active == {} + assert len(state.completed.entries) == 0 + + +def test_poll_advances_latest_event_ms_to_the_newest_line(write_log): + path = write_log(start_line(1000, "q1") + end_line(2500, "q1")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.latest_event_ms == 2500 + + +def test_poll_keeps_latest_event_ms_monotonic_under_out_of_order_writes( + write_log, +): + # Sub-millisecond jitter on the server can write an older ts_ms + # after a newer one; the field must stay at the larger value. + path = write_log(start_line(2000, "q_late") + start_line(1500, "q_early")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.latest_event_ms == 2000 + + +def test_evict_stale_drops_actives_older_than_window_pad(write_log): + state = LiveState() + now = 1_000_000 + pad = 10_000 + state.active["fresh"] = ActiveQuery( + start_ms=now - 1_000, end_ms=None, client_ip="", sparql="" + ) + state.active["old"] = ActiveQuery( + start_ms=now - 20_000, end_ms=None, client_ip="", sparql="" + ) + reader = LiveLogReader( + log_path=write_log(b""), + state=state, + cut_offset=0, + window_pad_ms=pad, + now_ms=lambda: now, + ) + reader.evict_stale() + assert list(state.active) == ["fresh"] + + +def test_evict_stale_drops_completed_older_than_one_hour(write_log): + state = LiveState() + now = 10_000_000 + state.completed.add( + CompletedQuery( + start_ms=now - LIVE_HORIZON_MS - 1_000, + end_ms=now - LIVE_HORIZON_MS - 500, + duration_ms=500, + status="ok", + start_line_offset=None, + ) + ) + state.completed.add( + CompletedQuery( + start_ms=now - 1_000, + end_ms=now - 500, + duration_ms=500, + status="ok", + start_line_offset=None, + ) + ) + reader = LiveLogReader( + log_path=write_log(b""), + state=state, + cut_offset=0, + window_pad_ms=10_000, + now_ms=lambda: now, + ) + reader.evict_stale() + assert len(state.completed.entries) == 1 + assert state.completed.entries[0].end_ms == now - 500 + + +def test_get_live_metrics_returns_three_rows_when_coverage_spans_an_hour(): + state = LiveState() + state.metrics_known_from_ms = NOW_MS - 65 * MIN_MS + state.completed.add( + CompletedQuery( + start_ms=NOW_MS - 65 * MIN_MS, + end_ms=NOW_MS - 60 * MIN_MS, + duration_ms=5 * MIN_MS, + status="ok", + start_line_offset=None, + ) + ) + state.completed.add(make_completed(NOW_MS - 10 * MIN_MS)) + state.completed.add(make_completed(NOW_MS - 1 * MIN_MS)) + + rows = get_live_metrics(state, slow_threshold_ms=10_000, now_ms=NOW_MS) + assert [row.label for row in rows] == ["last 5m", "last 15m", "last 1h"] + row_5m, row_15m, row_1h = rows + assert row_5m.seen == 1 + assert row_15m.seen == 2 + assert row_1h.seen == 3 + + +def test_get_live_metrics_blanks_windows_past_coverage_start(): + state = LiveState() + state.metrics_known_from_ms = NOW_MS - 11 * MIN_MS + state.completed.add(make_completed(NOW_MS - 11 * MIN_MS)) + state.completed.add(make_completed(NOW_MS - 2 * MIN_MS)) + + row_5m, row_15m, row_1h = get_live_metrics( + state, + slow_threshold_ms=10_000, + now_ms=NOW_MS, + ) + assert row_5m.seen == 1 + assert row_5m.not_ready_message is None + assert row_15m.seen is None + assert row_15m.not_ready_message == "ready in 4m" + assert row_1h.seen is None + assert row_1h.not_ready_message == "ready in 49m" + + +def test_get_live_metrics_masks_everything_before_coverage_is_announced(): + state = LiveState() + state.completed.add(make_completed(NOW_MS - 30 * MIN_MS)) + + row_5m, row_15m, row_1h = get_live_metrics( + state, + slow_threshold_ms=10_000, + now_ms=NOW_MS, + ) + # During boot ramp we don't know when coverage arrives, so no ETA. + for row in (row_5m, row_15m, row_1h): + assert row.seen is None + assert row.not_ready_message is None + + +def test_load_completed_history_loads_pairs_from_history(write_log): + path = write_log( + start_line(1000, "q1") + + end_line(2000, "q1") + + start_line(3000, "q2") + + end_line(4000, "q2", "failed") + ) + state = LiveState() + load_completed_history( + log_path=path, + state=state, + cut_offset=path.stat().st_size, + window_pad_ms=10_000, + now_ms=lambda: 5000, + ) + assert len(state.completed.entries) == 2 + assert state.completed.entries[0].start_ms == 1000 + assert state.completed.entries[0].end_ms == 2000 + assert state.completed.entries[1].status == "failed" + + +def test_load_completed_history_prepends_before_existing_tailer_entries( + write_log, +): + path = write_log(start_line(1000, "old") + end_line(2000, "old")) + state = LiveState() + state.completed.add( + CompletedQuery( + start_ms=5000, + end_ms=6000, + duration_ms=1000, + status="ok", + start_line_offset=None, + ) + ) + load_completed_history( + log_path=path, + state=state, + cut_offset=path.stat().st_size, + window_pad_ms=10_000, + now_ms=lambda: 7000, + ) + assert len(state.completed.entries) == 2 + assert state.completed.entries[0].end_ms == 2000 + assert state.completed.entries[1].end_ms == 6000 + + +def test_load_completed_history_drops_starts_without_a_matching_end(write_log): + path = write_log( + start_line(1000, "q1") + + end_line(2000, "q1") + + start_line(3000, "orphan") + ) + state = LiveState() + load_completed_history( + log_path=path, + state=state, + cut_offset=path.stat().st_size, + window_pad_ms=10_000, + now_ms=lambda: 5000, + ) + assert len(state.completed.entries) == 1 + assert state.completed.entries[0].start_ms == 1000 + + +def test_load_completed_history_recovers_query_straddling_the_hour(write_log): + oldest_wanted = 100_000 + now = LIVE_HORIZON_MS + oldest_wanted + path = write_log( + start_line(91_000, "before") + + end_line(93_000, "before") + + start_line(95_000, "straddler") + + end_line(150_000, "straddler") + + start_line(200_000, "inside") + + end_line(300_000, "inside") + ) + state = LiveState() + load_completed_history( + log_path=path, + state=state, + cut_offset=path.stat().st_size, + window_pad_ms=20_000, + now_ms=lambda: now, + ) + starts = [entry.start_ms for entry in state.completed.entries] + assert starts == [95_000, 200_000] + + +def test_poll_loop_picks_up_lines_appended_to_the_file(write_log): + path = write_log(b"") + state = LiveState() + reader = LiveLogReader( + log_path=path, + state=state, + cut_offset=0, + window_pad_ms=10_000, + poll_interval=0.02, + now_ms=lambda: 0, + ) + stop_event = threading.Event() + + def run_loop(): + with path.open("rb") as stream: + while not stop_event.is_set(): + reader.poll(stream) + stop_event.wait(reader.poll_interval) + + thread = threading.Thread(target=run_loop, daemon=True) + thread.start() + try: + with path.open("ab") as appender: + appender.write(start_line(1000, "q1")) + appender.write(end_line(2000, "q1")) + deadline = time.monotonic() + 2.0 + while time.monotonic() < deadline: + if len(state.completed.entries) == 1: + break + time.sleep(0.02) + assert len(state.completed.entries) == 1 + finally: + stop_event.set() + thread.join() + + +def test_finished_query_is_shown_once_then_skipped(write_log): + """A query that finished before any paint shows once, then is hidden.""" + path = write_log(start_line(1000, "q1", "SELECT a") + end_line(1100, "q1")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + [row] = get_live_query_rows(state, now_ms=5000) + assert row.qid == "q1" + assert row.started_at_ms == 1000 + assert row.sparql == "SELECT a" + # Frozen at end - start, regardless of the clock the caller passes. + assert row.duration_ms == 100 + assert get_live_query_rows(state, now_ms=9999) == [] + + +def test_finished_unseen_entry_survives_eviction_until_shown(write_log): + """Eviction keeps a finished query until a repaint has rendered it.""" + path = write_log(start_line(1000, "q1") + end_line(1100, "q1")) + state = LiveState() + reader = make_reader(path, state, now_ms=1200) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert "q1" in state.active + get_live_query_rows(state, now_ms=1200) + reader.evict_stale() + assert "q1" not in state.active + + +def test_discard_finished_backlog_marks_finished_seen_only(): + """The resume drop suppresses finished entries but keeps running ones.""" + state = LiveState() + state.active["done"] = ActiveQuery( + start_ms=1000, end_ms=1100, client_ip="x", sparql="A" + ) + state.active["running"] = ActiveQuery( + start_ms=1200, end_ms=None, client_ip="y", sparql="B" + ) + discard_finished_backlog(state) + assert state.active["done"].seen is True + assert state.active["running"].seen is False + shown = [row.qid for row in get_live_query_rows(state, now_ms=2000)] + assert shown == ["running"] + + +def test_get_live_query_rows_uses_caller_clock_for_duration(): + """Duration is now_ms - start_ms; the screen passes its display clock.""" + state = LiveState() + state.active["q1"] = ActiveQuery( + start_ms=1000, end_ms=None, client_ip="x", sparql="SELECT a" + ) + state.active["q2"] = ActiveQuery( + start_ms=1500, end_ms=None, client_ip="y", sparql="SELECT b" + ) + rows = {row.qid: row for row in get_live_query_rows(state, now_ms=2000)} + assert rows["q1"].duration_ms == 1000 + assert rows["q2"].duration_ms == 500 + assert rows["q1"].started_at_ms == 1000 + assert rows["q2"].client_ip == "y" diff --git a/test/qlever/test_log_reader.py b/test/qlever/test_log_reader.py new file mode 100644 index 00000000..6c5e67fe --- /dev/null +++ b/test/qlever/test_log_reader.py @@ -0,0 +1,529 @@ +import pytest + +from qlever.monitor import log_reader +from qlever.monitor.log_reader import ( + CompletedQuery, + extract_qid_ip_query, + load_sparql_at, + next_whole_line, + offset_for_ts, + pair_start_end_events, + parse_line, + parse_line_fallback, + peek_ts_ms, + read_first_timestamp, + read_last_timestamp, + scan_range, +) + +START = ( + b'{"ts-ms":1716000000000,"event":"start","qid":"q-8a4f",' + b'"client-ip":"1.2.3.4","query":"SELECT * WHERE { ?s ?p ?o }"}' +) +END = b'{"ts-ms":1716000000050,"event":"end","qid":"q-8a4f","status":"ok"}' + +VALID_STATUSES = ["ok", "failed", "cancelled", "timeout", "unknown"] + + +def test_parse_line_start_has_no_status_and_ignores_query(): + assert parse_line(START) == (1716000000000, "start", "q-8a4f", None) + + +@pytest.mark.parametrize("status", VALID_STATUSES) +def test_parse_line_end_carries_each_valid_status(status): + line = b'{"ts-ms":1,"event":"end","qid":"q1","status":"%s"}' % ( + status.encode() + ) + assert parse_line(line) == (1, "end", "q1", status) + + +def test_parse_line_trailing_newline_tolerated(): + assert parse_line(END + b"\n") == (1716000000050, "end", "q-8a4f", "ok") + + +def test_parse_line_escaped_quote_in_query_does_not_break_qid(): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"client-ip":"::1","query":"FILTER(?x = \\"a\\")"}' + ) + assert parse_line(line) == (1, "start", "q1", None) + + +def test_parse_line_wrong_first_key_is_a_miss(): + assert parse_line(b'{"event":"end","ts-ms":1,"qid":"q1"}') is None + + +def test_parse_line_non_integer_ts_is_a_miss(): + assert parse_line(b'{"ts-ms":1.5,"event":"end","qid":"q1"}') is None + + +def test_parse_line_unknown_event_is_a_miss(): + assert parse_line(b'{"ts-ms":1,"event":"ping","qid":"q1"}') is None + + +def test_parse_line_end_status_outside_closed_set_is_a_miss(): + line = b'{"ts-ms":1,"event":"end","qid":"q1","status":"weird"}' + assert parse_line(line) is None + + +def test_parse_line_missing_qid_is_a_miss(): + assert parse_line(b'{"ts-ms":1,"event":"end","status":"ok"}') is None + + +def test_fallback_recovers_a_line_the_fast_path_rejects(): + # Leading space after `{` defeats the byte prefix but is valid + # JSON, so the fallback still extracts the fields. + line = b'{ "ts-ms":1716000000000,"event":"end","qid":"q1","status":"ok"}' + assert parse_line(line) is None + assert parse_line_fallback(line) == (1716000000000, "end", "q1", "ok") + + +def test_fallback_start_returns_none_status(): + assert parse_line_fallback(START) == ( + 1716000000000, + "start", + "q-8a4f", + None, + ) + + +def test_fallback_malformed_json_returns_none(): + assert parse_line_fallback(b'{"ts-ms":1,') is None + + +def test_fallback_non_object_json_returns_none(): + assert parse_line_fallback(b"[1,2,3]") is None + + +def test_fallback_non_integer_ts_returns_none(): + line = b'{"ts-ms":1.0,"event":"end","qid":"q1","status":"ok"}' + assert parse_line_fallback(line) is None + + +def test_fallback_status_outside_closed_set_returns_none(): + line = b'{"ts-ms":1,"event":"end","qid":"q1","status":"weird"}' + assert parse_line_fallback(line) is None + + +FIRST_LINE = b'{"ts-ms":1000,"event":"start","qid":"q1","query":"SELECT 1"}\n' +SECOND_LINE = b'{"ts-ms":2000,"event":"end","qid":"q1","status":"ok"}\n' +THIRD_LINE = b'{"ts-ms":3000,"event":"start","qid":"q2","query":"SELECT 2"}\n' +SECOND_OFFSET = len(FIRST_LINE) +THIRD_OFFSET = len(FIRST_LINE) + len(SECOND_LINE) + + +def test_next_whole_line_probe_zero_returns_first_line(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE + THIRD_LINE) + assert next_whole_line(log, 0, size) == (0, 1000) + + +def test_next_whole_line_mid_line_aligns_to_following_line(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE + THIRD_LINE) + # A probe inside the first line lands on the second. + assert next_whole_line(log, 10, size) == (SECOND_OFFSET, 2000) + assert next_whole_line(log, SECOND_OFFSET - 1, size) == ( + SECOND_OFFSET, + 2000, + ) + + +def test_next_whole_line_on_a_boundary_advances_to_the_next_line(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE + THIRD_LINE) + # probe > 0 always discards one line, so a probe on the second + # line's start returns the third. + assert next_whole_line(log, SECOND_OFFSET, size) == (THIRD_OFFSET, 3000) + + +def test_next_whole_line_past_eof_returns_none(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE) + assert next_whole_line(log, size, size) is None + + +def test_next_whole_line_trailing_partial_line_returns_none(open_log): + partial = b'{"ts-ms":4000,"event":"end","qid":"q3"' + log, size = open_log(FIRST_LINE + partial) + # The only line after the probe has no newline, so it is incomplete. + assert next_whole_line(log, 5, size) is None + + +def build_log(timestamps): + """Bytes for an ascending-ts log plus each line's start offset.""" + data = bytearray() + offsets = [] + for i, ts in enumerate(timestamps): + offsets.append(len(data)) + data += b'{"ts-ms":%d,"event":"end","qid":"q%d","status":"ok"}\n' % ( + ts, + i, + ) + return bytes(data), offsets + + +def first_ts_at_or_after(log, offset, target): + """ts of the first line at/after offset, or None if every line older.""" + log.seek(offset) + for line in log: + ts = peek_ts_ms(line) + if ts >= target: + return ts + return None + + +def test_offset_for_ts_empty_file_returns_zero(open_log): + log, size = open_log(b"") + assert offset_for_ts(log, 100, size) == 0 + + +def test_offset_for_ts_target_before_first_line_returns_zero(open_log): + data, _ = build_log([1000, 1010, 1020]) + log, size = open_log(data) + assert offset_for_ts(log, 500, size) == 0 + + +def test_offset_for_ts_target_equal_to_first_line_returns_zero(open_log): + data, _ = build_log([1000, 1010, 1020]) + log, size = open_log(data) + # first_ts >= target means every line qualifies; start at 0. + assert offset_for_ts(log, 1000, size) == 0 + + +@pytest.mark.parametrize("target", [1015, 1020, 1099, 1500, 1900]) +def test_offset_for_ts_lands_at_or_before_the_boundary( + open_log, monkeypatch, target +): + # Tiny gallop step so the backward gallop actually iterates here. + monkeypatch.setattr(log_reader, "GALLOP_START", 16) + timestamps = list(range(1000, 2000, 10)) + data, offsets = build_log(timestamps) + log, size = open_log(data) + + result = offset_for_ts(log, target, size) + + # Never overshoots: the returned line is not newer than target, so + # no line with ts >= target was skipped. + boundary = next(o for o, ts in zip(offsets, timestamps) if ts >= target) + assert result <= boundary + assert first_ts_at_or_after(log, result, target) == next( + ts for ts in timestamps if ts >= target + ) + + +def test_offset_for_ts_target_past_last_line_yields_empty_scan( + open_log, monkeypatch +): + monkeypatch.setattr(log_reader, "GALLOP_START", 16) + data, _ = build_log(list(range(1000, 2000, 10))) + log, size = open_log(data) + + result = offset_for_ts(log, 9999, size) + + # Near EOF, not necessarily file_size, but a forward scan from it + # finds nothing at or after the target: the same empty result. + assert first_ts_at_or_after(log, result, 9999) is None + + +def test_scan_range_yields_each_whole_line_with_its_offset(open_log): + data, offsets = build_log([1000, 1010, 1020]) + log, size = open_log(data) + assert list(scan_range(log, 0, size)) == [ + ((1000, "end", "q0", "ok"), offsets[0]), + ((1010, "end", "q1", "ok"), offsets[1]), + ((1020, "end", "q2", "ok"), offsets[2]), + ] + + +def test_scan_range_includes_the_line_straddling_hi_bound(open_log): + data, offsets = build_log([1000, 1010, 1020]) + log, size = open_log(data) + # hi_bound falls inside the second line. The second line starts + # before it so it is still emitted; the third starts past it. + result = list(scan_range(log, 0, offsets[1] + 3)) + assert [ts for (ts, _, _, _), _ in result] == [1000, 1010] + + +def test_scan_range_stops_before_a_trailing_partial_line(open_log): + data, _ = build_log([1000]) + log, size = open_log(data + b'{"ts-ms":2000,"event":"end","qid":"q9"') + assert list(scan_range(log, 0, size)) == [ + ((1000, "end", "q0", "ok"), 0), + ] + + +def test_scan_range_skips_a_malformed_line_and_continues(open_log): + good = b'{"ts-ms":1000,"event":"end","qid":"q0","status":"ok"}\n' + junk = b"not json at all\n" + tail = b'{"ts-ms":1020,"event":"end","qid":"q2","status":"ok"}\n' + log, size = open_log(good + junk + tail) + assert list(scan_range(log, 0, size)) == [ + ((1000, "end", "q0", "ok"), 0), + ((1020, "end", "q2", "ok"), len(good) + len(junk)), + ] + + +def test_scan_range_recovers_a_line_via_the_json_fallback(open_log): + # Leading space defeats the fast byte path but is valid JSON. + line = b'{ "ts-ms":2000,"event":"end","qid":"q1","status":"ok"}\n' + log, size = open_log(line) + assert list(scan_range(log, 0, size)) == [ + ((2000, "end", "q1", "ok"), 0), + ] + + +def test_scan_range_from_eof_yields_nothing(open_log): + data, _ = build_log([1000, 1010]) + log, size = open_log(data) + assert list(scan_range(log, size, size)) == [] + + +def test_pair_start_end_events_empty_input_yields_empty_outputs(): + completed, still_open = pair_start_end_events(iter([])) + assert completed == [] + assert still_open == {} + + +def test_pair_start_end_events_clean_pair(): + events = [ + ((1000, "start", "q1", None), 0), + ((1050, "end", "q1", "ok"), 100), + ] + completed, still_open = pair_start_end_events(iter(events)) + assert completed == [ + CompletedQuery( + start_ms=1000, + end_ms=1050, + duration_ms=50, + status="ok", + start_line_offset=0, + ) + ] + assert still_open == {} + + +def test_pair_start_end_events_start_without_end_stays_open(): + events = [((1000, "start", "q1", None), 42)] + completed, still_open = pair_start_end_events(iter(events)) + assert completed == [] + assert still_open == {"q1": (1000, 42)} + + +def test_pair_start_end_events_end_without_start_is_dropped(): + events = [((1050, "end", "q1", "ok"), 0)] + completed, still_open = pair_start_end_events(iter(events)) + assert completed == [] + assert still_open == {} + + +def test_pair_start_end_events_interleaved_queries_pair_in_end_order(): + events = [ + ((1000, "start", "qA", None), 0), + ((1010, "start", "qB", None), 100), + ((1020, "end", "qB", "ok"), 200), + ((1030, "end", "qA", "failed"), 300), + ] + completed, still_open = pair_start_end_events(iter(events)) + assert completed == [ + CompletedQuery( + start_ms=1010, + end_ms=1020, + duration_ms=10, + status="ok", + start_line_offset=100, + ), + CompletedQuery( + start_ms=1000, + end_ms=1030, + duration_ms=30, + status="failed", + start_line_offset=0, + ), + ] + assert still_open == {} + + +def test_pair_start_end_events_pairs_by_qid_not_file_order(): + # qA's end is written before qB's start (timestamps out of order), + # but pairing is by qid so each query gets its own duration. + events = [ + ((1000, "start", "qA", None), 0), + ((1100, "start", "qB", None), 100), + ((1050, "end", "qA", "ok"), 200), + ((1150, "end", "qB", "ok"), 300), + ] + completed, _ = pair_start_end_events(iter(events)) + assert completed == [ + CompletedQuery( + start_ms=1000, + end_ms=1050, + duration_ms=50, + status="ok", + start_line_offset=0, + ), + CompletedQuery( + start_ms=1100, + end_ms=1150, + duration_ms=50, + status="ok", + start_line_offset=100, + ), + ] + + +@pytest.mark.parametrize("status", VALID_STATUSES) +def test_pair_start_end_events_status_flows_through(status): + events = [ + ((1000, "start", "q1", None), 0), + ((1050, "end", "q1", status), 100), + ] + completed, _ = pair_start_end_events(iter(events)) + assert completed[0].status == status + + +def test_extract_qid_ip_query_returns_all_three_fields(): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' + b'"query":"SELECT * WHERE { ?s ?p ?o }"}\n' + ) + assert extract_qid_ip_query(line) == ( + "q1", + "1.2.3.4", + "SELECT * WHERE { ?s ?p ?o }", + ) + + +def test_extract_qid_ip_query_handles_escaped_quote_in_query(): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' + b'"query":"SELECT ?x WHERE { ?x ?p \\"a\\" }"}\n' + ) + assert extract_qid_ip_query(line) == ( + "q1", + "1.2.3.4", + 'SELECT ?x WHERE { ?x ?p "a" }', + ) + + +def test_extract_qid_ip_query_missing_client_ip_falls_back_to_empty(): + # Older log lines written before the client-ip field existed. + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"query":"SELECT * WHERE { ?s ?p ?o }"}\n' + ) + assert extract_qid_ip_query(line) == ( + "q1", + "", + "SELECT * WHERE { ?s ?p ?o }", + ) + + +def test_extract_qid_ip_query_malformed_json_returns_empty(): + assert extract_qid_ip_query(b"this is not json\n") == ("", "", "") + + +def test_load_sparql_at_returns_qid_ip_and_query(open_log): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' + b'"query":"SELECT * WHERE { ?s ?p ?o }"}\n' + ) + log, _ = open_log(line) + assert load_sparql_at(log, 0) == ( + "q1", + "1.2.3.4", + "SELECT * WHERE { ?s ?p ?o }", + ) + + +def test_load_sparql_at_handles_escaped_quotes_and_backslashes(open_log): + # The JSON value encodes a literal " and a literal \ in the query. + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' + b'"query":"SELECT ?x WHERE { ?x ?p \\"a\\\\b\\" }"}\n' + ) + log, _ = open_log(line) + assert load_sparql_at(log, 0) == ( + "q1", + "1.2.3.4", + 'SELECT ?x WHERE { ?x ?p "a\\b" }', + ) + + +def test_load_sparql_at_handles_unicode(open_log): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' + b'"query":"SELECT ?s WHERE { ?s ?p \xe2\x98\x83 }"}\n' + ) + log, _ = open_log(line) + assert load_sparql_at(log, 0) == ( + "q1", + "1.2.3.4", + "SELECT ?s WHERE { ?s ?p ☃ }", + ) + + +def test_load_sparql_at_malformed_json_returns_empty(open_log): + log, _ = open_log(b"this is not json\n") + assert load_sparql_at(log, 0) == ("", "", "") + + +def test_load_sparql_at_reads_the_line_at_the_given_offset(open_log): + first = ( + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' + b'"query":"FIRST"}\n' + ) + second = ( + b'{"ts-ms":2,"event":"start","qid":"q2","client-ip":"5.6.7.8",' + b'"query":"SECOND"}\n' + ) + log, _ = open_log(first + second) + assert load_sparql_at(log, len(first)) == ("q2", "5.6.7.8", "SECOND") + + +def test_read_last_timestamp_empty_file_returns_none(open_log): + log, size = open_log(b"") + assert read_last_timestamp(log, size) is None + + +def test_read_last_timestamp_returns_last_complete_line_ts(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE) + assert read_last_timestamp(log, size) == 2000 + + +def test_read_last_timestamp_skips_trailing_partial_line(open_log): + partial = b'{"ts-ms":3000,"event":"end","qid":"q2","status' + log, size = open_log(FIRST_LINE + SECOND_LINE + partial) + assert read_last_timestamp(log, size) == 2000 + + +def test_read_last_timestamp_grows_buffer_for_long_lines(open_log): + long_query = b"x" * (40 * 1024) + line = ( + b'{"ts-ms":1000,"event":"start","qid":"q1",' + b'"query":"' + long_query + b'"}\n' + ) + log, size = open_log(line) + assert read_last_timestamp(log, size) == 1000 + + +def test_read_first_timestamp_empty_file_returns_none(open_log): + log, size = open_log(b"") + assert read_first_timestamp(log, size) is None + + +def test_read_first_timestamp_returns_first_complete_line_ts(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE) + assert read_first_timestamp(log, size) == 1000 + + +def test_read_first_timestamp_single_line(open_log): + log, size = open_log(FIRST_LINE) + assert read_first_timestamp(log, size) == 1000 + + +def test_read_first_timestamp_skips_malformed_first_line(open_log): + log, size = open_log(b"not a log line\n" + SECOND_LINE) + assert read_first_timestamp(log, size) == 2000 + + +def test_read_first_timestamp_unterminated_only_line_returns_none(open_log): + partial = b'{"ts-ms":1000,"event":"start","qid":"q1"' + log, size = open_log(partial) + assert read_first_timestamp(log, size) is None