From 6daa1845dc816dbb8532caa46f923ad1be7375ad Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Fri, 15 May 2026 18:47:23 +0200 Subject: [PATCH 01/26] Build the `monitor-queries` ui v3 using python textual with fake data (incomplete) --- src/qlever/commands/benchmark_queries.py | 44 ++--- src/qlever/commands/monitor_queries.py | 109 +++++++++++ src/qlever/monitor/__init__.py | 0 src/qlever/monitor/app.py | 123 ++++++++++++ src/qlever/monitor/historic_stubs.py | 93 ++++++++++ src/qlever/monitor/live_stubs.py | 105 +++++++++++ src/qlever/monitor/models.py | 85 +++++++++ src/qlever/monitor/monitor.tcss | 175 ++++++++++++++++++ src/qlever/monitor/util.py | 88 +++++++++ src/qlever/monitor/views/__init__.py | 0 src/qlever/monitor/views/historic.py | 66 +++++++ src/qlever/monitor/views/live.py | 56 ++++++ src/qlever/monitor/widgets/__init__.py | 0 src/qlever/monitor/widgets/controls_row.py | 29 +++ src/qlever/monitor/widgets/header_row.py | 34 ++++ src/qlever/monitor/widgets/metrics_row.py | 106 +++++++++++ src/qlever/monitor/widgets/mode_picker.py | 35 ++++ src/qlever/monitor/widgets/nav_pill.py | 30 +++ src/qlever/monitor/widgets/query_table.py | 84 +++++++++ src/qlever/monitor/widgets/selected_window.py | 23 +++ src/qlever/monitor/widgets/sparql_pane.py | 116 ++++++++++++ src/qlever/monitor/widgets/status_row.py | 32 ++++ src/qlever/monitor/widgets/timeline.py | 139 ++++++++++++++ src/qlever/monitor/widgets/window_stepper.py | 26 +++ src/qlever/util.py | 29 +++ 25 files changed, 1594 insertions(+), 33 deletions(-) create mode 100644 src/qlever/commands/monitor_queries.py create mode 100644 src/qlever/monitor/__init__.py create mode 100644 src/qlever/monitor/app.py create mode 100644 src/qlever/monitor/historic_stubs.py create mode 100644 src/qlever/monitor/live_stubs.py create mode 100644 src/qlever/monitor/models.py create mode 100644 src/qlever/monitor/monitor.tcss create mode 100644 src/qlever/monitor/util.py create mode 100644 src/qlever/monitor/views/__init__.py create mode 100644 src/qlever/monitor/views/historic.py create mode 100644 src/qlever/monitor/views/live.py create mode 100644 src/qlever/monitor/widgets/__init__.py create mode 100644 src/qlever/monitor/widgets/controls_row.py create mode 100644 src/qlever/monitor/widgets/header_row.py create mode 100644 src/qlever/monitor/widgets/metrics_row.py create mode 100644 src/qlever/monitor/widgets/mode_picker.py create mode 100644 src/qlever/monitor/widgets/nav_pill.py create mode 100644 src/qlever/monitor/widgets/query_table.py create mode 100644 src/qlever/monitor/widgets/selected_window.py create mode 100644 src/qlever/monitor/widgets/sparql_pane.py create mode 100644 src/qlever/monitor/widgets/status_row.py create mode 100644 src/qlever/monitor/widgets/timeline.py create mode 100644 src/qlever/monitor/widgets/window_stepper.py diff --git a/src/qlever/commands/benchmark_queries.py b/src/qlever/commands/benchmark_queries.py index 9cb43f34..737be31b 100644 --- a/src/qlever/commands/benchmark_queries.py +++ b/src/qlever/commands/benchmark_queries.py @@ -20,37 +20,12 @@ from qlever.command import QleverCommand from qlever.commands.clear_cache import ClearCacheCommand from qlever.commands.ui import dict_to_yaml -from qlever.containerize import Containerize from qlever.log import log, mute_log -from qlever.util import run_command, run_curl_command - - -def pretty_printed_query( - query: str, show_prefixes: bool, system: str = "docker" -) -> str: - """ - Pretty-print a SPARQL query using the sparql-formatter Docker image. - Optionally strips PREFIX declarations from the output. - Argument `system` can either be docker or podman. - """ - if system not in Containerize.supported_systems(): - system = "docker" - remove_prefixes_cmd = " | sed '/^PREFIX /Id'" if not show_prefixes else "" - pretty_print_query_cmd = ( - f"echo {shlex.quote(query)}" - f" | {system} run -i --rm docker.io/sparqling/sparql-formatter" - f"{remove_prefixes_cmd} | grep -v '^$'" - ) - try: - query_pretty_printed = run_command( - pretty_print_query_cmd, return_output=True - ) - return query_pretty_printed.rstrip() - except Exception as e: - log.debug( - f"Failed to pretty-print query, returning original query: {e}" - ) - return query.rstrip() +from qlever.util import ( + pretty_printed_query, + run_command, + run_curl_command, +) def sparql_query_type(query: str) -> str: @@ -1005,7 +980,8 @@ def execute(self, args) -> bool: colored( pretty_printed_query( query, args.show_prefixes, args.system - ), + ) + or query, "cyan", ) ) @@ -1107,7 +1083,8 @@ def execute(self, args) -> bool: description=description, query=pretty_printed_query( query, args.show_prefixes, args.system - ), + ) + or query, client_time=time_seconds, result=query_results, result_size=result_length, @@ -1166,7 +1143,8 @@ def execute(self, args) -> bool: colored( pretty_printed_query( query, args.show_prefixes, args.system - ), + ) + or query, "cyan", ) ) diff --git a/src/qlever/commands/monitor_queries.py b/src/qlever/commands/monitor_queries.py new file mode 100644 index 00000000..c92ae779 --- /dev/null +++ b/src/qlever/commands/monitor_queries.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +from pathlib import Path + +from qlever.command import QleverCommand +from qlever.log import log +from qlever.monitor.app import MonitorQueriesApp + + +class MonitorQueriesCommand(QleverCommand): + """ + Class for executing the `monitor-queries-tui` command. + """ + + def __init__(self): + pass + + def description(self) -> str: + return ( + "Show the currently active and historically all the active queries " + "on the server (interactive TUI)" + ) + + def should_have_qleverfile(self) -> bool: + return False + + def relevant_qleverfile_arguments(self) -> dict[str, list[str]]: + return { + "data": ["name"], + "server": ["host_name", "port", "timeout"], + "runtime": ["system"], + } + + def additional_arguments(self, subparser) -> None: + subparser.add_argument( + "--sparql-endpoint", + type=str, + help="URL of the SPARQL endpoint (default = {host_name}:{port})", + ) + subparser.add_argument( + "--qlever-server-log", + type=Path, + help="The `qlever-server` log file (default = {name}.server-log.txt)", + ) + subparser.add_argument( + "--warn-after", + type=int, + default=None, + help="Duration in seconds after which an active query is logged" + " as slow (default = server timeout - 10s)", + ) + subparser.add_argument( + "--warning-log", + type=Path, + help="File to append slow-query warnings to" + " (default = {name}.slow-queries.tsv)", + ) + subparser.add_argument( + "--screen_refresh_s", + type=float, + default=0.5, + help="Duration in seconds after which the live view is refreshed " + "with the new queries from server log file. This only affects the " + "screen refresh and not the interval at which the queries are " + "read from the log file (default = 0.5s)", + ) + + def execute(self, args) -> bool: + if not args.qlever_server_log: + args.qlever_server_log = Path.cwd() / f"{args.name}.server-log.txt" + show_msg = ( + f"Reading server logs from {args.qlever_server_log} to display the " + "currently active queries on the server" + ) + self.show(show_msg, only_show=args.show) + if args.show: + return True + + timeout_s = 30 + if args.warn_after is None: + try: + timeout_s = int(args.timeout.rstrip("s")) + except ValueError: + log.error( + f"Could not parse server timeout {args.timeout!r};" + " pass --warn-after explicitly" + ) + return False + args.warn_after = max(1, timeout_s - 10) + if args.warning_log is None: + args.warning_log = Path.cwd() / f"{args.name}.slow-queries.tsv" + + repaint_interval = max(0.5, args.screen_refresh_s) + sparql_endpoint = ( + args.sparql_endpoint + if args.sparql_endpoint + else f"{args.host_name}:{args.port}" + ) + + MonitorQueriesApp( + log_file=args.qlever_server_log, + sparql_endpoint=sparql_endpoint, + timeout=timeout_s, + warn_after=args.warn_after, + warning_log=args.warning_log, + repaint_interval=repaint_interval, + system=args.system, + ).run() + return True diff --git a/src/qlever/monitor/__init__.py b/src/qlever/monitor/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py new file mode 100644 index 00000000..a1d363b1 --- /dev/null +++ b/src/qlever/monitor/app.py @@ -0,0 +1,123 @@ +from __future__ import annotations + +from pathlib import Path + +from textual import work +from textual.app import App + +from qlever.monitor.util import clipboard_install_hint, copy_text +from qlever.monitor.views.historic import HistoricScreen +from qlever.monitor.views.live import LiveScreen +from qlever.monitor.widgets.sparql_pane import SparqlPane +from qlever.util import pretty_printed_query + + +class MonitorQueriesApp(App): + """ + Textual app for the interactive `monitor-queries` TUI + + The app has two tabs: + 1. Live view which tails an active `qlever-server` metrics.log file to + show currently-active queries on the server plus rolling metrics + 2. Historic view which parses the log file in a given time window for + active queries and metrics. + + In both tabs, the user has the option of selecting a particular query and + view the full SPARQL query with syntax highlighting plus copy it. + """ + + CSS_PATH = "monitor.tcss" + + SCREENS = {"live": LiveScreen, "historic": HistoricScreen} + + BINDINGS = [ + ("q", "quit", "Quit/Exit"), + ("t", "cycle_themes", "Change theme"), + ("y", "copy_query", "Copy SPARQL"), + ("p", "pretty_print", "Pretty print"), + ] + + def __init__( + self, + log_file: Path, + sparql_endpoint: str, + timeout: int, + warn_after: int, + warning_log: Path, + repaint_interval: float = 0.5, + system: str = "docker", + ) -> None: + super().__init__() + self.log_file = log_file + self.sparql_endpoint = sparql_endpoint + self.timeout = timeout + self.warn_after = warn_after + self.warning_log = warning_log + self.repaint_interval = repaint_interval + self.system = system + + def on_mount(self) -> None: + """Open the Live screen on startup.""" + self.push_screen("live") + + def action_swap_screen(self) -> None: + """Toggle between Live and Historic (bound to Tab on each screen).""" + target = "historic" if isinstance(self.screen, LiveScreen) else "live" + self.switch_screen(target) + + def action_copy_query(self) -> None: + """Copy the displayed query's SPARQL to the system clipboard.""" + pane = self.screen.query_one(SparqlPane) + if pane.displayed_text is None: + self.notify("No query selected", severity="warning") + return + result = copy_text(pane.displayed_text) + if result is True: + self.notify("SPARQL copied to clipboard") + elif result is None: + self.notify( + f"No clipboard tool found: {clipboard_install_hint()}", + severity="warning", + ) + else: + self.notify("Clipboard tool failed", severity="error") + + def action_pretty_print(self) -> None: + """Toggle pretty-printed SPARQL in the pane for the selected query.""" + pane = self.screen.query_one(SparqlPane) + if pane.content is None: + self.notify("No query selected", severity="warning") + return + if pane.show_pretty: + pane.show_pretty = False + return + pane.show_pretty = True + # A cached result means the formatter already ran for this query. + if pane.pretty_text is None: + self.pretty_print_worker(pane, pane.content.sparql_text) + + @work(thread=True, exclusive=True) + def pretty_print_worker(self, pane: SparqlPane, raw: str) -> None: + """Run the blocking sparql-formatter off the UI thread.""" + result = pretty_printed_query( + raw, show_prefixes=True, system=self.system + ) + self.call_from_thread(self.apply_pretty_result, pane, raw, result) + + def apply_pretty_result( + self, pane: SparqlPane, raw: str, result: str | None + ) -> None: + """Apply the formatter output, unless the selection moved on.""" + if pane.content is None or pane.content.sparql_text != raw: + return + if result is None: + pane.show_pretty = False + self.notify("Could not pretty-print this query", severity="error") + return + pane.pretty_text = result + + def action_cycle_themes(self) -> None: + """Select the next theme on press of `t` binding""" + themes = [t for t in self.available_themes if "ansi" not in t] + selected_theme_idx = themes.index(self.theme) + self.theme = themes[(selected_theme_idx + 1) % len(themes)] diff --git a/src/qlever/monitor/historic_stubs.py b/src/qlever/monitor/historic_stubs.py new file mode 100644 index 00000000..1e2c4131 --- /dev/null +++ b/src/qlever/monitor/historic_stubs.py @@ -0,0 +1,93 @@ +"""Stub data for the Historic screen. + +Replaced later by a real log-reader module exposing the same functions. +Only the data the current widgets consume is defined here. +""" + +import time + +from qlever.monitor.models import ( + ControlsState, + HistoricQueryRow, + MetricsCounts, + TimelineBounds, +) + + +def get_controls_state() -> ControlsState: + """Current window size, mode, and selected time range on Historic.""" + end_ms = int(time.time() * 1000) + return ControlsState( + window_size="15m", + mode="ACTIVE", + start_ms=end_ms - 15 * 60 * 1000, + end_ms=end_ms, + ) + + +def get_timeline_bounds() -> TimelineBounds: + """Full log span plus the slice the current window covers. + + The window sits near the end of the log so the marked block is + visibly offset rather than spanning the whole bar. + """ + now_ms = int(time.time() * 1000) + window_ms = 15 * 60 * 1000 + return TimelineBounds( + log_start_ms=now_ms - 52 * 60 * 1000, + log_end_ms=now_ms, + window_start_ms=now_ms - window_ms, + window_end_ms=now_ms, + ) + + +def get_historic_query_rows() -> list[HistoricQueryRow]: + """Finished queries matching the current mode in the current window.""" + start_ms = int(time.time() * 1000) - 15 * 60 * 1000 + samples = [ + (0, 42_000, "ok", "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 1000"), + (103_000, 8_000, "ok", "SELECT (COUNT(*) AS ?c) WHERE { ?s ?p ?o }"), + ( + 210_000, + 30_000, + "timeout", + "SELECT ?x WHERE { ?x rdfs:label ?l } ORDER BY ?l", + ), + ( + 354_000, + 1_200, + "failed", + "CONSTRUCT { ?x rdfs:label ?l } WHERE { ?x rdfs:label ?l }", + ), + ( + 601_000, + 5_500, + "cancelled", + "SELECT ?p (COUNT(*) AS ?n) WHERE { ?s ?p ?o } GROUP BY ?p", + ), + ] + return [ + HistoricQueryRow( + qid=f"q-{i:04x}", + started_at_ms=start_ms + offset_ms, + duration_ms=duration_ms, + status=status, + sparql=sparql, + ) + for i, (offset_ms, duration_ms, status, sparql) in enumerate(samples) + ] + + +def get_historic_metrics() -> MetricsCounts: + """Single metrics line for the current window; label is its width.""" + return MetricsCounts( + label="15m", + seen=488, + ok=466, + failed=11, + timeout=2, + cancelled=9, + p50=120, + p95=1840, + slow=4, + ) diff --git a/src/qlever/monitor/live_stubs.py b/src/qlever/monitor/live_stubs.py new file mode 100644 index 00000000..db135517 --- /dev/null +++ b/src/qlever/monitor/live_stubs.py @@ -0,0 +1,105 @@ +"""Stub data for the Live screen. + +Replaced later by a real log-reader module exposing the same functions. +UI widgets call these getters rather than reading module-level constants, +so the swap is mechanical. +""" + +import time + +from qlever.monitor.models import LiveQueryRow, LiveSubtitle, MetricsCounts + + +def get_live_subtitle() -> LiveSubtitle: + """Current Live-screen subtitle state.""" + return LiveSubtitle(state="reachable", n_active=3) + + +def get_live_metrics() -> list[MetricsCounts]: + """Three rolling-window metric rows shown on Live.""" + return [ + MetricsCounts( + label="last 5m", + seen=142, + ok=125, + failed=3, + timeout=0, + cancelled=14, + p50=48, + p95=3_900, + slow=14, + ), + MetricsCounts( + label="last 15m", + seen=438, + ok=394, + failed=11, + timeout=2, + cancelled=31, + p50=52, + p95=4_100, + slow=31, + ), + MetricsCounts( + label="last 1h", + seen=None, + ok=None, + failed=None, + timeout=None, + cancelled=None, + p50=None, + p95=None, + slow=None, + ), + ] + + +def get_live_query_rows() -> list[LiveQueryRow]: + """Currently active queries (a start with no matching end).""" + now_ms = int(time.time() * 1000) + return [ + LiveQueryRow( + qid="q-8a4f12345678900", + ts_ms=now_ms - 18_000, + sparql=( + "PREFIX foaf: \n" + "PREFIX rdf: \n" + "SELECT ?person ?name (COUNT(?friend) AS ?friends)\n" + "WHERE {\n" + " ?person rdf:type foaf:Person ;\n" + " foaf:name ?name ;\n" + " foaf:knows ?friend .\n" + " ?friend foaf:name ?friendName .\n" + ' FILTER (STRLEN(?name) > 3 && LANG(?name) = "en")\n' + "}\n" + "GROUP BY ?person ?name\n" + "HAVING (COUNT(?friend) > 5)\n" + "ORDER BY DESC(?friends) ?name\n" + "LIMIT 100" + ), + ), + LiveQueryRow( + qid="q-8a52242534636", + ts_ms=now_ms - 5_000, + sparql="SELECT (COUNT(*) AS ?c) WHERE { ?s ?p ?o }", + ), + LiveQueryRow( + qid="q-8a5b4356476474567456", + ts_ms=now_ms - 2_000, + sparql=( + "PREFIX rdfs: \n" + "PREFIX geo: \n" + "CONSTRUCT {\n" + " ?city rdfs:label ?label ;\n" + " geo:hasGeometry ?geom .\n" + "}\n" + "WHERE {\n" + " ?city rdf:type :City ;\n" + " rdfs:label ?label ;\n" + " geo:hasGeometry ?geom .\n" + ' FILTER (lang(?label) = "en")\n' + " FILTER EXISTS { ?city :population ?pop . FILTER (?pop > 100000) }\n" + "}" + ), + ), + ] diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py new file mode 100644 index 00000000..cbfbc1df --- /dev/null +++ b/src/qlever/monitor/models.py @@ -0,0 +1,85 @@ +"""Shared data models for the monitor-queries TUI. + +These frozen dataclasses are the contract between the data layer and the +UI. Widgets render them; the stub modules (and later the real log-reader) +produce them. Neither side imports the other: both depend on this module, +so swapping stubs for the real reader never touches a widget. +""" + +from dataclasses import dataclass + + +@dataclass(frozen=True) +class LiveSubtitle: + state: str + n_active: int | None + + +@dataclass(frozen=True) +class MetricsCounts: + label: str + seen: int | None + ok: int | None + failed: int | None + timeout: int | None + cancelled: int | None + p50: int | None + p95: int | None + slow: int | None + + +@dataclass(frozen=True) +class LiveQueryRow: + qid: str + ts_ms: int + sparql: str + + +@dataclass(frozen=True) +class HistoricQueryRow: + """A finished query in the current window. + + Unlike LiveQueryRow these carry a terminal `status` and a stored + `duration_ms` (not an elapsed `now - ts`). + """ + + qid: str + started_at_ms: int + duration_ms: int + status: str + sparql: str + + +@dataclass(frozen=True) +class SparqlContent: + """What the SparqlPane renders for the row under the table cursor. + + `status` is filled at the screen seam: Live passes None (active + queries have no terminal status), Historic passes the real status. + """ + + qid: str + started_at_ms: int + status: str | None + sparql_text: str + + +@dataclass(frozen=True) +class TimelineBounds: + """The full log span and the slice the window currently covers. + + The Timeline maps these epochs (ms) to bar positions at render. + """ + + log_start_ms: int + log_end_ms: int + window_start_ms: int + window_end_ms: int + + +@dataclass(frozen=True) +class ControlsState: + window_size: str + mode: str + start_ms: int + end_ms: int diff --git a/src/qlever/monitor/monitor.tcss b/src/qlever/monitor/monitor.tcss new file mode 100644 index 00000000..63ad2ca5 --- /dev/null +++ b/src/qlever/monitor/monitor.tcss @@ -0,0 +1,175 @@ +/* HeaderRow */ + +HeaderRow { + height: 1; + background: $primary; + color: $text; +} + +.slot-left { + width: auto; + min-width: 15; +} + +.slot-center { + width: 1fr; + text-align: center; + text-style: bold; +} + +.slot-right { + width: auto; + min-width: 15; +} + +/* NavPill */ + +NavPill { + padding: 0 1; + background: $accent; + color: $text; + text-style: bold; + text-align: center; +} + +NavPill:hover { + background: $accent-lighten-2; +} + +/* LiveStatusRow */ + +LiveStatusRow { + height: 1; + width: 100%; + text-align: center; + text-style: italic; + margin-bottom: 1; + color: $text; +} + + +/* MetricsRow */ + +MetricsRow { + height: 3; + margin-bottom: 1; + margin-left: 1; +} + +/* Timeline (Historic only) */ + +Timeline { + height: 2; + margin-bottom: 1; + margin-left: 1; + margin-right: 1; +} + +/* Historic's MetricsRow is a single line; the global rule above + reserves 3 rows for Live's rolling windows. */ + +HistoricScreen MetricsRow { + height: auto; +} + +/* query tables (Live + Historic) */ + +QueryTable { + height: 1fr; +} + +QueryTable > .datatable--cursor { + background: $secondary; + color: $text; +} + +/* table status line, flush under the table */ + +#table-status { + height: 1; + margin-bottom: 1; + margin-left: 1; +} + +/* SparqlPane */ + +SparqlPane { + height: 1fr; + margin-bottom: 1; + margin-left: 1; + margin-right: 1; +} + +#sparql-header { + height: 1; + margin-bottom: 1; +} + +#sparql-scroll { + height: 1fr; +} + +#sparql-body { + width: 1fr; + height: auto; +} + +/* HistoricControlsRow */ + +HistoricControlsRow { + height: 1; + margin-bottom: 1; + width: 100%; +} + +HistoricControlsRow > WindowStepper, +HistoricControlsRow > ModePicker { + width: 1fr; + height: 1; + align-horizontal: center; +} + +HistoricControlsRow > SelectedWindow { + width: 1fr; + height: 1; + text-align: center; +} + +WindowStepper > Static, +ModePicker > Static { + width: auto; +} + +.stepper-caption, +.picker-caption { + text-style: bold; + margin-right: 1; +} + +.stepper-arrow { + padding: 0 1; +} + +#window-size { + padding: 0 1; + text-style: bold; +} + +.picker-bracket { + padding: 0 1; +} + +.picker-segment { + padding: 0 1; + color: $text-muted; +} + +.picker-sep { + color: $text-muted; +} + +.picker-segment.-selected { + background: $accent; + color: $text; + text-style: bold; +} diff --git a/src/qlever/monitor/util.py b/src/qlever/monitor/util.py new file mode 100644 index 00000000..09354f3b --- /dev/null +++ b/src/qlever/monitor/util.py @@ -0,0 +1,88 @@ +"""Small shared helpers for the monitor-queries TUI widgets.""" + +from __future__ import annotations + +import os +import platform +import shutil +import subprocess +from datetime import datetime + + +def format_timestamp(ms: int) -> str: + """Render an epoch (ms) as a full local date and time.""" + return datetime.fromtimestamp(ms / 1000).strftime("%Y-%m-%d %H:%M:%S") + + +def format_clock(ms: int) -> str: + """Render an epoch (ms) as a local wall-clock time.""" + return datetime.fromtimestamp(ms / 1000).strftime("%H:%M:%S") + + +def truncate(text: str, max_len: int) -> str: + """Trim text to max_len with an ellipsis.""" + if len(text) <= max_len: + return text + return text[: max_len - 1] + "…" + + +def clipboard_commands() -> list[list[str]]: + """Pick clipboard CLIs available on this platform, in priority order.""" + system = platform.system() + if system == "Darwin": + return [["pbcopy"]] + if system != "Linux": + return [] + + # On Wayland, never fall through to xclip/xsel: they write to the + # XWayland selection, which Wayland-native apps don't read. + on_wayland = bool(os.environ.get("WAYLAND_DISPLAY")) + if on_wayland and shutil.which("wl-copy"): + # Force text/plain so wl-copy doesn't tag SPARQL starting with + # `PREFIX foo: ` as a URI-ish MIME type. + return [["wl-copy", "--type", "text/plain"]] + + cmds = [] + if shutil.which("xclip"): + cmds.append(["xclip", "-selection", "clipboard", "-t", "UTF8_STRING"]) + if shutil.which("xsel"): + cmds.append(["xsel", "--clipboard", "--input"]) + return cmds + + +def clipboard_install_hint() -> str: + """Platform-specific suggestion for installing a clipboard tool.""" + system = platform.system() + if system == "Darwin": + return "pbcopy should already be available on macOS" + if system != "Linux": + return f"no clipboard tool support for {system}" + if os.environ.get("WAYLAND_DISPLAY"): + return "install wl-clipboard (provides wl-copy)" + return "install xclip or xsel" + + +def copy_text(text: str) -> bool | None: + """Copy text to the system clipboard. + + Returns True on success, False if a tool ran but failed, None if no + clipboard tool is available on this system. + """ + cmds = clipboard_commands() + if not cmds: + return None + payload = text.encode("utf-8") + for cmd in cmds: + try: + proc = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + proc.communicate(input=payload, timeout=2) + except (subprocess.TimeoutExpired, OSError): + continue + if proc.returncode == 0: + return True + return False diff --git a/src/qlever/monitor/views/__init__.py b/src/qlever/monitor/views/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py new file mode 100644 index 00000000..588cc4c9 --- /dev/null +++ b/src/qlever/monitor/views/historic.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.binding import Binding +from textual.screen import Screen +from textual.widgets import Footer, Static + +from qlever.monitor.historic_stubs import ( + get_controls_state, + get_historic_metrics, + get_historic_query_rows, + get_timeline_bounds, +) +from qlever.monitor.models import SparqlContent +from qlever.monitor.widgets.controls_row import HistoricControlsRow +from qlever.monitor.widgets.header_row import HeaderRow +from qlever.monitor.widgets.metrics_row import MetricsRow +from qlever.monitor.widgets.nav_pill import NavPill +from qlever.monitor.widgets.query_table import HistoricQueryTable +from qlever.monitor.widgets.sparql_pane import SparqlPane +from qlever.monitor.widgets.timeline import Timeline + +TITLE = "QLever monitor-queries: Historic" + + +class HistoricScreen(Screen, inherit_bindings=False): + """Historic view: shows active queries parsed from the log over a time window.""" + + BINDINGS = [Binding("tab", "app.swap_screen", " ComposeResult: + yield HeaderRow( + left=NavPill("< Live", target="live"), + center=Static(TITLE), + ) + yield HistoricControlsRow(get_controls_state()) + yield Timeline(get_timeline_bounds()) + yield MetricsRow([get_historic_metrics()]) + rows = sorted( + get_historic_query_rows(), + key=lambda r: r.duration_ms, + reverse=True, + ) + yield HistoricQueryTable(rows) + yield Static( + f"Showing {len(rows)} queries sorted by duration desc", + id="table-status", + ) + yield SparqlPane() + yield Footer() + + def on_nav_pill_clicked(self, message: NavPill.Clicked) -> None: + """Switch to the screen named by the clicked pill.""" + self.app.switch_screen(message.target) + + def on_data_table_row_selected( + self, message: HistoricQueryTable.RowSelected + ) -> None: + """Show the selected finished query's SPARQL in the pane.""" + row = message.data_table.query_rows[message.cursor_row] + self.query_one(SparqlPane).content = SparqlContent( + qid=row.qid, + started_at_ms=row.started_at_ms, + status=row.status, + sparql_text=row.sparql, + ) diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py new file mode 100644 index 00000000..2144ab47 --- /dev/null +++ b/src/qlever/monitor/views/live.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.binding import Binding +from textual.screen import Screen +from textual.widgets import Footer, Static + +from qlever.monitor.live_stubs import ( + get_live_metrics, + get_live_query_rows, + get_live_subtitle, +) +from qlever.monitor.models import SparqlContent +from qlever.monitor.widgets.header_row import HeaderRow +from qlever.monitor.widgets.metrics_row import MetricsRow +from qlever.monitor.widgets.nav_pill import NavPill +from qlever.monitor.widgets.query_table import LiveQueryTable +from qlever.monitor.widgets.sparql_pane import SparqlPane +from qlever.monitor.widgets.status_row import LiveStatusRow + +TITLE = "QLever monitor-queries: Live" + + +class LiveScreen(Screen, inherit_bindings=False): + """Live view: shows currently active queries tailed from the server log.""" + + BINDINGS = [Binding("tab", "app.swap_screen", "Historic>", priority=True)] + + def compose(self) -> ComposeResult: + yield HeaderRow( + center=Static(TITLE), + right=NavPill("Historic >", target="historic"), + ) + yield LiveStatusRow(get_live_subtitle(), self.app.sparql_endpoint) + yield MetricsRow(get_live_metrics()) + rows = sorted(get_live_query_rows(), key=lambda r: r.ts_ms) + yield LiveQueryTable(rows) + yield Static("", id="table-status") + yield SparqlPane() + yield Footer() + + def on_nav_pill_clicked(self, message: NavPill.Clicked) -> None: + """Switch to the screen named by the clicked pill.""" + self.app.switch_screen(message.target) + + def on_data_table_row_selected( + self, message: LiveQueryTable.RowSelected + ) -> None: + """Show the selected active query's SPARQL in the pane.""" + row = message.data_table.query_rows[message.cursor_row] + self.query_one(SparqlPane).content = SparqlContent( + qid=row.qid, + started_at_ms=row.ts_ms, + status=None, + sparql_text=row.sparql, + ) diff --git a/src/qlever/monitor/widgets/__init__.py b/src/qlever/monitor/widgets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/qlever/monitor/widgets/controls_row.py b/src/qlever/monitor/widgets/controls_row.py new file mode 100644 index 00000000..b00e7f34 --- /dev/null +++ b/src/qlever/monitor/widgets/controls_row.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Horizontal + +from qlever.monitor.models import ControlsState +from qlever.monitor.widgets.mode_picker import ModePicker +from qlever.monitor.widgets.selected_window import SelectedWindow +from qlever.monitor.widgets.window_stepper import WindowStepper + + +class HistoricControlsRow(Horizontal): + """One-row control strip under the HeaderRow on the Historic screen. + + Holds the window stepper, mode picker, and read-only selected range. + Distributes the snapshot to its children; never reads data itself. + """ + + can_focus = False + + def __init__(self, state: ControlsState) -> None: + """Hold the controls snapshot to distribute once mounted.""" + super().__init__() + self.state = state + + def compose(self) -> ComposeResult: + yield WindowStepper(self.state.window_size) + yield ModePicker(self.state.mode) + yield SelectedWindow(self.state) diff --git a/src/qlever/monitor/widgets/header_row.py b/src/qlever/monitor/widgets/header_row.py new file mode 100644 index 00000000..670e04c2 --- /dev/null +++ b/src/qlever/monitor/widgets/header_row.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Horizontal +from textual.widget import Widget +from textual.widgets import Static + + +class HeaderRow(Horizontal): + """One-row header layout with three slots: left, center, right. + + The row is screen-agnostic. Each screen decides what goes in each slot. + Empty slots become an empty Static so the three-slot layout stays stable. + """ + + def __init__( + self, + left: Widget | None = None, + center: Widget | None = None, + right: Widget | None = None, + ) -> None: + """Build a row with the given widgets in the left, center, right slots.""" + super().__init__() + self.left = left if left is not None else Static("") + self.center = center if center is not None else Static("") + self.right = right if right is not None else Static("") + self.left.add_class("slot-left") + self.center.add_class("slot-center") + self.right.add_class("slot-right") + + def compose(self) -> ComposeResult: + yield self.left + yield self.center + yield self.right diff --git a/src/qlever/monitor/widgets/metrics_row.py b/src/qlever/monitor/widgets/metrics_row.py new file mode 100644 index 00000000..de1ba785 --- /dev/null +++ b/src/qlever/monitor/widgets/metrics_row.py @@ -0,0 +1,106 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Vertical +from textual.widgets import Static + +from qlever.monitor.models import MetricsCounts + +METRIC_COLORS = { + "ok": "$success", + "failed": "$error", + "timeout": "$error", + "cancelled": "$warning", + "slow": "$warning", +} + +# (MetricsCounts field, kind) in render order; the field name is the label. +COLUMNS = [ + ("seen", "count"), + ("ok", "count"), + ("failed", "count"), + ("timeout", "count"), + ("cancelled", "count"), + ("p50", "ms"), + ("p95", "ms"), + ("slow", "count"), +] + + +def color_tag(label: str, value: int | None) -> str | None: + """Return the theme color for a metric, or None if it should render neutral.""" + if value is None or value == 0: + return None + return METRIC_COLORS.get(label) + + +def format_count(n: int | None) -> str: + """Render a count, or an ellipsis if it is not yet computed.""" + if n is None: + return "…" + return f"{n:,}" + + +def format_ms(ms: int | None) -> str: + """Render a duration in seconds, ellipsis if not computed. + + Smart precision: 2 decimals under 1 s, 1 decimal otherwise. + """ + if ms is None: + return "…" + seconds = ms / 1000 + return f"{seconds:.2f}s" if seconds < 1 else f"{seconds:.1f}s" + + +def format_value(value: int | None, kind: str) -> str: + """Render a raw cell value according to its column kind.""" + if kind == "count": + return format_count(value) + return format_ms(value) + + +def column_widths(rows: list[MetricsCounts]) -> list[int]: + """Width of each column, sized to its own widest value across all rows. + + Per-column (not one shared width) so every column uses the least + horizontal space while still aligning across the rolling-window rows. + """ + widths = [] + for name, kind in COLUMNS: + cells = (format_value(getattr(r, name), kind) for r in rows) + widths.append(max((len(c) for c in cells), default=1)) + return widths + + +def render_cell(label: str, value: int | None, width: int, kind: str) -> str: + """Render one bold-label, right-aligned value pair with optional color.""" + text = format_value(value, kind).rjust(width) + color = color_tag(label, value) + if color: + return f"[b]{label}[/] [{color}]{text}[/]" + return f"[b]{label}[/] {text}" + + +def format_row(row: MetricsCounts, widths: list[int]) -> str: + """Format one rolling-window row as a single line with markup.""" + cells = [ + render_cell(name, getattr(row, name), width, kind) + for (name, kind), width in zip(COLUMNS, widths) + ] + return f"[bold]{row.label:<8}[/] │ " + " · ".join(cells) + + +class MetricsRow(Vertical): + """Stack of rolling-window metric lines (5m, 15m, 1h on Live).""" + + can_focus = False + + def __init__(self, rows: list[MetricsCounts]) -> None: + """Render each row in `rows` as one formatted text line.""" + super().__init__() + self.rows = rows + + def compose(self) -> ComposeResult: + widths = column_widths(self.rows) + for row in self.rows: + yield Static(format_row(row, widths)) diff --git a/src/qlever/monitor/widgets/mode_picker.py b/src/qlever/monitor/widgets/mode_picker.py new file mode 100644 index 00000000..f7b4b5b3 --- /dev/null +++ b/src/qlever/monitor/widgets/mode_picker.py @@ -0,0 +1,35 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Horizontal +from textual.widgets import Static + +MODES = ("STARTS", "ACTIVE", "ENDS") + + +class ModePicker(Horizontal): + """Segmented mode control: MODE [ STARTS │ ACTIVE │ ENDS ]. + + Each mode is its own Static so a segment can be made independently + clickable without restructuring the widget. The selected segment + carries the `-selected` state class for styling. + """ + + can_focus = False + + def __init__(self, selected: str) -> None: + """Hold the selected mode to paint once mounted.""" + super().__init__() + self.selected = selected + + def compose(self) -> ComposeResult: + yield Static("MODE", classes="picker-caption") + yield Static("[", classes="picker-bracket") + for index, mode in enumerate(MODES): + if index > 0: + yield Static("│", classes="picker-sep") + classes = "picker-segment" + if mode == self.selected: + classes += " -selected" + yield Static(mode, id=f"mode-{mode.lower()}", classes=classes) + yield Static("]", classes="picker-bracket") diff --git a/src/qlever/monitor/widgets/nav_pill.py b/src/qlever/monitor/widgets/nav_pill.py new file mode 100644 index 00000000..91e406f6 --- /dev/null +++ b/src/qlever/monitor/widgets/nav_pill.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from textual.message import Message +from textual.widgets import Static + + +class NavPill(Static): + """Clickable, non-focusable pill that requests a screen switch.""" + + can_focus = False + + class Clicked(Message): + """Posted on click so an ancestor decides what the click does. + + Bubbles up the DOM, letting the widget stay unaware of the app + or screen registry. + """ + + def __init__(self, target: str) -> None: + super().__init__() + self.target = target + + def __init__(self, label: str, target: str) -> None: + """Render `label` as the pill; `target` names the screen to switch to.""" + super().__init__(label) + self.target = target + + def on_click(self) -> None: + """Announce the click; the handler lives on the parent screen.""" + self.post_message(self.Clicked(self.target)) diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py new file mode 100644 index 00000000..4971193a --- /dev/null +++ b/src/qlever/monitor/widgets/query_table.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +import time + +from rich.text import Text +from textual.widgets import DataTable + +from qlever.monitor.models import HistoricQueryRow, LiveQueryRow +from qlever.monitor.util import format_clock + +QID_WIDTH = 10 +SPARQL_WIDTH = 280 + + +def oneline(text: str) -> str: + """Collapse all runs of whitespace (incl. newlines) to single spaces.""" + return " ".join(text.split()) + + +def truncate(text: str, width: int) -> str: + """Clip `text` to `width` characters, ending with an ellipsis if cut.""" + if len(text) <= width: + return text + return text[: width - 1] + "…" + + +def format_duration(ms: int) -> str: + """Render a duration as whole seconds.""" + return f"{max(ms, 0) // 1000}s" + + +class QueryTable(DataTable): + """Shared base for the Live and Historic query tables. + + Holds only what both screens duplicate: the row cursor, the stored + rows, and (via module helpers) SPARQL clipping. Each subclass owns + its own columns, since the two column sets differ. + """ + + def __init__(self, rows: list) -> None: + """Hold the rows to paint once the table is mounted.""" + super().__init__(cursor_type="row") + self.query_rows = rows + + +class LiveQueryTable(QueryTable): + """Active queries on the Live screen; the only focusable widget.""" + + def __init__(self, rows: list[LiveQueryRow]) -> None: + super().__init__(rows) + + def on_mount(self) -> None: + """Add the columns and one table row per active query.""" + self.add_column("Query ID") + self.add_column("Duration", width=8) + self.add_column("SPARQL") + now_ms = int(time.time() * 1000) + for row in self.query_rows: + self.add_row( + truncate(row.qid, QID_WIDTH), + Text(format_duration(now_ms - row.ts_ms), justify="right"), + truncate(oneline(row.sparql), SPARQL_WIDTH), + ) + + +class HistoricQueryTable(QueryTable): + """Finished queries in the current window on the Historic screen.""" + + def __init__(self, rows: list[HistoricQueryRow]) -> None: + super().__init__(rows) + + def on_mount(self) -> None: + """Add the columns and one table row per finished query.""" + self.add_column("Started") + self.add_column("Duration", width=8) + self.add_column("Status") + self.add_column("SPARQL") + for row in self.query_rows: + self.add_row( + format_clock(row.started_at_ms), + Text(format_duration(row.duration_ms), justify="right"), + row.status, + truncate(oneline(row.sparql), SPARQL_WIDTH), + ) diff --git a/src/qlever/monitor/widgets/selected_window.py b/src/qlever/monitor/widgets/selected_window.py new file mode 100644 index 00000000..b7c1e2b7 --- /dev/null +++ b/src/qlever/monitor/widgets/selected_window.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from textual.widgets import Static + +from qlever.monitor.models import ControlsState +from qlever.monitor.util import format_clock + + +def format_selected_window(state: ControlsState) -> str: + """Read-only summary of the current window's range and width.""" + start = format_clock(state.start_ms) + end = format_clock(state.end_ms) + return f"[b]SELECTED[/b] {start} → {end} ([b]{state.window_size}[/b])" + + +class SelectedWindow(Static): + """Read-only window range under the HeaderRow on the Historic screen.""" + + can_focus = False + + def __init__(self, state: ControlsState) -> None: + """Render the selected window range computed from `state`.""" + super().__init__(format_selected_window(state)) diff --git a/src/qlever/monitor/widgets/sparql_pane.py b/src/qlever/monitor/widgets/sparql_pane.py new file mode 100644 index 00000000..0b1688f6 --- /dev/null +++ b/src/qlever/monitor/widgets/sparql_pane.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +from rich.syntax import Syntax +from textual.app import ComposeResult +from textual.containers import Vertical, VerticalScroll +from textual.reactive import reactive +from textual.widgets import Static + +from qlever.monitor.models import SparqlContent +from qlever.monitor.util import format_timestamp + +LIGHT_SYNTAX_THEME = "friendly" +DARK_SYNTAX_THEME = "monokai" + + +def format_header(content: SparqlContent) -> str: + """One-line query identity shown above the SPARQL body.""" + parts = [ + f"[b]Server Query ID:[/b] {content.qid}", + f"[b]Started at:[/b] {format_timestamp(content.started_at_ms)}", + ] + if content.status is not None: + parts.append(f"[b]Status:[/b] {content.status}") + return " │ ".join(parts) + + +class SparqlBody(Static): + """Syntax-highlighted SPARQL; theme follows the active app theme. + + The Syntax object is rebuilt in render() rather than stored, so a + theme switch (which triggers a repaint) picks the matching variant + without any explicit subscription. + """ + + code = reactive(None, layout=True) + + def render(self): + if self.code is None: + return "" + theme = ( + DARK_SYNTAX_THEME + if self.app.current_theme.dark + else LIGHT_SYNTAX_THEME + ) + return Syntax(self.code, "sparql", theme=theme, word_wrap=True) + + +class SparqlScroll(VerticalScroll): + """Scroll region for an overflowing query; never takes focus.""" + + can_focus = False + + +class SparqlPane(Vertical): + """Identity line plus full SPARQL for the row under the table cursor. + + Shared by Live and Historic. The screen builds the SparqlContent and + fills `status` (Live: None, Historic: real status); the pane never + reads stub data itself. + """ + + can_focus = False + + content = reactive(None, init=False) + show_pretty = reactive(False, init=False) + pretty_text = reactive(None, init=False) + + def compose(self) -> ComposeResult: + yield Static(id="sparql-header") + with SparqlScroll(id="sparql-scroll"): + yield SparqlBody(id="sparql-body") + + def on_mount(self) -> None: + """Paint the initial (empty) state once children are mounted.""" + self.refresh_content(self.content) + + def watch_content(self, content: SparqlContent | None) -> None: + """Reset pretty-print state for the newly selected query, then paint.""" + self.show_pretty = False + self.pretty_text = None + self.refresh_content(content) + + def watch_show_pretty(self, show_pretty: bool) -> None: + """Repaint with raw or pretty text when the toggle flips.""" + self.refresh_content(self.content) + + def watch_pretty_text(self, pretty_text: str | None) -> None: + """Repaint once the pretty-printed text has been computed.""" + self.refresh_content(self.content) + + @property + def displayed_text(self) -> str | None: + """The SPARQL string currently on screen, or None if empty. + + Pretty-printed text only once the formatter has produced it; + the raw query otherwise. + """ + if self.content is None: + return None + if self.show_pretty and self.pretty_text is not None: + return self.pretty_text + return self.content.sparql_text + + def refresh_content(self, content: SparqlContent | None) -> None: + """Push the current content into the header and body widgets.""" + header = self.query_one("#sparql-header", Static) + body = self.query_one("#sparql-body", SparqlBody) + if content is None: + header.update( + "[dim]Press Enter or double-click a row to view its " + "full SPARQL query and details.[/dim]" + ) + body.code = None + return + header.update(format_header(content)) + body.code = self.displayed_text diff --git a/src/qlever/monitor/widgets/status_row.py b/src/qlever/monitor/widgets/status_row.py new file mode 100644 index 00000000..28e2cfa2 --- /dev/null +++ b/src/qlever/monitor/widgets/status_row.py @@ -0,0 +1,32 @@ +from __future__ import annotations + +from textual.widgets import Static + +from qlever.monitor.models import LiveSubtitle + + +def format_subtitle(subtitle: LiveSubtitle, endpoint: str) -> str: + """Build the Live subtitle text from current state and server endpoint.""" + if subtitle.state == "connecting": + return f"[$warning]Connecting[/] to server at [b]{endpoint}[/]…" + if subtitle.state == "unreachable": + return ( + f"[$error]Can't reach server[/] at [b]{endpoint}[/]" + f" · active queries unknown" + ) + if subtitle.n_active is None: + return f"[$success]Server active[/] at [b]{endpoint}[/]" + return ( + f"[$success]Server active[/] at [b]{endpoint}[/]" + f" · [b $success]{subtitle.n_active}[/] active queries" + ) + + +class LiveStatusRow(Static): + """One-line subtitle under the HeaderRow on the Live screen.""" + + can_focus = False + + def __init__(self, subtitle: LiveSubtitle, endpoint: str) -> None: + """Render the subtitle text computed from `subtitle` and `endpoint`.""" + super().__init__(format_subtitle(subtitle, endpoint)) diff --git a/src/qlever/monitor/widgets/timeline.py b/src/qlever/monitor/widgets/timeline.py new file mode 100644 index 00000000..a3d982a0 --- /dev/null +++ b/src/qlever/monitor/widgets/timeline.py @@ -0,0 +1,139 @@ +from __future__ import annotations + +from datetime import datetime + +from textual.widgets import Static + +from qlever.monitor.models import TimelineBounds +from qlever.monitor.util import format_timestamp + +# Width of a full "YYYY-MM-DD HH:MM:SS" stamp, and the gap that +# separates the edge labels from the bar. +STAMP_W = 19 +GUTTER = " " + +# Minimum blank columns kept between two adjacent tick labels; the +# actual spacing is this plus the label width (derived, not fixed, +# because multi-day labels are wider than HH:MM ones). +MIN_TICK_GAP = 4 + +DAY_MS = 24 * 60 * 60 * 1000 + + +def tick_pattern(span_ms: int) -> str: + """strftime for interior ticks; add the date once the span crosses days. + + Edge labels always carry the full date, so within a single day the + ticks can stay the compact HH:MM form. + """ + if span_ms >= DAY_MS: + return "%m-%d %H:%M" + return "%H:%M" + + +def format_tick(ms: int, pattern: str) -> str: + """Render an epoch (ms) using a tick pattern from `tick_pattern`.""" + return datetime.fromtimestamp(ms / 1000).strftime(pattern) + + +def span_fraction(ts_ms: int, bounds: TimelineBounds) -> float: + """Where `ts_ms` sits in the log span, as a 0..1 fraction. + + A zero-length span collapses everything to the start. + """ + span = bounds.log_end_ms - bounds.log_start_ms + if span <= 0: + return 0.0 + return (ts_ms - bounds.log_start_ms) / span + + +def fraction_to_column(fraction: float, bar_width: int) -> int: + """Map a 0..1 position along the log span to a bar column index.""" + clamped = min(max(fraction, 0.0), 1.0) + return round(clamped * (bar_width - 1)) + + +def render_track(bounds: TimelineBounds, bar_width: int) -> str: + """The bar line: full-span track with the window span marked. + + `████` marks the window; `├`/`┤` always cap the log span, even + when the window reaches an edge. + """ + chars = ["─"] * bar_width + start = fraction_to_column( + span_fraction(bounds.window_start_ms, bounds), bar_width + ) + end = fraction_to_column( + span_fraction(bounds.window_end_ms, bounds), bar_width + ) + for col in range(start, end + 1): + chars[col] = "█" + chars[0] = "├" + chars[-1] = "┤" + return "".join(chars) + + +def render_ticks(bounds: TimelineBounds, bar_width: int) -> str: + """A blank bar-width line with evenly spaced time labels. + + Tick spacing is derived from the label width so wider multi-day + labels simply yield fewer ticks instead of overlapping. + """ + span = bounds.log_end_ms - bounds.log_start_ms + # Every label uses one pattern, so all labels share a width. + pattern = tick_pattern(span) + label_width = len(format_tick(bounds.log_start_ms, pattern)) + # A tick owns its label plus a blank gap; fewer fit when wider. + step = label_width + MIN_TICK_GAP + tick_count = max(1, bar_width // step) + cells = [" "] * bar_width + for i in range(1, tick_count + 1): + # Interior positions only; the edges hold the Log start/end labels. + fraction = i / (tick_count + 1) + center = fraction_to_column(fraction, bar_width) + # Time at this position: that same fraction into the log span. + label = format_tick( + bounds.log_start_ms + round(fraction * span), pattern + ) + # Center the label on its column, clamped to stay on the bar. + start = min(max(center - len(label) // 2, 0), bar_width - len(label)) + for offset, char in enumerate(label): + cells[start + offset] = char + return "".join(cells) + + +class Timeline(Static): + """Two-row map of the log span with the current window marked. + + Row 1: edge timestamps around a bar whose `████` block is the + selected window. Row 2: `Log start` / tick labels / `Log end`. + Geometry is recomputed each paint because the bar width is only + known after layout. + """ + + can_focus = False + + def __init__(self, bounds: TimelineBounds) -> None: + """Hold the span/window snapshot to draw at render time.""" + super().__init__() + self.bounds = bounds + + def on_resize(self) -> None: + """Repaint so the bar tracks the new width.""" + self.refresh() + + def render(self): + bar_width = self.size.width - 2 * (STAMP_W + len(GUTTER)) + if bar_width < 2: + return "" + top = ( + f"{format_timestamp(self.bounds.log_start_ms)}{GUTTER}" + f"{render_track(self.bounds, bar_width)}{GUTTER}" + f"{format_timestamp(self.bounds.log_end_ms)}" + ) + bottom = ( + f"{'Log start'.ljust(STAMP_W)}{GUTTER}" + f"{render_ticks(self.bounds, bar_width)}{GUTTER}" + f"{'Log end'.rjust(STAMP_W)}" + ) + return f"{top}\n{bottom}" diff --git a/src/qlever/monitor/widgets/window_stepper.py b/src/qlever/monitor/widgets/window_stepper.py new file mode 100644 index 00000000..483fb761 --- /dev/null +++ b/src/qlever/monitor/widgets/window_stepper.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from textual.app import ComposeResult +from textual.containers import Horizontal +from textual.widgets import Static + + +class WindowStepper(Horizontal): + """Window-size stepper: WINDOW ◄ 15m ►. + + The arrows are separate Statics so each can be made independently + clickable without restructuring the widget. + """ + + can_focus = False + + def __init__(self, window_size: str) -> None: + """Hold the current window size to paint once mounted.""" + super().__init__() + self.window_size = window_size + + def compose(self) -> ComposeResult: + yield Static("WINDOW", classes="stepper-caption") + yield Static("◄", id="window-prev", classes="stepper-arrow") + yield Static(self.window_size, id="window-size") + yield Static("►", id="window-next", classes="stepper-arrow") diff --git a/src/qlever/util.py b/src/qlever/util.py index 4685b3c8..ad2842c8 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -147,6 +147,35 @@ def run_curl_command( return result.stdout +def pretty_printed_query( + query: str, show_prefixes: bool, system: str = "docker" +) -> str | None: + """ + Pretty-print a SPARQL query using the sparql-formatter Docker image. + Optionally strips PREFIX declarations from the output. Argument + `system` can either be docker or podman. Returns None if the query + could not be pretty-printed. + """ + from qlever.containerize import Containerize + + if system not in Containerize.supported_systems(): + system = "docker" + remove_prefixes_cmd = " | sed '/^PREFIX /Id'" if not show_prefixes else "" + pretty_print_query_cmd = ( + f"echo {shlex.quote(query)}" + f" | {system} run -i --rm docker.io/sparqling/sparql-formatter" + f"{remove_prefixes_cmd} | grep -v '^$'" + ) + try: + query_pretty_printed = run_command( + pretty_print_query_cmd, return_output=True + ) + return query_pretty_printed.rstrip() + except Exception as e: + log.debug(f"Failed to pretty-print query, returning None: {e}") + return None + + def is_qlever_server_alive(endpoint_url: str) -> bool: """ Helper function that checks if a QLever server is running on the given From f11d5376ea6359a9ad5ab2f1ff1a0107e28f9aa8 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 16 May 2026 23:49:00 +0200 Subject: [PATCH 02/26] Finish UI implementation without the data layer --- src/qlever/monitor/app.py | 37 +++- src/qlever/monitor/historic_stubs.py | 26 +++ src/qlever/monitor/monitor.tcss | 2 +- src/qlever/monitor/views/historic.py | 174 +++++++++++++++++- src/qlever/monitor/views/live.py | 21 ++- src/qlever/monitor/widgets/mode_picker.py | 35 +++- src/qlever/monitor/widgets/selected_window.py | 8 + src/qlever/monitor/widgets/sparql_pane.py | 3 + src/qlever/monitor/widgets/timeline.py | 47 ++++- src/qlever/monitor/widgets/window_stepper.py | 55 +++++- 10 files changed, 395 insertions(+), 13 deletions(-) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index a1d363b1..6082a540 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -4,11 +4,13 @@ from textual import work from textual.app import App +from textual.binding import Binding +from textual.css.query import NoMatches from qlever.monitor.util import clipboard_install_hint, copy_text from qlever.monitor.views.historic import HistoricScreen from qlever.monitor.views.live import LiveScreen -from qlever.monitor.widgets.sparql_pane import SparqlPane +from qlever.monitor.widgets.sparql_pane import SparqlPane, SparqlScroll from qlever.util import pretty_printed_query @@ -35,6 +37,15 @@ class MonitorQueriesApp(App): ("t", "cycle_themes", "Change theme"), ("y", "copy_query", "Copy SPARQL"), ("p", "pretty_print", "Pretty print"), + Binding( + "shift+up", + "scroll_sparql_up", + "Scroll SPARQL", + key_display="⇧ ↑↓", + ), + Binding( + "shift+down", "scroll_sparql_down", "Scroll SPARQL", show=False + ), ] def __init__( @@ -116,6 +127,30 @@ def apply_pretty_result( return pane.pretty_text = result + def action_scroll_sparql_up(self) -> None: + """Scroll the overflowing SPARQL pane up one line.""" + self.screen.query_one(SparqlScroll).scroll_up() + + def action_scroll_sparql_down(self) -> None: + """Scroll the overflowing SPARQL pane down one line.""" + self.screen.query_one(SparqlScroll).scroll_down() + + def check_action( + self, action: str, parameters: tuple[object, ...] + ) -> bool | None: + """Show the scroll bindings only when the query overflows the pane. + + Returns False (hidden) rather than None (grayed) so the footer + entry disappears entirely until there is something to scroll. + """ + if action in ("scroll_sparql_up", "scroll_sparql_down"): + try: + scroll = self.screen.query_one(SparqlScroll) + except NoMatches: + return False + return scroll.max_scroll_y > 0 + return True + def action_cycle_themes(self) -> None: """Select the next theme on press of `t` binding""" themes = [t for t in self.available_themes if "ansi" not in t] diff --git a/src/qlever/monitor/historic_stubs.py b/src/qlever/monitor/historic_stubs.py index 1e2c4131..78150f2c 100644 --- a/src/qlever/monitor/historic_stubs.py +++ b/src/qlever/monitor/historic_stubs.py @@ -44,7 +44,33 @@ def get_timeline_bounds() -> TimelineBounds: def get_historic_query_rows() -> list[HistoricQueryRow]: """Finished queries matching the current mode in the current window.""" start_ms = int(time.time() * 1000) - 15 * 60 * 1000 + long_query = ( + "PREFIX rdf: \n" + "PREFIX rdfs: \n" + "PREFIX wd: \n" + "PREFIX wdt: \n" + "PREFIX schema: \n" + "SELECT ?person ?personLabel ?birthPlaceLabel ?occupationLabel\n" + " (COUNT(DISTINCT ?work) AS ?works)\n" + "WHERE {\n" + " ?person wdt:P31 wd:Q5 .\n" + " ?person wdt:P106 ?occupation .\n" + " ?person wdt:P19 ?birthPlace .\n" + " ?person rdfs:label ?personLabel .\n" + " ?birthPlace rdfs:label ?birthPlaceLabel .\n" + " ?occupation rdfs:label ?occupationLabel .\n" + " OPTIONAL { ?work wdt:P50 ?person . }\n" + ' FILTER(LANG(?personLabel) = "en")\n' + ' FILTER(LANG(?birthPlaceLabel) = "en")\n' + ' FILTER(LANG(?occupationLabel) = "en")\n' + "}\n" + "GROUP BY ?person ?personLabel ?birthPlaceLabel ?occupationLabel\n" + "HAVING (COUNT(DISTINCT ?work) > 5)\n" + "ORDER BY DESC(?works)\n" + "LIMIT 1000" + ) samples = [ + (50_000, 99_000, "ok", long_query), (0, 42_000, "ok", "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 1000"), (103_000, 8_000, "ok", "SELECT (COUNT(*) AS ?c) WHERE { ?s ?p ?o }"), ( diff --git a/src/qlever/monitor/monitor.tcss b/src/qlever/monitor/monitor.tcss index 63ad2ca5..eb2bed72 100644 --- a/src/qlever/monitor/monitor.tcss +++ b/src/qlever/monitor/monitor.tcss @@ -172,4 +172,4 @@ ModePicker > Static { background: $accent; color: $text; text-style: bold; -} +} \ No newline at end of file diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 588cc4c9..674e2d81 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -11,14 +11,25 @@ get_historic_query_rows, get_timeline_bounds, ) -from qlever.monitor.models import SparqlContent +from qlever.monitor.models import ( + ControlsState, + SparqlContent, + TimelineBounds, +) from qlever.monitor.widgets.controls_row import HistoricControlsRow from qlever.monitor.widgets.header_row import HeaderRow from qlever.monitor.widgets.metrics_row import MetricsRow +from qlever.monitor.widgets.mode_picker import MODES, ModePicker from qlever.monitor.widgets.nav_pill import NavPill from qlever.monitor.widgets.query_table import HistoricQueryTable +from qlever.monitor.widgets.selected_window import SelectedWindow from qlever.monitor.widgets.sparql_pane import SparqlPane from qlever.monitor.widgets.timeline import Timeline +from qlever.monitor.widgets.window_stepper import ( + WindowStepper, + available_presets, + preset_ms, +) TITLE = "QLever monitor-queries: Historic" @@ -26,15 +37,43 @@ class HistoricScreen(Screen, inherit_bindings=False): """Historic view: shows active queries parsed from the log over a time window.""" - BINDINGS = [Binding("tab", "app.swap_screen", " ComposeResult: + controls = get_controls_state() + bounds = get_timeline_bounds() + # Fixed once: the log span never changes during a session. + self.log_start_ms = bounds.log_start_ms + self.log_end_ms = bounds.log_end_ms + # Presets longer than the log span are pointless; `all` covers it. + self.available_presets = available_presets( + self.log_end_ms - self.log_start_ms + ) + # The screen owns the mutable window state from here on. + self.window_size = controls.window_size + self.mode = controls.mode + self.window_start_ms = controls.start_ms + self.window_end_ms = controls.end_ms + yield HeaderRow( left=NavPill("< Live", target="live"), center=Static(TITLE), ) - yield HistoricControlsRow(get_controls_state()) - yield Timeline(get_timeline_bounds()) + yield HistoricControlsRow(controls) + yield Timeline(bounds) yield MetricsRow([get_historic_metrics()]) rows = sorted( get_historic_query_rows(), @@ -47,12 +86,137 @@ def compose(self) -> ComposeResult: id="table-status", ) yield SparqlPane() - yield Footer() + yield Footer(show_command_palette=False) + + def push_state(self) -> None: + """Rebuild the controls/timeline models and push them to the widgets. + + The single update path: every key/click action mutates the + screen's window fields and then calls this. + """ + controls = ControlsState( + window_size=self.window_size, + mode=self.mode, + start_ms=self.window_start_ms, + end_ms=self.window_end_ms, + ) + bounds = TimelineBounds( + log_start_ms=self.log_start_ms, + log_end_ms=self.log_end_ms, + window_start_ms=self.window_start_ms, + window_end_ms=self.window_end_ms, + ) + self.query_one(WindowStepper).window_size = self.window_size + self.query_one(ModePicker).selected = self.mode + self.query_one(SelectedWindow).state = controls + self.query_one(Timeline).bounds = bounds + + def clamp_window_start(self, start: int, width: int) -> int: + """Keep a window of `width` fully inside the log span.""" + return min( + max(start, self.log_start_ms), self.log_end_ms - width + ) + + def center_window_at(self, ms: int) -> None: + """Center the window on `ms`; no-op when `all` (unbounded).""" + width = preset_ms(self.window_size) + if width is None: + return + self.window_start_ms = self.clamp_window_start( + ms - width // 2, width + ) + self.window_end_ms = self.window_start_ms + width + self.push_state() + + def reframe_window(self) -> None: + """Recompute the window range from its size, then push state. + + `all` spans the whole log; any fixed size keeps the current + start but is clamped so the window stays inside the log span. + """ + width = preset_ms(self.window_size) + if width is None: + self.window_start_ms = self.log_start_ms + self.window_end_ms = self.log_end_ms + else: + start = self.clamp_window_start(self.window_start_ms, width) + self.window_start_ms = start + self.window_end_ms = start + width + self.push_state() + + def step_window(self, direction: int) -> None: + """Move the window size one preset in `direction` (wraps).""" + presets = self.available_presets + index = presets.index(self.window_size) + self.window_size = presets[(index + direction) % len(presets)] + self.reframe_window() + + def set_mode(self, mode: str) -> None: + """Select an exact match mode, then push state.""" + self.mode = mode + self.push_state() + + def action_cycle_window(self) -> None: + """Step to the next window-size preset (wraps).""" + self.step_window(1) + + def action_cycle_mode(self) -> None: + """Step to the next match mode (wraps).""" + index = MODES.index(self.mode) + self.set_mode(MODES[(index + 1) % len(MODES)]) + + def shift_window(self, direction: int) -> None: + """Move the window by its own width; clamp; no-op when `all`.""" + width = preset_ms(self.window_size) + if width is None: + return + start = self.window_start_ms + direction * width + self.window_start_ms = self.clamp_window_start(start, width) + self.window_end_ms = self.window_start_ms + width + self.push_state() + + def action_shift_earlier(self) -> None: + """Shift the window one width toward the log start.""" + self.shift_window(-1) + + def action_shift_later(self) -> None: + """Shift the window one width toward the log end.""" + self.shift_window(1) + + def action_snap_start(self) -> None: + """Snap the window to the log start.""" + self.window_start_ms = self.log_start_ms + self.reframe_window() + + def action_snap_end(self) -> None: + """Snap the window to the log end.""" + self.window_start_ms = self.log_end_ms + self.reframe_window() def on_nav_pill_clicked(self, message: NavPill.Clicked) -> None: """Switch to the screen named by the clicked pill.""" self.app.switch_screen(message.target) + def on_window_stepper_stepped( + self, message: WindowStepper.Stepped + ) -> None: + """Resize the window when a stepper arrow is clicked.""" + self.step_window(message.direction) + + def on_mode_picker_selected(self, message: ModePicker.Selected) -> None: + """Switch the match mode when a segment is clicked.""" + self.set_mode(message.mode) + + def on_resize(self) -> None: + """Re-evaluate the conditional scroll bindings after a resize.""" + self.call_after_refresh(self.refresh_bindings) + + def on_timeline_recentered( + self, message: Timeline.Recentered + ) -> None: + """Recenter the window on the clicked timeline position.""" + self.center_window_at(message.center_ms) + def on_data_table_row_selected( self, message: HistoricQueryTable.RowSelected ) -> None: diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index 2144ab47..4566a5e8 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -2,6 +2,7 @@ from textual.app import ComposeResult from textual.binding import Binding +from textual.reactive import reactive from textual.screen import Screen from textual.widgets import Footer, Static @@ -24,7 +25,12 @@ class LiveScreen(Screen, inherit_bindings=False): """Live view: shows currently active queries tailed from the server log.""" - BINDINGS = [Binding("tab", "app.swap_screen", "Historic>", priority=True)] + BINDINGS = [ + Binding("tab", "app.swap_screen", "Historic>", priority=True), + Binding("f", "toggle_freeze", "Freeze/Unfreeze"), + ] + + frozen = reactive(False, init=False) def compose(self) -> ComposeResult: yield HeaderRow( @@ -39,6 +45,19 @@ def compose(self) -> ComposeResult: yield SparqlPane() yield Footer() + def watch_frozen(self, frozen: bool) -> None: + """Reflect the frozen state in the table status line.""" + status = "Frozen - press f to resume" if frozen else "" + self.query_one("#table-status", Static).update(status) + + def action_toggle_freeze(self) -> None: + """Toggle the frozen state of the live view.""" + self.frozen = not self.frozen + + def on_resize(self) -> None: + """Re-evaluate the conditional scroll bindings after a resize.""" + self.call_after_refresh(self.refresh_bindings) + def on_nav_pill_clicked(self, message: NavPill.Clicked) -> None: """Switch to the screen named by the clicked pill.""" self.app.switch_screen(message.target) diff --git a/src/qlever/monitor/widgets/mode_picker.py b/src/qlever/monitor/widgets/mode_picker.py index f7b4b5b3..3fa3ae4b 100644 --- a/src/qlever/monitor/widgets/mode_picker.py +++ b/src/qlever/monitor/widgets/mode_picker.py @@ -1,7 +1,10 @@ from __future__ import annotations +from textual import events from textual.app import ComposeResult from textual.containers import Horizontal +from textual.message import Message +from textual.reactive import reactive from textual.widgets import Static MODES = ("STARTS", "ACTIVE", "ENDS") @@ -17,13 +20,27 @@ class ModePicker(Horizontal): can_focus = False + selected = reactive("", init=False) + + class Selected(Message): + """Posted when a segment is clicked; the screen sets that mode.""" + + def __init__(self, mode: str) -> None: + super().__init__() + self.mode = mode + def __init__(self, selected: str) -> None: """Hold the selected mode to paint once mounted.""" super().__init__() - self.selected = selected + self.set_reactive(ModePicker.selected, selected) def compose(self) -> ComposeResult: - yield Static("MODE", classes="picker-caption") + caption = Static("MODE", classes="picker-caption") + caption.tooltip = ( + "Which queries in the window to show - STARTS began in it, " + "ACTIVE ran during it, ENDS finished in it." + ) + yield caption yield Static("[", classes="picker-bracket") for index, mode in enumerate(MODES): if index > 0: @@ -33,3 +50,17 @@ def compose(self) -> ComposeResult: classes += " -selected" yield Static(mode, id=f"mode-{mode.lower()}", classes=classes) yield Static("]", classes="picker-bracket") + + def watch_selected(self, value: str) -> None: + """Move the -selected state class onto the chosen segment.""" + for mode in MODES: + segment = self.query_one(f"#mode-{mode.lower()}", Static) + segment.set_class(mode == value, "-selected") + + def on_click(self, event: events.Click) -> None: + """Translate a segment click into a Selected message.""" + clicked = event.widget.id if event.widget else None + if clicked and clicked.startswith("mode-"): + mode = clicked.removeprefix("mode-").upper() + if mode in MODES: + self.post_message(self.Selected(mode)) diff --git a/src/qlever/monitor/widgets/selected_window.py b/src/qlever/monitor/widgets/selected_window.py index b7c1e2b7..4e9b804b 100644 --- a/src/qlever/monitor/widgets/selected_window.py +++ b/src/qlever/monitor/widgets/selected_window.py @@ -1,5 +1,6 @@ from __future__ import annotations +from textual.reactive import reactive from textual.widgets import Static from qlever.monitor.models import ControlsState @@ -18,6 +19,13 @@ class SelectedWindow(Static): can_focus = False + state = reactive(None, init=False) + def __init__(self, state: ControlsState) -> None: """Render the selected window range computed from `state`.""" super().__init__(format_selected_window(state)) + self.set_reactive(SelectedWindow.state, state) + + def watch_state(self, value: ControlsState) -> None: + """Repaint the range summary when the window state changes.""" + self.update(format_selected_window(value)) diff --git a/src/qlever/monitor/widgets/sparql_pane.py b/src/qlever/monitor/widgets/sparql_pane.py index 0b1688f6..c78e74b8 100644 --- a/src/qlever/monitor/widgets/sparql_pane.py +++ b/src/qlever/monitor/widgets/sparql_pane.py @@ -105,6 +105,9 @@ def refresh_content(self, content: SparqlContent | None) -> None: """Push the current content into the header and body widgets.""" header = self.query_one("#sparql-header", Static) body = self.query_one("#sparql-body", SparqlBody) + # Body height changes here; re-evaluate the conditional scroll + # bindings once layout has settled. + self.call_after_refresh(self.refresh_bindings) if content is None: header.update( "[dim]Press Enter or double-click a row to view its " diff --git a/src/qlever/monitor/widgets/timeline.py b/src/qlever/monitor/widgets/timeline.py index a3d982a0..76c51e2d 100644 --- a/src/qlever/monitor/widgets/timeline.py +++ b/src/qlever/monitor/widgets/timeline.py @@ -2,6 +2,9 @@ from datetime import datetime +from textual import events +from textual.message import Message +from textual.reactive import reactive from textual.widgets import Static from qlever.monitor.models import TimelineBounds @@ -53,6 +56,18 @@ def fraction_to_column(fraction: float, bar_width: int) -> int: return round(clamped * (bar_width - 1)) +def column_to_ms(col: int, bar_width: int, bounds: TimelineBounds) -> int: + """Epoch (ms) at a bar column; inverse of fraction_to_column. + + Clamped to the log span so an edge click maps to an edge time. + """ + if bar_width <= 1: + return bounds.log_start_ms + fraction = min(max(col / (bar_width - 1), 0.0), 1.0) + span = bounds.log_end_ms - bounds.log_start_ms + return bounds.log_start_ms + round(fraction * span) + + def render_track(bounds: TimelineBounds, bar_width: int) -> str: """The bar line: full-span track with the window span marked. @@ -113,15 +128,45 @@ class Timeline(Static): can_focus = False + class Recentered(Message): + """Posted when the bar is clicked; carries the clicked time.""" + + def __init__(self, center_ms: int) -> None: + super().__init__() + self.center_ms = center_ms + + bounds = reactive(None, init=False) + def __init__(self, bounds: TimelineBounds) -> None: """Hold the span/window snapshot to draw at render time.""" super().__init__() - self.bounds = bounds + self.set_reactive(Timeline.bounds, bounds) + + def watch_bounds(self) -> None: + """Redraw the bar when the span or window changes.""" + self.refresh() def on_resize(self) -> None: """Repaint so the bar tracks the new width.""" self.refresh() + def on_click(self, event: events.Click) -> None: + """Recenter the window on the clicked bar column. + + Clicks on the edge stamps or gutter (outside the bar) are + ignored; the screen decides whether `all` makes it a no-op. + """ + offset = STAMP_W + len(GUTTER) + bar_width = self.size.width - 2 * offset + if bar_width < 2: + return + col = event.x - offset + if col < 0 or col >= bar_width: + return + self.post_message( + self.Recentered(column_to_ms(col, bar_width, self.bounds)) + ) + def render(self): bar_width = self.size.width - 2 * (STAMP_W + len(GUTTER)) if bar_width < 2: diff --git a/src/qlever/monitor/widgets/window_stepper.py b/src/qlever/monitor/widgets/window_stepper.py index 483fb761..e72458aa 100644 --- a/src/qlever/monitor/widgets/window_stepper.py +++ b/src/qlever/monitor/widgets/window_stepper.py @@ -1,9 +1,31 @@ from __future__ import annotations +from textual import events from textual.app import ComposeResult from textual.containers import Horizontal +from textual.message import Message +from textual.reactive import reactive from textual.widgets import Static +WINDOW_PRESETS = ("5m", "15m", "30m", "1h", "6h", "12h", "24h", "all") + +UNIT_MS = {"m": 60_000, "h": 3_600_000} + + +def preset_ms(preset: str) -> int | None: + """Width of a window preset in ms; None for the unbounded `all`.""" + if preset == "all": + return None + return int(preset[:-1]) * UNIT_MS[preset[-1]] + + +def available_presets(span_ms: int) -> tuple[str, ...]: + """Presets that fit within the log span; `all` is always offered.""" + fits = tuple( + p for p in WINDOW_PRESETS if p != "all" and preset_ms(p) <= span_ms + ) + return fits + ("all",) + class WindowStepper(Horizontal): """Window-size stepper: WINDOW ◄ 15m ►. @@ -14,13 +36,42 @@ class WindowStepper(Horizontal): can_focus = False + window_size = reactive("", init=False) + + class Stepped(Message): + """Posted when an arrow is clicked; the screen resizes the window. + + `direction` is -1 for the left arrow, +1 for the right arrow. + """ + + def __init__(self, direction: int) -> None: + super().__init__() + self.direction = direction + def __init__(self, window_size: str) -> None: """Hold the current window size to paint once mounted.""" super().__init__() - self.window_size = window_size + self.set_reactive(WindowStepper.window_size, window_size) def compose(self) -> ComposeResult: - yield Static("WINDOW", classes="stepper-caption") + caption = Static("WINDOW", classes="stepper-caption") + caption.tooltip = ( + "Width of the log time window whose queries are shown. " + "Press w or click the arrows to resize." + ) + yield caption yield Static("◄", id="window-prev", classes="stepper-arrow") yield Static(self.window_size, id="window-size") yield Static("►", id="window-next", classes="stepper-arrow") + + def watch_window_size(self, value: str) -> None: + """Repaint the size label when the window size changes.""" + self.query_one("#window-size", Static).update(value) + + def on_click(self, event: events.Click) -> None: + """Translate an arrow click into a Stepped message.""" + clicked = event.widget.id if event.widget else None + if clicked == "window-prev": + self.post_message(self.Stepped(-1)) + elif clicked == "window-next": + self.post_message(self.Stepped(1)) From 871b1504ce45e75f965aded436c2486a6a52e9c2 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 16 May 2026 23:55:26 +0200 Subject: [PATCH 03/26] Add textual and rich to pyproject.toml --- pyproject.toml | 2 +- src/qlever/monitor/views/historic.py | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cbcdb85d..c631c77c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ classifiers = [ "Topic :: Database :: Front-Ends" ] -dependencies = [ "psutil", "termcolor", "argcomplete", "pyyaml", "rdflib", "requests-sse", "tqdm>=4.60.0" ] +dependencies = [ "psutil", "termcolor", "argcomplete", "pyyaml", "rdflib", "requests-sse", "tqdm>=4.60.0", "textual>=8.0", "rich" ] [project.optional-dependencies] dev = [ "ruff", "pre-commit" ] diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 674e2d81..e42d3594 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -113,18 +113,14 @@ def push_state(self) -> None: def clamp_window_start(self, start: int, width: int) -> int: """Keep a window of `width` fully inside the log span.""" - return min( - max(start, self.log_start_ms), self.log_end_ms - width - ) + return min(max(start, self.log_start_ms), self.log_end_ms - width) def center_window_at(self, ms: int) -> None: """Center the window on `ms`; no-op when `all` (unbounded).""" width = preset_ms(self.window_size) if width is None: return - self.window_start_ms = self.clamp_window_start( - ms - width // 2, width - ) + self.window_start_ms = self.clamp_window_start(ms - width // 2, width) self.window_end_ms = self.window_start_ms + width self.push_state() @@ -211,9 +207,7 @@ def on_resize(self) -> None: """Re-evaluate the conditional scroll bindings after a resize.""" self.call_after_refresh(self.refresh_bindings) - def on_timeline_recentered( - self, message: Timeline.Recentered - ) -> None: + def on_timeline_recentered(self, message: Timeline.Recentered) -> None: """Recenter the window on the clicked timeline position.""" self.center_window_at(message.center_ms) From 66186307bd99c8be084434fee9dd372fe3940e3f Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 19 May 2026 17:24:48 +0200 Subject: [PATCH 04/26] Add `log_reader.py` with pure functions to parse a jsonl line with and without loading it as json --- src/qlever/monitor/log_reader.py | 106 +++++++++++++++++++++++++++++++ test/qlever/test_log_reader.py | 91 ++++++++++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 src/qlever/monitor/log_reader.py create mode 100644 test/qlever/test_log_reader.py diff --git a/src/qlever/monitor/log_reader.py b/src/qlever/monitor/log_reader.py new file mode 100644 index 00000000..6e596612 --- /dev/null +++ b/src/qlever/monitor/log_reader.py @@ -0,0 +1,106 @@ +"""Pure primitives for reading the query metrics log. + +The producer escapes every quote in user content, so a bare `"` is +always structural. That is what lets us slice scalars out of a line by +fixed key order instead of paying json.loads on the bulk path. The keys +below assume the producer's fixed field order; a contract change is +caught by the startup self-check, not here. +""" + +import json + +TS_PREFIX = b'{"ts-ms":' +EVENT_KEY = b'"event":"' +QID_KEY = b'"qid":"' +STATUS_KEY = b'"status":"' + +STATUS_SET = frozenset({"ok", "failed", "cancelled", "timeout", "unknown"}) + + +def slice_string_value(line_bytes: bytes, key: bytes) -> str | None: + """Return the string value following `key`, up to the next quote. + + `key` includes the value's opening quote (for example b'"qid":"'). + Returns None if the key or its closing quote is absent. + """ + start = line_bytes.find(key) + if start == -1: + return None + start += len(key) + end = line_bytes.find(b'"', start) + if end == -1: + return None + return line_bytes[start:end].decode() + + +def parse_line( + line_bytes: bytes, +) -> tuple[int, str, str, str | None] | None: + """Byte-slice one log line into (ts_ms, event, qid, status). + + The fast path: no json.loads, the query blob is never scanned. + `status` is None on start lines. Returns None on any sanity-check + miss so the caller can fall back to parse_line_fallback. Never + raises. + """ + if not line_bytes.startswith(TS_PREFIX): + return None + comma = line_bytes.find(b",", len(TS_PREFIX)) + if comma == -1: + return None + try: + ts_ms = int(line_bytes[len(TS_PREFIX):comma]) + except ValueError: + return None + + event = slice_string_value(line_bytes, EVENT_KEY) + if event not in ("start", "end"): + return None + + qid = slice_string_value(line_bytes, QID_KEY) + if qid is None: + return None + + if event == "start": + return (ts_ms, event, qid, None) + + status = slice_string_value(line_bytes, STATUS_KEY) + if status not in STATUS_SET: + return None + return (ts_ms, event, qid, status) + + +def parse_line_fallback( + line_bytes: bytes, +) -> tuple[int, str, str, str | None] | None: + """Real json.loads for a line parse_line rejected. + + Same 4-tuple shape as parse_line so callers treat both paths + alike. Returns None on a malformed line or a contract miss; never + raises. + """ + try: + obj = json.loads(line_bytes) + except (ValueError, TypeError): + return None + if not isinstance(obj, dict): + return None + + ts_ms = obj.get("ts-ms") + event = obj.get("event") + qid = obj.get("qid") + if not isinstance(ts_ms, int): + return None + if event not in ("start", "end"): + return None + if not isinstance(qid, str): + return None + + if event == "start": + return (ts_ms, event, qid, None) + + status = obj.get("status") + if status not in STATUS_SET: + return None + return (ts_ms, event, qid, status) + diff --git a/test/qlever/test_log_reader.py b/test/qlever/test_log_reader.py new file mode 100644 index 00000000..46825b5e --- /dev/null +++ b/test/qlever/test_log_reader.py @@ -0,0 +1,91 @@ +import pytest + +from qlever.monitor.log_reader import parse_line, parse_line_fallback + +START = ( + b'{"ts-ms":1716000000000,"event":"start","qid":"q-8a4f",' + b'"client-ip":"1.2.3.4","query":"SELECT * WHERE { ?s ?p ?o }"}' +) +END = b'{"ts-ms":1716000000050,"event":"end","qid":"q-8a4f","status":"ok"}' + +VALID_STATUSES = ["ok", "failed", "cancelled", "timeout", "unknown"] + + +def test_parse_line_start_has_no_status_and_ignores_query(): + assert parse_line(START) == (1716000000000, "start", "q-8a4f", None) + + +@pytest.mark.parametrize("status", VALID_STATUSES) +def test_parse_line_end_carries_each_valid_status(status): + line = b'{"ts-ms":1,"event":"end","qid":"q1","status":"%s"}' % ( + status.encode() + ) + assert parse_line(line) == (1, "end", "q1", status) + + +def test_parse_line_trailing_newline_tolerated(): + assert parse_line(END + b"\n") == (1716000000050, "end", "q-8a4f", "ok") + + +def test_parse_line_escaped_quote_in_query_does_not_break_qid(): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"client-ip":"::1","query":"FILTER(?x = \\"a\\")"}' + ) + assert parse_line(line) == (1, "start", "q1", None) + + +def test_parse_line_wrong_first_key_is_a_miss(): + assert parse_line(b'{"event":"end","ts-ms":1,"qid":"q1"}') is None + + +def test_parse_line_non_integer_ts_is_a_miss(): + assert parse_line(b'{"ts-ms":1.5,"event":"end","qid":"q1"}') is None + + +def test_parse_line_unknown_event_is_a_miss(): + assert parse_line(b'{"ts-ms":1,"event":"ping","qid":"q1"}') is None + + +def test_parse_line_end_status_outside_closed_set_is_a_miss(): + line = b'{"ts-ms":1,"event":"end","qid":"q1","status":"weird"}' + assert parse_line(line) is None + + +def test_parse_line_missing_qid_is_a_miss(): + assert parse_line(b'{"ts-ms":1,"event":"end","status":"ok"}') is None + + +def test_fallback_recovers_a_line_the_fast_path_rejects(): + # Leading space after `{` defeats the byte prefix but is valid + # JSON, so the fallback still extracts the fields. + line = b'{ "ts-ms":1716000000000,"event":"end","qid":"q1","status":"ok"}' + assert parse_line(line) is None + assert parse_line_fallback(line) == (1716000000000, "end", "q1", "ok") + + +def test_fallback_start_returns_none_status(): + assert parse_line_fallback(START) == ( + 1716000000000, + "start", + "q-8a4f", + None, + ) + + +def test_fallback_malformed_json_returns_none(): + assert parse_line_fallback(b'{"ts-ms":1,') is None + + +def test_fallback_non_object_json_returns_none(): + assert parse_line_fallback(b"[1,2,3]") is None + + +def test_fallback_non_integer_ts_returns_none(): + line = b'{"ts-ms":1.0,"event":"end","qid":"q1","status":"ok"}' + assert parse_line_fallback(line) is None + + +def test_fallback_status_outside_closed_set_returns_none(): + line = b'{"ts-ms":1,"event":"end","qid":"q1","status":"weird"}' + assert parse_line_fallback(line) is None From 5d365bfeea591f8481ccb591879105f3916b8b4a Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 19 May 2026 19:10:21 +0200 Subject: [PATCH 05/26] Add functions to find the file offset for a given timestamp and scan jsonl lines between 2 offsets --- src/qlever/monitor/log_reader.py | 156 +++++++++++++++++++++--- test/qlever/test_log_reader.py | 200 ++++++++++++++++++++++++++++++- 2 files changed, 339 insertions(+), 17 deletions(-) diff --git a/src/qlever/monitor/log_reader.py b/src/qlever/monitor/log_reader.py index 6e596612..95a89ba4 100644 --- a/src/qlever/monitor/log_reader.py +++ b/src/qlever/monitor/log_reader.py @@ -1,13 +1,8 @@ -"""Pure primitives for reading the query metrics log. - -The producer escapes every quote in user content, so a bare `"` is -always structural. That is what lets us slice scalars out of a line by -fixed key order instead of paying json.loads on the bulk path. The keys -below assume the producer's fixed field order; a contract change is -caught by the startup self-check, not here. -""" +"""Pure primitives for reading the query metrics log""" import json +from collections.abc import Iterator +from typing import BinaryIO TS_PREFIX = b'{"ts-ms":' EVENT_KEY = b'"event":"' @@ -33,6 +28,25 @@ def slice_string_value(line_bytes: bytes, key: bytes) -> str | None: return line_bytes[start:end].decode() +def peek_ts_ms(line_bytes: bytes) -> int | None: + """Read only the leading ts-ms timestamp from a log line. + + The producer always writes ts-ms first, so the integer sits between + the fixed prefix and the next comma. Lets navigation compare + timestamps without parsing the rest of the line. Returns None if the + prefix is absent or the value is not an integer. + """ + if not line_bytes.startswith(TS_PREFIX): + return None + comma = line_bytes.find(b",", len(TS_PREFIX)) + if comma == -1: + return None + try: + return int(line_bytes[len(TS_PREFIX) : comma]) + except ValueError: + return None + + def parse_line( line_bytes: bytes, ) -> tuple[int, str, str, str | None] | None: @@ -43,14 +57,8 @@ def parse_line( miss so the caller can fall back to parse_line_fallback. Never raises. """ - if not line_bytes.startswith(TS_PREFIX): - return None - comma = line_bytes.find(b",", len(TS_PREFIX)) - if comma == -1: - return None - try: - ts_ms = int(line_bytes[len(TS_PREFIX):comma]) - except ValueError: + ts_ms = peek_ts_ms(line_bytes) + if ts_ms is None: return None event = slice_string_value(line_bytes, EVENT_KEY) @@ -104,3 +112,119 @@ def parse_line_fallback( return None return (ts_ms, event, qid, status) + +def next_whole_line( + log_stream: BinaryIO, probe: int, file_size: int +) -> tuple[int, int] | None: + """Find the first complete line starting at or after `probe`. + + A raw probe usually lands mid-line, so we align to the start of the + next whole line. Returns (line_start, ts_ms), or None when `probe` + falls in the file's trailing partial line or past EOF. + """ + if probe >= file_size: + return None + if probe == 0: + line_start = 0 + log_stream.seek(0) + else: + log_stream.seek(probe) + # Discard the partial line the probe landed in; the next read + # then starts on a whole line. + log_stream.readline() + line_start = log_stream.tell() + + line = log_stream.readline() + # An empty read or a line with no terminating newline is the file's + # trailing partial line, not a complete record. + if not line.endswith(b"\n"): + return None + ts_ms = peek_ts_ms(line) + if ts_ms is None: + return None + return (line_start, ts_ms) + + +GALLOP_START = 128 * 1024 + + +def offset_for_ts(log_stream: BinaryIO, target_ms: int, file_size: int) -> int: + """Find where to start reading so a forward read sees every line + at or after target_ms. + + The log is only roughly time-ordered, so this aims a little early + on purpose rather than risk skipping lines; the caller ignores the + few extra older lines. Returns 0 if target_ms is at or before the + first line, and a spot near the end if it is past the last line. + """ + first = next_whole_line(log_stream, 0, file_size) + if first is None: + return 0 + first_start, first_ts = first + if first_ts >= target_ms: + return 0 + + # Gallop backward from the end, doubling the step until a probed + # line is old enough (ts <= target). before_target brackets the + # search from the old side, after_target from the new side. + before_target = first_start + after_target = file_size + step = GALLOP_START + while True: + probe = file_size - step + if probe <= before_target: + break + found = next_whole_line(log_stream, probe, file_size) + if found is None: + step *= 2 + continue + start, ts = found + if ts <= target_ms: + before_target = start + break + after_target = start + step *= 2 + + # Narrow the bracket. Bias every step toward the older side so the + # result never sits past the first matching line. + while after_target - before_target > 1: + mid = (before_target + after_target) // 2 + found = next_whole_line(log_stream, mid, file_size) + if found is None: + after_target = mid + continue + start, ts = found + # The probe sat inside one long line spanning the bracket edge, + # so it gave no usable interior line. Tighten the new side to + # keep the search making progress. + if start <= before_target or start >= after_target: + after_target = mid + continue + if ts < target_ms: + before_target = start + else: + after_target = start + return before_target + + +def scan_range( + log_stream: BinaryIO, lo_offset: int, hi_bound: int +) -> Iterator: + """Yield (parsed, line_offset) for whole lines in [lo_offset, hi_bound]. + + parsed is the (ts_ms, event, qid, status) tuple. lo_offset must be + line-aligned; it always comes from offset_for_ts. The line straddling + hi_bound is included; a trailing line without a newline is left for a + later read. Malformed lines are skipped. + """ + log_stream.seek(lo_offset) + offset = lo_offset + for line in log_stream: + if offset > hi_bound: + return + if not line.endswith(b"\n"): + return + parsed = parse_line(line) or parse_line_fallback(line) + if parsed is not None: + yield (parsed, offset) + offset += len(line) diff --git a/test/qlever/test_log_reader.py b/test/qlever/test_log_reader.py index 46825b5e..acfaec3c 100644 --- a/test/qlever/test_log_reader.py +++ b/test/qlever/test_log_reader.py @@ -1,6 +1,14 @@ import pytest -from qlever.monitor.log_reader import parse_line, parse_line_fallback +from qlever.monitor import log_reader +from qlever.monitor.log_reader import ( + next_whole_line, + offset_for_ts, + parse_line, + parse_line_fallback, + peek_ts_ms, + scan_range, +) START = ( b'{"ts-ms":1716000000000,"event":"start","qid":"q-8a4f",' @@ -89,3 +97,193 @@ def test_fallback_non_integer_ts_returns_none(): def test_fallback_status_outside_closed_set_returns_none(): line = b'{"ts-ms":1,"event":"end","qid":"q1","status":"weird"}' assert parse_line_fallback(line) is None + + +FIRST_LINE = b'{"ts-ms":1000,"event":"start","qid":"q1","query":"SELECT 1"}\n' +SECOND_LINE = b'{"ts-ms":2000,"event":"end","qid":"q1","status":"ok"}\n' +THIRD_LINE = b'{"ts-ms":3000,"event":"start","qid":"q2","query":"SELECT 2"}\n' +SECOND_OFFSET = len(FIRST_LINE) +THIRD_OFFSET = len(FIRST_LINE) + len(SECOND_LINE) + + +@pytest.fixture +def open_log(tmp_path): + """Write bytes to a log file and hand back an open (handle, size). + + Closes every handle it produced when the test finishes. + """ + handles = [] + + def make(data): + path = tmp_path / "q.log" + path.write_bytes(data) + handle = path.open("rb") + handles.append(handle) + return handle, len(data) + + yield make + for handle in handles: + handle.close() + + +def test_next_whole_line_probe_zero_returns_first_line(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE + THIRD_LINE) + assert next_whole_line(log, 0, size) == (0, 1000) + + +def test_next_whole_line_mid_line_aligns_to_following_line(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE + THIRD_LINE) + # A probe inside the first line lands on the second. + assert next_whole_line(log, 10, size) == (SECOND_OFFSET, 2000) + assert next_whole_line(log, SECOND_OFFSET - 1, size) == ( + SECOND_OFFSET, + 2000, + ) + + +def test_next_whole_line_on_a_boundary_advances_to_the_next_line(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE + THIRD_LINE) + # probe > 0 always discards one line, so a probe on the second + # line's start returns the third. + assert next_whole_line(log, SECOND_OFFSET, size) == (THIRD_OFFSET, 3000) + + +def test_next_whole_line_past_eof_returns_none(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE) + assert next_whole_line(log, size, size) is None + + +def test_next_whole_line_trailing_partial_line_returns_none(open_log): + partial = b'{"ts-ms":4000,"event":"end","qid":"q3"' + log, size = open_log(FIRST_LINE + partial) + # The only line after the probe has no newline, so it is incomplete. + assert next_whole_line(log, 5, size) is None + + +def build_log(timestamps): + """Bytes for an ascending-ts log plus each line's start offset.""" + data = bytearray() + offsets = [] + for i, ts in enumerate(timestamps): + offsets.append(len(data)) + data += b'{"ts-ms":%d,"event":"end","qid":"q%d","status":"ok"}\n' % ( + ts, + i, + ) + return bytes(data), offsets + + +def first_ts_at_or_after(log, offset, target): + """ts of the first line at/after offset, or None if every line older.""" + log.seek(offset) + for line in log: + ts = peek_ts_ms(line) + if ts >= target: + return ts + return None + + +def test_offset_for_ts_empty_file_returns_zero(open_log): + log, size = open_log(b"") + assert offset_for_ts(log, 100, size) == 0 + + +def test_offset_for_ts_target_before_first_line_returns_zero(open_log): + data, _ = build_log([1000, 1010, 1020]) + log, size = open_log(data) + assert offset_for_ts(log, 500, size) == 0 + + +def test_offset_for_ts_target_equal_to_first_line_returns_zero(open_log): + data, _ = build_log([1000, 1010, 1020]) + log, size = open_log(data) + # first_ts >= target means every line qualifies; start at 0. + assert offset_for_ts(log, 1000, size) == 0 + + +@pytest.mark.parametrize("target", [1015, 1020, 1099, 1500, 1900]) +def test_offset_for_ts_lands_at_or_before_the_boundary( + open_log, monkeypatch, target +): + # Tiny gallop step so the backward gallop actually iterates here. + monkeypatch.setattr(log_reader, "GALLOP_START", 16) + timestamps = list(range(1000, 2000, 10)) + data, offsets = build_log(timestamps) + log, size = open_log(data) + + result = offset_for_ts(log, target, size) + + # Never overshoots: the returned line is not newer than target, so + # no line with ts >= target was skipped. + boundary = next(o for o, ts in zip(offsets, timestamps) if ts >= target) + assert result <= boundary + assert first_ts_at_or_after(log, result, target) == next( + ts for ts in timestamps if ts >= target + ) + + +def test_offset_for_ts_target_past_last_line_yields_empty_scan( + open_log, monkeypatch +): + monkeypatch.setattr(log_reader, "GALLOP_START", 16) + data, _ = build_log(list(range(1000, 2000, 10))) + log, size = open_log(data) + + result = offset_for_ts(log, 9999, size) + + # Near EOF, not necessarily file_size, but a forward scan from it + # finds nothing at or after the target: the same empty result. + assert first_ts_at_or_after(log, result, 9999) is None + + +def test_scan_range_yields_each_whole_line_with_its_offset(open_log): + data, offsets = build_log([1000, 1010, 1020]) + log, size = open_log(data) + assert list(scan_range(log, 0, size)) == [ + ((1000, "end", "q0", "ok"), offsets[0]), + ((1010, "end", "q1", "ok"), offsets[1]), + ((1020, "end", "q2", "ok"), offsets[2]), + ] + + +def test_scan_range_includes_the_line_straddling_hi_bound(open_log): + data, offsets = build_log([1000, 1010, 1020]) + log, size = open_log(data) + # hi_bound falls inside the second line. The second line starts + # before it so it is still emitted; the third starts past it. + result = list(scan_range(log, 0, offsets[1] + 3)) + assert [ts for (ts, _, _, _), _ in result] == [1000, 1010] + + +def test_scan_range_stops_before_a_trailing_partial_line(open_log): + data, _ = build_log([1000]) + log, size = open_log(data + b'{"ts-ms":2000,"event":"end","qid":"q9"') + assert list(scan_range(log, 0, size)) == [ + ((1000, "end", "q0", "ok"), 0), + ] + + +def test_scan_range_skips_a_malformed_line_and_continues(open_log): + good = b'{"ts-ms":1000,"event":"end","qid":"q0","status":"ok"}\n' + junk = b"not json at all\n" + tail = b'{"ts-ms":1020,"event":"end","qid":"q2","status":"ok"}\n' + log, size = open_log(good + junk + tail) + assert list(scan_range(log, 0, size)) == [ + ((1000, "end", "q0", "ok"), 0), + ((1020, "end", "q2", "ok"), len(good) + len(junk)), + ] + + +def test_scan_range_recovers_a_line_via_the_json_fallback(open_log): + # Leading space defeats the fast byte path but is valid JSON. + line = b'{ "ts-ms":2000,"event":"end","qid":"q1","status":"ok"}\n' + log, size = open_log(line) + assert list(scan_range(log, 0, size)) == [ + ((2000, "end", "q1", "ok"), 0), + ] + + +def test_scan_range_from_eof_yields_nothing(open_log): + data, _ = build_log([1000, 1010]) + log, size = open_log(data) + assert list(scan_range(log, size, size)) == [] From 15bd40216cd8e83c196918340c6bcc87648710ed Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Wed, 20 May 2026 13:40:13 +0200 Subject: [PATCH 06/26] Add a way to get active and completed queries from the log file --- src/qlever/commands/monitor_queries.py | 32 +++-- src/qlever/monitor/app.py | 7 +- src/qlever/monitor/log_reader.py | 75 +++++++++++- test/qlever/test_log_reader.py | 160 +++++++++++++++++++++++++ 4 files changed, 251 insertions(+), 23 deletions(-) diff --git a/src/qlever/commands/monitor_queries.py b/src/qlever/commands/monitor_queries.py index c92ae779..9820583d 100644 --- a/src/qlever/commands/monitor_queries.py +++ b/src/qlever/commands/monitor_queries.py @@ -40,20 +40,15 @@ def additional_arguments(self, subparser) -> None: subparser.add_argument( "--qlever-server-log", type=Path, - help="The `qlever-server` log file (default = {name}.server-log.txt)", + help="The `qlever-server` log file (default = {name}.query_metrics.log)", ) subparser.add_argument( - "--warn-after", + "--slow-threshold", type=int, default=None, - help="Duration in seconds after which an active query is logged" - " as slow (default = server timeout - 10s)", - ) - subparser.add_argument( - "--warning-log", - type=Path, - help="File to append slow-query warnings to" - " (default = {name}.slow-queries.tsv)", + help="Duration in seconds above which a query (active or" + " completed) is counted as slow in the metrics" + " (default = server timeout - 10s)", ) subparser.add_argument( "--screen_refresh_s", @@ -67,7 +62,7 @@ def additional_arguments(self, subparser) -> None: def execute(self, args) -> bool: if not args.qlever_server_log: - args.qlever_server_log = Path.cwd() / f"{args.name}.server-log.txt" + args.qlever_server_log = Path.cwd() / f"{args.name}.query_metrics.log" show_msg = ( f"Reading server logs from {args.qlever_server_log} to display the " "currently active queries on the server" @@ -76,19 +71,21 @@ def execute(self, args) -> bool: if args.show: return True + if not args.qlever_server_log.is_file(): + log.error(f"Log file not found: {args.qlever_server_log}") + return False + timeout_s = 30 - if args.warn_after is None: + if args.slow_threshold is None: try: timeout_s = int(args.timeout.rstrip("s")) except ValueError: log.error( f"Could not parse server timeout {args.timeout!r};" - " pass --warn-after explicitly" + " pass --slow-threshold explicitly" ) return False - args.warn_after = max(1, timeout_s - 10) - if args.warning_log is None: - args.warning_log = Path.cwd() / f"{args.name}.slow-queries.tsv" + args.slow_threshold = max(1, timeout_s - 10) repaint_interval = max(0.5, args.screen_refresh_s) sparql_endpoint = ( @@ -101,8 +98,7 @@ def execute(self, args) -> bool: log_file=args.qlever_server_log, sparql_endpoint=sparql_endpoint, timeout=timeout_s, - warn_after=args.warn_after, - warning_log=args.warning_log, + slow_threshold=args.slow_threshold, repaint_interval=repaint_interval, system=args.system, ).run() diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 6082a540..7d20d800 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -53,8 +53,7 @@ def __init__( log_file: Path, sparql_endpoint: str, timeout: int, - warn_after: int, - warning_log: Path, + slow_threshold: int, repaint_interval: float = 0.5, system: str = "docker", ) -> None: @@ -62,8 +61,8 @@ def __init__( self.log_file = log_file self.sparql_endpoint = sparql_endpoint self.timeout = timeout - self.warn_after = warn_after - self.warning_log = warning_log + self.window_pad_ms = 2000 * timeout + self.slow_threshold = slow_threshold self.repaint_interval = repaint_interval self.system = system diff --git a/src/qlever/monitor/log_reader.py b/src/qlever/monitor/log_reader.py index 95a89ba4..2595c406 100644 --- a/src/qlever/monitor/log_reader.py +++ b/src/qlever/monitor/log_reader.py @@ -2,7 +2,7 @@ import json from collections.abc import Iterator -from typing import BinaryIO +from typing import BinaryIO, NamedTuple TS_PREFIX = b'{"ts-ms":' EVENT_KEY = b'"event":"' @@ -12,6 +12,20 @@ STATUS_SET = frozenset({"ok", "failed", "cancelled", "timeout", "unknown"}) +class CompletedQuery(NamedTuple): + """One start event paired with its matching end event. + + start_line_offset is the byte offset of the start line in the log, + kept so load_sparql_at can fetch the SPARQL text later. + """ + + start_ms: int + end_ms: int + duration_ms: int + status: str + start_line_offset: int + + def slice_string_value(line_bytes: bytes, key: bytes) -> str | None: """Return the string value following `key`, up to the next quote. @@ -228,3 +242,62 @@ def scan_range( if parsed is not None: yield (parsed, offset) offset += len(line) + + +def pair_start_end_events( + events: Iterator, +) -> tuple[list[CompletedQuery], dict[str, tuple[int, int]]]: + """Pair start and end events from a scan into completed queries. + + Walks the events once. Each end pops its matching start by qid into + completed_queries; whatever remains unmatched is still_open. Unmatched + ends are dropped (their start was outside the scanned range). + + events: yields ((ts_ms, event, qid, status), line_offset) from + scan_range. + + Returns (completed_queries, still_open). still_open maps qid to + (start_ms, start_line_offset) for queries with no end event seen yet. + """ + completed_queries = [] + still_open = {} + for (ts_ms, event, qid, status), line_offset in events: + if event == "start": + still_open[qid] = (ts_ms, line_offset) + continue + # event == "end" + matched_start = still_open.pop(qid, None) + if matched_start is None: + continue + start_ms, start_line_offset = matched_start + completed_queries.append( + CompletedQuery( + start_ms=start_ms, + end_ms=ts_ms, + duration_ms=ts_ms - start_ms, + status=status, + start_line_offset=start_line_offset, + ) + ) + return (completed_queries, still_open) + + +def load_sparql_at(log_stream: BinaryIO, line_offset: int) -> str: + """Return the SPARQL text of the start line at line_offset. + + Returns "" if the line is malformed or has no query field. Never + raises. Used only for the handful of rows that need their SPARQL + text shown, never on a bulk path. + """ + log_stream.seek(line_offset) + line = log_stream.readline() + try: + obj = json.loads(line) + except (ValueError, TypeError): + return "" + if not isinstance(obj, dict): + return "" + query = obj.get("query") + if not isinstance(query, str): + return "" + return query diff --git a/test/qlever/test_log_reader.py b/test/qlever/test_log_reader.py index acfaec3c..9679c9a7 100644 --- a/test/qlever/test_log_reader.py +++ b/test/qlever/test_log_reader.py @@ -2,8 +2,11 @@ from qlever.monitor import log_reader from qlever.monitor.log_reader import ( + CompletedQuery, + load_sparql_at, next_whole_line, offset_for_ts, + pair_start_end_events, parse_line, parse_line_fallback, peek_ts_ms, @@ -287,3 +290,160 @@ def test_scan_range_from_eof_yields_nothing(open_log): data, _ = build_log([1000, 1010]) log, size = open_log(data) assert list(scan_range(log, size, size)) == [] + + +def test_pair_start_end_events_empty_input_yields_empty_outputs(): + completed, still_open = pair_start_end_events(iter([])) + assert completed == [] + assert still_open == {} + + +def test_pair_start_end_events_clean_pair(): + events = [ + ((1000, "start", "q1", None), 0), + ((1050, "end", "q1", "ok"), 100), + ] + completed, still_open = pair_start_end_events(iter(events)) + assert completed == [ + CompletedQuery( + start_ms=1000, + end_ms=1050, + duration_ms=50, + status="ok", + start_line_offset=0, + ) + ] + assert still_open == {} + + +def test_pair_start_end_events_start_without_end_stays_open(): + events = [((1000, "start", "q1", None), 42)] + completed, still_open = pair_start_end_events(iter(events)) + assert completed == [] + assert still_open == {"q1": (1000, 42)} + + +def test_pair_start_end_events_end_without_start_is_dropped(): + events = [((1050, "end", "q1", "ok"), 0)] + completed, still_open = pair_start_end_events(iter(events)) + assert completed == [] + assert still_open == {} + + +def test_pair_start_end_events_interleaved_queries_pair_in_end_order(): + events = [ + ((1000, "start", "qA", None), 0), + ((1010, "start", "qB", None), 100), + ((1020, "end", "qB", "ok"), 200), + ((1030, "end", "qA", "failed"), 300), + ] + completed, still_open = pair_start_end_events(iter(events)) + assert completed == [ + CompletedQuery( + start_ms=1010, + end_ms=1020, + duration_ms=10, + status="ok", + start_line_offset=100, + ), + CompletedQuery( + start_ms=1000, + end_ms=1030, + duration_ms=30, + status="failed", + start_line_offset=0, + ), + ] + assert still_open == {} + + +def test_pair_start_end_events_pairs_by_qid_not_file_order(): + # qA's end is written before qB's start (timestamps out of order), + # but pairing is by qid so each query gets its own duration. + events = [ + ((1000, "start", "qA", None), 0), + ((1100, "start", "qB", None), 100), + ((1050, "end", "qA", "ok"), 200), + ((1150, "end", "qB", "ok"), 300), + ] + completed, _ = pair_start_end_events(iter(events)) + assert completed == [ + CompletedQuery( + start_ms=1000, + end_ms=1050, + duration_ms=50, + status="ok", + start_line_offset=0, + ), + CompletedQuery( + start_ms=1100, + end_ms=1150, + duration_ms=50, + status="ok", + start_line_offset=100, + ), + ] + + +@pytest.mark.parametrize("status", VALID_STATUSES) +def test_pair_start_end_events_status_flows_through(status): + events = [ + ((1000, "start", "q1", None), 0), + ((1050, "end", "q1", status), 100), + ] + completed, _ = pair_start_end_events(iter(events)) + assert completed[0].status == status + + +def test_load_sparql_at_returns_query_text(open_log): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"client-ip":"1.2.3.4","query":"SELECT * WHERE { ?s ?p ?o }"}\n' + ) + log, _ = open_log(line) + assert load_sparql_at(log, 0) == "SELECT * WHERE { ?s ?p ?o }" + + +def test_load_sparql_at_handles_escaped_quotes_and_backslashes(open_log): + # The JSON value encodes a literal " and a literal \ in the query. + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"query":"SELECT ?x WHERE { ?x ?p \\"a\\\\b\\" }"}\n' + ) + log, _ = open_log(line) + assert load_sparql_at(log, 0) == 'SELECT ?x WHERE { ?x ?p "a\\b" }' + + +def test_load_sparql_at_handles_unicode(open_log): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"query":"SELECT ?s WHERE { ?s ?p \xe2\x98\x83 }"}\n' + ) + log, _ = open_log(line) + assert load_sparql_at(log, 0) == "SELECT ?s WHERE { ?s ?p ☃ }" + + +def test_load_sparql_at_end_line_has_no_query_returns_empty(open_log): + log, _ = open_log(END + b"\n") + assert load_sparql_at(log, 0) == "" + + +def test_load_sparql_at_malformed_json_returns_empty(open_log): + log, _ = open_log(b"this is not json\n") + assert load_sparql_at(log, 0) == "" + + +def test_load_sparql_at_top_level_non_object_returns_empty(open_log): + log, _ = open_log(b"42\n") + assert load_sparql_at(log, 0) == "" + + +def test_load_sparql_at_reads_the_line_at_the_given_offset(open_log): + first = ( + b'{"ts-ms":1,"event":"start","qid":"q1","query":"FIRST"}\n' + ) + second = ( + b'{"ts-ms":2,"event":"start","qid":"q2","query":"SECOND"}\n' + ) + log, _ = open_log(first + second) + assert load_sparql_at(log, len(first)) == "SECOND" From 05092c67cad3e9066456d6887e127083f370b977 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Fri, 22 May 2026 10:54:02 +0200 Subject: [PATCH 07/26] First working version of LiveScreen --- src/qlever/monitor/app.py | 41 ++- src/qlever/monitor/historic_stubs.py | 1 + src/qlever/monitor/live_data.py | 52 +++ src/qlever/monitor/live_engine.py | 357 ++++++++++++++++++ src/qlever/monitor/live_stubs.py | 3 + src/qlever/monitor/log_reader.py | 57 ++- src/qlever/monitor/models.py | 2 + src/qlever/monitor/views/live.py | 81 ++++- src/qlever/monitor/widgets/metrics_row.py | 13 +- src/qlever/monitor/widgets/query_table.py | 24 ++ src/qlever/monitor/widgets/status_row.py | 25 +- src/qlever/util.py | 15 +- test/qlever/conftest.py | 31 ++ test/qlever/test_live_engine.py | 424 ++++++++++++++++++++++ test/qlever/test_log_reader.py | 72 +++- 15 files changed, 1146 insertions(+), 52 deletions(-) create mode 100644 src/qlever/monitor/live_data.py create mode 100644 src/qlever/monitor/live_engine.py create mode 100644 test/qlever/test_live_engine.py diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 7d20d800..f80118cc 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -1,12 +1,20 @@ from __future__ import annotations +import time from pathlib import Path from textual import work from textual.app import App from textual.binding import Binding from textual.css.query import NoMatches - +from textual.worker import get_current_worker + +from qlever.monitor.live_engine import ( + LiveLogReader, + LiveState, + find_active_queries, + load_completed_history, +) from qlever.monitor.util import clipboard_install_hint, copy_text from qlever.monitor.views.historic import HistoricScreen from qlever.monitor.views.live import LiveScreen @@ -65,11 +73,40 @@ def __init__( self.slow_threshold = slow_threshold self.repaint_interval = repaint_interval self.system = system + self.server_status = "checking" + self.live_state = LiveState() def on_mount(self) -> None: - """Open the Live screen on startup.""" + """Boot the live engine, then open the Live screen.""" + state, cut_offset, _ = find_active_queries( + self.log_file, self.window_pad_ms + ) + self.live_state = state + self.cut_offset = cut_offset + self.tail_live_log() + self.load_metrics_history() self.push_screen("live") + @work(thread=True, exclusive=True) + def tail_live_log(self) -> None: + """Poll the log forever, dispatching new events into LiveState.""" + tailer = LiveLogReader( + self.log_file, + self.live_state, + self.cut_offset, + self.window_pad_ms, + ) + worker = get_current_worker() + with self.log_file.open("rb") as log_stream: + while not worker.is_cancelled: + tailer.poll(log_stream) + time.sleep(tailer.poll_interval) + + @work(thread=True) + def load_metrics_history(self) -> None: + """Prepend the hour before cut_offset into completed history so metrics aren't empty at boot.""" + load_completed_history(self.log_file, self.live_state, self.cut_offset) + def action_swap_screen(self) -> None: """Toggle between Live and Historic (bound to Tab on each screen).""" target = "historic" if isinstance(self.screen, LiveScreen) else "live" diff --git a/src/qlever/monitor/historic_stubs.py b/src/qlever/monitor/historic_stubs.py index 78150f2c..85237b42 100644 --- a/src/qlever/monitor/historic_stubs.py +++ b/src/qlever/monitor/historic_stubs.py @@ -113,6 +113,7 @@ def get_historic_metrics() -> MetricsCounts: failed=11, timeout=2, cancelled=9, + unknown=0, p50=120, p95=1840, slow=4, diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py new file mode 100644 index 00000000..411d5e7e --- /dev/null +++ b/src/qlever/monitor/live_data.py @@ -0,0 +1,52 @@ +"""Adapters that translate live engine state into UI models. + +The Live screen calls these getters; they read shared state under +the engine's lock when iteration is involved and return frozen +model dataclasses the widgets consume. No Textual imports, no +log I/O. +""" + +from qlever.monitor.live_engine import ( + LiveState, + MetricsSnapshot, + compute_live_metrics, +) +from qlever.monitor.models import LiveQueryRow, LiveSubtitle, MetricsCounts + +LIVE_METRIC_LABELS = ["last 5m", "last 15m", "last 1h"] +EMPTY_FIELDS = dict.fromkeys(MetricsSnapshot._fields) + + +def get_live_subtitle( + state: LiveState, server_status: str, endpoint: str +) -> LiveSubtitle: + """Subtitle line: server-reachable status plus current active count.""" + return LiveSubtitle( + endpoint=endpoint, state=server_status, n_active=len(state.active) + ) + + +def get_live_query_rows(state: LiveState) -> list[LiveQueryRow]: + """Snapshot the active set as a list of UI rows; no sort.""" + with state.lock: + active_snapshot = list(state.active.items()) + return [ + LiveQueryRow(qid=qid, ts_ms=start_ms, sparql=sparql) + for qid, (start_ms, sparql) in active_snapshot + ] + + +def get_live_metrics( + state: LiveState, + slow_threshold_ms: int, + now_ms: int, +) -> list[MetricsCounts]: + """Three rolling-window metric rows for the Live screen.""" + snapshots = compute_live_metrics(state, slow_threshold_ms, now_ms) + return [ + MetricsCounts( + label=label, + **(snap._asdict() if snap is not None else EMPTY_FIELDS), + ) + for label, snap in zip(LIVE_METRIC_LABELS, snapshots) + ] diff --git a/src/qlever/monitor/live_engine.py b/src/qlever/monitor/live_engine.py new file mode 100644 index 00000000..020852ad --- /dev/null +++ b/src/qlever/monitor/live_engine.py @@ -0,0 +1,357 @@ +"""In-memory state for the Live tab. + +Holds the queries currently running on the server and the rolling +history of completed queries that feeds the 5m, 15m, and 1h metric +rows. A single writer thread mutates the state; readers take a short +locked snapshot. Pure data layer: no Textual imports. +""" + +import json +import statistics +import threading +import time +from collections import deque +from collections.abc import Callable +from dataclasses import dataclass, field +from pathlib import Path +from typing import BinaryIO, NamedTuple + +from qlever.monitor.log_reader import ( + STATUS_SET, + CompletedQuery, + load_sparql_at, + offset_for_ts, + pair_start_end_events, + read_last_timestamp, + scan_range, +) + +LIVE_METRIC_WINDOWS_MS = [5 * 60_000, 15 * 60_000, 60 * 60_000] +LIVE_HORIZON_MS = LIVE_METRIC_WINDOWS_MS[2] + + +def current_ms() -> int: + """Wall-clock now in epoch milliseconds.""" + return time.time_ns() // 1_000_000 + + +class MetricsSnapshot(NamedTuple): + """Counts and percentiles for completed queries in one time window. + + The status counts sum to seen by construction: + seen == ok + failed + timeout + cancelled + unknown + slow is independent: a query can be ok and slow. p50 and p95 are + duration_ms values across all completions in the window, None when + the window is empty. + """ + + seen: int + ok: int + failed: int + timeout: int + cancelled: int + unknown: int + slow: int + p50: int | None + p95: int | None + + +def percentiles(durations_ms: list[int]) -> tuple[int | None, int | None]: + """Return (p50, p95) over durations_ms, or (None, None) if empty.""" + if not durations_ms: + return (None, None) + if len(durations_ms) == 1: + only = durations_ms[0] + return (only, only) + cuts = statistics.quantiles(durations_ms, n=100) + return (round(cuts[49]), round(cuts[94])) + + +def build_snapshot( + counts: dict[str, int], + slow: int, + durations_ms: list[int], +) -> MetricsSnapshot: + """Wrap one set of accumulators into a MetricsSnapshot.""" + p50, p95 = percentiles(durations_ms) + return MetricsSnapshot( + seen=len(durations_ms), + ok=counts["ok"], + failed=counts["failed"], + timeout=counts["timeout"], + cancelled=counts["cancelled"], + unknown=counts["unknown"], + slow=slow, + p50=p50, + p95=p95, + ) + + +class CompletedQueries: + """Rolling 1h history of completed queries. + + Storage is a deque ordered by insertion, which is the tailer's + forward read order. Wall-clock is monotonic in steady state so + older entries sit at the left and trimming is one popleft loop. + """ + + def __init__(self) -> None: + self.entries = deque() + + def add(self, query: CompletedQuery) -> None: + """Append a completed query to the history. + + Caller writes in tailer order, which is wall-clock order in + steady state; entries remain sorted by end_ms for trimming. + """ + self.entries.append(query) + + def drop_older_than(self, cutoff_ms: int) -> None: + """Discard completions that ended before cutoff_ms. + + Called on each tailer tick with cutoff_ms = now_ms - 1h so the + history stays bounded even during quiet periods. + """ + while self.entries and self.entries[0].end_ms < cutoff_ms: + self.entries.popleft() + + def metrics_for_windows( + self, + now_ms: int, + windows_ms: list[int], + slow_threshold_ms: int, + data_start_ms: int, + ) -> list[MetricsSnapshot | None]: + """Compute one snapshot per window in a single deque walk. + + data_start_ms is the oldest timestamp the caller has loaded + complete data for. A window whose cutoff falls before that gets + None instead of a snapshot, so the widget can render the row as + not-yet-ready. Returns snapshots in the same order as windows_ms. + """ + cutoffs = [now_ms - w for w in windows_ms] + counts = [ + {"ok": 0, "failed": 0, "timeout": 0, "cancelled": 0, "unknown": 0} + for _ in windows_ms + ] + slow = [0] * len(windows_ms) + durations = [[] for _ in windows_ms] + + for entry in self.entries: + if entry.end_ms > now_ms: + continue + for i, cutoff in enumerate(cutoffs): + if entry.end_ms >= cutoff: + counts[i][entry.status] += 1 + if entry.duration_ms >= slow_threshold_ms: + slow[i] += 1 + durations[i].append(entry.duration_ms) + + return [ + None + if cutoffs[i] < data_start_ms + else build_snapshot(counts[i], slow[i], durations[i]) + for i in range(len(windows_ms)) + ] + + +@dataclass +class LiveState: + """Shared in-memory state for the Live tab. + + The tailer is the steady-state writer; the boot-time loader prepends + historical completions once. The UI render callback reads under the + same lock and computes metrics on the fly via compute_live_metrics. + """ + + completed: CompletedQueries = field(default_factory=CompletedQueries) + active: dict[str, tuple[int, str]] = field(default_factory=dict) + lock: threading.Lock = field(default_factory=threading.Lock) + + +def compute_live_metrics( + state: LiveState, + slow_threshold_ms: int, + now_ms: int, +) -> list[MetricsSnapshot | None]: + """Compute one snapshot per live window over the completed history. + + Takes a short-locked deque copy so the actual scan and percentile + work runs lock-free while the tailer keeps appending. Windows whose + cutoff predates the oldest entry come back as None so the UI can + render a "not yet covered" sentinel. + """ + with state.lock: + history_snapshot = CompletedQueries() + history_snapshot.entries = deque(state.completed.entries) + + oldest_ms = ( + history_snapshot.entries[0].start_ms + if history_snapshot.entries + else now_ms + ) + return history_snapshot.metrics_for_windows( + now_ms=now_ms, + windows_ms=LIVE_METRIC_WINDOWS_MS, + slow_threshold_ms=slow_threshold_ms, + data_start_ms=oldest_ms, + ) + + +def find_active_queries( + log_path: Path, + window_pad_ms: int, +) -> tuple[LiveState, int, int]: + """Scan the log's tail to determine queries still running. + + Reads [eof_ts - window_pad_ms, EOF], pairs start/end events, and + records the unmatched starts as the active set. The completed + pairs found in this window are discarded; backfill rediscovers + them. Loads SPARQL text for each survivor by stored offset. + + Returns (state, cut_offset, eof_ts). cut_offset is where the tailer + starts reading forward; eof_ts is the timestamp of the log's last + complete line, or 0 when the log holds none. + """ + state = LiveState() + with log_path.open("rb") as log_stream: + file_size = log_path.stat().st_size + eof_ts = read_last_timestamp(log_stream, file_size) + if eof_ts is None: + return (state, file_size, 0) + + lo_offset = offset_for_ts( + log_stream, eof_ts - window_pad_ms, file_size + ) + events = scan_range(log_stream, lo_offset, file_size) + _, still_open = pair_start_end_events(events) + + for qid, (start_ms, start_line_offset) in still_open.items(): + sparql = load_sparql_at(log_stream, start_line_offset) + state.active[qid] = (start_ms, sparql) + + return (state, file_size, eof_ts) + + +def load_completed_history( + log_path: Path, + state: LiveState, + cut_offset: int, + now_ms: Callable[[], int] = current_ms, +) -> None: + """Scan the hour before cut_offset and seed completed history. + + One-shot: callers spawn this on a daemon thread at startup if they + want it off the main loop. After it returns, the tailer is the only + writer of state.completed. + """ + with log_path.open("rb") as log_stream: + oldest_wanted_ms = now_ms() - LIVE_HORIZON_MS + scan_start_offset = offset_for_ts( + log_stream, oldest_wanted_ms, cut_offset + ) + events = scan_range(log_stream, scan_start_offset, cut_offset) + older_completions, _ = pair_start_end_events(events) + + with state.lock: + tailer_entries = state.completed.entries + state.completed.entries = deque(older_completions) + state.completed.entries.extend(tailer_entries) + + +class LiveLogReader: + """Tail cursor and per-poll logic for the live query log. + + Sole writer of LiveState in steady state. Each poll() reads any + whole lines appended since the last cursor, dispatches them into + active / completed under the state lock, and trims stale entries. + The run loop is owned by the caller. + """ + + def __init__( + self, + log_path: Path, + state: LiveState, + cut_offset: int, + window_pad_ms: int, + poll_interval: float = 0.2, + now_ms: Callable[[], int] = current_ms, + ) -> None: + self.log_path = log_path + self.state = state + self.cursor = cut_offset + self.window_pad_ms = window_pad_ms + self.poll_interval = poll_interval + self.now_ms = now_ms + + def poll(self, log_stream: BinaryIO) -> None: + """Process any whole lines appended since the last poll.""" + log_stream.seek(self.cursor) + for line in log_stream: + if not line.endswith(b"\n"): + break + self.cursor += len(line) + self.handle_line(line) + self.evict_stale() + + def evict_stale(self) -> None: + """Drop completions older than 1h and actives older than 2t. + + Active eviction guards against a missing end event: any query + whose start is older than the 2t safety horizon is treated as + gone, since the server contract says no real query lives that + long. + """ + now = self.now_ms() + completed_cutoff = now - LIVE_HORIZON_MS + active_cutoff = now - self.window_pad_ms + with self.state.lock: + self.state.completed.drop_older_than(completed_cutoff) + stale = [ + qid + for qid, (start_ms, _) in self.state.active.items() + if start_ms < active_cutoff + ] + for qid in stale: + del self.state.active[qid] + + def handle_line(self, line: bytes) -> None: + """Parse one whole line and dispatch it into LiveState.""" + try: + obj = json.loads(line) + except (ValueError, TypeError): + return + if not isinstance(obj, dict): + return + ts_ms = obj.get("ts-ms") + event = obj.get("event") + qid = obj.get("qid") + if not isinstance(ts_ms, int) or not isinstance(qid, str): + return + + if event == "start": + sparql = obj.get("query") + if not isinstance(sparql, str): + sparql = "" + with self.state.lock: + self.state.active[qid] = (ts_ms, sparql) + return + + if event == "end": + status = obj.get("status") + if status not in STATUS_SET: + return + with self.state.lock: + start = self.state.active.pop(qid, None) + if start is None: + return + start_ms, _ = start + self.state.completed.add( + CompletedQuery( + start_ms=start_ms, + end_ms=ts_ms, + duration_ms=ts_ms - start_ms, + status=status, + start_line_offset=None, + ) + ) diff --git a/src/qlever/monitor/live_stubs.py b/src/qlever/monitor/live_stubs.py index db135517..a1e0146a 100644 --- a/src/qlever/monitor/live_stubs.py +++ b/src/qlever/monitor/live_stubs.py @@ -25,6 +25,7 @@ def get_live_metrics() -> list[MetricsCounts]: failed=3, timeout=0, cancelled=14, + unknown=0, p50=48, p95=3_900, slow=14, @@ -36,6 +37,7 @@ def get_live_metrics() -> list[MetricsCounts]: failed=11, timeout=2, cancelled=31, + unknown=1, p50=52, p95=4_100, slow=31, @@ -47,6 +49,7 @@ def get_live_metrics() -> list[MetricsCounts]: failed=None, timeout=None, cancelled=None, + unknown=None, p50=None, p95=None, slow=None, diff --git a/src/qlever/monitor/log_reader.py b/src/qlever/monitor/log_reader.py index 2595c406..fd7d8df3 100644 --- a/src/qlever/monitor/log_reader.py +++ b/src/qlever/monitor/log_reader.py @@ -16,14 +16,17 @@ class CompletedQuery(NamedTuple): """One start event paired with its matching end event. start_line_offset is the byte offset of the start line in the log, - kept so load_sparql_at can fetch the SPARQL text later. + kept so load_sparql_at can fetch the SPARQL text later. None when + the pair was built without scanning the file (the live tailer), + since those completions feed metrics only and never need their + SPARQL text re-read. """ start_ms: int end_ms: int duration_ms: int status: str - start_line_offset: int + start_line_offset: int | None def slice_string_value(line_bytes: bytes, key: bytes) -> str | None: @@ -159,6 +162,32 @@ def next_whole_line( return (line_start, ts_ms) +def read_last_timestamp(log_stream: BinaryIO, file_size: int) -> int | None: + """Return the ts_ms of the last complete line, or None. + + Reads a 32KB tail buffer and walks its lines backward, returning + the first ts_ms that parses. A trailing partial line is skipped by + its missing newline; a leading mid-line fragment is skipped because + it does not start with the ts_ms prefix. Doubles the buffer and + retries if nothing parses, capped at the whole file. + """ + if file_size == 0: + return None + tail_bytes = 32 * 1024 + while True: + probe = max(0, file_size - tail_bytes) + log_stream.seek(probe) + for line in reversed(log_stream.readlines()): + if not line.endswith(b"\n"): + continue + ts_ms = peek_ts_ms(line) + if ts_ms is not None: + return ts_ms + if probe == 0: + return None + tail_bytes *= 2 + + GALLOP_START = 128 * 1024 @@ -282,17 +311,15 @@ def pair_start_end_events( return (completed_queries, still_open) -def load_sparql_at(log_stream: BinaryIO, line_offset: int) -> str: - """Return the SPARQL text of the start line at line_offset. +def extract_query(line_bytes: bytes) -> str: + """Return the query field from a start line's bytes, or "". - Returns "" if the line is malformed or has no query field. Never - raises. Used only for the handful of rows that need their SPARQL - text shown, never on a bulk path. + The byte-slice fast path is unsafe for the query field because user + SPARQL can contain escaped quotes, so this path uses real json.loads. + Returns "" on any malformed input or missing field; never raises. """ - log_stream.seek(line_offset) - line = log_stream.readline() try: - obj = json.loads(line) + obj = json.loads(line_bytes) except (ValueError, TypeError): return "" if not isinstance(obj, dict): @@ -301,3 +328,13 @@ def load_sparql_at(log_stream: BinaryIO, line_offset: int) -> str: if not isinstance(query, str): return "" return query + + +def load_sparql_at(log_stream: BinaryIO, line_offset: int) -> str: + """Return the SPARQL text of the start line at line_offset. + + Used by callers that have only the offset, not the line bytes: + find_active_queries on survivors, and Historic on displayed rows. + """ + log_stream.seek(line_offset) + return extract_query(log_stream.readline()) diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py index cbfbc1df..90b2be40 100644 --- a/src/qlever/monitor/models.py +++ b/src/qlever/monitor/models.py @@ -11,6 +11,7 @@ @dataclass(frozen=True) class LiveSubtitle: + endpoint: str state: str n_active: int | None @@ -23,6 +24,7 @@ class MetricsCounts: failed: int | None timeout: int | None cancelled: int | None + unknown: int | None p50: int | None p95: int | None slow: int | None diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index 4566a5e8..c5c1e72e 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -1,16 +1,18 @@ from __future__ import annotations +from textual import work from textual.app import ComposeResult from textual.binding import Binding from textual.reactive import reactive from textual.screen import Screen from textual.widgets import Footer, Static -from qlever.monitor.live_stubs import ( +from qlever.monitor.live_data import ( get_live_metrics, get_live_query_rows, get_live_subtitle, ) +from qlever.monitor.live_engine import current_ms from qlever.monitor.models import SparqlContent from qlever.monitor.widgets.header_row import HeaderRow from qlever.monitor.widgets.metrics_row import MetricsRow @@ -18,6 +20,7 @@ from qlever.monitor.widgets.query_table import LiveQueryTable from qlever.monitor.widgets.sparql_pane import SparqlPane from qlever.monitor.widgets.status_row import LiveStatusRow +from qlever.util import is_qlever_server_alive TITLE = "QLever monitor-queries: Live" @@ -37,14 +40,84 @@ def compose(self) -> ComposeResult: center=Static(TITLE), right=NavPill("Historic >", target="historic"), ) - yield LiveStatusRow(get_live_subtitle(), self.app.sparql_endpoint) - yield MetricsRow(get_live_metrics()) - rows = sorted(get_live_query_rows(), key=lambda r: r.ts_ms) + state = self.app.live_state + slow_ms = self.app.slow_threshold * 1000 + yield LiveStatusRow( + get_live_subtitle( + state, self.app.server_status, self.app.sparql_endpoint + ) + ) + yield MetricsRow(get_live_metrics(state, slow_ms, current_ms())) + rows = sorted(get_live_query_rows(state), key=lambda row: row.ts_ms) yield LiveQueryTable(rows) yield Static("", id="table-status") yield SparqlPane() yield Footer() + def on_mount(self) -> None: + """Start the periodic refresh of table, metrics, and server ping.""" + self.table_timer = self.set_interval( + self.app.repaint_interval, self.refresh_table + ) + self.metrics_timer = self.set_interval(2.0, self.refresh_metrics) + self.live_server_check() + self.ping_timer = self.set_interval(5.0, self.live_server_check) + + def on_screen_suspend(self) -> None: + """Pause periodic UI work while Live isn't the active screen.""" + self.table_timer.pause() + self.metrics_timer.pause() + self.ping_timer.pause() + + def on_screen_resume(self) -> None: + """Resume periodic UI work and refresh server status immediately.""" + self.table_timer.resume() + self.metrics_timer.resume() + self.ping_timer.resume() + self.live_server_check() + + @work(thread=True, exclusive=True) + def live_server_check(self) -> None: + """Ping the server off the UI thread and push the result back.""" + server_alive = is_qlever_server_alive( + self.app.sparql_endpoint, max_time=2 + ) + self.app.call_from_thread(self.apply_server_status, server_alive) + + def apply_server_status(self, server_alive: bool) -> None: + """Cache the latest ping result, then redraw the subtitle once.""" + self.app.server_status = ( + "reachable" if server_alive else "unreachable" + ) + self.refresh_subtitle() + + def refresh_subtitle(self) -> None: + """Rebuild the subtitle from the currently cached app state.""" + self.query_one(LiveStatusRow).subtitle = get_live_subtitle( + self.app.live_state, + self.app.server_status, + self.app.sparql_endpoint, + ) + + def refresh_table(self) -> None: + """Push the current active set into the table; no-op when frozen.""" + if self.frozen: + return + state = self.app.live_state + rows = sorted(get_live_query_rows(state), key=lambda row: row.ts_ms) + self.query_one(LiveQueryTable).set_rows(rows) + self.refresh_subtitle() + + def refresh_metrics(self) -> None: + """Push fresh metric snapshots into MetricsRow; no-op when frozen.""" + if self.frozen: + return + state = self.app.live_state + slow_ms = self.app.slow_threshold * 1000 + self.query_one(MetricsRow).rows = get_live_metrics( + state, slow_ms, current_ms() + ) + def watch_frozen(self, frozen: bool) -> None: """Reflect the frozen state in the table status line.""" status = "Frozen - press f to resume" if frozen else "" diff --git a/src/qlever/monitor/widgets/metrics_row.py b/src/qlever/monitor/widgets/metrics_row.py index de1ba785..651bbf1c 100644 --- a/src/qlever/monitor/widgets/metrics_row.py +++ b/src/qlever/monitor/widgets/metrics_row.py @@ -2,6 +2,7 @@ from textual.app import ComposeResult from textual.containers import Vertical +from textual.reactive import reactive from textual.widgets import Static from qlever.monitor.models import MetricsCounts @@ -11,6 +12,7 @@ "failed": "$error", "timeout": "$error", "cancelled": "$warning", + "unknown": "$warning", "slow": "$warning", } @@ -21,6 +23,7 @@ ("failed", "count"), ("timeout", "count"), ("cancelled", "count"), + ("unknown", "count"), ("p50", "ms"), ("p95", "ms"), ("slow", "count"), @@ -95,12 +98,20 @@ class MetricsRow(Vertical): can_focus = False + rows = reactive(list, init=False) + def __init__(self, rows: list[MetricsCounts]) -> None: """Render each row in `rows` as one formatted text line.""" super().__init__() - self.rows = rows + self.set_reactive(MetricsRow.rows, rows) def compose(self) -> ComposeResult: widths = column_widths(self.rows) for row in self.rows: yield Static(format_row(row, widths)) + + def watch_rows(self, rows: list[MetricsCounts]) -> None: + """Repaint each line static when the row data changes.""" + widths = column_widths(rows) + for line, row in zip(self.query(Static), rows): + line.update(format_row(row, widths)) diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py index 4971193a..61361991 100644 --- a/src/qlever/monitor/widgets/query_table.py +++ b/src/qlever/monitor/widgets/query_table.py @@ -54,6 +54,30 @@ def on_mount(self) -> None: self.add_column("Query ID") self.add_column("Duration", width=8) self.add_column("SPARQL") + self.repaint_rows() + + def set_rows(self, rows: list[LiveQueryRow]) -> None: + """Replace the table rows, preserving the cursor position by qid.""" + cursor_qid = None + old_index = self.cursor_row + if self.query_rows and 0 <= old_index < len(self.query_rows): + cursor_qid = self.query_rows[old_index].qid + + self.query_rows = rows + self.clear(columns=False) + self.repaint_rows() + + if not self.query_rows: + return + if cursor_qid is not None: + for index, row in enumerate(self.query_rows): + if row.qid == cursor_qid: + self.move_cursor(row=index) + return + self.move_cursor(row=min(old_index, len(self.query_rows) - 1)) + + def repaint_rows(self) -> None: + """Add one DataTable row per entry in self.query_rows.""" now_ms = int(time.time() * 1000) for row in self.query_rows: self.add_row( diff --git a/src/qlever/monitor/widgets/status_row.py b/src/qlever/monitor/widgets/status_row.py index 28e2cfa2..3008095d 100644 --- a/src/qlever/monitor/widgets/status_row.py +++ b/src/qlever/monitor/widgets/status_row.py @@ -1,23 +1,24 @@ from __future__ import annotations +from textual.reactive import Reactive from textual.widgets import Static from qlever.monitor.models import LiveSubtitle -def format_subtitle(subtitle: LiveSubtitle, endpoint: str) -> str: +def format_subtitle(subtitle: LiveSubtitle) -> str: """Build the Live subtitle text from current state and server endpoint.""" - if subtitle.state == "connecting": - return f"[$warning]Connecting[/] to server at [b]{endpoint}[/]…" + if subtitle.state == "checking": + return f"[$warning]Checking[/] server at [b]{subtitle.endpoint}[/]…" if subtitle.state == "unreachable": return ( - f"[$error]Can't reach server[/] at [b]{endpoint}[/]" - f" · active queries unknown" + f"[$error]Can't reach server[/] at [b]{subtitle.endpoint}[/]" + f" · retrying every 5s" ) if subtitle.n_active is None: - return f"[$success]Server active[/] at [b]{endpoint}[/]" + return f"[$success]Server active[/] at [b]{subtitle.endpoint}[/]" return ( - f"[$success]Server active[/] at [b]{endpoint}[/]" + f"[$success]Server active[/] at [b]{subtitle.endpoint}[/]" f" · [b $success]{subtitle.n_active}[/] active queries" ) @@ -27,6 +28,12 @@ class LiveStatusRow(Static): can_focus = False - def __init__(self, subtitle: LiveSubtitle, endpoint: str) -> None: + subtitle = Reactive(None, init=False) + + def __init__(self, subtitle: LiveSubtitle) -> None: """Render the subtitle text computed from `subtitle` and `endpoint`.""" - super().__init__(format_subtitle(subtitle, endpoint)) + super().__init__(format_subtitle(subtitle)) + self.set_reactive(LiveStatusRow.subtitle, subtitle) + + def watch_subtitle(self, subtitle: LiveSubtitle) -> None: + self.update(format_subtitle(subtitle)) diff --git a/src/qlever/util.py b/src/qlever/util.py index ad2842c8..aaf3bebf 100644 --- a/src/qlever/util.py +++ b/src/qlever/util.py @@ -176,15 +176,18 @@ def pretty_printed_query( return None -def is_qlever_server_alive(endpoint_url: str) -> bool: - """ - Helper function that checks if a QLever server is running on the given - endpoint. Return `True` if the server is alive, `False` otherwise. - """ +def is_qlever_server_alive( + endpoint_url: str, max_time: int | None = None +) -> bool: + """Check if a QLever server is running on the given endpoint. + `max_time` (seconds) caps the curl invocation when set; default is + unbounded so existing callers behave as before. + """ message = "from the `qlever` CLI" + max_time_flag = f"--max-time {max_time} " if max_time is not None else "" curl_cmd = ( - f"curl -s {endpoint_url}/ping" + f"curl -s {max_time_flag}{endpoint_url}/ping" f" --data-urlencode msg={shlex.quote(message)}" ) log.debug(curl_cmd) diff --git a/test/qlever/conftest.py b/test/qlever/conftest.py index 9f825cb1..a10f6d42 100644 --- a/test/qlever/conftest.py +++ b/test/qlever/conftest.py @@ -3,6 +3,37 @@ import pytest +@pytest.fixture +def write_log(tmp_path): + """Write bytes to a fresh log file and return its Path.""" + + def make(data): + path = tmp_path / "q.log" + path.write_bytes(data) + return path + + return make + + +@pytest.fixture +def open_log(write_log): + """Write bytes to a log file and hand back an open (handle, size). + + Closes every handle it produced when the test finishes. + """ + handles = [] + + def make(data): + path = write_log(data) + handle = path.open("rb") + handles.append(handle) + return handle, path.stat().st_size + + yield make + for handle in handles: + handle.close() + + @pytest.fixture def mock_command(monkeypatch): def _mock(module_name: str, function_name: str, override=None): diff --git a/test/qlever/test_live_engine.py b/test/qlever/test_live_engine.py new file mode 100644 index 00000000..5311fa02 --- /dev/null +++ b/test/qlever/test_live_engine.py @@ -0,0 +1,424 @@ +"""Tests for CompletedQueries and the metrics snapshot helpers.""" + +import threading +import time + +from qlever.monitor.live_engine import ( + LIVE_HORIZON_MS, + CompletedQueries, + LiveLogReader, + LiveState, + MetricsSnapshot, + compute_live_metrics, + find_active_queries, + load_completed_history, + percentiles, +) +from qlever.monitor.log_reader import CompletedQuery + +NOW_MS = 1_700_000_000_000 +MIN_MS = 60_000 +WINDOW_5M = 5 * MIN_MS +WINDOW_15M = 15 * MIN_MS + + +def make_completed(end_ms, status="ok", duration_ms=100): + """Build a CompletedQuery with a self-consistent start_ms.""" + return CompletedQuery( + start_ms=end_ms - duration_ms, + end_ms=end_ms, + duration_ms=duration_ms, + status=status, + start_line_offset=0, + ) + + +def test_empty_history_returns_empty_snapshot(): + history = CompletedQueries() + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - WINDOW_5M, + ) + assert snap == MetricsSnapshot( + seen=0, ok=0, failed=0, timeout=0, cancelled=0, unknown=0, + slow=0, p50=None, p95=None, + ) + + +def test_status_counts_split_correctly(): + history = CompletedQueries() + for status in ("ok", "failed", "timeout", "cancelled", "unknown"): + history.add(make_completed(NOW_MS - MIN_MS, status=status)) + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - WINDOW_5M, + ) + assert snap.seen == 5 + assert snap.ok == 1 + assert snap.failed == 1 + assert snap.timeout == 1 + assert snap.cancelled == 1 + assert snap.unknown == 1 + + +def test_window_excludes_old_entries(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - 2 * MIN_MS)) + history.add(make_completed(NOW_MS - WINDOW_5M)) + history.add(make_completed(NOW_MS - 10 * MIN_MS)) + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - WINDOW_5M, + ) + assert snap.seen == 2 + + +def test_partial_coverage_returns_none(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - 2 * MIN_MS)) + snaps = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M, WINDOW_15M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - 10 * MIN_MS, + ) + assert snaps[0] is not None + assert snaps[0].seen == 1 + assert snaps[1] is None + + +def test_slow_threshold_independent_of_status(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - MIN_MS, status="ok", duration_ms=12_000)) + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=NOW_MS - WINDOW_5M, + ) + assert snap.ok == 1 + assert snap.slow == 1 + + +def test_drop_older_than_pops_left(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - 30 * MIN_MS)) + history.add(make_completed(NOW_MS - 20 * MIN_MS)) + history.add(make_completed(NOW_MS - MIN_MS)) + history.drop_older_than(NOW_MS - 15 * MIN_MS) + assert len(history.entries) == 1 + assert history.entries[0].end_ms == NOW_MS - MIN_MS + + +def test_percentiles_with_known_values(): + durations = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000] + p50, p95 = percentiles(durations) + assert 450 <= p50 <= 600 + assert 900 <= p95 <= 1100 + + +def test_data_start_equal_to_cutoff_returns_snapshot(): + history = CompletedQueries() + history.add(make_completed(NOW_MS - 2 * MIN_MS)) + cutoff = NOW_MS - WINDOW_5M + [snap] = history.metrics_for_windows( + now_ms=NOW_MS, + windows_ms=[WINDOW_5M], + slow_threshold_ms=10_000, + data_start_ms=cutoff, + ) + assert snap is not None + assert snap.seen == 1 + + +def start_line(ts_ms, qid, query="SELECT 1"): + return ( + f'{{"ts-ms":{ts_ms},"event":"start","qid":"{qid}",' + f'"client-ip":"x","query":"{query}"}}\n' + ).encode() + + +def end_line(ts_ms, qid, status="ok"): + return ( + f'{{"ts-ms":{ts_ms},"event":"end","qid":"{qid}",' + f'"status":"{status}"}}\n' + ).encode() + + +def test_find_active_queries_picks_up_unmatched_starts(write_log): + path = write_log( + start_line(1000, "q1", "SELECT a") + + start_line(2000, "q2", "SELECT b") + + end_line(3000, "q1") + ) + state, cut_offset, eof_ts = find_active_queries(path, window_pad_ms=10_000) + assert list(state.active) == ["q2"] + assert state.active["q2"] == (2000, "SELECT b") + assert cut_offset == path.stat().st_size + assert eof_ts == 3000 + + +def test_find_active_queries_loads_sparql_for_each_survivor(write_log): + path = write_log( + start_line(1000, "a", "SELECT alpha") + + start_line(2000, "b", "SELECT beta") + ) + state, _, _ = find_active_queries(path, window_pad_ms=10_000) + assert state.active["a"] == (1000, "SELECT alpha") + assert state.active["b"] == (2000, "SELECT beta") + + +def test_find_active_queries_empty_log_returns_empty_state(write_log): + path = write_log(b"") + state, cut_offset, eof_ts = find_active_queries(path, window_pad_ms=10_000) + assert state.active == {} + assert cut_offset == 0 + assert eof_ts == 0 + + +def make_reader(path, state, cut_offset=0, window_pad_ms=10_000, now_ms=3000): + """Build a LiveLogReader with a frozen clock for poll tests.""" + return LiveLogReader( + log_path=path, + state=state, + cut_offset=cut_offset, + window_pad_ms=window_pad_ms, + now_ms=lambda: now_ms, + ) + + +def test_poll_pairs_a_clean_start_and_end(write_log): + path = write_log(start_line(1000, "q1", "SELECT a") + end_line(2000, "q1")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.active == {} + [done] = state.completed.entries + assert done.start_ms == 1000 + assert done.end_ms == 2000 + assert done.duration_ms == 1000 + assert done.status == "ok" + assert done.start_line_offset is None + + +def test_poll_keeps_start_in_active_until_end_arrives(write_log): + path = write_log(start_line(1000, "q1", "SELECT a")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.active == {"q1": (1000, "SELECT a")} + assert len(state.completed.entries) == 0 + + +def test_poll_advances_cursor_so_second_poll_sees_new_lines(write_log): + first = start_line(1000, "q1") + end_line(2000, "q1") + path = write_log(first) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert len(state.completed.entries) == 1 + + path.write_bytes(first + start_line(3000, "q2") + end_line(4000, "q2", "failed")) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert len(state.completed.entries) == 2 + assert state.completed.entries[-1].status == "failed" + + +def test_poll_partial_trailing_line_is_left_for_next_poll(write_log): + full = start_line(1000, "q1") + end_line(2000, "q1") + truncated = full[:-5] + path = write_log(truncated) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert "q1" in state.active + assert len(state.completed.entries) == 0 + + path.write_bytes(full) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.active == {} + assert len(state.completed.entries) == 1 + + +def test_poll_skips_malformed_line_and_continues(write_log): + path = write_log( + b"not json at all\n" + + start_line(1000, "q1") + + end_line(2000, "q1") + ) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert len(state.completed.entries) == 1 + + +def test_poll_drops_end_without_matching_start(write_log): + path = write_log(end_line(2000, "ghost")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.active == {} + assert len(state.completed.entries) == 0 + + +def test_evict_stale_drops_actives_older_than_window_pad(write_log): + state = LiveState() + now = 1_000_000 + pad = 10_000 + state.active["fresh"] = (now - 1_000, "") + state.active["old"] = (now - 20_000, "") + reader = LiveLogReader( + log_path=write_log(b""), state=state, cut_offset=0, + window_pad_ms=pad, now_ms=lambda: now, + ) + reader.evict_stale() + assert list(state.active) == ["fresh"] + + +def test_evict_stale_drops_completed_older_than_one_hour(write_log): + state = LiveState() + now = 10_000_000 + state.completed.add(CompletedQuery( + start_ms=now - LIVE_HORIZON_MS - 1_000, + end_ms=now - LIVE_HORIZON_MS - 500, + duration_ms=500, status="ok", start_line_offset=None, + )) + state.completed.add(CompletedQuery( + start_ms=now - 1_000, end_ms=now - 500, + duration_ms=500, status="ok", start_line_offset=None, + )) + reader = LiveLogReader( + log_path=write_log(b""), state=state, cut_offset=0, + window_pad_ms=10_000, now_ms=lambda: now, + ) + reader.evict_stale() + assert len(state.completed.entries) == 1 + assert state.completed.entries[0].end_ms == now - 500 + + +def test_compute_live_metrics_returns_three_snapshots_when_data_covers_an_hour(): + state = LiveState() + state.completed.add(CompletedQuery( + start_ms=NOW_MS - 65 * MIN_MS, end_ms=NOW_MS - 60 * MIN_MS, + duration_ms=5 * MIN_MS, status="ok", start_line_offset=None, + )) + state.completed.add(make_completed(NOW_MS - 10 * MIN_MS)) + state.completed.add(make_completed(NOW_MS - 1 * MIN_MS)) + + snaps = compute_live_metrics(state, slow_threshold_ms=10_000, now_ms=NOW_MS) + assert len(snaps) == 3 + assert all(snap is not None for snap in snaps) + snap_5m, snap_15m, snap_1h = snaps + assert snap_5m.seen == 1 + assert snap_15m.seen == 2 + assert snap_1h.seen == 3 + + +def test_compute_live_metrics_returns_none_for_windows_past_oldest_entry(): + state = LiveState() + state.completed.add(make_completed(NOW_MS - 11 * MIN_MS)) + state.completed.add(make_completed(NOW_MS - 2 * MIN_MS)) + + snap_5m, snap_15m, snap_1h = compute_live_metrics( + state, slow_threshold_ms=10_000, now_ms=NOW_MS, + ) + assert snap_5m is not None + assert snap_5m.seen == 1 + assert snap_15m is None + assert snap_1h is None + + +def test_load_completed_history_loads_pairs_from_history(write_log): + path = write_log( + start_line(1000, "q1") + end_line(2000, "q1") + + start_line(3000, "q2") + end_line(4000, "q2", "failed") + ) + state = LiveState() + load_completed_history( + log_path=path, state=state, + cut_offset=path.stat().st_size, + now_ms=lambda: 5000, + ) + assert len(state.completed.entries) == 2 + assert state.completed.entries[0].start_ms == 1000 + assert state.completed.entries[0].end_ms == 2000 + assert state.completed.entries[1].status == "failed" + + +def test_load_completed_history_prepends_before_existing_tailer_entries(write_log): + path = write_log(start_line(1000, "old") + end_line(2000, "old")) + state = LiveState() + state.completed.add(CompletedQuery( + start_ms=5000, end_ms=6000, duration_ms=1000, + status="ok", start_line_offset=None, + )) + load_completed_history( + log_path=path, state=state, + cut_offset=path.stat().st_size, + now_ms=lambda: 7000, + ) + assert len(state.completed.entries) == 2 + assert state.completed.entries[0].end_ms == 2000 + assert state.completed.entries[1].end_ms == 6000 + + +def test_load_completed_history_drops_starts_without_a_matching_end(write_log): + path = write_log( + start_line(1000, "q1") + end_line(2000, "q1") + + start_line(3000, "orphan") + ) + state = LiveState() + load_completed_history( + log_path=path, state=state, + cut_offset=path.stat().st_size, + now_ms=lambda: 5000, + ) + assert len(state.completed.entries) == 1 + assert state.completed.entries[0].start_ms == 1000 + + +def test_poll_loop_picks_up_lines_appended_to_the_file(write_log): + path = write_log(b"") + state = LiveState() + reader = LiveLogReader( + log_path=path, state=state, cut_offset=0, + window_pad_ms=10_000, poll_interval=0.02, + now_ms=lambda: 0, + ) + stop_event = threading.Event() + + def run_loop(): + with path.open("rb") as stream: + while not stop_event.is_set(): + reader.poll(stream) + stop_event.wait(reader.poll_interval) + + thread = threading.Thread(target=run_loop, daemon=True) + thread.start() + try: + with path.open("ab") as appender: + appender.write(start_line(1000, "q1")) + appender.write(end_line(2000, "q1")) + deadline = time.monotonic() + 2.0 + while time.monotonic() < deadline: + if len(state.completed.entries) == 1: + break + time.sleep(0.02) + assert len(state.completed.entries) == 1 + finally: + stop_event.set() + thread.join() diff --git a/test/qlever/test_log_reader.py b/test/qlever/test_log_reader.py index 9679c9a7..8d232ec4 100644 --- a/test/qlever/test_log_reader.py +++ b/test/qlever/test_log_reader.py @@ -3,6 +3,7 @@ from qlever.monitor import log_reader from qlever.monitor.log_reader import ( CompletedQuery, + extract_query, load_sparql_at, next_whole_line, offset_for_ts, @@ -10,6 +11,7 @@ parse_line, parse_line_fallback, peek_ts_ms, + read_last_timestamp, scan_range, ) @@ -109,26 +111,6 @@ def test_fallback_status_outside_closed_set_returns_none(): THIRD_OFFSET = len(FIRST_LINE) + len(SECOND_LINE) -@pytest.fixture -def open_log(tmp_path): - """Write bytes to a log file and hand back an open (handle, size). - - Closes every handle it produced when the test finishes. - """ - handles = [] - - def make(data): - path = tmp_path / "q.log" - path.write_bytes(data) - handle = path.open("rb") - handles.append(handle) - return handle, len(data) - - yield make - for handle in handles: - handle.close() - - def test_next_whole_line_probe_zero_returns_first_line(open_log): log, size = open_log(FIRST_LINE + SECOND_LINE + THIRD_LINE) assert next_whole_line(log, 0, size) == (0, 1000) @@ -395,6 +377,30 @@ def test_pair_start_end_events_status_flows_through(status): assert completed[0].status == status +def test_extract_query_returns_query_field(): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"query":"SELECT * WHERE { ?s ?p ?o }"}\n' + ) + assert extract_query(line) == "SELECT * WHERE { ?s ?p ?o }" + + +def test_extract_query_handles_escaped_quote_in_query(): + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"query":"SELECT ?x WHERE { ?x ?p \\"a\\" }"}\n' + ) + assert extract_query(line) == 'SELECT ?x WHERE { ?x ?p "a" }' + + +def test_extract_query_missing_field_returns_empty(): + assert extract_query(END + b"\n") == "" + + +def test_extract_query_malformed_json_returns_empty(): + assert extract_query(b"this is not json\n") == "" + + def test_load_sparql_at_returns_query_text(open_log): line = ( b'{"ts-ms":1,"event":"start","qid":"q1",' @@ -447,3 +453,29 @@ def test_load_sparql_at_reads_the_line_at_the_given_offset(open_log): ) log, _ = open_log(first + second) assert load_sparql_at(log, len(first)) == "SECOND" + + +def test_read_last_timestamp_empty_file_returns_none(open_log): + log, size = open_log(b"") + assert read_last_timestamp(log, size) is None + + +def test_read_last_timestamp_returns_last_complete_line_ts(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE) + assert read_last_timestamp(log, size) == 2000 + + +def test_read_last_timestamp_skips_trailing_partial_line(open_log): + partial = b'{"ts-ms":3000,"event":"end","qid":"q2","status' + log, size = open_log(FIRST_LINE + SECOND_LINE + partial) + assert read_last_timestamp(log, size) == 2000 + + +def test_read_last_timestamp_grows_buffer_for_long_lines(open_log): + long_query = b"x" * (40 * 1024) + line = ( + b'{"ts-ms":1000,"event":"start","qid":"q1",' + b'"query":"' + long_query + b'"}\n' + ) + log, size = open_log(line) + assert read_last_timestamp(log, size) == 1000 From 54abc148f3ce8ececbcfa4d8604738604f9ac6bb Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sun, 24 May 2026 22:32:54 +0200 Subject: [PATCH 08/26] First working historic screen --- src/qlever/monitor/app.py | 10 +- src/qlever/monitor/historic_data.py | 193 ++++++++++ src/qlever/monitor/historic_stubs.py | 120 ------ src/qlever/monitor/live_data.py | 290 +++++++++++++- src/qlever/monitor/live_engine.py | 357 ------------------ src/qlever/monitor/live_stubs.py | 108 ------ src/qlever/monitor/log_reader.py | 49 ++- src/qlever/monitor/metrics.py | 112 ++++++ src/qlever/monitor/models.py | 5 +- src/qlever/monitor/views/historic.py | 181 ++++++--- src/qlever/monitor/views/live.py | 2 +- src/qlever/monitor/widgets/query_table.py | 42 +-- test/qlever/test_historic_data.py | 277 ++++++++++++++ ...{test_live_engine.py => test_live_data.py} | 35 +- test/qlever/test_log_reader.py | 73 +++- 15 files changed, 1140 insertions(+), 714 deletions(-) create mode 100644 src/qlever/monitor/historic_data.py delete mode 100644 src/qlever/monitor/historic_stubs.py delete mode 100644 src/qlever/monitor/live_engine.py delete mode 100644 src/qlever/monitor/live_stubs.py create mode 100644 src/qlever/monitor/metrics.py create mode 100644 test/qlever/test_historic_data.py rename test/qlever/{test_live_engine.py => test_live_data.py} (94%) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index f80118cc..22ffe67e 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -9,7 +9,7 @@ from textual.css.query import NoMatches from textual.worker import get_current_worker -from qlever.monitor.live_engine import ( +from qlever.monitor.live_data import ( LiveLogReader, LiveState, find_active_queries, @@ -45,6 +45,7 @@ class MonitorQueriesApp(App): ("t", "cycle_themes", "Change theme"), ("y", "copy_query", "Copy SPARQL"), ("p", "pretty_print", "Pretty print"), + ("c", "clear_query", "Clear SPARQL"), Binding( "shift+up", "scroll_sparql_up", @@ -163,6 +164,13 @@ def apply_pretty_result( return pane.pretty_text = result + def action_clear_query(self) -> None: + """Drop the displayed query, restoring the empty-state hint.""" + pane = self.screen.query_one(SparqlPane) + if pane.content is None: + return + pane.content = None + def action_scroll_sparql_up(self) -> None: """Scroll the overflowing SPARQL pane up one line.""" self.screen.query_one(SparqlScroll).scroll_up() diff --git a/src/qlever/monitor/historic_data.py b/src/qlever/monitor/historic_data.py new file mode 100644 index 00000000..8a96d79c --- /dev/null +++ b/src/qlever/monitor/historic_data.py @@ -0,0 +1,193 @@ +"""Data layer for the Historic screen. + +Reads one time window of the log into a cached `WindowData`, filters +that cache by display mode, and maps the survivors into the +`models.py` dataclasses the screen renders. A window change reruns +the scan; a mode change reuses the cache. +""" + +from pathlib import Path +from typing import NamedTuple + +from qlever.monitor.log_reader import ( + load_sparql_at, + offset_for_ts, + pair_start_end_events, + scan_range, +) +from qlever.monitor.metrics import MetricsSnapshot, metrics_for_ranges +from qlever.monitor.models import ( + ControlsState, + HistoricQueryRow, + MetricsCounts, +) + +# Used as the duration of orphaned queries. Negative so they sort last. +DURATION_UNKNOWN = -1 + + +class LoggedQuery(NamedTuple): + """One SPARQL query observed in the log over the current window. + + `end_ms` is `None` when the query started but has not ended yet, + either because it is still running or the server crashed before + writing the end event. `status` carries the raw end status, or + `"running"` for a still-open survivor. + """ + + start_ms: int + end_ms: int | None + status: str + qid: str + sparql: str + + +class WindowData(NamedTuple): + """Cached scan of one time window, reused across mode flips. + + `queries` is every `LoggedQuery` overlapping the window in any + mode (the ACTIVE superset), already loaded with `qid` and SPARQL + so a mode change is a pure in-memory filter. `metrics` is computed + over completions whose `end_ms` lies inside the window, so it is + the same for every mode. + """ + + queries: list[LoggedQuery] + metrics: MetricsSnapshot + + +def read_window( + log_path: Path, + window_start_ms: int, + window_end_ms: int, + pad_ms: int, + slow_threshold_ms: int, + log_end_ms: int, +) -> WindowData: + """Scan one time window of the log into a `WindowData` snapshot. + + The byte range scanned is the window padded by `pad_ms` on each + side so both events of a query straddling the window edge are + recovered. Pairs that lie entirely inside the pad are dropped + before `qid` and SPARQL are loaded, so the cache holds only rows + a mode predicate could keep. Metrics count completions whose + `end_ms` lies inside `[window_start_ms, window_end_ms]`. + + A still-open query is labelled `"running"` when its start sits + within `pad_ms` of `log_end_ms` (recent enough to plausibly still + be executing) and `"orphaned"` otherwise (the server must have + crashed before writing the end event). + """ + with log_path.open("rb") as log_stream: + file_size = log_path.stat().st_size + lo_offset = offset_for_ts( + log_stream, window_start_ms - pad_ms, file_size + ) + hi_bound = offset_for_ts(log_stream, window_end_ms + pad_ms, file_size) + events = scan_range(log_stream, lo_offset, hi_bound) + completed, still_open = pair_start_end_events(events) + + queries = [] + for pair in completed: + if pair.start_ms > window_end_ms or pair.end_ms < window_start_ms: + continue + qid, sparql = load_sparql_at(log_stream, pair.start_line_offset) + queries.append( + LoggedQuery( + start_ms=pair.start_ms, + end_ms=pair.end_ms, + status=pair.status, + qid=qid, + sparql=sparql, + ) + ) + running_cutoff_ms = log_end_ms - pad_ms + for start_ms, start_line_offset in still_open.values(): + if start_ms > window_end_ms: + continue + qid, sparql = load_sparql_at(log_stream, start_line_offset) + status = "running" if start_ms >= running_cutoff_ms else "orphaned" + queries.append( + LoggedQuery( + start_ms=start_ms, + end_ms=None, + status=status, + qid=qid, + sparql=sparql, + ) + ) + + metrics = metrics_for_ranges( + completed, [(window_start_ms, window_end_ms)], slow_threshold_ms + )[0] + return WindowData(queries=queries, metrics=metrics) + + +def filter_queries( + window_data: WindowData, + mode: str, + window_start_ms: int, + window_end_ms: int, +) -> list[LoggedQuery]: + """Select queries from a cached `WindowData` for the given mode. + + The cache is already the ACTIVE set (every query overlaps the + window), so ACTIVE is the fallthrough that returns it unchanged. + STARTS narrows to queries that began inside the window; ENDS + narrows to queries that finished inside it. Still-running queries + (`end_ms is None`) can satisfy STARTS but never ENDS. + """ + if mode == "STARTS": + return [ + query + for query in window_data.queries + if window_start_ms <= query.start_ms <= window_end_ms + ] + if mode == "ENDS": + return [ + query + for query in window_data.queries + if query.end_ms is not None + and window_start_ms <= query.end_ms <= window_end_ms + ] + return window_data.queries + + +def display_duration_ms(query: LoggedQuery, log_end_ms: int) -> int: + """Duration to show for a query row; DURATION_UNKNOWN for orphans.""" + if query.status == "orphaned": + return DURATION_UNKNOWN + return (query.end_ms or log_end_ms) - query.start_ms + + +def render_window( + window_data: WindowData, + controls: ControlsState, + log_end_ms: int, +) -> tuple[list[HistoricQueryRow], MetricsCounts]: + """Map a cached `WindowData` into the rows and metrics the UI consumes. + + Runs `filter_queries` for the current mode and translates each + surviving `LoggedQuery` into a `HistoricQueryRow`. `duration_ms` + is measured against `log_end_ms` for still-running queries, + against the recorded `end_ms` for completed ones, and reported as + DURATION_UNKNOWN for crash orphans. Metrics come straight from + `window_data.metrics`, labelled with the current window size. + """ + queries = filter_queries( + window_data, controls.mode, controls.start_ms, controls.end_ms + ) + historic_rows = [ + HistoricQueryRow( + qid=query.qid, + started_at_ms=query.start_ms, + duration_ms=display_duration_ms(query, log_end_ms), + status=query.status, + sparql=query.sparql, + ) + for query in queries + ] + metrics = MetricsCounts( + label=controls.window_size, **window_data.metrics._asdict() + ) + return historic_rows, metrics diff --git a/src/qlever/monitor/historic_stubs.py b/src/qlever/monitor/historic_stubs.py deleted file mode 100644 index 85237b42..00000000 --- a/src/qlever/monitor/historic_stubs.py +++ /dev/null @@ -1,120 +0,0 @@ -"""Stub data for the Historic screen. - -Replaced later by a real log-reader module exposing the same functions. -Only the data the current widgets consume is defined here. -""" - -import time - -from qlever.monitor.models import ( - ControlsState, - HistoricQueryRow, - MetricsCounts, - TimelineBounds, -) - - -def get_controls_state() -> ControlsState: - """Current window size, mode, and selected time range on Historic.""" - end_ms = int(time.time() * 1000) - return ControlsState( - window_size="15m", - mode="ACTIVE", - start_ms=end_ms - 15 * 60 * 1000, - end_ms=end_ms, - ) - - -def get_timeline_bounds() -> TimelineBounds: - """Full log span plus the slice the current window covers. - - The window sits near the end of the log so the marked block is - visibly offset rather than spanning the whole bar. - """ - now_ms = int(time.time() * 1000) - window_ms = 15 * 60 * 1000 - return TimelineBounds( - log_start_ms=now_ms - 52 * 60 * 1000, - log_end_ms=now_ms, - window_start_ms=now_ms - window_ms, - window_end_ms=now_ms, - ) - - -def get_historic_query_rows() -> list[HistoricQueryRow]: - """Finished queries matching the current mode in the current window.""" - start_ms = int(time.time() * 1000) - 15 * 60 * 1000 - long_query = ( - "PREFIX rdf: \n" - "PREFIX rdfs: \n" - "PREFIX wd: \n" - "PREFIX wdt: \n" - "PREFIX schema: \n" - "SELECT ?person ?personLabel ?birthPlaceLabel ?occupationLabel\n" - " (COUNT(DISTINCT ?work) AS ?works)\n" - "WHERE {\n" - " ?person wdt:P31 wd:Q5 .\n" - " ?person wdt:P106 ?occupation .\n" - " ?person wdt:P19 ?birthPlace .\n" - " ?person rdfs:label ?personLabel .\n" - " ?birthPlace rdfs:label ?birthPlaceLabel .\n" - " ?occupation rdfs:label ?occupationLabel .\n" - " OPTIONAL { ?work wdt:P50 ?person . }\n" - ' FILTER(LANG(?personLabel) = "en")\n' - ' FILTER(LANG(?birthPlaceLabel) = "en")\n' - ' FILTER(LANG(?occupationLabel) = "en")\n' - "}\n" - "GROUP BY ?person ?personLabel ?birthPlaceLabel ?occupationLabel\n" - "HAVING (COUNT(DISTINCT ?work) > 5)\n" - "ORDER BY DESC(?works)\n" - "LIMIT 1000" - ) - samples = [ - (50_000, 99_000, "ok", long_query), - (0, 42_000, "ok", "SELECT ?s ?p ?o WHERE { ?s ?p ?o } LIMIT 1000"), - (103_000, 8_000, "ok", "SELECT (COUNT(*) AS ?c) WHERE { ?s ?p ?o }"), - ( - 210_000, - 30_000, - "timeout", - "SELECT ?x WHERE { ?x rdfs:label ?l } ORDER BY ?l", - ), - ( - 354_000, - 1_200, - "failed", - "CONSTRUCT { ?x rdfs:label ?l } WHERE { ?x rdfs:label ?l }", - ), - ( - 601_000, - 5_500, - "cancelled", - "SELECT ?p (COUNT(*) AS ?n) WHERE { ?s ?p ?o } GROUP BY ?p", - ), - ] - return [ - HistoricQueryRow( - qid=f"q-{i:04x}", - started_at_ms=start_ms + offset_ms, - duration_ms=duration_ms, - status=status, - sparql=sparql, - ) - for i, (offset_ms, duration_ms, status, sparql) in enumerate(samples) - ] - - -def get_historic_metrics() -> MetricsCounts: - """Single metrics line for the current window; label is its width.""" - return MetricsCounts( - label="15m", - seen=488, - ok=466, - failed=11, - timeout=2, - cancelled=9, - unknown=0, - p50=120, - p95=1840, - slow=4, - ) diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index 411d5e7e..1b2028ba 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -1,20 +1,271 @@ -"""Adapters that translate live engine state into UI models. +"""Data layer for the Live screen. -The Live screen calls these getters; they read shared state under -the engine's lock when iteration is involved and return frozen -model dataclasses the widgets consume. No Textual imports, no -log I/O. +Owns the in-memory state the Live screen renders: the queries currently +running on the server and the rolling 1h history of completed queries +that feeds the 5m, 15m, and 1h metric rows. The tailer is the only +steady-state writer; the screen's refresh callbacks read under the same +lock and return the frozen `models.py` dataclasses the widgets consume. """ -from qlever.monitor.live_engine import ( - LiveState, +import json +import threading +import time +from collections import deque +from collections.abc import Callable +from dataclasses import dataclass, field +from pathlib import Path +from typing import BinaryIO + +from qlever.monitor.log_reader import ( + STATUS_SET, + CompletedQuery, + load_sparql_at, + offset_for_ts, + pair_start_end_events, + read_last_timestamp, + scan_range, +) +from qlever.monitor.metrics import ( + EMPTY_FIELDS, MetricsSnapshot, - compute_live_metrics, + metrics_for_ranges, ) from qlever.monitor.models import LiveQueryRow, LiveSubtitle, MetricsCounts +LIVE_METRIC_WINDOWS_MS = [5 * 60_000, 15 * 60_000, 60 * 60_000] +LIVE_HORIZON_MS = LIVE_METRIC_WINDOWS_MS[2] LIVE_METRIC_LABELS = ["last 5m", "last 15m", "last 1h"] -EMPTY_FIELDS = dict.fromkeys(MetricsSnapshot._fields) + + +def current_ms() -> int: + """Wall-clock now in epoch milliseconds.""" + return time.time_ns() // 1_000_000 + + +class CompletedQueries: + """Rolling 1h history of completed queries. + + Storage is a deque ordered by insertion, which is the tailer's + forward read order. Wall-clock is monotonic in steady state so + older entries sit at the left and trimming is one popleft loop. + """ + + def __init__(self) -> None: + self.entries = deque() + + def add(self, query: CompletedQuery) -> None: + """Append a completed query to the history. + + Caller writes in tailer order, which is wall-clock order in + steady state; entries remain sorted by end_ms for trimming. + """ + self.entries.append(query) + + def drop_older_than(self, cutoff_ms: int) -> None: + """Discard completions that ended before cutoff_ms. + + Called on each tailer tick with cutoff_ms = now_ms - 1h so the + history stays bounded even during quiet periods. + """ + while self.entries and self.entries[0].end_ms < cutoff_ms: + self.entries.popleft() + + def metrics_for_windows( + self, + now_ms: int, + windows_ms: list[int], + slow_threshold_ms: int, + data_start_ms: int, + ) -> list[MetricsSnapshot | None]: + """Compute one snapshot per window in a single deque walk. + + Each window is the interval [now_ms - width, now_ms]. + data_start_ms is the oldest timestamp the caller has loaded + complete data for; a window starting before that gets None + instead of a snapshot, so the widget can render the row as + not-yet-ready. Returns snapshots in the same order as windows_ms. + """ + ranges = [(now_ms - width, now_ms) for width in windows_ms] + snapshots = metrics_for_ranges( + self.entries, ranges, slow_threshold_ms + ) + return [ + None if lo_ms < data_start_ms else snapshot + for (lo_ms, _), snapshot in zip(ranges, snapshots) + ] + + +@dataclass +class LiveState: + """Shared in-memory state for the Live screen. + + The tailer is the steady-state writer; the boot-time loader prepends + historical completions once. UI refresh callbacks read under the + same lock and build metric rows on the fly. + """ + + completed: CompletedQueries = field(default_factory=CompletedQueries) + active: dict[str, tuple[int, str]] = field(default_factory=dict) + lock: threading.Lock = field(default_factory=threading.Lock) + + +def find_active_queries( + log_path: Path, + window_pad_ms: int, +) -> tuple[LiveState, int, int]: + """Scan the log's tail to determine queries still running. + + Reads [eof_ts - window_pad_ms, EOF], pairs start/end events, and + records the unmatched starts as the active set. The completed + pairs found in this window are discarded; backfill rediscovers + them. Loads SPARQL text for each survivor by stored offset. + + Returns (state, cut_offset, eof_ts). cut_offset is where the tailer + starts reading forward; eof_ts is the timestamp of the log's last + complete line, or 0 when the log holds none. + """ + state = LiveState() + with log_path.open("rb") as log_stream: + file_size = log_path.stat().st_size + eof_ts = read_last_timestamp(log_stream, file_size) + if eof_ts is None: + return (state, file_size, 0) + + lo_offset = offset_for_ts( + log_stream, eof_ts - window_pad_ms, file_size + ) + events = scan_range(log_stream, lo_offset, file_size) + _, still_open = pair_start_end_events(events) + + for qid, (start_ms, start_line_offset) in still_open.items(): + _, sparql = load_sparql_at(log_stream, start_line_offset) + state.active[qid] = (start_ms, sparql) + + return (state, file_size, eof_ts) + + +def load_completed_history( + log_path: Path, + state: LiveState, + cut_offset: int, + now_ms: Callable[[], int] = current_ms, +) -> None: + """Scan the hour before cut_offset and seed completed history. + + One-shot: callers spawn this on a daemon thread at startup if they + want it off the main loop. After it returns, the tailer is the only + writer of state.completed. + """ + with log_path.open("rb") as log_stream: + oldest_wanted_ms = now_ms() - LIVE_HORIZON_MS + scan_start_offset = offset_for_ts( + log_stream, oldest_wanted_ms, cut_offset + ) + events = scan_range(log_stream, scan_start_offset, cut_offset) + older_completions, _ = pair_start_end_events(events) + + with state.lock: + tailer_entries = state.completed.entries + state.completed.entries = deque(older_completions) + state.completed.entries.extend(tailer_entries) + + +class LiveLogReader: + """Tail cursor and per-poll logic for the live query log. + + Sole writer of LiveState in steady state. Each poll() reads any + whole lines appended since the last cursor, dispatches them into + active / completed under the state lock, and trims stale entries. + The run loop is owned by the caller. + """ + + def __init__( + self, + log_path: Path, + state: LiveState, + cut_offset: int, + window_pad_ms: int, + poll_interval: float = 0.2, + now_ms: Callable[[], int] = current_ms, + ) -> None: + self.log_path = log_path + self.state = state + self.cursor = cut_offset + self.window_pad_ms = window_pad_ms + self.poll_interval = poll_interval + self.now_ms = now_ms + + def poll(self, log_stream: BinaryIO) -> None: + """Process any whole lines appended since the last poll.""" + log_stream.seek(self.cursor) + for line in log_stream: + if not line.endswith(b"\n"): + break + self.cursor += len(line) + self.handle_line(line) + self.evict_stale() + + def evict_stale(self) -> None: + """Drop completions older than 1h and actives older than 2t. + + Active eviction guards against a missing end event: any query + whose start is older than the 2t safety horizon is treated as + gone, since the server contract says no real query lives that + long. + """ + now = self.now_ms() + completed_cutoff = now - LIVE_HORIZON_MS + active_cutoff = now - self.window_pad_ms + with self.state.lock: + self.state.completed.drop_older_than(completed_cutoff) + stale = [ + qid + for qid, (start_ms, _) in self.state.active.items() + if start_ms < active_cutoff + ] + for qid in stale: + del self.state.active[qid] + + def handle_line(self, line: bytes) -> None: + """Parse one whole line and dispatch it into LiveState.""" + try: + obj = json.loads(line) + except (ValueError, TypeError): + return + if not isinstance(obj, dict): + return + ts_ms = obj.get("ts-ms") + event = obj.get("event") + qid = obj.get("qid") + if not isinstance(ts_ms, int) or not isinstance(qid, str): + return + + if event == "start": + sparql = obj.get("query") + if not isinstance(sparql, str): + sparql = "" + with self.state.lock: + self.state.active[qid] = (ts_ms, sparql) + return + + if event == "end": + status = obj.get("status") + if status not in STATUS_SET: + return + with self.state.lock: + start = self.state.active.pop(qid, None) + if start is None: + return + start_ms, _ = start + self.state.completed.add( + CompletedQuery( + start_ms=start_ms, + end_ms=ts_ms, + duration_ms=ts_ms - start_ms, + status=status, + start_line_offset=None, + ) + ) def get_live_subtitle( @@ -41,8 +292,25 @@ def get_live_metrics( slow_threshold_ms: int, now_ms: int, ) -> list[MetricsCounts]: - """Three rolling-window metric rows for the Live screen.""" - snapshots = compute_live_metrics(state, slow_threshold_ms, now_ms) + """Three rolling-window metric rows for the Live screen. + + Takes a short-locked deque copy so the metric scan and percentile + work runs lock-free while the tailer keeps appending. Windows whose + cutoff predates the oldest entry render as an all-None row so the + widget shows "..." until the deque covers that range. + """ + with state.lock: + history = CompletedQueries() + history.entries = deque(state.completed.entries) + oldest_ms = ( + history.entries[0].start_ms if history.entries else now_ms + ) + snapshots = history.metrics_for_windows( + now_ms=now_ms, + windows_ms=LIVE_METRIC_WINDOWS_MS, + slow_threshold_ms=slow_threshold_ms, + data_start_ms=oldest_ms, + ) return [ MetricsCounts( label=label, diff --git a/src/qlever/monitor/live_engine.py b/src/qlever/monitor/live_engine.py deleted file mode 100644 index 020852ad..00000000 --- a/src/qlever/monitor/live_engine.py +++ /dev/null @@ -1,357 +0,0 @@ -"""In-memory state for the Live tab. - -Holds the queries currently running on the server and the rolling -history of completed queries that feeds the 5m, 15m, and 1h metric -rows. A single writer thread mutates the state; readers take a short -locked snapshot. Pure data layer: no Textual imports. -""" - -import json -import statistics -import threading -import time -from collections import deque -from collections.abc import Callable -from dataclasses import dataclass, field -from pathlib import Path -from typing import BinaryIO, NamedTuple - -from qlever.monitor.log_reader import ( - STATUS_SET, - CompletedQuery, - load_sparql_at, - offset_for_ts, - pair_start_end_events, - read_last_timestamp, - scan_range, -) - -LIVE_METRIC_WINDOWS_MS = [5 * 60_000, 15 * 60_000, 60 * 60_000] -LIVE_HORIZON_MS = LIVE_METRIC_WINDOWS_MS[2] - - -def current_ms() -> int: - """Wall-clock now in epoch milliseconds.""" - return time.time_ns() // 1_000_000 - - -class MetricsSnapshot(NamedTuple): - """Counts and percentiles for completed queries in one time window. - - The status counts sum to seen by construction: - seen == ok + failed + timeout + cancelled + unknown - slow is independent: a query can be ok and slow. p50 and p95 are - duration_ms values across all completions in the window, None when - the window is empty. - """ - - seen: int - ok: int - failed: int - timeout: int - cancelled: int - unknown: int - slow: int - p50: int | None - p95: int | None - - -def percentiles(durations_ms: list[int]) -> tuple[int | None, int | None]: - """Return (p50, p95) over durations_ms, or (None, None) if empty.""" - if not durations_ms: - return (None, None) - if len(durations_ms) == 1: - only = durations_ms[0] - return (only, only) - cuts = statistics.quantiles(durations_ms, n=100) - return (round(cuts[49]), round(cuts[94])) - - -def build_snapshot( - counts: dict[str, int], - slow: int, - durations_ms: list[int], -) -> MetricsSnapshot: - """Wrap one set of accumulators into a MetricsSnapshot.""" - p50, p95 = percentiles(durations_ms) - return MetricsSnapshot( - seen=len(durations_ms), - ok=counts["ok"], - failed=counts["failed"], - timeout=counts["timeout"], - cancelled=counts["cancelled"], - unknown=counts["unknown"], - slow=slow, - p50=p50, - p95=p95, - ) - - -class CompletedQueries: - """Rolling 1h history of completed queries. - - Storage is a deque ordered by insertion, which is the tailer's - forward read order. Wall-clock is monotonic in steady state so - older entries sit at the left and trimming is one popleft loop. - """ - - def __init__(self) -> None: - self.entries = deque() - - def add(self, query: CompletedQuery) -> None: - """Append a completed query to the history. - - Caller writes in tailer order, which is wall-clock order in - steady state; entries remain sorted by end_ms for trimming. - """ - self.entries.append(query) - - def drop_older_than(self, cutoff_ms: int) -> None: - """Discard completions that ended before cutoff_ms. - - Called on each tailer tick with cutoff_ms = now_ms - 1h so the - history stays bounded even during quiet periods. - """ - while self.entries and self.entries[0].end_ms < cutoff_ms: - self.entries.popleft() - - def metrics_for_windows( - self, - now_ms: int, - windows_ms: list[int], - slow_threshold_ms: int, - data_start_ms: int, - ) -> list[MetricsSnapshot | None]: - """Compute one snapshot per window in a single deque walk. - - data_start_ms is the oldest timestamp the caller has loaded - complete data for. A window whose cutoff falls before that gets - None instead of a snapshot, so the widget can render the row as - not-yet-ready. Returns snapshots in the same order as windows_ms. - """ - cutoffs = [now_ms - w for w in windows_ms] - counts = [ - {"ok": 0, "failed": 0, "timeout": 0, "cancelled": 0, "unknown": 0} - for _ in windows_ms - ] - slow = [0] * len(windows_ms) - durations = [[] for _ in windows_ms] - - for entry in self.entries: - if entry.end_ms > now_ms: - continue - for i, cutoff in enumerate(cutoffs): - if entry.end_ms >= cutoff: - counts[i][entry.status] += 1 - if entry.duration_ms >= slow_threshold_ms: - slow[i] += 1 - durations[i].append(entry.duration_ms) - - return [ - None - if cutoffs[i] < data_start_ms - else build_snapshot(counts[i], slow[i], durations[i]) - for i in range(len(windows_ms)) - ] - - -@dataclass -class LiveState: - """Shared in-memory state for the Live tab. - - The tailer is the steady-state writer; the boot-time loader prepends - historical completions once. The UI render callback reads under the - same lock and computes metrics on the fly via compute_live_metrics. - """ - - completed: CompletedQueries = field(default_factory=CompletedQueries) - active: dict[str, tuple[int, str]] = field(default_factory=dict) - lock: threading.Lock = field(default_factory=threading.Lock) - - -def compute_live_metrics( - state: LiveState, - slow_threshold_ms: int, - now_ms: int, -) -> list[MetricsSnapshot | None]: - """Compute one snapshot per live window over the completed history. - - Takes a short-locked deque copy so the actual scan and percentile - work runs lock-free while the tailer keeps appending. Windows whose - cutoff predates the oldest entry come back as None so the UI can - render a "not yet covered" sentinel. - """ - with state.lock: - history_snapshot = CompletedQueries() - history_snapshot.entries = deque(state.completed.entries) - - oldest_ms = ( - history_snapshot.entries[0].start_ms - if history_snapshot.entries - else now_ms - ) - return history_snapshot.metrics_for_windows( - now_ms=now_ms, - windows_ms=LIVE_METRIC_WINDOWS_MS, - slow_threshold_ms=slow_threshold_ms, - data_start_ms=oldest_ms, - ) - - -def find_active_queries( - log_path: Path, - window_pad_ms: int, -) -> tuple[LiveState, int, int]: - """Scan the log's tail to determine queries still running. - - Reads [eof_ts - window_pad_ms, EOF], pairs start/end events, and - records the unmatched starts as the active set. The completed - pairs found in this window are discarded; backfill rediscovers - them. Loads SPARQL text for each survivor by stored offset. - - Returns (state, cut_offset, eof_ts). cut_offset is where the tailer - starts reading forward; eof_ts is the timestamp of the log's last - complete line, or 0 when the log holds none. - """ - state = LiveState() - with log_path.open("rb") as log_stream: - file_size = log_path.stat().st_size - eof_ts = read_last_timestamp(log_stream, file_size) - if eof_ts is None: - return (state, file_size, 0) - - lo_offset = offset_for_ts( - log_stream, eof_ts - window_pad_ms, file_size - ) - events = scan_range(log_stream, lo_offset, file_size) - _, still_open = pair_start_end_events(events) - - for qid, (start_ms, start_line_offset) in still_open.items(): - sparql = load_sparql_at(log_stream, start_line_offset) - state.active[qid] = (start_ms, sparql) - - return (state, file_size, eof_ts) - - -def load_completed_history( - log_path: Path, - state: LiveState, - cut_offset: int, - now_ms: Callable[[], int] = current_ms, -) -> None: - """Scan the hour before cut_offset and seed completed history. - - One-shot: callers spawn this on a daemon thread at startup if they - want it off the main loop. After it returns, the tailer is the only - writer of state.completed. - """ - with log_path.open("rb") as log_stream: - oldest_wanted_ms = now_ms() - LIVE_HORIZON_MS - scan_start_offset = offset_for_ts( - log_stream, oldest_wanted_ms, cut_offset - ) - events = scan_range(log_stream, scan_start_offset, cut_offset) - older_completions, _ = pair_start_end_events(events) - - with state.lock: - tailer_entries = state.completed.entries - state.completed.entries = deque(older_completions) - state.completed.entries.extend(tailer_entries) - - -class LiveLogReader: - """Tail cursor and per-poll logic for the live query log. - - Sole writer of LiveState in steady state. Each poll() reads any - whole lines appended since the last cursor, dispatches them into - active / completed under the state lock, and trims stale entries. - The run loop is owned by the caller. - """ - - def __init__( - self, - log_path: Path, - state: LiveState, - cut_offset: int, - window_pad_ms: int, - poll_interval: float = 0.2, - now_ms: Callable[[], int] = current_ms, - ) -> None: - self.log_path = log_path - self.state = state - self.cursor = cut_offset - self.window_pad_ms = window_pad_ms - self.poll_interval = poll_interval - self.now_ms = now_ms - - def poll(self, log_stream: BinaryIO) -> None: - """Process any whole lines appended since the last poll.""" - log_stream.seek(self.cursor) - for line in log_stream: - if not line.endswith(b"\n"): - break - self.cursor += len(line) - self.handle_line(line) - self.evict_stale() - - def evict_stale(self) -> None: - """Drop completions older than 1h and actives older than 2t. - - Active eviction guards against a missing end event: any query - whose start is older than the 2t safety horizon is treated as - gone, since the server contract says no real query lives that - long. - """ - now = self.now_ms() - completed_cutoff = now - LIVE_HORIZON_MS - active_cutoff = now - self.window_pad_ms - with self.state.lock: - self.state.completed.drop_older_than(completed_cutoff) - stale = [ - qid - for qid, (start_ms, _) in self.state.active.items() - if start_ms < active_cutoff - ] - for qid in stale: - del self.state.active[qid] - - def handle_line(self, line: bytes) -> None: - """Parse one whole line and dispatch it into LiveState.""" - try: - obj = json.loads(line) - except (ValueError, TypeError): - return - if not isinstance(obj, dict): - return - ts_ms = obj.get("ts-ms") - event = obj.get("event") - qid = obj.get("qid") - if not isinstance(ts_ms, int) or not isinstance(qid, str): - return - - if event == "start": - sparql = obj.get("query") - if not isinstance(sparql, str): - sparql = "" - with self.state.lock: - self.state.active[qid] = (ts_ms, sparql) - return - - if event == "end": - status = obj.get("status") - if status not in STATUS_SET: - return - with self.state.lock: - start = self.state.active.pop(qid, None) - if start is None: - return - start_ms, _ = start - self.state.completed.add( - CompletedQuery( - start_ms=start_ms, - end_ms=ts_ms, - duration_ms=ts_ms - start_ms, - status=status, - start_line_offset=None, - ) - ) diff --git a/src/qlever/monitor/live_stubs.py b/src/qlever/monitor/live_stubs.py deleted file mode 100644 index a1e0146a..00000000 --- a/src/qlever/monitor/live_stubs.py +++ /dev/null @@ -1,108 +0,0 @@ -"""Stub data for the Live screen. - -Replaced later by a real log-reader module exposing the same functions. -UI widgets call these getters rather than reading module-level constants, -so the swap is mechanical. -""" - -import time - -from qlever.monitor.models import LiveQueryRow, LiveSubtitle, MetricsCounts - - -def get_live_subtitle() -> LiveSubtitle: - """Current Live-screen subtitle state.""" - return LiveSubtitle(state="reachable", n_active=3) - - -def get_live_metrics() -> list[MetricsCounts]: - """Three rolling-window metric rows shown on Live.""" - return [ - MetricsCounts( - label="last 5m", - seen=142, - ok=125, - failed=3, - timeout=0, - cancelled=14, - unknown=0, - p50=48, - p95=3_900, - slow=14, - ), - MetricsCounts( - label="last 15m", - seen=438, - ok=394, - failed=11, - timeout=2, - cancelled=31, - unknown=1, - p50=52, - p95=4_100, - slow=31, - ), - MetricsCounts( - label="last 1h", - seen=None, - ok=None, - failed=None, - timeout=None, - cancelled=None, - unknown=None, - p50=None, - p95=None, - slow=None, - ), - ] - - -def get_live_query_rows() -> list[LiveQueryRow]: - """Currently active queries (a start with no matching end).""" - now_ms = int(time.time() * 1000) - return [ - LiveQueryRow( - qid="q-8a4f12345678900", - ts_ms=now_ms - 18_000, - sparql=( - "PREFIX foaf: \n" - "PREFIX rdf: \n" - "SELECT ?person ?name (COUNT(?friend) AS ?friends)\n" - "WHERE {\n" - " ?person rdf:type foaf:Person ;\n" - " foaf:name ?name ;\n" - " foaf:knows ?friend .\n" - " ?friend foaf:name ?friendName .\n" - ' FILTER (STRLEN(?name) > 3 && LANG(?name) = "en")\n' - "}\n" - "GROUP BY ?person ?name\n" - "HAVING (COUNT(?friend) > 5)\n" - "ORDER BY DESC(?friends) ?name\n" - "LIMIT 100" - ), - ), - LiveQueryRow( - qid="q-8a52242534636", - ts_ms=now_ms - 5_000, - sparql="SELECT (COUNT(*) AS ?c) WHERE { ?s ?p ?o }", - ), - LiveQueryRow( - qid="q-8a5b4356476474567456", - ts_ms=now_ms - 2_000, - sparql=( - "PREFIX rdfs: \n" - "PREFIX geo: \n" - "CONSTRUCT {\n" - " ?city rdfs:label ?label ;\n" - " geo:hasGeometry ?geom .\n" - "}\n" - "WHERE {\n" - " ?city rdf:type :City ;\n" - " rdfs:label ?label ;\n" - " geo:hasGeometry ?geom .\n" - ' FILTER (lang(?label) = "en")\n' - " FILTER EXISTS { ?city :population ?pop . FILTER (?pop > 100000) }\n" - "}" - ), - ), - ] diff --git a/src/qlever/monitor/log_reader.py b/src/qlever/monitor/log_reader.py index fd7d8df3..594696db 100644 --- a/src/qlever/monitor/log_reader.py +++ b/src/qlever/monitor/log_reader.py @@ -162,6 +162,25 @@ def next_whole_line( return (line_start, ts_ms) +def read_first_timestamp(log_stream: BinaryIO, file_size: int) -> int | None: + """Return the ts_ms of the first complete line, or None. + + Scans forward from the start of the file, skipping malformed + lines, and returns the first ts_ms that parses. + """ + if file_size == 0: + return None + log_stream.seek(0) + while True: + line = log_stream.readline() + # No newline means EOF or a partial line: nothing complete left. + if not line.endswith(b"\n"): + return None + ts_ms = peek_ts_ms(line) + if ts_ms is not None: + return ts_ms + + def read_last_timestamp(log_stream: BinaryIO, file_size: int) -> int | None: """Return the ts_ms of the last complete line, or None. @@ -311,30 +330,36 @@ def pair_start_end_events( return (completed_queries, still_open) -def extract_query(line_bytes: bytes) -> str: - """Return the query field from a start line's bytes, or "". +def extract_qid_and_query(line_bytes: bytes) -> tuple[str, str]: + """Return (qid, query) from a start line's bytes, or ("", ""). The byte-slice fast path is unsafe for the query field because user SPARQL can contain escaped quotes, so this path uses real json.loads. - Returns "" on any malformed input or missing field; never raises. + Both fields come from the same parse so cost is one json.loads per + call. Returns ("", "") on any malformed input or missing field; + never raises. """ try: obj = json.loads(line_bytes) except (ValueError, TypeError): - return "" + return ("", "") if not isinstance(obj, dict): - return "" + return ("", "") + qid = obj.get("qid") query = obj.get("query") - if not isinstance(query, str): - return "" - return query + if not isinstance(qid, str) or not isinstance(query, str): + return ("", "") + return (qid, query) -def load_sparql_at(log_stream: BinaryIO, line_offset: int) -> str: - """Return the SPARQL text of the start line at line_offset. +def load_sparql_at( + log_stream: BinaryIO, line_offset: int +) -> tuple[str, str]: + """Return (qid, sparql) for the start line at line_offset. Used by callers that have only the offset, not the line bytes: - find_active_queries on survivors, and Historic on displayed rows. + find_active_queries on survivors (qid already known, ignored), and + Historic on displayed rows (needs both). """ log_stream.seek(line_offset) - return extract_query(log_stream.readline()) + return extract_qid_and_query(log_stream.readline()) diff --git a/src/qlever/monitor/metrics.py b/src/qlever/monitor/metrics.py new file mode 100644 index 00000000..d6910811 --- /dev/null +++ b/src/qlever/monitor/metrics.py @@ -0,0 +1,112 @@ +"""Summarises a group of finished queries into one metrics row. + +The Live and Historic screens both display these counts and timings. +Keeping the math here lets both engines share it without importing +each other. +""" + +import statistics +from collections.abc import Iterable +from typing import NamedTuple + +from qlever.monitor.log_reader import CompletedQuery + + +class MetricsSnapshot(NamedTuple): + """One metrics row: counts and timings for a group of finished queries. + + Every finished query has exactly one status, so the status counts + always add up to seen: + seen == ok + failed + timeout + cancelled + unknown + slow is counted separately, since a query can be both ok and slow. + p50 and p95 are the median and 95th-percentile run times in + milliseconds, or None when there are no queries to measure. + """ + + seen: int + ok: int + failed: int + timeout: int + cancelled: int + unknown: int + slow: int + p50: int | None + p95: int | None + + +# All-None counts, spread into MetricsCounts when a row has no data yet. +EMPTY_FIELDS = dict.fromkeys(MetricsSnapshot._fields) + + +def percentiles(durations_ms: list[int]) -> tuple[int | None, int | None]: + """Return the median (p50) and 95th-percentile (p95) run times. + + Both are in milliseconds. Returns (None, None) when the list is + empty. + """ + if not durations_ms: + return (None, None) + if len(durations_ms) == 1: + only = durations_ms[0] + return (only, only) + cuts = statistics.quantiles(durations_ms, n=100) + return (round(cuts[49]), round(cuts[94])) + + +def build_snapshot( + counts: dict[str, int], + slow: int, + durations_ms: list[int], +) -> MetricsSnapshot: + """Build one MetricsSnapshot from already-counted query results. + + counts holds the per-status totals, slow is how many queries ran + longer than the slow threshold, and durations_ms is every query's + run time (its length is the total seen). + """ + p50, p95 = percentiles(durations_ms) + return MetricsSnapshot( + seen=len(durations_ms), + ok=counts["ok"], + failed=counts["failed"], + timeout=counts["timeout"], + cancelled=counts["cancelled"], + unknown=counts["unknown"], + slow=slow, + p50=p50, + p95=p95, + ) + + +def metrics_for_ranges( + completed: Iterable[CompletedQuery], + ranges: list[tuple[int, int]], + slow_threshold_ms: int, +) -> list[MetricsSnapshot]: + """Summarise completed queries into one snapshot per time range. + + Each range is an absolute (lo_ms, hi_ms) interval; a completed + query counts toward a range when its end_ms falls inside it. Walks + completed once, filling every range's tally in parallel, so N + ranges still cost a single pass. Returns one snapshot per range, in + the order the ranges were given. + """ + counts = [ + {"ok": 0, "failed": 0, "timeout": 0, "cancelled": 0, "unknown": 0} + for _ in ranges + ] + slow = [0] * len(ranges) + durations = [[] for _ in ranges] + + for entry in completed: + for index, (lo_ms, hi_ms) in enumerate(ranges): + if lo_ms <= entry.end_ms <= hi_ms: + counts[index][entry.status] += 1 + if entry.duration_ms >= slow_threshold_ms: + slow[index] += 1 + durations[index].append(entry.duration_ms) + + return [ + build_snapshot(counts[index], slow[index], durations[index]) + for index in range(len(ranges)) + ] diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py index 90b2be40..f83cd3ac 100644 --- a/src/qlever/monitor/models.py +++ b/src/qlever/monitor/models.py @@ -1,9 +1,8 @@ """Shared data models for the monitor-queries TUI. These frozen dataclasses are the contract between the data layer and the -UI. Widgets render them; the stub modules (and later the real log-reader) -produce them. Neither side imports the other: both depend on this module, -so swapping stubs for the real reader never touches a widget. +UI. Widgets render them; the data adapters produce them. Neither side +imports the other: both depend on this module. """ from dataclasses import dataclass diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index e42d3594..18ecd2a6 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -1,18 +1,21 @@ from __future__ import annotations +from textual import work from textual.app import ComposeResult from textual.binding import Binding from textual.screen import Screen from textual.widgets import Footer, Static -from qlever.monitor.historic_stubs import ( - get_controls_state, - get_historic_metrics, - get_historic_query_rows, - get_timeline_bounds, +from qlever.monitor.historic_data import read_window, render_window +from qlever.monitor.log_reader import ( + read_first_timestamp, + read_last_timestamp, ) +from qlever.monitor.metrics import EMPTY_FIELDS from qlever.monitor.models import ( ControlsState, + HistoricQueryRow, + MetricsCounts, SparqlContent, TimelineBounds, ) @@ -33,6 +36,12 @@ TITLE = "QLever monitor-queries: Historic" +MODE_PHRASES = { + "ACTIVE": "were active during", + "STARTS": "started in", + "ENDS": "ended in", +} + class HistoricScreen(Screen, inherit_bindings=False): """Historic view: shows active queries parsed from the log over a time window.""" @@ -53,20 +62,33 @@ class HistoricScreen(Screen, inherit_bindings=False): ] def compose(self) -> ComposeResult: - controls = get_controls_state() - bounds = get_timeline_bounds() - # Fixed once: the log span never changes during a session. - self.log_start_ms = bounds.log_start_ms - self.log_end_ms = bounds.log_end_ms + self.log_start_ms = None + self.log_end_ms = None + self.read_log_span() # Presets longer than the log span are pointless; `all` covers it. self.available_presets = available_presets( self.log_end_ms - self.log_start_ms ) - # The screen owns the mutable window state from here on. - self.window_size = controls.window_size - self.mode = controls.mode - self.window_start_ms = controls.start_ms - self.window_end_ms = controls.end_ms + self.window_size = self.available_presets[0] + self.mode = "ACTIVE" + width = preset_ms(self.window_size) + self.window_start_ms = ( + self.log_start_ms if width is None else self.log_end_ms - width + ) + self.window_end_ms = self.log_end_ms + self.window_data = None + controls = ControlsState( + window_size=self.window_size, + mode=self.mode, + start_ms=self.window_start_ms, + end_ms=self.window_end_ms, + ) + bounds = TimelineBounds( + log_start_ms=self.log_start_ms, + log_end_ms=self.log_end_ms, + window_start_ms=self.window_start_ms, + window_end_ms=self.window_end_ms, + ) yield HeaderRow( left=NavPill("< Live", target="live"), @@ -74,25 +96,41 @@ def compose(self) -> ComposeResult: ) yield HistoricControlsRow(controls) yield Timeline(bounds) - yield MetricsRow([get_historic_metrics()]) - rows = sorted( - get_historic_query_rows(), - key=lambda r: r.duration_ms, - reverse=True, - ) - yield HistoricQueryTable(rows) - yield Static( - f"Showing {len(rows)} queries sorted by duration desc", - id="table-status", + yield MetricsRow( + [MetricsCounts(label=self.window_size, **EMPTY_FIELDS)] ) + yield HistoricQueryTable([]) + yield Static("", id="table-status") yield SparqlPane() yield Footer(show_command_palette=False) - def push_state(self) -> None: - """Rebuild the controls/timeline models and push them to the widgets. + def on_screen_resume(self) -> None: + """Catch up on log growth, then push state and rescan.""" + self.read_log_span() + self.available_presets = available_presets( + self.log_end_ms - self.log_start_ms + ) + self.clamp_window() + self.refresh_view(rescan=True) + + def read_log_span(self) -> None: + """Refresh log_end_ms from the file; read log_start_ms only once.""" + log_path = self.app.log_file + with log_path.open("rb") as log_stream: + file_size = log_path.stat().st_size + if self.log_start_ms is None: + self.log_start_ms = read_first_timestamp( + log_stream, file_size + ) + self.log_end_ms = read_last_timestamp(log_stream, file_size) + + def refresh_view(self, rescan: bool) -> None: + """Rebuild the controls/timeline widgets and kick the data refresh. The single update path: every key/click action mutates the - screen's window fields and then calls this. + screen's window fields and then calls this. `rescan=True` reads + a fresh window from the log; `rescan=False` reuses the cached + scan and only re-filters by the current mode. """ controls = ControlsState( window_size=self.window_size, @@ -110,6 +148,50 @@ def push_state(self) -> None: self.query_one(ModePicker).selected = self.mode self.query_one(SelectedWindow).state = controls self.query_one(Timeline).bounds = bounds + self.refresh_data(rescan) + + @work(thread=True, exclusive=True) + def refresh_data(self, rescan: bool) -> None: + """Scan and/or re-filter the window, push rows + metrics + status. + + On `rescan` the log is read into a fresh `WindowData` cached on + the screen; otherwise the cached scan is reused. Render always + runs, so a mode change pays only an in-memory filter. + """ + if rescan: + self.window_data = read_window( + self.app.log_file, + self.window_start_ms, + self.window_end_ms, + self.app.window_pad_ms, + self.app.slow_threshold * 1000, + self.log_end_ms, + ) + controls = ControlsState( + window_size=self.window_size, + mode=self.mode, + start_ms=self.window_start_ms, + end_ms=self.window_end_ms, + ) + rows, metrics = render_window( + self.window_data, controls, self.log_end_ms + ) + rows = sorted(rows, key=lambda row: row.duration_ms, reverse=True) + self.app.call_from_thread(self.apply_window_result, rows, metrics) + + def apply_window_result( + self, + rows: list[HistoricQueryRow], + metrics: MetricsCounts, + ) -> None: + """Push fresh rows, metrics, and status line into the widgets.""" + self.query_one(HistoricQueryTable).set_rows(rows) + self.query_one(MetricsRow).rows = [metrics] + phrase = MODE_PHRASES[self.mode] + self.query_one("#table-status", Static).update( + f"Showing {len(rows)} queries that {phrase} the time window " + "(sorted by longest duration)" + ) def clamp_window_start(self, start: int, width: int) -> int: """Keep a window of `width` fully inside the log span.""" @@ -122,35 +204,36 @@ def center_window_at(self, ms: int) -> None: return self.window_start_ms = self.clamp_window_start(ms - width // 2, width) self.window_end_ms = self.window_start_ms + width - self.push_state() + self.refresh_view(rescan=True) - def reframe_window(self) -> None: - """Recompute the window range from its size, then push state. + def clamp_window(self) -> None: + """Fit the window into the log span, anchored to its right edge. `all` spans the whole log; any fixed size keeps the current - start but is clamped so the window stays inside the log span. + `window_end_ms` put and walks `window_start_ms` left, clamped + so the window stays inside the log span. """ width = preset_ms(self.window_size) if width is None: self.window_start_ms = self.log_start_ms self.window_end_ms = self.log_end_ms - else: - start = self.clamp_window_start(self.window_start_ms, width) - self.window_start_ms = start - self.window_end_ms = start + width - self.push_state() + return + start = self.clamp_window_start(self.window_end_ms - width, width) + self.window_start_ms = start + self.window_end_ms = start + width def step_window(self, direction: int) -> None: """Move the window size one preset in `direction` (wraps).""" presets = self.available_presets index = presets.index(self.window_size) self.window_size = presets[(index + direction) % len(presets)] - self.reframe_window() + self.clamp_window() + self.refresh_view(rescan=True) def set_mode(self, mode: str) -> None: - """Select an exact match mode, then push state.""" + """Select an exact match mode, then refresh the view (no rescan).""" self.mode = mode - self.push_state() + self.refresh_view(rescan=False) def action_cycle_window(self) -> None: """Step to the next window-size preset (wraps).""" @@ -169,7 +252,7 @@ def shift_window(self, direction: int) -> None: start = self.window_start_ms + direction * width self.window_start_ms = self.clamp_window_start(start, width) self.window_end_ms = self.window_start_ms + width - self.push_state() + self.refresh_view(rescan=True) def action_shift_earlier(self) -> None: """Shift the window one width toward the log start.""" @@ -181,13 +264,23 @@ def action_shift_later(self) -> None: def action_snap_start(self) -> None: """Snap the window to the log start.""" + width = preset_ms(self.window_size) self.window_start_ms = self.log_start_ms - self.reframe_window() + self.window_end_ms = ( + self.log_end_ms if width is None + else self.log_start_ms + width + ) + self.refresh_view(rescan=True) def action_snap_end(self) -> None: """Snap the window to the log end.""" - self.window_start_ms = self.log_end_ms - self.reframe_window() + width = preset_ms(self.window_size) + self.window_end_ms = self.log_end_ms + self.window_start_ms = ( + self.log_start_ms if width is None + else self.log_end_ms - width + ) + self.refresh_view(rescan=True) def on_nav_pill_clicked(self, message: NavPill.Clicked) -> None: """Switch to the screen named by the clicked pill.""" diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index c5c1e72e..668957c7 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -8,11 +8,11 @@ from textual.widgets import Footer, Static from qlever.monitor.live_data import ( + current_ms, get_live_metrics, get_live_query_rows, get_live_subtitle, ) -from qlever.monitor.live_engine import current_ms from qlever.monitor.models import SparqlContent from qlever.monitor.widgets.header_row import HeaderRow from qlever.monitor.widgets.metrics_row import MetricsRow diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py index 61361991..13e4bfe8 100644 --- a/src/qlever/monitor/widgets/query_table.py +++ b/src/qlever/monitor/widgets/query_table.py @@ -25,8 +25,10 @@ def truncate(text: str, width: int) -> str: def format_duration(ms: int) -> str: - """Render a duration as whole seconds.""" - return f"{max(ms, 0) // 1000}s" + """Render a duration in seconds to one decimal, or '?' when unknown.""" + if ms < 0: + return "?" + return f"{ms / 1000:.1f}s" class QueryTable(DataTable): @@ -37,26 +39,12 @@ class QueryTable(DataTable): its own columns, since the two column sets differ. """ - def __init__(self, rows: list) -> None: + def __init__(self, rows: list[LiveQueryRow] | list[HistoricQueryRow]) -> None: """Hold the rows to paint once the table is mounted.""" super().__init__(cursor_type="row") self.query_rows = rows - -class LiveQueryTable(QueryTable): - """Active queries on the Live screen; the only focusable widget.""" - - def __init__(self, rows: list[LiveQueryRow]) -> None: - super().__init__(rows) - - def on_mount(self) -> None: - """Add the columns and one table row per active query.""" - self.add_column("Query ID") - self.add_column("Duration", width=8) - self.add_column("SPARQL") - self.repaint_rows() - - def set_rows(self, rows: list[LiveQueryRow]) -> None: + def set_rows(self, rows: list[LiveQueryRow] | list[HistoricQueryRow]) -> None: """Replace the table rows, preserving the cursor position by qid.""" cursor_qid = None old_index = self.cursor_row @@ -76,6 +64,17 @@ def set_rows(self, rows: list[LiveQueryRow]) -> None: return self.move_cursor(row=min(old_index, len(self.query_rows) - 1)) + +class LiveQueryTable(QueryTable): + """Active queries on the Live screen; the only focusable widget.""" + + def on_mount(self) -> None: + """Add the columns and one table row per active query.""" + self.add_column("Query ID") + self.add_column("Duration", width=8) + self.add_column("SPARQL") + self.repaint_rows() + def repaint_rows(self) -> None: """Add one DataTable row per entry in self.query_rows.""" now_ms = int(time.time() * 1000) @@ -90,15 +89,16 @@ def repaint_rows(self) -> None: class HistoricQueryTable(QueryTable): """Finished queries in the current window on the Historic screen.""" - def __init__(self, rows: list[HistoricQueryRow]) -> None: - super().__init__(rows) - def on_mount(self) -> None: """Add the columns and one table row per finished query.""" self.add_column("Started") self.add_column("Duration", width=8) self.add_column("Status") self.add_column("SPARQL") + self.repaint_rows() + + def repaint_rows(self) -> None: + """Add one DataTable row per entry in self.query_rows.""" for row in self.query_rows: self.add_row( format_clock(row.started_at_ms), diff --git a/test/qlever/test_historic_data.py b/test/qlever/test_historic_data.py new file mode 100644 index 00000000..ba12cde2 --- /dev/null +++ b/test/qlever/test_historic_data.py @@ -0,0 +1,277 @@ +"""Tests for read_window, filter_queries, and render_window.""" + +from qlever.monitor.historic_data import ( + DURATION_UNKNOWN, + LoggedQuery, + WindowData, + filter_queries, + read_window, + render_window, +) +from qlever.monitor.metrics import MetricsSnapshot +from qlever.monitor.models import ControlsState + + +PAD_MS = 10_000 +SLOW_MS = 60_000 + + +def start_line(ts_ms, qid, query="SELECT 1"): + return ( + f'{{"ts-ms":{ts_ms},"event":"start","qid":"{qid}",' + f'"client-ip":"x","query":"{query}"}}\n' + ).encode() + + +def end_line(ts_ms, qid, status="ok"): + return ( + f'{{"ts-ms":{ts_ms},"event":"end","qid":"{qid}",' + f'"status":"{status}"}}\n' + ).encode() + + +def test_read_window_empty_log_returns_no_queries(write_log): + path = write_log(b"") + data = read_window(path, 0, 1000, PAD_MS, SLOW_MS, log_end_ms=0) + assert data.queries == [] + assert data.metrics.seen == 0 + + +def test_read_window_keeps_pair_entirely_inside_window(write_log): + path = write_log( + start_line(1_000_000, "q1", "SELECT a") + + end_line(1_000_500, "q1") + ) + data = read_window( + path, 900_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_000_500 + ) + assert data.queries == [ + LoggedQuery( + start_ms=1_000_000, + end_ms=1_000_500, + status="ok", + qid="q1", + sparql="SELECT a", + ) + ] + assert data.metrics.seen == 1 + assert data.metrics.ok == 1 + + +def test_read_window_drops_pair_inside_pad_before_window(write_log): + path = write_log( + start_line(900_000, "pad", "SELECT pad") + + end_line(900_500, "pad") + + start_line(1_000_000, "q1", "SELECT a") + + end_line(1_000_500, "q1") + ) + data = read_window( + path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_000_500 + ) + qids = [query.qid for query in data.queries] + assert qids == ["q1"] + assert data.metrics.seen == 1 + + +def test_read_window_keeps_pair_straddling_window_start(write_log): + path = write_log( + start_line(995_000, "edge", "SELECT edge") + + end_line(1_005_000, "edge") + ) + data = read_window( + path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_005_000 + ) + assert [query.qid for query in data.queries] == ["edge"] + assert data.queries[0].start_ms == 995_000 + assert data.queries[0].end_ms == 1_005_000 + assert data.metrics.seen == 1 + + +def test_read_window_keeps_pair_straddling_window_end(write_log): + path = write_log( + start_line(1_095_000, "edge", "SELECT edge") + + end_line(1_105_000, "edge") + ) + data = read_window( + path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_105_000 + ) + assert [query.qid for query in data.queries] == ["edge"] + assert data.metrics.seen == 0 + + +def test_read_window_includes_still_open_with_running_status(write_log): + path = write_log(start_line(1_050_000, "live", "SELECT live")) + data = read_window( + path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_050_000 + ) + assert data.queries == [ + LoggedQuery( + start_ms=1_050_000, + end_ms=None, + status="running", + qid="live", + sparql="SELECT live", + ) + ] + + +def test_read_window_marks_still_open_as_orphaned_when_log_advanced_past_pad( + write_log, +): + path = write_log(start_line(1_050_000, "ghost", "SELECT ghost")) + data = read_window( + path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_200_000 + ) + assert data.queries == [ + LoggedQuery( + start_ms=1_050_000, + end_ms=None, + status="orphaned", + qid="ghost", + sparql="SELECT ghost", + ) + ] + + +def test_read_window_drops_still_open_started_after_window(write_log): + path = write_log(start_line(1_200_000, "later", "SELECT later")) + data = read_window( + path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_200_000 + ) + assert data.queries == [] + + +def test_read_window_metrics_count_only_ends_inside_window(write_log): + path = write_log( + start_line(995_000, "edge_start", "SELECT a") + + end_line(1_005_000, "edge_start") + + start_line(1_050_000, "inside", "SELECT b") + + end_line(1_060_000, "inside") + + start_line(1_095_000, "edge_end", "SELECT c") + + end_line(1_105_000, "edge_end") + ) + data = read_window( + path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_105_000 + ) + assert {query.qid for query in data.queries} == { + "edge_start", "inside", "edge_end", + } + assert data.metrics.seen == 2 + + +def make_query(qid, start_ms, end_ms, status="ok"): + return LoggedQuery( + start_ms=start_ms, end_ms=end_ms, status=status, + qid=qid, sparql=f"SELECT {qid}", + ) + + +def make_cache(queries): + empty_metrics = MetricsSnapshot( + seen=0, ok=0, failed=0, timeout=0, cancelled=0, unknown=0, + slow=0, p50=None, p95=None, + ) + return WindowData(queries=queries, metrics=empty_metrics) + + +def test_filter_queries_active_returns_cache_unchanged(): + cache = make_cache([ + make_query("a", 100, 200), + make_query("b", 300, None, status="running"), + ]) + assert filter_queries(cache, "ACTIVE", 0, 1000) is cache.queries + + +def test_filter_queries_starts_keeps_only_starts_in_window(): + cache = make_cache([ + make_query("early", 50, 150), + make_query("inside", 200, 400), + make_query("late", 1100, 1200), + ]) + qids = [query.qid for query in filter_queries(cache, "STARTS", 100, 1000)] + assert qids == ["inside"] + + +def test_filter_queries_starts_includes_running_queries(): + cache = make_cache([make_query("live", 500, None, status="running")]) + qids = [query.qid for query in filter_queries(cache, "STARTS", 100, 1000)] + assert qids == ["live"] + + +def test_filter_queries_ends_excludes_running_queries(): + cache = make_cache([ + make_query("done", 200, 500), + make_query("live", 300, None, status="running"), + ]) + qids = [query.qid for query in filter_queries(cache, "ENDS", 100, 1000)] + assert qids == ["done"] + + +def test_filter_queries_ends_drops_ends_outside_window(): + cache = make_cache([ + make_query("before", 50, 90), + make_query("inside", 200, 500), + make_query("after", 800, 1200), + ]) + qids = [query.qid for query in filter_queries(cache, "ENDS", 100, 1000)] + assert qids == ["inside"] + + +def make_controls(mode="ACTIVE"): + return ControlsState( + window_size="15m", mode=mode, start_ms=1_000_000, end_ms=2_000_000, + ) + + +def make_render_cache(queries, **metric_fields): + snapshot_fields = dict( + seen=0, ok=0, failed=0, timeout=0, cancelled=0, unknown=0, + slow=0, p50=None, p95=None, + ) + snapshot_fields.update(metric_fields) + return WindowData(queries=queries, metrics=MetricsSnapshot(**snapshot_fields)) + + +def test_render_window_uses_recorded_end_for_completed_query(): + cache = make_render_cache([make_query("done", 1_100_000, 1_150_000)]) + rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) + assert rows[0].duration_ms == 50_000 + assert rows[0].status == "ok" + + +def test_render_window_uses_log_end_for_running_query(): + cache = make_render_cache( + [make_query("live", 1_100_000, None, status="running")] + ) + rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) + assert rows[0].duration_ms == 1_400_000 + assert rows[0].status == "running" + + +def test_render_window_marks_orphan_duration_as_unknown(): + cache = make_render_cache( + [make_query("ghost", 1_100_000, None, status="orphaned")] + ) + rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) + assert rows[0].duration_ms == DURATION_UNKNOWN + assert rows[0].status == "orphaned" + + +def test_render_window_passes_window_size_as_metric_label(): + cache = make_render_cache([], seen=7, ok=7) + controls = ControlsState( + window_size="1h", mode="ACTIVE", start_ms=0, end_ms=1, + ) + _, metrics = render_window(cache, controls, log_end_ms=1) + assert metrics.label == "1h" + assert metrics.seen == 7 + assert metrics.ok == 7 + + +def test_render_window_respects_mode_filter(): + cache = make_render_cache([ + make_query("done", 1_100_000, 1_200_000), + make_query("live", 1_500_000, None, status="running"), + ]) + rows, _ = render_window(cache, make_controls(mode="ENDS"), log_end_ms=2_500_000) + assert [row.qid for row in rows] == ["done"] diff --git a/test/qlever/test_live_engine.py b/test/qlever/test_live_data.py similarity index 94% rename from test/qlever/test_live_engine.py rename to test/qlever/test_live_data.py index 5311fa02..6b2c3952 100644 --- a/test/qlever/test_live_engine.py +++ b/test/qlever/test_live_data.py @@ -1,20 +1,19 @@ -"""Tests for CompletedQueries and the metrics snapshot helpers.""" +"""Tests for the Live data layer: state, tailer, boot, metrics.""" import threading import time -from qlever.monitor.live_engine import ( +from qlever.monitor.live_data import ( LIVE_HORIZON_MS, CompletedQueries, LiveLogReader, LiveState, - MetricsSnapshot, - compute_live_metrics, find_active_queries, + get_live_metrics, load_completed_history, - percentiles, ) from qlever.monitor.log_reader import CompletedQuery +from qlever.monitor.metrics import MetricsSnapshot, percentiles NOW_MS = 1_700_000_000_000 MIN_MS = 60_000 @@ -310,7 +309,7 @@ def test_evict_stale_drops_completed_older_than_one_hour(write_log): assert state.completed.entries[0].end_ms == now - 500 -def test_compute_live_metrics_returns_three_snapshots_when_data_covers_an_hour(): +def test_get_live_metrics_returns_three_rows_when_data_covers_an_hour(): state = LiveState() state.completed.add(CompletedQuery( start_ms=NOW_MS - 65 * MIN_MS, end_ms=NOW_MS - 60 * MIN_MS, @@ -319,27 +318,25 @@ def test_compute_live_metrics_returns_three_snapshots_when_data_covers_an_hour() state.completed.add(make_completed(NOW_MS - 10 * MIN_MS)) state.completed.add(make_completed(NOW_MS - 1 * MIN_MS)) - snaps = compute_live_metrics(state, slow_threshold_ms=10_000, now_ms=NOW_MS) - assert len(snaps) == 3 - assert all(snap is not None for snap in snaps) - snap_5m, snap_15m, snap_1h = snaps - assert snap_5m.seen == 1 - assert snap_15m.seen == 2 - assert snap_1h.seen == 3 + rows = get_live_metrics(state, slow_threshold_ms=10_000, now_ms=NOW_MS) + assert [row.label for row in rows] == ["last 5m", "last 15m", "last 1h"] + row_5m, row_15m, row_1h = rows + assert row_5m.seen == 1 + assert row_15m.seen == 2 + assert row_1h.seen == 3 -def test_compute_live_metrics_returns_none_for_windows_past_oldest_entry(): +def test_get_live_metrics_blanks_windows_past_oldest_entry(): state = LiveState() state.completed.add(make_completed(NOW_MS - 11 * MIN_MS)) state.completed.add(make_completed(NOW_MS - 2 * MIN_MS)) - snap_5m, snap_15m, snap_1h = compute_live_metrics( + row_5m, row_15m, row_1h = get_live_metrics( state, slow_threshold_ms=10_000, now_ms=NOW_MS, ) - assert snap_5m is not None - assert snap_5m.seen == 1 - assert snap_15m is None - assert snap_1h is None + assert row_5m.seen == 1 + assert row_15m.seen is None + assert row_1h.seen is None def test_load_completed_history_loads_pairs_from_history(write_log): diff --git a/test/qlever/test_log_reader.py b/test/qlever/test_log_reader.py index 8d232ec4..8576beca 100644 --- a/test/qlever/test_log_reader.py +++ b/test/qlever/test_log_reader.py @@ -3,7 +3,7 @@ from qlever.monitor import log_reader from qlever.monitor.log_reader import ( CompletedQuery, - extract_query, + extract_qid_and_query, load_sparql_at, next_whole_line, offset_for_ts, @@ -11,6 +11,7 @@ parse_line, parse_line_fallback, peek_ts_ms, + read_first_timestamp, read_last_timestamp, scan_range, ) @@ -377,37 +378,46 @@ def test_pair_start_end_events_status_flows_through(status): assert completed[0].status == status -def test_extract_query_returns_query_field(): +def test_extract_qid_and_query_returns_both_fields(): line = ( b'{"ts-ms":1,"event":"start","qid":"q1",' b'"query":"SELECT * WHERE { ?s ?p ?o }"}\n' ) - assert extract_query(line) == "SELECT * WHERE { ?s ?p ?o }" + assert extract_qid_and_query(line) == ( + "q1", + "SELECT * WHERE { ?s ?p ?o }", + ) -def test_extract_query_handles_escaped_quote_in_query(): +def test_extract_qid_and_query_handles_escaped_quote_in_query(): line = ( b'{"ts-ms":1,"event":"start","qid":"q1",' b'"query":"SELECT ?x WHERE { ?x ?p \\"a\\" }"}\n' ) - assert extract_query(line) == 'SELECT ?x WHERE { ?x ?p "a" }' + assert extract_qid_and_query(line) == ( + "q1", + 'SELECT ?x WHERE { ?x ?p "a" }', + ) -def test_extract_query_missing_field_returns_empty(): - assert extract_query(END + b"\n") == "" +def test_extract_qid_and_query_missing_field_returns_empty(): + assert extract_qid_and_query(END + b"\n") == ("", "") -def test_extract_query_malformed_json_returns_empty(): - assert extract_query(b"this is not json\n") == "" +def test_extract_qid_and_query_malformed_json_returns_empty(): + assert extract_qid_and_query(b"this is not json\n") == ("", "") -def test_load_sparql_at_returns_query_text(open_log): +def test_load_sparql_at_returns_qid_and_query(open_log): line = ( b'{"ts-ms":1,"event":"start","qid":"q1",' b'"client-ip":"1.2.3.4","query":"SELECT * WHERE { ?s ?p ?o }"}\n' ) log, _ = open_log(line) - assert load_sparql_at(log, 0) == "SELECT * WHERE { ?s ?p ?o }" + assert load_sparql_at(log, 0) == ( + "q1", + "SELECT * WHERE { ?s ?p ?o }", + ) def test_load_sparql_at_handles_escaped_quotes_and_backslashes(open_log): @@ -417,7 +427,10 @@ def test_load_sparql_at_handles_escaped_quotes_and_backslashes(open_log): b'"query":"SELECT ?x WHERE { ?x ?p \\"a\\\\b\\" }"}\n' ) log, _ = open_log(line) - assert load_sparql_at(log, 0) == 'SELECT ?x WHERE { ?x ?p "a\\b" }' + assert load_sparql_at(log, 0) == ( + "q1", + 'SELECT ?x WHERE { ?x ?p "a\\b" }', + ) def test_load_sparql_at_handles_unicode(open_log): @@ -426,22 +439,22 @@ def test_load_sparql_at_handles_unicode(open_log): b'"query":"SELECT ?s WHERE { ?s ?p \xe2\x98\x83 }"}\n' ) log, _ = open_log(line) - assert load_sparql_at(log, 0) == "SELECT ?s WHERE { ?s ?p ☃ }" + assert load_sparql_at(log, 0) == ("q1", "SELECT ?s WHERE { ?s ?p ☃ }") def test_load_sparql_at_end_line_has_no_query_returns_empty(open_log): log, _ = open_log(END + b"\n") - assert load_sparql_at(log, 0) == "" + assert load_sparql_at(log, 0) == ("", "") def test_load_sparql_at_malformed_json_returns_empty(open_log): log, _ = open_log(b"this is not json\n") - assert load_sparql_at(log, 0) == "" + assert load_sparql_at(log, 0) == ("", "") def test_load_sparql_at_top_level_non_object_returns_empty(open_log): log, _ = open_log(b"42\n") - assert load_sparql_at(log, 0) == "" + assert load_sparql_at(log, 0) == ("", "") def test_load_sparql_at_reads_the_line_at_the_given_offset(open_log): @@ -452,7 +465,7 @@ def test_load_sparql_at_reads_the_line_at_the_given_offset(open_log): b'{"ts-ms":2,"event":"start","qid":"q2","query":"SECOND"}\n' ) log, _ = open_log(first + second) - assert load_sparql_at(log, len(first)) == "SECOND" + assert load_sparql_at(log, len(first)) == ("q2", "SECOND") def test_read_last_timestamp_empty_file_returns_none(open_log): @@ -479,3 +492,29 @@ def test_read_last_timestamp_grows_buffer_for_long_lines(open_log): ) log, size = open_log(line) assert read_last_timestamp(log, size) == 1000 + + +def test_read_first_timestamp_empty_file_returns_none(open_log): + log, size = open_log(b"") + assert read_first_timestamp(log, size) is None + + +def test_read_first_timestamp_returns_first_complete_line_ts(open_log): + log, size = open_log(FIRST_LINE + SECOND_LINE) + assert read_first_timestamp(log, size) == 1000 + + +def test_read_first_timestamp_single_line(open_log): + log, size = open_log(FIRST_LINE) + assert read_first_timestamp(log, size) == 1000 + + +def test_read_first_timestamp_skips_malformed_first_line(open_log): + log, size = open_log(b"not a log line\n" + SECOND_LINE) + assert read_first_timestamp(log, size) == 2000 + + +def test_read_first_timestamp_unterminated_only_line_returns_none(open_log): + partial = b'{"ts-ms":1000,"event":"start","qid":"q1"' + log, size = open_log(partial) + assert read_first_timestamp(log, size) is None From 1a3f434392d9a094a356664157c7a86779ad1505 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 26 May 2026 00:04:12 +0200 Subject: [PATCH 09/26] Fix theme cycling and timeline window bar errors --- src/qlever/monitor/app.py | 11 ++++++----- src/qlever/monitor/widgets/timeline.py | 24 +++++++++++++++++++++--- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 22ffe67e..0e5e0211 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -42,7 +42,8 @@ class MonitorQueriesApp(App): BINDINGS = [ ("q", "quit", "Quit/Exit"), - ("t", "cycle_themes", "Change theme"), + Binding("t", "cycle_themes", "Change theme", key_display="T/t"), + Binding("T", "cycle_themes(-1)", "Previous theme", show=False), ("y", "copy_query", "Copy SPARQL"), ("p", "pretty_print", "Pretty print"), ("c", "clear_query", "Clear SPARQL"), @@ -195,8 +196,8 @@ def check_action( return scroll.max_scroll_y > 0 return True - def action_cycle_themes(self) -> None: - """Select the next theme on press of `t` binding""" - themes = [t for t in self.available_themes if "ansi" not in t] + def action_cycle_themes(self, direction: int = 1) -> None: + """Step through themes; `direction` is +1 for `t`, -1 for `T`.""" + themes = list(self.available_themes) selected_theme_idx = themes.index(self.theme) - self.theme = themes[(selected_theme_idx + 1) % len(themes)] + self.theme = themes[(selected_theme_idx + direction) % len(themes)] diff --git a/src/qlever/monitor/widgets/timeline.py b/src/qlever/monitor/widgets/timeline.py index 76c51e2d..ba839383 100644 --- a/src/qlever/monitor/widgets/timeline.py +++ b/src/qlever/monitor/widgets/timeline.py @@ -1,6 +1,7 @@ from __future__ import annotations from datetime import datetime +from math import ceil from textual import events from textual.message import Message @@ -68,6 +69,21 @@ def column_to_ms(col: int, bar_width: int, bounds: TimelineBounds) -> int: return bounds.log_start_ms + round(fraction * span) +def window_width_cells(bounds: TimelineBounds, bar_width: int) -> int: + """Cells the window block should occupy, based on its duration. + + Width depends only on how long the window is relative to the log + span, never on where it sits. Floors at one cell so a tiny window + is still visible. + """ + log_span = bounds.log_end_ms - bounds.log_start_ms + if log_span <= 0: + return 1 + window_span = bounds.window_end_ms - bounds.window_start_ms + fraction = window_span / log_span + return max(1, ceil(fraction * bar_width)) + + def render_track(bounds: TimelineBounds, bar_width: int) -> str: """The bar line: full-span track with the window span marked. @@ -75,12 +91,14 @@ def render_track(bounds: TimelineBounds, bar_width: int) -> str: when the window reaches an edge. """ chars = ["─"] * bar_width + width = window_width_cells(bounds, bar_width) start = fraction_to_column( span_fraction(bounds.window_start_ms, bounds), bar_width ) - end = fraction_to_column( - span_fraction(bounds.window_end_ms, bounds), bar_width - ) + end = start + width - 1 + if end > bar_width - 1: + end = bar_width - 1 + start = end - width + 1 for col in range(start, end + 1): chars[col] = "█" chars[0] = "├" From ff6c8d08ba6c48299fd4031e4984ea1a2c8af701 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 26 May 2026 10:15:23 +0200 Subject: [PATCH 10/26] The timeline widget caps shouldn't render when the bar is at the edges --- src/qlever/monitor/widgets/timeline.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/qlever/monitor/widgets/timeline.py b/src/qlever/monitor/widgets/timeline.py index ba839383..a378e67a 100644 --- a/src/qlever/monitor/widgets/timeline.py +++ b/src/qlever/monitor/widgets/timeline.py @@ -87,8 +87,9 @@ def window_width_cells(bounds: TimelineBounds, bar_width: int) -> int: def render_track(bounds: TimelineBounds, bar_width: int) -> str: """The bar line: full-span track with the window span marked. - `████` marks the window; `├`/`┤` always cap the log span, even - when the window reaches an edge. + `████` marks the window; `├`/`┤` cap the log edges, but only when + the window does not already sit on that edge (the window block + is its own edge marker). """ chars = ["─"] * bar_width width = window_width_cells(bounds, bar_width) @@ -101,8 +102,10 @@ def render_track(bounds: TimelineBounds, bar_width: int) -> str: start = end - width + 1 for col in range(start, end + 1): chars[col] = "█" - chars[0] = "├" - chars[-1] = "┤" + if chars[0] != "█": + chars[0] = "├" + if chars[-1] != "█": + chars[-1] = "┤" return "".join(chars) From b5e499d9e07312a2cfe4f5978ca465f4326ebf1f Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 26 May 2026 12:19:53 +0200 Subject: [PATCH 11/26] Fix metric row for last 1h oscillating between empty and filled, and add eta on empty live metric rows --- src/qlever/monitor/app.py | 13 ++++++++ src/qlever/monitor/live_data.py | 39 ++++++++++++++++------- src/qlever/monitor/models.py | 1 + src/qlever/monitor/views/historic.py | 29 +++++++++++------ src/qlever/monitor/widgets/metrics_row.py | 2 ++ test/qlever/test_live_data.py | 22 +++++++++++-- 6 files changed, 82 insertions(+), 24 deletions(-) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 0e5e0211..3d7e63c6 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -10,11 +10,14 @@ from textual.worker import get_current_worker from qlever.monitor.live_data import ( + LIVE_HORIZON_MS, LiveLogReader, LiveState, + current_ms, find_active_queries, load_completed_history, ) +from qlever.monitor.log_reader import read_first_timestamp from qlever.monitor.util import clipboard_install_hint, copy_text from qlever.monitor.views.historic import HistoricScreen from qlever.monitor.views.live import LiveScreen @@ -80,11 +83,16 @@ def __init__( def on_mount(self) -> None: """Boot the live engine, then open the Live screen.""" + self.boot_time_ms = current_ms() state, cut_offset, _ = find_active_queries( self.log_file, self.window_pad_ms ) self.live_state = state self.cut_offset = cut_offset + with self.log_file.open("rb") as log_stream: + self.log_start_ms = read_first_timestamp( + log_stream, self.log_file.stat().st_size + ) self.tail_live_log() self.load_metrics_history() self.push_screen("live") @@ -108,6 +116,11 @@ def tail_live_log(self) -> None: def load_metrics_history(self) -> None: """Prepend the hour before cut_offset into completed history so metrics aren't empty at boot.""" load_completed_history(self.log_file, self.live_state, self.cut_offset) + # Backfill is done; record how far back metrics have data. + log_start_or_boot = self.log_start_ms or self.boot_time_ms + self.live_state.metrics_known_from_ms = max( + log_start_or_boot, self.boot_time_ms - LIVE_HORIZON_MS + ) def action_swap_screen(self) -> None: """Toggle between Live and Historic (bound to Tab on each screen).""" diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index 1b2028ba..fa63b0a1 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -107,6 +107,7 @@ class LiveState: completed: CompletedQueries = field(default_factory=CompletedQueries) active: dict[str, tuple[int, str]] = field(default_factory=dict) lock: threading.Lock = field(default_factory=threading.Lock) + metrics_known_from_ms: int | None = None def find_active_queries( @@ -287,6 +288,13 @@ def get_live_query_rows(state: LiveState) -> list[LiveQueryRow]: ] +def format_eta(ms: int) -> str: + """Coarse countdown for a not-ready metric row; sub-minute reads "<1m".""" + if ms < 60_000: + return "<1m" + return f"{ms // 60_000}m" + + def get_live_metrics( state: LiveState, slow_threshold_ms: int, @@ -295,26 +303,33 @@ def get_live_metrics( """Three rolling-window metric rows for the Live screen. Takes a short-locked deque copy so the metric scan and percentile - work runs lock-free while the tailer keeps appending. Windows whose - cutoff predates the oldest entry render as an all-None row so the - widget shows "..." until the deque covers that range. + work runs lock-free while the tailer keeps appending. A window + whose start predates metrics_known_from_ms renders as an all-None + row so the widget shows "..." until coverage reaches that range. """ with state.lock: history = CompletedQueries() history.entries = deque(state.completed.entries) - oldest_ms = ( - history.entries[0].start_ms if history.entries else now_ms - ) + coverage_start_ms = state.metrics_known_from_ms snapshots = history.metrics_for_windows( now_ms=now_ms, windows_ms=LIVE_METRIC_WINDOWS_MS, slow_threshold_ms=slow_threshold_ms, - data_start_ms=oldest_ms, + data_start_ms=coverage_start_ms or now_ms, ) - return [ - MetricsCounts( + rows = [] + for label, width_ms, snap in zip( + LIVE_METRIC_LABELS, LIVE_METRIC_WINDOWS_MS, snapshots + ): + # Show "ready in Nm" only once we know when coverage arrives; + # during boot ramp coverage_start_ms is None and rows just read "…". + message = None + if snap is None and coverage_start_ms is not None: + eta_ms = coverage_start_ms + width_ms - now_ms + message = f"ready in {format_eta(eta_ms)}" + rows.append(MetricsCounts( label=label, **(snap._asdict() if snap is not None else EMPTY_FIELDS), - ) - for label, snap in zip(LIVE_METRIC_LABELS, snapshots) - ] + not_ready_message=message, + )) + return rows diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py index f83cd3ac..08ae3752 100644 --- a/src/qlever/monitor/models.py +++ b/src/qlever/monitor/models.py @@ -27,6 +27,7 @@ class MetricsCounts: p50: int | None p95: int | None slow: int | None + not_ready_message: str | None = None @dataclass(frozen=True) diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 18ecd2a6..62d622f6 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -8,7 +8,6 @@ from qlever.monitor.historic_data import read_window, render_window from qlever.monitor.log_reader import ( - read_first_timestamp, read_last_timestamp, ) from qlever.monitor.metrics import EMPTY_FIELDS @@ -62,9 +61,9 @@ class HistoricScreen(Screen, inherit_bindings=False): ] def compose(self) -> ComposeResult: - self.log_start_ms = None + self.log_start_ms = self.app.log_start_ms self.log_end_ms = None - self.read_log_span() + self.refresh_log_end() # Presets longer than the log span are pointless; `all` covers it. self.available_presets = available_presets( self.log_end_ms - self.log_start_ms @@ -106,24 +105,32 @@ def compose(self) -> ComposeResult: def on_screen_resume(self) -> None: """Catch up on log growth, then push state and rescan.""" - self.read_log_span() + self.refresh_log_end() self.available_presets = available_presets( self.log_end_ms - self.log_start_ms ) self.clamp_window() self.refresh_view(rescan=True) - def read_log_span(self) -> None: - """Refresh log_end_ms from the file; read log_start_ms only once.""" + def refresh_log_end(self) -> None: + """Re-read the log's latest timestamp so the screen tracks file growth.""" log_path = self.app.log_file with log_path.open("rb") as log_stream: file_size = log_path.stat().st_size - if self.log_start_ms is None: - self.log_start_ms = read_first_timestamp( - log_stream, file_size - ) self.log_end_ms = read_last_timestamp(log_stream, file_size) + def show_loading_state(self) -> None: + """Blank the table and metrics row while a rescan is in flight.""" + self.query_one(HistoricQueryTable).set_rows([]) + self.query_one(MetricsRow).rows = [ + MetricsCounts( + label=self.window_size, + **EMPTY_FIELDS, + not_ready_message="loading…", + ) + ] + self.query_one("#table-status", Static).update("Loading window…") + def refresh_view(self, rescan: bool) -> None: """Rebuild the controls/timeline widgets and kick the data refresh. @@ -148,6 +155,8 @@ def refresh_view(self, rescan: bool) -> None: self.query_one(ModePicker).selected = self.mode self.query_one(SelectedWindow).state = controls self.query_one(Timeline).bounds = bounds + if rescan: + self.show_loading_state() self.refresh_data(rescan) @work(thread=True, exclusive=True) diff --git a/src/qlever/monitor/widgets/metrics_row.py b/src/qlever/monitor/widgets/metrics_row.py index 651bbf1c..b639e631 100644 --- a/src/qlever/monitor/widgets/metrics_row.py +++ b/src/qlever/monitor/widgets/metrics_row.py @@ -86,6 +86,8 @@ def render_cell(label: str, value: int | None, width: int, kind: str) -> str: def format_row(row: MetricsCounts, widths: list[int]) -> str: """Format one rolling-window row as a single line with markup.""" + if row.not_ready_message: + return f"[bold]{row.label:<8}[/] │ [dim]{row.not_ready_message}[/]" cells = [ render_cell(name, getattr(row, name), width, kind) for (name, kind), width in zip(COLUMNS, widths) diff --git a/test/qlever/test_live_data.py b/test/qlever/test_live_data.py index 6b2c3952..3d624e49 100644 --- a/test/qlever/test_live_data.py +++ b/test/qlever/test_live_data.py @@ -309,8 +309,9 @@ def test_evict_stale_drops_completed_older_than_one_hour(write_log): assert state.completed.entries[0].end_ms == now - 500 -def test_get_live_metrics_returns_three_rows_when_data_covers_an_hour(): +def test_get_live_metrics_returns_three_rows_when_coverage_spans_an_hour(): state = LiveState() + state.metrics_known_from_ms = NOW_MS - 65 * MIN_MS state.completed.add(CompletedQuery( start_ms=NOW_MS - 65 * MIN_MS, end_ms=NOW_MS - 60 * MIN_MS, duration_ms=5 * MIN_MS, status="ok", start_line_offset=None, @@ -326,8 +327,9 @@ def test_get_live_metrics_returns_three_rows_when_data_covers_an_hour(): assert row_1h.seen == 3 -def test_get_live_metrics_blanks_windows_past_oldest_entry(): +def test_get_live_metrics_blanks_windows_past_coverage_start(): state = LiveState() + state.metrics_known_from_ms = NOW_MS - 11 * MIN_MS state.completed.add(make_completed(NOW_MS - 11 * MIN_MS)) state.completed.add(make_completed(NOW_MS - 2 * MIN_MS)) @@ -335,8 +337,24 @@ def test_get_live_metrics_blanks_windows_past_oldest_entry(): state, slow_threshold_ms=10_000, now_ms=NOW_MS, ) assert row_5m.seen == 1 + assert row_5m.not_ready_message is None assert row_15m.seen is None + assert row_15m.not_ready_message == "ready in 4m" assert row_1h.seen is None + assert row_1h.not_ready_message == "ready in 49m" + + +def test_get_live_metrics_masks_everything_before_coverage_is_announced(): + state = LiveState() + state.completed.add(make_completed(NOW_MS - 30 * MIN_MS)) + + row_5m, row_15m, row_1h = get_live_metrics( + state, slow_threshold_ms=10_000, now_ms=NOW_MS, + ) + # During boot ramp we don't know when coverage arrives, so no ETA. + for row in (row_5m, row_15m, row_1h): + assert row.seen is None + assert row.not_ready_message is None def test_load_completed_history_loads_pairs_from_history(write_log): From 410aba1fd25120cf24250623af5dc972538b7213 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 26 May 2026 12:55:55 +0200 Subject: [PATCH 12/26] Show really fast queries in live mode at least once to indicate server liveliness and remove screen_refresh_s for users --- src/qlever/commands/monitor_queries.py | 11 --------- src/qlever/monitor/app.py | 2 -- src/qlever/monitor/live_data.py | 33 ++++++++++++++++++-------- src/qlever/monitor/views/live.py | 31 ++++++++++++++---------- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/src/qlever/commands/monitor_queries.py b/src/qlever/commands/monitor_queries.py index 9820583d..e74b6426 100644 --- a/src/qlever/commands/monitor_queries.py +++ b/src/qlever/commands/monitor_queries.py @@ -50,15 +50,6 @@ def additional_arguments(self, subparser) -> None: " completed) is counted as slow in the metrics" " (default = server timeout - 10s)", ) - subparser.add_argument( - "--screen_refresh_s", - type=float, - default=0.5, - help="Duration in seconds after which the live view is refreshed " - "with the new queries from server log file. This only affects the " - "screen refresh and not the interval at which the queries are " - "read from the log file (default = 0.5s)", - ) def execute(self, args) -> bool: if not args.qlever_server_log: @@ -87,7 +78,6 @@ def execute(self, args) -> bool: return False args.slow_threshold = max(1, timeout_s - 10) - repaint_interval = max(0.5, args.screen_refresh_s) sparql_endpoint = ( args.sparql_endpoint if args.sparql_endpoint @@ -99,7 +89,6 @@ def execute(self, args) -> bool: sparql_endpoint=sparql_endpoint, timeout=timeout_s, slow_threshold=args.slow_threshold, - repaint_interval=repaint_interval, system=args.system, ).run() return True diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 3d7e63c6..32430e49 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -67,7 +67,6 @@ def __init__( sparql_endpoint: str, timeout: int, slow_threshold: int, - repaint_interval: float = 0.5, system: str = "docker", ) -> None: super().__init__() @@ -76,7 +75,6 @@ def __init__( self.timeout = timeout self.window_pad_ms = 2000 * timeout self.slow_threshold = slow_threshold - self.repaint_interval = repaint_interval self.system = system self.server_status = "checking" self.live_state = LiveState() diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index fa63b0a1..ed5837d1 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -30,11 +30,13 @@ MetricsSnapshot, metrics_for_ranges, ) -from qlever.monitor.models import LiveQueryRow, LiveSubtitle, MetricsCounts +from qlever.monitor.models import LiveQueryRow, MetricsCounts LIVE_METRIC_WINDOWS_MS = [5 * 60_000, 15 * 60_000, 60 * 60_000] LIVE_HORIZON_MS = LIVE_METRIC_WINDOWS_MS[2] LIVE_METRIC_LABELS = ["last 5m", "last 15m", "last 1h"] +LIVE_REPAINT_S = 0.25 +FLASH_THRESHOLD_MS = int(LIVE_REPAINT_S * 1000) def current_ms() -> int: @@ -106,6 +108,7 @@ class LiveState: completed: CompletedQueries = field(default_factory=CompletedQueries) active: dict[str, tuple[int, str]] = field(default_factory=dict) + unseen_finished: list[LiveQueryRow] = field(default_factory=list) lock: threading.Lock = field(default_factory=threading.Lock) metrics_known_from_ms: int | None = None @@ -257,25 +260,35 @@ def handle_line(self, line: bytes) -> None: start = self.state.active.pop(qid, None) if start is None: return - start_ms, _ = start + start_ms, sparql = start + duration_ms = ts_ms - start_ms self.state.completed.add( CompletedQuery( start_ms=start_ms, end_ms=ts_ms, - duration_ms=ts_ms - start_ms, + duration_ms=duration_ms, status=status, start_line_offset=None, ) ) + if duration_ms < FLASH_THRESHOLD_MS: + self.state.unseen_finished.append( + LiveQueryRow( + qid=qid, ts_ms=start_ms, sparql=sparql + ) + ) -def get_live_subtitle( - state: LiveState, server_status: str, endpoint: str -) -> LiveSubtitle: - """Subtitle line: server-reachable status plus current active count.""" - return LiveSubtitle( - endpoint=endpoint, state=server_status, n_active=len(state.active) - ) +def take_unseen_finished(state: LiveState) -> list[LiveQueryRow]: + """Snapshot the unseen-finished queue and clear it atomically. + + Called once per repaint by the Live screen so each sub-repaint + query is shown for exactly one tick before being dropped. + """ + with state.lock: + snapshot = list(state.unseen_finished) + state.unseen_finished.clear() + return snapshot def get_live_query_rows(state: LiveState) -> list[LiveQueryRow]: diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index 668957c7..8b7f760b 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -8,12 +8,13 @@ from textual.widgets import Footer, Static from qlever.monitor.live_data import ( + LIVE_REPAINT_S, current_ms, get_live_metrics, get_live_query_rows, - get_live_subtitle, + take_unseen_finished, ) -from qlever.monitor.models import SparqlContent +from qlever.monitor.models import LiveSubtitle, SparqlContent from qlever.monitor.widgets.header_row import HeaderRow from qlever.monitor.widgets.metrics_row import MetricsRow from qlever.monitor.widgets.nav_pill import NavPill @@ -42,13 +43,15 @@ def compose(self) -> ComposeResult: ) state = self.app.live_state slow_ms = self.app.slow_threshold * 1000 + rows = sorted(get_live_query_rows(state), key=lambda row: row.ts_ms) yield LiveStatusRow( - get_live_subtitle( - state, self.app.server_status, self.app.sparql_endpoint + LiveSubtitle( + endpoint=self.app.sparql_endpoint, + state=self.app.server_status, + n_active=len(rows), ) ) yield MetricsRow(get_live_metrics(state, slow_ms, current_ms())) - rows = sorted(get_live_query_rows(state), key=lambda row: row.ts_ms) yield LiveQueryTable(rows) yield Static("", id="table-status") yield SparqlPane() @@ -57,7 +60,7 @@ def compose(self) -> ComposeResult: def on_mount(self) -> None: """Start the periodic refresh of table, metrics, and server ping.""" self.table_timer = self.set_interval( - self.app.repaint_interval, self.refresh_table + LIVE_REPAINT_S, self.refresh_table ) self.metrics_timer = self.set_interval(2.0, self.refresh_metrics) self.live_server_check() @@ -92,19 +95,21 @@ def apply_server_status(self, server_alive: bool) -> None: self.refresh_subtitle() def refresh_subtitle(self) -> None: - """Rebuild the subtitle from the currently cached app state.""" - self.query_one(LiveStatusRow).subtitle = get_live_subtitle( - self.app.live_state, - self.app.server_status, - self.app.sparql_endpoint, + """Rebuild the subtitle to match the table currently on screen.""" + rows = self.query_one(LiveQueryTable).query_rows + self.query_one(LiveStatusRow).subtitle = LiveSubtitle( + endpoint=self.app.sparql_endpoint, + state=self.app.server_status, + n_active=len(rows), ) def refresh_table(self) -> None: - """Push the current active set into the table; no-op when frozen.""" + """Push active + just-finished sub-repaint rows; no-op when frozen.""" if self.frozen: return state = self.app.live_state - rows = sorted(get_live_query_rows(state), key=lambda row: row.ts_ms) + rows = get_live_query_rows(state) + take_unseen_finished(state) + rows = sorted(rows, key=lambda row: row.ts_ms) self.query_one(LiveQueryTable).set_rows(rows) self.refresh_subtitle() From 56fe02f3fc7793eecffb1b307a707d5667db79c2 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 26 May 2026 18:00:37 +0200 Subject: [PATCH 13/26] Add server reachable/unreachable wiring and fix empty log file crashes app --- src/qlever/monitor/app.py | 28 ++++- src/qlever/monitor/live_data.py | 21 ++++ src/qlever/monitor/models.py | 10 ++ src/qlever/monitor/monitor.tcss | 4 + src/qlever/monitor/views/historic.py | 12 +- src/qlever/monitor/views/live.py | 138 ++++++++++++++++++---- src/qlever/monitor/widgets/query_table.py | 10 +- test/qlever/test_live_data.py | 35 ++++++ 8 files changed, 221 insertions(+), 37 deletions(-) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 32430e49..20c53103 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -76,8 +76,21 @@ def __init__( self.window_pad_ms = 2000 * timeout self.slow_threshold = slow_threshold self.system = system - self.server_status = "checking" self.live_state = LiveState() + self.log_start_ms = None + + def set_log_start_ms(self) -> None: + """Read the log's first timestamp and cache it on the app. + + Re-reads only while the value is still None (empty log at boot); + once set, the value is immutable and subsequent calls no-op. + """ + if self.log_start_ms is not None: + return + with self.log_file.open("rb") as log_stream: + self.log_start_ms = read_first_timestamp( + log_stream, self.log_file.stat().st_size + ) def on_mount(self) -> None: """Boot the live engine, then open the Live screen.""" @@ -87,10 +100,7 @@ def on_mount(self) -> None: ) self.live_state = state self.cut_offset = cut_offset - with self.log_file.open("rb") as log_stream: - self.log_start_ms = read_first_timestamp( - log_stream, self.log_file.stat().st_size - ) + self.set_log_start_ms() self.tail_live_log() self.load_metrics_history() self.push_screen("live") @@ -123,6 +133,14 @@ def load_metrics_history(self) -> None: def action_swap_screen(self) -> None: """Toggle between Live and Historic (bound to Tab on each screen).""" target = "historic" if isinstance(self.screen, LiveScreen) else "live" + if target == "historic": + self.set_log_start_ms() + if self.log_start_ms is None: + self.notify( + "Log is empty - nothing to show in Historic yet", + severity="warning", + ) + return self.switch_screen(target) def action_copy_query(self) -> None: diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index ed5837d1..396cd7de 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -37,6 +37,9 @@ LIVE_METRIC_LABELS = ["last 5m", "last 15m", "last 1h"] LIVE_REPAINT_S = 0.25 FLASH_THRESHOLD_MS = int(LIVE_REPAINT_S * 1000) +LOG_IDLE_THRESHOLD_MS = 10_000 +PING_INTERVAL_S = 5.0 +PING_FAILS_TO_UNREACHABLE = 3 def current_ms() -> int: @@ -44,6 +47,16 @@ def current_ms() -> int: return time.time_ns() // 1_000_000 +def is_log_fresh(state: "LiveState", now_ms: int) -> bool: + """True iff the log produced a line within LOG_IDLE_THRESHOLD_MS. + + The live screen's reachability state machine uses this as the + cheap evidence stream; while it's True we skip pinging the server. + """ + last_ms = state.latest_event_ms + return last_ms is not None and now_ms - last_ms <= LOG_IDLE_THRESHOLD_MS + + class CompletedQueries: """Rolling 1h history of completed queries. @@ -111,6 +124,7 @@ class LiveState: unseen_finished: list[LiveQueryRow] = field(default_factory=list) lock: threading.Lock = field(default_factory=threading.Lock) metrics_known_from_ms: int | None = None + latest_event_ms: int | None = None def find_active_queries( @@ -135,6 +149,7 @@ def find_active_queries( if eof_ts is None: return (state, file_size, 0) + state.latest_event_ms = eof_ts lo_offset = offset_for_ts( log_stream, eof_ts - window_pad_ms, file_size ) @@ -250,6 +265,9 @@ def handle_line(self, line: bytes) -> None: sparql = "" with self.state.lock: self.state.active[qid] = (ts_ms, sparql) + self.state.latest_event_ms = max( + self.state.latest_event_ms or 0, ts_ms + ) return if event == "end": @@ -257,6 +275,9 @@ def handle_line(self, line: bytes) -> None: if status not in STATUS_SET: return with self.state.lock: + self.state.latest_event_ms = max( + self.state.latest_event_ms or 0, ts_ms + ) start = self.state.active.pop(qid, None) if start is None: return diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py index 08ae3752..7da89220 100644 --- a/src/qlever/monitor/models.py +++ b/src/qlever/monitor/models.py @@ -10,6 +10,16 @@ @dataclass(frozen=True) class LiveSubtitle: + """Subtitle line shown under the Live HeaderRow. + + state is one of: + 'checking' boot, no evidence yet + 'reachable' server confirmed alive (log fresh or ping ok) + 'pinging' was reachable, log went quiet, silently rechecking; + renders the same as reachable + 'unreachable' three consecutive pings failed + """ + endpoint: str state: str n_active: int | None diff --git a/src/qlever/monitor/monitor.tcss b/src/qlever/monitor/monitor.tcss index eb2bed72..bf698543 100644 --- a/src/qlever/monitor/monitor.tcss +++ b/src/qlever/monitor/monitor.tcss @@ -56,6 +56,10 @@ MetricsRow { margin-left: 1; } +LiveQueryTable.stale, MetricsRow.stale { + color: $text-muted; +} + /* Timeline (Historic only) */ Timeline { diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 62d622f6..755bfcd7 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -7,9 +7,6 @@ from textual.widgets import Footer, Static from qlever.monitor.historic_data import read_window, render_window -from qlever.monitor.log_reader import ( - read_last_timestamp, -) from qlever.monitor.metrics import EMPTY_FIELDS from qlever.monitor.models import ( ControlsState, @@ -113,11 +110,10 @@ def on_screen_resume(self) -> None: self.refresh_view(rescan=True) def refresh_log_end(self) -> None: - """Re-read the log's latest timestamp so the screen tracks file growth.""" - log_path = self.app.log_file - with log_path.open("rb") as log_stream: - file_size = log_path.stat().st_size - self.log_end_ms = read_last_timestamp(log_stream, file_size) + """Read the freshest log timestamp the tailer has seen.""" + state = self.app.live_state + with state.lock: + self.log_end_ms = state.latest_event_ms def show_loading_state(self) -> None: """Blank the table and metrics row while a rescan is in flight.""" diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index 8b7f760b..00f98e8b 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -9,9 +9,12 @@ from qlever.monitor.live_data import ( LIVE_REPAINT_S, + PING_FAILS_TO_UNREACHABLE, + PING_INTERVAL_S, current_ms, get_live_metrics, get_live_query_rows, + is_log_fresh, take_unseen_finished, ) from qlever.monitor.models import LiveSubtitle, SparqlContent @@ -44,10 +47,17 @@ def compose(self) -> ComposeResult: state = self.app.live_state slow_ms = self.app.slow_threshold * 1000 rows = sorted(get_live_query_rows(state), key=lambda row: row.ts_ms) + + self.liveness = ( + "reachable" if is_log_fresh(state, current_ms()) else "checking" + ) + self.consecutive_ping_fails = 0 + self.ping_timer = None + yield LiveStatusRow( LiveSubtitle( endpoint=self.app.sparql_endpoint, - state=self.app.server_status, + state=self.liveness, n_active=len(rows), ) ) @@ -58,59 +68,135 @@ def compose(self) -> ComposeResult: yield Footer() def on_mount(self) -> None: - """Start the periodic refresh of table, metrics, and server ping.""" + """Start periodic refreshes; kick off boot pings if the log is stale.""" self.table_timer = self.set_interval( LIVE_REPAINT_S, self.refresh_table ) self.metrics_timer = self.set_interval(2.0, self.refresh_metrics) - self.live_server_check() - self.ping_timer = self.set_interval(5.0, self.live_server_check) + if self.liveness == "checking": + self.start_pinging(initial=True) def on_screen_suspend(self) -> None: """Pause periodic UI work while Live isn't the active screen.""" self.table_timer.pause() self.metrics_timer.pause() - self.ping_timer.pause() + if self.ping_timer is not None: + self.ping_timer.pause() def on_screen_resume(self) -> None: - """Resume periodic UI work and refresh server status immediately.""" + """Resume periodic UI work; recheck server unless already reachable.""" self.table_timer.resume() - self.metrics_timer.resume() - self.ping_timer.resume() - self.live_server_check() + self.update_liveness_visuals() + if self.ping_timer is not None: + self.ping_timer.resume() + if self.liveness != "reachable": + self.ping_server() + + def start_pinging(self, initial: bool) -> None: + """Enter the ping cycle when the log goes quiet or at a cold boot. + + `initial` distinguishes boot's "checking" subtitle from the + invisible "pinging" recheck that follows a reachable period. + Fires one ping immediately so the first verification doesn't + wait the full interval. + """ + self.liveness = "checking" if initial else "pinging" + self.consecutive_ping_fails = 0 + if self.ping_timer is None: + self.ping_timer = self.set_interval( + PING_INTERVAL_S, self.ping_server + ) + self.ping_server() + self.refresh_subtitle() + + def mark_reachable(self) -> None: + """Confirm the server is alive and tear down the ping cycle.""" + self.liveness = "reachable" + self.consecutive_ping_fails = 0 + if self.ping_timer is not None: + self.ping_timer.stop() + self.ping_timer = None + self.refresh_subtitle() + self.update_liveness_visuals() + + def update_liveness_visuals(self) -> None: + """Sync the dim style and metric freeze with self.liveness.""" + stale = self.liveness == "unreachable" + self.query_one(LiveQueryTable).set_class(stale, "stale") + self.query_one(MetricsRow).set_class(stale, "stale") + if stale: + self.metrics_timer.pause() + else: + self.metrics_timer.resume() @work(thread=True, exclusive=True) - def live_server_check(self) -> None: - """Ping the server off the UI thread and push the result back.""" - server_alive = is_qlever_server_alive( + def ping_server(self) -> None: + """Curl the server's /ping off the UI thread.""" + ok = is_qlever_server_alive( self.app.sparql_endpoint, max_time=2 ) - self.app.call_from_thread(self.apply_server_status, server_alive) + self.app.call_from_thread(self.apply_ping_result, ok) - def apply_server_status(self, server_alive: bool) -> None: - """Cache the latest ping result, then redraw the subtitle once.""" - self.app.server_status = ( - "reachable" if server_alive else "unreachable" - ) + def apply_ping_result(self, ok: bool) -> None: + """Advance the state machine using one ping outcome.""" + if self.liveness == "reachable": + return + if is_log_fresh(self.app.live_state, current_ms()): + self.mark_reachable() + return + if ok: + self.mark_reachable() + return + self.consecutive_ping_fails += 1 + if self.consecutive_ping_fails >= PING_FAILS_TO_UNREACHABLE: + self.liveness = "unreachable" self.refresh_subtitle() + self.update_liveness_visuals() def refresh_subtitle(self) -> None: """Rebuild the subtitle to match the table currently on screen.""" rows = self.query_one(LiveQueryTable).query_rows self.query_one(LiveStatusRow).subtitle = LiveSubtitle( endpoint=self.app.sparql_endpoint, - state=self.app.server_status, + state=self.liveness, n_active=len(rows), ) + def update_liveness_from_log(self) -> None: + """Re-evaluate the reachability state from log freshness alone. + + Runs every 0.25s via refresh_table. A fresh log line is enough + evidence to flip back to reachable from any non-reachable state; + a long quiet log moves us from reachable into pinging. + """ + log_fresh = is_log_fresh(self.app.live_state, current_ms()) + if self.liveness == "reachable" and not log_fresh: + self.start_pinging(initial=False) + elif self.liveness != "reachable" and log_fresh: + self.mark_reachable() + + def display_clock_ms(self) -> int: + """The clock used for live duration math. + + When unreachable, freeze at the last log timestamp so durations + stop ticking on stale rows; otherwise trust real time. + """ + state = self.app.live_state + if self.liveness == "unreachable" and state.latest_event_ms is not None: + return state.latest_event_ms + return current_ms() + def refresh_table(self) -> None: """Push active + just-finished sub-repaint rows; no-op when frozen.""" + self.update_liveness_from_log() if self.frozen: return state = self.app.live_state rows = get_live_query_rows(state) + take_unseen_finished(state) rows = sorted(rows, key=lambda row: row.ts_ms) - self.query_one(LiveQueryTable).set_rows(rows) + table = self.query_one(LiveQueryTable) + table.clock_ms = self.display_clock_ms() + table.set_rows(rows) self.refresh_subtitle() def refresh_metrics(self) -> None: @@ -137,8 +223,16 @@ def on_resize(self) -> None: self.call_after_refresh(self.refresh_bindings) def on_nav_pill_clicked(self, message: NavPill.Clicked) -> None: - """Switch to the screen named by the clicked pill.""" - self.app.switch_screen(message.target) + """Switch to the screen named by the clicked pill. + + Routes through `action_swap_screen` when the pill points at the + other tab so the empty-log guard for Historic entry stays in one + place; any other target falls through to a plain screen switch. + """ + if message.target == "historic": + self.app.action_swap_screen() + else: + self.app.switch_screen(message.target) def on_data_table_row_selected( self, message: LiveQueryTable.RowSelected diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py index 13e4bfe8..67f29475 100644 --- a/src/qlever/monitor/widgets/query_table.py +++ b/src/qlever/monitor/widgets/query_table.py @@ -68,6 +68,8 @@ def set_rows(self, rows: list[LiveQueryRow] | list[HistoricQueryRow]) -> None: class LiveQueryTable(QueryTable): """Active queries on the Live screen; the only focusable widget.""" + clock_ms: int | None = None + def on_mount(self) -> None: """Add the columns and one table row per active query.""" self.add_column("Query ID") @@ -76,8 +78,12 @@ def on_mount(self) -> None: self.repaint_rows() def repaint_rows(self) -> None: - """Add one DataTable row per entry in self.query_rows.""" - now_ms = int(time.time() * 1000) + """Add one DataTable row per entry in self.query_rows. + + clock_ms freezes the duration math when the screen has lost + contact with the server; None falls back to wall clock. + """ + now_ms = self.clock_ms if self.clock_ms is not None else int(time.time() * 1000) for row in self.query_rows: self.add_row( truncate(row.qid, QID_WIDTH), diff --git a/test/qlever/test_live_data.py b/test/qlever/test_live_data.py index 3d624e49..41574bc2 100644 --- a/test/qlever/test_live_data.py +++ b/test/qlever/test_live_data.py @@ -181,6 +181,21 @@ def test_find_active_queries_empty_log_returns_empty_state(write_log): assert eof_ts == 0 +def test_find_active_queries_seeds_latest_event_ms_from_eof(write_log): + path = write_log( + start_line(1000, "q1") + end_line(2000, "q1") + + start_line(3000, "q2") + ) + state, _, eof_ts = find_active_queries(path, window_pad_ms=10_000) + assert state.latest_event_ms == eof_ts == 3000 + + +def test_find_active_queries_empty_log_leaves_latest_event_ms_none(write_log): + path = write_log(b"") + state, _, _ = find_active_queries(path, window_pad_ms=10_000) + assert state.latest_event_ms is None + + def make_reader(path, state, cut_offset=0, window_pad_ms=10_000, now_ms=3000): """Build a LiveLogReader with a frozen clock for poll tests.""" return LiveLogReader( @@ -274,6 +289,26 @@ def test_poll_drops_end_without_matching_start(write_log): assert len(state.completed.entries) == 0 +def test_poll_advances_latest_event_ms_to_the_newest_line(write_log): + path = write_log(start_line(1000, "q1") + end_line(2500, "q1")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.latest_event_ms == 2500 + + +def test_poll_keeps_latest_event_ms_monotonic_under_out_of_order_writes(write_log): + # Sub-millisecond jitter on the server can write an older ts_ms + # after a newer one; the field must stay at the larger value. + path = write_log(start_line(2000, "q_late") + start_line(1500, "q_early")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.latest_event_ms == 2000 + + def test_evict_stale_drops_actives_older_than_window_pad(write_log): state = LiveState() now = 1_000_000 From bb3c98e7aadb3edca2945bfda89118f18afda7e1 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 26 May 2026 18:25:42 +0200 Subject: [PATCH 14/26] Add client ip to SparqlPane header --- src/qlever/monitor/historic_data.py | 12 +++- src/qlever/monitor/live_data.py | 22 ++++--- src/qlever/monitor/log_reader.py | 33 +++++------ src/qlever/monitor/models.py | 3 + src/qlever/monitor/views/historic.py | 1 + src/qlever/monitor/views/live.py | 1 + src/qlever/monitor/widgets/sparql_pane.py | 1 + test/qlever/test_historic_data.py | 5 +- test/qlever/test_live_data.py | 12 ++-- test/qlever/test_log_reader.py | 71 +++++++++++++---------- 10 files changed, 93 insertions(+), 68 deletions(-) diff --git a/src/qlever/monitor/historic_data.py b/src/qlever/monitor/historic_data.py index 8a96d79c..db0b4d64 100644 --- a/src/qlever/monitor/historic_data.py +++ b/src/qlever/monitor/historic_data.py @@ -40,6 +40,7 @@ class LoggedQuery(NamedTuple): status: str qid: str sparql: str + client_ip: str class WindowData(NamedTuple): @@ -91,7 +92,9 @@ def read_window( for pair in completed: if pair.start_ms > window_end_ms or pair.end_ms < window_start_ms: continue - qid, sparql = load_sparql_at(log_stream, pair.start_line_offset) + qid, client_ip, sparql = load_sparql_at( + log_stream, pair.start_line_offset + ) queries.append( LoggedQuery( start_ms=pair.start_ms, @@ -99,13 +102,16 @@ def read_window( status=pair.status, qid=qid, sparql=sparql, + client_ip=client_ip, ) ) running_cutoff_ms = log_end_ms - pad_ms for start_ms, start_line_offset in still_open.values(): if start_ms > window_end_ms: continue - qid, sparql = load_sparql_at(log_stream, start_line_offset) + qid, client_ip, sparql = load_sparql_at( + log_stream, start_line_offset + ) status = "running" if start_ms >= running_cutoff_ms else "orphaned" queries.append( LoggedQuery( @@ -114,6 +120,7 @@ def read_window( status=status, qid=qid, sparql=sparql, + client_ip=client_ip, ) ) @@ -184,6 +191,7 @@ def render_window( duration_ms=display_duration_ms(query, log_end_ms), status=query.status, sparql=query.sparql, + client_ip=query.client_ip, ) for query in queries ] diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index 396cd7de..116910ef 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -157,8 +157,8 @@ def find_active_queries( _, still_open = pair_start_end_events(events) for qid, (start_ms, start_line_offset) in still_open.items(): - _, sparql = load_sparql_at(log_stream, start_line_offset) - state.active[qid] = (start_ms, sparql) + _, client_ip, sparql = load_sparql_at(log_stream, start_line_offset) + state.active[qid] = (start_ms, client_ip, sparql) return (state, file_size, eof_ts) @@ -239,7 +239,7 @@ def evict_stale(self) -> None: self.state.completed.drop_older_than(completed_cutoff) stale = [ qid - for qid, (start_ms, _) in self.state.active.items() + for qid, (start_ms, _, _) in self.state.active.items() if start_ms < active_cutoff ] for qid in stale: @@ -263,8 +263,9 @@ def handle_line(self, line: bytes) -> None: sparql = obj.get("query") if not isinstance(sparql, str): sparql = "" + client_ip = obj.get("client-ip", "") with self.state.lock: - self.state.active[qid] = (ts_ms, sparql) + self.state.active[qid] = (ts_ms, client_ip, sparql) self.state.latest_event_ms = max( self.state.latest_event_ms or 0, ts_ms ) @@ -281,7 +282,7 @@ def handle_line(self, line: bytes) -> None: start = self.state.active.pop(qid, None) if start is None: return - start_ms, sparql = start + start_ms, client_ip, sparql = start duration_ms = ts_ms - start_ms self.state.completed.add( CompletedQuery( @@ -295,7 +296,10 @@ def handle_line(self, line: bytes) -> None: if duration_ms < FLASH_THRESHOLD_MS: self.state.unseen_finished.append( LiveQueryRow( - qid=qid, ts_ms=start_ms, sparql=sparql + qid=qid, + ts_ms=start_ms, + sparql=sparql, + client_ip=client_ip, ) ) @@ -317,8 +321,10 @@ def get_live_query_rows(state: LiveState) -> list[LiveQueryRow]: with state.lock: active_snapshot = list(state.active.items()) return [ - LiveQueryRow(qid=qid, ts_ms=start_ms, sparql=sparql) - for qid, (start_ms, sparql) in active_snapshot + LiveQueryRow( + qid=qid, ts_ms=start_ms, sparql=sparql, client_ip=client_ip + ) + for qid, (start_ms, client_ip, sparql) in active_snapshot ] diff --git a/src/qlever/monitor/log_reader.py b/src/qlever/monitor/log_reader.py index 594696db..1e413b8e 100644 --- a/src/qlever/monitor/log_reader.py +++ b/src/qlever/monitor/log_reader.py @@ -330,36 +330,29 @@ def pair_start_end_events( return (completed_queries, still_open) -def extract_qid_and_query(line_bytes: bytes) -> tuple[str, str]: - """Return (qid, query) from a start line's bytes, or ("", ""). - - The byte-slice fast path is unsafe for the query field because user - SPARQL can contain escaped quotes, so this path uses real json.loads. - Both fields come from the same parse so cost is one json.loads per - call. Returns ("", "") on any malformed input or missing field; - never raises. +def extract_qid_ip_query(line_bytes: bytes) -> tuple[str, str, str]: + """Return (qid, client_ip, query) from a start line, or ("", "", ""). + + All three fields come from one json.loads. The caller has already + validated this is a start line via parse_line, so qid and query are + guaranteed present strings. client-ip falls back to "" so log lines + written before the field existed still produce usable rows. """ try: obj = json.loads(line_bytes) except (ValueError, TypeError): - return ("", "") - if not isinstance(obj, dict): - return ("", "") - qid = obj.get("qid") - query = obj.get("query") - if not isinstance(qid, str) or not isinstance(query, str): - return ("", "") - return (qid, query) + return ("", "", "") + return (obj["qid"], obj.get("client-ip", ""), obj["query"]) def load_sparql_at( log_stream: BinaryIO, line_offset: int -) -> tuple[str, str]: - """Return (qid, sparql) for the start line at line_offset. +) -> tuple[str, str, str]: + """Return (qid, client_ip, sparql) for the start line at line_offset. Used by callers that have only the offset, not the line bytes: find_active_queries on survivors (qid already known, ignored), and - Historic on displayed rows (needs both). + Historic on displayed rows (needs all three for the SparqlPane). """ log_stream.seek(line_offset) - return extract_qid_and_query(log_stream.readline()) + return extract_qid_ip_query(log_stream.readline()) diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py index 7da89220..2df62c44 100644 --- a/src/qlever/monitor/models.py +++ b/src/qlever/monitor/models.py @@ -45,6 +45,7 @@ class LiveQueryRow: qid: str ts_ms: int sparql: str + client_ip: str = "" @dataclass(frozen=True) @@ -60,6 +61,7 @@ class HistoricQueryRow: duration_ms: int status: str sparql: str + client_ip: str = "" @dataclass(frozen=True) @@ -74,6 +76,7 @@ class SparqlContent: started_at_ms: int status: str | None sparql_text: str + client_ip: str = "" @dataclass(frozen=True) diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 755bfcd7..96cb16d0 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -319,4 +319,5 @@ def on_data_table_row_selected( started_at_ms=row.started_at_ms, status=row.status, sparql_text=row.sparql, + client_ip=row.client_ip, ) diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index 00f98e8b..83e0c195 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -244,4 +244,5 @@ def on_data_table_row_selected( started_at_ms=row.ts_ms, status=None, sparql_text=row.sparql, + client_ip=row.client_ip, ) diff --git a/src/qlever/monitor/widgets/sparql_pane.py b/src/qlever/monitor/widgets/sparql_pane.py index c78e74b8..0955f0f0 100644 --- a/src/qlever/monitor/widgets/sparql_pane.py +++ b/src/qlever/monitor/widgets/sparql_pane.py @@ -17,6 +17,7 @@ def format_header(content: SparqlContent) -> str: """One-line query identity shown above the SPARQL body.""" parts = [ f"[b]Server Query ID:[/b] {content.qid}", + f"[b]Client IP:[/b] {content.client_ip or '-'}", f"[b]Started at:[/b] {format_timestamp(content.started_at_ms)}", ] if content.status is not None: diff --git a/test/qlever/test_historic_data.py b/test/qlever/test_historic_data.py index ba12cde2..03cb6824 100644 --- a/test/qlever/test_historic_data.py +++ b/test/qlever/test_historic_data.py @@ -52,6 +52,7 @@ def test_read_window_keeps_pair_entirely_inside_window(write_log): status="ok", qid="q1", sparql="SELECT a", + client_ip="x", ) ] assert data.metrics.seen == 1 @@ -111,6 +112,7 @@ def test_read_window_includes_still_open_with_running_status(write_log): status="running", qid="live", sparql="SELECT live", + client_ip="x", ) ] @@ -129,6 +131,7 @@ def test_read_window_marks_still_open_as_orphaned_when_log_advanced_past_pad( status="orphaned", qid="ghost", sparql="SELECT ghost", + client_ip="x", ) ] @@ -162,7 +165,7 @@ def test_read_window_metrics_count_only_ends_inside_window(write_log): def make_query(qid, start_ms, end_ms, status="ok"): return LoggedQuery( start_ms=start_ms, end_ms=end_ms, status=status, - qid=qid, sparql=f"SELECT {qid}", + qid=qid, sparql=f"SELECT {qid}", client_ip="", ) diff --git a/test/qlever/test_live_data.py b/test/qlever/test_live_data.py index 41574bc2..b5ac4f13 100644 --- a/test/qlever/test_live_data.py +++ b/test/qlever/test_live_data.py @@ -158,7 +158,7 @@ def test_find_active_queries_picks_up_unmatched_starts(write_log): ) state, cut_offset, eof_ts = find_active_queries(path, window_pad_ms=10_000) assert list(state.active) == ["q2"] - assert state.active["q2"] == (2000, "SELECT b") + assert state.active["q2"] == (2000, "x", "SELECT b") assert cut_offset == path.stat().st_size assert eof_ts == 3000 @@ -169,8 +169,8 @@ def test_find_active_queries_loads_sparql_for_each_survivor(write_log): + start_line(2000, "b", "SELECT beta") ) state, _, _ = find_active_queries(path, window_pad_ms=10_000) - assert state.active["a"] == (1000, "SELECT alpha") - assert state.active["b"] == (2000, "SELECT beta") + assert state.active["a"] == (1000, "x", "SELECT alpha") + assert state.active["b"] == (2000, "x", "SELECT beta") def test_find_active_queries_empty_log_returns_empty_state(write_log): @@ -228,7 +228,7 @@ def test_poll_keeps_start_in_active_until_end_arrives(write_log): reader = make_reader(path, state) with path.open("rb") as log_stream: reader.poll(log_stream) - assert state.active == {"q1": (1000, "SELECT a")} + assert state.active == {"q1": (1000, "x", "SELECT a")} assert len(state.completed.entries) == 0 @@ -313,8 +313,8 @@ def test_evict_stale_drops_actives_older_than_window_pad(write_log): state = LiveState() now = 1_000_000 pad = 10_000 - state.active["fresh"] = (now - 1_000, "") - state.active["old"] = (now - 20_000, "") + state.active["fresh"] = (now - 1_000, "", "") + state.active["old"] = (now - 20_000, "", "") reader = LiveLogReader( log_path=write_log(b""), state=state, cut_offset=0, window_pad_ms=pad, now_ms=lambda: now, diff --git a/test/qlever/test_log_reader.py b/test/qlever/test_log_reader.py index 8576beca..6c5e67fe 100644 --- a/test/qlever/test_log_reader.py +++ b/test/qlever/test_log_reader.py @@ -3,7 +3,7 @@ from qlever.monitor import log_reader from qlever.monitor.log_reader import ( CompletedQuery, - extract_qid_and_query, + extract_qid_ip_query, load_sparql_at, next_whole_line, offset_for_ts, @@ -378,44 +378,56 @@ def test_pair_start_end_events_status_flows_through(status): assert completed[0].status == status -def test_extract_qid_and_query_returns_both_fields(): +def test_extract_qid_ip_query_returns_all_three_fields(): line = ( - b'{"ts-ms":1,"event":"start","qid":"q1",' + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' b'"query":"SELECT * WHERE { ?s ?p ?o }"}\n' ) - assert extract_qid_and_query(line) == ( + assert extract_qid_ip_query(line) == ( "q1", + "1.2.3.4", "SELECT * WHERE { ?s ?p ?o }", ) -def test_extract_qid_and_query_handles_escaped_quote_in_query(): +def test_extract_qid_ip_query_handles_escaped_quote_in_query(): line = ( - b'{"ts-ms":1,"event":"start","qid":"q1",' + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' b'"query":"SELECT ?x WHERE { ?x ?p \\"a\\" }"}\n' ) - assert extract_qid_and_query(line) == ( + assert extract_qid_ip_query(line) == ( "q1", + "1.2.3.4", 'SELECT ?x WHERE { ?x ?p "a" }', ) -def test_extract_qid_and_query_missing_field_returns_empty(): - assert extract_qid_and_query(END + b"\n") == ("", "") +def test_extract_qid_ip_query_missing_client_ip_falls_back_to_empty(): + # Older log lines written before the client-ip field existed. + line = ( + b'{"ts-ms":1,"event":"start","qid":"q1",' + b'"query":"SELECT * WHERE { ?s ?p ?o }"}\n' + ) + assert extract_qid_ip_query(line) == ( + "q1", + "", + "SELECT * WHERE { ?s ?p ?o }", + ) -def test_extract_qid_and_query_malformed_json_returns_empty(): - assert extract_qid_and_query(b"this is not json\n") == ("", "") +def test_extract_qid_ip_query_malformed_json_returns_empty(): + assert extract_qid_ip_query(b"this is not json\n") == ("", "", "") -def test_load_sparql_at_returns_qid_and_query(open_log): +def test_load_sparql_at_returns_qid_ip_and_query(open_log): line = ( - b'{"ts-ms":1,"event":"start","qid":"q1",' - b'"client-ip":"1.2.3.4","query":"SELECT * WHERE { ?s ?p ?o }"}\n' + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' + b'"query":"SELECT * WHERE { ?s ?p ?o }"}\n' ) log, _ = open_log(line) assert load_sparql_at(log, 0) == ( "q1", + "1.2.3.4", "SELECT * WHERE { ?s ?p ?o }", ) @@ -423,49 +435,46 @@ def test_load_sparql_at_returns_qid_and_query(open_log): def test_load_sparql_at_handles_escaped_quotes_and_backslashes(open_log): # The JSON value encodes a literal " and a literal \ in the query. line = ( - b'{"ts-ms":1,"event":"start","qid":"q1",' + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' b'"query":"SELECT ?x WHERE { ?x ?p \\"a\\\\b\\" }"}\n' ) log, _ = open_log(line) assert load_sparql_at(log, 0) == ( "q1", + "1.2.3.4", 'SELECT ?x WHERE { ?x ?p "a\\b" }', ) def test_load_sparql_at_handles_unicode(open_log): line = ( - b'{"ts-ms":1,"event":"start","qid":"q1",' + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' b'"query":"SELECT ?s WHERE { ?s ?p \xe2\x98\x83 }"}\n' ) log, _ = open_log(line) - assert load_sparql_at(log, 0) == ("q1", "SELECT ?s WHERE { ?s ?p ☃ }") - - -def test_load_sparql_at_end_line_has_no_query_returns_empty(open_log): - log, _ = open_log(END + b"\n") - assert load_sparql_at(log, 0) == ("", "") + assert load_sparql_at(log, 0) == ( + "q1", + "1.2.3.4", + "SELECT ?s WHERE { ?s ?p ☃ }", + ) def test_load_sparql_at_malformed_json_returns_empty(open_log): log, _ = open_log(b"this is not json\n") - assert load_sparql_at(log, 0) == ("", "") - - -def test_load_sparql_at_top_level_non_object_returns_empty(open_log): - log, _ = open_log(b"42\n") - assert load_sparql_at(log, 0) == ("", "") + assert load_sparql_at(log, 0) == ("", "", "") def test_load_sparql_at_reads_the_line_at_the_given_offset(open_log): first = ( - b'{"ts-ms":1,"event":"start","qid":"q1","query":"FIRST"}\n' + b'{"ts-ms":1,"event":"start","qid":"q1","client-ip":"1.2.3.4",' + b'"query":"FIRST"}\n' ) second = ( - b'{"ts-ms":2,"event":"start","qid":"q2","query":"SECOND"}\n' + b'{"ts-ms":2,"event":"start","qid":"q2","client-ip":"5.6.7.8",' + b'"query":"SECOND"}\n' ) log, _ = open_log(first + second) - assert load_sparql_at(log, len(first)) == ("q2", "SECOND") + assert load_sparql_at(log, len(first)) == ("q2", "5.6.7.8", "SECOND") def test_read_last_timestamp_empty_file_returns_none(open_log): From be70a6ebf4de669b374a06f2471a476dc7eea342 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Tue, 26 May 2026 23:58:42 +0200 Subject: [PATCH 15/26] Fix flash query duration and extra linger, plus orphaned query logic --- src/qlever/monitor/historic_data.py | 15 +- src/qlever/monitor/live_data.py | 37 ++-- src/qlever/monitor/models.py | 9 +- src/qlever/monitor/views/historic.py | 8 +- src/qlever/monitor/views/live.py | 32 ++-- src/qlever/monitor/widgets/query_table.py | 21 +-- src/qlever/monitor/widgets/status_row.py | 2 +- test/qlever/test_historic_data.py | 197 +++++++++++++++++----- test/qlever/test_live_data.py | 189 ++++++++++++++++----- 9 files changed, 369 insertions(+), 141 deletions(-) diff --git a/src/qlever/monitor/historic_data.py b/src/qlever/monitor/historic_data.py index db0b4d64..f6bc53cf 100644 --- a/src/qlever/monitor/historic_data.py +++ b/src/qlever/monitor/historic_data.py @@ -64,6 +64,7 @@ def read_window( pad_ms: int, slow_threshold_ms: int, log_end_ms: int, + now_ms: int, ) -> WindowData: """Scan one time window of the log into a `WindowData` snapshot. @@ -74,10 +75,9 @@ def read_window( a mode predicate could keep. Metrics count completions whose `end_ms` lies inside `[window_start_ms, window_end_ms]`. - A still-open query is labelled `"running"` when its start sits - within `pad_ms` of `log_end_ms` (recent enough to plausibly still - be executing) and `"orphaned"` otherwise (the server must have - crashed before writing the end event). + A still-open query is `"running"` only if its start is within + `pad_ms` of `log_end_ms` and the log itself is fresh + (`now_ms - log_end_ms <= pad_ms`); otherwise `"orphaned"`. """ with log_path.open("rb") as log_stream: file_size = log_path.stat().st_size @@ -105,6 +105,7 @@ def read_window( client_ip=client_ip, ) ) + log_is_fresh = now_ms - log_end_ms <= pad_ms running_cutoff_ms = log_end_ms - pad_ms for start_ms, start_line_offset in still_open.values(): if start_ms > window_end_ms: @@ -112,7 +113,11 @@ def read_window( qid, client_ip, sparql = load_sparql_at( log_stream, start_line_offset ) - status = "running" if start_ms >= running_cutoff_ms else "orphaned" + status = ( + "running" + if log_is_fresh and start_ms >= running_cutoff_ms + else "orphaned" + ) queries.append( LoggedQuery( start_ms=start_ms, diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index 116910ef..cc6754fa 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -101,9 +101,7 @@ def metrics_for_windows( not-yet-ready. Returns snapshots in the same order as windows_ms. """ ranges = [(now_ms - width, now_ms) for width in windows_ms] - snapshots = metrics_for_ranges( - self.entries, ranges, slow_threshold_ms - ) + snapshots = metrics_for_ranges(self.entries, ranges, slow_threshold_ms) return [ None if lo_ms < data_start_ms else snapshot for (lo_ms, _), snapshot in zip(ranges, snapshots) @@ -157,7 +155,9 @@ def find_active_queries( _, still_open = pair_start_end_events(events) for qid, (start_ms, start_line_offset) in still_open.items(): - _, client_ip, sparql = load_sparql_at(log_stream, start_line_offset) + _, client_ip, sparql = load_sparql_at( + log_stream, start_line_offset + ) state.active[qid] = (start_ms, client_ip, sparql) return (state, file_size, eof_ts) @@ -297,7 +297,8 @@ def handle_line(self, line: bytes) -> None: self.state.unseen_finished.append( LiveQueryRow( qid=qid, - ts_ms=start_ms, + started_at_ms=start_ms, + duration_ms=duration_ms, sparql=sparql, client_ip=client_ip, ) @@ -316,13 +317,21 @@ def take_unseen_finished(state: LiveState) -> list[LiveQueryRow]: return snapshot -def get_live_query_rows(state: LiveState) -> list[LiveQueryRow]: - """Snapshot the active set as a list of UI rows; no sort.""" +def get_live_query_rows(state: LiveState, now_ms: int) -> list[LiveQueryRow]: + """Snapshot the active set as table rows with their current duration. + + The caller chooses the clock: the Live screen passes + display_clock_ms() so freeze-on-unreachable freezes durations too. + """ with state.lock: active_snapshot = list(state.active.items()) return [ LiveQueryRow( - qid=qid, ts_ms=start_ms, sparql=sparql, client_ip=client_ip + qid=qid, + started_at_ms=start_ms, + duration_ms=now_ms - start_ms, + sparql=sparql, + client_ip=client_ip, ) for qid, (start_ms, client_ip, sparql) in active_snapshot ] @@ -367,9 +376,11 @@ def get_live_metrics( if snap is None and coverage_start_ms is not None: eta_ms = coverage_start_ms + width_ms - now_ms message = f"ready in {format_eta(eta_ms)}" - rows.append(MetricsCounts( - label=label, - **(snap._asdict() if snap is not None else EMPTY_FIELDS), - not_ready_message=message, - )) + rows.append( + MetricsCounts( + label=label, + **(snap._asdict() if snap is not None else EMPTY_FIELDS), + not_ready_message=message, + ) + ) return rows diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py index 2df62c44..0af6aba0 100644 --- a/src/qlever/monitor/models.py +++ b/src/qlever/monitor/models.py @@ -43,19 +43,14 @@ class MetricsCounts: @dataclass(frozen=True) class LiveQueryRow: qid: str - ts_ms: int + started_at_ms: int + duration_ms: int sparql: str client_ip: str = "" @dataclass(frozen=True) class HistoricQueryRow: - """A finished query in the current window. - - Unlike LiveQueryRow these carry a terminal `status` and a stored - `duration_ms` (not an elapsed `now - ts`). - """ - qid: str started_at_ms: int duration_ms: int diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 96cb16d0..5d38f040 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -7,6 +7,7 @@ from textual.widgets import Footer, Static from qlever.monitor.historic_data import read_window, render_window +from qlever.monitor.live_data import current_ms from qlever.monitor.metrics import EMPTY_FIELDS from qlever.monitor.models import ( ControlsState, @@ -171,6 +172,7 @@ def refresh_data(self, rescan: bool) -> None: self.app.window_pad_ms, self.app.slow_threshold * 1000, self.log_end_ms, + current_ms(), ) controls = ControlsState( window_size=self.window_size, @@ -272,8 +274,7 @@ def action_snap_start(self) -> None: width = preset_ms(self.window_size) self.window_start_ms = self.log_start_ms self.window_end_ms = ( - self.log_end_ms if width is None - else self.log_start_ms + width + self.log_end_ms if width is None else self.log_start_ms + width ) self.refresh_view(rescan=True) @@ -282,8 +283,7 @@ def action_snap_end(self) -> None: width = preset_ms(self.window_size) self.window_end_ms = self.log_end_ms self.window_start_ms = ( - self.log_start_ms if width is None - else self.log_end_ms - width + self.log_start_ms if width is None else self.log_end_ms - width ) self.refresh_view(rescan=True) diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index 83e0c195..b5819594 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -46,7 +46,11 @@ def compose(self) -> ComposeResult: ) state = self.app.live_state slow_ms = self.app.slow_threshold * 1000 - rows = sorted(get_live_query_rows(state), key=lambda row: row.ts_ms) + rows = sorted( + get_live_query_rows(state, current_ms()), + key=lambda row: row.duration_ms, + reverse=True, + ) self.liveness = ( "reachable" if is_log_fresh(state, current_ms()) else "checking" @@ -85,6 +89,8 @@ def on_screen_suspend(self) -> None: def on_screen_resume(self) -> None: """Resume periodic UI work; recheck server unless already reachable.""" + # Drop flashes accumulated while suspended. + take_unseen_finished(self.app.live_state) self.table_timer.resume() self.update_liveness_visuals() if self.ping_timer is not None: @@ -132,9 +138,7 @@ def update_liveness_visuals(self) -> None: @work(thread=True, exclusive=True) def ping_server(self) -> None: """Curl the server's /ping off the UI thread.""" - ok = is_qlever_server_alive( - self.app.sparql_endpoint, max_time=2 - ) + ok = is_qlever_server_alive(self.app.sparql_endpoint, max_time=2) self.app.call_from_thread(self.apply_ping_result, ok) def apply_ping_result(self, ok: bool) -> None: @@ -182,7 +186,10 @@ def display_clock_ms(self) -> int: stop ticking on stale rows; otherwise trust real time. """ state = self.app.live_state - if self.liveness == "unreachable" and state.latest_event_ms is not None: + if ( + self.liveness == "unreachable" + and state.latest_event_ms is not None + ): return state.latest_event_ms return current_ms() @@ -192,11 +199,14 @@ def refresh_table(self) -> None: if self.frozen: return state = self.app.live_state - rows = get_live_query_rows(state) + take_unseen_finished(state) - rows = sorted(rows, key=lambda row: row.ts_ms) - table = self.query_one(LiveQueryTable) - table.clock_ms = self.display_clock_ms() - table.set_rows(rows) + active_rows = get_live_query_rows(state, self.display_clock_ms()) + flashed_rows = take_unseen_finished(state) + rows = sorted( + active_rows + flashed_rows, + key=lambda row: row.duration_ms, + reverse=True, + ) + self.query_one(LiveQueryTable).set_rows(rows) self.refresh_subtitle() def refresh_metrics(self) -> None: @@ -241,7 +251,7 @@ def on_data_table_row_selected( row = message.data_table.query_rows[message.cursor_row] self.query_one(SparqlPane).content = SparqlContent( qid=row.qid, - started_at_ms=row.ts_ms, + started_at_ms=row.started_at_ms, status=None, sparql_text=row.sparql, client_ip=row.client_ip, diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py index 67f29475..5fcfb2dd 100644 --- a/src/qlever/monitor/widgets/query_table.py +++ b/src/qlever/monitor/widgets/query_table.py @@ -1,7 +1,5 @@ from __future__ import annotations -import time - from rich.text import Text from textual.widgets import DataTable @@ -39,12 +37,16 @@ class QueryTable(DataTable): its own columns, since the two column sets differ. """ - def __init__(self, rows: list[LiveQueryRow] | list[HistoricQueryRow]) -> None: + def __init__( + self, rows: list[LiveQueryRow] | list[HistoricQueryRow] + ) -> None: """Hold the rows to paint once the table is mounted.""" super().__init__(cursor_type="row") self.query_rows = rows - def set_rows(self, rows: list[LiveQueryRow] | list[HistoricQueryRow]) -> None: + def set_rows( + self, rows: list[LiveQueryRow] | list[HistoricQueryRow] + ) -> None: """Replace the table rows, preserving the cursor position by qid.""" cursor_qid = None old_index = self.cursor_row @@ -68,8 +70,6 @@ def set_rows(self, rows: list[LiveQueryRow] | list[HistoricQueryRow]) -> None: class LiveQueryTable(QueryTable): """Active queries on the Live screen; the only focusable widget.""" - clock_ms: int | None = None - def on_mount(self) -> None: """Add the columns and one table row per active query.""" self.add_column("Query ID") @@ -78,16 +78,11 @@ def on_mount(self) -> None: self.repaint_rows() def repaint_rows(self) -> None: - """Add one DataTable row per entry in self.query_rows. - - clock_ms freezes the duration math when the screen has lost - contact with the server; None falls back to wall clock. - """ - now_ms = self.clock_ms if self.clock_ms is not None else int(time.time() * 1000) + """Add one DataTable row per entry in self.query_rows.""" for row in self.query_rows: self.add_row( truncate(row.qid, QID_WIDTH), - Text(format_duration(now_ms - row.ts_ms), justify="right"), + Text(format_duration(row.duration_ms), justify="right"), truncate(oneline(row.sparql), SPARQL_WIDTH), ) diff --git a/src/qlever/monitor/widgets/status_row.py b/src/qlever/monitor/widgets/status_row.py index 3008095d..dbefee10 100644 --- a/src/qlever/monitor/widgets/status_row.py +++ b/src/qlever/monitor/widgets/status_row.py @@ -34,6 +34,6 @@ def __init__(self, subtitle: LiveSubtitle) -> None: """Render the subtitle text computed from `subtitle` and `endpoint`.""" super().__init__(format_subtitle(subtitle)) self.set_reactive(LiveStatusRow.subtitle, subtitle) - + def watch_subtitle(self, subtitle: LiveSubtitle) -> None: self.update(format_subtitle(subtitle)) diff --git a/test/qlever/test_historic_data.py b/test/qlever/test_historic_data.py index 03cb6824..d3507bd1 100644 --- a/test/qlever/test_historic_data.py +++ b/test/qlever/test_historic_data.py @@ -11,7 +11,6 @@ from qlever.monitor.metrics import MetricsSnapshot from qlever.monitor.models import ControlsState - PAD_MS = 10_000 SLOW_MS = 60_000 @@ -32,18 +31,23 @@ def end_line(ts_ms, qid, status="ok"): def test_read_window_empty_log_returns_no_queries(write_log): path = write_log(b"") - data = read_window(path, 0, 1000, PAD_MS, SLOW_MS, log_end_ms=0) + data = read_window(path, 0, 1000, PAD_MS, SLOW_MS, log_end_ms=0, now_ms=0) assert data.queries == [] assert data.metrics.seen == 0 def test_read_window_keeps_pair_entirely_inside_window(write_log): path = write_log( - start_line(1_000_000, "q1", "SELECT a") - + end_line(1_000_500, "q1") + start_line(1_000_000, "q1", "SELECT a") + end_line(1_000_500, "q1") ) data = read_window( - path, 900_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_000_500 + path, + 900_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_000_500, + now_ms=1_000_500, ) assert data.queries == [ LoggedQuery( @@ -67,7 +71,13 @@ def test_read_window_drops_pair_inside_pad_before_window(write_log): + end_line(1_000_500, "q1") ) data = read_window( - path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_000_500 + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_000_500, + now_ms=1_000_500, ) qids = [query.qid for query in data.queries] assert qids == ["q1"] @@ -80,7 +90,13 @@ def test_read_window_keeps_pair_straddling_window_start(write_log): + end_line(1_005_000, "edge") ) data = read_window( - path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_005_000 + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_005_000, + now_ms=1_005_000, ) assert [query.qid for query in data.queries] == ["edge"] assert data.queries[0].start_ms == 995_000 @@ -94,7 +110,13 @@ def test_read_window_keeps_pair_straddling_window_end(write_log): + end_line(1_105_000, "edge") ) data = read_window( - path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_105_000 + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_105_000, + now_ms=1_105_000, ) assert [query.qid for query in data.queries] == ["edge"] assert data.metrics.seen == 0 @@ -103,7 +125,13 @@ def test_read_window_keeps_pair_straddling_window_end(write_log): def test_read_window_includes_still_open_with_running_status(write_log): path = write_log(start_line(1_050_000, "live", "SELECT live")) data = read_window( - path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_050_000 + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_050_000, + now_ms=1_050_000, ) assert data.queries == [ LoggedQuery( @@ -122,7 +150,13 @@ def test_read_window_marks_still_open_as_orphaned_when_log_advanced_past_pad( ): path = write_log(start_line(1_050_000, "ghost", "SELECT ghost")) data = read_window( - path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_200_000 + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_200_000, + now_ms=1_200_000, ) assert data.queries == [ LoggedQuery( @@ -136,10 +170,37 @@ def test_read_window_marks_still_open_as_orphaned_when_log_advanced_past_pad( ] +def test_read_window_marks_still_open_as_orphaned_when_log_is_stale(write_log): + """A still-open query is orphaned once the log has been silent past 2t. + + The start is within pad of log_end_ms (gate (a) passes), but wall + clock has moved past log_end_ms by more than pad (gate (b) fails), + so the server must be dead and the survivor cannot be running. + """ + path = write_log(start_line(1_050_000, "ghost", "SELECT ghost")) + data = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_050_000, + now_ms=1_050_000 + PAD_MS + 1, + ) + [query] = data.queries + assert query.status == "orphaned" + + def test_read_window_drops_still_open_started_after_window(write_log): path = write_log(start_line(1_200_000, "later", "SELECT later")) data = read_window( - path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_200_000 + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_200_000, + now_ms=1_200_000, ) assert data.queries == [] @@ -154,43 +215,66 @@ def test_read_window_metrics_count_only_ends_inside_window(write_log): + end_line(1_105_000, "edge_end") ) data = read_window( - path, 1_000_000, 1_100_000, PAD_MS, SLOW_MS, log_end_ms=1_105_000 + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_105_000, + now_ms=1_105_000, ) assert {query.qid for query in data.queries} == { - "edge_start", "inside", "edge_end", + "edge_start", + "inside", + "edge_end", } assert data.metrics.seen == 2 def make_query(qid, start_ms, end_ms, status="ok"): return LoggedQuery( - start_ms=start_ms, end_ms=end_ms, status=status, - qid=qid, sparql=f"SELECT {qid}", client_ip="", + start_ms=start_ms, + end_ms=end_ms, + status=status, + qid=qid, + sparql=f"SELECT {qid}", + client_ip="", ) def make_cache(queries): empty_metrics = MetricsSnapshot( - seen=0, ok=0, failed=0, timeout=0, cancelled=0, unknown=0, - slow=0, p50=None, p95=None, + seen=0, + ok=0, + failed=0, + timeout=0, + cancelled=0, + unknown=0, + slow=0, + p50=None, + p95=None, ) return WindowData(queries=queries, metrics=empty_metrics) def test_filter_queries_active_returns_cache_unchanged(): - cache = make_cache([ - make_query("a", 100, 200), - make_query("b", 300, None, status="running"), - ]) + cache = make_cache( + [ + make_query("a", 100, 200), + make_query("b", 300, None, status="running"), + ] + ) assert filter_queries(cache, "ACTIVE", 0, 1000) is cache.queries def test_filter_queries_starts_keeps_only_starts_in_window(): - cache = make_cache([ - make_query("early", 50, 150), - make_query("inside", 200, 400), - make_query("late", 1100, 1200), - ]) + cache = make_cache( + [ + make_query("early", 50, 150), + make_query("inside", 200, 400), + make_query("late", 1100, 1200), + ] + ) qids = [query.qid for query in filter_queries(cache, "STARTS", 100, 1000)] assert qids == ["inside"] @@ -202,37 +286,53 @@ def test_filter_queries_starts_includes_running_queries(): def test_filter_queries_ends_excludes_running_queries(): - cache = make_cache([ - make_query("done", 200, 500), - make_query("live", 300, None, status="running"), - ]) + cache = make_cache( + [ + make_query("done", 200, 500), + make_query("live", 300, None, status="running"), + ] + ) qids = [query.qid for query in filter_queries(cache, "ENDS", 100, 1000)] assert qids == ["done"] def test_filter_queries_ends_drops_ends_outside_window(): - cache = make_cache([ - make_query("before", 50, 90), - make_query("inside", 200, 500), - make_query("after", 800, 1200), - ]) + cache = make_cache( + [ + make_query("before", 50, 90), + make_query("inside", 200, 500), + make_query("after", 800, 1200), + ] + ) qids = [query.qid for query in filter_queries(cache, "ENDS", 100, 1000)] assert qids == ["inside"] def make_controls(mode="ACTIVE"): return ControlsState( - window_size="15m", mode=mode, start_ms=1_000_000, end_ms=2_000_000, + window_size="15m", + mode=mode, + start_ms=1_000_000, + end_ms=2_000_000, ) def make_render_cache(queries, **metric_fields): snapshot_fields = dict( - seen=0, ok=0, failed=0, timeout=0, cancelled=0, unknown=0, - slow=0, p50=None, p95=None, + seen=0, + ok=0, + failed=0, + timeout=0, + cancelled=0, + unknown=0, + slow=0, + p50=None, + p95=None, ) snapshot_fields.update(metric_fields) - return WindowData(queries=queries, metrics=MetricsSnapshot(**snapshot_fields)) + return WindowData( + queries=queries, metrics=MetricsSnapshot(**snapshot_fields) + ) def test_render_window_uses_recorded_end_for_completed_query(): @@ -263,7 +363,10 @@ def test_render_window_marks_orphan_duration_as_unknown(): def test_render_window_passes_window_size_as_metric_label(): cache = make_render_cache([], seen=7, ok=7) controls = ControlsState( - window_size="1h", mode="ACTIVE", start_ms=0, end_ms=1, + window_size="1h", + mode="ACTIVE", + start_ms=0, + end_ms=1, ) _, metrics = render_window(cache, controls, log_end_ms=1) assert metrics.label == "1h" @@ -272,9 +375,13 @@ def test_render_window_passes_window_size_as_metric_label(): def test_render_window_respects_mode_filter(): - cache = make_render_cache([ - make_query("done", 1_100_000, 1_200_000), - make_query("live", 1_500_000, None, status="running"), - ]) - rows, _ = render_window(cache, make_controls(mode="ENDS"), log_end_ms=2_500_000) + cache = make_render_cache( + [ + make_query("done", 1_100_000, 1_200_000), + make_query("live", 1_500_000, None, status="running"), + ] + ) + rows, _ = render_window( + cache, make_controls(mode="ENDS"), log_end_ms=2_500_000 + ) assert [row.qid for row in rows] == ["done"] diff --git a/test/qlever/test_live_data.py b/test/qlever/test_live_data.py index b5ac4f13..b5192538 100644 --- a/test/qlever/test_live_data.py +++ b/test/qlever/test_live_data.py @@ -4,16 +4,20 @@ import time from qlever.monitor.live_data import ( + FLASH_THRESHOLD_MS, LIVE_HORIZON_MS, CompletedQueries, LiveLogReader, LiveState, find_active_queries, get_live_metrics, + get_live_query_rows, load_completed_history, + take_unseen_finished, ) from qlever.monitor.log_reader import CompletedQuery from qlever.monitor.metrics import MetricsSnapshot, percentiles +from qlever.monitor.models import LiveQueryRow NOW_MS = 1_700_000_000_000 MIN_MS = 60_000 @@ -41,8 +45,15 @@ def test_empty_history_returns_empty_snapshot(): data_start_ms=NOW_MS - WINDOW_5M, ) assert snap == MetricsSnapshot( - seen=0, ok=0, failed=0, timeout=0, cancelled=0, unknown=0, - slow=0, p50=None, p95=None, + seen=0, + ok=0, + failed=0, + timeout=0, + cancelled=0, + unknown=0, + slow=0, + p50=None, + p95=None, ) @@ -94,7 +105,9 @@ def test_partial_coverage_returns_none(): def test_slow_threshold_independent_of_status(): history = CompletedQueries() - history.add(make_completed(NOW_MS - MIN_MS, status="ok", duration_ms=12_000)) + history.add( + make_completed(NOW_MS - MIN_MS, status="ok", duration_ms=12_000) + ) [snap] = history.metrics_for_windows( now_ms=NOW_MS, windows_ms=[WINDOW_5M], @@ -183,8 +196,7 @@ def test_find_active_queries_empty_log_returns_empty_state(write_log): def test_find_active_queries_seeds_latest_event_ms_from_eof(write_log): path = write_log( - start_line(1000, "q1") + end_line(2000, "q1") - + start_line(3000, "q2") + start_line(1000, "q1") + end_line(2000, "q1") + start_line(3000, "q2") ) state, _, eof_ts = find_active_queries(path, window_pad_ms=10_000) assert state.latest_event_ms == eof_ts == 3000 @@ -241,7 +253,9 @@ def test_poll_advances_cursor_so_second_poll_sees_new_lines(write_log): reader.poll(log_stream) assert len(state.completed.entries) == 1 - path.write_bytes(first + start_line(3000, "q2") + end_line(4000, "q2", "failed")) + path.write_bytes( + first + start_line(3000, "q2") + end_line(4000, "q2", "failed") + ) with path.open("rb") as log_stream: reader.poll(log_stream) assert len(state.completed.entries) == 2 @@ -268,9 +282,7 @@ def test_poll_partial_trailing_line_is_left_for_next_poll(write_log): def test_poll_skips_malformed_line_and_continues(write_log): path = write_log( - b"not json at all\n" - + start_line(1000, "q1") - + end_line(2000, "q1") + b"not json at all\n" + start_line(1000, "q1") + end_line(2000, "q1") ) state = LiveState() reader = make_reader(path, state) @@ -298,7 +310,9 @@ def test_poll_advances_latest_event_ms_to_the_newest_line(write_log): assert state.latest_event_ms == 2500 -def test_poll_keeps_latest_event_ms_monotonic_under_out_of_order_writes(write_log): +def test_poll_keeps_latest_event_ms_monotonic_under_out_of_order_writes( + write_log, +): # Sub-millisecond jitter on the server can write an older ts_ms # after a newer one; the field must stay at the larger value. path = write_log(start_line(2000, "q_late") + start_line(1500, "q_early")) @@ -316,8 +330,11 @@ def test_evict_stale_drops_actives_older_than_window_pad(write_log): state.active["fresh"] = (now - 1_000, "", "") state.active["old"] = (now - 20_000, "", "") reader = LiveLogReader( - log_path=write_log(b""), state=state, cut_offset=0, - window_pad_ms=pad, now_ms=lambda: now, + log_path=write_log(b""), + state=state, + cut_offset=0, + window_pad_ms=pad, + now_ms=lambda: now, ) reader.evict_stale() assert list(state.active) == ["fresh"] @@ -326,18 +343,30 @@ def test_evict_stale_drops_actives_older_than_window_pad(write_log): def test_evict_stale_drops_completed_older_than_one_hour(write_log): state = LiveState() now = 10_000_000 - state.completed.add(CompletedQuery( - start_ms=now - LIVE_HORIZON_MS - 1_000, - end_ms=now - LIVE_HORIZON_MS - 500, - duration_ms=500, status="ok", start_line_offset=None, - )) - state.completed.add(CompletedQuery( - start_ms=now - 1_000, end_ms=now - 500, - duration_ms=500, status="ok", start_line_offset=None, - )) + state.completed.add( + CompletedQuery( + start_ms=now - LIVE_HORIZON_MS - 1_000, + end_ms=now - LIVE_HORIZON_MS - 500, + duration_ms=500, + status="ok", + start_line_offset=None, + ) + ) + state.completed.add( + CompletedQuery( + start_ms=now - 1_000, + end_ms=now - 500, + duration_ms=500, + status="ok", + start_line_offset=None, + ) + ) reader = LiveLogReader( - log_path=write_log(b""), state=state, cut_offset=0, - window_pad_ms=10_000, now_ms=lambda: now, + log_path=write_log(b""), + state=state, + cut_offset=0, + window_pad_ms=10_000, + now_ms=lambda: now, ) reader.evict_stale() assert len(state.completed.entries) == 1 @@ -347,10 +376,15 @@ def test_evict_stale_drops_completed_older_than_one_hour(write_log): def test_get_live_metrics_returns_three_rows_when_coverage_spans_an_hour(): state = LiveState() state.metrics_known_from_ms = NOW_MS - 65 * MIN_MS - state.completed.add(CompletedQuery( - start_ms=NOW_MS - 65 * MIN_MS, end_ms=NOW_MS - 60 * MIN_MS, - duration_ms=5 * MIN_MS, status="ok", start_line_offset=None, - )) + state.completed.add( + CompletedQuery( + start_ms=NOW_MS - 65 * MIN_MS, + end_ms=NOW_MS - 60 * MIN_MS, + duration_ms=5 * MIN_MS, + status="ok", + start_line_offset=None, + ) + ) state.completed.add(make_completed(NOW_MS - 10 * MIN_MS)) state.completed.add(make_completed(NOW_MS - 1 * MIN_MS)) @@ -369,7 +403,9 @@ def test_get_live_metrics_blanks_windows_past_coverage_start(): state.completed.add(make_completed(NOW_MS - 2 * MIN_MS)) row_5m, row_15m, row_1h = get_live_metrics( - state, slow_threshold_ms=10_000, now_ms=NOW_MS, + state, + slow_threshold_ms=10_000, + now_ms=NOW_MS, ) assert row_5m.seen == 1 assert row_5m.not_ready_message is None @@ -384,7 +420,9 @@ def test_get_live_metrics_masks_everything_before_coverage_is_announced(): state.completed.add(make_completed(NOW_MS - 30 * MIN_MS)) row_5m, row_15m, row_1h = get_live_metrics( - state, slow_threshold_ms=10_000, now_ms=NOW_MS, + state, + slow_threshold_ms=10_000, + now_ms=NOW_MS, ) # During boot ramp we don't know when coverage arrives, so no ETA. for row in (row_5m, row_15m, row_1h): @@ -394,12 +432,15 @@ def test_get_live_metrics_masks_everything_before_coverage_is_announced(): def test_load_completed_history_loads_pairs_from_history(write_log): path = write_log( - start_line(1000, "q1") + end_line(2000, "q1") - + start_line(3000, "q2") + end_line(4000, "q2", "failed") + start_line(1000, "q1") + + end_line(2000, "q1") + + start_line(3000, "q2") + + end_line(4000, "q2", "failed") ) state = LiveState() load_completed_history( - log_path=path, state=state, + log_path=path, + state=state, cut_offset=path.stat().st_size, now_ms=lambda: 5000, ) @@ -409,15 +450,23 @@ def test_load_completed_history_loads_pairs_from_history(write_log): assert state.completed.entries[1].status == "failed" -def test_load_completed_history_prepends_before_existing_tailer_entries(write_log): +def test_load_completed_history_prepends_before_existing_tailer_entries( + write_log, +): path = write_log(start_line(1000, "old") + end_line(2000, "old")) state = LiveState() - state.completed.add(CompletedQuery( - start_ms=5000, end_ms=6000, duration_ms=1000, - status="ok", start_line_offset=None, - )) + state.completed.add( + CompletedQuery( + start_ms=5000, + end_ms=6000, + duration_ms=1000, + status="ok", + start_line_offset=None, + ) + ) load_completed_history( - log_path=path, state=state, + log_path=path, + state=state, cut_offset=path.stat().st_size, now_ms=lambda: 7000, ) @@ -428,12 +477,14 @@ def test_load_completed_history_prepends_before_existing_tailer_entries(write_lo def test_load_completed_history_drops_starts_without_a_matching_end(write_log): path = write_log( - start_line(1000, "q1") + end_line(2000, "q1") + start_line(1000, "q1") + + end_line(2000, "q1") + start_line(3000, "orphan") ) state = LiveState() load_completed_history( - log_path=path, state=state, + log_path=path, + state=state, cut_offset=path.stat().st_size, now_ms=lambda: 5000, ) @@ -445,8 +496,11 @@ def test_poll_loop_picks_up_lines_appended_to_the_file(write_log): path = write_log(b"") state = LiveState() reader = LiveLogReader( - log_path=path, state=state, cut_offset=0, - window_pad_ms=10_000, poll_interval=0.02, + log_path=path, + state=state, + cut_offset=0, + window_pad_ms=10_000, + poll_interval=0.02, now_ms=lambda: 0, ) stop_event = threading.Event() @@ -472,3 +526,54 @@ def run_loop(): finally: stop_event.set() thread.join() + + +def test_flash_query_carries_real_end_minus_start_duration(write_log): + """Sub-repaint completions store end-start, not a wall-clock duration.""" + path = write_log(start_line(1000, "q1", "SELECT a") + end_line(1100, "q1")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + [row] = state.unseen_finished + assert row.qid == "q1" + assert row.started_at_ms == 1000 + assert row.duration_ms == 100 + assert row.sparql == "SELECT a" + + +def test_completion_above_flash_threshold_is_not_queued(write_log): + """Queries longer than FLASH_THRESHOLD_MS stay out of unseen_finished.""" + end_ts = 1000 + FLASH_THRESHOLD_MS + 100 + path = write_log(start_line(1000, "q1") + end_line(end_ts, "q1")) + state = LiveState() + reader = make_reader(path, state) + with path.open("rb") as log_stream: + reader.poll(log_stream) + assert state.unseen_finished == [] + + +def test_take_unseen_finished_returns_rows_and_clears_queue(): + """The drain helper hands back every queued row and empties the list.""" + state = LiveState() + state.unseen_finished.append( + LiveQueryRow(qid="q1", started_at_ms=1000, duration_ms=80, sparql="A") + ) + state.unseen_finished.append( + LiveQueryRow(qid="q2", started_at_ms=1200, duration_ms=120, sparql="B") + ) + taken = take_unseen_finished(state) + assert [row.qid for row in taken] == ["q1", "q2"] + assert state.unseen_finished == [] + + +def test_get_live_query_rows_uses_caller_clock_for_duration(): + """Duration is now_ms - start_ms; the screen passes its display clock.""" + state = LiveState() + state.active["q1"] = (1000, "x", "SELECT a") + state.active["q2"] = (1500, "y", "SELECT b") + rows = {row.qid: row for row in get_live_query_rows(state, now_ms=2000)} + assert rows["q1"].duration_ms == 1000 + assert rows["q2"].duration_ms == 500 + assert rows["q1"].started_at_ms == 1000 + assert rows["q2"].client_ip == "y" From 765adfd091af10a281cda586ce5e98db68baf067 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Wed, 27 May 2026 11:48:00 +0200 Subject: [PATCH 16/26] Update monitor-queries to use correct metrics-log.jsonl filename --- src/qlever/commands/monitor_queries.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/qlever/commands/monitor_queries.py b/src/qlever/commands/monitor_queries.py index e74b6426..e76c44ff 100644 --- a/src/qlever/commands/monitor_queries.py +++ b/src/qlever/commands/monitor_queries.py @@ -38,9 +38,12 @@ def additional_arguments(self, subparser) -> None: help="URL of the SPARQL endpoint (default = {host_name}:{port})", ) subparser.add_argument( - "--qlever-server-log", + "--metrics-log", type=Path, - help="The `qlever-server` log file (default = {name}.query_metrics.log)", + help=( + "QLever's `metrics-log.jsonl` log file " + "(default = {name}.metrics-log.jsonl)" + ), ) subparser.add_argument( "--slow-threshold", @@ -52,18 +55,18 @@ def additional_arguments(self, subparser) -> None: ) def execute(self, args) -> bool: - if not args.qlever_server_log: - args.qlever_server_log = Path.cwd() / f"{args.name}.query_metrics.log" + if not args.metrics_log: + args.metrics_log = Path.cwd() / f"{args.name}.metrics-log.jsonl" show_msg = ( - f"Reading server logs from {args.qlever_server_log} to display the " + f"Reading server logs from {args.metrics_log} to display the " "currently active queries on the server" ) self.show(show_msg, only_show=args.show) if args.show: return True - if not args.qlever_server_log.is_file(): - log.error(f"Log file not found: {args.qlever_server_log}") + if not args.metrics_log.is_file(): + log.error(f"Log file not found: {args.metrics_log}") return False timeout_s = 30 @@ -85,7 +88,7 @@ def execute(self, args) -> bool: ) MonitorQueriesApp( - log_file=args.qlever_server_log, + log_file=args.metrics_log, sparql_endpoint=sparql_endpoint, timeout=timeout_s, slow_threshold=args.slow_threshold, From edb7e82197e9453ed2f1e7fb1c4cd515567fa47d Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Thu, 28 May 2026 18:05:04 +0200 Subject: [PATCH 17/26] Replace QID column with Started column on live view table, and make columns sortable on historic view --- src/qlever/monitor/views/historic.py | 106 +++++++++++++++++++++- src/qlever/monitor/widgets/query_table.py | 26 ++++-- 2 files changed, 122 insertions(+), 10 deletions(-) diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 5d38f040..0c953842 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -39,6 +39,21 @@ "ENDS": "ended in", } +SORT_COLUMNS = ["Started", "Duration", "Status"] + +SORT_KEYS = { + "Started": lambda row: row.started_at_ms, + "Duration": lambda row: row.duration_ms, + "Status": lambda row: row.status, +} + +# Each column's (descending, ascending) wording for the status line. +SORT_PHRASES = { + "Started": ("newest first", "oldest first"), + "Duration": ("longest first", "shortest first"), + "Status": ("Z to A", "A to Z"), +} + class HistoricScreen(Screen, inherit_bindings=False): """Historic view: shows active queries parsed from the log over a time window.""" @@ -56,6 +71,14 @@ class HistoricScreen(Screen, inherit_bindings=False): Binding("shift+right", "shift_later", "Shift window", show=False), Binding("g", "snap_start", "Window start/end", key_display="g/G"), Binding("G", "snap_end", "Window end", show=False), + Binding( + "less_than_sign", + "sort_prev_column", + "Sort column", + key_display="< >", + ), + Binding("greater_than_sign", "sort_next_column", "", show=False), + Binding("i", "invert_sort", "Invert sort"), ] def compose(self) -> ComposeResult: @@ -68,6 +91,8 @@ def compose(self) -> ComposeResult: ) self.window_size = self.available_presets[0] self.mode = "ACTIVE" + self.sort_column = "Duration" + self.sort_reverse = True width = preset_ms(self.window_size) self.window_start_ms = ( self.log_start_ms if width is None else self.log_end_ms - width @@ -183,7 +208,7 @@ def refresh_data(self, rescan: bool) -> None: rows, metrics = render_window( self.window_data, controls, self.log_end_ms ) - rows = sorted(rows, key=lambda row: row.duration_ms, reverse=True) + rows = self.sorted_rows(rows) self.app.call_from_thread(self.apply_window_result, rows, metrics) def apply_window_result( @@ -194,10 +219,49 @@ def apply_window_result( """Push fresh rows, metrics, and status line into the widgets.""" self.query_one(HistoricQueryTable).set_rows(rows) self.query_one(MetricsRow).rows = [metrics] - phrase = MODE_PHRASES[self.mode] self.query_one("#table-status", Static).update( - f"Showing {len(rows)} queries that {phrase} the time window " - "(sorted by longest duration)" + self.status_text(len(rows)) + ) + self.refresh_sort_indicator() + + def apply_sort(self) -> None: + """Re-sort the rows already shown and refresh the status line.""" + table = self.query_one(HistoricQueryTable) + rows = self.sorted_rows(table.query_rows) + table.set_rows(rows) + self.query_one("#table-status", Static).update( + self.status_text(len(rows)) + ) + self.refresh_sort_indicator() + + def sorted_rows( + self, rows: list[HistoricQueryRow] + ) -> list[HistoricQueryRow]: + """Order rows by the active sort column and direction.""" + return sorted( + rows, + key=SORT_KEYS[self.sort_column], + reverse=self.sort_reverse, + ) + + def sort_phrase(self) -> str: + """Describe the active sort and direction for the status line.""" + descending, ascending = SORT_PHRASES[self.sort_column] + direction = descending if self.sort_reverse else ascending + return f"{self.sort_column}, {direction}" + + def status_text(self, count: int) -> str: + """Status line describing the window mode, row count, and sort.""" + phrase = MODE_PHRASES[self.mode] + return ( + f"Showing {count} queries that {phrase} the time window " + f"(sorted by {self.sort_phrase()})" + ) + + def refresh_sort_indicator(self) -> None: + """Point the table's header arrow at the active sort column.""" + self.query_one(HistoricQueryTable).set_sort_indicator( + SORT_COLUMNS.index(self.sort_column), self.sort_reverse ) def clamp_window_start(self, start: int, width: int) -> int: @@ -251,6 +315,27 @@ def action_cycle_mode(self) -> None: index = MODES.index(self.mode) self.set_mode(MODES[(index + 1) % len(MODES)]) + def cycle_sort_column(self, direction: int) -> None: + """Move the sort one column in `direction` (wraps).""" + index = SORT_COLUMNS.index(self.sort_column) + self.sort_column = SORT_COLUMNS[ + (index + direction) % len(SORT_COLUMNS) + ] + self.apply_sort() + + def action_sort_next_column(self) -> None: + """Sort by the next column (wraps).""" + self.cycle_sort_column(1) + + def action_sort_prev_column(self) -> None: + """Sort by the previous column (wraps).""" + self.cycle_sort_column(-1) + + def action_invert_sort(self) -> None: + """Flip the sort direction.""" + self.sort_reverse = not self.sort_reverse + self.apply_sort() + def shift_window(self, direction: int) -> None: """Move the window by its own width; clamp; no-op when `all`.""" width = preset_ms(self.window_size) @@ -309,6 +394,19 @@ def on_timeline_recentered(self, message: Timeline.Recentered) -> None: """Recenter the window on the clicked timeline position.""" self.center_window_at(message.center_ms) + def on_data_table_header_selected( + self, message: HistoricQueryTable.HeaderSelected + ) -> None: + """Sort by the clicked column; click the active one again to invert.""" + if message.column_index >= len(SORT_COLUMNS): + return + column = SORT_COLUMNS[message.column_index] + if column == self.sort_column: + self.sort_reverse = not self.sort_reverse + else: + self.sort_column = column + self.apply_sort() + def on_data_table_row_selected( self, message: HistoricQueryTable.RowSelected ) -> None: diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py index 5fcfb2dd..882ee76a 100644 --- a/src/qlever/monitor/widgets/query_table.py +++ b/src/qlever/monitor/widgets/query_table.py @@ -6,7 +6,6 @@ from qlever.monitor.models import HistoricQueryRow, LiveQueryRow from qlever.monitor.util import format_clock -QID_WIDTH = 10 SPARQL_WIDTH = 280 @@ -72,7 +71,7 @@ class LiveQueryTable(QueryTable): def on_mount(self) -> None: """Add the columns and one table row per active query.""" - self.add_column("Query ID") + self.add_column("Started") self.add_column("Duration", width=8) self.add_column("SPARQL") self.repaint_rows() @@ -81,7 +80,7 @@ def repaint_rows(self) -> None: """Add one DataTable row per entry in self.query_rows.""" for row in self.query_rows: self.add_row( - truncate(row.qid, QID_WIDTH), + format_clock(row.started_at_ms), Text(format_duration(row.duration_ms), justify="right"), truncate(oneline(row.sparql), SPARQL_WIDTH), ) @@ -90,11 +89,14 @@ def repaint_rows(self) -> None: class HistoricQueryTable(QueryTable): """Finished queries in the current window on the Historic screen.""" + # Wide enough to fit a sortable header plus its sort arrow. + SORTABLE_COLUMN_WIDTH = 10 + def on_mount(self) -> None: """Add the columns and one table row per finished query.""" - self.add_column("Started") - self.add_column("Duration", width=8) - self.add_column("Status") + self.add_column("Started", width=self.SORTABLE_COLUMN_WIDTH) + self.add_column("Duration", width=self.SORTABLE_COLUMN_WIDTH) + self.add_column("Status", width=self.SORTABLE_COLUMN_WIDTH) self.add_column("SPARQL") self.repaint_rows() @@ -107,3 +109,15 @@ def repaint_rows(self) -> None: row.status, truncate(oneline(row.sparql), SPARQL_WIDTH), ) + + def set_sort_indicator(self, column_index: int, descending: bool) -> None: + """Show a sort arrow on one column header and clear the rest.""" + arrow = " ▼" if descending else " ▲" + for index, column in enumerate(self.ordered_columns): + base = column.label.plain.rstrip(" ▲▼") + label = base + arrow if index == column_index else base + column.label = Text(label) + # Invalidate the render cache so the new labels paint, the way + # update_cell does; Textual has no public column relabel. + self._update_count += 1 + self.refresh() From 74cccfa99505e59c13579d3a8d7c9ccd18bf3676 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Thu, 28 May 2026 22:31:29 +0200 Subject: [PATCH 18/26] Give each worker thread a unique group so that one thread ending doesn't kill another one --- src/qlever/monitor/app.py | 6 +++--- src/qlever/monitor/views/historic.py | 2 +- src/qlever/monitor/views/live.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 20c53103..04f4b150 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -105,7 +105,7 @@ def on_mount(self) -> None: self.load_metrics_history() self.push_screen("live") - @work(thread=True, exclusive=True) + @work(thread=True, exclusive=True, group="tail_live_log") def tail_live_log(self) -> None: """Poll the log forever, dispatching new events into LiveState.""" tailer = LiveLogReader( @@ -120,7 +120,7 @@ def tail_live_log(self) -> None: tailer.poll(log_stream) time.sleep(tailer.poll_interval) - @work(thread=True) + @work(thread=True, group="load_metrics_history") def load_metrics_history(self) -> None: """Prepend the hour before cut_offset into completed history so metrics aren't empty at boot.""" load_completed_history(self.log_file, self.live_state, self.cut_offset) @@ -174,7 +174,7 @@ def action_pretty_print(self) -> None: if pane.pretty_text is None: self.pretty_print_worker(pane, pane.content.sparql_text) - @work(thread=True, exclusive=True) + @work(thread=True, exclusive=True, group="pretty_print") def pretty_print_worker(self, pane: SparqlPane, raw: str) -> None: """Run the blocking sparql-formatter off the UI thread.""" result = pretty_printed_query( diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 0c953842..d978a367 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -181,7 +181,7 @@ def refresh_view(self, rescan: bool) -> None: self.show_loading_state() self.refresh_data(rescan) - @work(thread=True, exclusive=True) + @work(thread=True, exclusive=True, group="refresh_data") def refresh_data(self, rescan: bool) -> None: """Scan and/or re-filter the window, push rows + metrics + status. diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index b5819594..417ff56c 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -135,7 +135,7 @@ def update_liveness_visuals(self) -> None: else: self.metrics_timer.resume() - @work(thread=True, exclusive=True) + @work(thread=True, exclusive=True, group="ping_server") def ping_server(self) -> None: """Curl the server's /ping off the UI thread.""" ok = is_qlever_server_alive(self.app.sparql_endpoint, max_time=2) From 9ce2311ce435d469961f2dd72d56790f25e89bac Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Fri, 29 May 2026 12:51:35 +0200 Subject: [PATCH 19/26] Ensure that every query is shown in live view at least once --- src/qlever/monitor/live_data.py | 133 +++++++++++++++++++------------ src/qlever/monitor/views/live.py | 12 ++- test/qlever/test_live_data.py | 91 +++++++++++++-------- 3 files changed, 144 insertions(+), 92 deletions(-) diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index cc6754fa..d0751d80 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -36,7 +36,6 @@ LIVE_HORIZON_MS = LIVE_METRIC_WINDOWS_MS[2] LIVE_METRIC_LABELS = ["last 5m", "last 15m", "last 1h"] LIVE_REPAINT_S = 0.25 -FLASH_THRESHOLD_MS = int(LIVE_REPAINT_S * 1000) LOG_IDLE_THRESHOLD_MS = 10_000 PING_INTERVAL_S = 5.0 PING_FAILS_TO_UNREACHABLE = 3 @@ -108,6 +107,22 @@ def metrics_for_windows( ] +@dataclass +class ActiveQuery: + """A running or just-finished query on the Live screen. + + end_ms is None while the query runs; seen is set once a repaint has + rendered the query, so one too fast to catch while running still + shows for a single paint before being dropped. + """ + + start_ms: int + end_ms: int | None + client_ip: str + sparql: str + seen: bool = False + + @dataclass class LiveState: """Shared in-memory state for the Live screen. @@ -118,8 +133,7 @@ class LiveState: """ completed: CompletedQueries = field(default_factory=CompletedQueries) - active: dict[str, tuple[int, str]] = field(default_factory=dict) - unseen_finished: list[LiveQueryRow] = field(default_factory=list) + active: dict[str, ActiveQuery] = field(default_factory=dict) lock: threading.Lock = field(default_factory=threading.Lock) metrics_known_from_ms: int | None = None latest_event_ms: int | None = None @@ -158,7 +172,12 @@ def find_active_queries( _, client_ip, sparql = load_sparql_at( log_stream, start_line_offset ) - state.active[qid] = (start_ms, client_ip, sparql) + state.active[qid] = ActiveQuery( + start_ms=start_ms, + end_ms=None, + client_ip=client_ip, + sparql=sparql, + ) return (state, file_size, eof_ts) @@ -225,24 +244,24 @@ def poll(self, log_stream: BinaryIO) -> None: self.evict_stale() def evict_stale(self) -> None: - """Drop completions older than 1h and actives older than 2t. + """Drop old completions and finished or stale active entries. - Active eviction guards against a missing end event: any query - whose start is older than the 2t safety horizon is treated as - gone, since the server contract says no real query lives that - long. + An active entry leaves once a repaint has shown it and it has + finished, or once its start is older than the 2t safety horizon + (a missing end event, since no real query lives that long). """ now = self.now_ms() completed_cutoff = now - LIVE_HORIZON_MS active_cutoff = now - self.window_pad_ms with self.state.lock: self.state.completed.drop_older_than(completed_cutoff) - stale = [ + to_remove = [ qid - for qid, (start_ms, _, _) in self.state.active.items() - if start_ms < active_cutoff + for qid, entry in self.state.active.items() + if (entry.end_ms is not None and entry.seen) + or entry.start_ms < active_cutoff ] - for qid in stale: + for qid in to_remove: del self.state.active[qid] def handle_line(self, line: bytes) -> None: @@ -265,7 +284,12 @@ def handle_line(self, line: bytes) -> None: sparql = "" client_ip = obj.get("client-ip", "") with self.state.lock: - self.state.active[qid] = (ts_ms, client_ip, sparql) + self.state.active[qid] = ActiveQuery( + start_ms=ts_ms, + end_ms=None, + client_ip=client_ip, + sparql=sparql, + ) self.state.latest_event_ms = max( self.state.latest_event_ms or 0, ts_ms ) @@ -279,62 +303,65 @@ def handle_line(self, line: bytes) -> None: self.state.latest_event_ms = max( self.state.latest_event_ms or 0, ts_ms ) - start = self.state.active.pop(qid, None) - if start is None: + entry = self.state.active.get(qid) + if entry is None: return - start_ms, client_ip, sparql = start - duration_ms = ts_ms - start_ms + entry.end_ms = ts_ms self.state.completed.add( CompletedQuery( - start_ms=start_ms, + start_ms=entry.start_ms, end_ms=ts_ms, - duration_ms=duration_ms, + duration_ms=ts_ms - entry.start_ms, status=status, start_line_offset=None, ) ) - if duration_ms < FLASH_THRESHOLD_MS: - self.state.unseen_finished.append( - LiveQueryRow( - qid=qid, - started_at_ms=start_ms, - duration_ms=duration_ms, - sparql=sparql, - client_ip=client_ip, - ) - ) -def take_unseen_finished(state: LiveState) -> list[LiveQueryRow]: - """Snapshot the unseen-finished queue and clear it atomically. - - Called once per repaint by the Live screen so each sub-repaint - query is shown for exactly one tick before being dropped. +def get_live_query_rows(state: LiveState, now_ms: int) -> list[LiveQueryRow]: + """Snapshot the active set as table rows, marking each one shown. + + Skips entries that have finished and already appeared in a repaint + (they linger only until the tailer reaps them), and flips seen on + every returned entry. A query that finished before any paint could + catch it running is therefore still shown exactly once. The caller + chooses the clock: the Live screen passes display_clock_ms() so + freeze-on-unreachable freezes durations too. """ + rows = [] with state.lock: - snapshot = list(state.unseen_finished) - state.unseen_finished.clear() - return snapshot + for qid, entry in state.active.items(): + if entry.end_ms is not None and entry.seen: + continue + entry.seen = True + duration_ms = ( + entry.end_ms - entry.start_ms + if entry.end_ms is not None + else now_ms - entry.start_ms + ) + rows.append( + LiveQueryRow( + qid=qid, + started_at_ms=entry.start_ms, + duration_ms=duration_ms, + sparql=entry.sparql, + client_ip=entry.client_ip, + ) + ) + return rows -def get_live_query_rows(state: LiveState, now_ms: int) -> list[LiveQueryRow]: - """Snapshot the active set as table rows with their current duration. +def discard_finished_backlog(state: LiveState) -> None: + """Mark finished-but-unshown active entries as seen. - The caller chooses the clock: the Live screen passes - display_clock_ms() so freeze-on-unreachable freezes durations too. + Called on screen resume so the queries that finished while the user + was away are dropped instead of all flashing at once. Running + entries are left to show. """ with state.lock: - active_snapshot = list(state.active.items()) - return [ - LiveQueryRow( - qid=qid, - started_at_ms=start_ms, - duration_ms=now_ms - start_ms, - sparql=sparql, - client_ip=client_ip, - ) - for qid, (start_ms, client_ip, sparql) in active_snapshot - ] + for entry in state.active.values(): + if entry.end_ms is not None: + entry.seen = True def format_eta(ms: int) -> str: diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index 417ff56c..ff145ac4 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -12,10 +12,10 @@ PING_FAILS_TO_UNREACHABLE, PING_INTERVAL_S, current_ms, + discard_finished_backlog, get_live_metrics, get_live_query_rows, is_log_fresh, - take_unseen_finished, ) from qlever.monitor.models import LiveSubtitle, SparqlContent from qlever.monitor.widgets.header_row import HeaderRow @@ -89,8 +89,8 @@ def on_screen_suspend(self) -> None: def on_screen_resume(self) -> None: """Resume periodic UI work; recheck server unless already reachable.""" - # Drop flashes accumulated while suspended. - take_unseen_finished(self.app.live_state) + # Drop the backlog of queries that finished while suspended. + discard_finished_backlog(self.app.live_state) self.table_timer.resume() self.update_liveness_visuals() if self.ping_timer is not None: @@ -194,15 +194,13 @@ def display_clock_ms(self) -> int: return current_ms() def refresh_table(self) -> None: - """Push active + just-finished sub-repaint rows; no-op when frozen.""" + """Push the active rows sorted by duration; no-op when frozen.""" self.update_liveness_from_log() if self.frozen: return state = self.app.live_state - active_rows = get_live_query_rows(state, self.display_clock_ms()) - flashed_rows = take_unseen_finished(state) rows = sorted( - active_rows + flashed_rows, + get_live_query_rows(state, self.display_clock_ms()), key=lambda row: row.duration_ms, reverse=True, ) diff --git a/test/qlever/test_live_data.py b/test/qlever/test_live_data.py index b5192538..565b89e2 100644 --- a/test/qlever/test_live_data.py +++ b/test/qlever/test_live_data.py @@ -4,20 +4,19 @@ import time from qlever.monitor.live_data import ( - FLASH_THRESHOLD_MS, LIVE_HORIZON_MS, + ActiveQuery, CompletedQueries, LiveLogReader, LiveState, + discard_finished_backlog, find_active_queries, get_live_metrics, get_live_query_rows, load_completed_history, - take_unseen_finished, ) from qlever.monitor.log_reader import CompletedQuery from qlever.monitor.metrics import MetricsSnapshot, percentiles -from qlever.monitor.models import LiveQueryRow NOW_MS = 1_700_000_000_000 MIN_MS = 60_000 @@ -171,7 +170,9 @@ def test_find_active_queries_picks_up_unmatched_starts(write_log): ) state, cut_offset, eof_ts = find_active_queries(path, window_pad_ms=10_000) assert list(state.active) == ["q2"] - assert state.active["q2"] == (2000, "x", "SELECT b") + assert state.active["q2"] == ActiveQuery( + start_ms=2000, end_ms=None, client_ip="x", sparql="SELECT b" + ) assert cut_offset == path.stat().st_size assert eof_ts == 3000 @@ -182,8 +183,12 @@ def test_find_active_queries_loads_sparql_for_each_survivor(write_log): + start_line(2000, "b", "SELECT beta") ) state, _, _ = find_active_queries(path, window_pad_ms=10_000) - assert state.active["a"] == (1000, "x", "SELECT alpha") - assert state.active["b"] == (2000, "x", "SELECT beta") + assert state.active["a"] == ActiveQuery( + start_ms=1000, end_ms=None, client_ip="x", sparql="SELECT alpha" + ) + assert state.active["b"] == ActiveQuery( + start_ms=2000, end_ms=None, client_ip="x", sparql="SELECT beta" + ) def test_find_active_queries_empty_log_returns_empty_state(write_log): @@ -225,7 +230,11 @@ def test_poll_pairs_a_clean_start_and_end(write_log): reader = make_reader(path, state) with path.open("rb") as log_stream: reader.poll(log_stream) - assert state.active == {} + # The end event records the completion but the query stays in active + # (end_ms set, still unseen) until a repaint has shown it. + entry = state.active["q1"] + assert entry.end_ms == 2000 + assert entry.seen is False [done] = state.completed.entries assert done.start_ms == 1000 assert done.end_ms == 2000 @@ -240,7 +249,11 @@ def test_poll_keeps_start_in_active_until_end_arrives(write_log): reader = make_reader(path, state) with path.open("rb") as log_stream: reader.poll(log_stream) - assert state.active == {"q1": (1000, "x", "SELECT a")} + assert state.active == { + "q1": ActiveQuery( + start_ms=1000, end_ms=None, client_ip="x", sparql="SELECT a" + ) + } assert len(state.completed.entries) == 0 @@ -276,7 +289,7 @@ def test_poll_partial_trailing_line_is_left_for_next_poll(write_log): path.write_bytes(full) with path.open("rb") as log_stream: reader.poll(log_stream) - assert state.active == {} + assert state.active["q1"].end_ms == 2000 assert len(state.completed.entries) == 1 @@ -327,8 +340,12 @@ def test_evict_stale_drops_actives_older_than_window_pad(write_log): state = LiveState() now = 1_000_000 pad = 10_000 - state.active["fresh"] = (now - 1_000, "", "") - state.active["old"] = (now - 20_000, "", "") + state.active["fresh"] = ActiveQuery( + start_ms=now - 1_000, end_ms=None, client_ip="", sparql="" + ) + state.active["old"] = ActiveQuery( + start_ms=now - 20_000, end_ms=None, client_ip="", sparql="" + ) reader = LiveLogReader( log_path=write_log(b""), state=state, @@ -528,50 +545,60 @@ def run_loop(): thread.join() -def test_flash_query_carries_real_end_minus_start_duration(write_log): - """Sub-repaint completions store end-start, not a wall-clock duration.""" +def test_finished_query_is_shown_once_then_skipped(write_log): + """A query that finished before any paint shows once, then is hidden.""" path = write_log(start_line(1000, "q1", "SELECT a") + end_line(1100, "q1")) state = LiveState() reader = make_reader(path, state) with path.open("rb") as log_stream: reader.poll(log_stream) - [row] = state.unseen_finished + [row] = get_live_query_rows(state, now_ms=5000) assert row.qid == "q1" assert row.started_at_ms == 1000 - assert row.duration_ms == 100 assert row.sparql == "SELECT a" + # Frozen at end - start, regardless of the clock the caller passes. + assert row.duration_ms == 100 + assert get_live_query_rows(state, now_ms=9999) == [] -def test_completion_above_flash_threshold_is_not_queued(write_log): - """Queries longer than FLASH_THRESHOLD_MS stay out of unseen_finished.""" - end_ts = 1000 + FLASH_THRESHOLD_MS + 100 - path = write_log(start_line(1000, "q1") + end_line(end_ts, "q1")) +def test_finished_unseen_entry_survives_eviction_until_shown(write_log): + """Eviction keeps a finished query until a repaint has rendered it.""" + path = write_log(start_line(1000, "q1") + end_line(1100, "q1")) state = LiveState() - reader = make_reader(path, state) + reader = make_reader(path, state, now_ms=1200) with path.open("rb") as log_stream: reader.poll(log_stream) - assert state.unseen_finished == [] + assert "q1" in state.active + get_live_query_rows(state, now_ms=1200) + reader.evict_stale() + assert "q1" not in state.active -def test_take_unseen_finished_returns_rows_and_clears_queue(): - """The drain helper hands back every queued row and empties the list.""" +def test_discard_finished_backlog_marks_finished_seen_only(): + """The resume drop suppresses finished entries but keeps running ones.""" state = LiveState() - state.unseen_finished.append( - LiveQueryRow(qid="q1", started_at_ms=1000, duration_ms=80, sparql="A") + state.active["done"] = ActiveQuery( + start_ms=1000, end_ms=1100, client_ip="x", sparql="A" ) - state.unseen_finished.append( - LiveQueryRow(qid="q2", started_at_ms=1200, duration_ms=120, sparql="B") + state.active["running"] = ActiveQuery( + start_ms=1200, end_ms=None, client_ip="y", sparql="B" ) - taken = take_unseen_finished(state) - assert [row.qid for row in taken] == ["q1", "q2"] - assert state.unseen_finished == [] + discard_finished_backlog(state) + assert state.active["done"].seen is True + assert state.active["running"].seen is False + shown = [row.qid for row in get_live_query_rows(state, now_ms=2000)] + assert shown == ["running"] def test_get_live_query_rows_uses_caller_clock_for_duration(): """Duration is now_ms - start_ms; the screen passes its display clock.""" state = LiveState() - state.active["q1"] = (1000, "x", "SELECT a") - state.active["q2"] = (1500, "y", "SELECT b") + state.active["q1"] = ActiveQuery( + start_ms=1000, end_ms=None, client_ip="x", sparql="SELECT a" + ) + state.active["q2"] = ActiveQuery( + start_ms=1500, end_ms=None, client_ip="y", sparql="SELECT b" + ) rows = {row.qid: row for row in get_live_query_rows(state, now_ms=2000)} assert rows["q1"].duration_ms == 1000 assert rows["q2"].duration_ms == 500 From 926fde2562b36f96afc3606887559e7db75ce18e Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 30 May 2026 16:54:27 +0200 Subject: [PATCH 20/26] Add --refresh option to monitor-queries to allow the user to control screen refresh of live view table [0.2s, 2s] --- src/qlever/commands/monitor_queries.py | 21 +++++++++++++++++++++ src/qlever/monitor/app.py | 2 ++ src/qlever/monitor/live_data.py | 1 - src/qlever/monitor/views/live.py | 5 ++--- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/qlever/commands/monitor_queries.py b/src/qlever/commands/monitor_queries.py index e76c44ff..61c1d001 100644 --- a/src/qlever/commands/monitor_queries.py +++ b/src/qlever/commands/monitor_queries.py @@ -6,6 +6,11 @@ from qlever.log import log from qlever.monitor.app import MonitorQueriesApp +# `LiveLogReader` ingests new log lines every 0.2s, so a faster screen +# refresh would only re-render the ticking duration; 0.2s is the floor. +REFRESH_MIN_S = 0.2 +REFRESH_MAX_S = 2.0 + class MonitorQueriesCommand(QleverCommand): """ @@ -53,6 +58,14 @@ def additional_arguments(self, subparser) -> None: " completed) is counted as slow in the metrics" " (default = server timeout - 10s)", ) + subparser.add_argument( + "--refresh", + type=float, + default=REFRESH_MIN_S, + help="Live view screen refresh interval in seconds, between" + f" {REFRESH_MIN_S} and {REFRESH_MAX_S}" + f" (default = {REFRESH_MIN_S})", + ) def execute(self, args) -> bool: if not args.metrics_log: @@ -81,6 +94,13 @@ def execute(self, args) -> bool: return False args.slow_threshold = max(1, timeout_s - 10) + if args.refresh < REFRESH_MIN_S or args.refresh > REFRESH_MAX_S: + log.error( + f"--refresh must be between {REFRESH_MIN_S} and" + f" {REFRESH_MAX_S} seconds" + ) + return False + sparql_endpoint = ( args.sparql_endpoint if args.sparql_endpoint @@ -92,6 +112,7 @@ def execute(self, args) -> bool: sparql_endpoint=sparql_endpoint, timeout=timeout_s, slow_threshold=args.slow_threshold, + refresh_interval=args.refresh, system=args.system, ).run() return True diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 04f4b150..113ba1da 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -67,6 +67,7 @@ def __init__( sparql_endpoint: str, timeout: int, slow_threshold: int, + refresh_interval: float, system: str = "docker", ) -> None: super().__init__() @@ -75,6 +76,7 @@ def __init__( self.timeout = timeout self.window_pad_ms = 2000 * timeout self.slow_threshold = slow_threshold + self.refresh_interval = refresh_interval self.system = system self.live_state = LiveState() self.log_start_ms = None diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index d0751d80..098814ef 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -35,7 +35,6 @@ LIVE_METRIC_WINDOWS_MS = [5 * 60_000, 15 * 60_000, 60 * 60_000] LIVE_HORIZON_MS = LIVE_METRIC_WINDOWS_MS[2] LIVE_METRIC_LABELS = ["last 5m", "last 15m", "last 1h"] -LIVE_REPAINT_S = 0.25 LOG_IDLE_THRESHOLD_MS = 10_000 PING_INTERVAL_S = 5.0 PING_FAILS_TO_UNREACHABLE = 3 diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index ff145ac4..8c2d1ba0 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -8,7 +8,6 @@ from textual.widgets import Footer, Static from qlever.monitor.live_data import ( - LIVE_REPAINT_S, PING_FAILS_TO_UNREACHABLE, PING_INTERVAL_S, current_ms, @@ -74,7 +73,7 @@ def compose(self) -> ComposeResult: def on_mount(self) -> None: """Start periodic refreshes; kick off boot pings if the log is stale.""" self.table_timer = self.set_interval( - LIVE_REPAINT_S, self.refresh_table + self.app.refresh_interval, self.refresh_table ) self.metrics_timer = self.set_interval(2.0, self.refresh_metrics) if self.liveness == "checking": @@ -169,7 +168,7 @@ def refresh_subtitle(self) -> None: def update_liveness_from_log(self) -> None: """Re-evaluate the reachability state from log freshness alone. - Runs every 0.25s via refresh_table. A fresh log line is enough + Runs on each refresh_table tick. A fresh log line is enough evidence to flip back to reachable from any non-reachable state; a long quiet log moves us from reachable into pinging. """ From 6c6ccd4e24d10e8152cac23693acd85abd4a9a03 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sat, 30 May 2026 23:55:31 +0200 Subject: [PATCH 21/26] Fix live table screen flicker beacuse of table rewrite and add upper limit = 1000 to visible rows in historic screen --- src/qlever/monitor/views/historic.py | 31 +++-- src/qlever/monitor/widgets/query_table.py | 133 +++++++++++++++++----- 2 files changed, 127 insertions(+), 37 deletions(-) diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index d978a367..65f84d4d 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -54,6 +54,10 @@ "Status": ("Z to A", "A to Z"), } +# Sort the whole window but paint at most this many rows, so a huge +# window does not freeze the table. +MAX_VISIBLE_ROWS = 1000 + class HistoricScreen(Screen, inherit_bindings=False): """Historic view: shows active queries parsed from the log over a time window.""" @@ -99,6 +103,7 @@ def compose(self) -> ComposeResult: ) self.window_end_ms = self.log_end_ms self.window_data = None + self.all_rows = [] controls = ControlsState( window_size=self.window_size, mode=self.mode, @@ -208,8 +213,10 @@ def refresh_data(self, rescan: bool) -> None: rows, metrics = render_window( self.window_data, controls, self.log_end_ms ) - rows = self.sorted_rows(rows) - self.app.call_from_thread(self.apply_window_result, rows, metrics) + self.all_rows = rows + self.app.call_from_thread( + self.apply_window_result, self.visible_rows(), metrics + ) def apply_window_result( self, @@ -220,17 +227,15 @@ def apply_window_result( self.query_one(HistoricQueryTable).set_rows(rows) self.query_one(MetricsRow).rows = [metrics] self.query_one("#table-status", Static).update( - self.status_text(len(rows)) + self.status_text(len(self.all_rows)) ) self.refresh_sort_indicator() def apply_sort(self) -> None: - """Re-sort the rows already shown and refresh the status line.""" - table = self.query_one(HistoricQueryTable) - rows = self.sorted_rows(table.query_rows) - table.set_rows(rows) + """Re-sort the full window, re-cap, and refresh the status line.""" + self.query_one(HistoricQueryTable).set_rows(self.visible_rows()) self.query_one("#table-status", Static).update( - self.status_text(len(rows)) + self.status_text(len(self.all_rows)) ) self.refresh_sort_indicator() @@ -244,15 +249,23 @@ def sorted_rows( reverse=self.sort_reverse, ) + def visible_rows(self) -> list[HistoricQueryRow]: + """The sorted window capped to the rows the table will paint.""" + return self.sorted_rows(self.all_rows)[:MAX_VISIBLE_ROWS] + def sort_phrase(self) -> str: """Describe the active sort and direction for the status line.""" descending, ascending = SORT_PHRASES[self.sort_column] direction = descending if self.sort_reverse else ascending return f"{self.sort_column}, {direction}" - def status_text(self, count: int) -> str: + def status_text(self, total: int) -> str: """Status line describing the window mode, row count, and sort.""" phrase = MODE_PHRASES[self.mode] + if total > MAX_VISIBLE_ROWS: + count = f"top {MAX_VISIBLE_ROWS:,} of {total:,}" + else: + count = f"{total:,}" return ( f"Showing {count} queries that {phrase} the time window " f"(sorted by {self.sort_phrase()})" diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py index 882ee76a..da22e41f 100644 --- a/src/qlever/monitor/widgets/query_table.py +++ b/src/qlever/monitor/widgets/query_table.py @@ -42,6 +42,22 @@ def __init__( """Hold the rows to paint once the table is mounted.""" super().__init__(cursor_type="row") self.query_rows = rows + # qid -> the cell tuple currently shown for that row. + self.shown_cells = {} + # Column keys in display order, recorded when columns are added. + self.column_keys = () + + def fill_rows(self) -> None: + """Add one keyed DataTable row per entry in self.query_rows. + + Records each rendered cell tuple in shown_cells so a later diff + can compare against what is on screen. + """ + self.shown_cells = {} + for row in self.query_rows: + cells = self.render_row(row) + self.add_row(*cells, key=row.qid) + self.shown_cells[row.qid] = cells def set_rows( self, rows: list[LiveQueryRow] | list[HistoricQueryRow] @@ -54,7 +70,7 @@ def set_rows( self.query_rows = rows self.clear(columns=False) - self.repaint_rows() + self.fill_rows() if not self.query_rows: return @@ -71,19 +87,79 @@ class LiveQueryTable(QueryTable): def on_mount(self) -> None: """Add the columns and one table row per active query.""" - self.add_column("Started") - self.add_column("Duration", width=8) - self.add_column("SPARQL") - self.repaint_rows() + self.column_keys = ( + self.add_column("Started"), + self.add_column("Duration", width=8), + self.add_column("SPARQL"), + ) + self.fill_rows() + + def render_row(self, row: LiveQueryRow) -> tuple: + """Render one active query to its Started/Duration/SPARQL cells.""" + return ( + format_clock(row.started_at_ms), + Text(format_duration(row.duration_ms), justify="right"), + truncate(oneline(row.sparql), SPARQL_WIDTH), + ) + + def cursor_qid(self) -> str | None: + """The qid under the row cursor, or None when nothing valid is selected.""" + index = self.cursor_row + if self.query_rows and 0 <= index < len(self.query_rows): + return self.query_rows[index].qid + return None + + def repin_cursor(self, qid: str, fallback_index: int) -> None: + """Move the cursor back to qid after a layout change. + + Run after query_rows holds the new list. If qid has left the + table, clamp to the nearest remaining row. + """ + for index, row in enumerate(self.query_rows): + if row.qid == qid: + self.move_cursor(row=index) + return + if self.query_rows: + self.move_cursor(row=min(fallback_index, len(self.query_rows) - 1)) + + def set_rows(self, rows: list[LiveQueryRow]) -> None: + """Bring the table to the new sorted rows by an incremental diff. + + Removes gone rows, updates only changed cells in place, and + appends new rows at the bottom. A steady tick touches neither + scroll nor highlight. Falls back to a full rebuild if the sorted + order cannot be reproduced by append-at-bottom alone. + """ + new_cells = {row.qid: self.render_row(row) for row in rows} + new_order = [row.qid for row in rows] + old_order = [row.qid for row in self.query_rows] + new_set, old_set = set(new_order), set(old_order) + + survivors = [qid for qid in old_order if qid in new_set] + additions = [qid for qid in new_order if qid not in old_set] + if survivors + additions != new_order: + super().set_rows(rows) + return - def repaint_rows(self) -> None: - """Add one DataTable row per entry in self.query_rows.""" - for row in self.query_rows: - self.add_row( - format_clock(row.started_at_ms), - Text(format_duration(row.duration_ms), justify="right"), - truncate(oneline(row.sparql), SPARQL_WIDTH), - ) + cursor_qid = self.cursor_qid() + old_cursor_row = self.cursor_row + layout_changed = bool(additions) or len(survivors) != len(old_order) + + for qid in old_order: + if qid not in new_set: + self.remove_row(qid) + for qid in survivors: + old_row, new_row = self.shown_cells[qid], new_cells[qid] + for index, column_key in enumerate(self.column_keys): + if str(old_row[index]) != str(new_row[index]): + self.update_cell(qid, column_key, new_row[index]) + for qid in additions: + self.add_row(*new_cells[qid], key=qid) + + self.query_rows = rows + self.shown_cells = new_cells + if layout_changed and cursor_qid is not None: + self.repin_cursor(cursor_qid, old_cursor_row) class HistoricQueryTable(QueryTable): @@ -94,21 +170,22 @@ class HistoricQueryTable(QueryTable): def on_mount(self) -> None: """Add the columns and one table row per finished query.""" - self.add_column("Started", width=self.SORTABLE_COLUMN_WIDTH) - self.add_column("Duration", width=self.SORTABLE_COLUMN_WIDTH) - self.add_column("Status", width=self.SORTABLE_COLUMN_WIDTH) - self.add_column("SPARQL") - self.repaint_rows() - - def repaint_rows(self) -> None: - """Add one DataTable row per entry in self.query_rows.""" - for row in self.query_rows: - self.add_row( - format_clock(row.started_at_ms), - Text(format_duration(row.duration_ms), justify="right"), - row.status, - truncate(oneline(row.sparql), SPARQL_WIDTH), - ) + self.column_keys = ( + self.add_column("Started", width=self.SORTABLE_COLUMN_WIDTH), + self.add_column("Duration", width=self.SORTABLE_COLUMN_WIDTH), + self.add_column("Status", width=self.SORTABLE_COLUMN_WIDTH), + self.add_column("SPARQL"), + ) + self.fill_rows() + + def render_row(self, row: HistoricQueryRow) -> tuple: + """Render one finished query to Started/Duration/Status/SPARQL cells.""" + return ( + format_clock(row.started_at_ms), + Text(format_duration(row.duration_ms), justify="right"), + row.status, + truncate(oneline(row.sparql), SPARQL_WIDTH), + ) def set_sort_indicator(self, column_index: int, descending: bool) -> None: """Show a sort arrow on one column header and clear the rest.""" From 098f29d94956ac2a5bb5470de6ac7cc5fcc409d6 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sun, 31 May 2026 19:24:36 +0200 Subject: [PATCH 22/26] Make sure qids, client_ip and sparql are only read for the visible rows and are cached for quick mode changes and sort changes --- src/qlever/monitor/historic_data.py | 88 +++++++---- src/qlever/monitor/models.py | 1 + src/qlever/monitor/views/historic.py | 34 +++-- src/qlever/monitor/widgets/query_table.py | 130 ++++++++-------- test/qlever/test_historic_data.py | 172 ++++++++++++++++------ 5 files changed, 274 insertions(+), 151 deletions(-) diff --git a/src/qlever/monitor/historic_data.py b/src/qlever/monitor/historic_data.py index f6bc53cf..6fa7cdf4 100644 --- a/src/qlever/monitor/historic_data.py +++ b/src/qlever/monitor/historic_data.py @@ -6,6 +6,7 @@ the scan; a mode change reuses the cache. """ +from dataclasses import replace from pathlib import Path from typing import NamedTuple @@ -32,25 +33,26 @@ class LoggedQuery(NamedTuple): `end_ms` is `None` when the query started but has not ended yet, either because it is still running or the server crashed before writing the end event. `status` carries the raw end status, or - `"running"` for a still-open survivor. + `"running"` for a still-open survivor. `start_line_offset` is the + byte offset of the start line, used to read the query's text only + once it is about to be displayed. """ start_ms: int end_ms: int | None status: str - qid: str - sparql: str - client_ip: str + start_line_offset: int class WindowData(NamedTuple): """Cached scan of one time window, reused across mode flips. `queries` is every `LoggedQuery` overlapping the window in any - mode (the ACTIVE superset), already loaded with `qid` and SPARQL - so a mode change is a pure in-memory filter. `metrics` is computed - over completions whose `end_ms` lies inside the window, so it is - the same for every mode. + mode (the ACTIVE superset), so a mode change is a pure in-memory + filter. Each query carries its `start_line_offset` but not its + text; the text is read for the visible rows only. `metrics` is + computed over completions whose `end_ms` lies inside the window, + so it is the same for every mode. """ queries: list[LoggedQuery] @@ -70,10 +72,11 @@ def read_window( The byte range scanned is the window padded by `pad_ms` on each side so both events of a query straddling the window edge are - recovered. Pairs that lie entirely inside the pad are dropped - before `qid` and SPARQL are loaded, so the cache holds only rows - a mode predicate could keep. Metrics count completions whose - `end_ms` lies inside `[window_start_ms, window_end_ms]`. + recovered. Pairs that lie entirely inside the pad are dropped, so + the cache holds only rows a mode predicate could keep. The query + text is not read here; `load_query_details_for_rows` reads it for + the visible rows. Metrics count completions whose `end_ms` lies + inside `[window_start_ms, window_end_ms]`. A still-open query is `"running"` only if its start is within `pad_ms` of `log_end_ms` and the log itself is fresh @@ -92,17 +95,12 @@ def read_window( for pair in completed: if pair.start_ms > window_end_ms or pair.end_ms < window_start_ms: continue - qid, client_ip, sparql = load_sparql_at( - log_stream, pair.start_line_offset - ) queries.append( LoggedQuery( start_ms=pair.start_ms, end_ms=pair.end_ms, status=pair.status, - qid=qid, - sparql=sparql, - client_ip=client_ip, + start_line_offset=pair.start_line_offset, ) ) log_is_fresh = now_ms - log_end_ms <= pad_ms @@ -110,9 +108,6 @@ def read_window( for start_ms, start_line_offset in still_open.values(): if start_ms > window_end_ms: continue - qid, client_ip, sparql = load_sparql_at( - log_stream, start_line_offset - ) status = ( "running" if log_is_fresh and start_ms >= running_cutoff_ms @@ -123,9 +118,7 @@ def read_window( start_ms=start_ms, end_ms=None, status=status, - qid=qid, - sparql=sparql, - client_ip=client_ip, + start_line_offset=start_line_offset, ) ) @@ -183,20 +176,23 @@ def render_window( surviving `LoggedQuery` into a `HistoricQueryRow`. `duration_ms` is measured against `log_end_ms` for still-running queries, against the recorded `end_ms` for completed ones, and reported as - DURATION_UNKNOWN for crash orphans. Metrics come straight from - `window_data.metrics`, labelled with the current window size. + DURATION_UNKNOWN for crash orphans. The rows carry empty text; + `load_query_details_for_rows` fills it for the visible slice. + Metrics come straight from `window_data.metrics`, labelled with + the current window size. """ queries = filter_queries( window_data, controls.mode, controls.start_ms, controls.end_ms ) historic_rows = [ HistoricQueryRow( - qid=query.qid, + qid="", + start_line_offset=query.start_line_offset, started_at_ms=query.start_ms, duration_ms=display_duration_ms(query, log_end_ms), status=query.status, - sparql=query.sparql, - client_ip=query.client_ip, + sparql="", + client_ip="", ) for query in queries ] @@ -204,3 +200,37 @@ def render_window( label=controls.window_size, **window_data.metrics._asdict() ) return historic_rows, metrics + + +def load_query_details_for_rows( + log_path: Path, + rows: list[HistoricQueryRow], + query_details_cache: dict[int, tuple[str, str, str]], +) -> list[HistoricQueryRow]: + """Fill the deferred start-line fields on the given rows. + + Each row's `qid`, `client_ip`, and SPARQL text live on its start + line, which `read_window` did not read. Offsets already in + `query_details_cache` are reused; the rest are read in one pass, + opening the file only when something is missing. The cache is + scoped to one window so a sort or mode change repaints from memory. + Returns filled copies; the input rows are left unchanged. + """ + missing = [ + row.start_line_offset + for row in rows + if row.start_line_offset not in query_details_cache + ] + if missing: + with log_path.open("rb") as log_stream: + for offset in missing: + query_details_cache[offset] = load_sparql_at( + log_stream, offset + ) + filled_rows = [] + for row in rows: + qid, client_ip, sparql = query_details_cache[row.start_line_offset] + filled_rows.append( + replace(row, qid=qid, client_ip=client_ip, sparql=sparql) + ) + return filled_rows diff --git a/src/qlever/monitor/models.py b/src/qlever/monitor/models.py index 0af6aba0..9bdddec3 100644 --- a/src/qlever/monitor/models.py +++ b/src/qlever/monitor/models.py @@ -52,6 +52,7 @@ class LiveQueryRow: @dataclass(frozen=True) class HistoricQueryRow: qid: str + start_line_offset: int started_at_ms: int duration_ms: int status: str diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 65f84d4d..b03924e1 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -6,7 +6,11 @@ from textual.screen import Screen from textual.widgets import Footer, Static -from qlever.monitor.historic_data import read_window, render_window +from qlever.monitor.historic_data import ( + load_query_details_for_rows, + read_window, + render_window, +) from qlever.monitor.live_data import current_ms from qlever.monitor.metrics import EMPTY_FIELDS from qlever.monitor.models import ( @@ -104,6 +108,7 @@ def compose(self) -> ComposeResult: self.window_end_ms = self.log_end_ms self.window_data = None self.all_rows = [] + self.query_details_cache = {} controls = ControlsState( window_size=self.window_size, mode=self.mode, @@ -191,8 +196,11 @@ def refresh_data(self, rescan: bool) -> None: """Scan and/or re-filter the window, push rows + metrics + status. On `rescan` the log is read into a fresh `WindowData` cached on - the screen; otherwise the cached scan is reused. Render always - runs, so a mode change pays only an in-memory filter. + the screen and the query-details cache is emptied, since a new + window is a new set of queries. Render always runs, so a mode + change pays only an in-memory filter. The visible rows' text is + read here, off the UI thread, reusing the cache across sorts + and mode changes within the window. """ if rescan: self.window_data = read_window( @@ -204,6 +212,7 @@ def refresh_data(self, rescan: bool) -> None: self.log_end_ms, current_ms(), ) + self.query_details_cache = {} controls = ControlsState( window_size=self.window_size, mode=self.mode, @@ -214,8 +223,11 @@ def refresh_data(self, rescan: bool) -> None: self.window_data, controls, self.log_end_ms ) self.all_rows = rows + visible_rows = load_query_details_for_rows( + self.app.log_file, self.visible_rows(), self.query_details_cache + ) self.app.call_from_thread( - self.apply_window_result, self.visible_rows(), metrics + self.apply_window_result, visible_rows, metrics ) def apply_window_result( @@ -231,14 +243,6 @@ def apply_window_result( ) self.refresh_sort_indicator() - def apply_sort(self) -> None: - """Re-sort the full window, re-cap, and refresh the status line.""" - self.query_one(HistoricQueryTable).set_rows(self.visible_rows()) - self.query_one("#table-status", Static).update( - self.status_text(len(self.all_rows)) - ) - self.refresh_sort_indicator() - def sorted_rows( self, rows: list[HistoricQueryRow] ) -> list[HistoricQueryRow]: @@ -334,7 +338,7 @@ def cycle_sort_column(self, direction: int) -> None: self.sort_column = SORT_COLUMNS[ (index + direction) % len(SORT_COLUMNS) ] - self.apply_sort() + self.refresh_data(rescan=False) def action_sort_next_column(self) -> None: """Sort by the next column (wraps).""" @@ -347,7 +351,7 @@ def action_sort_prev_column(self) -> None: def action_invert_sort(self) -> None: """Flip the sort direction.""" self.sort_reverse = not self.sort_reverse - self.apply_sort() + self.refresh_data(rescan=False) def shift_window(self, direction: int) -> None: """Move the window by its own width; clamp; no-op when `all`.""" @@ -418,7 +422,7 @@ def on_data_table_header_selected( self.sort_reverse = not self.sort_reverse else: self.sort_column = column - self.apply_sort() + self.refresh_data(rescan=False) def on_data_table_row_selected( self, message: HistoricQueryTable.RowSelected diff --git a/src/qlever/monitor/widgets/query_table.py b/src/qlever/monitor/widgets/query_table.py index da22e41f..c1e9f0f8 100644 --- a/src/qlever/monitor/widgets/query_table.py +++ b/src/qlever/monitor/widgets/query_table.py @@ -29,11 +29,11 @@ def format_duration(ms: int) -> str: class QueryTable(DataTable): - """Shared base for the Live and Historic query tables. + """Common behaviour for the Live and Historic query tables. - Holds only what both screens duplicate: the row cursor, the stored - rows, and (via module helpers) SPARQL clipping. Each subclass owns - its own columns, since the two column sets differ. + Each subclass defines its own columns, gives each row a key, and + renders a row to its cells. The base uses those to fill the table + and to keep the cursor on the same row when the rows are replaced. """ def __init__( @@ -42,49 +42,63 @@ def __init__( """Hold the rows to paint once the table is mounted.""" super().__init__(cursor_type="row") self.query_rows = rows - # qid -> the cell tuple currently shown for that row. - self.shown_cells = {} - # Column keys in display order, recorded when columns are added. - self.column_keys = () - def fill_rows(self) -> None: - """Add one keyed DataTable row per entry in self.query_rows. + def row_key(self, row: LiveQueryRow | HistoricQueryRow) -> str: + """The DataTable row key identifying this query.""" + raise NotImplementedError - Records each rendered cell tuple in shown_cells so a later diff - can compare against what is on screen. - """ - self.shown_cells = {} - for row in self.query_rows: - cells = self.render_row(row) - self.add_row(*cells, key=row.qid) - self.shown_cells[row.qid] = cells + def render_row(self, row: LiveQueryRow | HistoricQueryRow) -> tuple: + """Render one query to its display cells.""" + raise NotImplementedError - def set_rows( - self, rows: list[LiveQueryRow] | list[HistoricQueryRow] - ) -> None: - """Replace the table rows, preserving the cursor position by qid.""" - cursor_qid = None - old_index = self.cursor_row - if self.query_rows and 0 <= old_index < len(self.query_rows): - cursor_qid = self.query_rows[old_index].qid + def fill_rows(self) -> None: + """Add one keyed DataTable row per entry in self.query_rows.""" + for row in self.query_rows: + self.add_row(*self.render_row(row), key=self.row_key(row)) - self.query_rows = rows - self.clear(columns=False) - self.fill_rows() + def key_under_cursor(self) -> str | int | None: + """The key of the row under the cursor, or None when none is valid.""" + index = self.cursor_row + if self.query_rows and 0 <= index < len(self.query_rows): + return self.row_key(self.query_rows[index]) + return None + def restore_cursor( + self, key: str | int | None, fallback_index: int + ) -> None: + """Move the cursor back to key, or clamp to fallback_index if it is gone.""" if not self.query_rows: return - if cursor_qid is not None: + if key is not None: for index, row in enumerate(self.query_rows): - if row.qid == cursor_qid: + if self.row_key(row) == key: self.move_cursor(row=index) return - self.move_cursor(row=min(old_index, len(self.query_rows) - 1)) + self.move_cursor(row=min(fallback_index, len(self.query_rows) - 1)) + + def set_rows( + self, rows: list[LiveQueryRow] | list[HistoricQueryRow] + ) -> None: + """Replace the rows by a full rebuild, preserving the cursor by key.""" + key = self.key_under_cursor() + fallback_index = self.cursor_row + self.query_rows = rows + self.clear(columns=False) + self.fill_rows() + self.restore_cursor(key, fallback_index) class LiveQueryTable(QueryTable): """Active queries on the Live screen; the only focusable widget.""" + def __init__(self, rows: list[LiveQueryRow]) -> None: + """Track the cells and column keys the incremental diff compares.""" + super().__init__(rows) + # qid -> the cell tuple currently shown for that row. + self.shown_cells = {} + # Column keys in display order, recorded when columns are added. + self.column_keys = () + def on_mount(self) -> None: """Add the columns and one table row per active query.""" self.column_keys = ( @@ -102,25 +116,17 @@ def render_row(self, row: LiveQueryRow) -> tuple: truncate(oneline(row.sparql), SPARQL_WIDTH), ) - def cursor_qid(self) -> str | None: - """The qid under the row cursor, or None when nothing valid is selected.""" - index = self.cursor_row - if self.query_rows and 0 <= index < len(self.query_rows): - return self.query_rows[index].qid - return None - - def repin_cursor(self, qid: str, fallback_index: int) -> None: - """Move the cursor back to qid after a layout change. + def row_key(self, row: LiveQueryRow) -> str: + """Key an active query by its qid, unique among concurrent queries.""" + return row.qid - Run after query_rows holds the new list. If qid has left the - table, clamp to the nearest remaining row. - """ - for index, row in enumerate(self.query_rows): - if row.qid == qid: - self.move_cursor(row=index) - return - if self.query_rows: - self.move_cursor(row=min(fallback_index, len(self.query_rows) - 1)) + def fill_rows(self) -> None: + """Fill the table and remember each row's cells for the diff.""" + self.shown_cells = {} + for row in self.query_rows: + cells = self.render_row(row) + self.add_row(*cells, key=row.qid) + self.shown_cells[row.qid] = cells def set_rows(self, rows: list[LiveQueryRow]) -> None: """Bring the table to the new sorted rows by an incremental diff. @@ -141,7 +147,7 @@ def set_rows(self, rows: list[LiveQueryRow]) -> None: super().set_rows(rows) return - cursor_qid = self.cursor_qid() + cursor_key = self.key_under_cursor() old_cursor_row = self.cursor_row layout_changed = bool(additions) or len(survivors) != len(old_order) @@ -158,8 +164,8 @@ def set_rows(self, rows: list[LiveQueryRow]) -> None: self.query_rows = rows self.shown_cells = new_cells - if layout_changed and cursor_qid is not None: - self.repin_cursor(cursor_qid, old_cursor_row) + if layout_changed and cursor_key is not None: + self.restore_cursor(cursor_key, old_cursor_row) class HistoricQueryTable(QueryTable): @@ -170,12 +176,10 @@ class HistoricQueryTable(QueryTable): def on_mount(self) -> None: """Add the columns and one table row per finished query.""" - self.column_keys = ( - self.add_column("Started", width=self.SORTABLE_COLUMN_WIDTH), - self.add_column("Duration", width=self.SORTABLE_COLUMN_WIDTH), - self.add_column("Status", width=self.SORTABLE_COLUMN_WIDTH), - self.add_column("SPARQL"), - ) + self.add_column("Started", width=self.SORTABLE_COLUMN_WIDTH) + self.add_column("Duration", width=self.SORTABLE_COLUMN_WIDTH) + self.add_column("Status", width=self.SORTABLE_COLUMN_WIDTH) + self.add_column("SPARQL") self.fill_rows() def render_row(self, row: HistoricQueryRow) -> tuple: @@ -187,6 +191,14 @@ def render_row(self, row: HistoricQueryRow) -> tuple: truncate(oneline(row.sparql), SPARQL_WIDTH), ) + def row_key(self, row: HistoricQueryRow) -> str: + """Key a finished query by its start line offset, a stable identity. + + A window can hold the same qid twice (the server reuses a qid + after its end), but every start line sits at one byte offset. + """ + return str(row.start_line_offset) + def set_sort_indicator(self, column_index: int, descending: bool) -> None: """Show a sort arrow on one column header and clear the rest.""" arrow = " ▼" if descending else " ▲" diff --git a/test/qlever/test_historic_data.py b/test/qlever/test_historic_data.py index d3507bd1..dbaac130 100644 --- a/test/qlever/test_historic_data.py +++ b/test/qlever/test_historic_data.py @@ -1,15 +1,16 @@ -"""Tests for read_window, filter_queries, and render_window.""" +"""Tests for read_window, filter_queries, render_window, and detail loading.""" from qlever.monitor.historic_data import ( DURATION_UNKNOWN, LoggedQuery, WindowData, filter_queries, + load_query_details_for_rows, read_window, render_window, ) from qlever.monitor.metrics import MetricsSnapshot -from qlever.monitor.models import ControlsState +from qlever.monitor.models import ControlsState, HistoricQueryRow PAD_MS = 10_000 SLOW_MS = 60_000 @@ -54,9 +55,7 @@ def test_read_window_keeps_pair_entirely_inside_window(write_log): start_ms=1_000_000, end_ms=1_000_500, status="ok", - qid="q1", - sparql="SELECT a", - client_ip="x", + start_line_offset=0, ) ] assert data.metrics.seen == 1 @@ -79,8 +78,8 @@ def test_read_window_drops_pair_inside_pad_before_window(write_log): log_end_ms=1_000_500, now_ms=1_000_500, ) - qids = [query.qid for query in data.queries] - assert qids == ["q1"] + starts = [query.start_ms for query in data.queries] + assert starts == [1_000_000] assert data.metrics.seen == 1 @@ -98,7 +97,7 @@ def test_read_window_keeps_pair_straddling_window_start(write_log): log_end_ms=1_005_000, now_ms=1_005_000, ) - assert [query.qid for query in data.queries] == ["edge"] + assert len(data.queries) == 1 assert data.queries[0].start_ms == 995_000 assert data.queries[0].end_ms == 1_005_000 assert data.metrics.seen == 1 @@ -118,7 +117,8 @@ def test_read_window_keeps_pair_straddling_window_end(write_log): log_end_ms=1_105_000, now_ms=1_105_000, ) - assert [query.qid for query in data.queries] == ["edge"] + assert len(data.queries) == 1 + assert data.queries[0].start_ms == 1_095_000 assert data.metrics.seen == 0 @@ -138,9 +138,7 @@ def test_read_window_includes_still_open_with_running_status(write_log): start_ms=1_050_000, end_ms=None, status="running", - qid="live", - sparql="SELECT live", - client_ip="x", + start_line_offset=0, ) ] @@ -163,9 +161,7 @@ def test_read_window_marks_still_open_as_orphaned_when_log_advanced_past_pad( start_ms=1_050_000, end_ms=None, status="orphaned", - qid="ghost", - sparql="SELECT ghost", - client_ip="x", + start_line_offset=0, ) ] @@ -223,22 +219,43 @@ def test_read_window_metrics_count_only_ends_inside_window(write_log): log_end_ms=1_105_000, now_ms=1_105_000, ) - assert {query.qid for query in data.queries} == { - "edge_start", - "inside", - "edge_end", + assert {query.start_ms for query in data.queries} == { + 995_000, + 1_050_000, + 1_095_000, } assert data.metrics.seen == 2 -def make_query(qid, start_ms, end_ms, status="ok"): +def test_read_window_carries_real_start_line_offset(write_log): + first = start_line(1_010_000, "q1", "SELECT a") + end_line(1_020_000, "q1") + path = write_log( + first + + start_line(1_050_000, "q2", "SELECT b") + + end_line(1_060_000, "q2") + ) + data = read_window( + path, + 1_000_000, + 1_100_000, + PAD_MS, + SLOW_MS, + log_end_ms=1_060_000, + now_ms=1_060_000, + ) + offsets = { + query.start_ms: query.start_line_offset for query in data.queries + } + assert offsets[1_010_000] == 0 + assert offsets[1_050_000] == len(first) + + +def make_query(start_ms, end_ms, status="ok", start_line_offset=0): return LoggedQuery( start_ms=start_ms, end_ms=end_ms, status=status, - qid=qid, - sparql=f"SELECT {qid}", - client_ip="", + start_line_offset=start_line_offset, ) @@ -260,8 +277,8 @@ def make_cache(queries): def test_filter_queries_active_returns_cache_unchanged(): cache = make_cache( [ - make_query("a", 100, 200), - make_query("b", 300, None, status="running"), + make_query(100, 200), + make_query(300, None, status="running"), ] ) assert filter_queries(cache, "ACTIVE", 0, 1000) is cache.queries @@ -270,42 +287,50 @@ def test_filter_queries_active_returns_cache_unchanged(): def test_filter_queries_starts_keeps_only_starts_in_window(): cache = make_cache( [ - make_query("early", 50, 150), - make_query("inside", 200, 400), - make_query("late", 1100, 1200), + make_query(50, 150), + make_query(200, 400), + make_query(1100, 1200), ] ) - qids = [query.qid for query in filter_queries(cache, "STARTS", 100, 1000)] - assert qids == ["inside"] + starts = [ + query.start_ms for query in filter_queries(cache, "STARTS", 100, 1000) + ] + assert starts == [200] def test_filter_queries_starts_includes_running_queries(): - cache = make_cache([make_query("live", 500, None, status="running")]) - qids = [query.qid for query in filter_queries(cache, "STARTS", 100, 1000)] - assert qids == ["live"] + cache = make_cache([make_query(500, None, status="running")]) + starts = [ + query.start_ms for query in filter_queries(cache, "STARTS", 100, 1000) + ] + assert starts == [500] def test_filter_queries_ends_excludes_running_queries(): cache = make_cache( [ - make_query("done", 200, 500), - make_query("live", 300, None, status="running"), + make_query(200, 500), + make_query(300, None, status="running"), ] ) - qids = [query.qid for query in filter_queries(cache, "ENDS", 100, 1000)] - assert qids == ["done"] + ends = [ + query.end_ms for query in filter_queries(cache, "ENDS", 100, 1000) + ] + assert ends == [500] def test_filter_queries_ends_drops_ends_outside_window(): cache = make_cache( [ - make_query("before", 50, 90), - make_query("inside", 200, 500), - make_query("after", 800, 1200), + make_query(50, 90), + make_query(200, 500), + make_query(800, 1200), ] ) - qids = [query.qid for query in filter_queries(cache, "ENDS", 100, 1000)] - assert qids == ["inside"] + ends = [ + query.end_ms for query in filter_queries(cache, "ENDS", 100, 1000) + ] + assert ends == [500] def make_controls(mode="ACTIVE"): @@ -336,7 +361,7 @@ def make_render_cache(queries, **metric_fields): def test_render_window_uses_recorded_end_for_completed_query(): - cache = make_render_cache([make_query("done", 1_100_000, 1_150_000)]) + cache = make_render_cache([make_query(1_100_000, 1_150_000)]) rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) assert rows[0].duration_ms == 50_000 assert rows[0].status == "ok" @@ -344,7 +369,7 @@ def test_render_window_uses_recorded_end_for_completed_query(): def test_render_window_uses_log_end_for_running_query(): cache = make_render_cache( - [make_query("live", 1_100_000, None, status="running")] + [make_query(1_100_000, None, status="running")] ) rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) assert rows[0].duration_ms == 1_400_000 @@ -353,7 +378,7 @@ def test_render_window_uses_log_end_for_running_query(): def test_render_window_marks_orphan_duration_as_unknown(): cache = make_render_cache( - [make_query("ghost", 1_100_000, None, status="orphaned")] + [make_query(1_100_000, None, status="orphaned")] ) rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) assert rows[0].duration_ms == DURATION_UNKNOWN @@ -374,14 +399,65 @@ def test_render_window_passes_window_size_as_metric_label(): assert metrics.ok == 7 +def test_render_window_carries_start_line_offset_onto_row(): + cache = make_render_cache( + [make_query(1_100_000, 1_200_000, start_line_offset=512)] + ) + rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) + assert rows[0].start_line_offset == 512 + + +def test_render_window_leaves_text_empty(): + cache = make_render_cache([make_query(1_100_000, 1_200_000)]) + rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) + assert rows[0].qid == "" + assert rows[0].sparql == "" + assert rows[0].client_ip == "" + + def test_render_window_respects_mode_filter(): cache = make_render_cache( [ - make_query("done", 1_100_000, 1_200_000), - make_query("live", 1_500_000, None, status="running"), + make_query(1_100_000, 1_200_000), + make_query(1_500_000, None, status="running"), ] ) rows, _ = render_window( cache, make_controls(mode="ENDS"), log_end_ms=2_500_000 ) - assert [row.qid for row in rows] == ["done"] + assert [row.started_at_ms for row in rows] == [1_100_000] + + +def make_row(start_line_offset): + return HistoricQueryRow( + qid="", + start_line_offset=start_line_offset, + started_at_ms=1_000_000, + duration_ms=0, + status="ok", + sparql="", + client_ip="", + ) + + +def test_load_query_details_fills_text_from_offsets(write_log): + first = start_line(1_000_000, "q1", "SELECT a") + path = write_log(first + start_line(1_050_000, "q2", "SELECT b")) + cache = {} + rows = [make_row(0), make_row(len(first))] + filled = load_query_details_for_rows(path, rows, cache) + assert [(row.qid, row.sparql) for row in filled] == [ + ("q1", "SELECT a"), + ("q2", "SELECT b"), + ] + assert filled[0].client_ip == "x" + assert cache[0] == ("q1", "x", "SELECT a") + + +def test_load_query_details_reuses_cache_without_reading(write_log): + path = write_log(start_line(1_000_000, "q1", "SELECT a")) + cache = {0: ("cached-qid", "cached-ip", "CACHED")} + [filled] = load_query_details_for_rows(path, [make_row(0)], cache) + assert filled.qid == "cached-qid" + assert filled.client_ip == "cached-ip" + assert filled.sparql == "CACHED" From f7ada750ecd70908f5ffc554e3772dcdef3fdc30 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Sun, 31 May 2026 22:42:29 +0200 Subject: [PATCH 23/26] Add a 0.1s debounce to historic view to let the user selection settle in the case of fast time window switching --- src/qlever/monitor/app.py | 2 -- src/qlever/monitor/views/historic.py | 31 +++++++++++++++++++++------- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index 113ba1da..deea54d9 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -199,8 +199,6 @@ def apply_pretty_result( def action_clear_query(self) -> None: """Drop the displayed query, restoring the empty-state hint.""" pane = self.screen.query_one(SparqlPane) - if pane.content is None: - return pane.content = None def action_scroll_sparql_up(self) -> None: diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index b03924e1..5822a33f 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -62,6 +62,9 @@ # window does not freeze the table. MAX_VISIBLE_ROWS = 1000 +# Collapse a fast window scrub into a single scan once the user settles. +RESCAN_DEBOUNCE_S = 0.1 + class HistoricScreen(Screen, inherit_bindings=False): """Historic view: shows active queries parsed from the log over a time window.""" @@ -91,8 +94,7 @@ class HistoricScreen(Screen, inherit_bindings=False): def compose(self) -> ComposeResult: self.log_start_ms = self.app.log_start_ms - self.log_end_ms = None - self.refresh_log_end() + self.log_end_ms = self.read_log_end() # Presets longer than the log span are pointless; `all` covers it. self.available_presets = available_presets( self.log_end_ms - self.log_start_ms @@ -109,6 +111,8 @@ def compose(self) -> ComposeResult: self.window_data = None self.all_rows = [] self.query_details_cache = {} + self.rescan_timer = None + self.cached_window = None controls = ControlsState( window_size=self.window_size, mode=self.mode, @@ -138,18 +142,18 @@ def compose(self) -> ComposeResult: def on_screen_resume(self) -> None: """Catch up on log growth, then push state and rescan.""" - self.refresh_log_end() + self.log_end_ms = self.read_log_end() self.available_presets = available_presets( self.log_end_ms - self.log_start_ms ) self.clamp_window() self.refresh_view(rescan=True) - def refresh_log_end(self) -> None: - """Read the freshest log timestamp the tailer has seen.""" + def read_log_end(self) -> int: + """Return the freshest log timestamp the tailer has seen.""" state = self.app.live_state with state.lock: - self.log_end_ms = state.latest_event_ms + return state.latest_event_ms def show_loading_state(self) -> None: """Blank the table and metrics row while a rescan is in flight.""" @@ -188,8 +192,20 @@ def refresh_view(self, rescan: bool) -> None: self.query_one(SelectedWindow).state = controls self.query_one(Timeline).bounds = bounds if rescan: + if (self.window_start_ms, self.window_end_ms) == self.cached_window: + return self.show_loading_state() - self.refresh_data(rescan) + self.schedule_rescan() + else: + self.refresh_data(rescan=False) + + def schedule_rescan(self) -> None: + """Collapse a fast window scrub into one scan of where the user lands.""" + if self.rescan_timer is not None: + self.rescan_timer.stop() + self.rescan_timer = self.set_timer( + RESCAN_DEBOUNCE_S, lambda: self.refresh_data(rescan=True) + ) @work(thread=True, exclusive=True, group="refresh_data") def refresh_data(self, rescan: bool) -> None: @@ -213,6 +229,7 @@ def refresh_data(self, rescan: bool) -> None: current_ms(), ) self.query_details_cache = {} + self.cached_window = (self.window_start_ms, self.window_end_ms) controls = ControlsState( window_size=self.window_size, mode=self.mode, From 6418cafd42e2fbc8c645181676cad87600012f5c Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 1 Jun 2026 16:25:19 +0200 Subject: [PATCH 24/26] Fix live view metrics for last 1h window and make historic view metrics row adapt to the selected mode --- src/qlever/monitor/app.py | 7 +- src/qlever/monitor/historic_data.py | 87 +++++---- src/qlever/monitor/live_data.py | 13 +- src/qlever/monitor/metrics.py | 48 ++--- src/qlever/monitor/views/historic.py | 22 ++- test/qlever/test_historic_data.py | 265 +++++++++++++-------------- test/qlever/test_live_data.py | 26 +++ 7 files changed, 250 insertions(+), 218 deletions(-) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index deea54d9..acf97763 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -125,7 +125,12 @@ def tail_live_log(self) -> None: @work(thread=True, group="load_metrics_history") def load_metrics_history(self) -> None: """Prepend the hour before cut_offset into completed history so metrics aren't empty at boot.""" - load_completed_history(self.log_file, self.live_state, self.cut_offset) + load_completed_history( + self.log_file, + self.live_state, + self.cut_offset, + self.window_pad_ms, + ) # Backfill is done; record how far back metrics have data. log_start_or_boot = self.log_start_ms or self.boot_time_ms self.live_state.metrics_known_from_ms = max( diff --git a/src/qlever/monitor/historic_data.py b/src/qlever/monitor/historic_data.py index 6fa7cdf4..776e387a 100644 --- a/src/qlever/monitor/historic_data.py +++ b/src/qlever/monitor/historic_data.py @@ -1,9 +1,10 @@ """Data layer for the Historic screen. -Reads one time window of the log into a cached `WindowData`, filters -that cache by display mode, and maps the survivors into the -`models.py` dataclasses the screen renders. A window change reruns -the scan; a mode change reuses the cache. +Reads one time window of the log into a list of `LoggedQuery`, +filters it by display mode, and maps the survivors into the +`models.py` dataclasses the screen renders. Metrics are computed per +mode at render time. A window change reruns the scan; a mode change +reuses the scanned list. """ from dataclasses import replace @@ -11,12 +12,13 @@ from typing import NamedTuple from qlever.monitor.log_reader import ( + CompletedQuery, load_sparql_at, offset_for_ts, pair_start_end_events, scan_range, ) -from qlever.monitor.metrics import MetricsSnapshot, metrics_for_ranges +from qlever.monitor.metrics import metrics_for_queries from qlever.monitor.models import ( ControlsState, HistoricQueryRow, @@ -44,39 +46,23 @@ class LoggedQuery(NamedTuple): start_line_offset: int -class WindowData(NamedTuple): - """Cached scan of one time window, reused across mode flips. - - `queries` is every `LoggedQuery` overlapping the window in any - mode (the ACTIVE superset), so a mode change is a pure in-memory - filter. Each query carries its `start_line_offset` but not its - text; the text is read for the visible rows only. `metrics` is - computed over completions whose `end_ms` lies inside the window, - so it is the same for every mode. - """ - - queries: list[LoggedQuery] - metrics: MetricsSnapshot - - def read_window( log_path: Path, window_start_ms: int, window_end_ms: int, pad_ms: int, - slow_threshold_ms: int, log_end_ms: int, now_ms: int, -) -> WindowData: - """Scan one time window of the log into a `WindowData` snapshot. +) -> list[LoggedQuery]: + """Scan one time window of the log into the queries overlapping it. The byte range scanned is the window padded by `pad_ms` on each side so both events of a query straddling the window edge are recovered. Pairs that lie entirely inside the pad are dropped, so - the cache holds only rows a mode predicate could keep. The query + the result holds only rows a mode predicate could keep. The query text is not read here; `load_query_details_for_rows` reads it for - the visible rows. Metrics count completions whose `end_ms` lies - inside `[window_start_ms, window_end_ms]`. + the visible rows. Metrics are computed later, per mode, in + `render_window`. A still-open query is `"running"` only if its start is within `pad_ms` of `log_end_ms` and the log itself is fresh @@ -122,21 +108,18 @@ def read_window( ) ) - metrics = metrics_for_ranges( - completed, [(window_start_ms, window_end_ms)], slow_threshold_ms - )[0] - return WindowData(queries=queries, metrics=metrics) + return queries def filter_queries( - window_data: WindowData, + queries: list[LoggedQuery], mode: str, window_start_ms: int, window_end_ms: int, ) -> list[LoggedQuery]: - """Select queries from a cached `WindowData` for the given mode. + """Select queries from the scanned window for the given mode. - The cache is already the ACTIVE set (every query overlaps the + The given list is already the ACTIVE set (every query overlaps the window), so ACTIVE is the fallthrough that returns it unchanged. STARTS narrows to queries that began inside the window; ENDS narrows to queries that finished inside it. Still-running queries @@ -145,17 +128,17 @@ def filter_queries( if mode == "STARTS": return [ query - for query in window_data.queries + for query in queries if window_start_ms <= query.start_ms <= window_end_ms ] if mode == "ENDS": return [ query - for query in window_data.queries + for query in queries if query.end_ms is not None and window_start_ms <= query.end_ms <= window_end_ms ] - return window_data.queries + return queries def display_duration_ms(query: LoggedQuery, log_end_ms: int) -> int: @@ -166,11 +149,12 @@ def display_duration_ms(query: LoggedQuery, log_end_ms: int) -> int: def render_window( - window_data: WindowData, + queries: list[LoggedQuery], controls: ControlsState, + slow_threshold_ms: int, log_end_ms: int, ) -> tuple[list[HistoricQueryRow], MetricsCounts]: - """Map a cached `WindowData` into the rows and metrics the UI consumes. + """Map the scanned window into the rows and metrics the UI consumes. Runs `filter_queries` for the current mode and translates each surviving `LoggedQuery` into a `HistoricQueryRow`. `duration_ms` @@ -178,11 +162,12 @@ def render_window( against the recorded `end_ms` for completed ones, and reported as DURATION_UNKNOWN for crash orphans. The rows carry empty text; `load_query_details_for_rows` fills it for the visible slice. - Metrics come straight from `window_data.metrics`, labelled with - the current window size. + Metrics are computed over the completed queries in the selected + mode, so they match the rows on screen, and labelled with the + current window size. """ - queries = filter_queries( - window_data, controls.mode, controls.start_ms, controls.end_ms + selected = filter_queries( + queries, controls.mode, controls.start_ms, controls.end_ms ) historic_rows = [ HistoricQueryRow( @@ -194,11 +179,21 @@ def render_window( sparql="", client_ip="", ) - for query in queries + for query in selected ] - metrics = MetricsCounts( - label=controls.window_size, **window_data.metrics._asdict() - ) + completed = [ + CompletedQuery( + start_ms=query.start_ms, + end_ms=query.end_ms, + duration_ms=query.end_ms - query.start_ms, + status=query.status, + start_line_offset=query.start_line_offset, + ) + for query in selected + if query.end_ms is not None + ] + snapshot = metrics_for_queries(completed, slow_threshold_ms) + metrics = MetricsCounts(label=controls.window_size, **snapshot._asdict()) return historic_rows, metrics diff --git a/src/qlever/monitor/live_data.py b/src/qlever/monitor/live_data.py index 098814ef..0d4c727d 100644 --- a/src/qlever/monitor/live_data.py +++ b/src/qlever/monitor/live_data.py @@ -185,6 +185,7 @@ def load_completed_history( log_path: Path, state: LiveState, cut_offset: int, + window_pad_ms: int, now_ms: Callable[[], int] = current_ms, ) -> None: """Scan the hour before cut_offset and seed completed history. @@ -192,14 +193,22 @@ def load_completed_history( One-shot: callers spawn this on a daemon thread at startup if they want it off the main loop. After it returns, the tailer is the only writer of state.completed. + + The scan starts window_pad_ms before the hour boundary so a query + that began before the hour but ended inside it still has its start + line in range and can be paired. Pairs that ended before the hour + are then dropped, matching the deque's end_ms retention. """ with log_path.open("rb") as log_stream: oldest_wanted_ms = now_ms() - LIVE_HORIZON_MS scan_start_offset = offset_for_ts( - log_stream, oldest_wanted_ms, cut_offset + log_stream, oldest_wanted_ms - window_pad_ms, cut_offset ) events = scan_range(log_stream, scan_start_offset, cut_offset) - older_completions, _ = pair_start_end_events(events) + paired, _ = pair_start_end_events(events) + older_completions = [ + query for query in paired if query.end_ms >= oldest_wanted_ms + ] with state.lock: tailer_entries = state.completed.entries diff --git a/src/qlever/monitor/metrics.py b/src/qlever/monitor/metrics.py index d6910811..1b3981a4 100644 --- a/src/qlever/monitor/metrics.py +++ b/src/qlever/monitor/metrics.py @@ -6,7 +6,7 @@ """ import statistics -from collections.abc import Iterable +from collections.abc import Collection, Iterable from typing import NamedTuple from qlever.monitor.log_reader import CompletedQuery @@ -78,35 +78,37 @@ def build_snapshot( ) -def metrics_for_ranges( +def metrics_for_queries( completed: Iterable[CompletedQuery], + slow_threshold_ms: int, +) -> MetricsSnapshot: + """Compute the metrics snapshot for a set of completed queries.""" + counts = {"ok": 0, "failed": 0, "timeout": 0, "cancelled": 0, "unknown": 0} + slow = 0 + durations = [] + for entry in completed: + counts[entry.status] += 1 + if entry.duration_ms >= slow_threshold_ms: + slow += 1 + durations.append(entry.duration_ms) + return build_snapshot(counts, slow, durations) + + +def metrics_for_ranges( + completed: Collection[CompletedQuery], ranges: list[tuple[int, int]], slow_threshold_ms: int, ) -> list[MetricsSnapshot]: """Summarise completed queries into one snapshot per time range. Each range is an absolute (lo_ms, hi_ms) interval; a completed - query counts toward a range when its end_ms falls inside it. Walks - completed once, filling every range's tally in parallel, so N - ranges still cost a single pass. Returns one snapshot per range, in - the order the ranges were given. + query counts toward a range when its end_ms falls inside it. + Returns one snapshot per range, in the order the ranges were given. """ - counts = [ - {"ok": 0, "failed": 0, "timeout": 0, "cancelled": 0, "unknown": 0} - for _ in ranges - ] - slow = [0] * len(ranges) - durations = [[] for _ in ranges] - - for entry in completed: - for index, (lo_ms, hi_ms) in enumerate(ranges): - if lo_ms <= entry.end_ms <= hi_ms: - counts[index][entry.status] += 1 - if entry.duration_ms >= slow_threshold_ms: - slow[index] += 1 - durations[index].append(entry.duration_ms) - return [ - build_snapshot(counts[index], slow[index], durations[index]) - for index in range(len(ranges)) + metrics_for_queries( + [entry for entry in completed if lo_ms <= entry.end_ms <= hi_ms], + slow_threshold_ms, + ) + for lo_ms, hi_ms in ranges ] diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 5822a33f..04a155fe 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -108,7 +108,7 @@ def compose(self) -> ComposeResult: self.log_start_ms if width is None else self.log_end_ms - width ) self.window_end_ms = self.log_end_ms - self.window_data = None + self.window_queries = None self.all_rows = [] self.query_details_cache = {} self.rescan_timer = None @@ -211,20 +211,19 @@ def schedule_rescan(self) -> None: def refresh_data(self, rescan: bool) -> None: """Scan and/or re-filter the window, push rows + metrics + status. - On `rescan` the log is read into a fresh `WindowData` cached on - the screen and the query-details cache is emptied, since a new - window is a new set of queries. Render always runs, so a mode - change pays only an in-memory filter. The visible rows' text is - read here, off the UI thread, reusing the cache across sorts - and mode changes within the window. + On `rescan` the log is read into a fresh list of window queries + cached on the screen and the query-details cache is emptied, + since a new window is a new set of queries. Render always runs, + so a mode change pays only an in-memory filter. The visible + rows' text is read here, off the UI thread, reusing the cache + across sorts and mode changes within the window. """ if rescan: - self.window_data = read_window( + self.window_queries = read_window( self.app.log_file, self.window_start_ms, self.window_end_ms, self.app.window_pad_ms, - self.app.slow_threshold * 1000, self.log_end_ms, current_ms(), ) @@ -237,7 +236,10 @@ def refresh_data(self, rescan: bool) -> None: end_ms=self.window_end_ms, ) rows, metrics = render_window( - self.window_data, controls, self.log_end_ms + self.window_queries, + controls, + self.app.slow_threshold * 1000, + self.log_end_ms, ) self.all_rows = rows visible_rows = load_query_details_for_rows( diff --git a/test/qlever/test_historic_data.py b/test/qlever/test_historic_data.py index dbaac130..67b3dc28 100644 --- a/test/qlever/test_historic_data.py +++ b/test/qlever/test_historic_data.py @@ -3,13 +3,11 @@ from qlever.monitor.historic_data import ( DURATION_UNKNOWN, LoggedQuery, - WindowData, filter_queries, load_query_details_for_rows, read_window, render_window, ) -from qlever.monitor.metrics import MetricsSnapshot from qlever.monitor.models import ControlsState, HistoricQueryRow PAD_MS = 10_000 @@ -32,25 +30,23 @@ def end_line(ts_ms, qid, status="ok"): def test_read_window_empty_log_returns_no_queries(write_log): path = write_log(b"") - data = read_window(path, 0, 1000, PAD_MS, SLOW_MS, log_end_ms=0, now_ms=0) - assert data.queries == [] - assert data.metrics.seen == 0 + queries = read_window(path, 0, 1000, PAD_MS, log_end_ms=0, now_ms=0) + assert queries == [] def test_read_window_keeps_pair_entirely_inside_window(write_log): path = write_log( start_line(1_000_000, "q1", "SELECT a") + end_line(1_000_500, "q1") ) - data = read_window( + queries = read_window( path, 900_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_000_500, now_ms=1_000_500, ) - assert data.queries == [ + assert queries == [ LoggedQuery( start_ms=1_000_000, end_ms=1_000_500, @@ -58,8 +54,6 @@ def test_read_window_keeps_pair_entirely_inside_window(write_log): start_line_offset=0, ) ] - assert data.metrics.seen == 1 - assert data.metrics.ok == 1 def test_read_window_drops_pair_inside_pad_before_window(write_log): @@ -69,18 +63,16 @@ def test_read_window_drops_pair_inside_pad_before_window(write_log): + start_line(1_000_000, "q1", "SELECT a") + end_line(1_000_500, "q1") ) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_000_500, now_ms=1_000_500, ) - starts = [query.start_ms for query in data.queries] + starts = [query.start_ms for query in queries] assert starts == [1_000_000] - assert data.metrics.seen == 1 def test_read_window_keeps_pair_straddling_window_start(write_log): @@ -88,19 +80,17 @@ def test_read_window_keeps_pair_straddling_window_start(write_log): start_line(995_000, "edge", "SELECT edge") + end_line(1_005_000, "edge") ) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_005_000, now_ms=1_005_000, ) - assert len(data.queries) == 1 - assert data.queries[0].start_ms == 995_000 - assert data.queries[0].end_ms == 1_005_000 - assert data.metrics.seen == 1 + assert len(queries) == 1 + assert queries[0].start_ms == 995_000 + assert queries[0].end_ms == 1_005_000 def test_read_window_keeps_pair_straddling_window_end(write_log): @@ -108,32 +98,29 @@ def test_read_window_keeps_pair_straddling_window_end(write_log): start_line(1_095_000, "edge", "SELECT edge") + end_line(1_105_000, "edge") ) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_105_000, now_ms=1_105_000, ) - assert len(data.queries) == 1 - assert data.queries[0].start_ms == 1_095_000 - assert data.metrics.seen == 0 + assert len(queries) == 1 + assert queries[0].start_ms == 1_095_000 def test_read_window_includes_still_open_with_running_status(write_log): path = write_log(start_line(1_050_000, "live", "SELECT live")) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_050_000, now_ms=1_050_000, ) - assert data.queries == [ + assert queries == [ LoggedQuery( start_ms=1_050_000, end_ms=None, @@ -147,16 +134,15 @@ def test_read_window_marks_still_open_as_orphaned_when_log_advanced_past_pad( write_log, ): path = write_log(start_line(1_050_000, "ghost", "SELECT ghost")) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_200_000, now_ms=1_200_000, ) - assert data.queries == [ + assert queries == [ LoggedQuery( start_ms=1_050_000, end_ms=None, @@ -174,34 +160,32 @@ def test_read_window_marks_still_open_as_orphaned_when_log_is_stale(write_log): so the server must be dead and the survivor cannot be running. """ path = write_log(start_line(1_050_000, "ghost", "SELECT ghost")) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_050_000, now_ms=1_050_000 + PAD_MS + 1, ) - [query] = data.queries + [query] = queries assert query.status == "orphaned" def test_read_window_drops_still_open_started_after_window(write_log): path = write_log(start_line(1_200_000, "later", "SELECT later")) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_200_000, now_ms=1_200_000, ) - assert data.queries == [] + assert queries == [] -def test_read_window_metrics_count_only_ends_inside_window(write_log): +def test_read_window_keeps_all_overlapping_pairs(write_log): path = write_log( start_line(995_000, "edge_start", "SELECT a") + end_line(1_005_000, "edge_start") @@ -210,21 +194,19 @@ def test_read_window_metrics_count_only_ends_inside_window(write_log): + start_line(1_095_000, "edge_end", "SELECT c") + end_line(1_105_000, "edge_end") ) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_105_000, now_ms=1_105_000, ) - assert {query.start_ms for query in data.queries} == { + assert {query.start_ms for query in queries} == { 995_000, 1_050_000, 1_095_000, } - assert data.metrics.seen == 2 def test_read_window_carries_real_start_line_offset(write_log): @@ -234,18 +216,15 @@ def test_read_window_carries_real_start_line_offset(write_log): + start_line(1_050_000, "q2", "SELECT b") + end_line(1_060_000, "q2") ) - data = read_window( + queries = read_window( path, 1_000_000, 1_100_000, PAD_MS, - SLOW_MS, log_end_ms=1_060_000, now_ms=1_060_000, ) - offsets = { - query.start_ms: query.start_line_offset for query in data.queries - } + offsets = {query.start_ms: query.start_line_offset for query in queries} assert offsets[1_010_000] == 0 assert offsets[1_050_000] == len(first) @@ -259,76 +238,53 @@ def make_query(start_ms, end_ms, status="ok", start_line_offset=0): ) -def make_cache(queries): - empty_metrics = MetricsSnapshot( - seen=0, - ok=0, - failed=0, - timeout=0, - cancelled=0, - unknown=0, - slow=0, - p50=None, - p95=None, - ) - return WindowData(queries=queries, metrics=empty_metrics) - - -def test_filter_queries_active_returns_cache_unchanged(): - cache = make_cache( - [ - make_query(100, 200), - make_query(300, None, status="running"), - ] - ) - assert filter_queries(cache, "ACTIVE", 0, 1000) is cache.queries +def test_filter_queries_active_returns_list_unchanged(): + queries = [ + make_query(100, 200), + make_query(300, None, status="running"), + ] + assert filter_queries(queries, "ACTIVE", 0, 1000) is queries def test_filter_queries_starts_keeps_only_starts_in_window(): - cache = make_cache( - [ - make_query(50, 150), - make_query(200, 400), - make_query(1100, 1200), - ] - ) + queries = [ + make_query(50, 150), + make_query(200, 400), + make_query(1100, 1200), + ] starts = [ - query.start_ms for query in filter_queries(cache, "STARTS", 100, 1000) + query.start_ms for query in filter_queries(queries, "STARTS", 100, 1000) ] assert starts == [200] def test_filter_queries_starts_includes_running_queries(): - cache = make_cache([make_query(500, None, status="running")]) + queries = [make_query(500, None, status="running")] starts = [ - query.start_ms for query in filter_queries(cache, "STARTS", 100, 1000) + query.start_ms for query in filter_queries(queries, "STARTS", 100, 1000) ] assert starts == [500] def test_filter_queries_ends_excludes_running_queries(): - cache = make_cache( - [ - make_query(200, 500), - make_query(300, None, status="running"), - ] - ) + queries = [ + make_query(200, 500), + make_query(300, None, status="running"), + ] ends = [ - query.end_ms for query in filter_queries(cache, "ENDS", 100, 1000) + query.end_ms for query in filter_queries(queries, "ENDS", 100, 1000) ] assert ends == [500] def test_filter_queries_ends_drops_ends_outside_window(): - cache = make_cache( - [ - make_query(50, 90), - make_query(200, 500), - make_query(800, 1200), - ] - ) + queries = [ + make_query(50, 90), + make_query(200, 500), + make_query(800, 1200), + ] ends = [ - query.end_ms for query in filter_queries(cache, "ENDS", 100, 1000) + query.end_ms for query in filter_queries(queries, "ENDS", 100, 1000) ] assert ends == [500] @@ -342,92 +298,129 @@ def make_controls(mode="ACTIVE"): ) -def make_render_cache(queries, **metric_fields): - snapshot_fields = dict( - seen=0, - ok=0, - failed=0, - timeout=0, - cancelled=0, - unknown=0, - slow=0, - p50=None, - p95=None, - ) - snapshot_fields.update(metric_fields) - return WindowData( - queries=queries, metrics=MetricsSnapshot(**snapshot_fields) - ) - - def test_render_window_uses_recorded_end_for_completed_query(): - cache = make_render_cache([make_query(1_100_000, 1_150_000)]) - rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) + queries = [make_query(1_100_000, 1_150_000)] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 + ) assert rows[0].duration_ms == 50_000 assert rows[0].status == "ok" def test_render_window_uses_log_end_for_running_query(): - cache = make_render_cache( - [make_query(1_100_000, None, status="running")] + queries = [make_query(1_100_000, None, status="running")] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 ) - rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) assert rows[0].duration_ms == 1_400_000 assert rows[0].status == "running" def test_render_window_marks_orphan_duration_as_unknown(): - cache = make_render_cache( - [make_query(1_100_000, None, status="orphaned")] + queries = [make_query(1_100_000, None, status="orphaned")] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 ) - rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) assert rows[0].duration_ms == DURATION_UNKNOWN assert rows[0].status == "orphaned" def test_render_window_passes_window_size_as_metric_label(): - cache = make_render_cache([], seen=7, ok=7) + queries = [make_query(100 + index, 200 + index) for index in range(7)] controls = ControlsState( window_size="1h", mode="ACTIVE", start_ms=0, - end_ms=1, + end_ms=1_000_000, ) - _, metrics = render_window(cache, controls, log_end_ms=1) + _, metrics = render_window(queries, controls, SLOW_MS, log_end_ms=1_000_000) assert metrics.label == "1h" assert metrics.seen == 7 assert metrics.ok == 7 def test_render_window_carries_start_line_offset_onto_row(): - cache = make_render_cache( - [make_query(1_100_000, 1_200_000, start_line_offset=512)] + queries = [make_query(1_100_000, 1_200_000, start_line_offset=512)] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 ) - rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) assert rows[0].start_line_offset == 512 def test_render_window_leaves_text_empty(): - cache = make_render_cache([make_query(1_100_000, 1_200_000)]) - rows, _ = render_window(cache, make_controls(), log_end_ms=2_500_000) + queries = [make_query(1_100_000, 1_200_000)] + rows, _ = render_window( + queries, make_controls(), SLOW_MS, log_end_ms=2_500_000 + ) assert rows[0].qid == "" assert rows[0].sparql == "" assert rows[0].client_ip == "" def test_render_window_respects_mode_filter(): - cache = make_render_cache( - [ - make_query(1_100_000, 1_200_000), - make_query(1_500_000, None, status="running"), - ] - ) + queries = [ + make_query(1_100_000, 1_200_000), + make_query(1_500_000, None, status="running"), + ] rows, _ = render_window( - cache, make_controls(mode="ENDS"), log_end_ms=2_500_000 + queries, make_controls(mode="ENDS"), SLOW_MS, log_end_ms=2_500_000 ) assert [row.started_at_ms for row in rows] == [1_100_000] +# Queries used by the mode-aware metric tests. A is fully inside the +# window, B starts inside but ends after it, C starts before but ends +# inside. So the metric count must change with the selected mode. +def mode_metric_queries(): + return [ + make_query(1_100_000, 1_900_000), + make_query(1_500_000, 2_200_000), + make_query(800_000, 1_300_000), + ] + + +def test_render_window_metrics_active_counts_all_overlapping(): + _, metrics = render_window( + mode_metric_queries(), + make_controls("ACTIVE"), + SLOW_MS, + log_end_ms=2_200_000, + ) + assert metrics.seen == 3 + + +def test_render_window_metrics_starts_counts_started_in_window(): + _, metrics = render_window( + mode_metric_queries(), + make_controls("STARTS"), + SLOW_MS, + log_end_ms=2_200_000, + ) + assert metrics.seen == 2 + + +def test_render_window_metrics_ends_counts_ended_in_window(): + _, metrics = render_window( + mode_metric_queries(), + make_controls("ENDS"), + SLOW_MS, + log_end_ms=2_200_000, + ) + assert metrics.seen == 2 + + +def test_render_window_metrics_exclude_running_and_orphaned(): + queries = [ + make_query(1_100_000, 1_900_000), + make_query(1_500_000, None, status="running"), + make_query(1_200_000, None, status="orphaned"), + ] + _, metrics = render_window( + queries, make_controls("ACTIVE"), SLOW_MS, log_end_ms=2_500_000 + ) + assert metrics.seen == 1 + + def make_row(start_line_offset): return HistoricQueryRow( qid="", diff --git a/test/qlever/test_live_data.py b/test/qlever/test_live_data.py index 565b89e2..852c6f5d 100644 --- a/test/qlever/test_live_data.py +++ b/test/qlever/test_live_data.py @@ -459,6 +459,7 @@ def test_load_completed_history_loads_pairs_from_history(write_log): log_path=path, state=state, cut_offset=path.stat().st_size, + window_pad_ms=10_000, now_ms=lambda: 5000, ) assert len(state.completed.entries) == 2 @@ -485,6 +486,7 @@ def test_load_completed_history_prepends_before_existing_tailer_entries( log_path=path, state=state, cut_offset=path.stat().st_size, + window_pad_ms=10_000, now_ms=lambda: 7000, ) assert len(state.completed.entries) == 2 @@ -503,12 +505,36 @@ def test_load_completed_history_drops_starts_without_a_matching_end(write_log): log_path=path, state=state, cut_offset=path.stat().st_size, + window_pad_ms=10_000, now_ms=lambda: 5000, ) assert len(state.completed.entries) == 1 assert state.completed.entries[0].start_ms == 1000 +def test_load_completed_history_recovers_query_straddling_the_hour(write_log): + oldest_wanted = 100_000 + now = LIVE_HORIZON_MS + oldest_wanted + path = write_log( + start_line(91_000, "before") + + end_line(93_000, "before") + + start_line(95_000, "straddler") + + end_line(150_000, "straddler") + + start_line(200_000, "inside") + + end_line(300_000, "inside") + ) + state = LiveState() + load_completed_history( + log_path=path, + state=state, + cut_offset=path.stat().st_size, + window_pad_ms=20_000, + now_ms=lambda: now, + ) + starts = [entry.start_ms for entry in state.completed.entries] + assert starts == [95_000, 200_000] + + def test_poll_loop_picks_up_lines_appended_to_the_file(write_log): path = write_log(b"") state = LiveState() From 41562ae42a71387d6d5d10f4e3b1fdbc4226ecf6 Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 1 Jun 2026 18:43:17 +0200 Subject: [PATCH 25/26] Fix timeline not rendering correctly at log end and change slow to (>slow_threshold) in metric row --- src/qlever/monitor/views/historic.py | 3 +- src/qlever/monitor/views/live.py | 5 ++- src/qlever/monitor/widgets/metrics_row.py | 41 ++++++++++++++++++----- src/qlever/monitor/widgets/timeline.py | 4 ++- 4 files changed, 42 insertions(+), 11 deletions(-) diff --git a/src/qlever/monitor/views/historic.py b/src/qlever/monitor/views/historic.py index 04a155fe..a0ad2fe4 100644 --- a/src/qlever/monitor/views/historic.py +++ b/src/qlever/monitor/views/historic.py @@ -133,7 +133,8 @@ def compose(self) -> ComposeResult: yield HistoricControlsRow(controls) yield Timeline(bounds) yield MetricsRow( - [MetricsCounts(label=self.window_size, **EMPTY_FIELDS)] + [MetricsCounts(label=self.window_size, **EMPTY_FIELDS)], + self.app.slow_threshold, ) yield HistoricQueryTable([]) yield Static("", id="table-status") diff --git a/src/qlever/monitor/views/live.py b/src/qlever/monitor/views/live.py index 8c2d1ba0..c4f672d1 100644 --- a/src/qlever/monitor/views/live.py +++ b/src/qlever/monitor/views/live.py @@ -64,7 +64,10 @@ def compose(self) -> ComposeResult: n_active=len(rows), ) ) - yield MetricsRow(get_live_metrics(state, slow_ms, current_ms())) + yield MetricsRow( + get_live_metrics(state, slow_ms, current_ms()), + self.app.slow_threshold, + ) yield LiveQueryTable(rows) yield Static("", id="table-status") yield SparqlPane() diff --git a/src/qlever/monitor/widgets/metrics_row.py b/src/qlever/monitor/widgets/metrics_row.py index b639e631..3797791b 100644 --- a/src/qlever/monitor/widgets/metrics_row.py +++ b/src/qlever/monitor/widgets/metrics_row.py @@ -75,21 +75,43 @@ def column_widths(rows: list[MetricsCounts]) -> list[int]: return widths -def render_cell(label: str, value: int | None, width: int, kind: str) -> str: - """Render one bold-label, right-aligned value pair with optional color.""" +def column_label(name: str, slow_threshold_s: int) -> str: + """Display label for a metric column; slow carries its threshold.""" + if name == "slow": + return f">{slow_threshold_s}s" + return name + + +def render_cell( + name: str, label: str, value: int | None, width: int, kind: str +) -> str: + """Render one bold-label, right-aligned value pair with optional color. + + `name` is the metric field and drives the color lookup; `label` is + the text shown, which can differ from it (the slow column shows its + threshold). + """ text = format_value(value, kind).rjust(width) - color = color_tag(label, value) + color = color_tag(name, value) if color: return f"[b]{label}[/] [{color}]{text}[/]" return f"[b]{label}[/] {text}" -def format_row(row: MetricsCounts, widths: list[int]) -> str: +def format_row( + row: MetricsCounts, widths: list[int], slow_threshold_s: int +) -> str: """Format one rolling-window row as a single line with markup.""" if row.not_ready_message: return f"[bold]{row.label:<8}[/] │ [dim]{row.not_ready_message}[/]" cells = [ - render_cell(name, getattr(row, name), width, kind) + render_cell( + name, + column_label(name, slow_threshold_s), + getattr(row, name), + width, + kind, + ) for (name, kind), width in zip(COLUMNS, widths) ] return f"[bold]{row.label:<8}[/] │ " + " · ".join(cells) @@ -102,18 +124,21 @@ class MetricsRow(Vertical): rows = reactive(list, init=False) - def __init__(self, rows: list[MetricsCounts]) -> None: + def __init__( + self, rows: list[MetricsCounts], slow_threshold_s: int + ) -> None: """Render each row in `rows` as one formatted text line.""" super().__init__() + self.slow_threshold_s = slow_threshold_s self.set_reactive(MetricsRow.rows, rows) def compose(self) -> ComposeResult: widths = column_widths(self.rows) for row in self.rows: - yield Static(format_row(row, widths)) + yield Static(format_row(row, widths, self.slow_threshold_s)) def watch_rows(self, rows: list[MetricsCounts]) -> None: """Repaint each line static when the row data changes.""" widths = column_widths(rows) for line, row in zip(self.query(Static), rows): - line.update(format_row(row, widths)) + line.update(format_row(row, widths, self.slow_threshold_s)) diff --git a/src/qlever/monitor/widgets/timeline.py b/src/qlever/monitor/widgets/timeline.py index a378e67a..fd13f918 100644 --- a/src/qlever/monitor/widgets/timeline.py +++ b/src/qlever/monitor/widgets/timeline.py @@ -97,7 +97,9 @@ def render_track(bounds: TimelineBounds, bar_width: int) -> str: span_fraction(bounds.window_start_ms, bounds), bar_width ) end = start + width - 1 - if end > bar_width - 1: + # Pin the block to the last cell when it overruns the bar, or when + # the window actually reaches the log end but rounding fell short. + if end > bar_width - 1 or bounds.window_end_ms >= bounds.log_end_ms: end = bar_width - 1 start = end - width + 1 for col in range(start, end + 1): From b747a981767efcf6f8a41402f209f59ae449b74d Mon Sep 17 00:00:00 2001 From: tanmay-9 Date: Mon, 1 Jun 2026 19:01:48 +0200 Subject: [PATCH 26/26] Unregister ansi theme because of unreadable footer and complicated coloring logic --- src/qlever/monitor/app.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/qlever/monitor/app.py b/src/qlever/monitor/app.py index acf97763..b4f4cd85 100644 --- a/src/qlever/monitor/app.py +++ b/src/qlever/monitor/app.py @@ -96,6 +96,10 @@ def set_log_start_ms(self) -> None: def on_mount(self) -> None: """Boot the live engine, then open the Live screen.""" + # The ansi theme renders an unreadable Footer (its colors are + # ansi_default, which flattens to black). Drop it so it is + # offered neither in the theme cycle nor the command palette. + self.unregister_theme("textual-ansi") self.boot_time_ms = current_ms() state, cut_offset, _ = find_active_queries( self.log_file, self.window_pad_ms