From 94a46c48980d6e134b2d73a30a85e239b1588f68 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Tue, 12 May 2026 08:54:03 -0400 Subject: [PATCH 01/35] Name clarification --- kugl/impl/engine.py | 2 +- kugl/impl/registry.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kugl/impl/engine.py b/kugl/impl/engine.py index 9e81315..38f4508 100644 --- a/kugl/impl/engine.py +++ b/kugl/impl/engine.py @@ -114,7 +114,7 @@ def query(self, query: Query) -> Tuple[list[Tuple], list[str]]: for named_table in query.named_tables: schema = schemas[named_table.schema_name or DEFAULT_SCHEMA] if table := schema.table_builder(named_table.name): - resource_ref = ResourceRef(schema, schema.resource_for(table)) + resource_ref = ResourceRef(schema, schema.resource_def(table)) tables.append((table, resource_ref)) resource_refs.add(resource_ref) diff --git a/kugl/impl/registry.py b/kugl/impl/registry.py index a38476c..29f8cc3 100644 --- a/kugl/impl/registry.py +++ b/kugl/impl/registry.py @@ -243,6 +243,6 @@ def table_builder(self, name, missing_ok=True): def all_table_names(self): return set(chain(self.builtin.keys(), self._create.keys(), self._extend.keys())) - def resource_for(self, table: Table) -> set[ResourceDef]: + def resource_def(self, table: Table) -> set[ResourceDef]: """Return the ResourceDef used by a Table.""" return self._resources[table.resource] From d24735c1123eafb8c12cffa05b2382a3a8a59f90 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Thu, 14 May 2026 19:42:35 -0400 Subject: [PATCH 02/35] Improve comments & naming --- kugl/impl/registry.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/kugl/impl/registry.py b/kugl/impl/registry.py index 29f8cc3..4ebb69b 100644 --- a/kugl/impl/registry.py +++ b/kugl/impl/registry.py @@ -132,10 +132,15 @@ def cache_path(self): class Schema(BaseModel): """Collection of tables and resource definitions.""" + # schema name name: str + # name -> table defined in python 
code builtin: dict[str, TableDef] = {} + # name -> table defined in config file _create: dict[str, CreateTable] = {} + # name -> table extended in config file _extend: dict[str, ExtendTable] = {} + # name -> resource defined in config file _resources: dict[str, Resource] = {} def read_configs(self, init_path: list[str]): @@ -148,6 +153,7 @@ def read_configs(self, init_path: list[str]): ] # Reset the non-builtin tables, since these can change during unit tests. + # (We don't create a new registry per test.) self._create.clear() self._extend.clear() self._resources.clear() @@ -160,14 +166,15 @@ def read_configs(self, init_path: list[str]): columns_known.add(column.name) def _check_column(table_name, column_name): - # Detect duplicate columns + # Reject duplicate column, or add to known if new. columns_known = tables_known[table_name] if column_name in columns_known: fail(f"Column '{column_name}' is already defined in table '{table_name}'") columns_known.add(column_name) - def _apply(folder: ConfigPath): + def _merge_config(folder: ConfigPath): # Merge one UserConfig into the schema. + # Return True if the folder exists, else False path = folder / f"{self.name}.yaml" if not path.exists(): return False @@ -201,7 +208,7 @@ def _apply(folder: ConfigPath): return True # Apply builtin config and user config. - found = any([_apply(folder) for folder in init_path]) + found = any([_merge_config(folder) for folder in init_path]) if not found and self.name != DEFAULT_SCHEMA: # There's a built-in schema for Kubernetes, so no issue if no config files fail(f"no configurations found for schema '{self.name}'") @@ -224,23 +231,25 @@ def _find_resource(self, r: ResourceDef) -> Resource: f"can't infer type of resource '{r.name}' -- need one of 'file', 'data', 'namespaced' etc" ) - def table_builder(self, name, missing_ok=True): + def table_builder(self, table_name: str, missing_ok=True): """Return the Table builder subclass (see tables.py) for a table name. 
- :param missing_ok: Defaults to True because we normally let SQLite flag missing tables. + :param missing_ok: Defaults to True because we normally let SQLite identify missing + tables by surfacing the exception from the query. """ - builtin = self.builtin.get(name) - creator = self._create.get(name) - extender = self._extend.get(name) + builtin = self.builtin.get(table_name) + creator = self._create.get(table_name) + extender = self._extend.get(table_name) if builtin and creator: - fail(f"Pre-defined table {name} can't be created from config") + fail(f"Pre-defined table {table_name} can't be created from config") if builtin: return TableFromCode(builtin, extender) if creator: - return TableFromConfig(name, self.name, creator, extender) + return TableFromConfig(table_name, self.name, creator, extender) if not missing_ok: - fail(f"Table '{name}' is not defined in schema {self.name}") + fail(f"Table '{table_name}' is not defined in schema {self.name}") def all_table_names(self): + """Return all the table names that are built in or defined in user configs.""" return set(chain(self.builtin.keys(), self._create.keys(), self._extend.keys())) def resource_def(self, table: Table) -> set[ResourceDef]: From 5c9f377f246ecc9772ff3d58e8d7bd1398687f07 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Thu, 14 May 2026 19:50:09 -0400 Subject: [PATCH 03/35] Rename resource type -> resource family --- kugl/api.py | 6 +++--- kugl/builtins/resources.py | 10 +++++----- kugl/builtins/schemas/kubernetes.py | 4 ++-- kugl/impl/registry.py | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/kugl/api.py b/kugl/api.py index eed1cc5..7aad740 100644 --- a/kugl/api.py +++ b/kugl/api.py @@ -17,7 +17,7 @@ ) __all__ = [ - "resource", + "resource_family", "table", "column", "Resource", @@ -30,9 +30,9 @@ ] -def resource(type: str, schema_defaults: list[str] = []): +def resource_family(name: str, schema_defaults: list[str] = []): def wrap(cls): - _Registry.get().add_resource(cls, 
type, schema_defaults) + _Registry.get().add_resource_family(cls, name, schema_defaults) return cls return wrap diff --git a/kugl/builtins/resources.py b/kugl/builtins/resources.py index 7287c0b..def31f8 100644 --- a/kugl/builtins/resources.py +++ b/kugl/builtins/resources.py @@ -6,7 +6,7 @@ from pydantic import model_validator -from kugl.api import resource, fail, run, Resource +from kugl.api import resource_family, fail, run, Resource from kugl.util import best_guess_parse, KPath, debugging @@ -21,7 +21,7 @@ def set_cacheable(cls, resource: "NonCacheableResource") -> "NonCacheableResourc return resource -@resource("data") +@resource_family("data") class DataResource(NonCacheableResource): """A resource whose data is provided directly in the configuration file.""" @@ -31,7 +31,7 @@ def get_objects(self): return self.data -@resource("file") +@resource_family("file") class FileResource(NonCacheableResource): """A resource that reads a file from disk. @@ -51,7 +51,7 @@ def get_objects(self): fail(f"failed to read {self.file} in resource {self.name}", e) -@resource("folder") +@resource_family("folder") class FolderResource(NonCacheableResource): """A resource that reads selectively from a folder tree. 
@@ -95,7 +95,7 @@ def get_objects(self): return result -@resource("exec") +@resource_family("exec") class ExecResource(Resource): exec: Union[str, list[str]] cache_key: Optional[str] = None diff --git a/kugl/builtins/schemas/kubernetes.py b/kugl/builtins/schemas/kubernetes.py index 1ac32f6..ce62f3d 100644 --- a/kugl/builtins/schemas/kubernetes.py +++ b/kugl/builtins/schemas/kubernetes.py @@ -14,11 +14,11 @@ from pydantic import model_validator from ..helpers import Limits, ItemHelper, PodHelper, JobHelper, CronJobHelper -from kugl.api import table, fail, resource, run, parse_utc, Resource, column +from kugl.api import table, fail, resource_family, run, parse_utc, Resource, column from kugl.util import WHITESPACE_RE, kube_context -@resource("kubernetes", schema_defaults=["kubernetes"]) +@resource_family("kubernetes", schema_defaults=["kubernetes"]) class KubernetesResource(Resource): namespaced: bool _all_ns: bool diff --git a/kugl/impl/registry.py b/kugl/impl/registry.py index 4ebb69b..c1f7344 100644 --- a/kugl/impl/registry.py +++ b/kugl/impl/registry.py @@ -52,9 +52,9 @@ def add_table(self, cls: type, **kwargs): t = TableDef(cls=cls, **kwargs) self.get_schema(t.schema_name).builtin[t.name] = t - def add_resource(self, cls: type, family: str, schema_defaults: list[str]): + def add_resource_family(self, cls: type, family: str, schema_defaults: list[str]): """ - Register a resource type. This is called by the @resource decorator. + This is called by the @resource_family decorator. :param cls: The class to register :param family: e.g. "file", "kubernetes", "aws" From 9554829a5f6350ff26ff97d70440a1ea66ad4a1e Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Thu, 14 May 2026 20:00:04 -0400 Subject: [PATCH 04/35] Revert "Rename resource type -> resource family" This reverts commit 5c9f377f246ecc9772ff3d58e8d7bd1398687f07. 
--- kugl/api.py | 6 +++--- kugl/builtins/resources.py | 10 +++++----- kugl/builtins/schemas/kubernetes.py | 4 ++-- kugl/impl/registry.py | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/kugl/api.py b/kugl/api.py index 7aad740..eed1cc5 100644 --- a/kugl/api.py +++ b/kugl/api.py @@ -17,7 +17,7 @@ ) __all__ = [ - "resource_family", + "resource", "table", "column", "Resource", @@ -30,9 +30,9 @@ ] -def resource_family(name: str, schema_defaults: list[str] = []): +def resource(type: str, schema_defaults: list[str] = []): def wrap(cls): - _Registry.get().add_resource_family(cls, name, schema_defaults) + _Registry.get().add_resource(cls, type, schema_defaults) return cls return wrap diff --git a/kugl/builtins/resources.py b/kugl/builtins/resources.py index def31f8..7287c0b 100644 --- a/kugl/builtins/resources.py +++ b/kugl/builtins/resources.py @@ -6,7 +6,7 @@ from pydantic import model_validator -from kugl.api import resource_family, fail, run, Resource +from kugl.api import resource, fail, run, Resource from kugl.util import best_guess_parse, KPath, debugging @@ -21,7 +21,7 @@ def set_cacheable(cls, resource: "NonCacheableResource") -> "NonCacheableResourc return resource -@resource_family("data") +@resource("data") class DataResource(NonCacheableResource): """A resource whose data is provided directly in the configuration file.""" @@ -31,7 +31,7 @@ def get_objects(self): return self.data -@resource_family("file") +@resource("file") class FileResource(NonCacheableResource): """A resource that reads a file from disk. @@ -51,7 +51,7 @@ def get_objects(self): fail(f"failed to read {self.file} in resource {self.name}", e) -@resource_family("folder") +@resource("folder") class FolderResource(NonCacheableResource): """A resource that reads selectively from a folder tree. 
@@ -95,7 +95,7 @@ def get_objects(self): return result -@resource_family("exec") +@resource("exec") class ExecResource(Resource): exec: Union[str, list[str]] cache_key: Optional[str] = None diff --git a/kugl/builtins/schemas/kubernetes.py b/kugl/builtins/schemas/kubernetes.py index ce62f3d..1ac32f6 100644 --- a/kugl/builtins/schemas/kubernetes.py +++ b/kugl/builtins/schemas/kubernetes.py @@ -14,11 +14,11 @@ from pydantic import model_validator from ..helpers import Limits, ItemHelper, PodHelper, JobHelper, CronJobHelper -from kugl.api import table, fail, resource_family, run, parse_utc, Resource, column +from kugl.api import table, fail, resource, run, parse_utc, Resource, column from kugl.util import WHITESPACE_RE, kube_context -@resource_family("kubernetes", schema_defaults=["kubernetes"]) +@resource("kubernetes", schema_defaults=["kubernetes"]) class KubernetesResource(Resource): namespaced: bool _all_ns: bool diff --git a/kugl/impl/registry.py b/kugl/impl/registry.py index c1f7344..4ebb69b 100644 --- a/kugl/impl/registry.py +++ b/kugl/impl/registry.py @@ -52,9 +52,9 @@ def add_table(self, cls: type, **kwargs): t = TableDef(cls=cls, **kwargs) self.get_schema(t.schema_name).builtin[t.name] = t - def add_resource_family(self, cls: type, family: str, schema_defaults: list[str]): + def add_resource(self, cls: type, family: str, schema_defaults: list[str]): """ - This is called by the @resource_family decorator. + Register a resource type. This is called by the @resource decorator. :param cls: The class to register :param family: e.g. 
"file", "kubernetes", "aws" From 48d08f6094470f8eebe42250ffc498c76073b88c Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Thu, 14 May 2026 20:04:00 -0400 Subject: [PATCH 05/35] Clarify when something is a resource type vs a resource --- kugl/api.py | 4 ++-- kugl/builtins/resources.py | 10 +++++----- kugl/builtins/schemas/kubernetes.py | 4 ++-- kugl/impl/registry.py | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/kugl/api.py b/kugl/api.py index eed1cc5..47eb62a 100644 --- a/kugl/api.py +++ b/kugl/api.py @@ -17,7 +17,7 @@ ) __all__ = [ - "resource", + "resource_type", "table", "column", "Resource", @@ -30,7 +30,7 @@ ] -def resource(type: str, schema_defaults: list[str] = []): +def resource_type(type: str, schema_defaults: list[str] = []): def wrap(cls): _Registry.get().add_resource(cls, type, schema_defaults) return cls diff --git a/kugl/builtins/resources.py b/kugl/builtins/resources.py index 7287c0b..2e81b19 100644 --- a/kugl/builtins/resources.py +++ b/kugl/builtins/resources.py @@ -6,7 +6,7 @@ from pydantic import model_validator -from kugl.api import resource, fail, run, Resource +from kugl.api import resource_type, fail, run, Resource from kugl.util import best_guess_parse, KPath, debugging @@ -21,7 +21,7 @@ def set_cacheable(cls, resource: "NonCacheableResource") -> "NonCacheableResourc return resource -@resource("data") +@resource_type("data") class DataResource(NonCacheableResource): """A resource whose data is provided directly in the configuration file.""" @@ -31,7 +31,7 @@ def get_objects(self): return self.data -@resource("file") +@resource_type("file") class FileResource(NonCacheableResource): """A resource that reads a file from disk. @@ -51,7 +51,7 @@ def get_objects(self): fail(f"failed to read {self.file} in resource {self.name}", e) -@resource("folder") +@resource_type("folder") class FolderResource(NonCacheableResource): """A resource that reads selectively from a folder tree. 
@@ -95,7 +95,7 @@ def get_objects(self): return result -@resource("exec") +@resource_type("exec") class ExecResource(Resource): exec: Union[str, list[str]] cache_key: Optional[str] = None diff --git a/kugl/builtins/schemas/kubernetes.py b/kugl/builtins/schemas/kubernetes.py index 1ac32f6..8ad9b18 100644 --- a/kugl/builtins/schemas/kubernetes.py +++ b/kugl/builtins/schemas/kubernetes.py @@ -14,11 +14,11 @@ from pydantic import model_validator from ..helpers import Limits, ItemHelper, PodHelper, JobHelper, CronJobHelper -from kugl.api import table, fail, resource, run, parse_utc, Resource, column +from kugl.api import table, fail, resource_type, run, parse_utc, Resource, column from kugl.util import WHITESPACE_RE, kube_context -@resource("kubernetes", schema_defaults=["kubernetes"]) +@resource_type("kubernetes", schema_defaults=["kubernetes"]) class KubernetesResource(Resource): namespaced: bool _all_ns: bool diff --git a/kugl/impl/registry.py b/kugl/impl/registry.py index 4ebb69b..1f5a89e 100644 --- a/kugl/impl/registry.py +++ b/kugl/impl/registry.py @@ -54,7 +54,7 @@ def add_table(self, cls: type, **kwargs): def add_resource(self, cls: type, family: str, schema_defaults: list[str]): """ - Register a resource type. This is called by the @resource decorator. + Register a resource type. This is called by the @resource_type decorator. :param cls: The class to register :param family: e.g. "file", "kubernetes", "aws" From 61a709a48eedbf9f0b7f27781469e559c6536b47 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Fri, 15 May 2026 08:57:48 -0400 Subject: [PATCH 06/35] Add CLAUDE.md --- CLAUDE.md | 227 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..e0f39ee --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,227 @@ +# Kugl — Claude Seed Context + +> **Note to Claude:** Please keep this file current as the project evolves. 
+ +Kugl lets you query Kubernetes (and other) resources using SQL. It fetches JSON via `kubectl get` +(or other sources), loads it into an in-memory SQLite database, runs the query, and formats results. + +## Package Layout + +``` +kugl/ + api.py # Public decorators: @table, @resource_type, @column + main.py # CLI entry point + impl/ + engine.py # Engine (query execution), DataCache, ResourceRef + registry.py # Registry (singleton), Schema, Resource base class + tables.py # Table, TableFromCode, TableFromConfig, TableDef, RowContext, Itemizer + config.py # Pydantic models: Settings, UserConfig, Column, UserColumn, + # ResourceDef, CreateTable, ExtendTable, Shortcut + extract.py # PathExtractor, LabelExtractor, Extractor base, FieldRef, type maps + builtins/ + resources.py # Built-in resource families (kubernetes, file, folder, exec, data) + schemas/ + kubernetes.py # Built-in @table classes for pods, nodes, jobs, etc. + util/ # Helpers: Age, KPath, SqliteDb, Query, clock, debugging, etc. +tests/ + k8s/ # Kubernetes table tests + config/ # Config parsing and merge tests + resource/ # Per-resource-type tests (cache, exec, file, folder, etc.) +docs/ # RST documentation (syntax, builtins, extending, resources, multi, settings) +kugl/builtins/schemas/ # Built-in YAML schema configs (kubernetes.yaml, etc.) +``` + +## Core Concepts + +### Schema +A named group of tables and resources. The default schema is `"kubernetes"`. Multi-schema queries +attach each schema as a separate in-memory SQLite database and require explicit `schema.table` +qualification in SQL. + +### Resource (family) +Where data comes from. The **resource family** is the type (e.g., `kubernetes`, `file`, `folder`, +`exec`, `data`); a **resource** is a specific instance with a name and family-specific config. 
+ +Built-in families: +- `kubernetes` — runs `kubectl get -o json`; supports `-n`/`-a` namespace flags +- `file` — reads a local file (YAML or JSON); `file: ~/.kube/config` +- `folder` — globs files in a tree, presents each as `{match: {...}, content: {...}}` +- `exec` — runs any shell command producing JSON/YAML; optionally cacheable with `cache_key` +- `data` — static inline data + +### Registry (singleton) +`Registry.get()` is the process-wide singleton. It maps: +- resource family name → Resource subclass (`resources_by_family`) +- schema name → default Resource subclass (`resources_by_schema`) +- schema name → `Schema` object (`schemas`) + +Populated at import time by `@table` and `@resource_type` decorators. + +### Schema object +`Schema` holds: +- `builtin`: `{name: TableDef}` — tables defined in Python via `@table` +- `_create`: tables defined in user config `create:` sections +- `_extend`: column extensions from `extend:` sections +- `_resources`: resource instances from config `resources:` sections + +`Schema.read_configs()` merges config files from (in order): builtin schemas package, +any `init_path` folders, then `~/.kugl/`. + +### Table hierarchy +- `Table` — base; holds column lists, implements `build()` (CREATE TABLE + INSERT) +- `TableFromCode` — wraps a `@table`-decorated class; delegates `make_rows()` to it +- `TableFromConfig` — built from a `create:` config block; uses `Itemizer` for row generation + +### ResourceRef +A `(schema, resource)` pair used as a hashable set member for cache tracking. +Name property is `"schema_name.resource_name"`. + +### Engine +`Engine.query()` orchestrates: +1. Identify schemas from the SQL query; attach them as SQLite databases +2. `Schema.read_configs()` for each schema +3. Build `Table` and `ResourceRef` objects for each named table +4. `DataCache.advise_refresh()` to decide what to fetch vs read from cache +5. Parallel fetch using ThreadPoolExecutor +6. `Table.build()` to CREATE TABLE and INSERT rows +7. 
Execute the SQL query and return rows + column names + +### DataCache +Stores JSON responses under `~/.kugl/cache//.json`. +Cache age is based on file mtime. Three cache flags: +- `ALWAYS_UPDATE` (`-u`) — fetch everything, no stale warning +- `CHECK` (default) — fetch expired/missing, warn about stale data +- `NEVER_UPDATE` (`-c`) — only fetch missing, never update existing cache + +## Config Files + +### `~/.kugl/init.yaml` +Top-level settings and shortcuts. Only this file may contain `settings:`. + +```yaml +settings: + cache_timeout: 5m # default 2m + reckless: true # suppress stale-data warnings + init_path: # extra config folders, applied before ~/.kugl/ + - ~/team-kugl + +shortcuts: + - name: mypods + args: ["select name, status from pods where namespace = 'default'"] +``` + +### `~/.kugl/.yaml` (e.g. `kubernetes.yaml`) +Defines resources and tables for a schema. + +```yaml +resources: + - name: workflows # resource name used by create: tables + namespaced: true # for kubernetes family; default true + +create: + - table: workflows + resource: workflows + row_source: # default is ["items"] + - items + columns: + - name: name + path: metadata.name + - name: status + label: workflows.argoproj.io/phase + +extend: + - table: pods + columns: + - name: owner + type: text + label: com.mycompany/owner + comment: ML team owner +``` + +## Column Extractors + +Two extractor types, specified by keyword in a column definition: + +**`path:`** — JMESPath expression into the current row item +**`label:`** — shortcut to `metadata.labels`; can be a list to try in order + +**Parent navigation** — prefix `^` chars to walk up the `row_source` chain: +- `^metadata.uid` means `.metadata.uid` one level above the current item +- `^^foo` means two levels up + +## Column Types + +Kugl types (used in config `type:`) → SQLite storage type: + +| Kugl type | SQLite | Accepts | +|-----------|--------|---------| +| `text` | TEXT | strings | +| `integer` | INTEGER | ints | +| `real` | REAL | 
floats | +| `size` | INTEGER | `50Mi`, bytes | +| `age` | INTEGER | `5d`, `4h30m`, seconds | +| `cpu` | REAL | `0.5`, `300m` | +| `date` | INTEGER | `2021-01-01T12:34:56Z`, epoch secs | + +Built-in SQL functions: `now()`, `to_utc(ts)`, `to_age(secs)`, `to_size(bytes)` + +## row_source + +Multi-step JMESPath iteration for generating multiple rows per API response item. + +```yaml +row_source: + - items # step 1: each element of the top-level items array + - spec.taints # step 2: each taint within each node +``` + +- Each step applies to results of the prior step +- Parent/child relationships are tracked for `^` path navigation +- Dict sources can be unpacked to key/value pairs with `; kv` suffix: `- env; kv` +- Default `row_source` is `["items"]` + +## Decorators (kugl/api.py) + +```python +from kugl.api import resource_type, table, column, Resource + +@resource_type(type="myfamily", schema_defaults=["myschema"]) +class MyResource(Resource): + def get_objects(self): ... + def cache_path(self): ... + +@table(schema="kubernetes", name="pods", resource="pods") +class PodsTable: + def columns(self) -> list[Column]: ... + def make_rows(self, context: RowContext) -> list[tuple[dict, tuple]]: ... +``` + +`make_rows` returns `[(item_dict, row_tuple), ...]` where `row_tuple` contains one value per +builtin column (non-builtin/extension columns are appended by `Table.build()`). + +## Multi-Schema Queries + +When a query references `schema.table`, each schema gets an `ATTACH DATABASE ':memory:' AS schema` +and all table names must be fully qualified. + +```sql +SELECT k.name, e.zone +FROM kubernetes.nodes k +JOIN ec2.instances e ON k.name = e.hostname +``` + +The `ec2` schema is defined in `~/.kugl/ec2.yaml` with an `exec:` resource. 
+ +## Debugging + +Set `KUGL_DEBUG` env var to a comma-separated list of topics: +- `cache` — cache hit/miss decisions +- `extract` — column value extraction +- `itemize` — row_source iteration steps + +## Testing Notes + +- The Registry is a process singleton; tests use `Schema.read_configs()` to reset non-builtin state +- `clock.CLOCK` is patched in tests to control time (for cache age calculations) +- Tests use actual in-memory SQLite; no mocking of the DB layer +- Kubernetes tests mock `kubectl` via fixtures in `tests/k8s/k8s_mocks.py` From 8e75b7a2e0d7f6aff7b71ea9583e82ed545d8e70 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Fri, 15 May 2026 17:52:21 -0400 Subject: [PATCH 07/35] Tweaks --- docs-tmp/extending.md | 2 +- kugl/impl/registry.py | 4 ++-- kugl/impl/tables.py | 19 +++++++------------ 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/docs-tmp/extending.md b/docs-tmp/extending.md index db879a2..c13df2f 100644 --- a/docs-tmp/extending.md +++ b/docs-tmp/extending.md @@ -251,7 +251,7 @@ columns: ## Tips If creating multiple tables from a resource, you should use the `uid` column (sourced from `metadata.uid`) -as a join key, since this is a guaranteed unique key. +as a join key, since this is a guaranteed unique key. (Example: `nodes` and `node_labels`, `pods` and `pod_labels`.) The `utils:` section of `~/.kugl/init.yaml` is ignored during configuration parsing, so you can use it to store reusable bits of YAML. 
\ No newline at end of file diff --git a/kugl/impl/registry.py b/kugl/impl/registry.py index 1f5a89e..e2fdd59 100644 --- a/kugl/impl/registry.py +++ b/kugl/impl/registry.py @@ -87,7 +87,7 @@ def get_resource_by_family(self, family: str, error_ok: bool = False) -> Type: fail(f"Resource family {family} is not registered") return impl - def get_resource_by_schema(self, schema_name: str) -> Type: + def get_schema_default_resource(self, schema_name: str) -> Type: return self.resources_by_schema.get(schema_name) def augment_cli(self, ap: ArgumentParser): @@ -225,7 +225,7 @@ def _find_resource(self, r: ResourceDef) -> Resource: if family in fields: return parse_model(rgy.get_resource_by_family(family), fields) # If no family is specified, the schema may have a default one - if impl := rgy.get_resource_by_schema(self.name): + if impl := rgy.get_schema_default_resource(self.name): return parse_model(impl, fields) fail( f"can't infer type of resource '{r.name}' -- need one of 'file', 'data', 'namespaced' etc" diff --git a/kugl/impl/tables.py b/kugl/impl/tables.py index b7099ad..cbd578e 100644 --- a/kugl/impl/tables.py +++ b/kugl/impl/tables.py @@ -38,7 +38,7 @@ def __init__( schema_name, resource: str, builtin_columns: list[Column], - non_builtin_columns: list[UserColumn], + added_columns: list[UserColumn], ): """ :param name: table name, e.g. "pods" @@ -49,7 +49,7 @@ def __init__( self.schema_name = schema_name self.resource = resource self.builtin_columns = builtin_columns - self.non_builtin_columns = non_builtin_columns + self.added_columns = added_columns def build(self, db, raw_data: dict, multi_schema: bool): """Create the table in SQLite and insert the data. 
@@ -60,15 +60,15 @@ def build(self, db, raw_data: dict, multi_schema: bool): """ context = RowContext(raw_data) table_name = f"{self.schema_name}.{self.name}" if multi_schema else self.name - all_columns = self.builtin_columns + self.non_builtin_columns + all_columns = self.builtin_columns + self.added_columns db.execute( f"""CREATE TABLE {table_name} ({", ".join(f"{c.name} {c._sqltype}" for c in all_columns)})""" ) item_rows = list(self.make_rows(context)) if item_rows: - if self.non_builtin_columns: + if self.added_columns: extend_row = lambda item, row: row + tuple( - column.extract(item, context) for column in self.non_builtin_columns + column.extract(item, context) for column in self.added_columns ) else: extend_row = lambda item, row: row @@ -79,7 +79,7 @@ def build(self, db, raw_data: dict, multi_schema: bool): def printable_schema(self): rows = [ (c.name, c._sqltype, c.comment or "") - for c in self.builtin_columns + self.non_builtin_columns + for c in self.builtin_columns + self.added_columns ] return f"## {self.name}\n" + tabulate(rows, tablefmt="plain") @@ -163,7 +163,7 @@ def _itemize(self, context: "RowContext") -> list[dict]: for child in found: if index > 0: # Fix #132 -- don't do this at pass 0, or it sets the parent to the entire - # response object, breaking self.get_root() + # response object. 
context.set_parent(child, item) new_items.append(child) if debug: @@ -200,11 +200,6 @@ def get_parent(self, child, depth: int = 1): depth -= 1 return child - def get_root(self, child): - while (parent := self._parents.get(id(child))) is not None: - child = parent - return child - @dataclass class Itemizer: From 66253d1cef3903f844940c0da0a7f82a585bbfa3 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Fri, 15 May 2026 18:08:11 -0400 Subject: [PATCH 08/35] Add row_source unit tests for multi-step, kv+parent-nav, and double-parent scenarios Co-Authored-By: Claude Sonnet 4.6 (1M context) --- tests/resource/test_row_source.py | 135 ++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/tests/resource/test_row_source.py b/tests/resource/test_row_source.py index 5d99619..50c1667 100644 --- a/tests/resource/test_row_source.py +++ b/tests/resource/test_row_source.py @@ -2,6 +2,8 @@ Unit tests for row_source errors and special cases. """ +import json + import pytest from kugl.util import KuglError, kugl_home @@ -35,6 +37,139 @@ def test_too_many_parents(test_home): assert_query("SELECT * FROM things", "") +_MULTI_STEP_CONFIG = """ + resources: + - name: things + data: + items: {items} + create: + - table: things + resource: things + row_source: + - items + - children + columns: + - name: parent_id + path: ^parent + - name: val + path: val +""" + +@pytest.mark.parametrize("items,expected", [ + pytest.param( + [ + {"parent": "p1", "children": [{"val": "a"}, {"val": "b"}]}, + {"parent": "p2", "children": [{"val": "c"}]}, + ], + """ + parent_id val + p1 a + p1 b + p2 c + """, + id="normal", + ), + pytest.param( + [ + {"parent": "p1", "children": [{"val": "a"}]}, + {"parent": "p2", "children": []}, + ], + """ + parent_id val + p1 a + """, + id="empty_sublist", + ), +]) +def test_multi_step_row_source(test_home, items, expected): + """Multi-step row_source with ^ parent navigation; also checks empty sublists produce no rows.""" + 
kugl_home().prep().joinpath("kubernetes.yaml").write_text( + _MULTI_STEP_CONFIG.format(items=json.dumps(items)) + ) + assert_query("SELECT * FROM things ORDER BY parent_id, val", expected) + + +def test_kv_with_parent_nav(test_home): + """'; kv' expansion combined with ^ to reference a field from the parent item.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - service: svc-a + env: + FOO: bar + BAZ: glig + - service: svc-b + env: + QUX: quux + create: + - table: things + resource: things + row_source: + - items + - env; kv + columns: + - name: service + path: ^service + - name: key + path: key + - name: value + path: value + """) + assert_query( + "SELECT * FROM things ORDER BY service, key", + """ + service key value + svc-a BAZ glig + svc-a FOO bar + svc-b QUX quux + """, + ) + + +def test_double_parent_nav(test_home): + """^^ navigates two levels up through a three-step row_source chain.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - section: sec-a + groups: + - grp: grp-1 + tags: + - tag: t1 + - tag: t2 + - grp: grp-2 + tags: + - tag: t3 + create: + - table: things + resource: things + row_source: + - items + - groups + - tags + columns: + - name: section + path: ^^section + - name: grp + path: ^grp + - name: tag + path: tag + """) + assert_query( + "SELECT * FROM things ORDER BY section, grp, tag", + """ + section grp tag + sec-a grp-1 t1 + sec-a grp-1 t2 + sec-a grp-2 t3 + """, + ) + + def test_data_dict_expansion(test_home): """Verify the behavior of the '; kv' option in row_source""" kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" From 2ad9bc6a37088b01aa697e5d62a929758a2a2026 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Fri, 15 May 2026 18:08:19 -0400 Subject: [PATCH 09/35] Doc tweak --- docs/extending.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/extending.rst b/docs/extending.rst 
index a7580af..d93c4a8 100644 --- a/docs/extending.rst +++ b/docs/extending.rst @@ -302,6 +302,7 @@ Tips If creating multiple tables from a resource, you should use the ``uid`` column (sourced from ``metadata.uid``) as a join key, since this is a guaranteed unique key. +(Example: ``nodes`` and ``node_labels``, ``pods`` and ``pod_labels``.) The ``utils:`` section of ``~/.kugl/init.yaml`` is ignored during configuration parsing, so you can use it to store reusable bits of YAML. From 00e8a1d0ffea81e5e6c4b64c5ceb9ba4f786e381 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Fri, 15 May 2026 18:09:27 -0400 Subject: [PATCH 10/35] Document how to run tests correctly in CLAUDE.md Co-Authored-By: Claude Sonnet 4.6 (1M context) --- CLAUDE.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/CLAUDE.md b/CLAUDE.md index e0f39ee..f3e4fce 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -225,3 +225,14 @@ Set `KUGL_DEBUG` env var to a comma-separated list of topics: - `clock.CLOCK` is patched in tests to control time (for cache age calculations) - Tests use actual in-memory SQLite; no mocking of the DB layer - Kubernetes tests mock `kubectl` via fixtures in `tests/k8s/k8s_mocks.py` + +## Running Tests + +```bash +uv run pytest tests/ # full suite +uv run pytest tests/ -k foo # filter by name +``` + +**Important:** always run from the project root with `tests/` as the target, not an individual file. +The Registry is populated by decorator side-effects at import time; running a single test module in +isolation skips those imports and causes "Resource family X is not registered" errors. From 8f53efed55a611b616e3244fa5b059aea730cf45 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Fri, 15 May 2026 18:49:57 -0400 Subject: [PATCH 11/35] Add services and deployments as built-in Kubernetes tables Includes service_labels and deployment_labels, test fixtures and tests, kubectl mock updates, and docs/builtins.rst entries.
Co-Authored-By: Claude Sonnet 4.6 --- docs/builtins.rst | 88 +++++++++++++++++++++++++++ kugl/builtins/schemas/kubernetes.py | 82 +++++++++++++++++++++++++ kugl/builtins/schemas/kubernetes.yaml | 4 ++ tests/k8s/k8s_mocks.py | 68 +++++++++++++++++++++ tests/k8s/kubectl | 2 +- tests/k8s/test_deployments.py | 50 +++++++++++++++ tests/k8s/test_services.py | 52 ++++++++++++++++ tests/static/sample_deployment.yaml | 28 +++++++++ tests/static/sample_service.yaml | 19 ++++++ 9 files changed, 392 insertions(+), 1 deletion(-) create mode 100644 tests/k8s/test_deployments.py create mode 100644 tests/k8s/test_services.py create mode 100644 tests/static/sample_deployment.yaml create mode 100644 tests/static/sample_service.yaml diff --git a/docs/builtins.rst b/docs/builtins.rst index 09119cd..6ed07cf 100644 --- a/docs/builtins.rst +++ b/docs/builtins.rst @@ -229,6 +229,94 @@ Built from ``kubectl get nodes``, one row per taint | | | entry in ``spec.taints`` | +--------------------+------+------------------------------------------+ +services +~~~~~~~~ + +Built from ``kubectl get services``, one row per service. 
+ ++-------------+---------+--------------------------------------------------+ +| Column | Type | Description | ++=============+=========+==================================================+ +| name | TEXT | Service name, from ``metadata.name`` | ++-------------+---------+--------------------------------------------------+ +| uid | TEXT | Service UID, from ``metadata.uid`` | ++-------------+---------+--------------------------------------------------+ +| namespace | TEXT | Service namespace, from ``metadata.namespace`` | ++-------------+---------+--------------------------------------------------+ +| type | TEXT | Service type: ``ClusterIP``, ``NodePort``, | +| | | ``LoadBalancer``, or ``ExternalName`` | ++-------------+---------+--------------------------------------------------+ +| cluster_ip | TEXT | Cluster IP, from ``spec.clusterIP``; null for | +| | | headless services and ``ExternalName`` type | ++-------------+---------+--------------------------------------------------+ +| external_ip | TEXT | External IP or hostname for ``LoadBalancer`` | +| | | services, from ``status.loadBalancer.ingress``; | +| | | null otherwise | ++-------------+---------+--------------------------------------------------+ +| creation_ts | INTEGER | Creation timestamp in epoch seconds, from | +| | | ``metadata.creationTimestamp`` | ++-------------+---------+--------------------------------------------------+ + +service_labels +~~~~~~~~~~~~~~ + +Built from ``kubectl get services``, one row per label. 
+ ++--------------+------+--------------------------------------------------+ +| Column | Type | Description | ++==============+======+==================================================+ +| service_uid | TEXT | Service UID, from ``metadata.uid`` | ++--------------+------+--------------------------------------------------+ +| key, value | TEXT | Label key and value from each entry in | +| | | ``metadata.labels`` | ++--------------+------+--------------------------------------------------+ + +deployments +~~~~~~~~~~~ + +Built from ``kubectl get deployments``, one row per deployment. + ++-------------+---------+--------------------------------------------------+ +| Column | Type | Description | ++=============+=========+==================================================+ +| name | TEXT | Deployment name, from ``metadata.name`` | ++-------------+---------+--------------------------------------------------+ +| uid | TEXT | Deployment UID, from ``metadata.uid`` | ++-------------+---------+--------------------------------------------------+ +| namespace | TEXT | Deployment namespace, from | +| | | ``metadata.namespace`` | ++-------------+---------+--------------------------------------------------+ +| replicas | INTEGER | Desired replica count, from ``spec.replicas`` | ++-------------+---------+--------------------------------------------------+ +| ready | INTEGER | Ready replicas, from ``status.readyReplicas`` | ++-------------+---------+--------------------------------------------------+ +| available | INTEGER | Available replicas, from | +| | | ``status.availableReplicas`` | ++-------------+---------+--------------------------------------------------+ +| updated | INTEGER | Updated replicas, from | +| | | ``status.updatedReplicas`` | ++-------------+---------+--------------------------------------------------+ +| strategy | TEXT | Rollout strategy, from ``spec.strategy.type``; | +| | | ``RollingUpdate`` or ``Recreate`` | 
++-------------+---------+--------------------------------------------------+ +| creation_ts | INTEGER | Creation timestamp in epoch seconds, from | +| | | ``metadata.creationTimestamp`` | ++-------------+---------+--------------------------------------------------+ + +deployment_labels +~~~~~~~~~~~~~~~~~ + +Built from ``kubectl get deployments``, one row per label. + ++----------------+------+--------------------------------------------------+ +| Column | Type | Description | ++================+======+==================================================+ +| deployment_uid | TEXT | Deployment UID, from ``metadata.uid`` | ++----------------+------+--------------------------------------------------+ +| key, value | TEXT | Label key and value from each entry in | +| | | ``metadata.labels`` | ++----------------+------+--------------------------------------------------+ + Built-in functions ------------------ diff --git a/kugl/builtins/schemas/kubernetes.py b/kugl/builtins/schemas/kubernetes.py index 8ad9b18..1dd5a36 100644 --- a/kugl/builtins/schemas/kubernetes.py +++ b/kugl/builtins/schemas/kubernetes.py @@ -311,3 +311,85 @@ def make_rows(self, context) -> list[tuple[dict, tuple]]: @table(schema="kubernetes", name="cronjob_labels", resource="cronjobs") class CronJobLabelsTable(LabelsTable): UID_FIELD = "cronjob_uid" + + +@table(schema="kubernetes", name="services", resource="services") +class ServicesTable: + _COLUMNS = [ + column("name", "TEXT", "service name, from metadata.name"), + column("uid", "TEXT", "service UID, from metadata.uid"), + column("namespace", "TEXT", "service namespace, from metadata.namespace"), + column("type", "TEXT", "service type: ClusterIP, NodePort, LoadBalancer, or ExternalName"), + column("cluster_ip", "TEXT", "cluster IP, or null for headless services"), + column("external_ip", "TEXT", "external IP or hostname for LoadBalancer services, or null"), + column("creation_ts", "INTEGER", "creation timestamp in epoch seconds, from 
metadata.creationTimestamp"), + ] + + def columns(self): + return self._COLUMNS + + def make_rows(self, context) -> list[tuple[dict, tuple]]: + for item in context.data["items"]: + svc = ItemHelper(item) + cluster_ip = item["spec"].get("clusterIP") + ingress = item.get("status", {}).get("loadBalancer", {}).get("ingress", []) + external_ip = (ingress[0].get("ip") or ingress[0].get("hostname")) if ingress else None + yield ( + item, + ( + svc.name, + svc.metadata.get("uid"), + svc.namespace, + item["spec"].get("type"), + None if cluster_ip == "None" else cluster_ip, + external_ip, + parse_utc(svc.metadata.get("creationTimestamp")), + ), + ) + + +@table(schema="kubernetes", name="service_labels", resource="services") +class ServiceLabelsTable(LabelsTable): + UID_FIELD = "service_uid" + + +@table(schema="kubernetes", name="deployments", resource="deployments") +class DeploymentsTable: + _COLUMNS = [ + column("name", "TEXT", "deployment name, from metadata.name"), + column("uid", "TEXT", "deployment UID, from metadata.uid"), + column("namespace", "TEXT", "deployment namespace, from metadata.namespace"), + column("replicas", "INTEGER", "desired replica count, from spec.replicas"), + column("ready", "INTEGER", "ready replicas, from status.readyReplicas"), + column("available", "INTEGER", "available replicas, from status.availableReplicas"), + column("updated", "INTEGER", "updated replicas, from status.updatedReplicas"), + column("strategy", "TEXT", "rollout strategy: RollingUpdate or Recreate, from spec.strategy.type"), + column("creation_ts", "INTEGER", "creation timestamp in epoch seconds, from metadata.creationTimestamp"), + ] + + def columns(self): + return self._COLUMNS + + def make_rows(self, context) -> list[tuple[dict, tuple]]: + for item in context.data["items"]: + deploy = ItemHelper(item) + status = item.get("status", {}) + yield ( + item, + ( + deploy.name, + deploy.metadata.get("uid"), + deploy.namespace, + item["spec"].get("replicas"), + 
status.get("readyReplicas"), + status.get("availableReplicas"), + status.get("updatedReplicas"), + item["spec"].get("strategy", {}).get("type"), + parse_utc(deploy.metadata.get("creationTimestamp")), + ), + ) + + +@table(schema="kubernetes", name="deployment_labels", resource="deployments") +class DeploymentLabelsTable(LabelsTable): + UID_FIELD = "deployment_uid" diff --git a/kugl/builtins/schemas/kubernetes.yaml b/kugl/builtins/schemas/kubernetes.yaml index ff49c86..36e5289 100644 --- a/kugl/builtins/schemas/kubernetes.yaml +++ b/kugl/builtins/schemas/kubernetes.yaml @@ -11,6 +11,10 @@ resources: namespaced: true - name: nodes namespaced: false + - name: services + namespaced: true + - name: deployments + namespaced: true # node_taints builtin table is done here because it doesn't have any special column extraction # logic, and it serves as a good unit test. diff --git a/tests/k8s/k8s_mocks.py b/tests/k8s/k8s_mocks.py index 0f6f614..80422e7 100644 --- a/tests/k8s/k8s_mocks.py +++ b/tests/k8s/k8s_mocks.py @@ -215,6 +215,74 @@ def make_cronjob( return obj +def make_service( + name: str, + namespace: str = None, + svc_type: str = "ClusterIP", + cluster_ip: str = "10.96.0.1", + external_ip: str = None, + labels: Optional[dict] = None, +): + """ + Construct a Service dict from a generic chunk of YAML that we can alter to simulate different + responses from the K8S API. 
+ + :param name: Service name + :param svc_type: Service type (ClusterIP, NodePort, LoadBalancer, ExternalName) + :param cluster_ip: Cluster IP, or "None" for headless/ExternalName services + :param external_ip: External IP for LoadBalancer services + """ + obj = yaml.safe_load(_static_content("sample_service.yaml")) + obj["metadata"]["name"] = name + obj["metadata"]["uid"] = "uid-" + name + if namespace is not None: + obj["metadata"]["namespace"] = namespace + obj["spec"]["type"] = svc_type + obj["spec"]["clusterIP"] = cluster_ip + if external_ip is not None: + obj["status"]["loadBalancer"]["ingress"] = [{"ip": external_ip}] + if labels is not None: + obj["metadata"]["labels"] = labels + return obj + + +def make_deployment( + name: str, + namespace: str = None, + replicas: int = 3, + ready: int = None, + available: int = None, + updated: int = None, + strategy: str = "RollingUpdate", + labels: Optional[dict] = None, +): + """ + Construct a Deployment dict from a generic chunk of YAML that we can alter to simulate different + responses from the K8S API. 
+ + :param name: Deployment name + :param replicas: Desired replica count + :param ready: Ready replicas (defaults to replicas) + :param available: Available replicas (defaults to replicas) + :param updated: Updated replicas (defaults to replicas) + :param strategy: Rollout strategy (RollingUpdate or Recreate) + """ + obj = yaml.safe_load(_static_content("sample_deployment.yaml")) + obj["metadata"]["name"] = name + obj["metadata"]["uid"] = "uid-" + name + if namespace is not None: + obj["metadata"]["namespace"] = namespace + obj["spec"]["replicas"] = replicas + obj["spec"]["strategy"]["type"] = strategy + obj["status"]["replicas"] = replicas + obj["status"]["readyReplicas"] = replicas if ready is None else ready + obj["status"]["availableReplicas"] = replicas if available is None else available + obj["status"]["updatedReplicas"] = replicas if updated is None else updated + if labels is not None: + obj["metadata"]["labels"] = labels + return obj + + @cache def _static_content(filename: str): return Path(__file__).parent.parent.joinpath("static", filename).read_text() diff --git a/tests/k8s/kubectl b/tests/k8s/kubectl index e64a05d..ae92e46 100755 --- a/tests/k8s/kubectl +++ b/tests/k8s/kubectl @@ -9,7 +9,7 @@ import re import sys args = " ".join(sys.argv[1:]) -if m := re.match("get (pods|jobs|cronjobs|things) (-n default|--all-namespaces) -o json", args): +if m := re.match("get (pods|jobs|cronjobs|services|deployments|things) (-n default|--all-namespaces) -o json", args): kind = m.group(1) elif re.match("get pods (-n default|--all-namespaces)", args): kind = "pod_statuses" diff --git a/tests/k8s/test_deployments.py b/tests/k8s/test_deployments.py new file mode 100644 index 0000000..3698699 --- /dev/null +++ b/tests/k8s/test_deployments.py @@ -0,0 +1,50 @@ +""" +Tests for the deployments table. 
+""" + +from .k8s_mocks import make_deployment, kubectl_response +from ..testing import assert_query + + +def test_deployment_replicas(test_home): + kubectl_response( + "deployments", + { + "items": [ + make_deployment("deploy-1"), + make_deployment("deploy-2", replicas=5, ready=3, available=3, updated=5), + make_deployment("deploy-3", replicas=2, strategy="Recreate"), + ] + }, + ) + assert_query( + "SELECT name, replicas, ready, available, updated, strategy FROM deployments ORDER BY 1", + """ + name replicas ready available updated strategy + deploy-1 3 3 3 3 RollingUpdate + deploy-2 5 3 3 5 RollingUpdate + deploy-3 2 2 2 2 Recreate + """, + ) + + +def test_deployment_labels(test_home): + kubectl_response( + "deployments", + { + "items": [ + make_deployment("deploy-1", labels=dict(app="web", env="prod")), + make_deployment("deploy-2", labels=dict(app="api")), + make_deployment("deploy-3", labels=dict()), + ] + }, + ) + assert_query( + "SELECT deployment_uid, key, value FROM deployment_labels ORDER BY 2, 3, 1", + """ + deployment_uid key value + uid-deploy-2 app api + uid-deploy-1 app web + uid-deploy-1 env prod + """, + ) diff --git a/tests/k8s/test_services.py b/tests/k8s/test_services.py new file mode 100644 index 0000000..1f2cdf8 --- /dev/null +++ b/tests/k8s/test_services.py @@ -0,0 +1,52 @@ +""" +Tests for the services table. 
+""" + +from .k8s_mocks import make_service, kubectl_response +from ..testing import assert_query + + +def test_service_types(test_home): + kubectl_response( + "services", + { + "items": [ + make_service("svc-1"), + make_service("svc-2", svc_type="NodePort", cluster_ip="10.96.0.2"), + make_service("svc-3", svc_type="LoadBalancer", cluster_ip="10.96.0.3", external_ip="203.0.113.5"), + make_service("svc-4", svc_type="ExternalName", cluster_ip="None"), + ] + }, + ) + assert_query( + "SELECT name, type, cluster_ip, external_ip FROM services ORDER BY 1", + """ + name type cluster_ip external_ip + svc-1 ClusterIP 10.96.0.1 + svc-2 NodePort 10.96.0.2 + svc-3 LoadBalancer 10.96.0.3 203.0.113.5 + svc-4 ExternalName + """, + ) + + +def test_service_labels(test_home): + kubectl_response( + "services", + { + "items": [ + make_service("svc-1", labels=dict(foo="bar")), + make_service("svc-2", labels=dict(a="b", c="d")), + make_service("svc-3", labels=dict()), + ] + }, + ) + assert_query( + "SELECT service_uid, key, value FROM service_labels ORDER BY 2, 1", + """ + service_uid key value + uid-svc-2 a b + uid-svc-2 c d + uid-svc-1 foo bar + """, + ) diff --git a/tests/static/sample_deployment.yaml b/tests/static/sample_deployment.yaml new file mode 100644 index 0000000..183aaea --- /dev/null +++ b/tests/static/sample_deployment.yaml @@ -0,0 +1,28 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + creationTimestamp: "2024-11-20T01:05:00Z" + name: example-deployment + namespace: example + uid: aaaabbbb-1234-5678-abcd-000000000002 + labels: {} +spec: + replicas: 3 + strategy: + type: RollingUpdate + selector: + matchLabels: + app: example + template: + metadata: + labels: + app: example + spec: + containers: + - name: main + image: example.com/app:latest +status: + replicas: 3 + readyReplicas: 3 + availableReplicas: 3 + updatedReplicas: 3 diff --git a/tests/static/sample_service.yaml b/tests/static/sample_service.yaml new file mode 100644 index 0000000..9b7c995 --- /dev/null +++ 
b/tests/static/sample_service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: "2024-11-20T01:05:00Z" + name: example-service + namespace: example + uid: aaaabbbb-1234-5678-abcd-000000000001 + labels: {} +spec: + clusterIP: 10.96.0.1 + type: ClusterIP + selector: + app: example + ports: + - port: 80 + targetPort: 8080 + protocol: TCP +status: + loadBalancer: {} From bb6397fc3182a781599b0002aac2ec651ac87d1a Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 09:11:23 -0400 Subject: [PATCH 12/35] Add events as a built-in Kubernetes table Co-Authored-By: Claude Sonnet 4.6 --- docs/builtins.rst | 44 +++++++++++++++++++++++ kugl/builtins/schemas/kubernetes.py | 41 ++++++++++++++++++++++ kugl/builtins/schemas/kubernetes.yaml | 2 ++ tests/k8s/k8s_mocks.py | 30 ++++++++++++++++ tests/k8s/kubectl | 2 +- tests/k8s/test_events.py | 50 +++++++++++++++++++++++++++ tests/static/sample_event.yaml | 20 +++++++++++ 7 files changed, 188 insertions(+), 1 deletion(-) create mode 100644 tests/k8s/test_events.py create mode 100644 tests/static/sample_event.yaml diff --git a/docs/builtins.rst b/docs/builtins.rst index 6ed07cf..839b5c2 100644 --- a/docs/builtins.rst +++ b/docs/builtins.rst @@ -317,6 +317,50 @@ Built from ``kubectl get deployments``, one row per label. | | | ``metadata.labels`` | +----------------+------+--------------------------------------------------+ +events +~~~~~~ + +Built from ``kubectl get events``, one row per event. Kubernetes deduplicates +repeated events, so ``count`` reflects how many times an event occurred rather +than the number of rows. Note that ``type`` and ``count`` conflict with SQL +keywords / aggregate function names and must be backtick-quoted in queries, +e.g. ``SELECT `type`, `count` FROM events``.
+ ++---------------+---------+------------------------------------------------------------+ +| Column | Type | Description | ++===============+=========+============================================================+ +| namespace | TEXT | Event namespace, from ``metadata.namespace`` | ++---------------+---------+------------------------------------------------------------+ +| type | TEXT | Event type: ``Normal`` or ``Warning``; backtick-quote | +| | | in SQL | ++---------------+---------+------------------------------------------------------------+ +| reason | TEXT | Short machine-readable reason, e.g. ``Scheduled``, | +| | | ``OOMKilling`` | ++---------------+---------+------------------------------------------------------------+ +| message | TEXT | Human-readable event description | ++---------------+---------+------------------------------------------------------------+ +| count | INTEGER | Number of times this event has occurred; backtick-quote | +| | | in SQL | ++---------------+---------+------------------------------------------------------------+ +| first_ts | INTEGER | First occurrence timestamp in epoch seconds, from | +| | | ``firstTimestamp`` | ++---------------+---------+------------------------------------------------------------+ +| last_ts | INTEGER | Last occurrence timestamp in epoch seconds, from | +| | | ``lastTimestamp`` | ++---------------+---------+------------------------------------------------------------+ +| obj_kind | TEXT | Involved object kind, from ``involvedObject.kind``, | +| | | e.g. 
``Pod``, ``Node`` | ++---------------+---------+------------------------------------------------------------+ +| obj_name | TEXT | Involved object name, from ``involvedObject.name``; | +| | | primary join key to other tables | ++---------------+---------+------------------------------------------------------------+ +| obj_namespace | TEXT | Involved object namespace, from | +| | | ``involvedObject.namespace`` | ++---------------+---------+------------------------------------------------------------+ +| source | TEXT | Generating component, from ``source.component``, | +| | | e.g. ``kubelet``, ``default-scheduler`` | ++---------------+---------+------------------------------------------------------------+ + Built-in functions ------------------ diff --git a/kugl/builtins/schemas/kubernetes.py b/kugl/builtins/schemas/kubernetes.py index 1dd5a36..2c39369 100644 --- a/kugl/builtins/schemas/kubernetes.py +++ b/kugl/builtins/schemas/kubernetes.py @@ -393,3 +393,44 @@ def make_rows(self, context) -> list[tuple[dict, tuple]]: @table(schema="kubernetes", name="deployment_labels", resource="deployments") class DeploymentLabelsTable(LabelsTable): UID_FIELD = "deployment_uid" + + +@table(schema="kubernetes", name="events", resource="events") +class EventsTable: + _COLUMNS = [ + column("namespace", "TEXT", "event namespace, from metadata.namespace"), + column("type", "TEXT", "event type: Normal or Warning — quote with backticks in SQL"), + column("reason", "TEXT", "short machine-readable event reason"), + column("message", "TEXT", "human-readable event description"), + column("count", "INTEGER", "number of times this event has occurred — quote with backticks in SQL"), + column("first_ts", "INTEGER", "first occurrence timestamp in epoch seconds, from firstTimestamp"), + column("last_ts", "INTEGER", "last occurrence timestamp in epoch seconds, from lastTimestamp"), + column("obj_kind", "TEXT", "involved object kind, from involvedObject.kind"), + column("obj_name", "TEXT", 
"involved object name, from involvedObject.name"), + column("obj_namespace", "TEXT", "involved object namespace, from involvedObject.namespace"), + column("source", "TEXT", "component that generated the event, from source.component"), + ] + + def columns(self): + return self._COLUMNS + + def make_rows(self, context) -> list[tuple[dict, tuple]]: + for item in context.data["items"]: + event = ItemHelper(item) + obj = item.get("involvedObject", {}) + yield ( + item, + ( + event.namespace, + item.get("type"), + item.get("reason"), + item.get("message"), + item.get("count"), + parse_utc(item.get("firstTimestamp")), + parse_utc(item.get("lastTimestamp")), + obj.get("kind"), + obj.get("name"), + obj.get("namespace"), + item.get("source", {}).get("component"), + ), + ) diff --git a/kugl/builtins/schemas/kubernetes.yaml b/kugl/builtins/schemas/kubernetes.yaml index 36e5289..525798d 100644 --- a/kugl/builtins/schemas/kubernetes.yaml +++ b/kugl/builtins/schemas/kubernetes.yaml @@ -15,6 +15,8 @@ resources: namespaced: true - name: deployments namespaced: true + - name: events + namespaced: true # node_taints builtin table is done here because it doesn't have any special column extraction # logic, and it serves as a good unit test. 
diff --git a/tests/k8s/k8s_mocks.py b/tests/k8s/k8s_mocks.py index 80422e7..fdebf31 100644 --- a/tests/k8s/k8s_mocks.py +++ b/tests/k8s/k8s_mocks.py @@ -283,6 +283,36 @@ def make_deployment( return obj +def make_event( + name: str, + namespace: str = "default", + event_type: str = "Normal", + reason: str = "Scheduled", + message: str = "Event message", + count: int = 1, + first_ts: int = UNIT_TEST_TIMEBASE, + last_ts: int = UNIT_TEST_TIMEBASE, + obj_kind: str = "Pod", + obj_name: str = "example-pod", + obj_namespace: str = "default", + source: str = "default-scheduler", +): + obj = yaml.safe_load(_static_content("sample_event.yaml")) + obj["metadata"]["name"] = f"{obj_name}.{name}" + obj["metadata"]["namespace"] = namespace + obj["type"] = event_type + obj["reason"] = reason + obj["message"] = message + obj["count"] = count + obj["firstTimestamp"] = to_utc(first_ts) + obj["lastTimestamp"] = to_utc(last_ts) + obj["involvedObject"]["kind"] = obj_kind + obj["involvedObject"]["name"] = obj_name + obj["involvedObject"]["namespace"] = obj_namespace + obj["source"]["component"] = source + return obj + + @cache def _static_content(filename: str): return Path(__file__).parent.parent.joinpath("static", filename).read_text() diff --git a/tests/k8s/kubectl b/tests/k8s/kubectl index ae92e46..28900eb 100755 --- a/tests/k8s/kubectl +++ b/tests/k8s/kubectl @@ -9,7 +9,7 @@ import re import sys args = " ".join(sys.argv[1:]) -if m := re.match("get (pods|jobs|cronjobs|services|deployments|things) (-n default|--all-namespaces) -o json", args): +if m := re.match("get (pods|jobs|cronjobs|services|deployments|events|things) (-n default|--all-namespaces) -o json", args): kind = m.group(1) elif re.match("get pods (-n default|--all-namespaces)", args): kind = "pod_statuses" diff --git a/tests/k8s/test_events.py b/tests/k8s/test_events.py new file mode 100644 index 0000000..5efd18c --- /dev/null +++ b/tests/k8s/test_events.py @@ -0,0 +1,50 @@ +""" +Tests for the events table. 
+""" + +from kugl.util import UNIT_TEST_TIMEBASE +from .k8s_mocks import make_event, kubectl_response +from ..testing import assert_query + + +def test_event_columns(test_home): + kubectl_response( + "events", + { + "items": [ + make_event("ev1", event_type="Normal", reason="Scheduled", count=1, + obj_kind="Pod", obj_name="my-pod", source="default-scheduler"), + make_event("ev2", event_type="Warning", reason="OOMKilling", count=5, + obj_kind="Pod", obj_name="my-pod", source="kubelet"), + make_event("ev3", event_type="Warning", reason="Failed", count=3, + obj_kind="Node", obj_name="node-1", obj_namespace="", source="kubelet"), + ] + }, + ) + assert_query( + "SELECT namespace, `type`, reason, `count`, obj_kind, obj_name, source FROM events ORDER BY reason", + """ + namespace type reason count obj_kind obj_name source + default Warning Failed 3 Node node-1 kubelet + default Warning OOMKilling 5 Pod my-pod kubelet + default Normal Scheduled 1 Pod my-pod default-scheduler + """, + ) + + +def test_event_timestamps(test_home): + kubectl_response( + "events", + { + "items": [ + make_event("ev1", first_ts=UNIT_TEST_TIMEBASE, last_ts=UNIT_TEST_TIMEBASE + 300), + ] + }, + ) + assert_query( + "SELECT first_ts, last_ts, last_ts - first_ts AS elapsed FROM events", + f""" + first_ts last_ts elapsed + {UNIT_TEST_TIMEBASE} {UNIT_TEST_TIMEBASE + 300} 300 + """, + ) diff --git a/tests/static/sample_event.yaml b/tests/static/sample_event.yaml new file mode 100644 index 0000000..5c1981d --- /dev/null +++ b/tests/static/sample_event.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Event +metadata: + name: example-pod.17c8a9f0c0b23456 + namespace: default + uid: aaaabbbb-1234-5678-abcd-000000000099 +type: Normal +reason: Scheduled +message: Successfully assigned default/example-pod to node-1 +count: 1 +firstTimestamp: "2024-11-20T01:00:00Z" +lastTimestamp: "2024-11-20T01:00:00Z" +involvedObject: + kind: Pod + name: example-pod + namespace: default + uid: aaaabbbb-1234-5678-abcd-000000000001 
+source: + component: default-scheduler + host: master-node From cc9f0388da0cc4da6d3175d34094fbd54f751797 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 09:27:30 -0400 Subject: [PATCH 13/35] Add some Claude discussions --- .claude/plans/row-source.md | 188 ++++++++++++++++++++++++++++++++++ .claude/plans/shortcomings.md | 113 ++++++++++++++++++++ 2 files changed, 301 insertions(+) create mode 100644 .claude/plans/row-source.md create mode 100644 .claude/plans/shortcomings.md diff --git a/.claude/plans/row-source.md b/.claude/plans/row-source.md new file mode 100644 index 0000000..36a802a --- /dev/null +++ b/.claude/plans/row-source.md @@ -0,0 +1,188 @@ +# Implementation Plan: Named Scopes + `from:` Unification + +Two related improvements to the YAML extension mechanism. They can be implemented +sequentially on one branch or separately; Phase 1 is a prerequisite for Phase 2's +scope-aware path resolution. + +--- + +## Phase 1: Named Scopes in `row_source` + +### Goal + +Replace the `^` parent-hop syntax with named scope references. Before: + +```yaml +create: + - table: node_taints + resource: nodes + row_source: + - items + - spec.taints + columns: + - name: node_uid + path: ^metadata.uid # ^ = "go up one level" +``` + +After: + +```yaml +create: + - table: node_taints + resource: nodes + row_source: + - items as node + - spec.taints + columns: + - name: node_uid + path: node.metadata.uid # named scope, no counting required +``` + +### Changes + +**`kugl/impl/tables.py` — `Itemizer`** + +- Parse `as <name>` suffix from row_source entries. `"items as node"` yields + `Itemizer(expr="items", name="node", finder=..., unpack=False)`. +- Store `name: Optional[str]` on the dataclass. + +**`kugl/impl/tables.py` — `RowContext`** + +- Add `_scopes: dict[int, dict[str, object]]`. Key is `id(child)`; value is the + map of scope names visible at that child's level.
+- Update `set_parent` to also record named scopes: when a child is created from a + level that had a name, include that name → parent-object in the child's scope map, + merging with any scopes already inherited. +- Add `get_scope(child, name) -> Optional[object]` that walks up the scope chain + to find the named object. + +**`kugl/impl/tables.py` — `TableFromConfig._itemize`** + +- After calling `context.set_parent(child, item)`, also call a new + `context.set_scope(child, source.name, item)` when `source.name` is not None, + carrying forward all ancestor scopes so deeper levels can still reference `node`. + +**`kugl/impl/extract.py` — `FieldRef` / `PathExtractor` / `LabelExtractor`** + +- `FieldRef.parse` currently strips leading `^` characters. Extend it to also + detect a leading `<scope>.` prefix that could be a scope name. Store as + `scope_name: Optional[str]` and strip it from the target before JMESPath + compilation. +- In `PathExtractor.extract` and `LabelExtractor.extract`, when `self._ref.scope_name` + is set, resolve the object via `context.get_scope(obj, scope_name)` instead of + `context.get_parent(obj, n_parents)`. +- Keep `^` handling intact for backward compatibility. + +### Builtin Update + +`kugl/builtins/schemas/kubernetes.yaml` — convert `node_taints` to use named scopes +as a self-contained example: + +```yaml + row_source: + - items as node + - spec.taints + columns: + - name: node_uid + path: node.metadata.uid +``` + +### Tests + +- Update the existing `node_taints` test (wherever it lives) to verify the new + syntax produces the same output. +- Add a new test with three levels of nesting (e.g. `pod → container → env`) using + two named scopes, verifying that both ancestor levels are reachable by name. +- Add a test that `^` still works (backward compat). + +--- + +## Phase 2: `from:` Key Unification + +### Goal + +Replace the two-key `path:` / `label:` vocabulary with a single `from:` key that +auto-detects extraction type.
Named scope prefixes compose naturally. + +```yaml + columns: + - name: node_pool + from: karpenter.sh/nodepool # auto-detected: label (contains / before first .) + - name: provider_id + from: spec.providerID # auto-detected: JMESPath + - name: pod_name + from: pod.metadata.name # named scope + JMESPath + - name: pod_pool + from: pod.karpenter.sh/nodepool # named scope + label +``` + +### Auto-Detection Rule + +After stripping any `<scope>.` prefix: + +- Matches `[a-zA-Z0-9.-]+/[a-zA-Z0-9._/-]+` (K8s label format: DNS domain + `/` + + key) → `LabelExtractor` +- Otherwise → `PathExtractor` + +A value like `metadata.labels.foo/bar` is a JMESPath, not a label — the `/` appears +inside a path segment, not as the label-domain separator. The regex above handles +this correctly because `metadata.labels.foo` is not a valid DNS domain segment. + +### Changes + +**`kugl/impl/config.py` — `UserColumn`** + +- Add `from_: Optional[str] = Field(None, alias="from")` (Pydantic alias needed + because `from` is a Python keyword). +- In `gen_extractor`, handle `from_` alongside `path` and `label`. + - If `from_` is set alongside `path` or `label`, raise `ValueError`. + - Parse any scope prefix from `from_`. + - Apply the label-vs-path regex to the remainder. + - Construct the appropriate extractor, passing the scope name through. +- Keep `path:` and `label:` fully supported so existing configs are not broken. + +**`kugl/impl/extract.py` — `FieldRef`** + +- Move the scope-prefix parsing here (it's already handling `^`); `gen_extractor` + delegates to `FieldRef.parse_from(s, known_scopes=None)`. +- Known scopes are not available at Pydantic parse time (they live in `CreateTable` + which is a sibling, not a parent). Two options: + - **Lazy validation**: accept any `<scope>.` prefix as a potential scope; fail at + table-build time in `TableFromConfig.__init__` if a referenced scope name is not + declared in `row_source`. + - **Two-pass**: `CreateTable` validates column scope references after parsing.
+ Lazy validation is simpler and consistent with how `path:` expressions are + currently validated (JMESPath compilation errors surface at parse time, but + missing-path errors surface at query time). + +### Tests + +- `from: karpenter.sh/nodepool` produces the same result as `label: karpenter.sh/nodepool`. +- `from: spec.providerID` produces the same result as `path: spec.providerID`. +- `from: node.metadata.name` with a named `node` scope resolves correctly. +- `from: node.karpenter.sh/nodepool` with a named `node` scope resolves as a label + on the node object. +- Error: `from:` and `path:` both specified → validation error. +- Error: `from: unknownscope.foo` where `unknownscope` is not in `row_source` → clear + error message at table-build time. + +--- + +## Files Touched + +| File | Change | +|---|---| +| `kugl/impl/extract.py` | `FieldRef.parse`: detect scope prefix; extractors: resolve via scope | +| `kugl/impl/tables.py` | `Itemizer`: parse `as <name>`; `RowContext`: track named scopes | +| `kugl/impl/config.py` | `UserColumn`: add `from_` field and dispatch in `gen_extractor` | +| `kugl/builtins/schemas/kubernetes.yaml` | Convert `node_taints` to named scope syntax | +| `tests/` | Update node_taints test; add multi-level and `from:` tests | + +--- + +## Out of Scope + +- The broader resource-coverage gaps from `discuss.md` (deployments, containers table, + etc.) are separate work and should not be bundled here. +- `^` removal: keep the old syntax working indefinitely. No deprecation warning needed + until named scopes have been in a release. diff --git a/.claude/plans/shortcomings.md b/.claude/plans/shortcomings.md new file mode 100644 index 0000000..9cca333 --- /dev/null +++ b/.claude/plans/shortcomings.md @@ -0,0 +1,113 @@ +# Kugl Discussion Summary + +## What Kugl Is + +Kugl is a Python CLI tool that queries Kubernetes resources using SQL (SQLite). It runs `kubectl get` commands, caches the JSON output, and loads it into an in-memory SQLite database.
Users write SQL queries directly on the command line or via saved shortcuts. + +Built-in tables: `pods`, `jobs`, `nodes`, `node_labels`, `pod_labels`, `job_labels`, `node_taints`. Resource types, namespaces, and cache TTL are controlled via CLI flags (`-a`, `-n`, `-u`, `-c`, `-t`). + +Kugl automatically converts Kubernetes-specific value formats to queryable numerics: `50Mi` → bytes, `100m` CPU → float, ISO8601 timestamps → epoch seconds. Helper functions `to_size()`, `to_age()`, `to_utc()` convert back to human-readable strings for output. + +--- + +## Strengths + +- **SQL is better than jq for aggregation.** Queries involving `GROUP BY`, `SUM`, `JOIN`, `ORDER BY`, and CTEs are dramatically more readable in SQL than in jq pipelines. The target use case — "how is compute distributed across node pools and taints?" — is well served. +- **Automatic type coercion.** CPU, memory, and timestamp conversion is handled transparently. Steampipe's Kubernetes plugin likely exposes these as raw strings or JSONB; kugl makes them directly comparable numerically. +- **Built-in caching.** A 2-minute TTL cache avoids hammering the API server during exploratory queries. +- **Declarative extensions require no code.** Adding a label or nested field to an existing table takes 4 lines of YAML, no build step, no Go, no Python. Far more accessible than Steampipe's Go plugin model. +- **Multi-schema queries.** Joining Kubernetes data with other JSON sources (files, exec output) via `kubernetes.nodes JOIN ec2.instances` is architecturally sound, even if the AWS side is experimental. + +--- + +## Weaknesses + +### Priority (blocking credibility) + +1. **Narrow built-in resource coverage.** Only pods, jobs, and nodes are built in. Deployments, StatefulSets, DaemonSets, CronJobs, Services, Ingresses, Namespaces, PVs/PVCs are absent. Users can add them via YAML config, but requiring setup before querying standard resources is a significant barrier. + +2. 
**No per-container table.** Pod-level resource data aggregates across all containers. For multi-container pods (sidecars, init containers), individual container visibility is lost. A `containers` table (one row per container, joinable to `pods` via pod UID) is needed. + +3. **No context selection at invocation time.** Users must `kubectl config use-context` before running kugl. A `--context` flag is table stakes for anyone with more than one cluster. + +4. **No structured output.** Output is human-readable tabular text only. Without `--output csv` or `--output json`, kugl cannot participate in pipelines or feed dashboards. + +5. **No shortcut parameters.** Shortcuts are static query aliases. The docs acknowledge this gap and suggest wrapper scripts as the workaround. Named parameter substitution (e.g., `{{namespace}}`) is needed for real team adoption. + +### Nice-to-Have + +- **Events table.** `kubectl get events` is one of the most-used debugging commands; it should be built in. +- **PVs/PVCs.** Important for stateful workloads. +- **RBAC tables.** Roles, RoleBindings, ClusterRoles for security auditing. +- **Metrics integration.** Joining `kubectl top pods` data with resource requests would enable requests-vs-actual-usage analysis. +- **Shell completions,** especially for shortcuts. +- **Richer `--schema` output** (columns, types, source paths). + +--- + +## Comparison to Steampipe (Kubernetes plugin) + +| Capability | Kugl | Steampipe | +|---|---|---| +| Built-in resource types | pods, jobs, nodes + labels/taints | All standard K8s types | +| SQL dialect | SQLite | PostgreSQL (full) | +| CPU/memory type handling | Auto-converted to numerics | Likely raw strings/JSONB | +| Adding a label column | 4 lines of YAML | Go code + rebuild + reinstall | +| Adding a new resource type | YAML `create:` block | Go plugin with K8s client call | +| Ecosystem integration | CLI output only | Postgres wire protocol (Grafana, psql, etc.) 
| +| Multi-cluster | Not supported | Aggregator plugins | +| Cross-source joins | Experimental | Core feature, 100+ plugins | +| Caching | Built-in TTL cache | Plugin-level | +| Maintenance | Personal project | Turbot-backed, active community | + +Steampipe's Kubernetes plugin likely does **not** pre-convert CPU/memory strings to numerics — this appears to be a genuine and specific kugl advantage for resource utilization queries. + +--- + +## Extension Mechanism + +### Current model + +Users add columns via `~/.kugl/init.yaml` or `~/.kugl/kubernetes.yaml`: + +```yaml +extend: + - table: nodes + columns: + - name: node_pool + type: text + label: karpenter.sh/nodepool # shortcut for metadata.labels."..." + - name: provider_id + type: text + path: spec.providerID # JMESPath expression +``` + +Special kugl types (`size`, `age`, `cpu`, `date`) handle K8s-specific string-to-numeric conversion. + +Multi-row-per-resource tables (e.g., one row per container or taint) use `row_source:` — a sequential JMESPath pipeline — with `^` prefix to reference parent-level fields. + +### Friction points + +1. **Two-vocabulary system (`path:` vs `label:`).** Users who don't know about `label:` write awkward quoted JMESPath: `metadata.labels."karpenter.sh/nodepool"`. The shortcut is useful but invisible until you need it. +2. **`path:` is a required key even when it's the only thing expressed.** Three keys for a conceptually one-line mapping. +3. **`row_source` + `^` parent references** are non-obvious, but affect only the minority of multi-row-per-resource cases. 
+ +### Recommended improvement: unified `from:` key + +Replace `path:` / `label:` with a single `from:` key that auto-detects the extraction type: +- Value containing `/` with no leading dot-path segment → label name (matches all real K8s labels) +- Otherwise → JMESPath expression + +```yaml +extend: + - table: nodes + columns: + - name: node_pool + type: text + from: karpenter.sh/nodepool # auto-detected as label + - name: provider_id + type: text + from: spec.providerID # auto-detected as JSON path +``` + +**Implementation:** add `from_` field to `UserColumn` in `config.py`; dispatch to `LabelExtractor` or `PathExtractor` in `gen_extractor` validator. Keep `path:` and `label:` for backward compatibility. Change is small and non-breaking. From 44392b3ea0af13b8091dcf113a03ffd922291ada Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 09:46:00 -0400 Subject: [PATCH 14/35] Doc tweaks --- docs/index.rst | 6 +++--- docs/multi.rst | 7 ++++--- docs/resources.rst | 6 +++--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 168d5f5..d9afe58 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,11 +8,11 @@ Kugl lets you use SQL to query Kubernetes resources. :caption: Contents: Usage + Recommended Configuration + Settings + Shortcuts Built-in Tables Extending Tables - Shortcuts - Settings - Recommended Configuration Other Resource Types Multi-schema Queries Breaking Changes diff --git a/docs/multi.rst b/docs/multi.rst index 1ae3f6d..07c6cdb 100644 --- a/docs/multi.rst +++ b/docs/multi.rst @@ -7,12 +7,13 @@ Other resource types ``PYTHONPATH.`` Kugl will refuse to read a configuration file that is world-writable. +.. warning:: + This is experimental, the functionality is pretty limited. + It's also subject to change. + Extending Kugl to AWS ~~~~~~~~~~~~~~~~~~~~~ -(So far this is just an experiment, the functionality is pretty -limited.) 
- Using the ``exec`` resource type described in `Other resource types <./resources.rst>`__, you can make AWS data available for query. For example: if ``~/.kugl/ec2.yaml`` contains diff --git a/docs/resources.rst b/docs/resources.rst index 06734d0..8285842 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -111,9 +111,9 @@ element offering the metadate extracted from the pattern match, example .. code:: json [ - { "match": {"region": "us-east-1" }, "content": { ... file contents ... } }, - { "match": {"region": "us-east-2" }, "content": { ... file contents ... } }, - { "match": {"region": "us-west-1" }, "content": { ... file contents ... } }, + { "match": {"region": "us-east-1" }, "content": "file contents as string" }, + { "match": {"region": "us-east-2" }, "content": "file contents as string" }, + { "match": {"region": "us-west-1" }, "content": "file contents as string" }, ] To build a table showing environment settings by region: From ad264c440520b01c3113046d39ec945bcc25229f Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 10:44:46 -0400 Subject: [PATCH 15/35] Include README subset in index.rst --- README.rst | 29 +++++++++++++++++------------ docs/index.rst | 27 ++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/README.rst b/README.rst index b495c75..3497d71 100644 --- a/README.rst +++ b/README.rst @@ -47,7 +47,7 @@ Installing Kugl requires Python 3.9 or later, and kubectl. 
**This is an alpha release.** Please expect bugs and -`backward-incompatible changes <./docs-tmp/breaking.md>`__ +`backward-incompatible changes `__ If you don't mind Kugl cluttering your Python with its `dependencies <./reqs_public.txt>`__: @@ -95,11 +95,11 @@ Find the pods using the most memory: kugl -a "select namespace, name, to_size(mem_req) from pods order by mem_req desc limit 15" -If this query is helpful, `save it <./docs-tmp/shortcuts.md>`__, then +If this query is helpful, `save it `__, then you can run ``kugl hi-mem``. Please also see the `recommended -configuration <./docs-tmp/recommended.md>`__. +configuration `__. How it works (important) ------------------------ @@ -124,24 +124,29 @@ be set in the config file as well. In any case, please be mindful of stale data and server load. +.. BEGIN_LEARN_MORE + Learn more ---------- -- `Command-line syntax <./docs-tmp/syntax.md>`__ -- `Recommended configuration <./docs-tmp/recommended.md>`__ -- `Settings <./docs-tmp/settings.md>`__ -- `Built-in tables and functions <./docs-tmp/builtins.md>`__ -- `Configuring new columns and tables <./docs-tmp/extending.md>`__ -- `Troubleshooting and feedback <./docs-tmp/trouble.md>`__ +- `Command-line syntax `__ +- `Recommended configuration `__ +- `Settings `__ +- `Shortcuts `__ +- `Built-in tables and functions `__ +- `Configuring new columns and tables `__ +- `Troubleshooting and feedback `__ - Beyond Kubernetes and kubectl - - `Other resource types <./docs-tmp/resources.md>`__ - - `Additional schemas <./docs-tmp/multi.md>`__ + - `Other resource types `__ + - `Additional schemas `__ - `Release notes <./CHANGELOG.md>`__ -- `Breaking changes <./docs-tmp/breaking.md>`__ +- `Breaking changes `__ - `License <./LICENSE>`__ +.. END_LEARN_MORE + Pronunciation ~~~~~~~~~~~~~ diff --git a/docs/index.rst b/docs/index.rst index d9afe58..9d413b0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,9 +1,30 @@ -Kugl Documentation -================== +.. 
include:: ../README.rst + :end-before: BEGIN_LEARN_MORE -Kugl lets you use SQL to query Kubernetes resources. +Learn more +---------- + +- :doc:`Command-line syntax ` +- :doc:`Recommended configuration ` +- :doc:`Settings ` +- :doc:`Shortcuts ` +- :doc:`Built-in tables and functions ` +- :doc:`Configuring new columns and tables ` +- :doc:`Troubleshooting and feedback ` +- Beyond Kubernetes and kubectl + + - :doc:`Other resource types ` + - :doc:`Additional schemas ` + +- `Release notes `__ +- :doc:`Breaking changes ` +- `License `__ + +.. include:: ../README.rst + :start-after: END_LEARN_MORE .. toctree:: + :hidden: :maxdepth: 1 :caption: Contents: From 31f0a190dd4f263b159ef61d3720016a73ad7d76 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 12:15:15 -0400 Subject: [PATCH 16/35] Replace GPU example with a universally-relevant node memory pressure query The old masthead required custom label configuration and was specific to ML/GPU workloads. The new example needs no configuration, joins pods and nodes to show per-node memory pressure, uses the synthesized status column to catch init-phase pods, and includes a working kubectl+jq equivalent that honestly illustrates the footguns: pipe-scope bugs, argv overflow on large clusters, memory suffix parsing, and the absence of to_size() formatting. Co-Authored-By: Claude Sonnet 4.6 --- README.rst | 69 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 25 deletions(-) diff --git a/README.rst b/README.rst index 3497d71..66a8839 100644 --- a/README.rst +++ b/README.rst @@ -6,40 +6,59 @@ Explore Kubernetes resources using SQLite. Example ------- -Find the top users of a GPU pool, based on instance type and a -team-specific pod label. +Find memory pressure by node — how much memory running and initializing +pods are requesting, versus what each node can allocate. No configuration +required. -With Kugl (and a bit of configuration for owner and instance type) +With Kugl: .. 
code:: shell - kugl -a "select owner, sum(gpu_req), sum(cpu_req) - from pods join nodes on pods.node_name = nodes.name - where instance_type like 'g5.%large' and pods.phase in ('Running', 'Pending') - group by 1 order by 2 desc limit 10" + kugl -a "select n.name, to_size(sum(p.mem_req)) as requested, to_size(n.mem_alloc) as allocatable + from nodes n join pods p on p.node_name = n.name + where p.phase = 'Running' or p.status like 'Init:%' + group by n.name order by sum(p.mem_req) desc" With ``kubectl`` and ``jq``, that's a little more work: .. code:: shell - kubectl get pods -o json --all | - jq -r --argjson nodes "$(kubectl get nodes -o json | jq '[.items[] - | select((.metadata.labels["node.kubernetes.io/instance-type"] // "") | test("g5.*large")) - | .metadata.name]')" \ - '[ .items[] - | select(.spec.nodeName as $node | $nodes | index($node)) - | select(.status.phase == "Running" or .status.phase == "Pending") - | . as $pod | $pod.spec.containers[] - | select(.resources.requests["nvidia.com/gpu"] != null) - | {owner: $pod.metadata.labels["com.mycompany/job-owner"], - gpu: .resources.requests["nvidia.com/gpu"], - cpu: .resources.requests["cpu"]} - ] | group_by(.owner) - | map({owner: .[0].owner, - gpu: map(.gpu | tonumber) | add, - cpu: map(.cpu | if test("m$") then (sub("m$"; "") | tonumber / 1000) else tonumber end) | add}) - | sort_by(-.gpu) | .[:10] | .[] - | "\(.owner) \(.gpu) \(.cpu)"' + { kubectl get nodes -o json; kubectl get pods -A -o json; } | jq -rn ' + def membytes: + if test("Ki$") then (gsub("Ki$"; "") | tonumber * 1024) + elif test("Mi$") then (gsub("Mi$"; "") | tonumber * 1048576) + elif test("Gi$") then (gsub("Gi$"; "") | tonumber * 1073741824) + elif test("K$") then (gsub("K$"; "") | tonumber * 1000) + elif test("M$") then (gsub("M$"; "") | tonumber * 1000000) + elif test("G$") then (gsub("G$"; "") | tonumber * 1000000000) + else tonumber end; + (input | .items | map({ + name: .metadata.name, + alloc: (.status.allocatable.memory | membytes) + 
}) | INDEX(.name)) as $nodeMap | + [input | .items[] | + select( + .status.phase == "Running" or + (((.spec.initContainers // []) | length) > 0 and + ((.status.initContainerStatuses // []) | map(select(.ready)) | length) < + ((.spec.initContainers // []) | length)) + ) | + select(.spec.nodeName) | + { + node: .spec.nodeName, + mem: ([.spec.containers[].resources.requests.memory // "0"] | map(membytes) | add) + } + ] | + group_by(.node) | + map({node: .[0].node, requested: (map(.mem) | add), allocatable: $nodeMap[.[0].node].alloc}) | + sort_by(-.requested)[] | + [.node, .requested, .allocatable] | @tsv' + +The ``jq`` version pipes both ``kubectl`` calls through a brace group to avoid +passing large JSON as a command-line argument — the ``--argjson`` alternative +fails with ``argument list too long`` on clusters with many nodes. It also +leaves byte values as raw integers; formatting them as ``to_size()`` does +requires another pass. Installing ---------- From 8433b67449eadde5541729b80101010ca4becd06 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 13:48:01 -0400 Subject: [PATCH 17/35] Tweak README --- README.rst | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/README.rst b/README.rst index 66a8839..4899f36 100644 --- a/README.rst +++ b/README.rst @@ -6,11 +6,9 @@ Explore Kubernetes resources using SQLite. Example ------- -Find memory pressure by node — how much memory running and initializing -pods are requesting, versus what each node can allocate. No configuration -required. - -With Kugl: +Report memory pressure by node — how much memory is requested by running and initializing +pods, versus what each node can allocate. Kugl understands Kubernetes memory and CPU +units natively, and offers ``kubectl``'s human-friendly status string as a column: .. 
code:: shell @@ -19,7 +17,18 @@ With Kugl: where p.phase = 'Running' or p.status like 'Init:%' group by n.name order by sum(p.mem_req) desc" -With ``kubectl`` and ``jq``, that's a little more work: +Result: + +.. code:: text + + name requested allocatable + ip-10-12-18-252.us-east-2.compute.internal 42Gi 59Gi + ip-10-12-188-56.us-east-2.compute.internal 36Gi 120Gi + ... + +With ``kubectl -o json`` and ``jq``, that's rather more work. Parsing units is your problem, +status is derived from multiple fields, joins are awkward, and this doesn't yet cover +output formatting: .. code:: shell @@ -54,12 +63,6 @@ With ``kubectl`` and ``jq``, that's a little more work: sort_by(-.requested)[] | [.node, .requested, .allocatable] | @tsv' -The ``jq`` version pipes both ``kubectl`` calls through a brace group to avoid -passing large JSON as a command-line argument — the ``--argjson`` alternative -fails with ``argument list too long`` on clusters with many nodes. It also -leaves byte values as raw integers; formatting them as ``to_size()`` does -requires another pass. 
- Installing ---------- From 587bdef02b26135429a462e686a32d511287cd15 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 16:25:48 -0400 Subject: [PATCH 18/35] Rename cache/reckless flags; add --context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - -c/--cache → -s/--stale - -u/--update → -r/--refresh - -r/--reckless → -q/--quiet (and reckless: → quiet: in settings) - -c is now free for --context (Kubernetes context switching) Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 2 +- README.rst | 6 +++--- docs-tmp/settings.md | 2 +- docs-tmp/syntax.md | 6 +++--- docs/breaking.rst | 13 ++++++++++++ docs/settings.rst | 2 +- docs/syntax.rst | 8 ++++---- kugl/builtins/schemas/kubernetes.py | 4 ++-- kugl/impl/config.py | 2 +- kugl/impl/engine.py | 2 +- kugl/main.py | 17 +++++++-------- tests/config/test_config.py | 8 ++++---- tests/test_cli.py | 32 ++++++++++++++--------------- 13 files changed, 59 insertions(+), 45 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index f3e4fce..968ca26 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -101,7 +101,7 @@ Top-level settings and shortcuts. Only this file may contain `settings:`. ```yaml settings: cache_timeout: 5m # default 2m - reckless: true # suppress stale-data warnings + quiet: true # suppress stale-data warnings init_path: # extra config folders, applied before ~/.kugl/ - ~/team-kugl diff --git a/README.rst b/README.rst index 4899f36..ff77e51 100644 --- a/README.rst +++ b/README.rst @@ -139,9 +139,9 @@ Server load by **caching responses for two minutes**. This is why it often prints "Data delayed up to ..." messages. Depending on your cluster activity, the cache can be a help or a -hindrance. You can suppress the "delayed" messages with the ``-r`` / -``--reckless`` option, or always update data using the ``-u`` / -``--update`` option. These behaviors, and the cache expiration time, can +hindrance. 
You can suppress the "delayed" messages with the ``-q`` / +``--quiet`` option, or always fetch fresh data using the ``-r`` / +``--refresh`` option. These behaviors, and the cache expiration time, can be set in the config file as well. In any case, please be mindful of stale data and server load. diff --git a/docs-tmp/settings.md b/docs-tmp/settings.md index de150c4..15fa92e 100644 --- a/docs-tmp/settings.md +++ b/docs-tmp/settings.md @@ -12,7 +12,7 @@ rather than on every usage from the command line. Example: ```yaml settings: cache_timeout: 5m - reckless: true + quiet: true ``` The `init_path` section of `settings` can be used to specify multiple configuration folders. diff --git a/docs-tmp/syntax.md b/docs-tmp/syntax.md index e40444e..8b80f6e 100644 --- a/docs-tmp/syntax.md +++ b/docs-tmp/syntax.md @@ -15,10 +15,10 @@ If your cluster is small, you could also (for instance) `alias kg="kugl -a"` and ### Cache control -* `-c, --cache` - Always use cached data, if available, regardless of its age -* `-r, --reckless` - Don't print stale data warnings +* `-s, --stale` - Always use cached data, if available, regardless of its age +* `-r, --refresh` - Always fetch fresh data from `kubectl`, regardless of data age +* `-q, --quiet` - Don't print stale data warnings * `-t, --timeout AGE` - Change the expiration time for cached data, e.g. `5m`, `1h`; the default is `2m` (two minutes) -* `-u, --update` - Always updated from `kubectl`, regardless of data age ## Other diff --git a/docs/breaking.rst b/docs/breaking.rst index c68a1a4..95e34bf 100644 --- a/docs/breaking.rst +++ b/docs/breaking.rst @@ -1,6 +1,19 @@ Breaking changes ---------------- +Kugl is still in alpha. +Please expect bugs and backward-incompatible changes. + +.. 
_080: + +0.8.0 +~~~~~ + +- Renamed ``-a`` option to ``-A`` for consistency with ``kubectl`` +- Renamed ``-c``/``--cache`` to ``-s``/``--stale`` +- Renamed ``-u``/``--update`` to ``-r``/``--refresh`` +- Renamed ``-r``/``--reckless`` to ``-q``/``--quiet`` (and ``reckless:`` in settings to ``quiet:``) + .. _050: 0.5.0 diff --git a/docs/settings.rst b/docs/settings.rst index 86c6563..6b5808b 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -14,7 +14,7 @@ Example: settings: cache_timeout: 5m - reckless: true + quiet: true The ``init_path`` section of ``settings`` can be used to specify multiple configuration folders. This is useful for team configuration diff --git a/docs/syntax.rst b/docs/syntax.rst index 3d94164..e399523 100644 --- a/docs/syntax.rst +++ b/docs/syntax.rst @@ -20,13 +20,13 @@ Most invocations of Kugl will need ``-a`` or ``-n namespace``, just like Cache control ~~~~~~~~~~~~~ -- ``-c, --cache`` - Always use cached data, if available, regardless of +- ``-s, --stale`` - Always use cached data, if available, regardless of its age -- ``-r, --reckless`` - Don't print stale data warnings +- ``-r, --refresh`` - Always fetch fresh data from ``kubectl``, + regardless of data age +- ``-q, --quiet`` - Don't print stale data warnings - ``-t, --timeout AGE`` - Change the expiration time for cached data, e.g. 
``5m``, ``1h``; the default is ``2m`` (two minutes) -- ``-u, --update`` - Always updated from ``kubectl``, regardless of data - age Other ~~~~~~~~~~~~~ diff --git a/kugl/builtins/schemas/kubernetes.py b/kugl/builtins/schemas/kubernetes.py index 2c39369..f4bc4ee 100644 --- a/kugl/builtins/schemas/kubernetes.py +++ b/kugl/builtins/schemas/kubernetes.py @@ -34,12 +34,12 @@ def set_cacheable(cls, resource: "KubernetesResource") -> "KubernetesResource": @classmethod def add_cli_options(cls, ap: ArgumentParser): - ap.add_argument("-a", "--all", "--all-namespaces", dest="all", default=False, action="store_true") + ap.add_argument("-A", "--all", "--all-namespaces", dest="all", default=False, action="store_true") ap.add_argument("-n", "--namespace", type=str) def handle_cli_options(self, args): if args.all and args.namespace: - fail("Cannot use both -a/--all and -n/--namespace") + fail("Cannot use both -A/--all and -n/--namespace") if args.all: self._ns = "__all" self._all_ns = True diff --git a/kugl/impl/config.py b/kugl/impl/config.py index 129467b..1c66e5a 100644 --- a/kugl/impl/config.py +++ b/kugl/impl/config.py @@ -35,7 +35,7 @@ class Settings(BaseModel): model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) cache_timeout: Union[Age, int] = Age(120) - reckless: bool = False + quiet: bool = False no_headers: bool = False init_path: list[str] = [] diff --git a/kugl/impl/engine.py b/kugl/impl/engine.py index 38f4508..bcf3942 100644 --- a/kugl/impl/engine.py +++ b/kugl/impl/engine.py @@ -122,7 +122,7 @@ def query(self, query: Query) -> Tuple[list[Tuple], list[str]]: for r in resource_refs: r.resource.handle_cli_options(self.args) refreshable, max_staleness = self.cache.advise_refresh(resource_refs, self.cache_flag) - if not self.settings.reckless and max_staleness is not None: + if not self.settings.quiet and max_staleness is not None: print(f"(Data may be up to {max_staleness} seconds old.)", file=sys.stderr) clock.CLOCK.sleep(0.5) diff --git 
a/kugl/main.py b/kugl/main.py index d4cdd9d..53c1c1e 100644 --- a/kugl/main.py +++ b/kugl/main.py @@ -101,23 +101,24 @@ def parse_args( argv: list[str], ap: ArgumentParser, settings: Settings ) -> tuple[argparse.Namespace, CacheFlag]: """Add stock arguments to parser, parse the command line, and override settings.""" + ap.add_argument("-c", "--context", type=str) ap.add_argument("-D", "--debug", type=str) - ap.add_argument("-c", "--cache", default=False, action="store_true") ap.add_argument("-H", "--no-headers", default=False, action="store_true") - ap.add_argument("-r", "--reckless", default=False, action="store_true") + ap.add_argument("-q", "--quiet", default=False, action="store_true") + ap.add_argument("-r", "--refresh", default=False, action="store_true") + ap.add_argument("-s", "--stale", default=False, action="store_true") ap.add_argument("-t", "--timeout", type=str) - ap.add_argument("-u", "--update", default=False, action="store_true") ap.add_argument("sql") args = ap.parse_args(argv) - if args.cache and args.update: - fail("Cannot use both -c/--cache and -u/--update") + if args.stale and args.refresh: + fail("Cannot use both -s/--stale and -r/--refresh") if args.timeout: settings.cache_timeout = Age(args.timeout) - if args.reckless: - settings.reckless = True + if args.quiet: + settings.quiet = True if args.no_headers: settings.no_headers = True - return args, (ALWAYS_UPDATE if args.update else NEVER_UPDATE if args.cache else CHECK) + return args, (ALWAYS_UPDATE if args.refresh else NEVER_UPDATE if args.stale else CHECK) def _merge_init_files() -> tuple[UserInit, dict[str, Shortcut]]: diff --git a/tests/config/test_config.py b/tests/config/test_config.py index 766bff3..dbb3f4e 100644 --- a/tests/config/test_config.py +++ b/tests/config/test_config.py @@ -19,7 +19,7 @@ def test_settings_defaults(): s = Settings() assert s.cache_timeout == Age(120) - assert not s.reckless + assert not s.quiet assert not s.no_headers assert s.init_path == [] @@ -30,7 +30,7 
@@ def test_settings_custom(monkeypatch): Settings, yaml.safe_load(""" cache_timeout: 5s - reckless: true + quiet: true no_headers: true init_path: - $FOO/abc @@ -39,7 +39,7 @@ def test_settings_custom(monkeypatch): """), ) assert s.cache_timeout == Age(5) - assert s.reckless + assert s.quiet assert s.no_headers assert s.init_path == ["/tmp/abc", "/tmp/xyz", "$BAR/xyz"] @@ -53,7 +53,7 @@ def test_empty_config(): def test_empty_init(): c = UserInit() assert c.settings.cache_timeout == Age(120) - assert not c.settings.reckless + assert not c.settings.quiet assert c.shortcuts == [] diff --git a/tests/test_cli.py b/tests/test_cli.py index 4eef2c1..6a433d0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -15,8 +15,8 @@ def test_enforce_cache_option(test_home): - with pytest.raises(KuglError, match="Cannot use both -c/--cache and -u/--update"): - main1(["-c", "-u", "select 1"]) + with pytest.raises(KuglError, match="Cannot use both -s/--stale and -r/--refresh"): + main1(["-s", "-r", "select 1"]) def test_enforce_cache_option_via_shortcut(test_home, capsys): @@ -24,16 +24,16 @@ def test_enforce_cache_option_via_shortcut(test_home, capsys): shortcuts: - name: foo args: - - -u + - -r - "select 1" """) - with pytest.raises(KuglError, match="Cannot use both -c/--cache and -u/--update"): - main1(["-c", "foo"]) + with pytest.raises(KuglError, match="Cannot use both -s/--stale and -r/--refresh"): + main1(["-s", "foo"]) def test_enforce_one_namespace_option(test_home): - with pytest.raises(KuglError, match="Cannot use both -a/--all and -n/--namespace"): - main1(["-a", "-n", "x", "select * from pods"]) + with pytest.raises(KuglError, match="Cannot use both -A/--all and -n/--namespace"): + main1(["-A", "-n", "x", "select * from pods"]) def test_no_such_table(test_home): @@ -65,7 +65,7 @@ def test_unknown_option_in_shortcut(test_home, capsys): - "select * from pods" """) with pytest.raises(SystemExit): - main1(["-a", "foo"]) + main1(["-A", "foo"]) assert "unrecognized 
arguments: --badoption" in capsys.readouterr().err @@ -77,22 +77,22 @@ def test_no_headers(test_home, capsys): @pytest.mark.parametrize( - "argv,expected_flag,age,reckless,error", + "argv,expected_flag,age,quiet,error", [ - (["-u", "select 1"], ALWAYS_UPDATE, Age(120), False, None), + (["-r", "select 1"], ALWAYS_UPDATE, Age(120), False, None), (["-t", "5", "select 1"], CHECK, Age(5), False, None), - (["-c", "-r", "select 1"], NEVER_UPDATE, Age(120), True, None), + (["-s", "-q", "select 1"], NEVER_UPDATE, Age(120), True, None), ( - ["-c", "-u", "select 1"], + ["-s", "-r", "select 1"], None, None, None, - "Cannot use both -c/--cache and -u/--update", + "Cannot use both -s/--stale and -r/--refresh", ), ], ) -def test_parse_args(test_home, argv, expected_flag, age, reckless, error): - """Verify correct values received for -u, -t, -c, -r options""" +def test_parse_args(test_home, argv, expected_flag, age, quiet, error): + """Verify correct values received for -r, -t, -s, -q options""" ap = ArgumentParser() settings = Settings() if error: @@ -102,7 +102,7 @@ def test_parse_args(test_home, argv, expected_flag, age, reckless, error): args, actual_flag = parse_args(argv, ap, settings) assert actual_flag == expected_flag assert settings.cache_timeout == age - assert settings.reckless == reckless + assert settings.quiet == quiet def test_init_command(test_home, capsys): From 5199ce46ce6de1cd6079afd0684d53e7116c3b80 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 16:37:22 -0400 Subject: [PATCH 19/35] Wire --context flag through to kubectl invocations Pass -c/--context to KubernetesResource so kubectl calls include --context , and use it as the cache path prefix instead of reading the active kubeconfig context. 
Co-Authored-By: Claude Sonnet 4.6 --- kugl/builtins/schemas/kubernetes.py | 18 +++++++++++++----- tests/k8s/kubectl | 9 +++++++-- tests/resource/test_cache.py | 2 +- tests/testing.py | 2 +- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/kugl/builtins/schemas/kubernetes.py b/kugl/builtins/schemas/kubernetes.py index f4bc4ee..cb9b22e 100644 --- a/kugl/builtins/schemas/kubernetes.py +++ b/kugl/builtins/schemas/kubernetes.py @@ -10,6 +10,7 @@ import os from argparse import ArgumentParser from threading import Thread +from typing import Optional from pydantic import model_validator @@ -20,9 +21,14 @@ @resource_type("kubernetes", schema_defaults=["kubernetes"]) class KubernetesResource(Resource): + # Does 'kubectl get' for this resource need a --namespace flag? namespaced: bool + # User specified -A on the command line _all_ns: bool - _ns: str + # User specified -n on the command line and this is the namespace + _ns: Optional[str] + # User specified -c on the command line and this is the context + _context: Optional[str] @model_validator(mode="after") @classmethod @@ -46,9 +52,10 @@ def handle_cli_options(self, args): else: self._ns = args.namespace or "default" self._all_ns = False + self._context = args.context def cache_path(self) -> str: - return f"{kube_context()}/{self._ns}.{self.name}.json" + return f"{self._context or kube_context()}/{self._ns}.{self.name}.json" def get_objects(self) -> dict: """Fetch resources from Kubernetes using kubectl. 
@@ -56,13 +63,14 @@ def get_objects(self) -> dict: :return: JSON as output by "kubectl get {self.name} -o json" """ unit_testing = "KUGL_UNIT_TESTING" in os.environ + context_flag = ["--context", self._context] if self._context else [] namespace_flag = ["--all-namespaces"] if self._all_ns else ["-n", self._ns] if self.name == "pods": pod_statuses = {} # Kick off a thread to get pod statuses def _fetch(): - _, output, _ = run(["kubectl", "get", "pods", *namespace_flag]) + _, output, _ = run(["kubectl", *context_flag, "get", "pods", *namespace_flag]) pod_statuses.update(self._pod_status_from_pod_list(output)) status_thread = Thread(target=_fetch, daemon=True) @@ -71,9 +79,9 @@ def _fetch(): if unit_testing: status_thread.join() if self.namespaced: - _, output, _ = run(["kubectl", "get", self.name, *namespace_flag, "-o", "json"]) + _, output, _ = run(["kubectl", *context_flag, "get", self.name, *namespace_flag, "-o", "json"]) else: - _, output, _ = run(["kubectl", "get", self.name, "-o", "json"]) + _, output, _ = run(["kubectl", *context_flag, "get", self.name, "-o", "json"]) data = json.loads(output) if self.name == "pods": # Add pod status to pods diff --git a/tests/k8s/kubectl b/tests/k8s/kubectl index 28900eb..d6f22aa 100755 --- a/tests/k8s/kubectl +++ b/tests/k8s/kubectl @@ -8,7 +8,12 @@ from pathlib import Path import re import sys -args = " ".join(sys.argv[1:]) +argv = sys.argv[1:] +if "--context" in argv: + idx = argv.index("--context") + argv = argv[:idx] + argv[idx+2:] + +args = " ".join(argv) if m := re.match("get (pods|jobs|cronjobs|services|deployments|events|things) (-n default|--all-namespaces) -o json", args): kind = m.group(1) elif re.match("get pods (-n default|--all-namespaces)", args): @@ -20,4 +25,4 @@ else: mockdir = Path(os.environ["KUGL_MOCKDIR"]) content = mockdir.joinpath(kind).read_text() -print(content) \ No newline at end of file +print(content) diff --git a/tests/resource/test_cache.py b/tests/resource/test_cache.py index 13aff36..58f3bba 
100644 --- a/tests/resource/test_cache.py +++ b/tests/resource/test_cache.py @@ -24,7 +24,7 @@ def test_cache(test_home, capsys): all_res = {pods, jobs, nodes, events} for r in all_res: - r.resource.handle_cli_options(SimpleNamespace(namespace="foo", all=False)) + r.resource.handle_cli_options(SimpleNamespace(namespace="foo", all=False, context=None)) # Pretend we have cached data for pods, nodes, and events, but not jobs. diff --git a/tests/testing.py b/tests/testing.py index 5d6eec2..83f20a6 100644 --- a/tests/testing.py +++ b/tests/testing.py @@ -24,7 +24,7 @@ def assert_query(sql: str, expected: Union[str, list], all_ns: bool = False): caller can indent for neatness. Or, if a list, each item will be checked in order. :param all_ns: FIXME temporary hack until we get namespaces out of engine.py """ - args = SimpleNamespace(all=all_ns, namespace=None) + args = SimpleNamespace(all=all_ns, namespace=None, context=None) engine = Engine(args, ALWAYS_UPDATE, Settings()) if isinstance(expected, str): actual = engine.query_and_format(Query(sql)) From 7165b84686862e34eced5e4fa38b562476f83f75 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 16:38:20 -0400 Subject: [PATCH 20/35] Add missing doc for --context --- docs/syntax.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/syntax.rst b/docs/syntax.rst index e399523..64b0a64 100644 --- a/docs/syntax.rst +++ b/docs/syntax.rst @@ -16,6 +16,8 @@ Most invocations of Kugl will need ``-a`` or ``-n namespace``, just like Kubernetes resources. May not be combined with ``-n``. - ``-n, --namespace NS`` - Look in namespace ``NS`` for Kubernetes resources. May not be combined with ``-a``. +- ``-c, --context CONTEXT`` - Use context ``CONTEXT`` for Kubernetes + resources. If not specified, the current context is used. 
Cache control ~~~~~~~~~~~~~ From fad2b839f39735fe5735c0810cb85e5267a6aee3 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 16:43:30 -0400 Subject: [PATCH 21/35] Remove old Markdown docs --- docs-tmp/breaking.md | 9 -- docs-tmp/builtins.md | 103 ---------------- docs-tmp/extending.md | 257 ---------------------------------------- docs-tmp/multi.md | 73 ------------ docs-tmp/recommended.md | 44 ------- docs-tmp/resources.md | 122 ------------------- docs-tmp/settings.md | 23 ---- docs-tmp/shortcuts.md | 36 ------ docs-tmp/syntax.md | 25 ---- docs-tmp/trouble.md | 45 ------- 10 files changed, 737 deletions(-) delete mode 100644 docs-tmp/breaking.md delete mode 100644 docs-tmp/builtins.md delete mode 100644 docs-tmp/extending.md delete mode 100644 docs-tmp/multi.md delete mode 100644 docs-tmp/recommended.md delete mode 100644 docs-tmp/resources.md delete mode 100644 docs-tmp/settings.md delete mode 100644 docs-tmp/shortcuts.md delete mode 100644 docs-tmp/syntax.md delete mode 100644 docs-tmp/trouble.md diff --git a/docs-tmp/breaking.md b/docs-tmp/breaking.md deleted file mode 100644 index e390473..0000000 --- a/docs-tmp/breaking.md +++ /dev/null @@ -1,9 +0,0 @@ -# Breaking changes - -## 0.5.0 - -- Shortcut syntax in `init.yaml` has changed, but old syntax is still supported (a warning will be printed) - -## 0.4.2 - -- The `namespaced` field in a Kubernetes resource definition is now required. \ No newline at end of file diff --git a/docs-tmp/builtins.md b/docs-tmp/builtins.md deleted file mode 100644 index 4b37a3b..0000000 --- a/docs-tmp/builtins.md +++ /dev/null @@ -1,103 +0,0 @@ - -## Built-in tables - -A note about data types - -* Timestamps are stored as integers, representing seconds since the Unix epoch. Timestamps and deltas can be converted -back to strings like `2021-01-01 12:34:56Z` or `5d`, `4h30m` using the `to_utc` and `to_age` functions, below. 
-* Memory is stored as bytes, and can be coverted back to a string like `1Gi` or `3.4Mi` using the `to_size` function, below -* CPU and GPU limits are stored as floats - -### pods - -Built from `kubectl get pods`, one row per pod. Two calls are made to `get pods`, one to get textual outut -of the STATUS column, since this is difficult to determine from the pod detail. - -NOTE: some of the containers in a pod may have no limits expressed. If all have no limits for e.g. CPU, -`cpu_req` will be null; otherwise, to sum container resources, a null value will be treated as zero. - -| Column | Type | Description | -|---------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| name | TEXT | Pod name, from `metadata.name` | -| uid | TEXT | Pod UID, from `metadata.uid` | -| namespace | TEXT | Pod namespace, from `metadata.namespace` | -| node_name | TEXT | Node name, from `spec.nodeName` | -| phase | TEXT | Pod phase, from `status.phase` | -| status | TEXT | Pod status as reported by `kubectl get pods` | -| creation_ts | INTEGER | Pod creation timestamp, from `metadata.creationTimestamp` | -| deletion_ts | INTEGER | Pod deletion timestamp (or null) from `metadata.deletionTimestamp` | -| is_daemon | INTEGER | 1 if the pod is in a DaemonSet, 0 otherwise | -| command | TEXT | The concatenated command args from what appears to be the main container (look for containers named `main`, `app`, or `notebook`) else from the first container | -| cpu_req, gpu_req, mem_req | REAL | Sum of CPU, GPU and memory values from `resources.requests` in each `spec.containers`; GPU looks for the value tagged `nvidia.com/gpu` | -| cpu_lim, gpu_lim, mem_lim | REAL | Sum of CPU, GPU and memory values from `resources.limits` in each `spec.containers`; GPU looks for the value tagged `nvidia.com/gpu` (this isn't necessarily 
helpful, since limits can be absent) | - -### pod_labels - -Built from `kubectl get pods`, one row per label. - -| Column | Type | Description | -|------------|---------|----------------------------------------------------------| -| pod_uid | TEXT | Pod UID, from `metadata.uid` | -| key, value | TEXT | Label key and value from each entry in `metadata.labels` | - -### jobs - -Built from `kubectl get jobs`, one row per job - -| Column | Type | Description | -|---------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| name | TEXT | Job name, from `metadata.name` | -| uid | TEXT | Job UID, from `metadata.uid` | -| namespace | TEXT | Job namespace, from `metadata.namespace` | -| status | TEXT | Job status as described by [V1JobStatus](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1JobStatus.md) -- this is one of `Running`, `Complete`, `Suspended`, Failed`, `Unknown` | -| cpu_req, gpu_req, mem_req | REAL | Sum of CPU, GPU and memory values from `resources.requests` in each `spec.template.spec.containers`; GPU looks for the value tagged `nvidia.com/gpu` | -| cpu_lim, gpu_lim, mem_lim | REAL | Sum of CPU, GPU and memory values from `resources.limits` in each `spec.template.spec.containers`; GPU looks for the value tagged `nvidia.com/gpu` (this isn't necessarily helpful, since limits can be | - -### job_labels - -Built from `kubectl get jobs`, one row per label. - -| Column | Type | Description | -|------------|---------|----------------------------------------------------------| -| job_uid | TEXT | Job UID, from `metadata.uid` | -| key, value | TEXT | Label key and value from each entry in `metadata.labels` | - -### nodes - -Built from `kubectl get nodes`, one row per node. See [recommended configuration](./recommended.md) -about adding node instance type. 
- -| Column | Type | Description | -|---------------------------------|---------|-------------------------------------------------------------------------------------------------------------| -| name | TEXT | Node name, from `metadata.name` | -| uid | TEXT | Node UID, from `metadata.uid` | -| cpu_alloc, gpu_alloc, mem_alloc | REAL | CPU, GPU and memory values from `status.allocatable`; GPU looks for the value tagged `nvidia.com/gpu` | -| cpu_cap, gpu_cap, mem_cap | REAL | CPU GPU and memory values from `status.capacity`; GPU looks for the value tagged `nvidia.com/gpu` | - -### node_labels - -Built from `kubectl get nodes`, one row per label. - -| Column | Type | Description | -|------------|---------|----------------------------------------------------------| -| node_uid | TEXT | Node UID, from `metadata.uid` | -| key, value | TEXT | Label key and value from each entry in `metadata.labels` | - -### node_taints - -Built from `kubectl get nodes`, one row per taint - -| Column | Type | Description | -|:-------------------|---------|--------------------------------------------------------------| -| node_uid | TEXT | Node UID, from `metadata.uid` | -| key, value, effect | TEXT | Taint key, value and effect from each entry in `spec.taints` | - -## Built-in functions - -`now()` - returns the current time as an integer, in epoch seconds - -`to_utc(timestamp)` - convert epoch time to string form e.g. `YYYY-MM-DDTHH:MM:SSZ` - -`to_age(seconds)` - convert seconds to a more readable age string as seen in the `AGE` column of `kubectl get pods`, e.g. `5d`, `4h30m`. - -`to_size(bytes)` - convert a byte count to a more readable string, e.g. 
`1Gi`, `3.4Mi` \ No newline at end of file diff --git a/docs-tmp/extending.md b/docs-tmp/extending.md deleted file mode 100644 index c13df2f..0000000 --- a/docs-tmp/extending.md +++ /dev/null @@ -1,257 +0,0 @@ - -## Note - -Configuration files should be protected to the same degree as your shell scripts and anything -on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. - -## Adding columns to an existing table - -To extend a table, use the `extend:` section in `~/.kugl/init.yaml`. This is a list of table names, -each with a list of new columns. An extension column specifies the column name, its -SQLite type (one of `int`, `real`, `text`) and a [JMESPath](https://jmespath.org/) -expression showing how to extract the column value from the JSON form of the resource. - -Example - -```yaml -extend: - - # Add the "owner" column to the pods table as shown in the Kugl README - -- table: pods - columns: - - name: owner - type: text - path: metadata.labels."com.mycompany/ml-job-owner" - # Comments are optional; you can see these with 'kugl --schema' - comment: ML team member who submitted the work - - # Using Karpenter on AWS? Add the Karpenter node pool and AWS provider ID - # to the nodes table. - -- table: nodes - columns: - - name: node_pool - type: text - path: metadata.labels."karpenter.sh/nodepool" - - name: provider_id - type: text - path: spec.providerID -``` - -## Adding a new table - -This works just like extending a table, with these differences -* Use the `create:` section rather than `extend:` -* Provide the name of the resource argument to `kubectl get` -* If the resource isn't built in (like `pods` or `nodes`), declare the resource and indicate whether it's namespaced. - -Example: this defines a new resource type and table for Argo workflows. 
- -```yaml -resources: - - name: workflows - namespaced: true - -create: - - table: workflows - resource: workflows - columns: - - name: name - type: text - path: metadata.name - - name: uid - type: text - path: metadata.uid - - name: namespace - type: text - path: metadata.namespace - - name: status - type: text - path: metadata.labels."workflows.argoproj.io/phase" -``` - -## Column extractors and defaults - -You've seen how the `path` extractor works, using JMESPath to identify an element in -the response JSON. You can also use the `label` extractor, which is a shortcut to -`metadata.labels`, and can either be a single string or a list of labels to check in order - -There are some useful defaults as well: -* resources are namespaced by default -* resources in `kubernetes.yaml` default to type `kubernetes` -* the default column type is `text` - -Here's a more concise way of defining the `workflows` table, above - -```yaml -resources: - - name: workflows - -create: - - table: workflows - resource: workflows - columns: - - name: name - path: metadata.name - - name: uid - path: metadata.uid - - name: namespace - path: metadata.namespace - - name: status - label: workflows.argoproj.io/phase -``` - -## Parsing data into numeric columns - -`kubectl` response values like `50Mi` (of memory) are unhelpful in queries, since you can't treat -them numerically. Kugl fixes this, offering additional data types that can be used in the `type` field -of a column definition and automatically convert response values. 
- -| Kugl type | SQLite type | Description | -|------------|--------------|-----------------------------------------------------------------------------| -| `size` | `INTEGER` | Memory size in bytes; accepts values like `50Mi` | -| `age` | `INTEGER` | Time delta in seconds; accepts values like `5d` or `4h30m` | -| `cpu` | `REAL` | CPU limit or request; accepts values like `0.5` or `300m` | -| `date` | `INTEGER` | Unix epoch timestamp in seconds; accepts values like `2021-01-01T12:34:56Z` | - -## Generating multiple rows per response item - -It's rare for a `kubectl get` response item to map directly to a single row in a table. For example, -a node can have multiple taints, and a pod can have multiple containers. Kugl handles this using -the `row_source` field in a column definition. Here's how the `node_taints` built-in table is defined. - -```yaml -create: - - table: node_taints - resource: nodes - row_source: - - items - - spec.taints - columns: - - name: node_uid - path: ^metadata.uid - - name: key - path: key - - name: effect - path: effect -``` - -Each element in `row_source` is a JMESPath expression that selects items relative to the prior selector. -Only the last element in the list is used to generate a row, but `path`s can refer to any part of the chain. -Each `"^"` at the start of a `path` refers to the part of the response one level higher than the bottom -`row_source` element. In this case - -* `^metadata.uid` means the `.metadata.uid` in each element of the response `items` array -* `key` and `effect` refer to each taint in the `spec.taints` array - -The default `row_source` is just `items`, which is why the example `workflows` table shown earlier doesn't -need to specify it. - -This syntax also applies to the `label` extractor. For example, if the `row_source` of a table needs to -address Job metadata but also metadata from the Job pod template, you can write this: - -```yaml - ... 
- resource: jobs - row_source: - - items - - spec.template - columns: - - name: label_from_job - label: ^a-job-label - - name: label_from_pod - label: a-pod-label -``` - -### More about row_source - -In detail, here's how `row_source` is handled. -* Begin with a list containing a single element, which is the entire response JSON. -* Apply the first `row_source` expression to each element of this list to build a new list - * If the expression yields a non-list result, add it to the new list - * If the expression yields a list, add each item (not the whole list) to the new list - * In either case, establish a parent / child relationship between the old and new items -* Repeat with each successive `row_source` entry. - -This can produce surprising results if one step in the `row_source` list tries to do too much. -Let's say the `node_taints` table didn't need a `^metadata.uid` reference, so only requires the -taint lists. This source list would not work, because `.spec` is not a child of `.items`. - -```yaml -row_source: - - items.spec.taints -``` - -Addressing each element in `items` requires a JMESpath [projection](https://jmespath.org/tutorial.html#projections), -in this case `items[*].spec`. Continuing this with `.taints` in a single expression will then create a list of lists -that must be flattened: - -```yaml -row_source: - - items[*].spec.taints[] -``` - -Although the multi-step `row_source` is incrementally slower for large lists, it's clearly less error-prone than -projecting and flattening, so is the recommended approach. - -As noted in [Troubleshooting](./trouble.md), running with `--debug itemize` will show the intermediate results of -`row_source` processing. - -### Extracting from dicts - -JMESPath lacks adequate support for addressing dictionaries. For example, if you want to build a table of -keys and values from environmet settings in YAML, there is no construct that will give you key-value pairs -from the fragment below. 
You can get the keys, or the values, but not both. - -```yaml -... -env: - AWS_BUCKET_NAME: my_budket - AWS_REGION: us-east-1 - ... -``` - -Kugl has a simple workaround for this. A `row_source` entry can have additional processing options, and for any row -source entry that addresses a dictionary, you can add the option `"kv"` to get key-value pairs. For example, if you -have adressed the above YAML data with - -```yaml -row_source: - - env -``` - -Change this to - -```yaml -row_source: - - env; kv -``` - -and Kugl will present the dictionary as if the data source originally looked like this: - -```yaml -env: - - key: AWS_BUCKET_NAME - value: my_bucket - - key: AWS_REGION - value: us-east-1 -``` - -It's then straightforward to take columns from these items with - -```yaml -columns: - - name: variable - path: key - - name: value - path: value -``` - -## Tips - -If creating multiple tables from a resource, you should use the `uid` column (sourced from `metadata.uid`) -as a join key, since this is a guaranteed unique key. (Example: `nodes` and `node_labels`, `pods` and `pod_labels`.) - -The `utils:` section of `~/.kugl/init.yaml` is ignored during configuration parsing, so you can use it to store -reusable bits of YAML. \ No newline at end of file diff --git a/docs-tmp/multi.md b/docs-tmp/multi.md deleted file mode 100644 index 6992f4e..0000000 --- a/docs-tmp/multi.md +++ /dev/null @@ -1,73 +0,0 @@ -## Note - -Since configuration files can contain scripts, they should be protected to the same degree as your shell scripts -and anything on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. - -## Extending Kugl to AWS - -(So far this is just an experiment, the functionality is pretty limited.) - -Using the `exec` resource type described in [Other resource types](./docs-tmp/resources.md), you can -make AWS data available for query. 
For example: if `~/.kugl/ec2.yaml` contains - -```yaml -resources: - - name: instances - exec: aws ec2 describe-instances - -create: - - table: instances - resource: instances - row_source: - - Reservations - - Instances - columns: - - name: type - path: InstanceType - - name: zone - path: Placement.AvailabilityZone - - name: private_dns - path: PrivateDnsName - - name: state - path: State.Name - - name: launched - path: LaunchTime -``` - -you can write - -```shell -kugl "select type, zone, launched from ec2.instances where state = 'running'" -``` - -To make the instance data cacheable, you would need to use a cache key that varies based on your -AWS account settings, referencing something set in the environment. Kugl will use this to generate -the cache pathname. Example: - -```yaml -resources: - - name: instances - exec: aws ec2 describe-instances - cacheable: true - cache_key: $AWS_PROFILE -``` - -Obviously this has limited utility, since there's no way to filter the data before it's returned. -For example, you can't add an argument to a resource `exec` command based on the query terms. -This is still being developed. - -## Multi-schema queries - -You can also join across schemas. For example, given the above `instances` table, report on the -capacity per zone in an EKS cluster: - -```shell -kugl "SELECT e.zone, sum(n.cpu_alloc) as cpus, sum(n.gpu_alloc) as gpus - FROM kubernetes.nodes n - JOIN ec2.instances e ON n.name = e.hostname - GROUP BY 1 -``` - -Note the explicit use of a `kubernetes.` schema prefix. This is required when joining across schemas. -(While `kubernetes` is the default schema, you can't always rely on SQLite's search behavior for -unqualified table names. It's better to be explicit.) 
\ No newline at end of file diff --git a/docs-tmp/recommended.md b/docs-tmp/recommended.md deleted file mode 100644 index 2388e98..0000000 --- a/docs-tmp/recommended.md +++ /dev/null @@ -1,44 +0,0 @@ - -## Note - -Configuration files should be protected to the same degree as your shell scripts and anything -on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. - -## Recommended configuration - -Instance type is a useful column to have in the `nodes` table. Unfortunately, there is no standard -label for it. You can fix this with configuration. In `~/.kugl/kubernetes.yaml`, add - -```yaml -extend: - - table: nodes - columns: - - name: instance_type - label: - - node.kubernetes.io/instance-type - - beta.kubernetes.io/instance-type -``` - -This will handle common cases. If your cluster uses a different label, add it to the list. -You can use Kugl itself to find what's available, for example: - -```shell -kugl "select distinct key from node_labels where key like '%instance-type%'" -``` - -Once you've set up the correct labels, here's a handy report that reports available capacity, -partitioning nodes by instance type and `NoSchedule` / `NoExecute` taints: - -```shell -kugl " - WITH t AS ( - SELECT node_uid, group_concat(key) AS taints FROM node_taints - WHERE effect IN ('NoSchedule', 'NoExecute') GROUP BY 1 - ) - SELECT instance_type, count(1) AS count, sum(cpu_alloc) AS cpu, sum(gpu_alloc) AS gpu, t.taints - FROM nodes LEFT OUTER JOIN t ON t.node_uid = nodes.uid - GROUP BY 1, 5 ORDER BY 1, 5 -" -``` - -If this query is helpful, [save it](./shortcuts.md) for future use. 
diff --git a/docs-tmp/resources.md b/docs-tmp/resources.md deleted file mode 100644 index d9901f9..0000000 --- a/docs-tmp/resources.md +++ /dev/null @@ -1,122 +0,0 @@ -## Note - -Since configuration files can contain scripts, they should be protected to the same degree as your shell scripts -and anything on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. - -## Exec resources - -By replacing `file: pathname` with `exec: some command` you can have Kugl run any shell script that generates -JSON or YAML output. For example, this is equivalent to the above `file:` resource: - -```yaml -resource: - - name: kubeconfig - exec: cat ~/.kube/config -``` - -Unlike file resources, the results of running external commands can be cached, just as with Kubernetes resources. -To enable this, set `cacaheable: true` and provide a `cache_key` that will be used to generate the cache pathname. -This will need to have at least one environment variable reference, on the assumption that the command output -can vary based on the environment. - -For an example, see the table built on `aws ec2` [here](./multi.md). - -## File resources - -Kugl can be used to query YAML data in a file. For instance, this will implement a bit of `kubectl config get-contexts`. - -```yaml -resource: - - name: kubeconfig - file: ~/.kube/config - -create: - - table: contexts - resource: kubeconfig - row_source: - - contexts - columns: - - name: name - path: name - - name: cluster - path: context.cluster -``` - -Then - -```shell -kugl "select name, cluster from contexts" -``` - -(Not that helpful, but you may have much larger config files worth summarizing this way.) - -Environment variable references like `$HOME` are allowed in resource filenames. -Using `file: stdin` also works, and lets you pipe JSON or YAML to a Kugl query. - -## Folder resources - -These are like `file` resources except they can match files in a tree. 
Let's say you have a set of -configuration files per AWS region, with settings to be summarized from one specific file, example: - -```shell -~/env/us-east-1/config.yaml -~/env/us-east-2/config.yaml -~/env/us-west-1/config.yaml -... -``` - -Within each config file is a set of environment variables: - -```shell -env: - - name: AWS_REGION - value: us-east-1 - - name: AWS_ACCOUNT - value: 123456789012 - - name: AWS_VPC - value: vpc-12345678 -``` - -This folder resource definition will address each of the files. - -```yaml -resource: - - name: by_region - # The root of the folder tree - folder: ~/env - # Pattern to match files, as understood by Path.glob - glob: "**/config.yaml" - # Regexp to extract additional metadata from filenames - match: "env/(?P.+)/config.yaml" -``` - -The resource presents each file as a dictionary, with the `match` element offering the metadate extracted -from the pattern match, example - -```json -[ - { "match": {"region": "us-east-1" }, "content": { ... file contents ... } }, - { "match": {"region": "us-east-2" }, "content": { ... file contents ... } }, - { "match": {"region": "us-west-1" }, "content": { ... file contents ... } }, -] -``` - -To build a table showing environment settings by region: - -```yaml -create: - - table: env_settings - resource: by_region - row_source: - # Address each element in the result list - - "[]" - # Focus on the environment settings - - content.env - columns: - - name: region - path: ^match.region - - name: name - path: name - - name: value - path: value -``` diff --git a/docs-tmp/settings.md b/docs-tmp/settings.md deleted file mode 100644 index 15fa92e..0000000 --- a/docs-tmp/settings.md +++ /dev/null @@ -1,23 +0,0 @@ - -## Note - -Configuration files should be protected to the same degree as your shell scripts and anything -on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. 
- -## Settings - -The `settings` section in `~/.kugl/init.yaml` can be used to specify cache behaviors once, -rather than on every usage from the command line. Example: - -```yaml -settings: - cache_timeout: 5m - quiet: true -``` - -The `init_path` section of `settings` can be used to specify multiple configuration folders. -This is useful for team configuration files. [Shortcuts|./shortcuts.md] in `init.yaml` and -schema configurations in those folders will be applied before entries in `~/.kugl`. - -NOTE: other `init.yaml` fils can contain only shortcuts; the `settings` section of `init.yaml` -is valid only in `~/.kugl/init.yaml`. \ No newline at end of file diff --git a/docs-tmp/shortcuts.md b/docs-tmp/shortcuts.md deleted file mode 100644 index 1793766..0000000 --- a/docs-tmp/shortcuts.md +++ /dev/null @@ -1,36 +0,0 @@ - -## Saving queries - -The `shortcuts` section in `~/.kugl/init.yaml` is a map from query names to lists of command-line arguments. - -Example, to save the queries shown in the [README](../README.md) and in -[recommended configuration](./recommended.md), add this to `~/.kugl/init.yaml`: - -```yaml -shortcuts: - - - name: hi-mem - args: - - | - SELECT name, to_size(mem_req) FROM pods - WHERE phase = 'Running' - ORDER BY mem_req DESC LIMIT 15 - - - name: nodes - # Comment field is optional - comment: Schedulable vs unschedulable capacity - args: - - | - WITH t AS ( - SELECT node_uid, group_concat(key) AS taints FROM node_taints - WHERE effect IN ('NoSchedule', 'NoExecute') GROUP BY 1 - ) - SELECT instance_type, count(1) AS count, sum(cpu_alloc) AS cpu, sum(gpu_alloc) AS gpu, t.taints - FROM nodes LEFT OUTER JOIN t ON t.node_uid = nodes.uid - GROUP BY 1, 5 ORDER BY 1, 5 -``` - -To run, type `kugl hi-mem` or `kugl nodes`. - -Simple parameter substitution might be offered in the future, but if you -need more powerful templates, your own wrapper script is the short-term answer. 
\ No newline at end of file diff --git a/docs-tmp/syntax.md b/docs-tmp/syntax.md deleted file mode 100644 index 8b80f6e..0000000 --- a/docs-tmp/syntax.md +++ /dev/null @@ -1,25 +0,0 @@ - -## Usage - -```shell -kugl [options] [sql | shortcut] -``` - -### Kubernetes options - -Most invocations of Kugl will need `-a` or `-n namespace`, just like `kubectl`. -If your cluster is small, you could also (for instance) `alias kg="kugl -a"` and use `where namespace = ...` instead. - -* `-a, --all, --all-namespaces` - Look in all namespaces for Kubernetes resources. May not be combined with `-n`. -* `-n, --namespace NS` - Look in namespace `NS` for Kubernetes resources. May not be combined with `-a`. - -### Cache control - -* `-s, --stale` - Always use cached data, if available, regardless of its age -* `-r, --refresh` - Always fetch fresh data from `kubectl`, regardless of data age -* `-q, --quiet` - Don't print stale data warnings -* `-t, --timeout AGE` - Change the expiration time for cached data, e.g. `5m`, `1h`; the default is `2m` (two minutes) - -## Other - -* `-H, --no-header` -- Suppress column headers diff --git a/docs-tmp/trouble.md b/docs-tmp/trouble.md deleted file mode 100644 index 5a81cc5..0000000 --- a/docs-tmp/trouble.md +++ /dev/null @@ -1,45 +0,0 @@ - -### My query isn't working - -Don't forget to use `-n/--namespace ` or `-a/--all`. The `default` namespace in -Kubernetes often has few or no resources. - -Read the [JMESPath tutorial](https://jmespath.org/tutorial.html) -and [SQLite documentation](https://www.sqlite.org/docs.html) thoroughly. - -Debug `row_source` and `path` problems by installing [jp](https://github.com/jmespath/jp) and feeding -it examples of your JSON data. JMESPath and `jq` don't behave the same. 
- -Several flags are available for the `--debug` option, try whatever seems relevant: -* `--debug cache` prints the cache files consulted and what resources will be refreshed -* `--debug fetch` prints each invocation of `kubectl` -* `--debug folder` prints each file considered for a `folder` resource -* `--debug itemize` summarizes the item generated for each step in a `row_source` (verbose) -* `--debug extract` prints the source and value of every row, by column (verbose) -* `--debug sqlite` shows the SQL for all statements executed, including table creation - -These can be combined, e.g. `--debug fetch,itemize`. To turn on all debugging options, use `--debug all`. - -### I found a bug - -Help me help you! I don't have access to your Kubernetes cluster, so you'll have to capture the -neccessary detail. - -* Follow recommendations for debugging queries, above. -* Use a low-activity namespace if possible, so the amount of data involved is small. -* Try to reproduce the problem with as simple a query as possible, ideally on one table with no joins. -* Run the command with the relevant `--debug` options and include the output -* If possible, include the content of the cache files that are named in the debug output. - -If there is too much material, you can post it to a service like [Pastebin](https://pastebin.com). -If it includes secure information from your cluster, please redact it before posting. - -### Can I give feedback without opening an issue? - -Sure, you can email `kugl dot devel at gmail dot com`. - -### I didn't receive a response - -Like many open source committers, the author has a family and a day job. 🙂 - -Please be patient, and thank you for trying Kugl! 
\ No newline at end of file From aa5f1fb04106b303955eae156f070f03881f9c76 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 16:43:36 -0400 Subject: [PATCH 22/35] Update changelogs --- CHANGELOG.md | 21 +++++++++++++++++++++ docs/breaking.rst | 3 +++ 2 files changed, 24 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2379b69..3019c60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,24 @@ +## 0.8.0 + +New tables in ``kubernetes`` schema: + +- ``events`` +- ``cronjobs`` and ``cronjob_labels`` +- ``services`` and ``service_labels`` +- ``deployments`` and ``deployment_labels`` + +CLI changes: + +- Added ``-c``/``--context`` option to specify a Kubernetes context +- Renamed ``-a`` option to ``-A`` for consistency with ``kubectl`` +- Renamed ``-c``/``--cache`` to ``-s``/``--stale`` +- Renamed ``-u``/``--update`` to ``-r``/``--refresh`` +- Renamed ``-r``/``--reckless`` to ``-q``/``--quiet`` (and ``reckless:`` in settings to ``quiet:``) + +Other: + +- New masthead example of ``kugl`` vs ``kubectl | jq`` + ## 0.7.0 - Add `init` subcommand to generate `kubernetes.yaml` per recommended post-install configuration diff --git a/docs/breaking.rst b/docs/breaking.rst index 95e34bf..465b3e1 100644 --- a/docs/breaking.rst +++ b/docs/breaking.rst @@ -9,6 +9,9 @@ Please expect bugs and backward-incompatible changes. 
0.8.0 ~~~~~ +CLI changes: + +- Added ``-c``/``--context`` option to specify a Kubernetes context - Renamed ``-a`` option to ``-A`` for consistency with ``kubectl`` - Renamed ``-c``/``--cache`` to ``-s``/``--stale`` - Renamed ``-u``/``--update`` to ``-r``/``--refresh`` From b0e0ad2ca7dfa8c019fda2ea5040ded2e6315915 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 17:57:11 -0400 Subject: [PATCH 23/35] Doc tweak --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3019c60..25ff210 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,7 +61,7 @@ Other: - Allow environment variables in `file` resource paths - Fix the `exec` resource by adding a `cache_key` field; these resources would otherwise experience cache collisions - Resource cache paths and file formats have changed, and cache now lives in `~/.kuglcache` -- `rm -r ~/.kugl/cache` is recommended to clear obsolete files +- `rm -r ~/.kuglcache` is recommended to clear obsolete files ## 0.3.3 From 15119156bc5b24a903c2c1d44b725fec5ff8c6f7 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sat, 16 May 2026 21:20:43 -0400 Subject: [PATCH 24/35] Update row_source plan --- .claude/plans/row-source.md | 72 +++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/.claude/plans/row-source.md b/.claude/plans/row-source.md index 36a802a..16eae53 100644 --- a/.claude/plans/row-source.md +++ b/.claude/plans/row-source.md @@ -10,21 +10,14 @@ scope-aware path resolution. ### Goal -Replace the `^` parent-hop syntax with named scope references. Before: +Replace the `^` parent-hop syntax with named scope references. 
-```yaml -create: - - table: node_taints - resource: nodes - row_source: - - items - - spec.taints - columns: - - name: node_uid - path: ^metadata.uid # ^ = "go up one level" -``` +**Single row_source** (the common case — default `["items"]` or one explicit entry): +path expressions resolve against the one implicit object; no scope prefix required. -After: +**Multiple row_source entries**: every entry must carry `as `, and every path / +label expression must begin with an explicit scope name. There is no implicit +"current object" when more than one level exists. ```yaml create: @@ -32,10 +25,12 @@ create: resource: nodes row_source: - items as node - - spec.taints + - spec.taints as taint columns: - name: node_uid - path: node.metadata.uid # named scope, no counting required + path: node.metadata.uid + - name: taint_key + path: taint.key ``` ### Changes @@ -64,14 +59,14 @@ create: **`kugl/impl/extract.py` — `FieldRef` / `PathExtractor` / `LabelExtractor`** -- `FieldRef.parse` currently strips leading `^` characters. Extend it to also - detect a leading `.` prefix that could be a scope name. Store as - `scope_name: Optional[str]` and strip it from the target before JMESPath - compilation. +- `FieldRef.parse`: remove `^` handling; detect a leading `.` prefix as a + scope name. Store as `scope_name: Optional[str]` and strip it from the target + before JMESPath compilation. - In `PathExtractor.extract` and `LabelExtractor.extract`, when `self._ref.scope_name` - is set, resolve the object via `context.get_scope(obj, scope_name)` instead of - `context.get_parent(obj, n_parents)`. -- Keep `^` handling intact for backward compatibility. + is set, resolve the object via `context.get_scope(obj, scope_name)`. +- Validation at table-build time (`TableFromConfig.__init__`): if `len(row_source) > 1`, + every `row_source` entry must have a name and every column path/label must carry a + scope prefix; raise a clear `ConfigError` if either constraint is violated. 
### Builtin Update @@ -81,10 +76,12 @@ as a self-contained example: ```yaml row_source: - items as node - - spec.taints + - spec.taints as taint columns: - name: node_uid path: node.metadata.uid + - name: taint_key + path: taint.key ``` ### Tests @@ -93,7 +90,9 @@ as a self-contained example: syntax produces the same output. - Add a new test with three levels of nesting (e.g. `pod → container → env`) using two named scopes, verifying that both ancestor levels are reachable by name. -- Add a test that `^` still works (backward compat). +- Add a test that `^` in a path raises a clear parse error. +- Add a test that a multi-step `row_source` with a missing `as` name raises a `ConfigError`. +- Add a test that a multi-step `row_source` with a bare (un-scoped) column path raises a `ConfigError`. --- @@ -104,16 +103,29 @@ as a self-contained example: Replace the two-key `path:` / `label:` vocabulary with a single `from:` key that auto-detects extraction type. Named scope prefixes compose naturally. +Single row_source (bare paths, no scope prefix needed): + ```yaml columns: - name: node_pool - from: karpenter.sh/nodepool # auto-detected: label (contains / before first .) + from: karpenter.sh/nodepool # auto-detected: label - name: provider_id - from: spec.providerID # auto-detected: JMESPath + from: spec.providerID # auto-detected: JMESPath +``` + +Multi-step row_source (all entries named, all columns scoped): + +```yaml + row_source: + - items as pod + - spec.containers as container + columns: - name: pod_name - from: pod.metadata.name # named scope + JMESPath + from: pod.metadata.name # named scope + JMESPath - name: pod_pool - from: pod.karpenter.sh/nodepool # named scope + label + from: pod.karpenter.sh/nodepool # named scope + label + - name: container_name + from: container.name # named scope + JMESPath ``` ### Auto-Detection Rule @@ -143,7 +155,7 @@ this correctly because `metadata.labels.foo` is not a valid DNS domain segment. 
**`kugl/impl/extract.py` — `FieldRef`** -- Move the scope-prefix parsing here (it's already handling `^`); `gen_extractor` +- Move the scope-prefix parsing here; `gen_extractor` delegates to `FieldRef.parse_from(s, known_scopes=None)`. - Known scopes are not available at Pydantic parse time (they live in `CreateTable` which is a sibling, not a parent). Two options: @@ -184,5 +196,3 @@ this correctly because `metadata.labels.foo` is not a valid DNS domain segment. - The broader resource-coverage gaps from `discuss.md` (deployments, containers table, etc.) are separate work and should not be bundled here. -- `^` removal: keep the old syntax working indefinitely. No deprecation warning needed - until named scopes have been in a release. From f1a0b7244e709ed3031801d0a0cbfaeedb40747e Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 09:43:01 -0400 Subject: [PATCH 25/35] Implement named scopes in row_source (Phase 1) Replace ^ parent-hop syntax with named scope references. Multi-step row_source entries must carry 'as ', and column paths/labels must be prefixed with a declared scope name. Single-step tables are unchanged. 
Co-Authored-By: Claude Sonnet 4.6 --- CLAUDE.md | 18 +++-- kugl/builtins/schemas/kubernetes.yaml | 10 +-- kugl/impl/config.py | 43 +++++++++++ kugl/impl/extract.py | 63 +++++++++------- kugl/impl/tables.py | 56 +++++++++++++-- tests/k8s/test_jobs.py | 8 +-- tests/resource/test_folder.py | 9 ++- tests/resource/test_row_source.py | 100 +++++++++++++++++--------- 8 files changed, 227 insertions(+), 80 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 968ca26..75aa272 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -145,9 +145,9 @@ Two extractor types, specified by keyword in a column definition: **`path:`** — JMESPath expression into the current row item **`label:`** — shortcut to `metadata.labels`; can be a list to try in order -**Parent navigation** — prefix `^` chars to walk up the `row_source` chain: -- `^metadata.uid` means `.metadata.uid` one level above the current item -- `^^foo` means two levels up +**Named scope navigation** — in multi-step `row_source`, each entry must carry `as `, and every column path/label must be prefixed with a scope name: +- `node.metadata.uid` extracts `metadata.uid` from the object named `node` at a higher level +- All named scopes from ancestor levels are available at each step ## Column Types @@ -171,12 +171,18 @@ Multi-step JMESPath iteration for generating multiple rows per API response item ```yaml row_source: - - items # step 1: each element of the top-level items array - - spec.taints # step 2: each taint within each node + - items as node # step 1: each element of the top-level items array, named "node" + - spec.taints as taint # step 2: each taint within each node, named "taint" +columns: + - name: node_uid + path: node.metadata.uid # scope prefix "node." resolves to the step-1 object + - name: taint_key + path: taint.key # scope prefix "taint." 
resolves to the step-2 object ``` - Each step applies to results of the prior step -- Parent/child relationships are tracked for `^` path navigation +- Multi-step tables require `as ` on every entry; all column paths/labels must carry a scope prefix +- Single-step tables use bare JMESPath paths with no scope prefix - Dict sources can be unpacked to key/value pairs with `; kv` suffix: `- env; kv` - Default `row_source` is `["items"]` diff --git a/kugl/builtins/schemas/kubernetes.yaml b/kugl/builtins/schemas/kubernetes.yaml index 525798d..e7d4df4 100644 --- a/kugl/builtins/schemas/kubernetes.yaml +++ b/kugl/builtins/schemas/kubernetes.yaml @@ -25,15 +25,15 @@ create: - table: node_taints resource: nodes row_source: - - items - - spec.taints + - items as node + - spec.taints as taint columns: - name: node_uid - path: ^metadata.uid + path: node.metadata.uid comment: node UID, from metadata.uid - name: key - path: key + path: taint.key comment: taint key - name: effect - path: effect + path: taint.effect comment: taint effect diff --git a/kugl/impl/config.py b/kugl/impl/config.py index 1c66e5a..a449769 100644 --- a/kugl/impl/config.py +++ b/kugl/impl/config.py @@ -3,6 +3,7 @@ """ from os.path import expandvars, expanduser +import re from typing import Optional, Tuple, Callable, Union import jmespath @@ -158,6 +159,48 @@ def gen_extractor(cls, column: "UserColumn") -> "UserColumn": def extract(self, obj: object, context) -> object: return self._extractor(obj, context) + def rebuild_for_scope(self, scope_names: set, table_name: str): + """Re-create the extractor with scope awareness for multi-step row_source tables. + + Called at TableFromConfig build time when scope names are known. 
+ """ + _SCOPE_PREFIX = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\.(.+)$") + if self.path: + m = _SCOPE_PREFIX.match(self.path) + if m and m.group(1) in scope_names: + scope_name, target = m.group(1), m.group(2) + else: + fail( + f"Table '{table_name}', column '{self.name}': " + f"path '{self.path}' must begin with a scope name " + f"(one of: {sorted(scope_names)})" + ) + try: + self._extractor = PathExtractor(self.name, self.type, target, scope_name=scope_name) + except ValueError as e: + fail(str(e)) + elif self.label: + labels = self.label if isinstance(self.label, list) else [self.label] + scope_name = None + stripped_labels = [] + for label in labels: + m = _SCOPE_PREFIX.match(label) + if m and m.group(1) in scope_names: + if scope_name and scope_name != m.group(1): + fail( + f"Table '{table_name}', column '{self.name}': " + f"all labels must use the same scope name" + ) + scope_name = m.group(1) + stripped_labels.append(m.group(2)) + else: + fail( + f"Table '{table_name}', column '{self.name}': " + f"label '{label}' must begin with a scope name " + f"(one of: {sorted(scope_names)})" + ) + self._extractor = LabelExtractor(self.name, self.type, stripped_labels, scope_name=scope_name) + class ExtendTable(BaseModel): """Holds the extend: section from a user config file.""" diff --git a/kugl/impl/extract.py b/kugl/impl/extract.py index 5a05803..757377e 100644 --- a/kugl/impl/extract.py +++ b/kugl/impl/extract.py @@ -5,14 +5,13 @@ from dataclasses import dataclass import re -from typing import Literal +from typing import Literal, Optional import jmespath from kugl.util import parse_utc, parse_age, parse_size, parse_cpu, abbreviate, fail ColumnType = Literal["text", "integer", "real", "date", "age", "size", "cpu"] -PARENTED_PATH = re.compile(r"^(\^*)(.*)") KUGL_TYPE_CONVERTERS = { # Valid choices for column type in config -> function to extract that from a string @@ -39,15 +38,17 @@ @dataclass class FieldRef: - """Parsed form of a parented JMESPath expression or label, 
e.g. '^^metadata.name'""" + """Parsed form of a potentially-scoped JMESPath expression or label.""" - n_parents: int + scope_name: Optional[str] target: str @classmethod - def parse(cls, s): - m = PARENTED_PATH.match(s) - return cls(len(m.group(1)), m.group(2)) + def parse(cls, s: str) -> "FieldRef": + """Parse a path/label string, raising if ^ syntax is used.""" + if "^" in s: + fail("^ parent navigation is no longer supported; use named row_source scopes instead") + return cls(None, s) class Extractor: @@ -81,22 +82,28 @@ def __call__(self, obj: object, context) -> object: class LabelExtractor(Extractor): """Extract a column value from the first matching label in a list of labels.""" - def __init__(self, column_name: str, column_type: ColumnType, labels: list[str]): + def __init__(self, column_name: str, column_type: ColumnType, labels: list[str], + scope_name: Optional[str] = None): super().__init__(column_name, column_type) + for label in labels: + if "^" in label: + raise ValueError( + f"^ parent navigation is no longer supported in column {column_name}; " + f"use named row_source scopes instead" + ) self._labels = labels - self._refs = [FieldRef.parse(label) for label in labels] + self._scope_name = scope_name def extract(self, obj: object, context) -> object: """Resolve the metadata location for each label and see if the label is present.""" - for ref in self._refs: - if ref.n_parents > 0: - obj = context.get_parent(obj, ref.n_parents) + if self._scope_name: + obj = context.get_scope(obj, self._scope_name) if obj is None: - fail(f"Missing parent or too many ^ while evaluating {ref.target}") - if available := obj.get("metadata", {}).get("labels", {}): - # If the label is present here, return the value here, even if null - if ref.target in available: - return available[ref.target] + fail(f"Unknown scope '{self._scope_name}' for column '{self.column_name}'") + if available := obj.get("metadata", {}).get("labels", {}): + for label in self._labels: + if label in 
available: + return available[label] def __str__(self): """For debug output""" @@ -106,23 +113,29 @@ def __str__(self): class PathExtractor(Extractor): """Extract a column value from the target of a JMESPath expression.""" - def __init__(self, column_name: str, column_type: ColumnType, path: str): + def __init__(self, column_name: str, column_type: ColumnType, path: str, + scope_name: Optional[str] = None): super().__init__(column_name, column_type) - self._ref = FieldRef.parse(path) + if "^" in path: + raise ValueError( + f"^ parent navigation is no longer supported in column {column_name}; " + f"use named row_source scopes instead" + ) + self._scope_name = scope_name self._path = path try: - self._finder = jmespath.compile(self._ref.target) + self._finder = jmespath.compile(path) except jmespath.exceptions.ParseError as e: raise ValueError( - f"invalid JMESPath expression {self._ref.target} in column {column_name}" + f"invalid JMESPath expression {path} in column {column_name}" ) from e def extract(self, obj: object, context) -> object: """Extract a value from an object using a JMESPath finder.""" - if self._ref.n_parents > 0: - obj = context.get_parent(obj, self._ref.n_parents) - if obj is None: - fail(f"Missing parent or too many ^ while evaluating {self._path}") + if self._scope_name: + obj = context.get_scope(obj, self._scope_name) + if obj is None: + fail(f"Unknown scope '{self._scope_name}' for column '{self.column_name}'") return self._finder.search(obj) def __str__(self): diff --git a/kugl/impl/tables.py b/kugl/impl/tables.py index cbd578e..9577fb9 100644 --- a/kugl/impl/tables.py +++ b/kugl/impl/tables.py @@ -4,6 +4,7 @@ """ from dataclasses import dataclass +import re from typing import Optional, Type import jmespath @@ -129,6 +130,16 @@ def __init__( creator.columns + (extender.columns if extender else []), ) self.row_source = [Itemizer.parse(x, name) for x in (creator.row_source or ["items"])] + if len(self.row_source) > 1: + scope_names = {s.name for 
s in self.row_source if s.name is not None} + unnamed = [s.expr for s in self.row_source if s.name is None] + if unnamed: + fail( + f"Table '{name}': multi-step row_source entries must all have 'as '; " + f"missing for: {unnamed}" + ) + for column in self.added_columns: + column.rebuild_for_scope(scope_names, name) def make_rows(self, context: "RowContext") -> list[tuple[dict, tuple]]: """ @@ -165,6 +176,8 @@ def _itemize(self, context: "RowContext") -> list[dict]: # Fix #132 -- don't do this at pass 0, or it sets the parent to the entire # response object. context.set_parent(child, item) + if source.name is not None: + context.set_scope(child, source.name, item if index > 0 else None) new_items.append(child) if debug: debug("add " + abbreviate(child)) @@ -172,6 +185,8 @@ def _itemize(self, context: "RowContext") -> list[dict]: if index > 0: # See comment above. context.set_parent(found, item) + if source.name is not None: + context.set_scope(found, source.name, item if index > 0 else None) new_items.append(found) if debug: debug("add " + abbreviate(found)) @@ -184,12 +199,15 @@ class RowContext: Primarily, the `.data` attribute holds the JSON data from 'kubectl get' or similar. The `.set_parent` and `.get_parent` methods allow row-generating functions to track - parent objects as they iterate through nested data structures.""" + parent objects as they iterate through nested data structures. 
+ The `.set_scope` and `.get_scope` methods support named scope resolution for + multi-step row_source tables.""" def __init__(self, data): self.data = data self.debug = debugging("extract") self._parents = {} + self._scopes = {} def set_parent(self, child, parent): self._parents[id(child)] = parent @@ -200,21 +218,38 @@ def get_parent(self, child, depth: int = 1): depth -= 1 return child + def set_scope(self, child, name: str, parent=None): + """Register child as the named scope, inheriting ancestor scopes from parent.""" + parent_scopes = self._scopes.get(id(parent), {}) if parent is not None else {} + child_scopes = dict(parent_scopes) + child_scopes[name] = child + self._scopes[id(child)] = child_scopes + + def get_scope(self, obj, name: str): + """Look up a named scope for obj, returning None if not found.""" + return self._scopes.get(id(obj), {}).get(name) + @dataclass class Itemizer: """Helper class to hold information parsed from one line of a row_source""" - # Original row_source expression + # JMESPath expression (without the 'as ' suffix) expr: str # JMESPath expression to find the items finder: ParsedResult # Should dictionaries be unpacked to a key/value array unpack: bool + # Optional scope name from 'as ' suffix + name: Optional[str] = None @classmethod def parse(cls, s: str, table_name: str): - """Parse a line from the row_source section of a config file""" + """Parse a line from the row_source section of a config file. + + Syntax: 'expr [as name][; kv]' + """ + # Split off options (;kv etc.) 
parts = s.split(";") if len(parts) == 1: unpack = False @@ -222,7 +257,18 @@ def parse(cls, s: str, table_name: str): unpack = True else: fail(f"Invalid row_source options: {s}") + + # Parse 'expr as name' from the expression part + expr_part = parts[0].strip() + name = None + as_index = expr_part.find(" as ") + if as_index >= 0: + name = expr_part[as_index + 4:].strip() + expr_part = expr_part[:as_index].strip() + if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name): + fail(f"Invalid scope name '{name}' in row_source: {s}") + try: - return Itemizer(s, jmespath.compile(parts[0]), unpack) + return Itemizer(expr=expr_part, finder=jmespath.compile(expr_part), unpack=unpack, name=name) except jmespath.exceptions.ParseError as e: - fail(f"invalid row_source {parts[0]} for table {table_name}", e) + fail(f"invalid row_source {expr_part} for table {table_name}", e) diff --git a/tests/k8s/test_jobs.py b/tests/k8s/test_jobs.py index d8d6b26..584640f 100644 --- a/tests/k8s/test_jobs.py +++ b/tests/k8s/test_jobs.py @@ -77,13 +77,13 @@ def test_label_parents(test_home): - table: job_users resource: jobs row_source: - - items - - spec.template + - items as job + - spec.template as template columns: - name: job_username - label: ^user + label: job.user - name: pod_username - label: user + label: template.user """) kubectl_response( "jobs", diff --git a/tests/resource/test_folder.py b/tests/resource/test_folder.py index 1b928ec..e9687ea 100644 --- a/tests/resource/test_folder.py +++ b/tests/resource/test_folder.py @@ -74,9 +74,12 @@ def test_folder_content(hr, tmp_path, capsys): match="(?P[^/]+)/data.yaml", ) # Update the row_source of the people table to match the folder data layout. - config["create"][0]["row_source"] = ["[]", "content"] - # Add a column to capture the region. 
- config["create"][0]["columns"].append(dict(name="region", path="^match.region")) + config["create"][0]["row_source"] = ["[] as file", "content as person"] + config["create"][0]["columns"] = [ + dict(name="name", path="person.name"), + dict(name="age", path="person.age", type="integer"), + dict(name="region", path="file.match.region"), + ] hr.save(config) with features_debugged("folder"): assert_query( diff --git a/tests/resource/test_row_source.py b/tests/resource/test_row_source.py index 50c1667..0cff5ea 100644 --- a/tests/resource/test_row_source.py +++ b/tests/resource/test_row_source.py @@ -11,8 +11,8 @@ from ..k8s.k8s_mocks import kubectl_response -def test_too_many_parents(test_home): - """Ensure correct error when a parent field reference is too long.""" +def test_caret_rejected(test_home): + """Ensure ^ parent navigation raises a clear error.""" kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" resources: - name: things @@ -22,18 +22,10 @@ def test_too_many_parents(test_home): resource: things columns: - name: something - path: ^^^invalid + path: ^parent """) - kubectl_response( - "things", - { - "items": [ - {"something": "foo"}, - {"something": "foo"}, - ] - }, - ) - with pytest.raises(KuglError, match="Missing parent or too many . 
while evaluating ...invalid"): + kubectl_response("things", {"items": [{"something": "foo"}]}) + with pytest.raises(KuglError, match=r"\^ parent navigation is no longer supported"): assert_query("SELECT * FROM things", "") @@ -46,13 +38,13 @@ def test_too_many_parents(test_home): - table: things resource: things row_source: - - items - - children + - items as item + - children as child columns: - name: parent_id - path: ^parent + path: item.parent - name: val - path: val + path: child.val """ @pytest.mark.parametrize("items,expected", [ @@ -82,7 +74,7 @@ def test_too_many_parents(test_home): ), ]) def test_multi_step_row_source(test_home, items, expected): - """Multi-step row_source with ^ parent navigation; also checks empty sublists produce no rows.""" + """Multi-step row_source with named scopes; also checks empty sublists produce no rows.""" kugl_home().prep().joinpath("kubernetes.yaml").write_text( _MULTI_STEP_CONFIG.format(items=json.dumps(items)) ) @@ -90,7 +82,7 @@ def test_multi_step_row_source(test_home, items, expected): def test_kv_with_parent_nav(test_home): - """'; kv' expansion combined with ^ to reference a field from the parent item.""" + """'; kv' expansion combined with named scope to reference a field from the parent item.""" kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" resources: - name: things @@ -107,15 +99,15 @@ def test_kv_with_parent_nav(test_home): - table: things resource: things row_source: - - items - - env; kv + - items as item + - env as kv_pair; kv columns: - name: service - path: ^service + path: item.service - name: key - path: key + path: kv_pair.key - name: value - path: value + path: kv_pair.value """) assert_query( "SELECT * FROM things ORDER BY service, key", @@ -128,8 +120,8 @@ def test_kv_with_parent_nav(test_home): ) -def test_double_parent_nav(test_home): - """^^ navigates two levels up through a three-step row_source chain.""" +def test_three_level_named_scopes(test_home): + """Three-step row_source with 
named scopes; verifies ancestor scopes are reachable.""" kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" resources: - name: things @@ -148,16 +140,16 @@ def test_double_parent_nav(test_home): - table: things resource: things row_source: - - items - - groups - - tags + - items as section_item + - groups as group + - tags as tag_item columns: - name: section - path: ^^section + path: section_item.section - name: grp - path: ^grp + path: group.grp - name: tag - path: tag + path: tag_item.tag """) assert_query( "SELECT * FROM things ORDER BY section, grp, tag", @@ -170,6 +162,50 @@ def test_double_parent_nav(test_home): ) +def test_missing_scope_name(test_home): + """Multi-step row_source without 'as ' raises a ConfigError.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - children: [{val: a}] + create: + - table: things + resource: things + row_source: + - items as item + - children + columns: + - name: val + path: item.val + """) + with pytest.raises(KuglError, match="must all have 'as '"): + assert_query("SELECT * FROM things", "") + + +def test_unscoped_column_in_multi_step(test_home): + """Multi-step row_source with a bare (un-scoped) column path raises a ConfigError.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - children: [{val: a}] + create: + - table: things + resource: things + row_source: + - items as item + - children as child + columns: + - name: val + path: val + """) + with pytest.raises(KuglError, match="must begin with a scope name"): + assert_query("SELECT * FROM things", "") + + def test_data_dict_expansion(test_home): """Verify the behavior of the '; kv' option in row_source""" kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" From 9402acfa18a95dfbb875ccf7fb79ba91a512c70d Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 10:12:15 -0400 Subject: [PATCH 26/35] Move scope 
parsing into FieldRef.parse_scoped Centralizes the scope-prefix detection logic that was inline in UserColumn.rebuild_for_scope, making FieldRef the single place responsible for splitting a scoped expression into name + target. Co-Authored-By: Claude Sonnet 4.6 --- kugl/impl/config.py | 31 +++++++++++++------------------ kugl/impl/extract.py | 17 +++++++++++++++++ 2 files changed, 30 insertions(+), 18 deletions(-) diff --git a/kugl/impl/config.py b/kugl/impl/config.py index a449769..aad9099 100644 --- a/kugl/impl/config.py +++ b/kugl/impl/config.py @@ -3,14 +3,13 @@ """ from os.path import expandvars, expanduser -import re from typing import Optional, Tuple, Callable, Union import jmespath from pydantic import BaseModel, ConfigDict, ValidationError from pydantic.functional_validators import model_validator -from .extract import ColumnType, KUGL_TYPE_TO_SQL_TYPE, LabelExtractor, PathExtractor +from .extract import ColumnType, KUGL_TYPE_TO_SQL_TYPE, FieldRef, LabelExtractor, PathExtractor from kugl.util import ( Age, ConfigPath, @@ -164,19 +163,16 @@ def rebuild_for_scope(self, scope_names: set, table_name: str): Called at TableFromConfig build time when scope names are known. 
""" - _SCOPE_PREFIX = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\.(.+)$") if self.path: - m = _SCOPE_PREFIX.match(self.path) - if m and m.group(1) in scope_names: - scope_name, target = m.group(1), m.group(2) - else: + ref = FieldRef.parse_scoped(self.path, scope_names) + if ref.scope_name is None: fail( f"Table '{table_name}', column '{self.name}': " f"path '{self.path}' must begin with a scope name " f"(one of: {sorted(scope_names)})" ) try: - self._extractor = PathExtractor(self.name, self.type, target, scope_name=scope_name) + self._extractor = PathExtractor(self.name, self.type, ref.target, scope_name=ref.scope_name) except ValueError as e: fail(str(e)) elif self.label: @@ -184,21 +180,20 @@ def rebuild_for_scope(self, scope_names: set, table_name: str): scope_name = None stripped_labels = [] for label in labels: - m = _SCOPE_PREFIX.match(label) - if m and m.group(1) in scope_names: - if scope_name and scope_name != m.group(1): - fail( - f"Table '{table_name}', column '{self.name}': " - f"all labels must use the same scope name" - ) - scope_name = m.group(1) - stripped_labels.append(m.group(2)) - else: + ref = FieldRef.parse_scoped(label, scope_names) + if ref.scope_name is None: fail( f"Table '{table_name}', column '{self.name}': " f"label '{label}' must begin with a scope name " f"(one of: {sorted(scope_names)})" ) + if scope_name and scope_name != ref.scope_name: + fail( + f"Table '{table_name}', column '{self.name}': " + f"all labels must use the same scope name" + ) + scope_name = ref.scope_name + stripped_labels.append(ref.target) self._extractor = LabelExtractor(self.name, self.type, stripped_labels, scope_name=scope_name) diff --git a/kugl/impl/extract.py b/kugl/impl/extract.py index 757377e..c3f258b 100644 --- a/kugl/impl/extract.py +++ b/kugl/impl/extract.py @@ -36,6 +36,9 @@ } +_SCOPE_PREFIX = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\.(.+)$") + + @dataclass class FieldRef: """Parsed form of a potentially-scoped JMESPath expression or label.""" @@ -50,6 
+53,20 @@ def parse(cls, s: str) -> "FieldRef": fail("^ parent navigation is no longer supported; use named row_source scopes instead") return cls(None, s) + @classmethod + def parse_scoped(cls, s: str, scope_names: set) -> "FieldRef": + """Parse a path/label string, detecting a scope prefix if it matches a declared scope name. + + Returns FieldRef with scope_name=None if the leading word is not a declared scope, + leaving the full string as the target. + """ + if "^" in s: + fail("^ parent navigation is no longer supported; use named row_source scopes instead") + m = _SCOPE_PREFIX.match(s) + if m and m.group(1) in scope_names: + return cls(m.group(1), m.group(2)) + return cls(None, s) + class Extractor: """Base class for JSON field -> column value extractor. This is a Callable with common From 08cf705772d41d67dfa283297be1d1dd16e0f7fd Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 10:14:57 -0400 Subject: [PATCH 27/35] Remove unused method --- kugl/impl/extract.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/kugl/impl/extract.py b/kugl/impl/extract.py index c3f258b..e9adbd0 100644 --- a/kugl/impl/extract.py +++ b/kugl/impl/extract.py @@ -46,13 +46,6 @@ class FieldRef: scope_name: Optional[str] target: str - @classmethod - def parse(cls, s: str) -> "FieldRef": - """Parse a path/label string, raising if ^ syntax is used.""" - if "^" in s: - fail("^ parent navigation is no longer supported; use named row_source scopes instead") - return cls(None, s) - @classmethod def parse_scoped(cls, s: str, scope_names: set) -> "FieldRef": """Parse a path/label string, detecting a scope prefix if it matches a declared scope name. 
From 2df46624d07a89b38f13ce72a925d1e1abb79af9 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 10:19:38 -0400 Subject: [PATCH 28/35] Clarify field name: Itemizer.scope_name --- kugl/impl/tables.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/kugl/impl/tables.py b/kugl/impl/tables.py index 9577fb9..0f9c56d 100644 --- a/kugl/impl/tables.py +++ b/kugl/impl/tables.py @@ -131,8 +131,8 @@ def __init__( ) self.row_source = [Itemizer.parse(x, name) for x in (creator.row_source or ["items"])] if len(self.row_source) > 1: - scope_names = {s.name for s in self.row_source if s.name is not None} - unnamed = [s.expr for s in self.row_source if s.name is None] + scope_names = {s.scope_name for s in self.row_source if s.scope_name is not None} + unnamed = [s.expr for s in self.row_source if s.scope_name is None] if unnamed: fail( f"Table '{name}': multi-step row_source entries must all have 'as '; " @@ -176,8 +176,8 @@ def _itemize(self, context: "RowContext") -> list[dict]: # Fix #132 -- don't do this at pass 0, or it sets the parent to the entire # response object. context.set_parent(child, item) - if source.name is not None: - context.set_scope(child, source.name, item if index > 0 else None) + if source.scope_name is not None: + context.set_scope(child, source.scope_name, item if index > 0 else None) new_items.append(child) if debug: debug("add " + abbreviate(child)) @@ -185,8 +185,8 @@ def _itemize(self, context: "RowContext") -> list[dict]: if index > 0: # See comment above. 
context.set_parent(found, item) - if source.name is not None: - context.set_scope(found, source.name, item if index > 0 else None) + if source.scope_name is not None: + context.set_scope(found, source.scope_name, item if index > 0 else None) new_items.append(found) if debug: debug("add " + abbreviate(found)) @@ -241,7 +241,7 @@ class Itemizer: # Should dictionaries be unpacked to a key/value array unpack: bool # Optional scope name from 'as ' suffix - name: Optional[str] = None + scope_name: Optional[str] = None @classmethod def parse(cls, s: str, table_name: str): @@ -269,6 +269,6 @@ def parse(cls, s: str, table_name: str): fail(f"Invalid scope name '{name}' in row_source: {s}") try: - return Itemizer(expr=expr_part, finder=jmespath.compile(expr_part), unpack=unpack, name=name) + return Itemizer(expr=expr_part, finder=jmespath.compile(expr_part), unpack=unpack, scope_name=name) except jmespath.exceptions.ParseError as e: fail(f"invalid row_source {expr_part} for table {table_name}", e) From 48da528c8c1b2c238e9cf92ac9cc3bdfb9219eed Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 10:53:27 -0400 Subject: [PATCH 29/35] Child scope optimization --- kugl/impl/tables.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kugl/impl/tables.py b/kugl/impl/tables.py index 0f9c56d..af84c7e 100644 --- a/kugl/impl/tables.py +++ b/kugl/impl/tables.py @@ -171,13 +171,15 @@ def _itemize(self, context: "RowContext") -> list[dict]: if isinstance(found, dict) and source.unpack: found = [{"key": k, "value": v} for k, v in found.items()] if isinstance(found, list): + # Compute base scopes once for all children from this item. + base_scopes = (context._scopes.get(id(item), {}) if index > 0 else {}) if source.scope_name else None for child in found: if index > 0: # Fix #132 -- don't do this at pass 0, or it sets the parent to the entire # response object. 
context.set_parent(child, item) if source.scope_name is not None: - context.set_scope(child, source.scope_name, item if index > 0 else None) + context.set_scope_with_base(child, source.scope_name, base_scopes) new_items.append(child) if debug: debug("add " + abbreviate(child)) @@ -220,10 +222,12 @@ def get_parent(self, child, depth: int = 1): def set_scope(self, child, name: str, parent=None): """Register child as the named scope, inheriting ancestor scopes from parent.""" - parent_scopes = self._scopes.get(id(parent), {}) if parent is not None else {} - child_scopes = dict(parent_scopes) - child_scopes[name] = child - self._scopes[id(child)] = child_scopes + base = self._scopes.get(id(parent), {}) if parent is not None else {} + self.set_scope_with_base(child, name, base) + + def set_scope_with_base(self, child, name: str, base: dict): + """Like set_scope but accepts a pre-computed base scope dict.""" + self._scopes[id(child)] = {**base, name: child} def get_scope(self, obj, name: str): """Look up a named scope for obj, returning None if not found.""" From 2bd7b42a495a4b3c71045bc1cbbae37f6163908e Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 11:28:09 -0400 Subject: [PATCH 30/35] Switch scope reference syntax from prefix to suffix: 'expr in scope' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirrors the 'as ' suffix in row_source declarations — 'as' binds, 'in' references. Eliminates ambiguity when scope prefixes and label DNS domains collide (e.g. 'pod.karpenter.sh/nodepool'). 
Co-Authored-By: Claude Sonnet 4.6 --- .claude/plans/row-source.md | 83 ++++++++++++++------------- kugl/builtins/schemas/kubernetes.yaml | 6 +- kugl/impl/config.py | 10 +++- kugl/impl/extract.py | 12 ++-- tests/k8s/test_jobs.py | 4 +- tests/resource/test_folder.py | 6 +- tests/resource/test_row_source.py | 20 +++---- 7 files changed, 74 insertions(+), 67 deletions(-) diff --git a/.claude/plans/row-source.md b/.claude/plans/row-source.md index 16eae53..09178ec 100644 --- a/.claude/plans/row-source.md +++ b/.claude/plans/row-source.md @@ -4,19 +4,24 @@ Two related improvements to the YAML extension mechanism. They can be implemente sequentially on one branch or separately; Phase 1 is a prerequisite for Phase 2's scope-aware path resolution. +Scope references use a consistent `in ` suffix, mirroring the `as ` suffix +in `row_source` declarations: `as` binds a name, `in` references it. + --- ## Phase 1: Named Scopes in `row_source` +**Status: implemented with `.` prefix syntax — needs revision to `in ` suffix.** + ### Goal Replace the `^` parent-hop syntax with named scope references. **Single row_source** (the common case — default `["items"]` or one explicit entry): -path expressions resolve against the one implicit object; no scope prefix required. +path expressions resolve against the one implicit object; no scope qualifier required. **Multiple row_source entries**: every entry must carry `as `, and every path / -label expression must begin with an explicit scope name. There is no implicit +label expression must end with an explicit `in ` qualifier. There is no implicit "current object" when more than one level exists. ```yaml @@ -28,9 +33,9 @@ create: - spec.taints as taint columns: - name: node_uid - path: node.metadata.uid + path: metadata.uid in node - name: taint_key - path: taint.key + path: key in taint ``` ### Changes @@ -45,28 +50,26 @@ create: - Add `_scopes: dict[int, dict[str, object]]`. 
Key is `id(child)`; value is the map of scope names visible at that child's level. -- Update `set_parent` to also record named scopes: when a child is created from a - level that had a name, include that name → parent-object in the child's scope map, - merging with any scopes already inherited. -- Add `get_scope(child, name) -> Optional[object]` that walks up the scope chain - to find the named object. +- `set_scope(child, name, parent)` records the child's scope map, inheriting all + ancestor scopes from parent and adding `name → child`. +- Add `get_scope(obj, name) -> Optional[object]` that looks up the named object. **`kugl/impl/tables.py` — `TableFromConfig._itemize`** -- After calling `context.set_parent(child, item)`, also call a new +- After calling `context.set_parent(child, item)`, also call `context.set_scope(child, source.name, item)` when `source.name` is not None, carrying forward all ancestor scopes so deeper levels can still reference `node`. **`kugl/impl/extract.py` — `FieldRef` / `PathExtractor` / `LabelExtractor`** -- `FieldRef.parse`: remove `^` handling; detect a leading `.` prefix as a +- `FieldRef.parse`: remove `^` handling; detect a trailing ` in ` suffix as a scope name. Store as `scope_name: Optional[str]` and strip it from the target before JMESPath compilation. - In `PathExtractor.extract` and `LabelExtractor.extract`, when `self._ref.scope_name` is set, resolve the object via `context.get_scope(obj, scope_name)`. - Validation at table-build time (`TableFromConfig.__init__`): if `len(row_source) > 1`, - every `row_source` entry must have a name and every column path/label must carry a - scope prefix; raise a clear `ConfigError` if either constraint is violated. + every `row_source` entry must have a name and every column path/label must carry an + `in ` qualifier; raise a clear `ConfigError` if either constraint is violated. 
### Builtin Update @@ -79,9 +82,9 @@ as a self-contained example: - spec.taints as taint columns: - name: node_uid - path: node.metadata.uid + path: metadata.uid in node - name: taint_key - path: taint.key + path: key in taint ``` ### Tests @@ -101,9 +104,10 @@ as a self-contained example: ### Goal Replace the two-key `path:` / `label:` vocabulary with a single `from:` key that -auto-detects extraction type. Named scope prefixes compose naturally. +auto-detects extraction type. Named scope qualifiers compose naturally via the same +`in ` suffix. -Single row_source (bare paths, no scope prefix needed): +Single row_source (no scope qualifier needed): ```yaml columns: @@ -121,24 +125,27 @@ Multi-step row_source (all entries named, all columns scoped): - spec.containers as container columns: - name: pod_name - from: pod.metadata.name # named scope + JMESPath + from: metadata.name in pod # JMESPath on pod scope - name: pod_pool - from: pod.karpenter.sh/nodepool # named scope + label + from: karpenter.sh/nodepool in pod # label on pod scope — unambiguous - name: container_name - from: container.name # named scope + JMESPath + from: name in container # JMESPath on container scope ``` ### Auto-Detection Rule -After stripping any `.` prefix: +Strip any trailing ` in ` suffix first, then apply to the remainder: - Matches `[a-zA-Z0-9.-]+/[a-zA-Z0-9._/-]+` (K8s label format: DNS domain + `/` + key) → `LabelExtractor` - Otherwise → `PathExtractor` A value like `metadata.labels.foo/bar` is a JMESPath, not a label — the `/` appears -inside a path segment, not as the label-domain separator. The regex above handles -this correctly because `metadata.labels.foo` is not a valid DNS domain segment. +inside a path segment, not as the label-domain separator. The regex handles this +correctly because `metadata.labels.foo` is not a valid DNS domain segment. + +Parsing ` in ` is safe because neither JMESPath expressions nor label keys +contain spaces, so the delimiter is unambiguous. 
### Changes @@ -148,34 +155,30 @@ this correctly because `metadata.labels.foo` is not a valid DNS domain segment. because `from` is a Python keyword). - In `gen_extractor`, handle `from_` alongside `path` and `label`. - If `from_` is set alongside `path` or `label`, raise `ValueError`. - - Parse any scope prefix from `from_`. + - Strip any ` in ` suffix from `from_` to extract the scope name. - Apply the label-vs-path regex to the remainder. - Construct the appropriate extractor, passing the scope name through. - Keep `path:` and `label:` fully supported so existing configs are not broken. **`kugl/impl/extract.py` — `FieldRef`** -- Move the scope-prefix parsing here; `gen_extractor` - delegates to `FieldRef.parse_from(s, known_scopes=None)`. -- Known scopes are not available at Pydantic parse time (they live in `CreateTable` - which is a sibling, not a parent). Two options: - - **Lazy validation**: accept any `.` prefix as a potential scope; fail at - table-build time in `TableFromConfig.__init__` if a referenced scope name is not - declared in `row_source`. - - **Two-pass**: `CreateTable` validates column scope references after parsing. - Lazy validation is simpler and consistent with how `path:` expressions are - currently validated (JMESPath compilation errors surface at parse time, but - missing-path errors surface at query time). +- Centralise the ` in ` parsing in `FieldRef.parse_scoped(s)`; both + `gen_extractor` (for `from:`) and `FieldRef.parse` (for `path:`/`label:`) delegate + to it. +- Known scopes are not available at Pydantic parse time. Use lazy validation: accept + any ` in ` suffix as a potential scope; fail at table-build time in + `TableFromConfig.__init__` if the referenced scope name is not declared in + `row_source`. ### Tests - `from: karpenter.sh/nodepool` produces the same result as `label: karpenter.sh/nodepool`. - `from: spec.providerID` produces the same result as `path: spec.providerID`. 
-- `from: node.metadata.name` with a named `node` scope resolves correctly. -- `from: node.karpenter.sh/nodepool` with a named `node` scope resolves as a label - on the node object. +- `from: metadata.name in pod` with a named `pod` scope resolves correctly. +- `from: karpenter.sh/nodepool in pod` with a named `pod` scope resolves as a label + on the pod object. - Error: `from:` and `path:` both specified → validation error. -- Error: `from: unknownscope.foo` where `unknownscope` is not in `row_source` → clear +- Error: `from: foo in unknownscope` where `unknownscope` is not in `row_source` → clear error message at table-build time. --- @@ -184,7 +187,7 @@ this correctly because `metadata.labels.foo` is not a valid DNS domain segment. | File | Change | |---|---| -| `kugl/impl/extract.py` | `FieldRef.parse`: detect scope prefix; extractors: resolve via scope | +| `kugl/impl/extract.py` | `FieldRef.parse`: detect ` in ` suffix; extractors: resolve via scope | | `kugl/impl/tables.py` | `Itemizer`: parse `as `; `RowContext`: track named scopes | | `kugl/impl/config.py` | `UserColumn`: add `from_` field and dispatch in `gen_extractor` | | `kugl/builtins/schemas/kubernetes.yaml` | Convert `node_taints` to named scope syntax | diff --git a/kugl/builtins/schemas/kubernetes.yaml b/kugl/builtins/schemas/kubernetes.yaml index e7d4df4..6e19125 100644 --- a/kugl/builtins/schemas/kubernetes.yaml +++ b/kugl/builtins/schemas/kubernetes.yaml @@ -29,11 +29,11 @@ create: - spec.taints as taint columns: - name: node_uid - path: node.metadata.uid + path: metadata.uid in node comment: node UID, from metadata.uid - name: key - path: taint.key + path: key in taint comment: taint key - name: effect - path: taint.effect + path: effect in taint comment: taint effect diff --git a/kugl/impl/config.py b/kugl/impl/config.py index aad9099..e1cc8c7 100644 --- a/kugl/impl/config.py +++ b/kugl/impl/config.py @@ -2,6 +2,7 @@ Pydantic models for configuration files. 
""" +import re from os.path import expandvars, expanduser from typing import Optional, Tuple, Callable, Union @@ -146,7 +147,10 @@ def gen_extractor(cls, column: "UserColumn") -> "UserColumn": if column.path and column.label: raise ValueError("cannot specify both path and label") elif column.path: - column._extractor = PathExtractor(column.name, column.type, column.path) + # Strip any 'in ' suffix before JMESPath compilation; scope resolution + # is deferred to rebuild_for_scope when scope names are known. + path_target = re.sub(r"\s+in\s+[a-zA-Z_][a-zA-Z0-9_]*$", "", column.path) + column._extractor = PathExtractor(column.name, column.type, path_target) elif column.label: if not isinstance(column.label, list): column.label = [column.label] @@ -168,7 +172,7 @@ def rebuild_for_scope(self, scope_names: set, table_name: str): if ref.scope_name is None: fail( f"Table '{table_name}', column '{self.name}': " - f"path '{self.path}' must begin with a scope name " + f"path '{self.path}' must end with 'in ' " f"(one of: {sorted(scope_names)})" ) try: @@ -184,7 +188,7 @@ def rebuild_for_scope(self, scope_names: set, table_name: str): if ref.scope_name is None: fail( f"Table '{table_name}', column '{self.name}': " - f"label '{label}' must begin with a scope name " + f"label '{label}' must end with 'in ' " f"(one of: {sorted(scope_names)})" ) if scope_name and scope_name != ref.scope_name: diff --git a/kugl/impl/extract.py b/kugl/impl/extract.py index e9adbd0..d067b09 100644 --- a/kugl/impl/extract.py +++ b/kugl/impl/extract.py @@ -36,7 +36,7 @@ } -_SCOPE_PREFIX = re.compile(r"^([a-zA-Z_][a-zA-Z0-9_]*)\.(.+)$") +_SCOPE_SUFFIX = re.compile(r"^(.+)\s+in\s+([a-zA-Z_][a-zA-Z0-9_]*)$") @dataclass @@ -48,16 +48,16 @@ class FieldRef: @classmethod def parse_scoped(cls, s: str, scope_names: set) -> "FieldRef": - """Parse a path/label string, detecting a scope prefix if it matches a declared scope name. + """Parse a path/label string, detecting a trailing 'in ' scope qualifier. 
- Returns FieldRef with scope_name=None if the leading word is not a declared scope, + Returns FieldRef with scope_name=None if no matching qualifier is found, leaving the full string as the target. """ if "^" in s: fail("^ parent navigation is no longer supported; use named row_source scopes instead") - m = _SCOPE_PREFIX.match(s) - if m and m.group(1) in scope_names: - return cls(m.group(1), m.group(2)) + m = _SCOPE_SUFFIX.match(s) + if m and m.group(2) in scope_names: + return cls(m.group(2), m.group(1)) return cls(None, s) diff --git a/tests/k8s/test_jobs.py b/tests/k8s/test_jobs.py index 584640f..001f5e9 100644 --- a/tests/k8s/test_jobs.py +++ b/tests/k8s/test_jobs.py @@ -81,9 +81,9 @@ def test_label_parents(test_home): - spec.template as template columns: - name: job_username - label: job.user + label: user in job - name: pod_username - label: template.user + label: user in template """) kubectl_response( "jobs", diff --git a/tests/resource/test_folder.py b/tests/resource/test_folder.py index e9687ea..80a65d7 100644 --- a/tests/resource/test_folder.py +++ b/tests/resource/test_folder.py @@ -76,9 +76,9 @@ def test_folder_content(hr, tmp_path, capsys): # Update the row_source of the people table to match the folder data layout. 
config["create"][0]["row_source"] = ["[] as file", "content as person"] config["create"][0]["columns"] = [ - dict(name="name", path="person.name"), - dict(name="age", path="person.age", type="integer"), - dict(name="region", path="file.match.region"), + dict(name="name", path="name in person"), + dict(name="age", path="age in person", type="integer"), + dict(name="region", path="match.region in file"), ] hr.save(config) with features_debugged("folder"): diff --git a/tests/resource/test_row_source.py b/tests/resource/test_row_source.py index 0cff5ea..77ef0ff 100644 --- a/tests/resource/test_row_source.py +++ b/tests/resource/test_row_source.py @@ -42,9 +42,9 @@ def test_caret_rejected(test_home): - children as child columns: - name: parent_id - path: item.parent + path: parent in item - name: val - path: child.val + path: val in child """ @pytest.mark.parametrize("items,expected", [ @@ -103,11 +103,11 @@ def test_kv_with_parent_nav(test_home): - env as kv_pair; kv columns: - name: service - path: item.service + path: service in item - name: key - path: kv_pair.key + path: key in kv_pair - name: value - path: kv_pair.value + path: value in kv_pair """) assert_query( "SELECT * FROM things ORDER BY service, key", @@ -145,11 +145,11 @@ def test_three_level_named_scopes(test_home): - tags as tag_item columns: - name: section - path: section_item.section + path: section in section_item - name: grp - path: group.grp + path: grp in group - name: tag - path: tag_item.tag + path: tag in tag_item """) assert_query( "SELECT * FROM things ORDER BY section, grp, tag", @@ -178,7 +178,7 @@ def test_missing_scope_name(test_home): - children columns: - name: val - path: item.val + path: val in item """) with pytest.raises(KuglError, match="must all have 'as '"): assert_query("SELECT * FROM things", "") @@ -202,7 +202,7 @@ def test_unscoped_column_in_multi_step(test_home): - name: val path: val """) - with pytest.raises(KuglError, match="must begin with a scope name"): + with 
pytest.raises(KuglError, match="must end with 'in '"): assert_query("SELECT * FROM things", "") From d3b9fc0c0f58ff08fced06d5da5c573bf7d1a6bf Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 11:44:28 -0400 Subject: [PATCH 31/35] Add 'from:' column key with auto-detection of label vs path 'from: domain/key' resolves as a label extractor; 'from: jmespath.expr' resolves as a path extractor. Both support the 'in ' suffix for multi-step row_source tables. path: and label: remain fully supported. Co-Authored-By: Claude Sonnet 4.6 --- kugl/builtins/schemas/kubernetes.yaml | 6 +- kugl/impl/config.py | 44 ++++++-- kugl/impl/extract.py | 12 ++- tests/config/test_config.py | 2 +- tests/resource/test_row_source.py | 148 ++++++++++++++++++++++++++ 5 files changed, 199 insertions(+), 13 deletions(-) diff --git a/kugl/builtins/schemas/kubernetes.yaml b/kugl/builtins/schemas/kubernetes.yaml index 6e19125..b01f93f 100644 --- a/kugl/builtins/schemas/kubernetes.yaml +++ b/kugl/builtins/schemas/kubernetes.yaml @@ -29,11 +29,11 @@ create: - spec.taints as taint columns: - name: node_uid - path: metadata.uid in node + from: metadata.uid in node comment: node UID, from metadata.uid - name: key - path: key in taint + from: key in taint comment: taint key - name: effect - path: effect in taint + from: effect in taint comment: taint effect diff --git a/kugl/impl/config.py b/kugl/impl/config.py index e1cc8c7..0332f0a 100644 --- a/kugl/impl/config.py +++ b/kugl/impl/config.py @@ -7,10 +7,10 @@ from typing import Optional, Tuple, Callable, Union import jmespath -from pydantic import BaseModel, ConfigDict, ValidationError +from pydantic import BaseModel, ConfigDict, Field, ValidationError from pydantic.functional_validators import model_validator -from .extract import ColumnType, KUGL_TYPE_TO_SQL_TYPE, FieldRef, LabelExtractor, PathExtractor +from .extract import ColumnType, KUGL_TYPE_TO_SQL_TYPE, FieldRef, LabelExtractor, PathExtractor, is_label from kugl.util import ( Age, 
ConfigPath, @@ -125,9 +125,10 @@ def recognize_type(cls, column: "Column") -> "Column": class UserColumn(Column): """Holds one entry from a columns: list in a user config file.""" - model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) + model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True, populate_by_name=True) path: Optional[str] = None label: Optional[Union[str, list[str]]] = None + from_: Optional[str] = Field(None, alias="from") # Parsed value of self.path _finder: jmespath.parser.Parser # Number of ^ in self.path @@ -144,19 +145,33 @@ def gen_extractor(cls, column: "UserColumn") -> "UserColumn": Generate the Extractor instance for a column definition; given an object, it will return a column value of the appropriate type. """ - if column.path and column.label: + has_path = column.path is not None + has_label = column.label is not None + has_from = column.from_ is not None + + if has_from and (has_path or has_label): + raise ValueError("cannot specify 'from' alongside 'path' or 'label'") + if has_path and has_label: raise ValueError("cannot specify both path and label") - elif column.path: + + if has_from: + # Strip any 'in ' suffix; scope validation is deferred to rebuild_for_scope. + target = re.sub(r"\s+in\s+[a-zA-Z_][a-zA-Z0-9_]*$", "", column.from_) + if is_label(target): + column._extractor = LabelExtractor(column.name, column.type, [target]) + else: + column._extractor = PathExtractor(column.name, column.type, target) + elif has_path: # Strip any 'in ' suffix before JMESPath compilation; scope resolution # is deferred to rebuild_for_scope when scope names are known. 
path_target = re.sub(r"\s+in\s+[a-zA-Z_][a-zA-Z0-9_]*$", "", column.path) column._extractor = PathExtractor(column.name, column.type, path_target) - elif column.label: + elif has_label: if not isinstance(column.label, list): column.label = [column.label] column._extractor = LabelExtractor(column.name, column.type, column.label) else: - raise ValueError("must specify either path or label") + raise ValueError("must specify path, label, or from") return column def extract(self, obj: object, context) -> object: @@ -199,6 +214,21 @@ def rebuild_for_scope(self, scope_names: set, table_name: str): scope_name = ref.scope_name stripped_labels.append(ref.target) self._extractor = LabelExtractor(self.name, self.type, stripped_labels, scope_name=scope_name) + elif self.from_: + ref = FieldRef.parse_scoped(self.from_, scope_names) + if ref.scope_name is None: + fail( + f"Table '{table_name}', column '{self.name}': " + f"'from' value '{self.from_}' must end with 'in ' " + f"(one of: {sorted(scope_names)})" + ) + if is_label(ref.target): + self._extractor = LabelExtractor(self.name, self.type, [ref.target], scope_name=ref.scope_name) + else: + try: + self._extractor = PathExtractor(self.name, self.type, ref.target, scope_name=ref.scope_name) + except ValueError as e: + fail(str(e)) class ExtendTable(BaseModel): diff --git a/kugl/impl/extract.py b/kugl/impl/extract.py index d067b09..37f04bf 100644 --- a/kugl/impl/extract.py +++ b/kugl/impl/extract.py @@ -37,6 +37,12 @@ _SCOPE_SUFFIX = re.compile(r"^(.+)\s+in\s+([a-zA-Z_][a-zA-Z0-9_]*)$") +_LABEL_PATTERN = re.compile(r"^[a-zA-Z0-9.-]+/[a-zA-Z0-9._/-]+$") + + +def is_label(s: str) -> bool: + """Return True if s looks like a Kubernetes label key (domain/name format).""" + return bool(_LABEL_PATTERN.match(s)) @dataclass @@ -56,8 +62,10 @@ def parse_scoped(cls, s: str, scope_names: set) -> "FieldRef": if "^" in s: fail("^ parent navigation is no longer supported; use named row_source scopes instead") m = _SCOPE_SUFFIX.match(s) - if m 
and m.group(2) in scope_names: - return cls(m.group(2), m.group(1)) + if m: + if m.group(2) in scope_names: + return cls(m.group(2), m.group(1)) + fail(f"Unknown scope '{m.group(2)}'; valid scopes are: {sorted(scope_names)}") return cls(None, s) diff --git a/tests/config/test_config.py b/tests/config/test_config.py index dbb3f4e..5acc3aa 100644 --- a/tests/config/test_config.py +++ b/tests/config/test_config.py @@ -201,4 +201,4 @@ def test_must_have_path_or_label(): """), return_errors=True, ) - assert errors == ["columns.0: Value error, must specify either path or label"] + assert errors == ["columns.0: Value error, must specify path, label, or from"] diff --git a/tests/resource/test_row_source.py b/tests/resource/test_row_source.py index 77ef0ff..749ace6 100644 --- a/tests/resource/test_row_source.py +++ b/tests/resource/test_row_source.py @@ -206,6 +206,154 @@ def test_unscoped_column_in_multi_step(test_home): assert_query("SELECT * FROM things", "") +def test_from_detects_label(test_home): + """`from: domain/key` auto-detects as a label extractor.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - metadata: + labels: + test.io/group: team-a + create: + - table: things + resource: things + columns: + - name: grp + from: test.io/group + """) + assert_query("SELECT * FROM things", """ + grp + team-a + """) + + +def test_from_detects_path(test_home): + """`from: jmespath.expr` auto-detects as a path extractor.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - metadata: + name: my-thing + create: + - table: things + resource: things + columns: + - name: thing_name + from: metadata.name + """) + assert_query("SELECT * FROM things", """ + thing_name + my-thing + """) + + +def test_from_scoped_path(test_home): + """`from: expr in scope` resolves a JMESPath on the named scope.""" + 
kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - metadata: + name: pod-a + spec: + containers: + - name: c1 + - name: c2 + create: + - table: things + resource: things + row_source: + - items as pod + - spec.containers as container + columns: + - name: pod_name + from: metadata.name in pod + - name: container_name + from: name in container + """) + assert_query("SELECT * FROM things ORDER BY container_name", """ + pod_name container_name + pod-a c1 + pod-a c2 + """) + + +def test_from_scoped_label(test_home): + """`from: domain/key in scope` resolves as a label on the named scope object.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - metadata: + labels: + test.io/group: team-b + children: + - val: x + create: + - table: things + resource: things + row_source: + - items as item + - children as child + columns: + - name: grp + from: test.io/group in item + - name: val + from: val in child + """) + assert_query("SELECT * FROM things", """ + grp val + team-b x + """) + + +def test_from_conflicts_with_path(test_home): + """Specifying both `from` and `path` raises a validation error.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: [{val: a}] + create: + - table: things + resource: things + columns: + - name: val + from: val + path: val + """) + with pytest.raises(KuglError, match="cannot specify .from. 
alongside"): + assert_query("SELECT * FROM things", "") + + +def test_from_unknown_scope(test_home): + """`from: expr in unknownscope` raises a clear error at table-build time.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - children: [{val: a}] + create: + - table: things + resource: things + row_source: + - items as item + - children as child + columns: + - name: val + from: val in ghost + """) + with pytest.raises(KuglError, match="Unknown scope 'ghost'"): + assert_query("SELECT * FROM things", "") + + def test_data_dict_expansion(test_home): """Verify the behavior of the '; kv' option in row_source""" kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" From 8eadc6c63d3577b3cc48f6ef64030322b03987f4 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 12:04:22 -0400 Subject: [PATCH 32/35] Update docs, examples, and CHANGELOG for named scopes and from: key Replace ^ parent-hop syntax with as/in named scope syntax throughout docs and CLAUDE.md; add from: auto-detection description; bump CHANGELOG to 0.9.0. Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 9 ++++++++ CLAUDE.md | 17 +++++++------- docs/extending.rst | 57 +++++++++++++++++++++++++++------------------- docs/multi.rst | 14 ++++++------ docs/resources.rst | 12 ++++------ 5 files changed, 64 insertions(+), 45 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25ff210..0fee40c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,12 @@ +## 0.9.0 + +- Named scope syntax for multi-step ``row_source``: each entry takes ``as `` and + columns reference ancestor objects with ``in `` suffix (e.g. ``metadata.uid in node``); + the old ``^`` parent-hop syntax is removed +- New ``from:`` column key that auto-detects label vs JMESPath: values matching + ``domain/key`` format (e.g. 
``karpenter.sh/nodepool``) use label extraction, everything + else uses JMESPath + ## 0.8.0 New tables in ``kubernetes`` schema: diff --git a/CLAUDE.md b/CLAUDE.md index 75aa272..9f5949a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -140,13 +140,14 @@ extend: ## Column Extractors -Two extractor types, specified by keyword in a column definition: +Three extractor keys, specified in a column definition: **`path:`** — JMESPath expression into the current row item -**`label:`** — shortcut to `metadata.labels`; can be a list to try in order +**`label:`** — shortcut to `metadata.labels`; can be a list to try in order +**`from:`** — unified key that auto-detects label vs path: values matching `domain/key` (e.g. `karpenter.sh/nodepool`) use `LabelExtractor`; everything else uses `PathExtractor` -**Named scope navigation** — in multi-step `row_source`, each entry must carry `as `, and every column path/label must be prefixed with a scope name: -- `node.metadata.uid` extracts `metadata.uid` from the object named `node` at a higher level +**Named scope navigation** — in multi-step `row_source`, each entry must carry `as `, and every column expression must end with `in ` to identify which scope to resolve against: +- `metadata.uid in node` extracts `metadata.uid` from the object named `node` at a higher level - All named scopes from ancestor levels are available at each step ## Column Types @@ -175,14 +176,14 @@ row_source: - spec.taints as taint # step 2: each taint within each node, named "taint" columns: - name: node_uid - path: node.metadata.uid # scope prefix "node." resolves to the step-1 object + path: metadata.uid in node # "in node" suffix resolves to the step-1 object - name: taint_key - path: taint.key # scope prefix "taint." 
resolves to the step-2 object + path: key in taint # "in taint" suffix resolves to the step-2 object ``` - Each step applies to results of the prior step -- Multi-step tables require `as ` on every entry; all column paths/labels must carry a scope prefix -- Single-step tables use bare JMESPath paths with no scope prefix +- Multi-step tables require `as ` on every entry; all column paths/labels must end with `in ` +- Single-step tables use bare JMESPath paths with no scope qualifier - Dict sources can be unpacked to key/value pairs with `; kv` suffix: `- env; kv` - Default `row_source` is `["items"]` diff --git a/docs/extending.rst b/docs/extending.rst index d93c4a8..410f083 100644 --- a/docs/extending.rst +++ b/docs/extending.rst @@ -85,7 +85,19 @@ Column extractors and defaults You've seen how the ``path`` extractor works, using JMESPath to identify an element in the response JSON. You can also use the ``label`` extractor, which is a shortcut to ``metadata.labels``, and can either be -a single string or a list of labels to check in order +a single string or a list of labels to check in order. + +A third option, ``from:``, combines both: Kugl auto-detects whether a +value is a label (matches ``domain/key`` format like +``karpenter.sh/nodepool``) or a JMESPath expression (everything else). +So these two column definitions are equivalent: + +.. code:: yaml + + - name: node_pool + label: karpenter.sh/nodepool + - name: node_pool + from: karpenter.sh/nodepool There are some useful defaults as well: @@ -152,30 +164,29 @@ a pod can have multiple containers. 
Kugl handles this using the - table: node_taints resource: nodes row_source: - - items - - spec.taints + - items as node + - spec.taints as taint columns: - name: node_uid - path: ^metadata.uid + path: metadata.uid in node - name: key - path: key + path: key in taint - name: effect - path: effect + path: effect in taint -Each element in ``row_source`` is a JMESPath expression that selects -items relative to the prior selector. Only the last element in the list -is used to generate a row, but ``path``\ s can refer to any part of the -chain. Each ``"^"`` at the start of a ``path`` refers to the part of the -response one level higher than the bottom ``row_source`` element. In -this case +Each element in ``row_source`` is a JMESPath expression followed by an +``as `` label. Every step must be named when the table has more +than one ``row_source`` entry. Column ``path`` and ``label`` values +identify which level they address by ending with ``in ``: -- ``^metadata.uid`` means the ``.metadata.uid`` in each element of the - response ``items`` array -- ``key`` and ``effect`` refer to each taint in the ``spec.taints`` - array +- ``metadata.uid in node`` reads ``.metadata.uid`` from each element of + the ``items`` array (named ``node``) +- ``key in taint`` and ``effect in taint`` read fields from each taint + in the ``spec.taints`` array (named ``taint``) The default ``row_source`` is just ``items``, which is why the example -``workflows`` table shown earlier doesn't need to specify it. +``workflows`` table shown earlier doesn't need to specify it. Single-step +tables use bare JMESPath paths with no ``in `` qualifier. This syntax also applies to the ``label`` extractor. For example, if the ``row_source`` of a table needs to address Job metadata but also @@ -186,13 +197,13 @@ metadata from the Job pod template, you can write this: ... 
resource: jobs row_source: - - items - - spec.template + - items as job + - spec.template as template columns: - name: label_from_job - label: ^a-job-label + label: a-job-label in job - name: label_from_pod - label: a-pod-label + label: a-pod-label in template More about row_source ~~~~~~~~~~~~~~~~~~~~~ @@ -213,8 +224,8 @@ In detail, here's how ``row_source`` is handled. - Repeat with each successive ``row_source`` entry. This can produce surprising results if one step in the ``row_source`` -list tries to do too much. Let's say the ``node_taints`` table didn't -need a ``^metadata.uid`` reference, so only requires the taint lists. +list tries to do too much. Let's say the ``node_taints`` table only +needed the taint lists, with no reference to node metadata. This source list would not work, because ``.spec`` is not a child of ``.items``. diff --git a/docs/multi.rst b/docs/multi.rst index 07c6cdb..6be9c40 100644 --- a/docs/multi.rst +++ b/docs/multi.rst @@ -28,19 +28,19 @@ query. For example: if ``~/.kugl/ec2.yaml`` contains - table: instances resource: instances row_source: - - Reservations - - Instances + - Reservations as reservation + - Instances as instance columns: - name: type - path: InstanceType + path: InstanceType in instance - name: zone - path: Placement.AvailabilityZone + path: Placement.AvailabilityZone in instance - name: private_dns - path: PrivateDnsName + path: PrivateDnsName in instance - name: state - path: State.Name + path: State.Name in instance - name: launched - path: LaunchTime + path: LaunchTime in instance you can write diff --git a/docs/resources.rst b/docs/resources.rst index 8285842..9a67ddb 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -124,14 +124,12 @@ To build a table showing environment settings by region: - table: env_settings resource: by_region row_source: - # Address each element in the result list - - "[]" - # Focus on the environment settings - - content.env + - "[]" as file + - content.env as setting columns: 
- name: region - path: ^match.region + path: match.region in file - name: name - path: name + path: name in setting - name: value - path: value + path: value in setting From 56903ce95e4e5c21eeb5838603de69b616e62033 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 12:06:12 -0400 Subject: [PATCH 33/35] Doc tweaks --- CHANGELOG.md | 23 ++++++++++++----------- docs/breaking.rst | 3 +++ 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0fee40c..cac9486 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,3 @@ -## 0.9.0 - -- Named scope syntax for multi-step ``row_source``: each entry takes ``as `` and - columns reference ancestor objects with ``in `` suffix (e.g. ``metadata.uid in node``); - the old ``^`` parent-hop syntax is removed -- New ``from:`` column key that auto-detects label vs JMESPath: values matching - ``domain/key`` format (e.g. ``karpenter.sh/nodepool``) use label extraction, everything - else uses JMESPath - ## 0.8.0 New tables in ``kubernetes`` schema: @@ -16,7 +7,7 @@ New tables in ``kubernetes`` schema: - ``services`` and ``service_labels`` - ``deployments`` and ``deployment_labels`` -CLI changes: +CLI changes (breaking): - Added ``-c``/``--context`` option to specify a Kubernetes context - Renamed ``-a`` option to ``-A`` for consistency with ``kubectl`` @@ -24,10 +15,20 @@ CLI changes: - Renamed ``-u``/``--update`` to ``-r``/``--refresh`` - Renamed ``-r``/``--reckless`` to ``-q``/``--quiet`` (and ``reckless:`` in settings to ``quiet:``) -Other: +Extending tables: + +- Named scope syntax for multi-step ``row_source``: each entry takes ``as `` and + columns reference ancestor objects with ``in `` suffix (e.g. ``metadata.uid in node``); + the old ``^`` parent-hop syntax is removed +- New ``from:`` column key that auto-detects label vs JMESPath: values matching + ``domain/key`` format (e.g. 
``karpenter.sh/nodepool``) use label extraction, everything + else uses JMESPath + +Documentation: - New masthead example of ``kugl`` vs ``kubectl | jq`` + ## 0.7.0 - Add `init` subcommand to generate `kubernetes.yaml` per recommended post-install configuration diff --git a/docs/breaking.rst b/docs/breaking.rst index 465b3e1..4a22c42 100644 --- a/docs/breaking.rst +++ b/docs/breaking.rst @@ -9,6 +9,9 @@ Please expect bugs and backward-incompatible changes. 0.8.0 ~~~~~ +The new `from:` syntax alternative to `path:` and `label:` is backwards compatible, but +the old syntax is deprecated and will be removed in a future release. + CLI changes: - Added ``-c``/``--context`` option to specify a Kubernetes context From 798580f1ddff74621feb21cd9be5ec1363dd2c91 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 12:18:13 -0400 Subject: [PATCH 34/35] Doc tweaks --- CHANGELOG.md | 4 ++-- docs/breaking.rst | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cac9486..576f4d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,12 +17,12 @@ CLI changes (breaking): Extending tables: -- Named scope syntax for multi-step ``row_source``: each entry takes ``as `` and +- Breaking: Named scope syntax for multi-step ``row_source``: each entry takes ``as `` and columns reference ancestor objects with ``in `` suffix (e.g. ``metadata.uid in node``); the old ``^`` parent-hop syntax is removed - New ``from:`` column key that auto-detects label vs JMESPath: values matching ``domain/key`` format (e.g. ``karpenter.sh/nodepool``) use label extraction, everything - else uses JMESPath + else uses JMESPath (``path:`` and ``label:`` to be removed in a future release) Documentation: diff --git a/docs/breaking.rst b/docs/breaking.rst index 4a22c42..c036468 100644 --- a/docs/breaking.rst +++ b/docs/breaking.rst @@ -9,9 +9,17 @@ Please expect bugs and backward-incompatible changes. 
0.8.0 ~~~~~ +Breaking changes are significant, gearing up for a 1.0 release. + The new `from:` syntax alternative to `path:` and `label:` is backwards compatible, but the old syntax is deprecated and will be removed in a future release. +Extending tables: + +- Named scope syntax for multi-step ``row_source``: each entry takes ``as `` and + columns reference ancestor objects with ``in `` suffix (e.g. ``metadata.uid in node``); + the old ``^`` parent-hop syntax is removed + CLI changes: - Added ``-c``/``--context`` option to specify a Kubernetes context From e52baf261558bede2f97cf4ea9635947acf6c826 Mon Sep 17 00:00:00 2001 From: Jonathan Ross Date: Sun, 17 May 2026 15:00:46 -0400 Subject: [PATCH 35/35] Add comments --- kugl/impl/tables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kugl/impl/tables.py b/kugl/impl/tables.py index af84c7e..fd597f5 100644 --- a/kugl/impl/tables.py +++ b/kugl/impl/tables.py @@ -220,11 +220,13 @@ def get_parent(self, child, depth: int = 1): depth -= 1 return child + # FIXME: rethink how this is done def set_scope(self, child, name: str, parent=None): """Register child as the named scope, inheriting ancestor scopes from parent.""" base = self._scopes.get(id(parent), {}) if parent is not None else {} self.set_scope_with_base(child, name, base) + # FIXME: rethink how this is done def set_scope_with_base(self, child, name: str, base: dict): """Like set_scope but accepts a pre-computed base scope dict.""" self._scopes[id(child)] = {**base, name: child}