diff --git a/.claude/plans/row-source.md b/.claude/plans/row-source.md new file mode 100644 index 0000000..09178ec --- /dev/null +++ b/.claude/plans/row-source.md @@ -0,0 +1,201 @@ +# Implementation Plan: Named Scopes + `from:` Unification + +Two related improvements to the YAML extension mechanism. They can be implemented +sequentially on one branch or separately; Phase 1 is a prerequisite for Phase 2's +scope-aware path resolution. + +Scope references use a consistent `in <name>` suffix, mirroring the `as <name>` suffix +in `row_source` declarations: `as` binds a name, `in` references it. + +--- + +## Phase 1: Named Scopes in `row_source` + +**Status: implemented with `.` prefix syntax — needs revision to `in <name>` suffix.** + +### Goal + +Replace the `^` parent-hop syntax with named scope references. + +**Single row_source** (the common case — default `["items"]` or one explicit entry): +path expressions resolve against the one implicit object; no scope qualifier required. + +**Multiple row_source entries**: every entry must carry `as <name>`, and every path / +label expression must end with an explicit `in <name>` qualifier. There is no implicit +"current object" when more than one level exists. + +```yaml +create: + - table: node_taints + resource: nodes + row_source: + - items as node + - spec.taints as taint + columns: + - name: node_uid + path: metadata.uid in node + - name: taint_key + path: key in taint +``` + +### Changes + +**`kugl/impl/tables.py` — `Itemizer`** + +- Parse `as <name>` suffix from row_source entries. `"items as node"` yields + `Itemizer(expr="items", name="node", finder=..., unpack=False)`. +- Store `name: Optional[str]` on the dataclass. + +**`kugl/impl/tables.py` — `RowContext`** + +- Add `_scopes: dict[int, dict[str, object]]`. Key is `id(child)`; value is the + map of scope names visible at that child's level. +- `set_scope(child, name, parent)` records the child's scope map, inheriting all + ancestor scopes from parent and adding `name → child`.
+- Add `get_scope(obj, name) -> Optional[object]` that looks up the named object. + +**`kugl/impl/tables.py` — `TableFromConfig._itemize`** + +- After calling `context.set_parent(child, item)`, also call + `context.set_scope(child, source.name, item)` when `source.name` is not None, + carrying forward all ancestor scopes so deeper levels can still reference `node`. + +**`kugl/impl/extract.py` — `FieldRef` / `PathExtractor` / `LabelExtractor`** + +- `FieldRef.parse`: remove `^` handling; detect a trailing ` in <name>` suffix as a + scope name. Store as `scope_name: Optional[str]` and strip it from the target + before JMESPath compilation. +- In `PathExtractor.extract` and `LabelExtractor.extract`, when `self._ref.scope_name` + is set, resolve the object via `context.get_scope(obj, scope_name)`. +- Validation at table-build time (`TableFromConfig.__init__`): if `len(row_source) > 1`, + every `row_source` entry must have a name and every column path/label must carry an + `in <name>` qualifier; raise a clear `ConfigError` if either constraint is violated. + +### Builtin Update + +`kugl/builtins/schemas/kubernetes.yaml` — convert `node_taints` to use named scopes +as a self-contained example: + +```yaml + row_source: + - items as node + - spec.taints as taint + columns: + - name: node_uid + path: metadata.uid in node + - name: taint_key + path: key in taint +``` + +### Tests + +- Update the existing `node_taints` test (wherever it lives) to verify the new + syntax produces the same output. +- Add a new test with three levels of nesting (e.g. `pod → container → env`) using + two named scopes, verifying that both ancestor levels are reachable by name. +- Add a test that `^` in a path raises a clear parse error. +- Add a test that a multi-step `row_source` with a missing `as` name raises a `ConfigError`. +- Add a test that a multi-step `row_source` with a bare (un-scoped) column path raises a `ConfigError`.
+ +--- + +## Phase 2: `from:` Key Unification + +### Goal + +Replace the two-key `path:` / `label:` vocabulary with a single `from:` key that +auto-detects extraction type. Named scope qualifiers compose naturally via the same +`in <name>` suffix. + +Single row_source (no scope qualifier needed): + +```yaml + columns: + - name: node_pool + from: karpenter.sh/nodepool # auto-detected: label + - name: provider_id + from: spec.providerID # auto-detected: JMESPath +``` + +Multi-step row_source (all entries named, all columns scoped): + +```yaml + row_source: + - items as pod + - spec.containers as container + columns: + - name: pod_name + from: metadata.name in pod # JMESPath on pod scope + - name: pod_pool + from: karpenter.sh/nodepool in pod # label on pod scope — unambiguous + - name: container_name + from: name in container # JMESPath on container scope +``` + +### Auto-Detection Rule + +Strip any trailing ` in <name>` suffix first, then apply to the remainder: + +- Matches `[a-zA-Z0-9.-]+/[a-zA-Z0-9._/-]+` (K8s label format: DNS domain + `/` + + key) → `LabelExtractor` +- Otherwise → `PathExtractor` + +A value like `metadata.labels.foo/bar` is a JMESPath, not a label — the `/` appears +inside a path segment, not as the label-domain separator. The regex handles this +correctly because `metadata.labels.foo` is not a valid DNS domain segment. (NOTE: as written, `[a-zA-Z0-9.-]+` does match `metadata.labels.foo`, so the pattern must be tightened for this to hold.) + +Parsing ` in <name>` is safe because neither JMESPath expressions nor label keys +contain spaces, so the delimiter is unambiguous. + +### Changes + +**`kugl/impl/config.py` — `UserColumn`** + +- Add `from_: Optional[str] = Field(None, alias="from")` (Pydantic alias needed + because `from` is a Python keyword). +- In `gen_extractor`, handle `from_` alongside `path` and `label`. + - If `from_` is set alongside `path` or `label`, raise `ValueError`. + - Strip any ` in <name>` suffix from `from_` to extract the scope name. + - Apply the label-vs-path regex to the remainder. + - Construct the appropriate extractor, passing the scope name through.
+- Keep `path:` and `label:` fully supported so existing configs are not broken. + +**`kugl/impl/extract.py` — `FieldRef`** + +- Centralise the ` in <name>` parsing in `FieldRef.parse_scoped(s)`; both + `gen_extractor` (for `from:`) and `FieldRef.parse` (for `path:`/`label:`) delegate + to it. +- Known scopes are not available at Pydantic parse time. Use lazy validation: accept + any ` in <name>` suffix as a potential scope; fail at table-build time in + `TableFromConfig.__init__` if the referenced scope name is not declared in + `row_source`. + +### Tests + +- `from: karpenter.sh/nodepool` produces the same result as `label: karpenter.sh/nodepool`. +- `from: spec.providerID` produces the same result as `path: spec.providerID`. +- `from: metadata.name in pod` with a named `pod` scope resolves correctly. +- `from: karpenter.sh/nodepool in pod` with a named `pod` scope resolves as a label + on the pod object. +- Error: `from:` and `path:` both specified → validation error. +- Error: `from: foo in unknownscope` where `unknownscope` is not in `row_source` → clear + error message at table-build time. + +--- + +## Files Touched + +| File | Change | +|---|---| +| `kugl/impl/extract.py` | `FieldRef.parse`: detect ` in <name>` suffix; extractors: resolve via scope | +| `kugl/impl/tables.py` | `Itemizer`: parse `as <name>`; `RowContext`: track named scopes | +| `kugl/impl/config.py` | `UserColumn`: add `from_` field and dispatch in `gen_extractor` | +| `kugl/builtins/schemas/kubernetes.yaml` | Convert `node_taints` to named scope syntax | +| `tests/` | Update node_taints test; add multi-level and `from:` tests | + +--- + +## Out of Scope + +- The broader resource-coverage gaps from `discuss.md` (deployments, containers table, + etc.) are separate work and should not be bundled here.
diff --git a/.claude/plans/shortcomings.md b/.claude/plans/shortcomings.md new file mode 100644 index 0000000..9cca333 --- /dev/null +++ b/.claude/plans/shortcomings.md @@ -0,0 +1,113 @@ +# Kugl Discussion Summary + +## What Kugl Is + +Kugl is a Python CLI tool that queries Kubernetes resources using SQL (SQLite). It runs `kubectl get` commands, caches the JSON output, and loads it into an in-memory SQLite database. Users write SQL queries directly on the command line or via saved shortcuts. + +Built-in tables: `pods`, `jobs`, `nodes`, `node_labels`, `pod_labels`, `job_labels`, `node_taints`. Resource types, namespaces, and cache TTL are controlled via CLI flags (`-a`, `-n`, `-u`, `-c`, `-t`). + +Kugl automatically converts Kubernetes-specific value formats to queryable numerics: `50Mi` → bytes, `100m` CPU → float, ISO8601 timestamps → epoch seconds. Helper functions `to_size()`, `to_age()`, `to_utc()` convert back to human-readable strings for output. + +--- + +## Strengths + +- **SQL is better than jq for aggregation.** Queries involving `GROUP BY`, `SUM`, `JOIN`, `ORDER BY`, and CTEs are dramatically more readable in SQL than in jq pipelines. The target use case — "how is compute distributed across node pools and taints?" — is well served. +- **Automatic type coercion.** CPU, memory, and timestamp conversion is handled transparently. Steampipe's Kubernetes plugin likely exposes these as raw strings or JSONB; kugl makes them directly comparable numerically. +- **Built-in caching.** A 2-minute TTL cache avoids hammering the API server during exploratory queries. +- **Declarative extensions require no code.** Adding a label or nested field to an existing table takes 4 lines of YAML, no build step, no Go, no Python. Far more accessible than Steampipe's Go plugin model. 
+- **Multi-schema queries.** Joining Kubernetes data with other JSON sources (files, exec output) via `kubernetes.nodes JOIN ec2.instances` is architecturally sound, even if the AWS side is experimental. + +--- + +## Weaknesses + +### Priority (blocking credibility) + +1. **Narrow built-in resource coverage.** Only pods, jobs, and nodes are built in. Deployments, StatefulSets, DaemonSets, CronJobs, Services, Ingresses, Namespaces, PVs/PVCs are absent. Users can add them via YAML config, but requiring setup before querying standard resources is a significant barrier. + +2. **No per-container table.** Pod-level resource data aggregates across all containers. For multi-container pods (sidecars, init containers), individual container visibility is lost. A `containers` table (one row per container, joinable to `pods` via pod UID) is needed. + +3. **No context selection at invocation time.** Users must `kubectl config use-context` before running kugl. A `--context` flag is table stakes for anyone with more than one cluster. + +4. **No structured output.** Output is human-readable tabular text only. Without `--output csv` or `--output json`, kugl cannot participate in pipelines or feed dashboards. + +5. **No shortcut parameters.** Shortcuts are static query aliases. The docs acknowledge this gap and suggest wrapper scripts as the workaround. Named parameter substitution (e.g., `{{namespace}}`) is needed for real team adoption. + +### Nice-to-Have + +- **Events table.** `kubectl get events` is one of the most-used debugging commands; it should be built in. +- **PVs/PVCs.** Important for stateful workloads. +- **RBAC tables.** Roles, RoleBindings, ClusterRoles for security auditing. +- **Metrics integration.** Joining `kubectl top pods` data with resource requests would enable requests-vs-actual-usage analysis. +- **Shell completions,** especially for shortcuts. +- **Richer `--schema` output** (columns, types, source paths). 
+ +--- + +## Comparison to Steampipe (Kubernetes plugin) + +| Capability | Kugl | Steampipe | +|---|---|---| +| Built-in resource types | pods, jobs, nodes + labels/taints | All standard K8s types | +| SQL dialect | SQLite | PostgreSQL (full) | +| CPU/memory type handling | Auto-converted to numerics | Likely raw strings/JSONB | +| Adding a label column | 4 lines of YAML | Go code + rebuild + reinstall | +| Adding a new resource type | YAML `create:` block | Go plugin with K8s client call | +| Ecosystem integration | CLI output only | Postgres wire protocol (Grafana, psql, etc.) | +| Multi-cluster | Not supported | Aggregator plugins | +| Cross-source joins | Experimental | Core feature, 100+ plugins | +| Caching | Built-in TTL cache | Plugin-level | +| Maintenance | Personal project | Turbot-backed, active community | + +Steampipe's Kubernetes plugin likely does **not** pre-convert CPU/memory strings to numerics — this appears to be a genuine and specific kugl advantage for resource utilization queries. + +--- + +## Extension Mechanism + +### Current model + +Users add columns via `~/.kugl/init.yaml` or `~/.kugl/kubernetes.yaml`: + +```yaml +extend: + - table: nodes + columns: + - name: node_pool + type: text + label: karpenter.sh/nodepool # shortcut for metadata.labels."..." + - name: provider_id + type: text + path: spec.providerID # JMESPath expression +``` + +Special kugl types (`size`, `age`, `cpu`, `date`) handle K8s-specific string-to-numeric conversion. + +Multi-row-per-resource tables (e.g., one row per container or taint) use `row_source:` — a sequential JMESPath pipeline — with `^` prefix to reference parent-level fields. + +### Friction points + +1. **Two-vocabulary system (`path:` vs `label:`).** Users who don't know about `label:` write awkward quoted JMESPath: `metadata.labels."karpenter.sh/nodepool"`. The shortcut is useful but invisible until you need it. +2. 
**`path:` is a required key even when it's the only thing expressed.** Three keys for a conceptually one-line mapping. +3. **`row_source` + `^` parent references** are non-obvious, but affect only the minority of multi-row-per-resource cases. + +### Recommended improvement: unified `from:` key + +Replace `path:` / `label:` with a single `from:` key that auto-detects the extraction type: +- Value containing `/` with no leading dot-path segment → label name (matches all real K8s labels) +- Otherwise → JMESPath expression + +```yaml +extend: + - table: nodes + columns: + - name: node_pool + type: text + from: karpenter.sh/nodepool # auto-detected as label + - name: provider_id + type: text + from: spec.providerID # auto-detected as JSON path +``` + +**Implementation:** add `from_` field to `UserColumn` in `config.py`; dispatch to `LabelExtractor` or `PathExtractor` in `gen_extractor` validator. Keep `path:` and `label:` for backward compatibility. Change is small and non-breaking. diff --git a/CHANGELOG.md b/CHANGELOG.md index 2379b69..576f4d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,34 @@ +## 0.8.0 + +New tables in ``kubernetes`` schema: + +- ``events`` +- ``cronjobs`` and ``cronjob_labels`` +- ``services`` and ``service_labels`` +- ``deployments`` and ``deployment_labels`` + +CLI changes (breaking): + +- Added ``-c``/``--context`` option to specify a Kubernetes context +- Renamed ``-a`` option to ``-A`` for consistency with ``kubectl`` +- Renamed ``-c``/``--cache`` to ``-s``/``--stale`` +- Renamed ``-u``/``--update`` to ``-r``/``--refresh`` +- Renamed ``-r``/``--reckless`` to ``-q``/``--quiet`` (and ``reckless:`` in settings to ``quiet:``) + +Extending tables: + +- Breaking: Named scope syntax for multi-step ``row_source``: each entry takes ``as <name>`` and + columns reference ancestor objects with ``in <name>`` suffix (e.g.
``metadata.uid in node``); + the old ``^`` parent-hop syntax is removed +- New ``from:`` column key that auto-detects label vs JMESPath: values matching + ``domain/key`` format (e.g. ``karpenter.sh/nodepool``) use label extraction, everything + else uses JMESPath (``path:`` and ``label:`` to be removed in a future release) + +Documentation: + +- New masthead example of ``kugl`` vs ``kubectl | jq`` + + ## 0.7.0 - Add `init` subcommand to generate `kubernetes.yaml` per recommended post-install configuration @@ -40,7 +71,7 @@ - Allow environment variables in `file` resource paths - Fix the `exec` resource by adding a `cache_key` field; these resources would otherwise experience cache collisions - Resource cache paths and file formats have changed, and cache now lives in `~/.kuglcache` -- `rm -r ~/.kugl/cache` is recommended to clear obsolete files +- `rm -r ~/.kuglcache` is recommended to clear obsolete files ## 0.3.3 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..9f5949a --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,245 @@ +# Kugl — Claude Seed Context + +> **Note to Claude:** Please keep this file current as the project evolves. + +Kugl lets you query Kubernetes (and other) resources using SQL. It fetches JSON via `kubectl get` +(or other sources), loads it into an in-memory SQLite database, runs the query, and formats results. 
+ +## Package Layout + +``` +kugl/ + api.py # Public decorators: @table, @resource_type, @column + main.py # CLI entry point + impl/ + engine.py # Engine (query execution), DataCache, ResourceRef + registry.py # Registry (singleton), Schema, Resource base class + tables.py # Table, TableFromCode, TableFromConfig, TableDef, RowContext, Itemizer + config.py # Pydantic models: Settings, UserConfig, Column, UserColumn, + # ResourceDef, CreateTable, ExtendTable, Shortcut + extract.py # PathExtractor, LabelExtractor, Extractor base, FieldRef, type maps + builtins/ + resources.py # Built-in resource families (kubernetes, file, folder, exec, data) + schemas/ + kubernetes.py # Built-in @table classes for pods, nodes, jobs, etc. + util/ # Helpers: Age, KPath, SqliteDb, Query, clock, debugging, etc. +tests/ + k8s/ # Kubernetes table tests + config/ # Config parsing and merge tests + resource/ # Per-resource-type tests (cache, exec, file, folder, etc.) +docs/ # RST documentation (syntax, builtins, extending, resources, multi, settings) +kugl/builtins/schemas/ # Built-in YAML schema configs (kubernetes.yaml, etc.) +``` + +## Core Concepts + +### Schema +A named group of tables and resources. The default schema is `"kubernetes"`. Multi-schema queries +attach each schema as a separate in-memory SQLite database and require explicit `schema.table` +qualification in SQL. + +### Resource (family) +Where data comes from. The **resource family** is the type (e.g., `kubernetes`, `file`, `folder`, +`exec`, `data`); a **resource** is a specific instance with a name and family-specific config. 
+ +Built-in families: +- `kubernetes` — runs `kubectl get -o json`; supports `-n`/`-A` namespace flags +- `file` — reads a local file (YAML or JSON); `file: ~/.kube/config` +- `folder` — globs files in a tree, presents each as `{match: {...}, content: {...}}` +- `exec` — runs any shell command producing JSON/YAML; optionally cacheable with `cache_key` +- `data` — static inline data + +### Registry (singleton) +`Registry.get()` is the process-wide singleton. It maps: +- resource family name → Resource subclass (`resources_by_family`) +- schema name → default Resource subclass (`resources_by_schema`) +- schema name → `Schema` object (`schemas`) + +Populated at import time by `@table` and `@resource_type` decorators. + +### Schema object +`Schema` holds: +- `builtin`: `{name: TableDef}` — tables defined in Python via `@table` +- `_create`: tables defined in user config `create:` sections +- `_extend`: column extensions from `extend:` sections +- `_resources`: resource instances from config `resources:` sections + +`Schema.read_configs()` merges config files from (in order): builtin schemas package, +any `init_path` folders, then `~/.kugl/`. + +### Table hierarchy +- `Table` — base; holds column lists, implements `build()` (CREATE TABLE + INSERT) +- `TableFromCode` — wraps a `@table`-decorated class; delegates `make_rows()` to it +- `TableFromConfig` — built from a `create:` config block; uses `Itemizer` for row generation + +### ResourceRef +A `(schema, resource)` pair used as a hashable set member for cache tracking. +Name property is `"schema_name.resource_name"`. + +### Engine +`Engine.query()` orchestrates: +1. Identify schemas from the SQL query; attach them as SQLite databases +2. `Schema.read_configs()` for each schema +3. Build `Table` and `ResourceRef` objects for each named table +4. `DataCache.advise_refresh()` to decide what to fetch vs read from cache +5. Parallel fetch using ThreadPoolExecutor +6. `Table.build()` to CREATE TABLE and INSERT rows +7.
Execute the SQL query and return rows + column names + +### DataCache +Stores JSON responses under `~/.kuglcache/<schema>/<resource>.json`. +Cache age is based on file mtime. Three cache flags: +- `ALWAYS_UPDATE` (`-r`) — fetch everything, no stale warning +- `CHECK` (default) — fetch expired/missing, warn about stale data +- `NEVER_UPDATE` (`-s`) — only fetch missing, never update existing cache + +## Config Files + +### `~/.kugl/init.yaml` +Top-level settings and shortcuts. Only this file may contain `settings:`. + +```yaml +settings: + cache_timeout: 5m # default 2m + quiet: true # suppress stale-data warnings + init_path: # extra config folders, applied before ~/.kugl/ + - ~/team-kugl + +shortcuts: + - name: mypods + args: ["select name, status from pods where namespace = 'default'"] +``` + +### `~/.kugl/<schema>.yaml` (e.g. `kubernetes.yaml`) +Defines resources and tables for a schema. + +```yaml +resources: + - name: workflows # resource name used by create: tables + namespaced: true # for kubernetes family; default true + +create: + - table: workflows + resource: workflows + row_source: # default is ["items"] + - items + columns: + - name: name + path: metadata.name + - name: status + label: workflows.argoproj.io/phase + +extend: + - table: pods + columns: + - name: owner + type: text + label: com.mycompany/owner + comment: ML team owner +``` + +## Column Extractors + +Three extractor keys, specified in a column definition: + +**`path:`** — JMESPath expression into the current row item +**`label:`** — shortcut to `metadata.labels`; can be a list to try in order +**`from:`** — unified key that auto-detects label vs path: values matching `domain/key` (e.g.
`karpenter.sh/nodepool`) use `LabelExtractor`; everything else uses `PathExtractor` + +**Named scope navigation** — in multi-step `row_source`, each entry must carry `as <name>`, and every column expression must end with `in <name>` to identify which scope to resolve against: +- `metadata.uid in node` extracts `metadata.uid` from the object named `node` at a higher level +- All named scopes from ancestor levels are available at each step + +## Column Types + +Kugl types (used in config `type:`) → SQLite storage type: + +| Kugl type | SQLite | Accepts | +|-----------|--------|---------| +| `text` | TEXT | strings | +| `integer` | INTEGER | ints | +| `real` | REAL | floats | +| `size` | INTEGER | `50Mi`, bytes | +| `age` | INTEGER | `5d`, `4h30m`, seconds | +| `cpu` | REAL | `0.5`, `300m` | +| `date` | INTEGER | `2021-01-01T12:34:56Z`, epoch secs | + +Built-in SQL functions: `now()`, `to_utc(ts)`, `to_age(secs)`, `to_size(bytes)` + +## row_source + +Multi-step JMESPath iteration for generating multiple rows per API response item. + +```yaml +row_source: + - items as node # step 1: each element of the top-level items array, named "node" + - spec.taints as taint # step 2: each taint within each node, named "taint" +columns: + - name: node_uid + path: metadata.uid in node # "in node" suffix resolves to the step-1 object + - name: taint_key + path: key in taint # "in taint" suffix resolves to the step-2 object +``` + +- Each step applies to results of the prior step +- Multi-step tables require `as <name>` on every entry; all column paths/labels must end with `in <name>` +- Single-step tables use bare JMESPath paths with no scope qualifier +- Dict sources can be unpacked to key/value pairs with `; kv` suffix: `- env; kv` +- Default `row_source` is `["items"]` + +## Decorators (kugl/api.py) + +```python +from kugl.api import resource_type, table, column, Resource + +@resource_type(type="myfamily", schema_defaults=["myschema"]) +class MyResource(Resource): + def get_objects(self): ...
+ def cache_path(self): ... + +@table(schema="kubernetes", name="pods", resource="pods") +class PodsTable: + def columns(self) -> list[Column]: ... + def make_rows(self, context: RowContext) -> list[tuple[dict, tuple]]: ... +``` + +`make_rows` returns `[(item_dict, row_tuple), ...]` where `row_tuple` contains one value per +builtin column (non-builtin/extension columns are appended by `Table.build()`). + +## Multi-Schema Queries + +When a query references `schema.table`, each schema gets an `ATTACH DATABASE ':memory:' AS schema` +and all table names must be fully qualified. + +```sql +SELECT k.name, e.zone +FROM kubernetes.nodes k +JOIN ec2.instances e ON k.name = e.hostname +``` + +The `ec2` schema is defined in `~/.kugl/ec2.yaml` with an `exec:` resource. + +## Debugging + +Set `KUGL_DEBUG` env var to a comma-separated list of topics: +- `cache` — cache hit/miss decisions +- `extract` — column value extraction +- `itemize` — row_source iteration steps + +## Testing Notes + +- The Registry is a process singleton; tests use `Schema.read_configs()` to reset non-builtin state +- `clock.CLOCK` is patched in tests to control time (for cache age calculations) +- Tests use actual in-memory SQLite; no mocking of the DB layer +- Kubernetes tests mock `kubectl` via fixtures in `tests/k8s/k8s_mocks.py` + +## Running Tests + +```bash +uv run pytest tests/ # full suite +uv run pytest tests/ -k foo # filter by name +``` + +**Important:** always run from the project root with `tests/` as the target, not an individual file. +The Registry is populated by decorator side-effects at import time; running a single test module in +isolation skips those imports and causes "Resource family X is not registered" errors. diff --git a/README.rst b/README.rst index b495c75..ff77e51 100644 --- a/README.rst +++ b/README.rst @@ -6,40 +6,62 @@ Explore Kubernetes resources using SQLite. Example ------- -Find the top users of a GPU pool, based on instance type and a -team-specific pod label. 
- -With Kugl (and a bit of configuration for owner and instance type) +Report memory pressure by node — how much memory is requested by running and initializing +pods, versus what each node can allocate. Kugl understands Kubernetes memory and CPU +units natively, and offers ``kubectl``'s human-friendly status string as a column: .. code:: shell - kugl -a "select owner, sum(gpu_req), sum(cpu_req) - from pods join nodes on pods.node_name = nodes.name - where instance_type like 'g5.%large' and pods.phase in ('Running', 'Pending') - group by 1 order by 2 desc limit 10" + kugl -A "select n.name, to_size(sum(p.mem_req)) as requested, to_size(n.mem_alloc) as allocatable + from nodes n join pods p on p.node_name = n.name + where p.phase = 'Running' or p.status like 'Init:%' + group by n.name order by sum(p.mem_req) desc" + +Result: + +.. code:: text -With ``kubectl`` and ``jq``, that's a little more work: + name requested allocatable + ip-10-12-18-252.us-east-2.compute.internal 42Gi 59Gi + ip-10-12-188-56.us-east-2.compute.internal 36Gi 120Gi + ... + +With ``kubectl -o json`` and ``jq``, that's rather more work. Parsing units is your problem, +status is derived from multiple fields, joins are awkward, and this doesn't yet cover +output formatting: .. code:: shell - kubectl get pods -o json --all | - jq -r --argjson nodes "$(kubectl get nodes -o json | jq '[.items[] - | select((.metadata.labels["node.kubernetes.io/instance-type"] // "") | test("g5.*large")) - | .metadata.name]')" \ - '[ .items[] - | select(.spec.nodeName as $node | $nodes | index($node)) - | select(.status.phase == "Running" or .status.phase == "Pending") - | .
as $pod | $pod.spec.containers[] - | select(.resources.requests["nvidia.com/gpu"] != null) - | {owner: $pod.metadata.labels["com.mycompany/job-owner"], - gpu: .resources.requests["nvidia.com/gpu"], - cpu: .resources.requests["cpu"]} - ] | group_by(.owner) - | map({owner: .[0].owner, - gpu: map(.gpu | tonumber) | add, - cpu: map(.cpu | if test("m$") then (sub("m$"; "") | tonumber / 1000) else tonumber end) | add}) - | sort_by(-.gpu) | .[:10] | .[] - | "\(.owner) \(.gpu) \(.cpu)"' + { kubectl get nodes -o json; kubectl get pods -A -o json; } | jq -rn ' + def membytes: + if test("Ki$") then (gsub("Ki$"; "") | tonumber * 1024) + elif test("Mi$") then (gsub("Mi$"; "") | tonumber * 1048576) + elif test("Gi$") then (gsub("Gi$"; "") | tonumber * 1073741824) + elif test("K$") then (gsub("K$"; "") | tonumber * 1000) + elif test("M$") then (gsub("M$"; "") | tonumber * 1000000) + elif test("G$") then (gsub("G$"; "") | tonumber * 1000000000) + else tonumber end; + (input | .items | map({ + name: .metadata.name, + alloc: (.status.allocatable.memory | membytes) + }) | INDEX(.name)) as $nodeMap | + [input | .items[] | + select( + .status.phase == "Running" or + (((.spec.initContainers // []) | length) > 0 and + ((.status.initContainerStatuses // []) | map(select(.ready)) | length) < + ((.spec.initContainers // []) | length)) + ) | + select(.spec.nodeName) | + { + node: .spec.nodeName, + mem: ([.spec.containers[].resources.requests.memory // "0"] | map(membytes) | add) + } + ] | + group_by(.node) | + map({node: .[0].node, requested: (map(.mem) | add), allocatable: $nodeMap[.[0].node].alloc}) | + sort_by(-.requested)[] | + [.node, .requested, .allocatable] | @tsv' Installing ---------- @@ -47,7 +69,7 @@ Installing Kugl requires Python 3.9 or later, and kubectl. 
**This is an alpha release.** Please expect bugs and -`backward-incompatible changes <./docs-tmp/breaking.md>`__ +`backward-incompatible changes `__ If you don't mind Kugl cluttering your Python with its `dependencies <./reqs_public.txt>`__: @@ -95,11 +117,11 @@ Find the pods using the most memory: kugl -a "select namespace, name, to_size(mem_req) from pods order by mem_req desc limit 15" -If this query is helpful, `save it <./docs-tmp/shortcuts.md>`__, then +If this query is helpful, `save it `__, then you can run ``kugl hi-mem``. Please also see the `recommended -configuration <./docs-tmp/recommended.md>`__. +configuration `__. How it works (important) ------------------------ @@ -117,31 +139,36 @@ Server load by **caching responses for two minutes**. This is why it often prints "Data delayed up to ..." messages. Depending on your cluster activity, the cache can be a help or a -hindrance. You can suppress the "delayed" messages with the ``-r`` / -``--reckless`` option, or always update data using the ``-u`` / -``--update`` option. These behaviors, and the cache expiration time, can +hindrance. You can suppress the "delayed" messages with the ``-q`` / +``--quiet`` option, or always fetch fresh data using the ``-r`` / +``--refresh`` option. These behaviors, and the cache expiration time, can be set in the config file as well. In any case, please be mindful of stale data and server load. +.. 
BEGIN_LEARN_MORE + Learn more ---------- -- `Command-line syntax <./docs-tmp/syntax.md>`__ -- `Recommended configuration <./docs-tmp/recommended.md>`__ -- `Settings <./docs-tmp/settings.md>`__ -- `Built-in tables and functions <./docs-tmp/builtins.md>`__ -- `Configuring new columns and tables <./docs-tmp/extending.md>`__ -- `Troubleshooting and feedback <./docs-tmp/trouble.md>`__ +- `Command-line syntax `__ +- `Recommended configuration `__ +- `Settings `__ +- `Shortcuts `__ +- `Built-in tables and functions `__ +- `Configuring new columns and tables `__ +- `Troubleshooting and feedback `__ - Beyond Kubernetes and kubectl - - `Other resource types <./docs-tmp/resources.md>`__ - - `Additional schemas <./docs-tmp/multi.md>`__ + - `Other resource types `__ + - `Additional schemas `__ - `Release notes <./CHANGELOG.md>`__ -- `Breaking changes <./docs-tmp/breaking.md>`__ +- `Breaking changes `__ - `License <./LICENSE>`__ +.. END_LEARN_MORE + Pronunciation ~~~~~~~~~~~~~ diff --git a/docs-tmp/breaking.md b/docs-tmp/breaking.md deleted file mode 100644 index e390473..0000000 --- a/docs-tmp/breaking.md +++ /dev/null @@ -1,9 +0,0 @@ -# Breaking changes - -## 0.5.0 - -- Shortcut syntax in `init.yaml` has changed, but old syntax is still supported (a warning will be printed) - -## 0.4.2 - -- The `namespaced` field in a Kubernetes resource definition is now required. \ No newline at end of file diff --git a/docs-tmp/builtins.md b/docs-tmp/builtins.md deleted file mode 100644 index 4b37a3b..0000000 --- a/docs-tmp/builtins.md +++ /dev/null @@ -1,103 +0,0 @@ - -## Built-in tables - -A note about data types - -* Timestamps are stored as integers, representing seconds since the Unix epoch. Timestamps and deltas can be converted -back to strings like `2021-01-01 12:34:56Z` or `5d`, `4h30m` using the `to_utc` and `to_age` functions, below. 
-* Memory is stored as bytes, and can be coverted back to a string like `1Gi` or `3.4Mi` using the `to_size` function, below -* CPU and GPU limits are stored as floats - -### pods - -Built from `kubectl get pods`, one row per pod. Two calls are made to `get pods`, one to get textual outut -of the STATUS column, since this is difficult to determine from the pod detail. - -NOTE: some of the containers in a pod may have no limits expressed. If all have no limits for e.g. CPU, -`cpu_req` will be null; otherwise, to sum container resources, a null value will be treated as zero. - -| Column | Type | Description | -|---------------------------|---------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| name | TEXT | Pod name, from `metadata.name` | -| uid | TEXT | Pod UID, from `metadata.uid` | -| namespace | TEXT | Pod namespace, from `metadata.namespace` | -| node_name | TEXT | Node name, from `spec.nodeName` | -| phase | TEXT | Pod phase, from `status.phase` | -| status | TEXT | Pod status as reported by `kubectl get pods` | -| creation_ts | INTEGER | Pod creation timestamp, from `metadata.creationTimestamp` | -| deletion_ts | INTEGER | Pod deletion timestamp (or null) from `metadata.deletionTimestamp` | -| is_daemon | INTEGER | 1 if the pod is in a DaemonSet, 0 otherwise | -| command | TEXT | The concatenated command args from what appears to be the main container (look for containers named `main`, `app`, or `notebook`) else from the first container | -| cpu_req, gpu_req, mem_req | REAL | Sum of CPU, GPU and memory values from `resources.requests` in each `spec.containers`; GPU looks for the value tagged `nvidia.com/gpu` | -| cpu_lim, gpu_lim, mem_lim | REAL | Sum of CPU, GPU and memory values from `resources.limits` in each `spec.containers`; GPU looks for the value tagged `nvidia.com/gpu` (this isn't necessarily 
helpful, since limits can be absent) | - -### pod_labels - -Built from `kubectl get pods`, one row per label. - -| Column | Type | Description | -|------------|---------|----------------------------------------------------------| -| pod_uid | TEXT | Pod UID, from `metadata.uid` | -| key, value | TEXT | Label key and value from each entry in `metadata.labels` | - -### jobs - -Built from `kubectl get jobs`, one row per job - -| Column | Type | Description | -|---------------------------------|---------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| name | TEXT | Job name, from `metadata.name` | -| uid | TEXT | Job UID, from `metadata.uid` | -| namespace | TEXT | Job namespace, from `metadata.namespace` | -| status | TEXT | Job status as described by [V1JobStatus](https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/V1JobStatus.md) -- this is one of `Running`, `Complete`, `Suspended`, Failed`, `Unknown` | -| cpu_req, gpu_req, mem_req | REAL | Sum of CPU, GPU and memory values from `resources.requests` in each `spec.template.spec.containers`; GPU looks for the value tagged `nvidia.com/gpu` | -| cpu_lim, gpu_lim, mem_lim | REAL | Sum of CPU, GPU and memory values from `resources.limits` in each `spec.template.spec.containers`; GPU looks for the value tagged `nvidia.com/gpu` (this isn't necessarily helpful, since limits can be | - -### job_labels - -Built from `kubectl get jobs`, one row per label. - -| Column | Type | Description | -|------------|---------|----------------------------------------------------------| -| job_uid | TEXT | Job UID, from `metadata.uid` | -| key, value | TEXT | Label key and value from each entry in `metadata.labels` | - -### nodes - -Built from `kubectl get nodes`, one row per node. See [recommended configuration](./recommended.md) -about adding node instance type. 
- -| Column | Type | Description | -|---------------------------------|---------|-------------------------------------------------------------------------------------------------------------| -| name | TEXT | Node name, from `metadata.name` | -| uid | TEXT | Node UID, from `metadata.uid` | -| cpu_alloc, gpu_alloc, mem_alloc | REAL | CPU, GPU and memory values from `status.allocatable`; GPU looks for the value tagged `nvidia.com/gpu` | -| cpu_cap, gpu_cap, mem_cap | REAL | CPU GPU and memory values from `status.capacity`; GPU looks for the value tagged `nvidia.com/gpu` | - -### node_labels - -Built from `kubectl get nodes`, one row per label. - -| Column | Type | Description | -|------------|---------|----------------------------------------------------------| -| node_uid | TEXT | Node UID, from `metadata.uid` | -| key, value | TEXT | Label key and value from each entry in `metadata.labels` | - -### node_taints - -Built from `kubectl get nodes`, one row per taint - -| Column | Type | Description | -|:-------------------|---------|--------------------------------------------------------------| -| node_uid | TEXT | Node UID, from `metadata.uid` | -| key, value, effect | TEXT | Taint key, value and effect from each entry in `spec.taints` | - -## Built-in functions - -`now()` - returns the current time as an integer, in epoch seconds - -`to_utc(timestamp)` - convert epoch time to string form e.g. `YYYY-MM-DDTHH:MM:SSZ` - -`to_age(seconds)` - convert seconds to a more readable age string as seen in the `AGE` column of `kubectl get pods`, e.g. `5d`, `4h30m`. - -`to_size(bytes)` - convert a byte count to a more readable string, e.g. 
`1Gi`, `3.4Mi` \ No newline at end of file diff --git a/docs-tmp/extending.md b/docs-tmp/extending.md deleted file mode 100644 index db879a2..0000000 --- a/docs-tmp/extending.md +++ /dev/null @@ -1,257 +0,0 @@ - -## Note - -Configuration files should be protected to the same degree as your shell scripts and anything -on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. - -## Adding columns to an existing table - -To extend a table, use the `extend:` section in `~/.kugl/init.yaml`. This is a list of table names, -each with a list of new columns. An extension column specifies the column name, its -SQLite type (one of `int`, `real`, `text`) and a [JMESPath](https://jmespath.org/) -expression showing how to extract the column value from the JSON form of the resource. - -Example - -```yaml -extend: - - # Add the "owner" column to the pods table as shown in the Kugl README - -- table: pods - columns: - - name: owner - type: text - path: metadata.labels."com.mycompany/ml-job-owner" - # Comments are optional; you can see these with 'kugl --schema' - comment: ML team member who submitted the work - - # Using Karpenter on AWS? Add the Karpenter node pool and AWS provider ID - # to the nodes table. - -- table: nodes - columns: - - name: node_pool - type: text - path: metadata.labels."karpenter.sh/nodepool" - - name: provider_id - type: text - path: spec.providerID -``` - -## Adding a new table - -This works just like extending a table, with these differences -* Use the `create:` section rather than `extend:` -* Provide the name of the resource argument to `kubectl get` -* If the resource isn't built in (like `pods` or `nodes`), declare the resource and indicate whether it's namespaced. - -Example: this defines a new resource type and table for Argo workflows. 
- -```yaml -resources: - - name: workflows - namespaced: true - -create: - - table: workflows - resource: workflows - columns: - - name: name - type: text - path: metadata.name - - name: uid - type: text - path: metadata.uid - - name: namespace - type: text - path: metadata.namespace - - name: status - type: text - path: metadata.labels."workflows.argoproj.io/phase" -``` - -## Column extractors and defaults - -You've seen how the `path` extractor works, using JMESPath to identify an element in -the response JSON. You can also use the `label` extractor, which is a shortcut to -`metadata.labels`, and can either be a single string or a list of labels to check in order - -There are some useful defaults as well: -* resources are namespaced by default -* resources in `kubernetes.yaml` default to type `kubernetes` -* the default column type is `text` - -Here's a more concise way of defining the `workflows` table, above - -```yaml -resources: - - name: workflows - -create: - - table: workflows - resource: workflows - columns: - - name: name - path: metadata.name - - name: uid - path: metadata.uid - - name: namespace - path: metadata.namespace - - name: status - label: workflows.argoproj.io/phase -``` - -## Parsing data into numeric columns - -`kubectl` response values like `50Mi` (of memory) are unhelpful in queries, since you can't treat -them numerically. Kugl fixes this, offering additional data types that can be used in the `type` field -of a column definition and automatically convert response values. 
- -| Kugl type | SQLite type | Description | -|------------|--------------|-----------------------------------------------------------------------------| -| `size` | `INTEGER` | Memory size in bytes; accepts values like `50Mi` | -| `age` | `INTEGER` | Time delta in seconds; accepts values like `5d` or `4h30m` | -| `cpu` | `REAL` | CPU limit or request; accepts values like `0.5` or `300m` | -| `date` | `INTEGER` | Unix epoch timestamp in seconds; accepts values like `2021-01-01T12:34:56Z` | - -## Generating multiple rows per response item - -It's rare for a `kubectl get` response item to map directly to a single row in a table. For example, -a node can have multiple taints, and a pod can have multiple containers. Kugl handles this using -the `row_source` field in a column definition. Here's how the `node_taints` built-in table is defined. - -```yaml -create: - - table: node_taints - resource: nodes - row_source: - - items - - spec.taints - columns: - - name: node_uid - path: ^metadata.uid - - name: key - path: key - - name: effect - path: effect -``` - -Each element in `row_source` is a JMESPath expression that selects items relative to the prior selector. -Only the last element in the list is used to generate a row, but `path`s can refer to any part of the chain. -Each `"^"` at the start of a `path` refers to the part of the response one level higher than the bottom -`row_source` element. In this case - -* `^metadata.uid` means the `.metadata.uid` in each element of the response `items` array -* `key` and `effect` refer to each taint in the `spec.taints` array - -The default `row_source` is just `items`, which is why the example `workflows` table shown earlier doesn't -need to specify it. - -This syntax also applies to the `label` extractor. For example, if the `row_source` of a table needs to -address Job metadata but also metadata from the Job pod template, you can write this: - -```yaml - ... 
- resource: jobs - row_source: - - items - - spec.template - columns: - - name: label_from_job - label: ^a-job-label - - name: label_from_pod - label: a-pod-label -``` - -### More about row_source - -In detail, here's how `row_source` is handled. -* Begin with a list containing a single element, which is the entire response JSON. -* Apply the first `row_source` expression to each element of this list to build a new list - * If the expression yields a non-list result, add it to the new list - * If the expression yields a list, add each item (not the whole list) to the new list - * In either case, establish a parent / child relationship between the old and new items -* Repeat with each successive `row_source` entry. - -This can produce surprising results if one step in the `row_source` list tries to do too much. -Let's say the `node_taints` table didn't need a `^metadata.uid` reference, so only requires the -taint lists. This source list would not work, because `.spec` is not a child of `.items`. - -```yaml -row_source: - - items.spec.taints -``` - -Addressing each element in `items` requires a JMESpath [projection](https://jmespath.org/tutorial.html#projections), -in this case `items[*].spec`. Continuing this with `.taints` in a single expression will then create a list of lists -that must be flattened: - -```yaml -row_source: - - items[*].spec.taints[] -``` - -Although the multi-step `row_source` is incrementally slower for large lists, it's clearly less error-prone than -projecting and flattening, so is the recommended approach. - -As noted in [Troubleshooting](./trouble.md), running with `--debug itemize` will show the intermediate results of -`row_source` processing. - -### Extracting from dicts - -JMESPath lacks adequate support for addressing dictionaries. For example, if you want to build a table of -keys and values from environmet settings in YAML, there is no construct that will give you key-value pairs -from the fragment below. 
You can get the keys, or the values, but not both. - -```yaml -... -env: - AWS_BUCKET_NAME: my_budket - AWS_REGION: us-east-1 - ... -``` - -Kugl has a simple workaround for this. A `row_source` entry can have additional processing options, and for any row -source entry that addresses a dictionary, you can add the option `"kv"` to get key-value pairs. For example, if you -have adressed the above YAML data with - -```yaml -row_source: - - env -``` - -Change this to - -```yaml -row_source: - - env; kv -``` - -and Kugl will present the dictionary as if the data source originally looked like this: - -```yaml -env: - - key: AWS_BUCKET_NAME - value: my_bucket - - key: AWS_REGION - value: us-east-1 -``` - -It's then straightforward to take columns from these items with - -```yaml -columns: - - name: variable - path: key - - name: value - path: value -``` - -## Tips - -If creating multiple tables from a resource, you should use the `uid` column (sourced from `metadata.uid`) -as a join key, since this is a guaranteed unique key. - -The `utils:` section of `~/.kugl/init.yaml` is ignored during configuration parsing, so you can use it to store -reusable bits of YAML. \ No newline at end of file diff --git a/docs-tmp/multi.md b/docs-tmp/multi.md deleted file mode 100644 index 6992f4e..0000000 --- a/docs-tmp/multi.md +++ /dev/null @@ -1,73 +0,0 @@ -## Note - -Since configuration files can contain scripts, they should be protected to the same degree as your shell scripts -and anything on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. - -## Extending Kugl to AWS - -(So far this is just an experiment, the functionality is pretty limited.) - -Using the `exec` resource type described in [Other resource types](./docs-tmp/resources.md), you can -make AWS data available for query. 
For example: if `~/.kugl/ec2.yaml` contains - -```yaml -resources: - - name: instances - exec: aws ec2 describe-instances - -create: - - table: instances - resource: instances - row_source: - - Reservations - - Instances - columns: - - name: type - path: InstanceType - - name: zone - path: Placement.AvailabilityZone - - name: private_dns - path: PrivateDnsName - - name: state - path: State.Name - - name: launched - path: LaunchTime -``` - -you can write - -```shell -kugl "select type, zone, launched from ec2.instances where state = 'running'" -``` - -To make the instance data cacheable, you would need to use a cache key that varies based on your -AWS account settings, referencing something set in the environment. Kugl will use this to generate -the cache pathname. Example: - -```yaml -resources: - - name: instances - exec: aws ec2 describe-instances - cacheable: true - cache_key: $AWS_PROFILE -``` - -Obviously this has limited utility, since there's no way to filter the data before it's returned. -For example, you can't add an argument to a resource `exec` command based on the query terms. -This is still being developed. - -## Multi-schema queries - -You can also join across schemas. For example, given the above `instances` table, report on the -capacity per zone in an EKS cluster: - -```shell -kugl "SELECT e.zone, sum(n.cpu_alloc) as cpus, sum(n.gpu_alloc) as gpus - FROM kubernetes.nodes n - JOIN ec2.instances e ON n.name = e.hostname - GROUP BY 1 -``` - -Note the explicit use of a `kubernetes.` schema prefix. This is required when joining across schemas. -(While `kubernetes` is the default schema, you can't always rely on SQLite's search behavior for -unqualified table names. It's better to be explicit.) 
\ No newline at end of file diff --git a/docs-tmp/recommended.md b/docs-tmp/recommended.md deleted file mode 100644 index 2388e98..0000000 --- a/docs-tmp/recommended.md +++ /dev/null @@ -1,44 +0,0 @@ - -## Note - -Configuration files should be protected to the same degree as your shell scripts and anything -on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. - -## Recommended configuration - -Instance type is a useful column to have in the `nodes` table. Unfortunately, there is no standard -label for it. You can fix this with configuration. In `~/.kugl/kubernetes.yaml`, add - -```yaml -extend: - - table: nodes - columns: - - name: instance_type - label: - - node.kubernetes.io/instance-type - - beta.kubernetes.io/instance-type -``` - -This will handle common cases. If your cluster uses a different label, add it to the list. -You can use Kugl itself to find what's available, for example: - -```shell -kugl "select distinct key from node_labels where key like '%instance-type%'" -``` - -Once you've set up the correct labels, here's a handy report that reports available capacity, -partitioning nodes by instance type and `NoSchedule` / `NoExecute` taints: - -```shell -kugl " - WITH t AS ( - SELECT node_uid, group_concat(key) AS taints FROM node_taints - WHERE effect IN ('NoSchedule', 'NoExecute') GROUP BY 1 - ) - SELECT instance_type, count(1) AS count, sum(cpu_alloc) AS cpu, sum(gpu_alloc) AS gpu, t.taints - FROM nodes LEFT OUTER JOIN t ON t.node_uid = nodes.uid - GROUP BY 1, 5 ORDER BY 1, 5 -" -``` - -If this query is helpful, [save it](./shortcuts.md) for future use. 
diff --git a/docs-tmp/resources.md b/docs-tmp/resources.md deleted file mode 100644 index d9901f9..0000000 --- a/docs-tmp/resources.md +++ /dev/null @@ -1,122 +0,0 @@ -## Note - -Since configuration files can contain scripts, they should be protected to the same degree as your shell scripts -and anything on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. - -## Exec resources - -By replacing `file: pathname` with `exec: some command` you can have Kugl run any shell script that generates -JSON or YAML output. For example, this is equivalent to the above `file:` resource: - -```yaml -resource: - - name: kubeconfig - exec: cat ~/.kube/config -``` - -Unlike file resources, the results of running external commands can be cached, just as with Kubernetes resources. -To enable this, set `cacaheable: true` and provide a `cache_key` that will be used to generate the cache pathname. -This will need to have at least one environment variable reference, on the assumption that the command output -can vary based on the environment. - -For an example, see the table built on `aws ec2` [here](./multi.md). - -## File resources - -Kugl can be used to query YAML data in a file. For instance, this will implement a bit of `kubectl config get-contexts`. - -```yaml -resource: - - name: kubeconfig - file: ~/.kube/config - -create: - - table: contexts - resource: kubeconfig - row_source: - - contexts - columns: - - name: name - path: name - - name: cluster - path: context.cluster -``` - -Then - -```shell -kugl "select name, cluster from contexts" -``` - -(Not that helpful, but you may have much larger config files worth summarizing this way.) - -Environment variable references like `$HOME` are allowed in resource filenames. -Using `file: stdin` also works, and lets you pipe JSON or YAML to a Kugl query. - -## Folder resources - -These are like `file` resources except they can match files in a tree. 
Let's say you have a set of -configuration files per AWS region, with settings to be summarized from one specific file, example: - -```shell -~/env/us-east-1/config.yaml -~/env/us-east-2/config.yaml -~/env/us-west-1/config.yaml -... -``` - -Within each config file is a set of environment variables: - -```shell -env: - - name: AWS_REGION - value: us-east-1 - - name: AWS_ACCOUNT - value: 123456789012 - - name: AWS_VPC - value: vpc-12345678 -``` - -This folder resource definition will address each of the files. - -```yaml -resource: - - name: by_region - # The root of the folder tree - folder: ~/env - # Pattern to match files, as understood by Path.glob - glob: "**/config.yaml" - # Regexp to extract additional metadata from filenames - match: "env/(?P.+)/config.yaml" -``` - -The resource presents each file as a dictionary, with the `match` element offering the metadate extracted -from the pattern match, example - -```json -[ - { "match": {"region": "us-east-1" }, "content": { ... file contents ... } }, - { "match": {"region": "us-east-2" }, "content": { ... file contents ... } }, - { "match": {"region": "us-west-1" }, "content": { ... file contents ... } }, -] -``` - -To build a table showing environment settings by region: - -```yaml -create: - - table: env_settings - resource: by_region - row_source: - # Address each element in the result list - - "[]" - # Focus on the environment settings - - content.env - columns: - - name: region - path: ^match.region - - name: name - path: name - - name: value - path: value -``` diff --git a/docs-tmp/settings.md b/docs-tmp/settings.md deleted file mode 100644 index de150c4..0000000 --- a/docs-tmp/settings.md +++ /dev/null @@ -1,23 +0,0 @@ - -## Note - -Configuration files should be protected to the same degree as your shell scripts and anything -on your `PYTHONPATH.` Kugl will refuse to read a configuration file that is world-writable. 
- -## Settings - -The `settings` section in `~/.kugl/init.yaml` can be used to specify cache behaviors once, -rather than on every usage from the command line. Example: - -```yaml -settings: - cache_timeout: 5m - reckless: true -``` - -The `init_path` section of `settings` can be used to specify multiple configuration folders. -This is useful for team configuration files. [Shortcuts|./shortcuts.md] in `init.yaml` and -schema configurations in those folders will be applied before entries in `~/.kugl`. - -NOTE: other `init.yaml` fils can contain only shortcuts; the `settings` section of `init.yaml` -is valid only in `~/.kugl/init.yaml`. \ No newline at end of file diff --git a/docs-tmp/shortcuts.md b/docs-tmp/shortcuts.md deleted file mode 100644 index 1793766..0000000 --- a/docs-tmp/shortcuts.md +++ /dev/null @@ -1,36 +0,0 @@ - -## Saving queries - -The `shortcuts` section in `~/.kugl/init.yaml` is a map from query names to lists of command-line arguments. - -Example, to save the queries shown in the [README](../README.md) and in -[recommended configuration](./recommended.md), add this to `~/.kugl/init.yaml`: - -```yaml -shortcuts: - - - name: hi-mem - args: - - | - SELECT name, to_size(mem_req) FROM pods - WHERE phase = 'Running' - ORDER BY mem_req DESC LIMIT 15 - - - name: nodes - # Comment field is optional - comment: Schedulable vs unschedulable capacity - args: - - | - WITH t AS ( - SELECT node_uid, group_concat(key) AS taints FROM node_taints - WHERE effect IN ('NoSchedule', 'NoExecute') GROUP BY 1 - ) - SELECT instance_type, count(1) AS count, sum(cpu_alloc) AS cpu, sum(gpu_alloc) AS gpu, t.taints - FROM nodes LEFT OUTER JOIN t ON t.node_uid = nodes.uid - GROUP BY 1, 5 ORDER BY 1, 5 -``` - -To run, type `kugl hi-mem` or `kugl nodes`. - -Simple parameter substitution might be offered in the future, but if you -need more powerful templates, your own wrapper script is the short-term answer. 
\ No newline at end of file diff --git a/docs-tmp/syntax.md b/docs-tmp/syntax.md deleted file mode 100644 index e40444e..0000000 --- a/docs-tmp/syntax.md +++ /dev/null @@ -1,25 +0,0 @@ - -## Usage - -```shell -kugl [options] [sql | shortcut] -``` - -### Kubernetes options - -Most invocations of Kugl will need `-a` or `-n namespace`, just like `kubectl`. -If your cluster is small, you could also (for instance) `alias kg="kugl -a"` and use `where namespace = ...` instead. - -* `-a, --all, --all-namespaces` - Look in all namespaces for Kubernetes resources. May not be combined with `-n`. -* `-n, --namespace NS` - Look in namespace `NS` for Kubernetes resources. May not be combined with `-a`. - -### Cache control - -* `-c, --cache` - Always use cached data, if available, regardless of its age -* `-r, --reckless` - Don't print stale data warnings -* `-t, --timeout AGE` - Change the expiration time for cached data, e.g. `5m`, `1h`; the default is `2m` (two minutes) -* `-u, --update` - Always updated from `kubectl`, regardless of data age - -## Other - -* `-H, --no-header` -- Suppress column headers diff --git a/docs-tmp/trouble.md b/docs-tmp/trouble.md deleted file mode 100644 index 5a81cc5..0000000 --- a/docs-tmp/trouble.md +++ /dev/null @@ -1,45 +0,0 @@ - -### My query isn't working - -Don't forget to use `-n/--namespace ` or `-a/--all`. The `default` namespace in -Kubernetes often has few or no resources. - -Read the [JMESPath tutorial](https://jmespath.org/tutorial.html) -and [SQLite documentation](https://www.sqlite.org/docs.html) thoroughly. - -Debug `row_source` and `path` problems by installing [jp](https://github.com/jmespath/jp) and feeding -it examples of your JSON data. JMESPath and `jq` don't behave the same. 
- -Several flags are available for the `--debug` option, try whatever seems relevant: -* `--debug cache` prints the cache files consulted and what resources will be refreshed -* `--debug fetch` prints each invocation of `kubectl` -* `--debug folder` prints each file considered for a `folder` resource -* `--debug itemize` summarizes the item generated for each step in a `row_source` (verbose) -* `--debug extract` prints the source and value of every row, by column (verbose) -* `--debug sqlite` shows the SQL for all statements executed, including table creation - -These can be combined, e.g. `--debug fetch,itemize`. To turn on all debugging options, use `--debug all`. - -### I found a bug - -Help me help you! I don't have access to your Kubernetes cluster, so you'll have to capture the -neccessary detail. - -* Follow recommendations for debugging queries, above. -* Use a low-activity namespace if possible, so the amount of data involved is small. -* Try to reproduce the problem with as simple a query as possible, ideally on one table with no joins. -* Run the command with the relevant `--debug` options and include the output -* If possible, include the content of the cache files that are named in the debug output. - -If there is too much material, you can post it to a service like [Pastebin](https://pastebin.com). -If it includes secure information from your cluster, please redact it before posting. - -### Can I give feedback without opening an issue? - -Sure, you can email `kugl dot devel at gmail dot com`. - -### I didn't receive a response - -Like many open source committers, the author has a family and a day job. 🙂 - -Please be patient, and thank you for trying Kugl! \ No newline at end of file diff --git a/docs/breaking.rst b/docs/breaking.rst index c68a1a4..c036468 100644 --- a/docs/breaking.rst +++ b/docs/breaking.rst @@ -1,6 +1,33 @@ Breaking changes ---------------- +Kugl is still in alpha. +Please expect bugs and backward-incompatible changes. + +.. 
_080: + +0.8.0 +~~~~~ + +Breaking changes are significant, gearing up for a 1.0 release. + +The new ``from:`` syntax, an alternative to ``path:`` and ``label:``, is backwards compatible, but +the old syntax is deprecated and will be removed in a future release. + +Extending tables: + +- Named scope syntax for multi-step ``row_source``: each entry takes ``as <name>`` and + columns reference ancestor objects with an ``in <name>`` suffix (e.g. ``metadata.uid in node``); + the old ``^`` parent-hop syntax is removed + +CLI changes: + +- Added ``-c``/``--context`` option to specify a Kubernetes context +- Renamed ``-a`` option to ``-A`` for consistency with ``kubectl`` +- Renamed ``-c``/``--cache`` to ``-s``/``--stale`` +- Renamed ``-u``/``--update`` to ``-r``/``--refresh`` +- Renamed ``-r``/``--reckless`` to ``-q``/``--quiet`` (and ``reckless:`` in settings to ``quiet:``) + .. _050: 0.5.0 diff --git a/docs/builtins.rst b/docs/builtins.rst index 09119cd..839b5c2 100644 --- a/docs/builtins.rst +++ b/docs/builtins.rst @@ -229,6 +229,138 @@ Built from ``kubectl get nodes``, one row per taint | | | entry in ``spec.taints`` | +--------------------+------+------------------------------------------+ +services +~~~~~~~~ + +Built from ``kubectl get services``, one row per service. 
+ ++-------------+---------+--------------------------------------------------+ +| Column | Type | Description | ++=============+=========+==================================================+ +| name | TEXT | Service name, from ``metadata.name`` | ++-------------+---------+--------------------------------------------------+ +| uid | TEXT | Service UID, from ``metadata.uid`` | ++-------------+---------+--------------------------------------------------+ +| namespace | TEXT | Service namespace, from ``metadata.namespace`` | ++-------------+---------+--------------------------------------------------+ +| type | TEXT | Service type: ``ClusterIP``, ``NodePort``, | +| | | ``LoadBalancer``, or ``ExternalName`` | ++-------------+---------+--------------------------------------------------+ +| cluster_ip | TEXT | Cluster IP, from ``spec.clusterIP``; null for | +| | | headless services and ``ExternalName`` type | ++-------------+---------+--------------------------------------------------+ +| external_ip | TEXT | External IP or hostname for ``LoadBalancer`` | +| | | services, from ``status.loadBalancer.ingress``; | +| | | null otherwise | ++-------------+---------+--------------------------------------------------+ +| creation_ts | INTEGER | Creation timestamp in epoch seconds, from | +| | | ``metadata.creationTimestamp`` | ++-------------+---------+--------------------------------------------------+ + +service_labels +~~~~~~~~~~~~~~ + +Built from ``kubectl get services``, one row per label. 
+ ++--------------+------+--------------------------------------------------+ +| Column | Type | Description | ++==============+======+==================================================+ +| service_uid | TEXT | Service UID, from ``metadata.uid`` | ++--------------+------+--------------------------------------------------+ +| key, value | TEXT | Label key and value from each entry in | +| | | ``metadata.labels`` | ++--------------+------+--------------------------------------------------+ + +deployments +~~~~~~~~~~~ + +Built from ``kubectl get deployments``, one row per deployment. + ++-------------+---------+--------------------------------------------------+ +| Column | Type | Description | ++=============+=========+==================================================+ +| name | TEXT | Deployment name, from ``metadata.name`` | ++-------------+---------+--------------------------------------------------+ +| uid | TEXT | Deployment UID, from ``metadata.uid`` | ++-------------+---------+--------------------------------------------------+ +| namespace | TEXT | Deployment namespace, from | +| | | ``metadata.namespace`` | ++-------------+---------+--------------------------------------------------+ +| replicas | INTEGER | Desired replica count, from ``spec.replicas`` | ++-------------+---------+--------------------------------------------------+ +| ready | INTEGER | Ready replicas, from ``status.readyReplicas`` | ++-------------+---------+--------------------------------------------------+ +| available | INTEGER | Available replicas, from | +| | | ``status.availableReplicas`` | ++-------------+---------+--------------------------------------------------+ +| updated | INTEGER | Updated replicas, from | +| | | ``status.updatedReplicas`` | ++-------------+---------+--------------------------------------------------+ +| strategy | TEXT | Rollout strategy, from ``spec.strategy.type``; | +| | | ``RollingUpdate`` or ``Recreate`` | 
++-------------+---------+--------------------------------------------------+ +| creation_ts | INTEGER | Creation timestamp in epoch seconds, from | +| | | ``metadata.creationTimestamp`` | ++-------------+---------+--------------------------------------------------+ + +deployment_labels +~~~~~~~~~~~~~~~~~ + +Built from ``kubectl get deployments``, one row per label. + ++----------------+------+--------------------------------------------------+ +| Column | Type | Description | ++================+======+==================================================+ +| deployment_uid | TEXT | Deployment UID, from ``metadata.uid`` | ++----------------+------+--------------------------------------------------+ +| key, value | TEXT | Label key and value from each entry in | +| | | ``metadata.labels`` | ++----------------+------+--------------------------------------------------+ + +events +~~~~~~ + +Built from ``kubectl get events``, one row per event. Kubernetes deduplicates +repeated events, so ``count`` reflects how many times an event occurred rather +than the number of rows. Note that ``type`` and ``count`` conflict with SQL +keywords / aggregate function names and must be backtick-quoted in queries, +e.g. ``SELECT \`type\`, \`count\` FROM events``. + ++---------------+---------+------------------------------------------------------------+ +| Column | Type | Description | ++===============+=========+============================================================+ +| namespace | TEXT | Event namespace, from ``metadata.namespace`` | ++---------------+---------+------------------------------------------------------------+ +| type | TEXT | Event type: ``Normal`` or ``Warning``; backtick-quote | +| | | in SQL | ++---------------+---------+------------------------------------------------------------+ +| reason | TEXT | Short machine-readable reason, e.g. 
``Scheduled``, | +| | | ``OOMKilling`` | ++---------------+---------+------------------------------------------------------------+ +| message | TEXT | Human-readable event description | ++---------------+---------+------------------------------------------------------------+ +| count | INTEGER | Number of times this event has occurred; backtick-quote | +| | | in SQL | ++---------------+---------+------------------------------------------------------------+ +| first_ts | INTEGER | First occurrence timestamp in epoch seconds, from | +| | | ``firstTimestamp`` | ++---------------+---------+------------------------------------------------------------+ +| last_ts | INTEGER | Last occurrence timestamp in epoch seconds, from | +| | | ``lastTimestamp`` | ++---------------+---------+------------------------------------------------------------+ +| obj_kind | TEXT | Involved object kind, from ``involvedObject.kind``, | +| | | e.g. ``Pod``, ``Node`` | ++---------------+---------+------------------------------------------------------------+ +| obj_name | TEXT | Involved object name, from ``involvedObject.name``; | +| | | primary join key to other tables | ++---------------+---------+------------------------------------------------------------+ +| obj_namespace | TEXT | Involved object namespace, from | +| | | ``involvedObject.namespace`` | ++---------------+---------+------------------------------------------------------------+ +| source | TEXT | Generating component, from ``source.component``, | +| | | e.g. ``kubelet``, ``default-scheduler`` | ++---------------+---------+------------------------------------------------------------+ + Built-in functions ------------------ diff --git a/docs/extending.rst b/docs/extending.rst index a7580af..410f083 100644 --- a/docs/extending.rst +++ b/docs/extending.rst @@ -85,7 +85,19 @@ Column extractors and defaults You've seen how the ``path`` extractor works, using JMESPath to identify an element in the response JSON. 
You can also use the ``label`` extractor, which is a shortcut to ``metadata.labels``, and can either be -a single string or a list of labels to check in order +a single string or a list of labels to check in order. + +A third option, ``from:``, combines both: Kugl auto-detects whether a +value is a label (matches ``domain/key`` format like +``karpenter.sh/nodepool``) or a JMESPath expression (everything else). +So these two column definitions are equivalent: + +.. code:: yaml + + - name: node_pool + label: karpenter.sh/nodepool + - name: node_pool + from: karpenter.sh/nodepool There are some useful defaults as well: @@ -152,30 +164,29 @@ a pod can have multiple containers. Kugl handles this using the - table: node_taints resource: nodes row_source: - - items - - spec.taints + - items as node + - spec.taints as taint columns: - name: node_uid - path: ^metadata.uid + path: metadata.uid in node - name: key - path: key + path: key in taint - name: effect - path: effect + path: effect in taint -Each element in ``row_source`` is a JMESPath expression that selects -items relative to the prior selector. Only the last element in the list -is used to generate a row, but ``path``\ s can refer to any part of the -chain. Each ``"^"`` at the start of a ``path`` refers to the part of the -response one level higher than the bottom ``row_source`` element. In -this case +Each element in ``row_source`` is a JMESPath expression followed by an +``as <name>`` label. Every step must be named when the table has more +than one ``row_source`` entry.
Column ``path`` and ``label`` values +identify which level they address by ending with ``in <name>``: -- ``^metadata.uid`` means the ``.metadata.uid`` in each element of the - response ``items`` array -- ``key`` and ``effect`` refer to each taint in the ``spec.taints`` - array +- ``metadata.uid in node`` reads ``.metadata.uid`` from each element of + the ``items`` array (named ``node``) +- ``key in taint`` and ``effect in taint`` read fields from each taint + in the ``spec.taints`` array (named ``taint``) The default ``row_source`` is just ``items``, which is why the example -``workflows`` table shown earlier doesn't need to specify it. +``workflows`` table shown earlier doesn't need to specify it. Single-step +tables use bare JMESPath paths with no ``in <name>`` qualifier. This syntax also applies to the ``label`` extractor. For example, if the ``row_source`` of a table needs to address Job metadata but also @@ -186,13 +197,13 @@ metadata from the Job pod template, you can write this: ... resource: jobs row_source: - - items - - spec.template + - items as job + - spec.template as template columns: - name: label_from_job - label: ^a-job-label + label: a-job-label in job - name: label_from_pod - label: a-pod-label + label: a-pod-label in template More about row_source ~~~~~~~~~~~~~~~~~~~~~ @@ -213,8 +224,8 @@ In detail, here's how ``row_source`` is handled. - Repeat with each successive ``row_source`` entry. This can produce surprising results if one step in the ``row_source`` -list tries to do too much. Let's say the ``node_taints`` table didn't -need a ``^metadata.uid`` reference, so only requires the taint lists. +list tries to do too much. Let's say the ``node_taints`` table only +needed the taint lists, with no reference to node metadata. This source list would not work, because ``.spec`` is not a child of ``.items``.
@@ -302,6 +313,7 @@ Tips If creating multiple tables from a resource, you should use the ``uid`` column (sourced from ``metadata.uid``) as a join key, since this is a guaranteed unique key. +(Example: ``nodes`` and ``node_labels``, ``pods`` and ``pod_labels``.) The ``utils:`` section of ``~/.kugl/init.yaml`` is ignored during configuration parsing, so you can use it to store reusable bits of YAML. diff --git a/docs/index.rst b/docs/index.rst index 168d5f5..9d413b0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,18 +1,39 @@ -Kugl Documentation -================== +.. include:: ../README.rst + :end-before: BEGIN_LEARN_MORE -Kugl lets you use SQL to query Kubernetes resources. +Learn more +---------- + +- :doc:`Command-line syntax ` +- :doc:`Recommended configuration ` +- :doc:`Settings ` +- :doc:`Shortcuts ` +- :doc:`Built-in tables and functions ` +- :doc:`Configuring new columns and tables ` +- :doc:`Troubleshooting and feedback ` +- Beyond Kubernetes and kubectl + + - :doc:`Other resource types ` + - :doc:`Additional schemas ` + +- `Release notes `__ +- :doc:`Breaking changes ` +- `License `__ + +.. include:: ../README.rst + :start-after: END_LEARN_MORE .. toctree:: + :hidden: :maxdepth: 1 :caption: Contents: Usage + Recommended Configuration + Settings + Shortcuts Built-in Tables Extending Tables - Shortcuts - Settings - Recommended Configuration Other Resource Types Multi-schema Queries Breaking Changes diff --git a/docs/multi.rst b/docs/multi.rst index 1ae3f6d..6be9c40 100644 --- a/docs/multi.rst +++ b/docs/multi.rst @@ -7,12 +7,13 @@ Other resource types ``PYTHONPATH.`` Kugl will refuse to read a configuration file that is world-writable. +.. warning:: + This is experimental, the functionality is pretty limited. + It's also subject to change. + Extending Kugl to AWS ~~~~~~~~~~~~~~~~~~~~~ -(So far this is just an experiment, the functionality is pretty -limited.)
- Using the ``exec`` resource type described in `Other resource types <./resources.rst>`__, you can make AWS data available for query. For example: if ``~/.kugl/ec2.yaml`` contains @@ -27,19 +28,19 @@ query. For example: if ``~/.kugl/ec2.yaml`` contains - table: instances resource: instances row_source: - - Reservations - - Instances + - Reservations as reservation + - Instances as instance columns: - name: type - path: InstanceType + path: InstanceType in instance - name: zone - path: Placement.AvailabilityZone + path: Placement.AvailabilityZone in instance - name: private_dns - path: PrivateDnsName + path: PrivateDnsName in instance - name: state - path: State.Name + path: State.Name in instance - name: launched - path: LaunchTime + path: LaunchTime in instance you can write diff --git a/docs/resources.rst b/docs/resources.rst index 06734d0..9a67ddb 100644 --- a/docs/resources.rst +++ b/docs/resources.rst @@ -111,9 +111,9 @@ element offering the metadate extracted from the pattern match, example .. code:: json [ - { "match": {"region": "us-east-1" }, "content": { ... file contents ... } }, - { "match": {"region": "us-east-2" }, "content": { ... file contents ... } }, - { "match": {"region": "us-west-1" }, "content": { ... file contents ... 
} }, + { "match": {"region": "us-east-1" }, "content": "file contents as string" }, + { "match": {"region": "us-east-2" }, "content": "file contents as string" }, + { "match": {"region": "us-west-1" }, "content": "file contents as string" }, ] To build a table showing environment settings by region: @@ -124,14 +124,12 @@ To build a table showing environment settings by region: - table: env_settings resource: by_region row_source: - # Address each element in the result list - - "[]" - # Focus on the environment settings - - content.env + - "[] as file" + - content.env as setting columns: - name: region - path: ^match.region + path: match.region in file - name: name - path: name + path: name in setting - name: value - path: value + path: value in setting diff --git a/docs/settings.rst b/docs/settings.rst index 86c6563..6b5808b 100644 --- a/docs/settings.rst +++ b/docs/settings.rst @@ -14,7 +14,7 @@ Example: settings: cache_timeout: 5m - reckless: true + quiet: true The ``init_path`` section of ``settings`` can be used to specify multiple configuration folders. This is useful for team configuration diff --git a/docs/syntax.rst b/docs/syntax.rst index 3d94164..64b0a64 100644 --- a/docs/syntax.rst +++ b/docs/syntax.rst @@ -16,17 +16,19 @@ Most invocations of Kugl will need ``-a`` or ``-n namespace``, just like Kubernetes resources. May not be combined with ``-n``. - ``-n, --namespace NS`` - Look in namespace ``NS`` for Kubernetes resources. May not be combined with ``-a``. +- ``-c, --context CONTEXT`` - Use context ``CONTEXT`` for Kubernetes + resources. If not specified, the current context is used.
Cache control ~~~~~~~~~~~~~ -- ``-c, --cache`` - Always use cached data, if available, regardless of +- ``-s, --stale`` - Always use cached data, if available, regardless of its age -- ``-r, --reckless`` - Don't print stale data warnings +- ``-r, --refresh`` - Always fetch fresh data from ``kubectl``, + regardless of data age +- ``-q, --quiet`` - Don't print stale data warnings - ``-t, --timeout AGE`` - Change the expiration time for cached data, e.g. ``5m``, ``1h``; the default is ``2m`` (two minutes) -- ``-u, --update`` - Always updated from ``kubectl``, regardless of data - age Other ~~~~~~~~~~~~~ diff --git a/kugl/api.py b/kugl/api.py index eed1cc5..47eb62a 100644 --- a/kugl/api.py +++ b/kugl/api.py @@ -17,7 +17,7 @@ ) __all__ = [ - "resource", + "resource_type", "table", "column", "Resource", @@ -30,7 +30,7 @@ ] -def resource(type: str, schema_defaults: list[str] = []): +def resource_type(type: str, schema_defaults: list[str] = []): def wrap(cls): _Registry.get().add_resource(cls, type, schema_defaults) return cls diff --git a/kugl/builtins/resources.py b/kugl/builtins/resources.py index 7287c0b..2e81b19 100644 --- a/kugl/builtins/resources.py +++ b/kugl/builtins/resources.py @@ -6,7 +6,7 @@ from pydantic import model_validator -from kugl.api import resource, fail, run, Resource +from kugl.api import resource_type, fail, run, Resource from kugl.util import best_guess_parse, KPath, debugging @@ -21,7 +21,7 @@ def set_cacheable(cls, resource: "NonCacheableResource") -> "NonCacheableResourc return resource -@resource("data") +@resource_type("data") class DataResource(NonCacheableResource): """A resource whose data is provided directly in the configuration file.""" @@ -31,7 +31,7 @@ def get_objects(self): return self.data -@resource("file") +@resource_type("file") class FileResource(NonCacheableResource): """A resource that reads a file from disk. 
@@ -51,7 +51,7 @@ def get_objects(self): fail(f"failed to read {self.file} in resource {self.name}", e) -@resource("folder") +@resource_type("folder") class FolderResource(NonCacheableResource): """A resource that reads selectively from a folder tree. @@ -95,7 +95,7 @@ def get_objects(self): return result -@resource("exec") +@resource_type("exec") class ExecResource(Resource): exec: Union[str, list[str]] cache_key: Optional[str] = None diff --git a/kugl/builtins/schemas/kubernetes.py b/kugl/builtins/schemas/kubernetes.py index 1ac32f6..cb9b22e 100644 --- a/kugl/builtins/schemas/kubernetes.py +++ b/kugl/builtins/schemas/kubernetes.py @@ -10,19 +10,25 @@ import os from argparse import ArgumentParser from threading import Thread +from typing import Optional from pydantic import model_validator from ..helpers import Limits, ItemHelper, PodHelper, JobHelper, CronJobHelper -from kugl.api import table, fail, resource, run, parse_utc, Resource, column +from kugl.api import table, fail, resource_type, run, parse_utc, Resource, column from kugl.util import WHITESPACE_RE, kube_context -@resource("kubernetes", schema_defaults=["kubernetes"]) +@resource_type("kubernetes", schema_defaults=["kubernetes"]) class KubernetesResource(Resource): + # Does 'kubectl get' for this resource need a --namespace flag? 
namespaced: bool + # User specified -A on the command line _all_ns: bool - _ns: str + # User specified -n on the command line and this is the namespace + _ns: Optional[str] + # User specified -c on the command line and this is the context + _context: Optional[str] @model_validator(mode="after") @classmethod @@ -34,21 +40,22 @@ def set_cacheable(cls, resource: "KubernetesResource") -> "KubernetesResource": @classmethod def add_cli_options(cls, ap: ArgumentParser): - ap.add_argument("-a", "--all", "--all-namespaces", dest="all", default=False, action="store_true") + ap.add_argument("-A", "--all", "--all-namespaces", dest="all", default=False, action="store_true") ap.add_argument("-n", "--namespace", type=str) def handle_cli_options(self, args): if args.all and args.namespace: - fail("Cannot use both -a/--all and -n/--namespace") + fail("Cannot use both -A/--all and -n/--namespace") if args.all: self._ns = "__all" self._all_ns = True else: self._ns = args.namespace or "default" self._all_ns = False + self._context = args.context def cache_path(self) -> str: - return f"{kube_context()}/{self._ns}.{self.name}.json" + return f"{self._context or kube_context()}/{self._ns}.{self.name}.json" def get_objects(self) -> dict: """Fetch resources from Kubernetes using kubectl. 
@@ -56,13 +63,14 @@ def get_objects(self) -> dict: :return: JSON as output by "kubectl get {self.name} -o json" """ unit_testing = "KUGL_UNIT_TESTING" in os.environ + context_flag = ["--context", self._context] if self._context else [] namespace_flag = ["--all-namespaces"] if self._all_ns else ["-n", self._ns] if self.name == "pods": pod_statuses = {} # Kick off a thread to get pod statuses def _fetch(): - _, output, _ = run(["kubectl", "get", "pods", *namespace_flag]) + _, output, _ = run(["kubectl", *context_flag, "get", "pods", *namespace_flag]) pod_statuses.update(self._pod_status_from_pod_list(output)) status_thread = Thread(target=_fetch, daemon=True) @@ -71,9 +79,9 @@ def _fetch(): if unit_testing: status_thread.join() if self.namespaced: - _, output, _ = run(["kubectl", "get", self.name, *namespace_flag, "-o", "json"]) + _, output, _ = run(["kubectl", *context_flag, "get", self.name, *namespace_flag, "-o", "json"]) else: - _, output, _ = run(["kubectl", "get", self.name, "-o", "json"]) + _, output, _ = run(["kubectl", *context_flag, "get", self.name, "-o", "json"]) data = json.loads(output) if self.name == "pods": # Add pod status to pods @@ -311,3 +319,126 @@ def make_rows(self, context) -> list[tuple[dict, tuple]]: @table(schema="kubernetes", name="cronjob_labels", resource="cronjobs") class CronJobLabelsTable(LabelsTable): UID_FIELD = "cronjob_uid" + + +@table(schema="kubernetes", name="services", resource="services") +class ServicesTable: + _COLUMNS = [ + column("name", "TEXT", "service name, from metadata.name"), + column("uid", "TEXT", "service UID, from metadata.uid"), + column("namespace", "TEXT", "service namespace, from metadata.namespace"), + column("type", "TEXT", "service type: ClusterIP, NodePort, LoadBalancer, or ExternalName"), + column("cluster_ip", "TEXT", "cluster IP, or null for headless services"), + column("external_ip", "TEXT", "external IP or hostname for LoadBalancer services, or null"), + column("creation_ts", "INTEGER", "creation 
timestamp in epoch seconds, from metadata.creationTimestamp"), + ] + + def columns(self): + return self._COLUMNS + + def make_rows(self, context) -> list[tuple[dict, tuple]]: + for item in context.data["items"]: + svc = ItemHelper(item) + cluster_ip = item["spec"].get("clusterIP") + ingress = item.get("status", {}).get("loadBalancer", {}).get("ingress", []) + external_ip = (ingress[0].get("ip") or ingress[0].get("hostname")) if ingress else None + yield ( + item, + ( + svc.name, + svc.metadata.get("uid"), + svc.namespace, + item["spec"].get("type"), + None if cluster_ip == "None" else cluster_ip, + external_ip, + parse_utc(svc.metadata.get("creationTimestamp")), + ), + ) + + +@table(schema="kubernetes", name="service_labels", resource="services") +class ServiceLabelsTable(LabelsTable): + UID_FIELD = "service_uid" + + +@table(schema="kubernetes", name="deployments", resource="deployments") +class DeploymentsTable: + _COLUMNS = [ + column("name", "TEXT", "deployment name, from metadata.name"), + column("uid", "TEXT", "deployment UID, from metadata.uid"), + column("namespace", "TEXT", "deployment namespace, from metadata.namespace"), + column("replicas", "INTEGER", "desired replica count, from spec.replicas"), + column("ready", "INTEGER", "ready replicas, from status.readyReplicas"), + column("available", "INTEGER", "available replicas, from status.availableReplicas"), + column("updated", "INTEGER", "updated replicas, from status.updatedReplicas"), + column("strategy", "TEXT", "rollout strategy: RollingUpdate or Recreate, from spec.strategy.type"), + column("creation_ts", "INTEGER", "creation timestamp in epoch seconds, from metadata.creationTimestamp"), + ] + + def columns(self): + return self._COLUMNS + + def make_rows(self, context) -> list[tuple[dict, tuple]]: + for item in context.data["items"]: + deploy = ItemHelper(item) + status = item.get("status", {}) + yield ( + item, + ( + deploy.name, + deploy.metadata.get("uid"), + deploy.namespace, + 
item["spec"].get("replicas"), + status.get("readyReplicas"), + status.get("availableReplicas"), + status.get("updatedReplicas"), + item["spec"].get("strategy", {}).get("type"), + parse_utc(deploy.metadata.get("creationTimestamp")), + ), + ) + + +@table(schema="kubernetes", name="deployment_labels", resource="deployments") +class DeploymentLabelsTable(LabelsTable): + UID_FIELD = "deployment_uid" + + +@table(schema="kubernetes", name="events", resource="events") +class EventsTable: + _COLUMNS = [ + column("namespace", "TEXT", "event namespace, from metadata.namespace"), + column("type", "TEXT", "event type: Normal or Warning — quote with backticks in SQL"), + column("reason", "TEXT", "short machine-readable event reason"), + column("message", "TEXT", "human-readable event description"), + column("count", "INTEGER", "number of times this event has occurred — quote with backticks in SQL"), + column("first_ts", "INTEGER", "first occurrence timestamp in epoch seconds, from firstTimestamp"), + column("last_ts", "INTEGER", "last occurrence timestamp in epoch seconds, from lastTimestamp"), + column("obj_kind", "TEXT", "involved object kind, from involvedObject.kind"), + column("obj_name", "TEXT", "involved object name, from involvedObject.name"), + column("obj_namespace", "TEXT", "involved object namespace, from involvedObject.namespace"), + column("source", "TEXT", "component that generated the event, from source.component"), + ] + + def columns(self): + return self._COLUMNS + + def make_rows(self, context) -> list[tuple[dict, tuple]]: + for item in context.data["items"]: + event = ItemHelper(item) + obj = item.get("involvedObject", {}) + yield ( + item, + ( + event.namespace, + item.get("type"), + item.get("reason"), + item.get("message"), + item.get("count"), + parse_utc(item.get("firstTimestamp")), + parse_utc(item.get("lastTimestamp")), + obj.get("kind"), + obj.get("name"), + obj.get("namespace"), + item.get("source", {}).get("component"), + ), + ) diff --git 
a/kugl/builtins/schemas/kubernetes.yaml b/kugl/builtins/schemas/kubernetes.yaml index ff49c86..b01f93f 100644 --- a/kugl/builtins/schemas/kubernetes.yaml +++ b/kugl/builtins/schemas/kubernetes.yaml @@ -11,6 +11,12 @@ resources: namespaced: true - name: nodes namespaced: false + - name: services + namespaced: true + - name: deployments + namespaced: true + - name: events + namespaced: true # node_taints builtin table is done here because it doesn't have any special column extraction # logic, and it serves as a good unit test. @@ -19,15 +25,15 @@ create: - table: node_taints resource: nodes row_source: - - items - - spec.taints + - items as node + - spec.taints as taint columns: - name: node_uid - path: ^metadata.uid + from: metadata.uid in node comment: node UID, from metadata.uid - name: key - path: key + from: key in taint comment: taint key - name: effect - path: effect + from: effect in taint comment: taint effect diff --git a/kugl/impl/config.py b/kugl/impl/config.py index 129467b..0332f0a 100644 --- a/kugl/impl/config.py +++ b/kugl/impl/config.py @@ -2,14 +2,15 @@ Pydantic models for configuration files. 
""" +import re from os.path import expandvars, expanduser from typing import Optional, Tuple, Callable, Union import jmespath -from pydantic import BaseModel, ConfigDict, ValidationError +from pydantic import BaseModel, ConfigDict, Field, ValidationError from pydantic.functional_validators import model_validator -from .extract import ColumnType, KUGL_TYPE_TO_SQL_TYPE, LabelExtractor, PathExtractor +from .extract import ColumnType, KUGL_TYPE_TO_SQL_TYPE, FieldRef, LabelExtractor, PathExtractor, is_label from kugl.util import ( Age, ConfigPath, @@ -35,7 +36,7 @@ class Settings(BaseModel): model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) cache_timeout: Union[Age, int] = Age(120) - reckless: bool = False + quiet: bool = False no_headers: bool = False init_path: list[str] = [] @@ -124,9 +125,10 @@ def recognize_type(cls, column: "Column") -> "Column": class UserColumn(Column): """Holds one entry from a columns: list in a user config file.""" - model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True) + model_config = ConfigDict(extra="forbid", arbitrary_types_allowed=True, populate_by_name=True) path: Optional[str] = None label: Optional[Union[str, list[str]]] = None + from_: Optional[str] = Field(None, alias="from") # Parsed value of self.path _finder: jmespath.parser.Parser # Number of ^ in self.path @@ -143,21 +145,91 @@ def gen_extractor(cls, column: "UserColumn") -> "UserColumn": Generate the Extractor instance for a column definition; given an object, it will return a column value of the appropriate type. 
""" - if column.path and column.label: + has_path = column.path is not None + has_label = column.label is not None + has_from = column.from_ is not None + + if has_from and (has_path or has_label): + raise ValueError("cannot specify 'from' alongside 'path' or 'label'") + if has_path and has_label: raise ValueError("cannot specify both path and label") - elif column.path: - column._extractor = PathExtractor(column.name, column.type, column.path) - elif column.label: + + if has_from: + # Strip any 'in ' suffix; scope validation is deferred to rebuild_for_scope. + target = re.sub(r"\s+in\s+[a-zA-Z_][a-zA-Z0-9_]*$", "", column.from_) + if is_label(target): + column._extractor = LabelExtractor(column.name, column.type, [target]) + else: + column._extractor = PathExtractor(column.name, column.type, target) + elif has_path: + # Strip any 'in ' suffix before JMESPath compilation; scope resolution + # is deferred to rebuild_for_scope when scope names are known. + path_target = re.sub(r"\s+in\s+[a-zA-Z_][a-zA-Z0-9_]*$", "", column.path) + column._extractor = PathExtractor(column.name, column.type, path_target) + elif has_label: if not isinstance(column.label, list): column.label = [column.label] column._extractor = LabelExtractor(column.name, column.type, column.label) else: - raise ValueError("must specify either path or label") + raise ValueError("must specify path, label, or from") return column def extract(self, obj: object, context) -> object: return self._extractor(obj, context) + def rebuild_for_scope(self, scope_names: set, table_name: str): + """Re-create the extractor with scope awareness for multi-step row_source tables. + + Called at TableFromConfig build time when scope names are known. 
+ """ + if self.path: + ref = FieldRef.parse_scoped(self.path, scope_names) + if ref.scope_name is None: + fail( + f"Table '{table_name}', column '{self.name}': " + f"path '{self.path}' must end with 'in ' " + f"(one of: {sorted(scope_names)})" + ) + try: + self._extractor = PathExtractor(self.name, self.type, ref.target, scope_name=ref.scope_name) + except ValueError as e: + fail(str(e)) + elif self.label: + labels = self.label if isinstance(self.label, list) else [self.label] + scope_name = None + stripped_labels = [] + for label in labels: + ref = FieldRef.parse_scoped(label, scope_names) + if ref.scope_name is None: + fail( + f"Table '{table_name}', column '{self.name}': " + f"label '{label}' must end with 'in ' " + f"(one of: {sorted(scope_names)})" + ) + if scope_name and scope_name != ref.scope_name: + fail( + f"Table '{table_name}', column '{self.name}': " + f"all labels must use the same scope name" + ) + scope_name = ref.scope_name + stripped_labels.append(ref.target) + self._extractor = LabelExtractor(self.name, self.type, stripped_labels, scope_name=scope_name) + elif self.from_: + ref = FieldRef.parse_scoped(self.from_, scope_names) + if ref.scope_name is None: + fail( + f"Table '{table_name}', column '{self.name}': " + f"'from' value '{self.from_}' must end with 'in ' " + f"(one of: {sorted(scope_names)})" + ) + if is_label(ref.target): + self._extractor = LabelExtractor(self.name, self.type, [ref.target], scope_name=ref.scope_name) + else: + try: + self._extractor = PathExtractor(self.name, self.type, ref.target, scope_name=ref.scope_name) + except ValueError as e: + fail(str(e)) + class ExtendTable(BaseModel): """Holds the extend: section from a user config file.""" diff --git a/kugl/impl/engine.py b/kugl/impl/engine.py index 9e81315..bcf3942 100644 --- a/kugl/impl/engine.py +++ b/kugl/impl/engine.py @@ -114,7 +114,7 @@ def query(self, query: Query) -> Tuple[list[Tuple], list[str]]: for named_table in query.named_tables: schema = 
schemas[named_table.schema_name or DEFAULT_SCHEMA] if table := schema.table_builder(named_table.name): - resource_ref = ResourceRef(schema, schema.resource_for(table)) + resource_ref = ResourceRef(schema, schema.resource_def(table)) tables.append((table, resource_ref)) resource_refs.add(resource_ref) @@ -122,7 +122,7 @@ def query(self, query: Query) -> Tuple[list[Tuple], list[str]]: for r in resource_refs: r.resource.handle_cli_options(self.args) refreshable, max_staleness = self.cache.advise_refresh(resource_refs, self.cache_flag) - if not self.settings.reckless and max_staleness is not None: + if not self.settings.quiet and max_staleness is not None: print(f"(Data may be up to {max_staleness} seconds old.)", file=sys.stderr) clock.CLOCK.sleep(0.5) diff --git a/kugl/impl/extract.py b/kugl/impl/extract.py index 5a05803..37f04bf 100644 --- a/kugl/impl/extract.py +++ b/kugl/impl/extract.py @@ -5,14 +5,13 @@ from dataclasses import dataclass import re -from typing import Literal +from typing import Literal, Optional import jmespath from kugl.util import parse_utc, parse_age, parse_size, parse_cpu, abbreviate, fail ColumnType = Literal["text", "integer", "real", "date", "age", "size", "cpu"] -PARENTED_PATH = re.compile(r"^(\^*)(.*)") KUGL_TYPE_CONVERTERS = { # Valid choices for column type in config -> function to extract that from a string @@ -37,17 +36,37 @@ } +_SCOPE_SUFFIX = re.compile(r"^(.+)\s+in\s+([a-zA-Z_][a-zA-Z0-9_]*)$") +_LABEL_PATTERN = re.compile(r"^[a-zA-Z0-9.-]+/[a-zA-Z0-9._/-]+$") + + +def is_label(s: str) -> bool: + """Return True if s looks like a Kubernetes label key (domain/name format).""" + return bool(_LABEL_PATTERN.match(s)) + + @dataclass class FieldRef: - """Parsed form of a parented JMESPath expression or label, e.g. 
'^^metadata.name'""" + """Parsed form of a potentially-scoped JMESPath expression or label.""" - n_parents: int + scope_name: Optional[str] target: str @classmethod - def parse(cls, s): - m = PARENTED_PATH.match(s) - return cls(len(m.group(1)), m.group(2)) + def parse_scoped(cls, s: str, scope_names: set) -> "FieldRef": + """Parse a path/label string, detecting a trailing 'in ' scope qualifier. + + Returns FieldRef with scope_name=None if no matching qualifier is found, + leaving the full string as the target. + """ + if "^" in s: + fail("^ parent navigation is no longer supported; use named row_source scopes instead") + m = _SCOPE_SUFFIX.match(s) + if m: + if m.group(2) in scope_names: + return cls(m.group(2), m.group(1)) + fail(f"Unknown scope '{m.group(2)}'; valid scopes are: {sorted(scope_names)}") + return cls(None, s) class Extractor: @@ -81,22 +100,28 @@ def __call__(self, obj: object, context) -> object: class LabelExtractor(Extractor): """Extract a column value from the first matching label in a list of labels.""" - def __init__(self, column_name: str, column_type: ColumnType, labels: list[str]): + def __init__(self, column_name: str, column_type: ColumnType, labels: list[str], + scope_name: Optional[str] = None): super().__init__(column_name, column_type) + for label in labels: + if "^" in label: + raise ValueError( + f"^ parent navigation is no longer supported in column {column_name}; " + f"use named row_source scopes instead" + ) self._labels = labels - self._refs = [FieldRef.parse(label) for label in labels] + self._scope_name = scope_name def extract(self, obj: object, context) -> object: """Resolve the metadata location for each label and see if the label is present.""" - for ref in self._refs: - if ref.n_parents > 0: - obj = context.get_parent(obj, ref.n_parents) + if self._scope_name: + obj = context.get_scope(obj, self._scope_name) if obj is None: - fail(f"Missing parent or too many ^ while evaluating {ref.target}") - if available := 
obj.get("metadata", {}).get("labels", {}): - # If the label is present here, return the value here, even if null - if ref.target in available: - return available[ref.target] + fail(f"Unknown scope '{self._scope_name}' for column '{self.column_name}'") + if available := obj.get("metadata", {}).get("labels", {}): + for label in self._labels: + if label in available: + return available[label] def __str__(self): """For debug output""" @@ -106,23 +131,29 @@ def __str__(self): class PathExtractor(Extractor): """Extract a column value from the target of a JMESPath expression.""" - def __init__(self, column_name: str, column_type: ColumnType, path: str): + def __init__(self, column_name: str, column_type: ColumnType, path: str, + scope_name: Optional[str] = None): super().__init__(column_name, column_type) - self._ref = FieldRef.parse(path) + if "^" in path: + raise ValueError( + f"^ parent navigation is no longer supported in column {column_name}; " + f"use named row_source scopes instead" + ) + self._scope_name = scope_name self._path = path try: - self._finder = jmespath.compile(self._ref.target) + self._finder = jmespath.compile(path) except jmespath.exceptions.ParseError as e: raise ValueError( - f"invalid JMESPath expression {self._ref.target} in column {column_name}" + f"invalid JMESPath expression {path} in column {column_name}" ) from e def extract(self, obj: object, context) -> object: """Extract a value from an object using a JMESPath finder.""" - if self._ref.n_parents > 0: - obj = context.get_parent(obj, self._ref.n_parents) - if obj is None: - fail(f"Missing parent or too many ^ while evaluating {self._path}") + if self._scope_name: + obj = context.get_scope(obj, self._scope_name) + if obj is None: + fail(f"Unknown scope '{self._scope_name}' for column '{self.column_name}'") return self._finder.search(obj) def __str__(self): diff --git a/kugl/impl/registry.py b/kugl/impl/registry.py index a38476c..e2fdd59 100644 --- a/kugl/impl/registry.py +++ 
b/kugl/impl/registry.py @@ -54,7 +54,7 @@ def add_table(self, cls: type, **kwargs): def add_resource(self, cls: type, family: str, schema_defaults: list[str]): """ - Register a resource type. This is called by the @resource decorator. + Register a resource type. This is called by the @resource_type decorator. :param cls: The class to register :param family: e.g. "file", "kubernetes", "aws" @@ -87,7 +87,7 @@ def get_resource_by_family(self, family: str, error_ok: bool = False) -> Type: fail(f"Resource family {family} is not registered") return impl - def get_resource_by_schema(self, schema_name: str) -> Type: + def get_schema_default_resource(self, schema_name: str) -> Type: return self.resources_by_schema.get(schema_name) def augment_cli(self, ap: ArgumentParser): @@ -132,10 +132,15 @@ def cache_path(self): class Schema(BaseModel): """Collection of tables and resource definitions.""" + # schema name name: str + # name -> table defined in python code builtin: dict[str, TableDef] = {} + # name -> table defined in config file _create: dict[str, CreateTable] = {} + # name -> table extended in config file _extend: dict[str, ExtendTable] = {} + # name -> resource defined in config file _resources: dict[str, Resource] = {} def read_configs(self, init_path: list[str]): @@ -148,6 +153,7 @@ def read_configs(self, init_path: list[str]): ] # Reset the non-builtin tables, since these can change during unit tests. + # (We don't create a new registry per test.) self._create.clear() self._extend.clear() self._resources.clear() @@ -160,14 +166,15 @@ def read_configs(self, init_path: list[str]): columns_known.add(column.name) def _check_column(table_name, column_name): - # Detect duplicate columns + # Reject duplicate column, or add to known if new. 
columns_known = tables_known[table_name] if column_name in columns_known: fail(f"Column '{column_name}' is already defined in table '{table_name}'") columns_known.add(column_name) - def _apply(folder: ConfigPath): + def _merge_config(folder: ConfigPath): # Merge one UserConfig into the schema. + # Return True if the folder exists, else False path = folder / f"{self.name}.yaml" if not path.exists(): return False @@ -201,7 +208,7 @@ def _apply(folder: ConfigPath): return True # Apply builtin config and user config. - found = any([_apply(folder) for folder in init_path]) + found = any([_merge_config(folder) for folder in init_path]) if not found and self.name != DEFAULT_SCHEMA: # There's a built-in schema for Kubernetes, so no issue if no config files fail(f"no configurations found for schema '{self.name}'") @@ -218,31 +225,33 @@ def _find_resource(self, r: ResourceDef) -> Resource: if family in fields: return parse_model(rgy.get_resource_by_family(family), fields) # If no family is specified, the schema may have a default one - if impl := rgy.get_resource_by_schema(self.name): + if impl := rgy.get_schema_default_resource(self.name): return parse_model(impl, fields) fail( f"can't infer type of resource '{r.name}' -- need one of 'file', 'data', 'namespaced' etc" ) - def table_builder(self, name, missing_ok=True): + def table_builder(self, table_name: str, missing_ok=True): """Return the Table builder subclass (see tables.py) for a table name. - :param missing_ok: Defaults to True because we normally let SQLite flag missing tables. + :param missing_ok: Defaults to True because we normally let SQLite identify missing + tables by surfacing the exception from the query. 
""" - builtin = self.builtin.get(name) - creator = self._create.get(name) - extender = self._extend.get(name) + builtin = self.builtin.get(table_name) + creator = self._create.get(table_name) + extender = self._extend.get(table_name) if builtin and creator: - fail(f"Pre-defined table {name} can't be created from config") + fail(f"Pre-defined table {table_name} can't be created from config") if builtin: return TableFromCode(builtin, extender) if creator: - return TableFromConfig(name, self.name, creator, extender) + return TableFromConfig(table_name, self.name, creator, extender) if not missing_ok: - fail(f"Table '{name}' is not defined in schema {self.name}") + fail(f"Table '{table_name}' is not defined in schema {self.name}") def all_table_names(self): + """Return all the table names that are built in or defined in user configs.""" return set(chain(self.builtin.keys(), self._create.keys(), self._extend.keys())) - def resource_for(self, table: Table) -> set[ResourceDef]: + def resource_def(self, table: Table) -> set[ResourceDef]: """Return the ResourceDef used by a Table.""" return self._resources[table.resource] diff --git a/kugl/impl/tables.py b/kugl/impl/tables.py index b7099ad..fd597f5 100644 --- a/kugl/impl/tables.py +++ b/kugl/impl/tables.py @@ -4,6 +4,7 @@ """ from dataclasses import dataclass +import re from typing import Optional, Type import jmespath @@ -38,7 +39,7 @@ def __init__( schema_name, resource: str, builtin_columns: list[Column], - non_builtin_columns: list[UserColumn], + added_columns: list[UserColumn], ): """ :param name: table name, e.g. "pods" @@ -49,7 +50,7 @@ def __init__( self.schema_name = schema_name self.resource = resource self.builtin_columns = builtin_columns - self.non_builtin_columns = non_builtin_columns + self.added_columns = added_columns def build(self, db, raw_data: dict, multi_schema: bool): """Create the table in SQLite and insert the data. 
@@ -60,15 +61,15 @@ def build(self, db, raw_data: dict, multi_schema: bool): """ context = RowContext(raw_data) table_name = f"{self.schema_name}.{self.name}" if multi_schema else self.name - all_columns = self.builtin_columns + self.non_builtin_columns + all_columns = self.builtin_columns + self.added_columns db.execute( f"""CREATE TABLE {table_name} ({", ".join(f"{c.name} {c._sqltype}" for c in all_columns)})""" ) item_rows = list(self.make_rows(context)) if item_rows: - if self.non_builtin_columns: + if self.added_columns: extend_row = lambda item, row: row + tuple( - column.extract(item, context) for column in self.non_builtin_columns + column.extract(item, context) for column in self.added_columns ) else: extend_row = lambda item, row: row @@ -79,7 +80,7 @@ def build(self, db, raw_data: dict, multi_schema: bool): def printable_schema(self): rows = [ (c.name, c._sqltype, c.comment or "") - for c in self.builtin_columns + self.non_builtin_columns + for c in self.builtin_columns + self.added_columns ] return f"## {self.name}\n" + tabulate(rows, tablefmt="plain") @@ -129,6 +130,16 @@ def __init__( creator.columns + (extender.columns if extender else []), ) self.row_source = [Itemizer.parse(x, name) for x in (creator.row_source or ["items"])] + if len(self.row_source) > 1: + scope_names = {s.scope_name for s in self.row_source if s.scope_name is not None} + unnamed = [s.expr for s in self.row_source if s.scope_name is None] + if unnamed: + fail( + f"Table '{name}': multi-step row_source entries must all have 'as '; " + f"missing for: {unnamed}" + ) + for column in self.added_columns: + column.rebuild_for_scope(scope_names, name) def make_rows(self, context: "RowContext") -> list[tuple[dict, tuple]]: """ @@ -160,11 +171,15 @@ def _itemize(self, context: "RowContext") -> list[dict]: if isinstance(found, dict) and source.unpack: found = [{"key": k, "value": v} for k, v in found.items()] if isinstance(found, list): + # Compute base scopes once for all children from 
this item. + base_scopes = (context._scopes.get(id(item), {}) if index > 0 else {}) if source.scope_name else None for child in found: if index > 0: # Fix #132 -- don't do this at pass 0, or it sets the parent to the entire - # response object, breaking self.get_root() + # response object. context.set_parent(child, item) + if source.scope_name is not None: + context.set_scope_with_base(child, source.scope_name, base_scopes) new_items.append(child) if debug: debug("add " + abbreviate(child)) @@ -172,6 +187,8 @@ def _itemize(self, context: "RowContext") -> list[dict]: if index > 0: # See comment above. context.set_parent(found, item) + if source.scope_name is not None: + context.set_scope(found, source.scope_name, item if index > 0 else None) new_items.append(found) if debug: debug("add " + abbreviate(found)) @@ -184,12 +201,15 @@ class RowContext: Primarily, the `.data` attribute holds the JSON data from 'kubectl get' or similar. The `.set_parent` and `.get_parent` methods allow row-generating functions to track - parent objects as they iterate through nested data structures.""" + parent objects as they iterate through nested data structures. 
+ The `.set_scope` and `.get_scope` methods support named scope resolution for + multi-step row_source tables.""" def __init__(self, data): self.data = data self.debug = debugging("extract") self._parents = {} + self._scopes = {} def set_parent(self, child, parent): self._parents[id(child)] = parent @@ -200,26 +220,42 @@ def get_parent(self, child, depth: int = 1): depth -= 1 return child - def get_root(self, child): - while (parent := self._parents.get(id(child))) is not None: - child = parent - return child + # FIXME: rethink how this is done + def set_scope(self, child, name: str, parent=None): + """Register child as the named scope, inheriting ancestor scopes from parent.""" + base = self._scopes.get(id(parent), {}) if parent is not None else {} + self.set_scope_with_base(child, name, base) + + # FIXME: rethink how this is done + def set_scope_with_base(self, child, name: str, base: dict): + """Like set_scope but accepts a pre-computed base scope dict.""" + self._scopes[id(child)] = {**base, name: child} + + def get_scope(self, obj, name: str): + """Look up a named scope for obj, returning None if not found.""" + return self._scopes.get(id(obj), {}).get(name) @dataclass class Itemizer: """Helper class to hold information parsed from one line of a row_source""" - # Original row_source expression + # JMESPath expression (without the 'as ' suffix) expr: str # JMESPath expression to find the items finder: ParsedResult # Should dictionaries be unpacked to a key/value array unpack: bool + # Optional scope name from 'as ' suffix + scope_name: Optional[str] = None @classmethod def parse(cls, s: str, table_name: str): - """Parse a line from the row_source section of a config file""" + """Parse a line from the row_source section of a config file. + + Syntax: 'expr [as name][; kv]' + """ + # Split off options (;kv etc.) 
parts = s.split(";") if len(parts) == 1: unpack = False @@ -227,7 +263,18 @@ def parse(cls, s: str, table_name: str): unpack = True else: fail(f"Invalid row_source options: {s}") + + # Parse 'expr as name' from the expression part + expr_part = parts[0].strip() + name = None + as_index = expr_part.find(" as ") + if as_index >= 0: + name = expr_part[as_index + 4:].strip() + expr_part = expr_part[:as_index].strip() + if not re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name): + fail(f"Invalid scope name '{name}' in row_source: {s}") + try: - return Itemizer(s, jmespath.compile(parts[0]), unpack) + return Itemizer(expr=expr_part, finder=jmespath.compile(expr_part), unpack=unpack, scope_name=name) except jmespath.exceptions.ParseError as e: - fail(f"invalid row_source {parts[0]} for table {table_name}", e) + fail(f"invalid row_source {expr_part} for table {table_name}", e) diff --git a/kugl/main.py b/kugl/main.py index d4cdd9d..53c1c1e 100644 --- a/kugl/main.py +++ b/kugl/main.py @@ -101,23 +101,24 @@ def parse_args( argv: list[str], ap: ArgumentParser, settings: Settings ) -> tuple[argparse.Namespace, CacheFlag]: """Add stock arguments to parser, parse the command line, and override settings.""" + ap.add_argument("-c", "--context", type=str) ap.add_argument("-D", "--debug", type=str) - ap.add_argument("-c", "--cache", default=False, action="store_true") ap.add_argument("-H", "--no-headers", default=False, action="store_true") - ap.add_argument("-r", "--reckless", default=False, action="store_true") + ap.add_argument("-q", "--quiet", default=False, action="store_true") + ap.add_argument("-r", "--refresh", default=False, action="store_true") + ap.add_argument("-s", "--stale", default=False, action="store_true") ap.add_argument("-t", "--timeout", type=str) - ap.add_argument("-u", "--update", default=False, action="store_true") ap.add_argument("sql") args = ap.parse_args(argv) - if args.cache and args.update: - fail("Cannot use both -c/--cache and -u/--update") + if args.stale 
and args.refresh: + fail("Cannot use both -s/--stale and -r/--refresh") if args.timeout: settings.cache_timeout = Age(args.timeout) - if args.reckless: - settings.reckless = True + if args.quiet: + settings.quiet = True if args.no_headers: settings.no_headers = True - return args, (ALWAYS_UPDATE if args.update else NEVER_UPDATE if args.cache else CHECK) + return args, (ALWAYS_UPDATE if args.refresh else NEVER_UPDATE if args.stale else CHECK) def _merge_init_files() -> tuple[UserInit, dict[str, Shortcut]]: diff --git a/tests/config/test_config.py b/tests/config/test_config.py index 766bff3..5acc3aa 100644 --- a/tests/config/test_config.py +++ b/tests/config/test_config.py @@ -19,7 +19,7 @@ def test_settings_defaults(): s = Settings() assert s.cache_timeout == Age(120) - assert not s.reckless + assert not s.quiet assert not s.no_headers assert s.init_path == [] @@ -30,7 +30,7 @@ def test_settings_custom(monkeypatch): Settings, yaml.safe_load(""" cache_timeout: 5s - reckless: true + quiet: true no_headers: true init_path: - $FOO/abc @@ -39,7 +39,7 @@ def test_settings_custom(monkeypatch): """), ) assert s.cache_timeout == Age(5) - assert s.reckless + assert s.quiet assert s.no_headers assert s.init_path == ["/tmp/abc", "/tmp/xyz", "$BAR/xyz"] @@ -53,7 +53,7 @@ def test_empty_config(): def test_empty_init(): c = UserInit() assert c.settings.cache_timeout == Age(120) - assert not c.settings.reckless + assert not c.settings.quiet assert c.shortcuts == [] @@ -201,4 +201,4 @@ def test_must_have_path_or_label(): """), return_errors=True, ) - assert errors == ["columns.0: Value error, must specify either path or label"] + assert errors == ["columns.0: Value error, must specify path, label, or from"] diff --git a/tests/k8s/k8s_mocks.py b/tests/k8s/k8s_mocks.py index 0f6f614..fdebf31 100644 --- a/tests/k8s/k8s_mocks.py +++ b/tests/k8s/k8s_mocks.py @@ -215,6 +215,104 @@ def make_cronjob( return obj +def make_service( + name: str, + namespace: str = None, + svc_type: str = 
"ClusterIP", + cluster_ip: str = "10.96.0.1", + external_ip: str = None, + labels: Optional[dict] = None, +): + """ + Construct a Service dict from a generic chunk of YAML that we can alter to simulate different + responses from the K8S API. + + :param name: Service name + :param svc_type: Service type (ClusterIP, NodePort, LoadBalancer, ExternalName) + :param cluster_ip: Cluster IP, or "None" for headless/ExternalName services + :param external_ip: External IP for LoadBalancer services + """ + obj = yaml.safe_load(_static_content("sample_service.yaml")) + obj["metadata"]["name"] = name + obj["metadata"]["uid"] = "uid-" + name + if namespace is not None: + obj["metadata"]["namespace"] = namespace + obj["spec"]["type"] = svc_type + obj["spec"]["clusterIP"] = cluster_ip + if external_ip is not None: + obj["status"]["loadBalancer"]["ingress"] = [{"ip": external_ip}] + if labels is not None: + obj["metadata"]["labels"] = labels + return obj + + +def make_deployment( + name: str, + namespace: str = None, + replicas: int = 3, + ready: int = None, + available: int = None, + updated: int = None, + strategy: str = "RollingUpdate", + labels: Optional[dict] = None, +): + """ + Construct a Deployment dict from a generic chunk of YAML that we can alter to simulate different + responses from the K8S API. 
+ + :param name: Deployment name + :param replicas: Desired replica count + :param ready: Ready replicas (defaults to replicas) + :param available: Available replicas (defaults to replicas) + :param updated: Updated replicas (defaults to replicas) + :param strategy: Rollout strategy (RollingUpdate or Recreate) + """ + obj = yaml.safe_load(_static_content("sample_deployment.yaml")) + obj["metadata"]["name"] = name + obj["metadata"]["uid"] = "uid-" + name + if namespace is not None: + obj["metadata"]["namespace"] = namespace + obj["spec"]["replicas"] = replicas + obj["spec"]["strategy"]["type"] = strategy + obj["status"]["replicas"] = replicas + obj["status"]["readyReplicas"] = replicas if ready is None else ready + obj["status"]["availableReplicas"] = replicas if available is None else available + obj["status"]["updatedReplicas"] = replicas if updated is None else updated + if labels is not None: + obj["metadata"]["labels"] = labels + return obj + + +def make_event( + name: str, + namespace: str = "default", + event_type: str = "Normal", + reason: str = "Scheduled", + message: str = "Event message", + count: int = 1, + first_ts: int = UNIT_TEST_TIMEBASE, + last_ts: int = UNIT_TEST_TIMEBASE, + obj_kind: str = "Pod", + obj_name: str = "example-pod", + obj_namespace: str = "default", + source: str = "default-scheduler", +): + obj = yaml.safe_load(_static_content("sample_event.yaml")) + obj["metadata"]["name"] = f"{obj_name}.{name}" + obj["metadata"]["namespace"] = namespace + obj["type"] = event_type + obj["reason"] = reason + obj["message"] = message + obj["count"] = count + obj["firstTimestamp"] = to_utc(first_ts) + obj["lastTimestamp"] = to_utc(last_ts) + obj["involvedObject"]["kind"] = obj_kind + obj["involvedObject"]["name"] = obj_name + obj["involvedObject"]["namespace"] = obj_namespace + obj["source"]["component"] = source + return obj + + @cache def _static_content(filename: str): return Path(__file__).parent.parent.joinpath("static", filename).read_text() diff 
--git a/tests/k8s/kubectl b/tests/k8s/kubectl index e64a05d..d6f22aa 100755 --- a/tests/k8s/kubectl +++ b/tests/k8s/kubectl @@ -8,8 +8,13 @@ from pathlib import Path import re import sys -args = " ".join(sys.argv[1:]) -if m := re.match("get (pods|jobs|cronjobs|things) (-n default|--all-namespaces) -o json", args): +argv = sys.argv[1:] +if "--context" in argv: + idx = argv.index("--context") + argv = argv[:idx] + argv[idx+2:] + +args = " ".join(argv) +if m := re.match("get (pods|jobs|cronjobs|services|deployments|events|things) (-n default|--all-namespaces) -o json", args): kind = m.group(1) elif re.match("get pods (-n default|--all-namespaces)", args): kind = "pod_statuses" @@ -20,4 +25,4 @@ else: mockdir = Path(os.environ["KUGL_MOCKDIR"]) content = mockdir.joinpath(kind).read_text() -print(content) \ No newline at end of file +print(content) diff --git a/tests/k8s/test_deployments.py b/tests/k8s/test_deployments.py new file mode 100644 index 0000000..3698699 --- /dev/null +++ b/tests/k8s/test_deployments.py @@ -0,0 +1,50 @@ +""" +Tests for the deployments table. 
+""" + +from .k8s_mocks import make_deployment, kubectl_response +from ..testing import assert_query + + +def test_deployment_replicas(test_home): + kubectl_response( + "deployments", + { + "items": [ + make_deployment("deploy-1"), + make_deployment("deploy-2", replicas=5, ready=3, available=3, updated=5), + make_deployment("deploy-3", replicas=2, strategy="Recreate"), + ] + }, + ) + assert_query( + "SELECT name, replicas, ready, available, updated, strategy FROM deployments ORDER BY 1", + """ + name replicas ready available updated strategy + deploy-1 3 3 3 3 RollingUpdate + deploy-2 5 3 3 5 RollingUpdate + deploy-3 2 2 2 2 Recreate + """, + ) + + +def test_deployment_labels(test_home): + kubectl_response( + "deployments", + { + "items": [ + make_deployment("deploy-1", labels=dict(app="web", env="prod")), + make_deployment("deploy-2", labels=dict(app="api")), + make_deployment("deploy-3", labels=dict()), + ] + }, + ) + assert_query( + "SELECT deployment_uid, key, value FROM deployment_labels ORDER BY 2, 3, 1", + """ + deployment_uid key value + uid-deploy-2 app api + uid-deploy-1 app web + uid-deploy-1 env prod + """, + ) diff --git a/tests/k8s/test_events.py b/tests/k8s/test_events.py new file mode 100644 index 0000000..5efd18c --- /dev/null +++ b/tests/k8s/test_events.py @@ -0,0 +1,50 @@ +""" +Tests for the events table. 
+""" + +from kugl.util import UNIT_TEST_TIMEBASE +from .k8s_mocks import make_event, kubectl_response +from ..testing import assert_query + + +def test_event_columns(test_home): + kubectl_response( + "events", + { + "items": [ + make_event("ev1", event_type="Normal", reason="Scheduled", count=1, + obj_kind="Pod", obj_name="my-pod", source="default-scheduler"), + make_event("ev2", event_type="Warning", reason="OOMKilling", count=5, + obj_kind="Pod", obj_name="my-pod", source="kubelet"), + make_event("ev3", event_type="Warning", reason="Failed", count=3, + obj_kind="Node", obj_name="node-1", obj_namespace="", source="kubelet"), + ] + }, + ) + assert_query( + "SELECT namespace, `type`, reason, `count`, obj_kind, obj_name, source FROM events ORDER BY reason", + """ + namespace type reason count obj_kind obj_name source + default Warning Failed 3 Node node-1 kubelet + default Warning OOMKilling 5 Pod my-pod kubelet + default Normal Scheduled 1 Pod my-pod default-scheduler + """, + ) + + +def test_event_timestamps(test_home): + kubectl_response( + "events", + { + "items": [ + make_event("ev1", first_ts=UNIT_TEST_TIMEBASE, last_ts=UNIT_TEST_TIMEBASE + 300), + ] + }, + ) + assert_query( + "SELECT first_ts, last_ts, last_ts - first_ts AS elapsed FROM events", + f""" + first_ts last_ts elapsed + {UNIT_TEST_TIMEBASE} {UNIT_TEST_TIMEBASE + 300} 300 + """, + ) diff --git a/tests/k8s/test_jobs.py b/tests/k8s/test_jobs.py index d8d6b26..001f5e9 100644 --- a/tests/k8s/test_jobs.py +++ b/tests/k8s/test_jobs.py @@ -77,13 +77,13 @@ def test_label_parents(test_home): - table: job_users resource: jobs row_source: - - items - - spec.template + - items as job + - spec.template as template columns: - name: job_username - label: ^user + label: user in job - name: pod_username - label: user + label: user in template """) kubectl_response( "jobs", diff --git a/tests/k8s/test_services.py b/tests/k8s/test_services.py new file mode 100644 index 0000000..1f2cdf8 --- /dev/null +++ 
b/tests/k8s/test_services.py @@ -0,0 +1,52 @@ +""" +Tests for the services table. +""" + +from .k8s_mocks import make_service, kubectl_response +from ..testing import assert_query + + +def test_service_types(test_home): + kubectl_response( + "services", + { + "items": [ + make_service("svc-1"), + make_service("svc-2", svc_type="NodePort", cluster_ip="10.96.0.2"), + make_service("svc-3", svc_type="LoadBalancer", cluster_ip="10.96.0.3", external_ip="203.0.113.5"), + make_service("svc-4", svc_type="ExternalName", cluster_ip="None"), + ] + }, + ) + assert_query( + "SELECT name, type, cluster_ip, external_ip FROM services ORDER BY 1", + """ + name type cluster_ip external_ip + svc-1 ClusterIP 10.96.0.1 + svc-2 NodePort 10.96.0.2 + svc-3 LoadBalancer 10.96.0.3 203.0.113.5 + svc-4 ExternalName + """, + ) + + +def test_service_labels(test_home): + kubectl_response( + "services", + { + "items": [ + make_service("svc-1", labels=dict(foo="bar")), + make_service("svc-2", labels=dict(a="b", c="d")), + make_service("svc-3", labels=dict()), + ] + }, + ) + assert_query( + "SELECT service_uid, key, value FROM service_labels ORDER BY 2, 1", + """ + service_uid key value + uid-svc-2 a b + uid-svc-2 c d + uid-svc-1 foo bar + """, + ) diff --git a/tests/resource/test_cache.py b/tests/resource/test_cache.py index 13aff36..58f3bba 100644 --- a/tests/resource/test_cache.py +++ b/tests/resource/test_cache.py @@ -24,7 +24,7 @@ def test_cache(test_home, capsys): all_res = {pods, jobs, nodes, events} for r in all_res: - r.resource.handle_cli_options(SimpleNamespace(namespace="foo", all=False)) + r.resource.handle_cli_options(SimpleNamespace(namespace="foo", all=False, context=None)) # Pretend we have cached data for pods, nodes, and events, but not jobs. 
diff --git a/tests/resource/test_folder.py b/tests/resource/test_folder.py index 1b928ec..80a65d7 100644 --- a/tests/resource/test_folder.py +++ b/tests/resource/test_folder.py @@ -74,9 +74,12 @@ def test_folder_content(hr, tmp_path, capsys): match="(?P[^/]+)/data.yaml", ) # Update the row_source of the people table to match the folder data layout. - config["create"][0]["row_source"] = ["[]", "content"] - # Add a column to capture the region. - config["create"][0]["columns"].append(dict(name="region", path="^match.region")) + config["create"][0]["row_source"] = ["[] as file", "content as person"] + config["create"][0]["columns"] = [ + dict(name="name", path="name in person"), + dict(name="age", path="age in person", type="integer"), + dict(name="region", path="match.region in file"), + ] hr.save(config) with features_debugged("folder"): assert_query( diff --git a/tests/resource/test_row_source.py b/tests/resource/test_row_source.py index 5d99619..749ace6 100644 --- a/tests/resource/test_row_source.py +++ b/tests/resource/test_row_source.py @@ -2,6 +2,8 @@ Unit tests for row_source errors and special cases. 
""" +import json + import pytest from kugl.util import KuglError, kugl_home @@ -9,8 +11,8 @@ from ..k8s.k8s_mocks import kubectl_response -def test_too_many_parents(test_home): - """Ensure correct error when a parent field reference is too long.""" +def test_caret_rejected(test_home): + """Ensure ^ parent navigation raises a clear error.""" kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" resources: - name: things @@ -20,18 +22,335 @@ def test_too_many_parents(test_home): resource: things columns: - name: something - path: ^^^invalid - """) - kubectl_response( - "things", - { - "items": [ - {"something": "foo"}, - {"something": "foo"}, - ] - }, + path: ^parent + """) + kubectl_response("things", {"items": [{"something": "foo"}]}) + with pytest.raises(KuglError, match=r"\^ parent navigation is no longer supported"): + assert_query("SELECT * FROM things", "") + + +_MULTI_STEP_CONFIG = """ + resources: + - name: things + data: + items: {items} + create: + - table: things + resource: things + row_source: + - items as item + - children as child + columns: + - name: parent_id + path: parent in item + - name: val + path: val in child +""" + +@pytest.mark.parametrize("items,expected", [ + pytest.param( + [ + {"parent": "p1", "children": [{"val": "a"}, {"val": "b"}]}, + {"parent": "p2", "children": [{"val": "c"}]}, + ], + """ + parent_id val + p1 a + p1 b + p2 c + """, + id="normal", + ), + pytest.param( + [ + {"parent": "p1", "children": [{"val": "a"}]}, + {"parent": "p2", "children": []}, + ], + """ + parent_id val + p1 a + """, + id="empty_sublist", + ), +]) +def test_multi_step_row_source(test_home, items, expected): + """Multi-step row_source with named scopes; also checks empty sublists produce no rows.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text( + _MULTI_STEP_CONFIG.format(items=json.dumps(items)) + ) + assert_query("SELECT * FROM things ORDER BY parent_id, val", expected) + + +def test_kv_with_parent_nav(test_home): + """'; kv' 
expansion combined with named scope to reference a field from the parent item.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - service: svc-a + env: + FOO: bar + BAZ: glig + - service: svc-b + env: + QUX: quux + create: + - table: things + resource: things + row_source: + - items as item + - env as kv_pair; kv + columns: + - name: service + path: service in item + - name: key + path: key in kv_pair + - name: value + path: value in kv_pair + """) + assert_query( + "SELECT * FROM things ORDER BY service, key", + """ + service key value + svc-a BAZ glig + svc-a FOO bar + svc-b QUX quux + """, ) - with pytest.raises(KuglError, match="Missing parent or too many . while evaluating ...invalid"): + + +def test_three_level_named_scopes(test_home): + """Three-step row_source with named scopes; verifies ancestor scopes are reachable.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - section: sec-a + groups: + - grp: grp-1 + tags: + - tag: t1 + - tag: t2 + - grp: grp-2 + tags: + - tag: t3 + create: + - table: things + resource: things + row_source: + - items as section_item + - groups as group + - tags as tag_item + columns: + - name: section + path: section in section_item + - name: grp + path: grp in group + - name: tag + path: tag in tag_item + """) + assert_query( + "SELECT * FROM things ORDER BY section, grp, tag", + """ + section grp tag + sec-a grp-1 t1 + sec-a grp-1 t2 + sec-a grp-2 t3 + """, + ) + + +def test_missing_scope_name(test_home): + """Multi-step row_source without 'as ' raises a ConfigError.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - children: [{val: a}] + create: + - table: things + resource: things + row_source: + - items as item + - children + columns: + - name: val + path: val in item + """) + with pytest.raises(KuglError, match="must all have 
'as '"): + assert_query("SELECT * FROM things", "") + + +def test_unscoped_column_in_multi_step(test_home): + """Multi-step row_source with a bare (un-scoped) column path raises a ConfigError.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - children: [{val: a}] + create: + - table: things + resource: things + row_source: + - items as item + - children as child + columns: + - name: val + path: val + """) + with pytest.raises(KuglError, match="must end with 'in '"): + assert_query("SELECT * FROM things", "") + + +def test_from_detects_label(test_home): + """`from: domain/key` auto-detects as a label extractor.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - metadata: + labels: + test.io/group: team-a + create: + - table: things + resource: things + columns: + - name: grp + from: test.io/group + """) + assert_query("SELECT * FROM things", """ + grp + team-a + """) + + +def test_from_detects_path(test_home): + """`from: jmespath.expr` auto-detects as a path extractor.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - metadata: + name: my-thing + create: + - table: things + resource: things + columns: + - name: thing_name + from: metadata.name + """) + assert_query("SELECT * FROM things", """ + thing_name + my-thing + """) + + +def test_from_scoped_path(test_home): + """`from: expr in scope` resolves a JMESPath on the named scope.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - metadata: + name: pod-a + spec: + containers: + - name: c1 + - name: c2 + create: + - table: things + resource: things + row_source: + - items as pod + - spec.containers as container + columns: + - name: pod_name + from: metadata.name in pod + - name: container_name + from: name in container + """) + assert_query("SELECT * 
FROM things ORDER BY container_name", """ + pod_name container_name + pod-a c1 + pod-a c2 + """) + + +def test_from_scoped_label(test_home): + """`from: domain/key in scope` resolves as a label on the named scope object.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - metadata: + labels: + test.io/group: team-b + children: + - val: x + create: + - table: things + resource: things + row_source: + - items as item + - children as child + columns: + - name: grp + from: test.io/group in item + - name: val + from: val in child + """) + assert_query("SELECT * FROM things", """ + grp val + team-b x + """) + + +def test_from_conflicts_with_path(test_home): + """Specifying both `from` and `path` raises a validation error.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: [{val: a}] + create: + - table: things + resource: things + columns: + - name: val + from: val + path: val + """) + with pytest.raises(KuglError, match="cannot specify .from. 
alongside"): + assert_query("SELECT * FROM things", "") + + +def test_from_unknown_scope(test_home): + """`from: expr in unknownscope` raises a clear error at table-build time.""" + kugl_home().prep().joinpath("kubernetes.yaml").write_text(""" + resources: + - name: things + data: + items: + - children: [{val: a}] + create: + - table: things + resource: things + row_source: + - items as item + - children as child + columns: + - name: val + from: val in ghost + """) + with pytest.raises(KuglError, match="Unknown scope 'ghost'"): assert_query("SELECT * FROM things", "") diff --git a/tests/static/sample_deployment.yaml b/tests/static/sample_deployment.yaml new file mode 100644 index 0000000..183aaea --- /dev/null +++ b/tests/static/sample_deployment.yaml @@ -0,0 +1,28 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + creationTimestamp: "2024-11-20T01:05:00Z" + name: example-deployment + namespace: example + uid: aaaabbbb-1234-5678-abcd-000000000002 + labels: {} +spec: + replicas: 3 + strategy: + type: RollingUpdate + selector: + matchLabels: + app: example + template: + metadata: + labels: + app: example + spec: + containers: + - name: main + image: example.com/app:latest +status: + replicas: 3 + readyReplicas: 3 + availableReplicas: 3 + updatedReplicas: 3 diff --git a/tests/static/sample_event.yaml b/tests/static/sample_event.yaml new file mode 100644 index 0000000..5c1981d --- /dev/null +++ b/tests/static/sample_event.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Event +metadata: + name: example-pod.17c8a9f0c0b23456 + namespace: default + uid: aaaabbbb-1234-5678-abcd-000000000099 +type: Normal +reason: Scheduled +message: Successfully assigned default/example-pod to node-1 +count: 1 +firstTimestamp: "2024-11-20T01:00:00Z" +lastTimestamp: "2024-11-20T01:00:00Z" +involvedObject: + kind: Pod + name: example-pod + namespace: default + uid: aaaabbbb-1234-5678-abcd-000000000001 +source: + component: default-scheduler + host: master-node diff --git 
a/tests/static/sample_service.yaml b/tests/static/sample_service.yaml new file mode 100644 index 0000000..9b7c995 --- /dev/null +++ b/tests/static/sample_service.yaml @@ -0,0 +1,19 @@ +apiVersion: v1 +kind: Service +metadata: + creationTimestamp: "2024-11-20T01:05:00Z" + name: example-service + namespace: example + uid: aaaabbbb-1234-5678-abcd-000000000001 + labels: {} +spec: + clusterIP: 10.96.0.1 + type: ClusterIP + selector: + app: example + ports: + - port: 80 + targetPort: 8080 + protocol: TCP +status: + loadBalancer: {} diff --git a/tests/test_cli.py b/tests/test_cli.py index 4eef2c1..6a433d0 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -15,8 +15,8 @@ def test_enforce_cache_option(test_home): - with pytest.raises(KuglError, match="Cannot use both -c/--cache and -u/--update"): - main1(["-c", "-u", "select 1"]) + with pytest.raises(KuglError, match="Cannot use both -s/--stale and -r/--refresh"): + main1(["-s", "-r", "select 1"]) def test_enforce_cache_option_via_shortcut(test_home, capsys): @@ -24,16 +24,16 @@ def test_enforce_cache_option_via_shortcut(test_home, capsys): shortcuts: - name: foo args: - - -u + - -r - "select 1" """) - with pytest.raises(KuglError, match="Cannot use both -c/--cache and -u/--update"): - main1(["-c", "foo"]) + with pytest.raises(KuglError, match="Cannot use both -s/--stale and -r/--refresh"): + main1(["-s", "foo"]) def test_enforce_one_namespace_option(test_home): - with pytest.raises(KuglError, match="Cannot use both -a/--all and -n/--namespace"): - main1(["-a", "-n", "x", "select * from pods"]) + with pytest.raises(KuglError, match="Cannot use both -A/--all and -n/--namespace"): + main1(["-A", "-n", "x", "select * from pods"]) def test_no_such_table(test_home): @@ -65,7 +65,7 @@ def test_unknown_option_in_shortcut(test_home, capsys): - "select * from pods" """) with pytest.raises(SystemExit): - main1(["-a", "foo"]) + main1(["-A", "foo"]) assert "unrecognized arguments: --badoption" in capsys.readouterr().err @@ 
-77,22 +77,22 @@ def test_no_headers(test_home, capsys): @pytest.mark.parametrize( - "argv,expected_flag,age,reckless,error", + "argv,expected_flag,age,quiet,error", [ - (["-u", "select 1"], ALWAYS_UPDATE, Age(120), False, None), + (["-r", "select 1"], ALWAYS_UPDATE, Age(120), False, None), (["-t", "5", "select 1"], CHECK, Age(5), False, None), - (["-c", "-r", "select 1"], NEVER_UPDATE, Age(120), True, None), + (["-s", "-q", "select 1"], NEVER_UPDATE, Age(120), True, None), ( - ["-c", "-u", "select 1"], + ["-s", "-r", "select 1"], None, None, None, - "Cannot use both -c/--cache and -u/--update", + "Cannot use both -s/--stale and -r/--refresh", ), ], ) -def test_parse_args(test_home, argv, expected_flag, age, reckless, error): - """Verify correct values received for -u, -t, -c, -r options""" +def test_parse_args(test_home, argv, expected_flag, age, quiet, error): + """Verify correct values received for -r, -t, -s, -q options""" ap = ArgumentParser() settings = Settings() if error: @@ -102,7 +102,7 @@ def test_parse_args(test_home, argv, expected_flag, age, reckless, error): args, actual_flag = parse_args(argv, ap, settings) assert actual_flag == expected_flag assert settings.cache_timeout == age - assert settings.reckless == reckless + assert settings.quiet == quiet def test_init_command(test_home, capsys): diff --git a/tests/testing.py b/tests/testing.py index 5d6eec2..83f20a6 100644 --- a/tests/testing.py +++ b/tests/testing.py @@ -24,7 +24,7 @@ def assert_query(sql: str, expected: Union[str, list], all_ns: bool = False): caller can indent for neatness. Or, if a list, each item will be checked in order. :param all_ns: FIXME temporary hack until we get namespaces out of engine.py """ - args = SimpleNamespace(all=all_ns, namespace=None) + args = SimpleNamespace(all=all_ns, namespace=None, context=None) engine = Engine(args, ALWAYS_UPDATE, Settings()) if isinstance(expected, str): actual = engine.query_and_format(Query(sql))