Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 16 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ jobs:
fail-fast: false
matrix:
include:
# DuckDB examples
# ---------- DuckDB examples ----------
- engine: duckdb
extra: ""
example: api_demo
Expand All @@ -130,6 +130,10 @@ jobs:
extra: ""
example: dq_demo
env_file: examples/dq_demo/.env.dev_duckdb
- engine: duckdb
extra: ""
example: hooks_demo
env_file: examples/hooks_demo/.env.dev_duckdb
- engine: duckdb
extra: ""
example: incremental_demo
Expand All @@ -142,7 +146,7 @@ jobs:
extra: ""
example: materializations_demo
env_file: examples/materializations_demo/.env.dev_duckdb
# Postgres examples
# ---------- Postgres examples ----------
- engine: postgres
extra: "postgres"
example: api_demo
Expand All @@ -163,6 +167,10 @@ jobs:
extra: "postgres"
example: dq_demo
env_file: examples/dq_demo/.env.dev_postgres
- engine: postgres
extra: "postgres"
example: hooks_demo
env_file: examples/hooks_demo/.env.dev_postgres
- engine: postgres
extra: "postgres"
example: incremental_demo
Expand All @@ -175,7 +183,7 @@ jobs:
extra: "postgres"
example: materializations_demo
env_file: examples/materializations_demo/.env.dev_postgres
# Spark examples
# ---------- Spark examples ----------
- engine: databricks_spark
extra: "spark"
example: api_demo
Expand All @@ -201,6 +209,11 @@ jobs:
example: dq_demo
java: true
env_file: examples/dq_demo/.env.dev_databricks
- engine: databricks_spark
extra: "spark"
example: hooks_demo
java: true
env_file: examples/hooks_demo/.env.dev_databricks
- engine: databricks_spark
extra: "spark"
example: incremental_demo
Expand Down
11 changes: 8 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,19 @@ jobs:
id: meta
run: |
python - << 'PY'
import pathlib, tomllib
import os
import pathlib
import tomllib

data = tomllib.loads(pathlib.Path("pyproject.toml").read_text())
version = data.get("project", {}).get("version")
if not version:
raise SystemExit("No [project].version found in pyproject.toml")
raise SystemExit("No [project].version found in pyproject.toml")

print(f"Version: {version}")
with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf8") as fh:

out_path = os.environ["GITHUB_OUTPUT"]
with open(out_path, "a", encoding="utf8") as fh:
fh.write(f"version={version}\n")
PY

Expand Down
106 changes: 64 additions & 42 deletions docs/Cost_Monitoring.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,54 +221,76 @@ my_project/
```yaml
version: 1

# Default behaviour when a budget is exceeded.
# One of: "warn", "error"
defaults:
on_exceed: "warn"

budgets:
# Global cap for total bytes scanned by all models.
total_bytes_scanned:
warn_after: "10GB" # log a warning above this
error_after: "50GB" # fail the run above this

# Optional: total query time across all models.
total_query_duration_ms:
warn_after: 600000 # 10 minutes
error_after: 3600000 # 60 minutes

# Per-engine budgets
by_engine:
bigquery:
bytes_scanned:
warn_after: "5GB"
error_after: "20GB"

snowflake_snowpark:
bytes_scanned:
warn_after: "2GB"
error_after: "10GB"

# Optional per-model budgets (key = model name)
per_model:
fct_events:
bytes_scanned:
warn_after: "500MB"
error_after: "2GB"

dim_users:
bytes_scanned:
warn_after: "100MB"
# Per-engine query limits (applied before executing individual queries)
query_limits:
duckdb:
max_bytes: 5_000_000
postgres:
max_bytes: 10_000_000
bigquery:
max_bytes: 50_000_000
databricks_spark:
max_bytes: 50_000_000
snowflake_snowpark:
max_bytes: 50_000_000

# Global limits across the entire fft run
total:
bytes_scanned:
# ~10 MB – adjust down if you want to force a warning
warn: 100
# ~100 MB – adjust down if you want to force an error
error: 100_000_000

# Optional: total query time across all queries in the run
query_duration_ms:
warn: "30s" # human-friendly duration, parsed to ms
error: "2m"

# Per-model limits (keys must match node names: stg_users.ff, mart_user_orders.ff, http_users, ...)
models:
stg_users.ff:
bytes_scanned:
# keep this fairly low so you can see a warn if you want
warn: 100
error: 10_000_000

stg_orders.ff:
bytes_scanned:
warn: 1_000_000
error: 10_000_000

mart_user_orders.ff:
bytes_scanned:
warn: 1_000_000
error: 100_000_000

http_users:
# HTTP model → mainly interesting on engines that can report bytes_scanned
bytes_scanned:
warn: 5_000_000
error: 50_000_000

py_constants:
bytes_scanned:
warn: 5_000_000
error: 50_000_000

# Per-tag budgets (aggregated over all models with that tag)
tags:
"example:cache_demo":
bytes_scanned:
warn: 10_000_000
```

#### Value syntax

* `warn_after` / `error_after` for **bytes** use the same notation as
* `warn` / `error` for **bytes** use the same notation as
`FF_*_MAX_BYTES`:

```yaml
warn_after: "5GB"
error_after: "500_000_000"
warn: "5GB"
error: "500_000_000"
```

* Durations accept plain integers in **milliseconds**, or human-friendly
  strings such as `"30s"` / `"2m"` that are parsed to milliseconds.
Expand All @@ -290,7 +312,7 @@ budgets:
* If an **error** budget is exceeded, FFT treats it like a failed run and exits
with `1`.

If both `warn_after` and `error_after` are defined and exceeded, the **error**
If both `warn` and `error` are defined and exceeded, the **error**
behaviour wins.

### Interaction with env-level guards
Expand Down
Loading