Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
323 changes: 266 additions & 57 deletions docs/Config_and_Macros.md

Large diffs are not rendered by default.

497 changes: 377 additions & 120 deletions docs/examples/Macros_Demo.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{{ config(
materialized = 'table',
tags = [
'example:macros_demo',
'scope:common',
'engine:duckdb',
'engine:postgres',
'engine:databricks_spark',
'engine:bigquery',
'engine:snowflake_snowpark'
]
) }}

with sales as (
select
u.user_id,
u.user_segment,
u.country,
o.amount,
o.order_ts
from {{ ref('stg_orders.ff') }} as o
join {{ ref('dim_users.ff') }} as u
on u.user_id = o.user_id
where
-- demo: stdlib partition IN helper on a dimension
{{ ff_partition_in('u.country', var('partition_countries', ['DE', 'AT'])) }}
)
select
user_id,
user_segment,
country,
count(*) as order_count,
sum(amount) as total_amount
from sales
group by user_id, user_segment, country;
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{{ config(
materialized = 'table',
tags = [
'example:macros_demo',
'scope:common',
'engine:duckdb',
'engine:postgres',
'engine:databricks_spark',
'engine:bigquery',
'engine:snowflake_snowpark'
]
) }}

with sales as (
select
u.user_id,
u.user_segment,
o.order_ts,
o.amount
from {{ ref('stg_orders.ff') }} as o
join {{ ref('dim_users.ff') }} as u
on u.user_id = o.user_id
where
-- demo: engine-aware partition predicate using stdlib
{{ ff_partition_filter('o.order_ts', var('from_date', '2025-10-01'), var('to_date', '2025-10-31')) }}
)
select
user_id,
user_segment,
count(*) as order_count,
sum(amount) as total_amount
from sales
group by user_id, user_segment;
13 changes: 12 additions & 1 deletion examples/macros_demo/models/common/stg_orders.ff.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
{{ config(
materialized='view',
tags=['example:macros_demo', 'scope:common', 'engine:duckdb', 'engine:postgres', 'engine:databricks_spark', 'engine:bigquery', 'engine:snowflake_snowpark']
tags=[
'example:macros_demo',
'scope:common',
'engine:duckdb',
'engine:postgres',
'engine:databricks_spark',
'engine:bigquery',
'engine:snowflake_snowpark'
]
) }}

select
cast(order_id as int) as order_id,
cast(customer_id as int) as user_id,
{{ safe_cast_amount("amount") }} as amount,
{{ ff_safe_cast("amount", "numeric", default = "0") }} as amount_safe,
{{ ff_date_trunc("order_ts", "day") }} as order_day,
{{ ff_date_add("order_ts", "day", 1) }} as order_ts_plus_1d,
cast(order_ts as timestamp) as order_ts
from {{ source('sales', 'orders') }};
10 changes: 9 additions & 1 deletion examples/macros_demo/models/common/stg_users.ff.sql
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
{{ config(
materialized='view',
tags=['example:macros_demo', 'scope:common', 'engine:duckdb', 'engine:postgres', 'engine:databricks_spark', 'engine:bigquery', 'engine:snowflake_snowpark']
tags=[
'example:macros_demo',
'scope:common',
'engine:duckdb',
'engine:postgres',
'engine:databricks_spark',
'engine:bigquery',
'engine:snowflake_snowpark'
]
) }}

with src as (
Expand Down
3 changes: 1 addition & 2 deletions examples/macros_demo/models/macros/utils.sql
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
{# Reusable SQL helpers #}

{%- macro email_domain(expr) -%}
{%- set e = engine('duckdb') -%}
{%- if e == 'bigquery' -%}
{%- if ff_is_engine('bigquery') -%}
lower(split({{ expr }}, '@')[SAFE_OFFSET(1)])
{%- else -%}
lower(split_part({{ expr }}, '@', 2))
Expand Down
80 changes: 78 additions & 2 deletions examples/macros_demo/project.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,95 @@ name: macros_demo
version: "0.1"

vars:
# used by macros and examples
default_country: "DE"

models: {}

seeds: {}

tests:
# dim_users
- type: not_null
table: dim_users
column: user_id
tags: [batch]

# fct_user_sales (existing)
- type: not_null
table: fct_user_sales
column: user_id
tags: [batch]
- type: row_count_between
table: fct_user_sales
min_rows: 1
tags: [batch]

# NEW: stg_orders – tests stdlib safe_cast/date helpers
- type: not_null
table: stg_orders
column: user_id
tags: [batch]
- type: row_count_between
table: stg_orders
min_rows: 1
tags: [batch]

# amount_safe produced via ff_safe_cast should never be NULL and ≥ 0
- type: not_null
table: stg_orders
column: amount_safe
tags: [batch]
- type: greater_equal
table: stg_orders
column: amount_safe
threshold: 0.0
tags: [batch]
- type: non_negative_sum
table: stg_orders
column: amount_safe
tags: [batch]

# order_day produced via ff_date_trunc should never be NULL
- type: not_null
table: stg_orders
column: order_day
tags: [batch]

# stg_users (your existing checks)
- type: not_null
table: stg_users
column: user_id
tags: [batch]
- type: row_count_between
table: stg_users
min_rows: 1
tags: [batch]

# fct_user_sales_by_country – uses ff_partition_in
- type: not_null
table: fct_user_sales_by_country
column: user_id
tags: [batch]
- type: row_count_between
table: fct_user_sales_by_country
min_rows: 1
tags: [batch]

# Only DE / AT should appear (given your seed + ff_partition_in)
- type: accepted_values
table: fct_user_sales_by_country
column: country
values: ["DE", "AT"]
tags: [batch]

# fct_user_sales_partitioned – uses ff_partition_filter
- type: not_null
table: fct_user_sales_partitioned
column: user_id
tags: [batch]

# We know the date filter gives exactly two users (1 & 2) in the demo seeds
- type: row_count_between
table: fct_user_sales_partitioned
min_rows: 2
max_rows: 2
tags: [batch]
128 changes: 64 additions & 64 deletions src/fastflowtransform/cli/init_cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,25 @@ def _build_project_yaml(ctx: _InitContext) -> str:
'# run_date: "2024-01-01"',
"vars: {}",
"",
"# Optional project-wide hooks that run before/after the pipeline or per model.",
"# See docs/Hooks.md for examples (SQL + Python) and selector usage.",
"hooks:",
" on_run_start: []",
" on_run_end: []",
" before_model: []",
" after_model: []",
"",
"# Optional storage & incremental defaults applied per model name.",
"# See docs/Project_Config.md#models for field meanings.",
"models:",
" storage: {}",
" incremental: {}",
"",
"# Optional seed storage overrides (e.g., external locations per seed).",
"# See docs/Project_Config.md#seeds for supported keys.",
"seeds:",
" storage: {}",
"",
"# Declare project-wide data quality checks under `tests`. "
"See docs/Data_Quality_Tests.md.",
"tests: []",
Expand All @@ -137,80 +156,52 @@ def _build_sources_yaml() -> str:
)


def _build_packages_yaml() -> str:
return "\n".join(
[
"# Packages bring in external models and macros. See docs/Packages.md.",
"packages:",
" # - name: shared_macros",
' # path: "../shared_macros"',
' # models_dir: "models"',
"",
]
)


def _write_file(path: Path, content: str) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(content, encoding="utf-8")


def _create_directory_notes(target: Path) -> None:
notes = {
"models/README.md": "\n".join(
[
"# Models directory",
"",
"Place SQL (`*.ff.sql`) and Python (`*.ff.py`) models here.",
"See docs/Config_and_Macros.md for modeling guidance and config options.",
"",
]
),
"seeds/README.md": "\n".join(
[
"# Seeds directory",
"",
"Add CSV or Parquet files for reproducible seeds.",
"Usage examples are covered in docs/Quickstart.md and "
"docs/Config_and_Macros.md#13-seeds-sources-and-dependencies.",
"",
]
),
"tests/unit/README.md": "\n".join(
[
"# Unit tests",
"",
"Define YAML unit specs as described in "
"docs/Config_and_Macros.md#73-model-unit-tests-fft-utest.",
"Invoke them with `fft utest <project> --env <profile>`.",
"",
]
),
"tests/dq/README.md": "\n".join(
[
"# Data quality tests",
"",
"Store custom data-quality tests that run via `fft test` "
"(docs/Data_Quality_Tests.md).",
"Use this directory for schema-bound tests separate from unit specs.",
"",
]
),
"docs/README.md": "\n".join(
[
"# Project documentation",
"",
"Write operator or contributor notes here and keep "
"them in sync with generated docs.",
"See docs/Technical_Overview.md#auto-docs-and-lineage "
"for `fft dag` / `fft docgen` guidance.",
"",
]
),
}
for rel, text in notes.items():
_write_file(target / rel, text)


def _build_root_readme(ctx: _InitContext) -> str:
return "\n".join(
[
"# FastFlowTransform project scaffold",
"",
"This project was created with `fft init`.",
"",
"What lives here:",
"- models/: SQL (`*.ff.sql`) and Python (`*.ff.py`) models.",
" - models/macros/: Jinja SQL macros loaded automatically.",
" - models/macros_py/: Python helpers exposed as Jinja globals/filters.",
"- seeds/: CSV/Parquet inputs for reproducible seeds (see docs/Quickstart.md).",
"- sources.yml: External tables for source('group','table').",
"- profiles.yml: Engine connections; defaults come from docs/Profiles.md.",
"- packages.yml: Optional shared models/macros (docs/Packages.md).",
"- tests/unit/: YAML specs for `fft utest` (docs/Unit_Tests.md).",
"- tests/dq/: Custom data-quality tests for `fft test` (docs/Data_Quality_Tests.md).",
"- hooks/: SQL or Python hooks referenced from project.yml (docs/Hooks.md).",
"- docs/: Notes plus generated DAG site when using `fft dag --html`.",
"",
"Next steps:",
"1. Update `profiles.yml` with real connection details (docs/Profiles.md).",
"2. Add sources in `sources.yml` and author models "
"under `models/` (docs/Config_and_Macros.md).",
"3. Seed sample data with `fft seed` and execute models "
"with `fft run` (docs/Quickstart.md).",
"2. Add sources in `sources.yml` and author models under `models/` "
" (docs/Config_and_Macros.md).",
"3. Wire packages (optional) in `packages.yml` if you reuse shared "
" models/macros (docs/Packages.md).",
"4. Seed sample data with `fft seed` and execute models "
" with `fft run` (docs/Quickstart.md).",
"",
]
)
Expand Down Expand Up @@ -274,16 +265,25 @@ def init(
engine=resolved_engine,
)

for sub in ("models", "seeds", "tests/unit", "tests/dq", "docs"):
for sub in (
"models",
"models/macros",
"models/macros_py",
"seeds",
"tests/unit",
"tests/dq",
"hooks",
"docs",
):
(project_dir / sub).mkdir(parents=True, exist_ok=True)

_write_file(project_dir / "project.yml", _build_project_yaml(ctx))
_write_file(project_dir / "profiles.yml", _build_profiles_yaml(ctx))
_write_file(project_dir / "sources.yml", _build_sources_yaml())
_write_file(project_dir / "packages.yml", _build_packages_yaml())
_write_file(project_dir / "README.md", _build_root_readme(ctx))
_create_directory_notes(project_dir)

typer.secho(f"Project skeleton created at {project_dir}", fg="green")
typer.secho(f"Project skeleton created at {project_dir}", fg="green")


def register(app: typer.Typer) -> None:
Expand Down
Loading