Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Paths kept out of the Docker build context.
# Patterns are matched relative to the context root; a leading `**/` is
# needed to match at any directory depth.

# Version control, virtualenv, and test caches
.git
.venv
.pytest_cache

# Python bytecode. `**/` also covers the root, so nested packages are
# excluded too (a bare `*.pyc` would only match files in the root).
__pycache__
**/__pycache__
**/*.pyc

# Local environment file
.env

# Generated data, dbt artifacts, logs, and other local outputs
data
dbt/target
dbt/dbt_packages
dbt/logs
dbt/.user.yml
airflow/logs
outputs
work
eventsim/data
eventsim/target
images
10 changes: 10 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Example environment settings for Streamify.
# NOTE(review): presumably copied to `.env` (which is git-ignored) — confirm
# against the project README.

# Left empty here; the CI workflows also run with an empty token when
# validating against sample metadata.
YANDEX_MUSIC_TOKEN=

# Local paths for data and generated artifacts (relative paths;
# presumably resolved from the repository root — TODO confirm).
STREAMIFY_DATA_DIR=data
STREAMIFY_RAW_DIR=data/raw/yamusic
STREAMIFY_DUCKDB_PATH=data/streamify.duckdb
STREAMIFY_REPORT_PATH=data/streamify_summary.md
STREAMIFY_SNAPSHOT_PATH=data/streamify_snapshot.json
STREAMIFY_RECOMMENDATIONS_DIR=data/recommendations
STREAMIFY_DBT_PROFILES_DIR=dbt

# Dashboard port and dbt thread count; CI also pins DBT_THREADS to "1".
STREAMIFY_DASHBOARD_PORT=8501
DBT_THREADS=1
41 changes: 41 additions & 0 deletions .github/ISSUE_TEMPLATE/agent_task.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# GitHub issue form: tracks a multi-agent implementation task.
name: Agent task
description: Track a multi-agent implementation task for Streamify.
title: "[Agent] "
labels: ["agent-task", "triage"]
body:
  # Required: which agent lane the task belongs to.
  - type: dropdown
    id: agent
    attributes:
      label: Agent lane
      options:
        - Repo/Build
        - Yandex Ingestion
        - Analytics/dbt
        - Product/Dashboard
        - QA/Integration
    validations:
      required: true
  # Required: the outcome the task should achieve.
  - type: textarea
    id: objective
    attributes:
      label: Objective
      description: State the product or engineering outcome, not only the code change.
      placeholder: "Example: Make real-account ingestion observable and idempotent for playlist metadata."
    validations:
      required: true
  # Required: how completion is proven.
  - type: textarea
    id: acceptance
    attributes:
      label: Acceptance checks
      description: Commands, artifacts, or runtime behavior that prove completion.
      placeholder: |
        - make raw-contract
        - make dbt-build
        - make dashboard-smoke
    validations:
      required: true
  # Optional: no `validations` block, so this field may be left blank.
  - type: textarea
    id: notes
    attributes:
      label: Notes and risks
      description: API risk, privacy constraints, data quality assumptions, or dependencies.
37 changes: 37 additions & 0 deletions .github/ISSUE_TEMPLATE/data_quality.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# GitHub issue form: reports a data quality problem in any pipeline layer.
name: Data quality issue
description: Report a raw, dbt, dashboard, or readiness quality issue.
title: "[DQ] "
labels: ["data-quality", "triage"]
body:
  # Required: the layer where the problem was observed.
  - type: dropdown
    id: layer
    attributes:
      label: Affected layer
      options:
        - Raw/Bronze
        - Silver/dbt staging
        - Gold marts
        - Dashboard
        - CI/Release
    validations:
      required: true
  # Required: what went wrong.
  - type: textarea
    id: symptom
    attributes:
      label: Symptom
      description: What failed or looked wrong?
    validations:
      required: true
  # Required: supporting evidence; the description warns against pasting
  # tokens or raw private data.
  - type: textarea
    id: evidence
    attributes:
      label: Evidence
      description: Command output, table/model name, manifest counts, or screenshot notes. Do not paste tokens or raw private data.
    validations:
      required: true
  # Required: what the reporter expected instead.
  - type: textarea
    id: expected
    attributes:
      label: Expected behavior
    validations:
      required: true
28 changes: 28 additions & 0 deletions .github/ISSUE_TEMPLATE/product_request.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# GitHub issue form: proposes a user-facing analytics or workflow improvement.
name: Product request
description: Propose a user-facing analytics or workflow improvement.
title: "[Product] "
labels: ["product", "triage"]
body:
  # Required: the listener-facing value of the request.
  - type: textarea
    id: user_value
    attributes:
      label: Product value
      description: What decision or action should this help a listener take?
    validations:
      required: true
  # Required: the data sources the request depends on.
  - type: textarea
    id: data
    attributes:
      label: Data needed
      description: Which Yandex Music metadata, marts, or dashboard views are involved?
    validations:
      required: true
  # Required: how completion is proven.
  - type: textarea
    id: acceptance
    attributes:
      label: Acceptance checks
      placeholder: |
        - make product-answers-smoke
        - make dashboard-smoke
    validations:
      required: true
25 changes: 25 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
## Summary

-

## Product Value

-

## Data Engineering Impact

- Raw/Bronze:
- Silver/dbt:
- Gold/dashboard:
- Privacy/security:

## Checks

- [ ] `make test`
- [ ] `make acceptance-local`
- [ ] `make acceptance-real` when changing real-account ingestion
- [ ] No `.env`, raw Yandex Music data, DuckDB files, or audio artifacts are tracked

## Notes

-
22 changes: 11 additions & 11 deletions .github/workflows/data-quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ on:
- main

jobs:
validate-dbt-quality-contract:
validate-local-product-contract:
runs-on: ubuntu-latest
steps:
- name: Checkout repository
Expand All @@ -18,14 +18,14 @@ jobs:
with:
python-version: "3.12"

- name: Validate dbt quality contract
run: python3 scripts/validate_dbt_quality.py
- name: Install local dependencies
run: |
python3 -m venv .venv
.venv/bin/python -m pip install --upgrade pip
.venv/bin/python -m pip install -r requirements.txt

- name: Compile Python files
run: python3 -m compileall -q airflow/dags spark_streaming scripts

- name: Validate Airflow Compose config
run: cd airflow && GCP_PROJECT_ID=dummy GCP_GCS_BUCKET=dummy docker compose config --quiet

- name: Validate Kafka Compose config
run: cd kafka && docker compose config --quiet
- name: Run local product acceptance checks
env:
YANDEX_MUSIC_TOKEN: ""
DBT_THREADS: "1"
run: make test
58 changes: 58 additions & 0 deletions .github/workflows/pages.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Builds the sample product artifacts and publishes them to GitHub Pages.
name: GitHub Pages

on:
  push:
    branches:
      - main
  workflow_dispatch:

# Permissions required to upload and deploy a Pages artifact.
permissions:
  contents: read
  pages: write
  id-token: write

# Allow only one Pages run at a time. In-progress runs are NOT cancelled so
# that a production deployment already under way is allowed to complete;
# newer runs queue behind it instead.
concurrency:
  group: pages
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python3 -m venv .venv
          .venv/bin/python -m pip install --upgrade pip
          .venv/bin/python -m pip install -r requirements.txt

      # Runs with an empty token, i.e. without real-account credentials.
      - name: Build sample product artifacts
        env:
          YANDEX_MUSIC_TOKEN: ""
          DBT_THREADS: "1"
        run: |
          make acceptance-local
          .venv/bin/python scripts/build_pages_site.py

      # Uploads the `public` directory as the Pages artifact.
      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@v4
        with:
          path: public

  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    needs: build
    steps:
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
53 changes: 53 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Validates a release candidate, packages the tracked source and the Pages
# site, and publishes a GitHub release when triggered by a version tag.
name: Release

on:
  push:
    tags:
      - "v*.*.*"
  workflow_dispatch:

# `contents: write` is needed to create the release.
permissions:
  contents: write

jobs:
  release:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      - name: Install dependencies
        run: |
          python3 -m venv .venv
          .venv/bin/python -m pip install --upgrade pip
          .venv/bin/python -m pip install -r requirements.txt

      # Runs with an empty token, i.e. against sample metadata only.
      - name: Validate release candidate with sample metadata
        env:
          YANDEX_MUSIC_TOKEN: ""
          DBT_THREADS: "1"
        run: |
          make test
          .venv/bin/python scripts/build_pages_site.py

      - name: Package tracked source
        run: |
          # On manual runs GITHUB_REF_NAME is the branch name, which may
          # contain "/" (e.g. feature/x). Replace slashes so the archive
          # path stays inside dist/ instead of pointing at a missing
          # subdirectory.
          ref_slug="${GITHUB_REF_NAME:-manual}"
          ref_slug="${ref_slug//\//-}"
          mkdir -p dist
          git archive --format=tar.gz --output "dist/streamify-${ref_slug}.tar.gz" HEAD
          tar -czf "dist/streamify-pages-${ref_slug}.tar.gz" public

      # Only tag pushes publish a release; manual runs stop after packaging.
      - name: Create GitHub release
        if: startsWith(github.ref, 'refs/tags/')
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          # NOTE(review): tags without their own notes file fall back to the
          # v0.1.0 notes — confirm this is intended for later versions.
          NOTES="docs/releases/${GITHUB_REF_NAME}.md"
          if [ ! -f "$NOTES" ]; then NOTES="docs/releases/v0.1.0.md"; fi
          gh release create "$GITHUB_REF_NAME" dist/* --notes-file "$NOTES" --title "Streamify ${GITHUB_REF_NAME}"
15 changes: 14 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,14 @@
# Logs
logs/
dbt/logs/

# Local environment and Python tooling
.env
.venv/
.pytest_cache/
__pycache__/
*.pyc

# Data directory: ignore its contents but keep the placeholder file
data/*
!data/.gitkeep

# DuckDB database files
*.duckdb
*.duckdb.wal

# dbt build outputs and installed packages
dbt/target/
dbt/dbt_packages/

# Generated site output
public/
5 changes: 5 additions & 0 deletions .streamlit/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Streamlit configuration for the dashboard.

[browser]
# Turn off Streamlit's usage statistics collection.
gatherUsageStats = false

[server]
# Run headless: do not try to open a browser window on start.
headless = true
Loading