diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..7bc632e --- /dev/null +++ b/.env.example @@ -0,0 +1,30 @@ +# Legacy dashboard and AutoGen provider configuration +AUTOGEN_PROVIDER=gemini +AUTOGEN_APPROVAL_WORD=APPROVE +AUTOGEN_STATE_DIR=state +AUTOGEN_REPO_SCAN_ROOT=C:\PersonalRepo + +# Active Microsoft Agent Framework provider configuration +MAF_API_KEY= +MAF_MODEL=gemini-2.5-flash +MAF_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai/ +MAF_REPO_ROOT=C:\PersonalRepo +MAF_ENTITIES_DIR=entities +MAF_CHECKPOINT_DIR=state\maf-checkpoints +MAF_ROUTE_LANE=auto + +# Gemini API and optional CLI fallback +GEMINI_API_KEY= +GEMINI_MODEL=gemini-2.5-flash +GEMINI_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai/ +GEMINI_CLI_COMMAND=gemini.cmd +GEMINI_CLI_MODEL= + +# Optional Anthropic API and local CLI fallbacks +ANTHROPIC_API_KEY= +ANTHROPIC_MODEL=claude-sonnet-4-6 +CLAUDE_CLI_COMMAND=claude +CLAUDE_CLI_MODEL= +CLAUDE_CODE_GIT_BASH_PATH= +CODEX_CLI_COMMAND=codex.cmd +CODEX_CLI_MODEL= diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b6ba643..b13aa8d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,19 +5,44 @@ on: branches: [ main ] pull_request: +permissions: + contents: read + +concurrency: + group: ci-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: test: - runs-on: ubuntu-latest + name: Python ${{ matrix.python-version }} / ${{ matrix.os }} + runs-on: ${{ matrix.os }} + timeout-minutes: 20 + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ['3.12'] + steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - - name: Set up Python 3.12 - uses: actions/setup-python@v5 + - name: Set up Python + uses: actions/setup-python@v6 with: - python-version: '3.12' + python-version: ${{ matrix.python-version }} + cache: pip + + - name: Install dependencies + run: python -m pip install -r 
requirements.txt + + - name: Verify dependency consistency + run: python -m pip check + + - name: Run full test suite + run: python -m pytest -q --tb=short - - name: Install test runner - run: pip install pytest + - name: Compile Python sources + run: python -m compileall autogen_starter autogen_dashboard maf_starter main.py -q - - name: Run static contract tests - run: python -m pytest tests/test_phase5_ui_contract.py tests/test_phase5_operator_views.py -v + - name: Validate dashboard JavaScript + run: node --check autogen_dashboard/static/app.js diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c3fe68d --- /dev/null +++ b/.gitignore @@ -0,0 +1,12 @@ +.venv/ +__pycache__/ +*.pyc +.pytest_cache/ +.env +.env.* +!.env.example +!.env.*.example +state/ +.tmp-tests/ +*.out.log +*.err.log diff --git a/.planning/STATE.md b/.planning/STATE.md index 8ee56c0..7358236 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -4,8 +4,8 @@ milestone: v1.1 milestone_name: milestone status: completed stopped_at: Phase 6 complete -last_updated: "2026-04-15T19:56:10.036Z" -last_activity: 2026-03-22 - Phase 6 complete - Plan 03 delivered Command Center parity tests, legacy compatibility notices, and README External API section +last_updated: "2026-06-10T18:30:00+03:00" +last_activity: 2026-06-10 - Completed quick task 260610-ppt: PR #1 follow-up truthful Quickstart and Configuration guidance progress: total_phases: 2 completed_phases: 1 @@ -28,7 +28,7 @@ See: .planning/PROJECT.md (updated 2026-03-22) Phase: 07 (azure-functions-and-cloud-control-plane) - READY TO START Plan: 07-01 (next) Status: Phase 6 complete - Shared control-plane API delivered with /api/v1 REST router, Command Center parity validation, and external API documentation. Ready for Azure Functions deployment. 
-Last activity: 2026-03-22 - Phase 6 complete - Plan 03 delivered Command Center parity tests, legacy compatibility notices, and README External API section +Last activity: 2026-06-10 - Completed quick task 260610-ppt: PR #1 follow-up truthful Quickstart and Configuration guidance ## Performance Metrics @@ -135,6 +135,12 @@ None yet. - Local Python is `3.14.2`, but Azure Functions hosted deployment work should target GA-supported Python such as `3.13` or `3.12` - A cloud-hosted control plane cannot assume local CLI logins, desktop-bound tooling, or direct repo access unless a compatible worker is attached +### Quick Tasks Completed + +| # | Description | Date | Commit | Status | Directory | +|---|-------------|------|--------|--------|-----------| +| 260610-ppt | PR #1 follow-up: truthful Quickstart and Configuration guidance | 2026-06-10 | 5487e05 | Verified | [260610-ppt-pr-1-follow-up-restore-practical-truthfu](./quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/) | + ## Session Continuity Last session: 2026-03-22T22:00:00+02:00 diff --git a/.planning/audits/260610-enterprise-audit-fix/CLASSIFICATION.md b/.planning/audits/260610-enterprise-audit-fix/CLASSIFICATION.md new file mode 100644 index 0000000..60c571e --- /dev/null +++ b/.planning/audits/260610-enterprise-audit-fix/CLASSIFICATION.md @@ -0,0 +1,14 @@ +# Enterprise Audit-Fix Classification + +Source: `gsd-audit-fix --severity all --max 8` + +| ID | Severity | Classification | Finding | +|---|---|---|---| +| F-01 | High | Auto-fixable | Clean clone could not collect the shipped full test suite because required runtime modules and dependency/bootstrap files were missing. | +| F-02 | High | Auto-fixable | CI ran only two static contract tests and could report green while the broader suite was broken. | +| F-03 | High | Auto-fixable | `SessionStore` composed unvalidated session, stage, and attempt identifiers into filesystem paths. 
| +| F-04 | High | Auto-fixable | Dashboard CORS allowed wildcard origins with credentials. | +| F-05 | Medium | Auto-fixable | Repo write operations accepted arbitrary encodings and used non-atomic direct writes. | +| F-06 | Medium | Auto-fixable | Missing `.gitignore` exposed secrets and runtime/test artifacts to accidental commits. | +| F-07 | Medium | Manual-only | Legacy dashboard and MAF runtime contracts overlap and require an architectural consolidation decision. | +| F-08 | Medium | Manual-only | Production authentication, worker isolation, and remote deployment boundaries remain intentionally unimplemented. | diff --git a/.planning/audits/260610-enterprise-audit-fix/SUMMARY.md b/.planning/audits/260610-enterprise-audit-fix/SUMMARY.md new file mode 100644 index 0000000..3d1506c --- /dev/null +++ b/.planning/audits/260610-enterprise-audit-fix/SUMMARY.md @@ -0,0 +1,25 @@ +# Enterprise Audit-Fix Summary + +## Result + +Six auto-fixable findings were resolved. Two architecture/deployment findings remain manual-only. + +| ID | Status | Commit | +|---|---|---| +| F-01 | Fixed | `65cb4f2`, `f2b1210` | +| F-02 | Fixed | `fa78d6b` | +| F-03 | Fixed | `48bdb95` | +| F-04 | Fixed | `92e3adf` | +| F-05 | Fixed | `76eda85` | +| F-06 | Fixed | `4975e0b` | +| F-07 | Manual-only | Architecture decision required | +| F-08 | Manual-only | Production boundary decision required | + +## Outcome + +- Restored a runnable clean-clone contract with declared dependencies, example configuration, launcher, and required runtime modules. +- Replaced narrow green CI with full Windows/Linux validation. +- Closed durable-state path traversal and wildcard credentialed CORS risks. +- Made agent-driven repo writes atomic and UTF-8-only. +- Protected local secrets and generated state from accidental commits. +- Updated README claims to match the verified repository state. 
diff --git a/.planning/audits/260610-enterprise-audit-fix/VERIFICATION.md b/.planning/audits/260610-enterprise-audit-fix/VERIFICATION.md new file mode 100644 index 0000000..f2675ce --- /dev/null +++ b/.planning/audits/260610-enterprise-audit-fix/VERIFICATION.md @@ -0,0 +1,23 @@ +# Enterprise Audit-Fix Verification + +## Automated Checks + +- `python -m pytest -q --tb=short`: 71 passed, 4 subtests passed +- `python -m compileall autogen_starter autogen_dashboard maf_starter main.py -q`: passed +- `node --check autogen_dashboard/static/app.js`: passed +- `git diff --check`: passed +- `python main.py providers`: launcher and provider inventory passed +- `git check-ignore -v .env .pytest_cache .tmp-tests example.err.log`: expected ignore rules passed + +## Environment Note + +`python -m pip check` reports an unrelated workstation-level preview dependency mismatch: +`agent-framework-core 1.0.0rc5` expects `azure-ai-projects>=2.0.0,<3.0`, while the shared environment contains `azure-ai-projects 2.0.0b3`. +CI installs from `requirements.txt` in a clean environment and now treats dependency consistency as a required gate. + +## Manual Verification Remaining + +- Observe the Windows and Linux GitHub Actions jobs on PR #1. The workflow was committed through the authenticated GitHub connector because the local GitHub CLI token does not carry the `workflow` scope. +- Exercise one real provider-backed dashboard run with a non-production key. +- Decide whether to consolidate or retire the parallel legacy and MAF runtime contracts. +- Define production authentication and isolated worker execution before any non-loopback deployment. 
diff --git a/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-PLAN.md b/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-PLAN.md new file mode 100644 index 0000000..c12e122 --- /dev/null +++ b/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-PLAN.md @@ -0,0 +1,41 @@ +--- +quick_id: 260610-ppt +mode: quick-full +status: ready +date: 2026-06-10 +--- + +# PR #1 follow-up: truthful Quickstart and Configuration guidance + +## Goal + +Restore practical README guidance that helps readers validate and understand the checked-in repository without claiming a missing dependency manifest, `.env.example`, launcher, or supported full-runtime bootstrap. + +## Must Haves + +- Quickstart commands operate against files present on `docs/portfolio-hardening-20260610`. +- The README clearly distinguishes the CI-aligned static contract tests from a full runtime launch. +- Configuration guidance is derived from `maf_starter/config.py` and does not claim an `.env.example` exists. +- Validation includes the documented test command, README claim checks, and `git diff --check`. + +## Tasks + +### Task 1: Restore truthful operator guidance + +**Files:** `README.md` + +**Action:** Add a Quickstart that runs the same static contract tests as CI, disclose the current runtime-bootstrap limitations, and add a configuration reference based only on variables read by `maf_starter/config.py`. + +**Verify:** Confirm every named repository path exists and every documented command is appropriate for this snapshot. + +**Done:** Readers can validate the portfolio evidence and understand configuration boundaries without being told to use missing files or unsupported launch commands. 
+ +### Task 2: Validate and record completion + +**Files:** `.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-SUMMARY.md`, `.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-VERIFICATION.md`, `.planning/STATE.md` + +**Action:** Run the CI-aligned tests and `git diff --check`, verify README claims against tracked files and configuration source, then record the quick-task result. + +**Verify:** All validation commands pass and the final commit contains only the README follow-up and GSD quick-task artifacts/state. + +**Done:** The task is documented, verified, atomically committed, and ready to push on the existing PR branch. diff --git a/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-SUMMARY.md b/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-SUMMARY.md new file mode 100644 index 0000000..06057f3 --- /dev/null +++ b/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-SUMMARY.md @@ -0,0 +1,24 @@ +--- +quick_id: 260610-ppt +status: complete +completed: 2026-06-10 +implementation_commit: 5487e05 +--- + +# Quick Task 260610-ppt Summary + +Restored practical README Quickstart and Configuration guidance for PR #1 without presenting the incomplete repository snapshot as a supported full-runtime distribution. + +## Delivered + +- Added a PowerShell Quickstart that runs the same dependency-light operator-workbench contract tests used by CI. +- Explicitly documented the missing dependency manifest, `.env.example`, launcher, and legacy dashboard imports that prevent a truthful clean-clone runtime launch command. +- Added a configuration table derived from environment variables actually read by `maf_starter/config.py`. + +## Validation + +- `python -m pytest tests/test_phase5_ui_contract.py tests/test_phase5_operator_views.py -v` - 16 passed. +- Verified all README paths and missing-bootstrap statements against tracked files. 
+- `git diff --check` - passed. + +Implementation commit: `5487e05` diff --git a/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-VERIFICATION.md b/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-VERIFICATION.md new file mode 100644 index 0000000..41a8f1f --- /dev/null +++ b/.planning/quick/260610-ppt-pr-1-follow-up-restore-practical-truthfu/260610-ppt-VERIFICATION.md @@ -0,0 +1,23 @@ +--- +quick_id: 260610-ppt +status: passed +verified: 2026-06-10 +--- + +# Quick Task 260610-ppt Verification + +## Goal + +Restore practical, truthful Quickstart and Configuration guidance without claiming missing files or unsupported runtime bootstrap. + +## Result + +Passed. The README now provides an executable CI-aligned validation path, identifies the full-runtime bootstrap as unsupported in the checked-in snapshot, and documents configuration from `maf_starter/config.py`. + +## Evidence + +- The documented pytest command completed with 16 passing tests. +- `README.md`, `maf_starter/config.py`, the two documented tests, and `.github/workflows/ci.yml` exist. +- `requirements.txt`, `pyproject.toml`, `setup.py`, `.env.example`, `main.py`, and `autogen_starter/` are absent, matching the README limitation statement. +- Configuration names and defaults were checked against `load_settings()` in `maf_starter/config.py`. +- `git diff --check` passed before the implementation commit. 
diff --git a/README.md b/README.md index ae9ad31..e4f916d 100644 --- a/README.md +++ b/README.md @@ -7,55 +7,127 @@ Part of the [Coding-Autopilot-System](https://github.com/Coding-Autopilot-System) ecosystem: [gsd-orchestrator](https://github.com/Coding-Autopilot-System/gsd-orchestrator) | [Promptimprover](https://github.com/Coding-Autopilot-System/Promptimprover) -autogen is a Python multi-agent orchestration runtime built on Microsoft Agent Framework — combining a Gemini/Claude provider fallback chain, AG-UI observability, and a local operator workbench for end-to-end autonomous engineering workflows. - -## Features - -- **Provider fallback chain** — Gemini API (primary) -> Anthropic API -> local CLI fallback; routing policy classifies prompt complexity and selects provider automatically -- **Multi-agent team orchestration** — sequential planner->researcher->implementer->reviewer chain with FileCheckpointStorage for resumable runs -- **Bounded repo tools** — read/list/search/write with enforced scope limits; agents cannot access paths outside the configured repo root -- **AG-UI observability** — DevUI-discoverable workflows expose run state, specialist handoffs, and pause/approval events -- **Human-in-the-loop approvals** — approval policy gates destructive writes; operators review before execution proceeds - -## Architecture - -```mermaid -flowchart LR - Op["Operator\n(DevUI / Workbench)"] -->|"prompt"| MAF["autogen\n(MAF Runtime)"] - MAF --> Gemini["Gemini API\n(primary)"] - MAF --> Anthropic["Anthropic API\n(fallback)"] - MAF --> CLI["Local CLIs\n(gemini-cli / claude)"] - MAF --> Entities["Entities\n(repo_team, copilots)"] - MAF --> Tools["Repo Tools\n(read / write / search)"] - MAF --> Checkpoints["Checkpoints\n(FileCheckpointStorage)"] - Entities --> Out["Run Output\n+ Artifacts"] - Tools --> Out - Checkpoints --> Out -``` +`autogen` is a local-first multi-agent engineering workbench built on Microsoft Agent Framework. 
The product goal is simple: point the system at a real repository, give it one engineering objective, and let a manager-led workflow coordinate planning, research, implementation, review, approvals, validation, and durable artifacts with less manual steering than a chat-first coding loop. + +This repository is strongest as an architecture and operator-systems portfolio piece: it shows how to turn LLM tooling into a controlled engineering runtime instead of a demo chatbot. + +## Product Story + +Most agent demos stop at "the model answered." `autogen` focuses on the operator problem after that: + +- How do you scope agents to a real repo without letting them roam the machine? +- How do you keep a manager, specialists, and provider fallback chain inspectable? +- How do you pause for approval before destructive changes? +- How do you leave behind run artifacts, validation results, and retryable state instead of ephemeral chat output? + +The answer in this codebase is a manager-led orchestration model with bounded repo tools, approval-aware execution, and a UI contract designed for traceability. + +## What Exists In The Repo Today + +- **Manager-led orchestration**: `entities/repo_team/workflow.py` wires a workflow for planner, researcher, implementer, reviewer, and validation-stage visibility. +- **Scoped repository operations**: `maf_starter/tools.py` enforces repo-root path boundaries, blocks writes to sensitive targets like `.env`, and limits read/search surfaces. +- **Routed provider execution**: `maf_starter/provider_fallback.py` and `maf_starter/routing_policy.py` select models by task depth and fall back across API and CLI providers when needed. +- **Approval and guardrails**: `maf_starter/approval_policy.py` classifies file operations and validation commands so destructive or externally visible actions stop for operator approval. 
+- **Durable run artifacts**: `autogen_dashboard/session_store.py` persists transcripts, runtime state, stage summaries, diffs, validation records, and attempt metadata. +- **Operator-facing visibility**: the dashboard contract covers timeline, routing, agents, artifacts, and approval surfaces rather than a single opaque transcript. + +## Demo Scenarios + +The best way to understand the product is through operator outcomes: + +- **Architecture review on a real repo**: point the system at a checked-out repository and ask for a plan. The manager can retain workspace metadata, route to the right model tier, and preserve the resulting artifacts for follow-up attempts. +- **Guardrailed implementation run**: ask for a change that touches code or config. Safe edits can proceed through bounded repo tools, while destructive actions pause with an explicit approval scope. +- **Provider-resilience drill**: trigger a quota or rate-limit failure on the primary model path and inspect how the fallback chain records the route attempt history and capability changes. + +## Evidence And Evaluation Posture + +This repo already carries more engineering evidence than the old README surfaced: + +- `tests/test_workspace_contract.py` validates workspace discovery, repo-root safety, and session creation contracts against real temporary git repos. +- `tests/test_run_persistence.py` verifies durable session layout, artifact manifests, attempts, diffs, validation outputs, and atomic persistence behavior. +- `tests/test_phase4_approval.py` proves destructive writes and externally visible commands are classified and paused behind approval. +- `tests/test_phase4_validation.py` checks that changed files produce a proportionate validation ladder including `git diff --check`, Python compile checks, unit discovery, and JavaScript syntax checks. 
+- `tests/test_phase5_ui_contract.py` and `tests/test_phase5_operator_views.py` lock the operator UI to timeline, routing, artifact, and specialist-view contracts. +- `.github/workflows/ci.yml` installs the declared environment and runs the full suite, Python compilation, dependency consistency, and JavaScript syntax checks on Windows and Linux. ## Quickstart -```bash +The checked-in snapshot supports a clean-clone local dashboard and full validation workflow: + +```powershell git clone https://github.com/Coding-Autopilot-System/autogen.git -cd autogen +Set-Location autogen + python -m venv .venv -# Windows: .\.venv\Scripts\Activate.ps1 -# macOS/Linux: source .venv/bin/activate -pip install agent-framework python-dotenv +.\.venv\Scripts\python.exe -m pip install -r requirements.txt +Copy-Item .env.example .env + +.\.venv\Scripts\python.exe main.py providers +.\.venv\Scripts\python.exe main.py dashboard --host 127.0.0.1 --port 8000 ``` -Copy `.env.example` to `.env` and set `GEMINI_API_KEY`. See the [Setup Guide](https://github.com/Coding-Autopilot-System/autogen/wiki/Setup-Guide) for full configuration instructions. +Run the complete regression suite before changing runtime behavior: + +```powershell +.\.venv\Scripts\python.exe -m pytest -q --tb=short +``` ## Configuration +`maf_starter/config.py` is the source of truth for the active MAF configuration contract. Copy `.env.example` to `.env`, set only the providers you intend to use, and never commit API keys. The legacy dashboard launcher also reads `AUTOGEN_*` settings through `autogen_starter/config.py`. + +Minimal provider and workspace settings: + +```dotenv +GEMINI_API_KEY=your-gemini-api-key +MAF_REPO_ROOT=C:\path\to\target-repository +``` + +| Variable | Required | Default | Purpose | +|----------|----------|---------|---------| +| `MAF_API_KEY` or `GEMINI_API_KEY` | Yes for MAF agent construction | None | API key used by the OpenAI-compatible Gemini client; `MAF_API_KEY` takes precedence. 
| +| `MAF_MODEL` or `GEMINI_MODEL` | No | `gemini-2.5-flash` | Primary model; the `MAF_*` name takes precedence. | +| `MAF_BASE_URL` or `GEMINI_BASE_URL` | No | Gemini OpenAI-compatible endpoint | Provider base URL; the `MAF_*` name takes precedence. | +| `MAF_REPO_ROOT` | No | Repository root | Repository exposed to bounded repo tools. The path must exist. | +| `MAF_ENTITIES_DIR` | No | `entities` | Entity discovery directory. | +| `MAF_CHECKPOINT_DIR` | No | `state\maf-checkpoints` | File-backed checkpoint location. | +| `MAF_ROUTE_LANE` | No | `auto` | Routing lane used to select task depth and provider order. | +| `MAF_REQUESTED_PROVIDER`, `MAF_REQUESTED_MODEL` | No | None | Optional explicit provider/model selection. | +| `MAF_FALLBACK_CHAIN` | No | Built-in Gemini/API/CLI chain | Comma-separated fallback steps. | +| `ANTHROPIC_API_KEY`, `ANTHROPIC_MODEL` | No | No key; `claude-sonnet-4-6` model | Enables the optional Anthropic fallback when its package is installed. | +| `AUTOGEN_CORS_ORIGINS` | No | Explicit loopback origins | Comma-separated origins allowed to call the local dashboard API; wildcard CORS is rejected. | +| `GEMINI_CLI_COMMAND`, `CLAUDE_CLI_COMMAND`, `CODEX_CLI_COMMAND` | No | `gemini.cmd`, `claude`, `codex.cmd` | Executable names used by optional local CLI fallbacks. | + +Additional optional model-candidate and CLI-model overrides are defined directly in `maf_starter/config.py`. + +## Why This Is A Strong Hiring Signal + +This project demonstrates more than framework familiarity. It shows judgment about: + +- turning agent capabilities into bounded operational surfaces, +- separating operator control from model improvisation, +- preserving artifacts and retry semantics for long-running engineering work, +- designing UI and API contracts around observability instead of novelty, +- and shaping local-first tooling so it can evolve toward service boundaries later.
+ +## Cloud-Ready Direction + +`autogen` is intentionally local-first today, but its primitives already point toward a future control plane: + +- durable run IDs and persisted artifacts, +- explicit pause, approve, retry, and resume semantics, +- structured route-attempt metadata, +- workspace and execution contracts that can sit behind HTTP later, +- and an orchestration core that can be split from the local operator shell. + +That is the right foundation for a later Azure-hosted control plane or worker boundary without rebuilding the product concept from scratch. -| Variable | Required | Default | Description | -|----------|----------|---------|-------------| -| `GEMINI_API_KEY` | Yes | -- | Gemini API key for primary model | -| `MAF_MODEL` | No | `gemini-2.5-flash` | Primary model for agent runs | -| `MAF_FALLBACK_CHAIN` | No | Auto | Comma-separated provider fallback order | -| `ANTHROPIC_API_KEY` | No | -- | Anthropic API key for Claude fallback | +## Repository Pointers -See the [Configuration Reference](https://github.com/Coding-Autopilot-System/autogen/wiki/Configuration-Reference) for the full variable list. 
+- `maf_starter/` - orchestration core, routing, fallback, repo tools, approvals, validation +- `autogen_dashboard/` - API and operator-facing session surfaces +- `entities/repo_team/` - manager-led workflow entrypoint +- `tests/` - contract, runtime, approval, persistence, and operator-view evidence +- `.planning/` - architecture notes, phased roadmap, and future control-plane direction ## License diff --git a/autogen_dashboard/app.py b/autogen_dashboard/app.py index a05534c..f92deac 100644 --- a/autogen_dashboard/app.py +++ b/autogen_dashboard/app.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import os from pathlib import Path from fastapi import Depends, FastAPI, HTTPException @@ -26,13 +27,24 @@ from autogen_starter.providers import ProviderConfigError +DEFAULT_CORS_ORIGINS = ("http://127.0.0.1:8000", "http://localhost:8000") + + +def _cors_origins() -> list[str]: + configured = os.getenv("AUTOGEN_CORS_ORIGINS", "") + origins = [origin.strip() for origin in configured.split(",") if origin.strip()] + if "*" in origins: + raise ValueError("AUTOGEN_CORS_ORIGINS must list explicit origins; wildcard CORS is not allowed.") + return origins or list(DEFAULT_CORS_ORIGINS) + + def create_app() -> FastAPI: app = FastAPI(title="AutoGen Dashboard", version="0.1.0") app.add_middleware( CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, + allow_origins=_cors_origins(), + allow_credentials=False, allow_methods=["*"], allow_headers=["*"], ) diff --git a/autogen_dashboard/session_store.py b/autogen_dashboard/session_store.py index 12cda49..76f764c 100644 --- a/autogen_dashboard/session_store.py +++ b/autogen_dashboard/session_store.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import re import tempfile from pathlib import Path from typing import Any @@ -8,13 +9,22 @@ from autogen_dashboard.schemas import RepoContext, SessionDetail, SessionEvent, SessionSummary, TranscriptMessage +SAFE_PATH_COMPONENT = 
re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{0,127}$") + + +def _safe_path_component(value: str, *, label: str) -> str: + if not SAFE_PATH_COMPONENT.fullmatch(value) or value in {".", ".."}: + raise ValueError(f"Invalid {label}: expected a single safe path component") + return value + + class SessionStore: def __init__(self, sessions_root: Path) -> None: - self.sessions_root = sessions_root + self.sessions_root = sessions_root.resolve() self.sessions_root.mkdir(parents=True, exist_ok=True) def session_dir(self, session_id: str) -> Path: - return self.sessions_root / session_id + return self.sessions_root / _safe_path_component(session_id, label="session id") def metadata_path(self, session_id: str) -> Path: return self.session_dir(session_id) / "metadata.json" @@ -38,7 +48,7 @@ def stage_artifacts_dir(self, session_id: str) -> Path: return self.artifacts_dir(session_id) / "stages" def stage_artifact_dir(self, session_id: str, stage: str) -> Path: - return self.stage_artifacts_dir(session_id) / stage + return self.stage_artifacts_dir(session_id) / _safe_path_component(stage, label="stage") def stage_summary_path(self, session_id: str, stage: str) -> Path: return self.stage_artifact_dir(session_id, stage) / "summary.json" @@ -92,7 +102,7 @@ def attempts_dir(self, session_id: str) -> Path: return self.session_dir(session_id) / "attempts" def attempt_dir(self, session_id: str, attempt_id: str) -> Path: - return self.attempts_dir(session_id) / attempt_id + return self.attempts_dir(session_id) / _safe_path_component(attempt_id, label="attempt id") def attempt_summary_path(self, session_id: str, attempt_id: str) -> Path: return self.attempt_dir(session_id, attempt_id) / "summary.json" diff --git a/autogen_starter/__init__.py b/autogen_starter/__init__.py new file mode 100644 index 0000000..38b2a22 --- /dev/null +++ b/autogen_starter/__init__.py @@ -0,0 +1 @@ +"""AutoGen starter package.""" diff --git a/autogen_starter/cli.py b/autogen_starter/cli.py new file mode 100644 index 
0000000..46d2c72 --- /dev/null +++ b/autogen_starter/cli.py @@ -0,0 +1,168 @@ +from __future__ import annotations + +import argparse +import asyncio +import json +from pathlib import Path + +import uvicorn +from autogen_agentchat.agents import AssistantAgent, UserProxyAgent +from autogen_agentchat.conditions import TextMentionTermination +from autogen_agentchat.teams import RoundRobinGroupChat +from autogen_agentchat.ui import Console + +from autogen_starter.config import Settings, load_settings +from autogen_starter.providers import ( + ProviderConfigError, + collect_provider_statuses, + create_model_client, +) + +DEFAULT_CHAT_SYSTEM_MESSAGE = ( + "You are a collaborative assistant. Work with the human in short iterations. " + "Ask concise follow-up questions only when they are necessary." +) + +DEFAULT_STEP_SYSTEM_MESSAGE = ( + "You are a collaborative assistant. Produce one useful step at a time, then stop. " + "If you need clarification, ask for it directly and briefly." +) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="AutoGen AgentChat starter.") + subparsers = parser.add_subparsers(dest="command", required=True) + + subparsers.add_parser("providers", help="Show provider readiness.") + + chat_parser = subparsers.add_parser( + "chat", help="Run blocking human-in-the-loop chat." + ) + chat_parser.add_argument( + "--task", required=True, help="Initial task to send to the team." + ) + chat_parser.add_argument( + "--system-message", + default=DEFAULT_CHAT_SYSTEM_MESSAGE, + help="System message for the assistant.", + ) + + step_parser = subparsers.add_parser( + "step", help="Run one resumable assistant turn and save state." + ) + step_parser.add_argument( + "--task", required=True, help="New task or human feedback for the next turn." 
def reset_state(state_path: Path) -> None:
    """Delete the saved resumable team state, if it exists.

    Args:
        state_path: Location of the persisted team-state file.

    Prints one line saying whether a file was deleted or none was found.
    """
    # EAFP: deleting directly avoids the window between an ``exists()`` probe
    # and the ``unlink()`` in which another process could remove the file.
    try:
        state_path.unlink()
    except FileNotFoundError:
        print(f"No state file found at {state_path}")
    else:
        print(f"Deleted {state_path}")
"vision": False, + "function_calling": False, + "json_output": True, + "structured_output": False, + "family": "cli-subprocess", + "multiple_system_messages": True, +} + + +@dataclass(frozen=True) +class SubprocessResult: + content: str + usage: RequestUsage + + +class SubprocessCliChatCompletionClient(ChatCompletionClient): + def __init__( + self, + *, + name: str, + command: Sequence[str], + parser, + env: Mapping[str, str] | None = None, + working_directory: str | None = None, + timeout_seconds: int = 180, + ) -> None: + self._name = name + self._command = list(command) + self._parser = parser + self._env = dict(env or {}) + self._working_directory = working_directory + self._timeout_seconds = timeout_seconds + self._actual_usage = RequestUsage(prompt_tokens=0, completion_tokens=0) + self._total_usage = RequestUsage(prompt_tokens=0, completion_tokens=0) + + @property + def model_info(self) -> ModelInfo: + return _MODEL_INFO + + def capabilities(self) -> ModelInfo: # type: ignore[override] + return self.model_info + + async def create( + self, + messages: Sequence[LLMMessage], + *, + tools: Sequence[Any] = (), + tool_choice: Any = "auto", + json_output: Any = None, + extra_create_args: Mapping[str, Any] = {}, + cancellation_token: CancellationToken | None = None, + ) -> CreateResult: + if tools: + raise NotImplementedError( + f"{self._name} does not support AutoGen tool-calling in this starter." + ) + if tool_choice not in ("auto", "none"): + raise NotImplementedError( + f"{self._name} does not support tool_choice={tool_choice!r}." + ) + if json_output not in (None, False): + raise NotImplementedError( + f"{self._name} does not support AutoGen JSON output mode in this starter." + ) + if extra_create_args: + raise NotImplementedError( + f"{self._name} does not support extra_create_args in this starter." 
+ ) + if cancellation_token and cancellation_token.is_cancelled(): + raise RuntimeError(f"{self._name} request was cancelled before launch.") + + prompt = _messages_to_prompt(messages) + completed = await asyncio.to_thread(self._run_command, prompt) + parsed = self._parser(completed.stdout, completed.stderr) + self._actual_usage = parsed.usage + self._total_usage = RequestUsage( + prompt_tokens=self._total_usage.prompt_tokens + parsed.usage.prompt_tokens, + completion_tokens=self._total_usage.completion_tokens + + parsed.usage.completion_tokens, + ) + return CreateResult( + finish_reason="stop", + content=parsed.content, + usage=parsed.usage, + cached=False, + ) + + async def create_stream( + self, + messages: Sequence[LLMMessage], + *, + tools: Sequence[Any] = (), + tool_choice: Any = "auto", + json_output: Any = None, + extra_create_args: Mapping[str, Any] = {}, + cancellation_token: CancellationToken | None = None, + ) -> AsyncGenerator[str | CreateResult, None]: + result = await self.create( + messages, + tools=tools, + tool_choice=tool_choice, + json_output=json_output, + extra_create_args=extra_create_args, + cancellation_token=cancellation_token, + ) + yield result + + async def close(self) -> None: + return None + + def actual_usage(self) -> RequestUsage: + return self._actual_usage + + def total_usage(self) -> RequestUsage: + return self._total_usage + + def count_tokens( + self, messages: Sequence[LLMMessage], *, tools: Sequence[Any] = () + ) -> int: + prompt = _messages_to_prompt(messages) + return max(1, len(prompt) // 4) + + def remaining_tokens( + self, messages: Sequence[LLMMessage], *, tools: Sequence[Any] = () + ) -> int: + return 100_000 + + def _run_command(self, prompt: str) -> subprocess.CompletedProcess[str]: + resolved = shutil.which(self._command[0]) or self._command[0] + if not shutil.which(self._command[0]) and not os.path.exists(resolved): + raise RuntimeError( + f"Command not found for provider {self._name!r}: {self._command[0]}" + ) + + 
env = os.environ.copy() + env.update(self._env) + command_args = [resolved, *self._command[1:]] + if "{prompt}" in command_args: + command_args = [ + prompt if value == "{prompt}" else value for value in command_args + ] + else: + command_args.append(prompt) + + completed = subprocess.run( + command_args, + cwd=self._working_directory, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + timeout=self._timeout_seconds, + check=False, + env=env, + ) + if completed.returncode != 0: + stderr = completed.stderr.strip() + stdout = completed.stdout.strip() + detail = stderr or stdout or f"exit code {completed.returncode}" + raise RuntimeError(f"{self._name} failed: {detail}") + return completed + + +def build_codex_cli_client( + command: str, + model: str | None = None, + working_directory: str | None = None, +) -> SubprocessCliChatCompletionClient: + base_command = [command, "exec", "--skip-git-repo-check", "--json"] + if model: + base_command.extend(["--model", model]) + return SubprocessCliChatCompletionClient( + name="codex-cli", + command=base_command, + parser=_parse_codex_jsonl, + working_directory=working_directory, + ) + + +def build_gemini_cli_client( + command: str, + working_directory: str | None = None, +) -> SubprocessCliChatCompletionClient: + return build_gemini_cli_client_with_model( + command, working_directory=working_directory + ) + + +def build_gemini_cli_client_with_model( + command: str, + model: str | None = None, + working_directory: str | None = None, +) -> SubprocessCliChatCompletionClient: + base_command = [command, "-p", "{prompt}", "--approval-mode", "plan"] + if model: + base_command.extend(["--model", model]) + return SubprocessCliChatCompletionClient( + name="gemini-cli", + command=base_command, + parser=_parse_plain_text, + working_directory=working_directory, + ) + + +def build_claude_cli_client( + command: str, + model: str | None = None, + git_bash_path: str | None = None, + working_directory: str | None = 
def _parse_codex_jsonl(stdout: str, stderr: str) -> SubprocessResult:
    """Extract the agent reply and token usage from JSON-lines CLI output.

    Each stdout line beginning with ``{`` is decoded as one JSON event. The
    text of the last ``item.completed`` event whose item type is
    ``agent_message`` becomes the reply; the last ``turn.completed`` event
    supplies ``input_tokens`` and ``output_tokens``.

    Args:
        stdout: Captured standard output of the CLI run.
        stderr: Captured standard error, used only for the failure detail.

    Returns:
        The stripped reply text together with the reported token usage.

    Raises:
        RuntimeError: If no agent message event was found.
    """
    response_text = None
    prompt_tokens = 0
    completion_tokens = 0

    for raw_line in stdout.splitlines():
        line = raw_line.strip()
        if not line.startswith("{"):
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            # A stray brace-prefixed log line must not abort the whole parse.
            continue
        if not isinstance(event, dict):
            continue

        event_type = event.get("type")
        if event_type == "item.completed":
            item = event.get("item")
            if isinstance(item, dict) and item.get("type") == "agent_message":
                response_text = item.get("text") or ""
        elif event_type == "turn.completed":
            usage = event.get("usage")
            if isinstance(usage, dict):
                # ``or 0`` also covers an explicit JSON null.
                prompt_tokens = usage.get("input_tokens") or 0
                completion_tokens = usage.get("output_tokens") or 0

    if response_text is None:
        detail = (
            stderr.strip()
            or stdout.strip()
            or "Codex JSON output did not contain an agent message."
        )
        raise RuntimeError(detail)

    return SubprocessResult(
        content=response_text.strip(),
        usage=RequestUsage(
            prompt_tokens=prompt_tokens, completion_tokens=completion_tokens
        ),
    )
def load_local_env(env_path: Path | None = None) -> None:
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored. A
    leading ``export`` keyword is dropped, one pair of matching surrounding
    quotes is removed from the value, and variables that are already set in
    the process environment are never overwritten.

    Args:
        env_path: File to read; defaults to ``.env`` in the current working
            directory. A missing file is silently skipped.
    """
    path = env_path or Path(".env")
    if not path.exists():
        return

    # "utf-8-sig" drops a leading byte-order mark, which would otherwise
    # become part of the first key and make that setting silently vanish.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue

        key, _, value = line.partition("=")
        key = key.strip()
        words = key.split(None, 1)
        if len(words) == 2 and words[0] == "export":
            key = words[1].strip()
        if not key:
            # os.environ rejects empty names, so "=value" lines are skipped.
            continue

        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
            value = value[1:-1]
        os.environ.setdefault(key, value)
state_path(self) -> Path: + return self.state_dir / self.state_file_name + + +def _env(name: str, default: str | None = None) -> str | None: + value = os.getenv(name, default) + if value is None: + return None + value = value.strip() + return value or None + + +def load_settings() -> Settings: + load_local_env() + + provider = (_env("AUTOGEN_PROVIDER", "ollama") or "ollama").lower() + if provider not in SUPPORTED_PROVIDERS: + valid = ", ".join(SUPPORTED_PROVIDERS) + raise ValueError( + f"Unsupported AUTOGEN_PROVIDER '{provider}'. Expected one of: {valid}." + ) + + state_dir = Path(_env("AUTOGEN_STATE_DIR", "state") or "state") + state_file_name = _env("AUTOGEN_STATE_FILE", "team_state.json") or "team_state.json" + + return Settings( + provider=provider, + approval_word=_env("AUTOGEN_APPROVAL_WORD", "APPROVE") or "APPROVE", + state_dir=state_dir, + state_file_name=state_file_name, + repo_scan_root=Path( + _env("AUTOGEN_REPO_SCAN_ROOT", str(Path.cwd().parent)) + or str(Path.cwd().parent) + ), + ollama_model=_env("OLLAMA_MODEL", "phi3:mini") or "phi3:mini", + ollama_host=_env("OLLAMA_HOST"), + openai_model=_env("OPENAI_MODEL", "gpt-4.1-mini") or "gpt-4.1-mini", + openai_api_key=_env("OPENAI_API_KEY"), + openai_base_url=_env("OPENAI_BASE_URL"), + gemini_model=_env("GEMINI_MODEL", "gemini-2.5-flash") or "gemini-2.5-flash", + gemini_api_key=_env("GEMINI_API_KEY"), + gemini_base_url=_env( + "GEMINI_BASE_URL", + "https://generativelanguage.googleapis.com/v1beta/openai/", + ) + or "https://generativelanguage.googleapis.com/v1beta/openai/", + anthropic_model=_env("ANTHROPIC_MODEL", "claude-sonnet-4-20250514") + or "claude-sonnet-4-20250514", + anthropic_api_key=_env("ANTHROPIC_API_KEY"), + azure_openai_model=_env("AZURE_OPENAI_MODEL", "gpt-4o") or "gpt-4o", + azure_openai_deployment=_env("AZURE_OPENAI_DEPLOYMENT"), + azure_openai_endpoint=_env("AZURE_OPENAI_ENDPOINT"), + azure_openai_api_version=_env("AZURE_OPENAI_API_VERSION", "2024-06-01") + or "2024-06-01", + 
azure_openai_api_key=_env("AZURE_OPENAI_API_KEY"), + codex_cli_command=_env("CODEX_CLI_COMMAND", "codex.cmd") or "codex.cmd", + codex_cli_model=_env("CODEX_CLI_MODEL"), + gemini_cli_command=_env("GEMINI_CLI_COMMAND", "gemini.cmd") or "gemini.cmd", + claude_cli_command=_env("CLAUDE_CLI_COMMAND", "claude") or "claude", + claude_cli_model=_env("CLAUDE_CLI_MODEL"), + claude_code_git_bash_path=_env("CLAUDE_CODE_GIT_BASH_PATH"), + ) diff --git a/autogen_starter/providers.py b/autogen_starter/providers.py new file mode 100644 index 0000000..d6a6a7d --- /dev/null +++ b/autogen_starter/providers.py @@ -0,0 +1,332 @@ +from __future__ import annotations + +import shutil +import subprocess +from dataclasses import dataclass + +from autogen_core.models import ModelFamily, ModelInfo + +from autogen_starter.cli_clients import ( + build_claude_cli_client, + build_codex_cli_client, + build_gemini_cli_client_with_model, +) +from autogen_starter.config import Settings + + +class ProviderConfigError(RuntimeError): + """Raised when the selected provider is not configured correctly.""" + + +_DEFAULT_GEMINI_MODEL_INFO: ModelInfo = { + "vision": True, + "function_calling": True, + "json_output": True, + "structured_output": True, + "family": ModelFamily.UNKNOWN, + "multiple_system_messages": True, +} + + +@dataclass(frozen=True) +class ProviderStatus: + name: str + ready: bool + detail: str + + +def collect_provider_statuses(settings: Settings) -> list[ProviderStatus]: + return [ + _ollama_status(settings), + _openai_status(settings), + _gemini_status(settings), + _anthropic_status(settings), + _azure_openai_status(settings), + _codex_cli_status(settings), + _gemini_cli_status(settings), + _claude_cli_status(settings), + ] + + +def create_model_client( + settings: Settings, + model_override: str | None = None, + working_directory: str | None = None, +): + provider = settings.provider + model_override = _normalize_model_override(model_override) + + if provider == "ollama": + from 
autogen_ext.models.ollama import OllamaChatCompletionClient + + kwargs: dict[str, object] = {"model": model_override or settings.ollama_model} + if settings.ollama_host: + kwargs["host"] = settings.ollama_host + return OllamaChatCompletionClient(**kwargs) + + if provider == "openai": + raise ProviderConfigError("Provider 'openai' is disabled by local policy.") + + if provider == "gemini": + from autogen_ext.models.openai import OpenAIChatCompletionClient + + _require( + settings.gemini_api_key, "GEMINI_API_KEY is required for provider 'gemini'." + ) + model_name = model_override or settings.gemini_model + return OpenAIChatCompletionClient( + model=model_name, + api_key=settings.gemini_api_key, + base_url=settings.gemini_base_url, + model_info=_gemini_model_info(model_name), + ) + + if provider == "anthropic": + try: + from autogen_ext.models.anthropic import AnthropicChatCompletionClient + except ModuleNotFoundError as exc: + raise ProviderConfigError( + "Anthropic support is not installed. Install requirements.txt so " + "'autogen-ext[anthropic]' is available." 
+ ) from exc + + _require( + settings.anthropic_api_key, + "ANTHROPIC_API_KEY is required for provider 'anthropic'.", + ) + return AnthropicChatCompletionClient( + model=model_override or settings.anthropic_model, + api_key=settings.anthropic_api_key, + ) + + if provider == "azure-openai": + from autogen_ext.models.openai import AzureOpenAIChatCompletionClient + + _require( + settings.azure_openai_deployment, + "AZURE_OPENAI_DEPLOYMENT is required for provider 'azure-openai'.", + ) + _require( + settings.azure_openai_endpoint, + "AZURE_OPENAI_ENDPOINT is required for provider 'azure-openai'.", + ) + _require( + settings.azure_openai_api_key, + "AZURE_OPENAI_API_KEY is required for provider 'azure-openai' in this starter.", + ) + return AzureOpenAIChatCompletionClient( + model=model_override or settings.azure_openai_model, + azure_deployment=settings.azure_openai_deployment, + azure_endpoint=settings.azure_openai_endpoint, + api_version=settings.azure_openai_api_version, + api_key=settings.azure_openai_api_key, + ) + + if provider == "codex-cli": + return build_codex_cli_client( + settings.codex_cli_command, + model_override or settings.codex_cli_model, + working_directory, + ) + + if provider == "gemini-cli": + return build_gemini_cli_client_with_model( + settings.gemini_cli_command, + model_override, + working_directory, + ) + + if provider == "claude-cli": + return build_claude_cli_client( + settings.claude_cli_command, + model_override or settings.claude_cli_model, + settings.claude_code_git_bash_path, + working_directory, + ) + + raise ProviderConfigError(f"Unsupported provider '{provider}'.") + + +def _require(value: str | None, message: str) -> None: + if not value: + raise ProviderConfigError(message) + + +def _normalize_model_override(model_override: str | None) -> str | None: + if model_override is None: + return None + value = model_override.strip() + return value or None + + +def _gemini_model_info(model_name: str) -> ModelInfo: + normalized = 
def _ollama_status(settings: Settings) -> ProviderStatus:
    """Report whether the local Ollama CLI is usable by running ``ollama list``.

    Args:
        settings: Loaded settings; only ``ollama_model`` is read here.

    Returns:
        A status that is not ready when the CLI is missing, cannot be
        started, times out or exits non-zero. Otherwise it is ready, with up
        to five listed model names and a note when the configured model is
        not among those listed.
    """
    if shutil.which("ollama") is None:
        return ProviderStatus("ollama", False, "Ollama CLI not found.")

    try:
        completed = subprocess.run(
            ["ollama", "list"],
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
    except subprocess.TimeoutExpired:
        # TimeoutExpired is not an OSError; without this branch a hung CLI
        # would crash the whole provider listing.
        return ProviderStatus(
            "ollama", False, "Ollama probe timed out after 10 seconds."
        )
    except OSError as exc:
        return ProviderStatus("ollama", False, f"Ollama probe failed: {exc}")

    if completed.returncode != 0:
        return ProviderStatus(
            "ollama", False, "Ollama is installed but did not return model list."
        )

    lines = [line.strip() for line in completed.stdout.splitlines() if line.strip()]
    if len(lines) <= 1:
        return ProviderStatus(
            "ollama", True, "Ollama reachable, but no local models were listed."
        )

    # The first non-blank line is skipped; the first column of each
    # remaining row is taken as the model name.
    model_names = [line.split()[0] for line in lines[1:]]

    detail = f"Local models detected: {', '.join(model_names[:5])}"
    if settings.ollama_model not in model_names:
        detail += f". Current OLLAMA_MODEL is '{settings.ollama_model}'."
    return ProviderStatus("ollama", True, detail)
def _anthropic_status(settings: Settings) -> ProviderStatus:
    """Report whether the Anthropic provider can be used.

    Args:
        settings: Loaded settings; ``anthropic_api_key`` and
            ``anthropic_model`` are read.

    Returns:
        A status that is ready only when the ``autogen_ext.models.anthropic``
        module imports and an API key is configured.
    """
    try:
        __import__("autogen_ext.models.anthropic")
    except ModuleNotFoundError:
        return ProviderStatus(
            "anthropic",
            False,
            "Anthropic extra is not installed yet. Install requirements.txt first.",
        )
    except ImportError as exc:
        # An installed but broken extra raises plain ImportError; a status
        # probe should report that instead of crashing the listing.
        return ProviderStatus(
            "anthropic", False, f"Anthropic extra failed to import: {exc}"
        )

    if settings.anthropic_api_key:
        return ProviderStatus(
            "anthropic", True, f"Configured for model '{settings.anthropic_model}'."
        )
    return ProviderStatus(
        "anthropic",
        False,
        "Missing ANTHROPIC_API_KEY. Claude app login or company seat alone is not enough for AutoGen.",
    )
def _claude_cli_status(settings: Settings) -> ProviderStatus:
    """Report whether the Claude CLI provider looks usable.

    Args:
        settings: Loaded settings; ``claude_cli_command`` and
            ``claude_code_git_bash_path`` are read.

    Returns:
        A status that is not ready when the command is not on PATH or
        ``CLAUDE_CODE_GIT_BASH_PATH`` is unset, and ready otherwise. The
        Git Bash path itself is not validated here.
    """
    # A single PATH lookup is enough; the original repeated the same check
    # after the Git Bash test, where it could never fail.
    if shutil.which(settings.claude_cli_command) is None:
        return ProviderStatus(
            "claude-cli", False, f"Command not found: {settings.claude_cli_command}"
        )
    if not settings.claude_code_git_bash_path:
        return ProviderStatus(
            "claude-cli",
            False,
            "Claude CLI detected, but CLAUDE_CODE_GIT_BASH_PATH is not set.",
        )
    return ProviderStatus(
        "claude-cli",
        True,
        "Claude CLI detected. Runtime success still depends on the Git Bash path being valid on this machine.",
    )
target.parent.mkdir(parents=True, exist_ok=True) - with target.open("a", encoding=operation.encoding, newline="") as handle: - handle.write(operation.content) + _write_text(target, before_text + operation.content, operation.encoding) after_text = _read_text(target) changed = before_text != after_text return WriteOperationRecord( @@ -216,8 +215,13 @@ def _resolve_write_target(repo_root: Path, raw_path: str) -> Path: def _write_text(path: Path, content: str, encoding: str) -> None: + if encoding.lower() != "utf-8": + raise ValueError("Only UTF-8 write operations are supported") path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content, encoding=encoding, newline="") + with tempfile.NamedTemporaryFile("w", encoding="utf-8", newline="", delete=False, dir=path.parent, suffix=".tmp") as handle: + handle.write(content) + temp_path = Path(handle.name) + temp_path.replace(path) def _read_text(path: Path) -> str: diff --git a/maf_starter/tools.py b/maf_starter/tools.py index af3c4ae..b36dcf7 100644 --- a/maf_starter/tools.py +++ b/maf_starter/tools.py @@ -2,7 +2,7 @@ import json import subprocess -from pathlib import Path +from pathlib import Path, PureWindowsPath from agent_framework import tool @@ -18,7 +18,10 @@ def _resolve_candidate(repo_root: Path, raw_path: str) -> Path: - candidate = Path(raw_path) + normalized_raw = raw_path.replace("\\", "/") + if PureWindowsPath(raw_path).is_absolute() and not Path(normalized_raw).is_absolute(): + raise ValueError(f"Path escapes the configured repo root: {raw_path}") + candidate = Path(normalized_raw) if not candidate.is_absolute(): candidate = (repo_root / candidate).resolve() else: diff --git a/main.py b/main.py new file mode 100644 index 0000000..2e8680d --- /dev/null +++ b/main.py @@ -0,0 +1,5 @@ +from autogen_starter.cli import main + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..7df677e --- /dev/null +++ 
b/requirements.txt @@ -0,0 +1,10 @@ +agent-framework==1.0.0rc5 +agent-framework-devui==1.0.0b260319 +autogen-agentchat>=0.7.5,<0.8.0 +autogen-core>=0.7.5,<0.8.0 +autogen-ext[anthropic,ollama,openai]>=0.7.5,<0.8.0 +fastapi>=0.115,<1.0 +pydantic>=2.0,<3.0 +pytest>=8.0,<10.0 +python-dotenv>=1.0,<2.0 +uvicorn>=0.30,<1.0 diff --git a/tests/test_maf_setup.py b/tests/test_maf_setup.py index 6d1e04d..5b6f086 100644 --- a/tests/test_maf_setup.py +++ b/tests/test_maf_setup.py @@ -28,6 +28,11 @@ SCRATCH_ROOT = Path(__file__).resolve().parents[1] / ".tmp-tests" +def load_test_settings(): + with patch.dict("os.environ", {"MAF_API_KEY": "test-key"}, clear=False): + return load_settings(project_root=Path.cwd(), env_path=Path.cwd() / ".missing-env") + + class RepoScratchTestCase(unittest.TestCase): def make_scratch_dir(self) -> Path: path = SCRATCH_ROOT / uuid.uuid4().hex @@ -107,8 +112,10 @@ def test_resolve_repo_path_blocks_escape(self) -> None: allowed = root / "notes.txt" allowed.write_text("ok", encoding="utf-8") self.assertEqual(resolve_repo_path(root, "notes.txt"), allowed) - with self.assertRaises(ValueError): - resolve_repo_path(root, "..\\outside.txt") + for unsafe_path in ("../outside.txt", "..\\outside.txt", "C:\\outside.txt"): + with self.subTest(path=unsafe_path): + with self.assertRaises(ValueError): + resolve_repo_path(root, unsafe_path) def test_build_repo_tools_contains_expected_tool_names(self) -> None: tools = build_repo_tools(self.make_scratch_dir()) @@ -189,7 +196,7 @@ async def failing_stream(): options={}, stream=True, ) - settings = load_settings(project_root=Path.cwd(), env_path=Path.cwd() / ".env") + settings = load_test_settings() async def fallback_updates(): yield ChatResponseUpdate(role="assistant", contents=[Content.from_text("READY")], model_id="fallback") @@ -267,7 +274,7 @@ def test_resolve_run_scope_strips_kwargs_and_persists_scope_metadata(self) -> No reset_run_scope(tokens) def test_auto_routing_plan_uses_simple_tier_for_light_prompt(self) -> 
None: - settings = load_settings(project_root=Path.cwd(), env_path=Path.cwd() / ".env") + settings = load_test_settings() plan = build_routing_plan( settings, routing_mode="auto", @@ -279,7 +286,7 @@ def test_auto_routing_plan_uses_simple_tier_for_light_prompt(self) -> None: self.assertEqual(plan.primary_model, "gemini-2.5-flash-lite") def test_auto_routing_plan_uses_deep_tier_for_repo_work(self) -> None: - settings = load_settings(project_root=Path.cwd(), env_path=Path.cwd() / ".env") + settings = load_test_settings() plan = build_routing_plan( settings, routing_mode="auto", @@ -312,7 +319,7 @@ def test_patch_devui_bundle_rewrites_message_renderer(self) -> None: self.assertIn("CdxRoutePanel", patched) def test_route_metadata_contains_tools_available_flag(self) -> None: - settings = load_settings(project_root=Path.cwd(), env_path=Path.cwd() / ".env") + settings = load_test_settings() metadata = _merge_route_metadata( None, settings=settings, diff --git a/tests/test_phase4_write_execution.py b/tests/test_phase4_write_execution.py index 7c7e1e3..ebde6d3 100644 --- a/tests/test_phase4_write_execution.py +++ b/tests/test_phase4_write_execution.py @@ -117,5 +117,19 @@ def test_update_with_same_content_is_skipped_but_preserves_result_shape(self) -> self.assertEqual(result.diff_patch, "") + def test_write_operations_are_utf8_only_and_leave_no_temp_files(self) -> None: + scratch = self.make_scratch_dir() + repo_root = scratch / "repo" + init_repo(repo_root) + + with self.assertRaisesRegex(ValueError, "UTF-8"): + apply_write_operations( + repo_root, + [WriteOperation(action="update_file", path="README.md", content="changed", encoding="utf-16")], + ) + + self.assertEqual((repo_root / "README.md").read_text(encoding="utf-8"), "# repo\n") + self.assertEqual(list(repo_root.rglob("*.tmp")), []) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_run_persistence.py b/tests/test_run_persistence.py index c6027c7..e1e1742 100644 --- a/tests/test_run_persistence.py 
+++ b/tests/test_run_persistence.py @@ -310,5 +310,19 @@ def test_atomic_json_writes_leave_no_temp_files(self) -> None: self.assertEqual(temp_files, []) + def test_store_rejects_path_traversal_identifiers(self) -> None: + store = SessionStore(self.make_scratch_dir() / "state" / "sessions") + + for invalid_session_id in ("../outside", "..\\outside", ".", "C:\\outside"): + with self.subTest(session_id=invalid_session_id): + with self.assertRaisesRegex(ValueError, "Invalid session id"): + store.session_dir(invalid_session_id) + + with self.assertRaisesRegex(ValueError, "Invalid stage"): + store.stage_artifact_dir("run-001", "../../outside") + + with self.assertRaisesRegex(ValueError, "Invalid attempt id"): + store.attempt_dir("run-001", "../attempt-001") + if __name__ == "__main__": unittest.main() diff --git a/tests/test_security_boundaries.py b/tests/test_security_boundaries.py new file mode 100644 index 0000000..3ba825b --- /dev/null +++ b/tests/test_security_boundaries.py @@ -0,0 +1,30 @@ +from __future__ import annotations + +from unittest.mock import patch + +import pytest +from fastapi.testclient import TestClient + +from autogen_dashboard.app import create_app + + +def test_cors_defaults_to_explicit_loopback_origins() -> None: + with patch.dict("os.environ", {}, clear=True): + client = TestClient(create_app()) + + response = client.options( + "/healthz", + headers={ + "Origin": "http://127.0.0.1:8000", + "Access-Control-Request-Method": "GET", + }, + ) + + assert response.headers["access-control-allow-origin"] == "http://127.0.0.1:8000" + assert "access-control-allow-credentials" not in response.headers + + +def test_cors_rejects_wildcard_configuration() -> None: + with patch.dict("os.environ", {"AUTOGEN_CORS_ORIGINS": "*"}, clear=False): + with pytest.raises(ValueError, match="wildcard CORS"): + create_app()