From 09273b6f3d10912c968b7e64ad7b45735d3cf1be Mon Sep 17 00:00:00 2001 From: chutstack <112586305+chutstack@users.noreply.github.com> Date: Wed, 18 Feb 2026 18:16:24 -0500 Subject: [PATCH] Improve model tuning and calibrated decision thresholds --- .env.example | 4 + Makefile | 13 ++ README.md | 42 +++++++ configs/config.yaml | 35 ++++++ logs/.gitkeep | 0 models/.gitkeep | 0 pyproject.toml | 12 ++ requirements.txt | 8 ++ tests/test_adapters.py | 6 + tests/test_features.py | 12 ++ tests/test_regime.py | 41 +++++++ tests/test_sizing.py | 14 +++ tests/test_splitter.py | 13 ++ tests/test_train.py | 24 ++++ trader/__init__.py | 4 + trader/__main__.py | 4 + trader/backtest/__init__.py | 1 + trader/backtest/walkforward.py | 33 +++++ trader/cli.py | 147 ++++++++++++++++++++++ trader/config.py | 25 ++++ trader/data/__init__.py | 1 + trader/data/adapters.py | 84 +++++++++++++ trader/data/storage.py | 19 +++ trader/data/synthetic.py | 20 +++ trader/execution/__init__.py | 1 + trader/execution/broker_base.py | 13 ++ trader/execution/live_placeholders.py | 17 +++ trader/execution/paper.py | 34 ++++++ trader/features/__init__.py | 1 + trader/features/compute.py | 46 +++++++ trader/features/cross_asset.py | 27 ++++ trader/features/indicators.py | 33 +++++ trader/logging.py | 20 +++ trader/model/__init__.py | 1 + trader/model/artifacts.py | 23 ++++ trader/model/predict.py | 10 ++ trader/model/splitter.py | 29 +++++ trader/model/train.py | 169 ++++++++++++++++++++++++++ trader/risk/__init__.py | 1 + trader/risk/caps.py | 13 ++ trader/risk/regime.py | 36 ++++++ trader/risk/sizing.py | 20 +++ trader/utils.py | 23 ++++ 43 files changed, 1079 insertions(+) create mode 100644 .env.example create mode 100644 Makefile create mode 100644 README.md create mode 100644 configs/config.yaml create mode 100644 logs/.gitkeep create mode 100644 models/.gitkeep create mode 100644 pyproject.toml create mode 100644 requirements.txt create mode 100644 tests/test_adapters.py create mode 100644 
tests/test_features.py create mode 100644 tests/test_regime.py create mode 100644 tests/test_sizing.py create mode 100644 tests/test_splitter.py create mode 100644 tests/test_train.py create mode 100644 trader/__init__.py create mode 100644 trader/__main__.py create mode 100644 trader/backtest/__init__.py create mode 100644 trader/backtest/walkforward.py create mode 100644 trader/cli.py create mode 100644 trader/config.py create mode 100644 trader/data/__init__.py create mode 100644 trader/data/adapters.py create mode 100644 trader/data/storage.py create mode 100644 trader/data/synthetic.py create mode 100644 trader/execution/__init__.py create mode 100644 trader/execution/broker_base.py create mode 100644 trader/execution/live_placeholders.py create mode 100644 trader/execution/paper.py create mode 100644 trader/features/__init__.py create mode 100644 trader/features/compute.py create mode 100644 trader/features/cross_asset.py create mode 100644 trader/features/indicators.py create mode 100644 trader/logging.py create mode 100644 trader/model/__init__.py create mode 100644 trader/model/artifacts.py create mode 100644 trader/model/predict.py create mode 100644 trader/model/splitter.py create mode 100644 trader/model/train.py create mode 100644 trader/risk/__init__.py create mode 100644 trader/risk/caps.py create mode 100644 trader/risk/regime.py create mode 100644 trader/risk/sizing.py create mode 100644 trader/utils.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e131d24 --- /dev/null +++ b/.env.example @@ -0,0 +1,4 @@ +ALPACA_API_KEY= +ALPACA_API_SECRET= +ENABLE_LIVE_TRADING=false +KILL_SWITCH=false diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f53ef9e --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +.PHONY: test train run backtest + +test: + pytest -q + +train: + python -m trader train --config configs/config.yaml + +run: + python -m trader run --config configs/config.yaml --mode paper + +backtest: + python -m 
trader backtest --config configs/config.yaml diff --git a/README.md b/README.md new file mode 100644 index 0000000..33a5628 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +# AI Prediction Trader + +Production-minded AI auto-trading scaffold for BTC-USD, ETH-USD, and SPY with strict safeguards and paper trading by default. + +## Quickstart + +1. Create env and install: + - `pip install -r requirements.txt` +2. Copy env file: + - `cp .env.example .env` +3. Train models: + - `python -m trader train --config configs/config.yaml` +4. Run paper trading: + - `python -m trader run --config configs/config.yaml --mode paper` +5. Run walk-forward backtest: + - `python -m trader backtest --config configs/config.yaml` + +## What training now does + +- Performs per-asset walk-forward parameter search over several model candidates. +- Selects the best candidate by walk-forward AUC (with logloss tie-break). +- Calibrates each asset's `p_long` cutoff from walk-forward predictions (balanced accuracy objective). +- Stores tuned settings in `models/<asset>_meta.json` and uses calibrated `p_long` during `run`. + +## Backtest output + +`python -m trader backtest` writes `backtest_results.csv` with per-split: + +- `accuracy` +- `auc` +- `logloss` +- `n_test` + +## Scheduling + +Use cron/systemd to call the run command once daily after market close. + +## Safety + +- Live mode requires both `--mode live` and `ENABLE_LIVE_TRADING=true`. +- `KILL_SWITCH=true` disables all order placement. +- Drawdown kill switch disables trading if portfolio drawdown exceeds 20%. 
diff --git a/configs/config.yaml b/configs/config.yaml new file mode 100644 index 0000000..20f7d73 --- /dev/null +++ b/configs/config.yaml @@ -0,0 +1,35 @@ +assets: + - BTC-USD + - ETH-USD + - SPY +thresholds: + BTC-USD: 0.005 + ETH-USD: 0.005 + SPY: 0.002 +p_long: + BTC-USD: 0.60 + ETH-USD: 0.60 + SPY: 0.58 +vol_target: + BTC-USD: 0.010 + ETH-USD: 0.010 + SPY: 0.006 +caps: + per_asset: + BTC-USD: 0.30 + ETH-USD: 0.30 + SPY: 1.0 + total_crypto: 0.40 +walkforward: + train_days: 756 + test_days: 63 + step_days: 63 + embargo_days: 1 +data: + crypto_source: coinbase + equity_source: alpaca_or_fallback +paper_costs: + fee_bps: 1.0 + slippage_bps: 2.0 +timezone: America/New_York +include_crypto_for_spy: false diff --git a/logs/.gitkeep b/logs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/models/.gitkeep b/models/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..5cde01b --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "ai-prediction-trader" +version = "0.1.0" +requires-python = ">=3.11" + +[tool.pytest.ini_options] +testpaths = ["tests"] +pythonpath = ["."] diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..84d1531 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +numpy +pandas +scikit-learn +xgboost +pyyaml +requests +python-dotenv +pytest diff --git a/tests/test_adapters.py b/tests/test_adapters.py new file mode 100644 index 0000000..5accb3d --- /dev/null +++ b/tests/test_adapters.py @@ -0,0 +1,6 @@ +from trader.data.adapters import _to_coinbase_product + + +def test_coinbase_product_format() -> None: + assert _to_coinbase_product("BTCUSD") == "BTC-USD" + assert _to_coinbase_product("BTC-USD") == "BTC-USD" diff --git a/tests/test_features.py b/tests/test_features.py new file mode 100644 index 0000000..ad729b1 
--- /dev/null +++ b/tests/test_features.py @@ -0,0 +1,12 @@ +import pandas as pd + +from trader.data.synthetic import generate_synthetic_ohlcv +from trader.features.compute import compute_features + + +def test_feature_generation_has_expected_columns() -> None: + df = generate_synthetic_ohlcv("BTC-USD", periods=300) + feats = compute_features(df) + expected = {"r1", "r5", "sma20_dist", "slope20", "atr_pct", "distance_from_high_20"} + assert expected.issubset(set(feats.columns)) + assert feats.dropna().shape[0] > 0 diff --git a/tests/test_regime.py b/tests/test_regime.py new file mode 100644 index 0000000..6a97ad9 --- /dev/null +++ b/tests/test_regime.py @@ -0,0 +1,41 @@ +import numpy as np +import pandas as pd + +from trader.features.compute import compute_features +from trader.risk.regime import regime_is_bad, regime_status + + +def test_regime_flag_returns_bool() -> None: + idx = pd.date_range("2020-01-01", periods=300, freq="D") + close = pd.Series(np.linspace(100, 130, len(idx)), index=idx) + df = pd.DataFrame( + { + "open": close, + "high": close * 1.01, + "low": close * 0.99, + "close": close, + "volume": 1000.0, + } + ) + feats = compute_features(df) + bad = regime_is_bad("SPY", feats) + assert isinstance(bad, bool) + + +def test_regime_status_exposes_trigger_details() -> None: + idx = pd.date_range("2020-01-01", periods=300, freq="D") + close = pd.Series(np.linspace(100, 60, len(idx)), index=idx) + df = pd.DataFrame( + { + "open": close, + "high": close * 1.01, + "low": close * 0.99, + "close": close, + "volume": 1000.0, + } + ) + feats = compute_features(df) + status = regime_status("SPY", feats) + assert status["bad"] is True + assert status["drawdown_breach"] is True + assert status["drawdown"] > status["drawdown_limit"] diff --git a/tests/test_sizing.py b/tests/test_sizing.py new file mode 100644 index 0000000..4054af6 --- /dev/null +++ b/tests/test_sizing.py @@ -0,0 +1,14 @@ +from trader.risk.caps import apply_crypto_cap +from trader.risk.sizing import 
confidence_fraction, target_weight + + +def test_confidence_ladder_and_weight() -> None: + assert confidence_fraction(0.57, 0.58) == 0.0 + assert confidence_fraction(0.60, 0.58) == 0.25 + w = target_weight(prob=0.75, p_long=0.60, sigma20=0.02, vol_target=0.01, cap=0.30) + assert 0 <= w <= 0.30 + + +def test_crypto_cap_scaling() -> None: + out = apply_crypto_cap({"BTC-USD": 0.3, "ETH-USD": 0.3, "SPY": 0.5}, total_cap=0.4) + assert abs(out["BTC-USD"] + out["ETH-USD"] - 0.4) < 1e-8 diff --git a/tests/test_splitter.py b/tests/test_splitter.py new file mode 100644 index 0000000..1e9b9bf --- /dev/null +++ b/tests/test_splitter.py @@ -0,0 +1,13 @@ +import pandas as pd + +from trader.model.splitter import WalkForwardSplitter + + +def test_walkforward_splitter_embargo() -> None: + idx = pd.date_range("2020-01-01", periods=1000, freq="B") + splitter = WalkForwardSplitter(train_days=200, test_days=50, step_days=50, embargo_days=1) + splits = splitter.split(idx) + assert len(splits) > 0 + tr, te = splits[0] + assert te.min() > tr.max() + assert (te.min() - tr.max()).days >= 2 diff --git a/tests/test_train.py b/tests/test_train.py new file mode 100644 index 0000000..aaea0b9 --- /dev/null +++ b/tests/test_train.py @@ -0,0 +1,24 @@ +import pandas as pd + +from trader.data.synthetic import generate_synthetic_ohlcv +from trader.features.compute import make_dataset +from trader.model.splitter import WalkForwardSplitter +from trader.model.train import _find_best_params, _optimize_cutoff + + +def test_training_search_and_cutoff_optimization_ranges() -> None: + prices = { + "BTC-USD": generate_synthetic_ohlcv("BTC-USD", periods=420), + "ETH-USD": generate_synthetic_ohlcv("ETH-USD", periods=420), + "SPY": generate_synthetic_ohlcv("SPY", periods=420), + } + X, y = make_dataset("BTC-USD", prices, threshold=0.005) + splitter = WalkForwardSplitter(train_days=220, test_days=50, step_days=50, embargo_days=1) + + params, auc, ll = _find_best_params(X, y, splitter) + cutoff = 
_optimize_cutoff(X, y, splitter, params, default_cutoff=0.60) + + assert isinstance(params, dict) + assert 0.0 <= auc <= 1.0 + assert ll > 0.0 + assert 0.50 <= cutoff <= 0.70 diff --git a/trader/__init__.py b/trader/__init__.py new file mode 100644 index 0000000..d60bafc --- /dev/null +++ b/trader/__init__.py @@ -0,0 +1,4 @@ +"""AI trading package.""" + +__all__ = ["__version__"] +__version__ = "0.1.0" diff --git a/trader/__main__.py b/trader/__main__.py new file mode 100644 index 0000000..3be69f2 --- /dev/null +++ b/trader/__main__.py @@ -0,0 +1,4 @@ +from trader.cli import main + +if __name__ == "__main__": + main() diff --git a/trader/backtest/__init__.py b/trader/backtest/__init__.py new file mode 100644 index 0000000..8d06382 --- /dev/null +++ b/trader/backtest/__init__.py @@ -0,0 +1 @@ +"""Backtesting utilities.""" diff --git a/trader/backtest/walkforward.py b/trader/backtest/walkforward.py new file mode 100644 index 0000000..ba7c41a --- /dev/null +++ b/trader/backtest/walkforward.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pandas as pd +from sklearn.metrics import log_loss, roc_auc_score + +from trader.features.compute import make_dataset +from trader.model.splitter import WalkForwardSplitter +from trader.model.train import build_model + + +def run_walkforward(prices: dict[str, pd.DataFrame], cfg: dict[str, Any]) -> pd.DataFrame: + rows: list[dict[str, Any]] = [] + for asset in cfg["assets"]: + X, y = make_dataset(asset, prices, cfg["thresholds"][asset]) + splitter = WalkForwardSplitter(**cfg["walkforward"]) + for i, (tr, te) in enumerate(splitter.split(X.index)): + y_tr = y.loc[tr] + if y_tr.nunique() < 2: + continue + scale_pos_weight = float((y_tr == 0).sum() / max((y_tr == 1).sum(), 1)) + model = build_model({"scale_pos_weight": scale_pos_weight}) + model.fit(X.loc[tr], y_tr) + p = model.predict_proba(X.loc[te])[:, 1] + pred = (p >= cfg["p_long"][asset]).astype(int) + y_te = 
y.loc[te].values + acc = float((pred == y_te).mean()) + auc = float(roc_auc_score(y_te, p)) if len(np.unique(y_te)) > 1 else 0.5 + ll = float(log_loss(y_te, np.clip(p, 1e-6, 1 - 1e-6))) + rows.append({"asset": asset, "split": i, "accuracy": acc, "auc": auc, "logloss": ll, "n_test": len(te)}) + return pd.DataFrame(rows) diff --git a/trader/cli.py b/trader/cli.py new file mode 100644 index 0000000..4b6eaaf --- /dev/null +++ b/trader/cli.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +import argparse +import os +from pathlib import Path + +import pandas as pd + +from trader.backtest.walkforward import run_walkforward +from trader.config import load_config +from trader.data.adapters import CryptoAdapter, EquityAdapter +from trader.execution.live_placeholders import LiveBrokerPlaceholder +from trader.execution.paper import PaperBroker +from trader.features.compute import compute_features, make_dataset +from trader.logging import JsonlLogger +from trader.model.artifacts import ArtifactStore +from trader.model.predict import predict_probability +from trader.model.train import train_asset +from trader.risk.caps import apply_crypto_cap +from trader.risk.regime import regime_status +from trader.risk.sizing import target_weight + + +def load_prices(cfg: dict) -> dict[str, pd.DataFrame]: + c, e = CryptoAdapter(), EquityAdapter() + out: dict[str, pd.DataFrame] = {} + for asset in cfg["assets"]: + out[asset] = c.fetch(asset) if asset in {"BTC-USD", "ETH-USD"} else e.fetch(asset) + return out + + +def ensure_models(cfg: dict, prices: dict[str, pd.DataFrame], store: ArtifactStore) -> None: + for asset in cfg["assets"]: + if not Path(f"models/{asset}_model.pkl").exists(): + train_asset(asset, prices, cfg, store) + + +def cmd_train(args: argparse.Namespace) -> None: + cfg = load_config(args.config).raw + prices = load_prices(cfg) + store = ArtifactStore() + for asset in cfg["assets"]: + metrics = train_asset(asset, prices, cfg, store) + print(asset, metrics) + + +def 
cmd_run(args: argparse.Namespace) -> None: + cfg = load_config(args.config).raw + if os.getenv("KILL_SWITCH", "false").lower() == "true": + print("Kill switch enabled; no trading.") + return + prices = load_prices(cfg) + logger = JsonlLogger() + store = ArtifactStore() + ensure_models(cfg, prices, store) + broker = ( + PaperBroker(fee_bps=cfg["paper_costs"]["fee_bps"], slippage_bps=cfg["paper_costs"]["slippage_bps"]) + if args.mode == "paper" + else LiveBrokerPlaceholder() + ) + weights: dict[str, float] = {} + probs: dict[str, float] = {} + regimes: dict[str, dict[str, float | bool]] = {} + p_longs: dict[str, float] = {} + latest_prices = {a: float(prices[a]["close"].iloc[-1]) for a in cfg["assets"]} + + snap = broker.snapshot() + if bool(snap.get("disable_until_reset", False)): + print("Trading disabled by drawdown lock.") + return + + for asset in cfg["assets"]: + model, meta = store.load(asset) + feats = compute_features(prices[asset]) + full_x, _ = make_dataset(asset, prices, cfg["thresholds"][asset]) + prob = predict_probability(model, full_x, meta["features"]) + probs[asset] = prob + p_long = float(meta.get("p_long", cfg["p_long"][asset])) + p_longs[asset] = p_long + regime = regime_status(asset, feats) + regimes[asset] = regime + sigma20 = float(feats["r1"].rolling(20).std().iloc[-1]) + tw = 0.0 if regime["bad"] else target_weight(prob, p_long, sigma20, cfg["vol_target"][asset], cfg["caps"]["per_asset"][asset]) + weights[asset] = tw + print( + f"{asset}: prob={prob:.4f}, p_long={p_long:.2f}, " + f"regime_bad={regime['bad']}, sigma20={sigma20:.4f}, target={tw:.4f}" + ) + + weights = apply_crypto_cap(weights, cfg["caps"]["total_crypto"]) + + portfolio_value = float(snap.get("cash", 0.0)) + sum( + float(q) * latest_prices.get(sym, 0.0) for sym, q in snap.get("positions", {}).items() + ) + if snap.get("equity_curve"): + peak = max(v["equity"] for v in snap["equity_curve"]) + dd = 1 - portfolio_value / peak if peak else 0.0 + if dd > 0.20: + 
snap["disable_until_reset"] = True + broker.store.save(snap) + print("Drawdown kill-switch triggered") + return + + for asset, tw in weights.items(): + fill = broker.rebalance(asset, tw, latest_prices[asset]) + logger.log( + { + "asset": asset, + "prob": probs[asset], + "p_long": p_longs[asset], + "regime_bad": regimes[asset]["bad"], + "regime": regimes[asset], + "target_weight": tw, + "order": fill, + "timestamp": pd.Timestamp.utcnow().isoformat(), + } + ) + state = broker.snapshot() + eq = state.get("cash", 0.0) + sum(q * latest_prices.get(sym, 0.0) for sym, q in state.get("positions", {}).items()) + state.setdefault("equity_curve", []).append({"ts": pd.Timestamp.utcnow().isoformat(), "equity": eq}) + broker.store.save(state) + print("Run complete", weights) + + +def cmd_backtest(args: argparse.Namespace) -> None: + cfg = load_config(args.config).raw + prices = load_prices(cfg) + results = run_walkforward(prices, cfg) + results.to_csv("backtest_results.csv", index=False) + print(results.groupby("asset")[["accuracy", "auc", "logloss"]].mean()) + + +def main() -> None: + parser = argparse.ArgumentParser(prog="trader") + sub = parser.add_subparsers(dest="cmd", required=True) + for name in ["train", "run", "backtest"]: + p = sub.add_parser(name) + p.add_argument("--config", default="configs/config.yaml") + if name == "run": + p.add_argument("--mode", default="paper", choices=["paper", "live"]) + args = parser.parse_args() + if args.cmd == "train": + cmd_train(args) + elif args.cmd == "run": + cmd_run(args) + else: + cmd_backtest(args) diff --git a/trader/config.py b/trader/config.py new file mode 100644 index 0000000..97dd7cc --- /dev/null +++ b/trader/config.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from typing import Any + +import yaml +from dotenv import load_dotenv + + +@dataclass(slots=True) +class AppConfig: + raw: dict[str, Any] + + @property + def assets(self) -> list[str]: + return 
list(self.raw["assets"]) + + +def load_config(config_path: str | Path) -> AppConfig: + load_dotenv() + path = Path(config_path) + with path.open("r", encoding="utf-8") as fh: + data: dict[str, Any] = yaml.safe_load(fh) + return AppConfig(raw=data) diff --git a/trader/data/__init__.py b/trader/data/__init__.py new file mode 100644 index 0000000..89f41f9 --- /dev/null +++ b/trader/data/__init__.py @@ -0,0 +1 @@ +"""Data adapters and storage.""" diff --git a/trader/data/adapters.py b/trader/data/adapters.py new file mode 100644 index 0000000..0fbe7fd --- /dev/null +++ b/trader/data/adapters.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +import io +import os +from typing import Any + +import pandas as pd +import requests + +from trader.data.synthetic import generate_synthetic_ohlcv + + +def _to_coinbase_product(symbol: str) -> str: + if "-" in symbol: + return symbol + if symbol.endswith("USD") and len(symbol) > 3: + return f"{symbol[:-3]}-USD" + return symbol + + +class CryptoAdapter: + def fetch(self, symbol: str, limit: int = 1200) -> pd.DataFrame: + product = _to_coinbase_product(symbol) + url = f"https://api.exchange.coinbase.com/products/{product}/candles" + params = {"granularity": 86400, "limit": min(300, limit)} + try: + resp = requests.get(url, params=params, timeout=10) + resp.raise_for_status() + rows: list[list[Any]] = resp.json() + if not rows: + raise ValueError("empty payload") + df = pd.DataFrame(rows, columns=["time", "low", "high", "open", "close", "volume"]) + df["time"] = pd.to_datetime(df["time"], unit="s", utc=True).tz_convert(None) + df = df.set_index("time").sort_index() + return df[["open", "high", "low", "close", "volume"]].astype(float).tail(limit) + except Exception: + return generate_synthetic_ohlcv(symbol, periods=limit) + + +class EquityAdapter: + def fetch(self, symbol: str, limit: int = 1200) -> pd.DataFrame: + key = os.getenv("ALPACA_API_KEY") + secret = os.getenv("ALPACA_API_SECRET") + if key and secret: + try: + url = 
f"https://data.alpaca.markets/v2/stocks/{symbol}/bars" + params = {"timeframe": "1Day", "limit": limit} + headers = {"APCA-API-KEY-ID": key, "APCA-API-SECRET-KEY": secret} + resp = requests.get(url, params=params, headers=headers, timeout=10) + resp.raise_for_status() + bars = resp.json().get("bars", []) + if bars: + df = pd.DataFrame(bars) + df["t"] = pd.to_datetime(df["t"], utc=True).dt.tz_convert(None) + df = df.set_index("t").rename(columns={"o": "open", "h": "high", "l": "low", "c": "close", "v": "volume"}) + return df[["open", "high", "low", "close", "volume"]].astype(float).tail(limit) + except Exception: + pass + + try: + stooq_url = f"https://stooq.com/q/d/l/?s={symbol.lower()}.us&i=d" + resp = requests.get(stooq_url, timeout=10) + resp.raise_for_status() + df = pd.read_csv(io.StringIO(resp.text)) + if {"Date", "Open", "High", "Low", "Close"}.issubset(df.columns): + df = df.rename( + columns={ + "Date": "time", + "Open": "open", + "High": "high", + "Low": "low", + "Close": "close", + "Volume": "volume", + } + ) + df["time"] = pd.to_datetime(df["time"]) + if "volume" not in df.columns: + df["volume"] = 0.0 + df = df.set_index("time").sort_index() + return df[["open", "high", "low", "close", "volume"]].astype(float).tail(limit) + except Exception: + pass + + return generate_synthetic_ohlcv(symbol, periods=limit) diff --git a/trader/data/storage.py b/trader/data/storage.py new file mode 100644 index 0000000..a5231f9 --- /dev/null +++ b/trader/data/storage.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + + +class JsonStateStore: + def __init__(self, path: str = "logs/paper_state.json") -> None: + self.path = Path(path) + self.path.parent.mkdir(parents=True, exist_ok=True) + + def load(self) -> dict[str, Any]: + if not self.path.exists(): + return {"cash": 100000.0, "positions": {}, "equity_curve": []} + return json.loads(self.path.read_text(encoding="utf-8")) + + def save(self, state: 
dict[str, Any]) -> None: + self.path.write_text(json.dumps(state, indent=2), encoding="utf-8") diff --git a/trader/data/synthetic.py b/trader/data/synthetic.py new file mode 100644 index 0000000..10d79cd --- /dev/null +++ b/trader/data/synthetic.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd + + +def generate_synthetic_ohlcv(symbol: str, periods: int = 1200, seed: int = 42) -> pd.DataFrame: + rng = np.random.default_rng(abs(hash(symbol)) % (2**16) + seed) + dates = pd.date_range(end=pd.Timestamp.today().normalize(), periods=periods, freq="B") + drift = 0.0003 if symbol == "SPY" else 0.0008 + vol = 0.01 if symbol == "SPY" else 0.025 + rets = rng.normal(drift, vol, size=periods) + close = 100 * np.exp(np.cumsum(rets)) + open_ = close * (1 + rng.normal(0, vol / 3, size=periods)) + high = np.maximum(open_, close) * (1 + np.abs(rng.normal(0.001, 0.005, size=periods))) + low = np.minimum(open_, close) * (1 - np.abs(rng.normal(0.001, 0.005, size=periods))) + volume = rng.integers(1_000, 20_000, size=periods).astype(float) + return pd.DataFrame( + {"open": open_, "high": high, "low": low, "close": close, "volume": volume}, index=dates + ) diff --git a/trader/execution/__init__.py b/trader/execution/__init__.py new file mode 100644 index 0000000..3ab7c04 --- /dev/null +++ b/trader/execution/__init__.py @@ -0,0 +1 @@ +"""Execution brokers.""" diff --git a/trader/execution/broker_base.py b/trader/execution/broker_base.py new file mode 100644 index 0000000..6a05eea --- /dev/null +++ b/trader/execution/broker_base.py @@ -0,0 +1,13 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod + + +class BrokerBase(ABC): + @abstractmethod + def rebalance(self, symbol: str, target_weight: float, price: float) -> dict: + raise NotImplementedError + + @abstractmethod + def snapshot(self) -> dict: + raise NotImplementedError diff --git a/trader/execution/live_placeholders.py b/trader/execution/live_placeholders.py 
new file mode 100644 index 0000000..c8202d7 --- /dev/null +++ b/trader/execution/live_placeholders.py @@ -0,0 +1,17 @@ +from __future__ import annotations + +import os + +from trader.execution.broker_base import BrokerBase + + +class LiveBrokerPlaceholder(BrokerBase): + def __init__(self) -> None: + if os.getenv("ENABLE_LIVE_TRADING", "false").lower() != "true": + raise RuntimeError("Live trading disabled. Set ENABLE_LIVE_TRADING=true explicitly.") + + def rebalance(self, symbol: str, target_weight: float, price: float) -> dict: + raise NotImplementedError("Integrate real broker APIs before live use.") + + def snapshot(self) -> dict: + return {} diff --git a/trader/execution/paper.py b/trader/execution/paper.py new file mode 100644 index 0000000..d3da57b --- /dev/null +++ b/trader/execution/paper.py @@ -0,0 +1,34 @@ +from __future__ import annotations + +from trader.data.storage import JsonStateStore +from trader.execution.broker_base import BrokerBase + + +class PaperBroker(BrokerBase): + def __init__(self, state_path: str = "logs/paper_state.json", fee_bps: float = 1.0, slippage_bps: float = 2.0) -> None: + self.store = JsonStateStore(state_path) + self.state = self.store.load() + self.fee_bps = fee_bps + self.slippage_bps = slippage_bps + + def equity(self, prices: dict[str, float] | None = None) -> float: + prices = prices or {} + pos_val = sum(q * prices.get(sym, 0.0) for sym, q in self.state["positions"].items()) + return float(self.state["cash"] + pos_val) + + def rebalance(self, symbol: str, target_weight: float, price: float) -> dict: + eq = self.equity({symbol: price}) + target_notional = eq * target_weight + current_qty = float(self.state["positions"].get(symbol, 0.0)) + current_notional = current_qty * price + delta_notional = target_notional - current_notional + fill_price = price * (1 + self.slippage_bps / 10000) + qty = delta_notional / fill_price + fee = abs(delta_notional) * self.fee_bps / 10000 + self.state["cash"] -= qty * fill_price + fee + 
self.state["positions"][symbol] = current_qty + qty + self.store.save(self.state) + return {"symbol": symbol, "qty": qty, "fill_price": fill_price, "fee": fee} + + def snapshot(self) -> dict: + return self.state diff --git a/trader/features/__init__.py b/trader/features/__init__.py new file mode 100644 index 0000000..bee105c --- /dev/null +++ b/trader/features/__init__.py @@ -0,0 +1 @@ +"""Feature engineering package.""" diff --git a/trader/features/compute.py b/trader/features/compute.py new file mode 100644 index 0000000..11bd675 --- /dev/null +++ b/trader/features/compute.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd + +from trader.features.cross_asset import add_cross_features +from trader.features.indicators import atr14, rolling_slope + + +def compute_features(df: pd.DataFrame) -> pd.DataFrame: + out = df.copy() + lr = np.log(out["close"]).diff() + out["r1"] = lr + for n in [3, 5, 10, 20]: + out[f"r{n}"] = np.log(out["close"]).diff(n) + out["sma20_dist"] = out["close"] / out["close"].rolling(20).mean() - 1 + out["sma50_dist"] = out["close"] / out["close"].rolling(50).mean() - 1 + out["slope20"] = rolling_slope(out["close"], 20) + out["slope50"] = rolling_slope(out["close"], 50) + out["vol5"] = lr.rolling(5).std() + out["vol10"] = lr.rolling(10).std() + out["vol20"] = lr.rolling(20).std() + out["vol60"] = lr.rolling(60).std() + out["vol_ratio"] = out["vol5"] / out["vol20"].replace(0, np.nan) + out["atr14"] = atr14(out) + out["atr_pct"] = out["atr14"] / out["close"] + out["hl_pct"] = (out["high"] - out["low"]) / out["close"] + out["upper_wick"] = (out["high"] - out[["open", "close"]].max(axis=1)) / out["close"] + out["lower_wick"] = (out[["open", "close"]].min(axis=1) - out["low"]) / out["close"] + out["vol_z20"] = (out["volume"] - out["volume"].rolling(20).mean()) / out["volume"].rolling(20).std() + dollar_vol = out["volume"] * out["close"] + out["dollar_vol_z20"] = (dollar_vol - 
dollar_vol.rolling(20).mean()) / dollar_vol.rolling(20).std() + out["z_close_20"] = (out["close"] - out["close"].rolling(20).mean()) / out["close"].rolling(20).std() + out["z_return_20"] = (lr - lr.rolling(20).mean()) / lr.rolling(20).std() + out["distance_from_high_20"] = out["close"] / out["high"].rolling(20).max() - 1 + out["distance_from_low_20"] = out["close"] / out["low"].rolling(20).min() - 1 + return out + + +def make_dataset(asset: str, prices: dict[str, pd.DataFrame], threshold: float, include_crypto_for_spy: bool = False) -> tuple[pd.DataFrame, pd.Series]: + feats = compute_features(prices[asset]) + feats = add_cross_features(feats, asset, prices, include_crypto_for_spy=include_crypto_for_spy) + fut_ret = prices[asset]["close"].shift(-1) / prices[asset]["close"] - 1 + y = (fut_ret > threshold).astype(int) + aligned = feats.join(y.rename("y"), how="inner").dropna() + return aligned.drop(columns=["y"]), aligned["y"] diff --git a/trader/features/cross_asset.py b/trader/features/cross_asset.py new file mode 100644 index 0000000..d821760 --- /dev/null +++ b/trader/features/cross_asset.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd + + +def add_cross_features(base: pd.DataFrame, asset: str, prices: dict[str, pd.DataFrame], include_spy_for_crypto: bool = True, include_crypto_for_spy: bool = False) -> pd.DataFrame: + out = base.copy() + if asset in {"BTC-USD", "ETH-USD"}: + other = "ETH-USD" if asset == "BTC-USD" else "BTC-USD" + other_r = np.log(prices[other]["close"]).diff() + out[f"{other}_r1"] = other_r + out[f"{other}_r5"] = np.log(prices[other]["close"]).diff(5) + out[f"{other}_vol10"] = other_r.rolling(10).std() + self_r = np.log(prices[asset]["close"]).diff() + out["corr_20_crypto"] = self_r.rolling(20).corr(other_r) + spread = np.log(prices["BTC-USD"]["close"]) - np.log(prices["ETH-USD"]["close"]) + out["z_spread_60"] = (spread - spread.rolling(60).mean()) / spread.rolling(60).std() + if 
include_spy_for_crypto and "SPY" in prices: + spy_r = np.log(prices["SPY"]["close"]).diff() + out["SPY_r1"] = spy_r + out["SPY_r5"] = np.log(prices["SPY"]["close"]).diff(5) + out["corr_60_spy"] = self_r.rolling(60).corr(spy_r) + elif asset == "SPY" and include_crypto_for_spy and "BTC-USD" in prices: + btc_r = np.log(prices["BTC-USD"]["close"]).diff() + out["BTC_r1"] = btc_r + return out diff --git a/trader/features/indicators.py b/trader/features/indicators.py new file mode 100644 index 0000000..e9e6c6f --- /dev/null +++ b/trader/features/indicators.py @@ -0,0 +1,33 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd + + +def rolling_slope(series: pd.Series, window: int) -> pd.Series: + x = np.arange(window) + + def _slope(vals: np.ndarray) -> float: + if np.any(~np.isfinite(vals)): + return np.nan + y = vals + x_mean = x.mean() + y_mean = y.mean() + num = ((x - x_mean) * (y - y_mean)).sum() + den = ((x - x_mean) ** 2).sum() + return float(num / den) if den else 0.0 + + return np.log(series).rolling(window).apply(_slope, raw=True) + + +def atr14(df: pd.DataFrame) -> pd.Series: + prev_close = df["close"].shift(1) + tr = pd.concat( + [ + df["high"] - df["low"], + (df["high"] - prev_close).abs(), + (df["low"] - prev_close).abs(), + ], + axis=1, + ).max(axis=1) + return tr.rolling(14).mean() diff --git a/trader/logging.py b/trader/logging.py new file mode 100644 index 0000000..41ff156 --- /dev/null +++ b/trader/logging.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +import pandas as pd + +from trader.utils import ensure_dir + + +class JsonlLogger: + def __init__(self, logs_dir: str = "logs") -> None: + ensure_dir(logs_dir) + stamp = pd.Timestamp.now().strftime("%Y%m%d") + self.path = Path(logs_dir) / f"run_{stamp}.jsonl" + + def log(self, event: dict[str, Any]) -> None: + with self.path.open("a", encoding="utf-8") as fh: + fh.write(json.dumps(event, default=str) + 
"\n") diff --git a/trader/model/__init__.py b/trader/model/__init__.py new file mode 100644 index 0000000..f4a78d1 --- /dev/null +++ b/trader/model/__init__.py @@ -0,0 +1 @@ +"""Model training and prediction.""" diff --git a/trader/model/artifacts.py b/trader/model/artifacts.py new file mode 100644 index 0000000..8d4bf08 --- /dev/null +++ b/trader/model/artifacts.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import json +import pickle +from pathlib import Path +from typing import Any + + +class ArtifactStore: + def __init__(self, model_dir: str = "models") -> None: + self.model_dir = Path(model_dir) + self.model_dir.mkdir(parents=True, exist_ok=True) + + def save(self, asset: str, model: Any, meta: dict[str, Any]) -> None: + with (self.model_dir / f"{asset}_model.pkl").open("wb") as fh: + pickle.dump(model, fh) + (self.model_dir / f"{asset}_meta.json").write_text(json.dumps(meta, indent=2), encoding="utf-8") + + def load(self, asset: str) -> tuple[Any, dict[str, Any]]: + with (self.model_dir / f"{asset}_model.pkl").open("rb") as fh: + model = pickle.load(fh) + meta = json.loads((self.model_dir / f"{asset}_meta.json").read_text(encoding="utf-8")) + return model, meta diff --git a/trader/model/predict.py b/trader/model/predict.py new file mode 100644 index 0000000..cdf4a79 --- /dev/null +++ b/trader/model/predict.py @@ -0,0 +1,10 @@ +from __future__ import annotations + +from typing import Any + +import pandas as pd + + +def predict_probability(model: Any, latest: pd.DataFrame, feature_order: list[str]) -> float: + row = latest[feature_order].tail(1) + return float(model.predict_proba(row)[:, 1][0]) diff --git a/trader/model/splitter.py b/trader/model/splitter.py new file mode 100644 index 0000000..a46c909 --- /dev/null +++ b/trader/model/splitter.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import pandas as pd + + +@dataclass(slots=True) +class WalkForwardSplitter: + train_days: int = 756 + test_days: 
int = 63 + step_days: int = 63 + embargo_days: int = 1 + + def split(self, index: pd.Index) -> list[tuple[pd.Index, pd.Index]]: + dates = pd.Index(index).sort_values() + out: list[tuple[pd.Index, pd.Index]] = [] + start = 0 + while True: + train_end = start + self.train_days - 1 + test_start = train_end + self.embargo_days + 1 + test_end = test_start + self.test_days - 1 + if test_end >= len(dates): + break + tr_idx = dates[start : train_end + 1] + te_idx = dates[test_start : test_end + 1] + out.append((tr_idx, te_idx)) + start += self.step_days + return out diff --git a/trader/model/train.py b/trader/model/train.py new file mode 100644 index 0000000..53b061c --- /dev/null +++ b/trader/model/train.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pandas as pd +from sklearn.metrics import log_loss, roc_auc_score + +from trader.features.compute import make_dataset +from trader.model.artifacts import ArtifactStore +from trader.model.splitter import WalkForwardSplitter + +try: + from xgboost import XGBClassifier + + def build_model(params: dict[str, Any] | None = None) -> Any: + base: dict[str, Any] = { + "n_estimators": 180, + "max_depth": 4, + "learning_rate": 0.03, + "subsample": 0.9, + "colsample_bytree": 0.9, + "random_state": 42, + "eval_metric": "logloss", + "n_jobs": 1, + } + if params: + base.update(params) + return XGBClassifier(**base) + + + def parameter_candidates() -> list[dict[str, Any]]: + return [ + {"n_estimators": 120, "max_depth": 3, "learning_rate": 0.05, "subsample": 0.9, "colsample_bytree": 0.9}, + {"n_estimators": 180, "max_depth": 4, "learning_rate": 0.03, "subsample": 0.85, "colsample_bytree": 0.85}, + {"n_estimators": 240, "max_depth": 3, "learning_rate": 0.02, "subsample": 0.9, "colsample_bytree": 0.8}, + {"n_estimators": 220, "max_depth": 5, "learning_rate": 0.03, "subsample": 0.8, "colsample_bytree": 0.8}, + ] +except Exception: + from sklearn.ensemble import 
GradientBoostingClassifier + + def build_model(params: dict[str, Any] | None = None) -> Any: + base: dict[str, Any] = {"random_state": 42, "n_estimators": 150, "learning_rate": 0.05, "max_depth": 3} + if params: + base.update(params) + return GradientBoostingClassifier(**base) + + def parameter_candidates() -> list[dict[str, Any]]: + return [ + {"n_estimators": 120, "learning_rate": 0.08, "max_depth": 2}, + {"n_estimators": 180, "learning_rate": 0.05, "max_depth": 3}, + {"n_estimators": 240, "learning_rate": 0.03, "max_depth": 3}, + ] + + +def _evaluate_model( + X: pd.DataFrame, + y: pd.Series, + splitter: WalkForwardSplitter, + params: dict[str, Any] | None = None, +) -> tuple[float, float]: + aucs: list[float] = [] + losses: list[float] = [] + for tr_idx, te_idx in splitter.split(X.index): + y_tr = y.loc[tr_idx] + if y_tr.nunique() < 2: + continue + scale_pos_weight = float((y_tr == 0).sum() / max((y_tr == 1).sum(), 1)) + model_params = dict(params or {}) + if "scale_pos_weight" not in model_params: + model_params["scale_pos_weight"] = scale_pos_weight + model = build_model(model_params) + model.fit(X.loc[tr_idx], y_tr) + proba = model.predict_proba(X.loc[te_idx])[:, 1] + y_te = y.loc[te_idx] + if y_te.nunique() > 1: + aucs.append(float(roc_auc_score(y_te, proba))) + losses.append(float(log_loss(y_te, np.clip(proba, 1e-6, 1 - 1e-6)))) + mean_auc = float(np.mean(aucs)) if aucs else 0.5 + mean_logloss = float(np.mean(losses)) if losses else 0.693 + return mean_auc, mean_logloss + + +def _find_best_params(X: pd.DataFrame, y: pd.Series, splitter: WalkForwardSplitter) -> tuple[dict[str, Any], float, float]: + best_params: dict[str, Any] = {} + best_auc = -1.0 + best_logloss = float("inf") + for params in parameter_candidates(): + auc, ll = _evaluate_model(X, y, splitter, params) + if auc > best_auc or (np.isclose(auc, best_auc) and ll < best_logloss): + best_auc = auc + best_logloss = ll + best_params = params + return best_params, best_auc, best_logloss + + +def 
_optimize_cutoff( + X: pd.DataFrame, + y: pd.Series, + splitter: WalkForwardSplitter, + params: dict[str, Any], + default_cutoff: float, +) -> float: + thresholds = np.arange(0.50, 0.71, 0.01) + scores = {float(t): [] for t in thresholds} + + for tr_idx, te_idx in splitter.split(X.index): + y_tr = y.loc[tr_idx] + if y_tr.nunique() < 2: + continue + scale_pos_weight = float((y_tr == 0).sum() / max((y_tr == 1).sum(), 1)) + model_params = dict(params) + model_params["scale_pos_weight"] = scale_pos_weight + model = build_model(model_params) + model.fit(X.loc[tr_idx], y_tr) + proba = model.predict_proba(X.loc[te_idx])[:, 1] + y_te = y.loc[te_idx].to_numpy() + for th in thresholds: + pred = (proba >= th).astype(int) + tp = int(((pred == 1) & (y_te == 1)).sum()) + tn = int(((pred == 0) & (y_te == 0)).sum()) + fp = int(((pred == 1) & (y_te == 0)).sum()) + fn = int(((pred == 0) & (y_te == 1)).sum()) + tpr = tp / max(tp + fn, 1) + tnr = tn / max(tn + fp, 1) + scores[float(th)].append(0.5 * (tpr + tnr)) + + best_t = default_cutoff + best_score = -1.0 + for th, vals in scores.items(): + if not vals: + continue + s = float(np.mean(vals)) + if s > best_score: + best_score = s + best_t = th + return float(best_t) + + +def train_asset(asset: str, prices: dict[str, pd.DataFrame], cfg: dict[str, Any], store: ArtifactStore) -> dict[str, float]: + threshold = float(cfg["thresholds"][asset]) + default_p_long = float(cfg["p_long"][asset]) + X, y = make_dataset(asset, prices, threshold, include_crypto_for_spy=bool(cfg.get("include_crypto_for_spy", False))) + splitter = WalkForwardSplitter(**cfg["walkforward"]) + + best_params, best_auc, best_logloss = _find_best_params(X, y, splitter) + calibrated_p_long = _optimize_cutoff(X, y, splitter, best_params, default_p_long) + + scale_pos_weight = float((y == 0).sum() / max((y == 1).sum(), 1)) + final_params = dict(best_params) + final_params["scale_pos_weight"] = scale_pos_weight + final_model = build_model(final_params) + final_model.fit(X, y) 
+ + meta = { + "features": list(X.columns), + "threshold": threshold, + "p_long": calibrated_p_long, + "p_long_default": default_p_long, + "vol_target": cfg["vol_target"][asset], + "per_asset_cap": cfg["caps"]["per_asset"][asset], + "best_params": best_params, + "cv_auc": best_auc, + "cv_logloss": best_logloss, + "trained_rows": int(len(X)), + "timestamp": pd.Timestamp.utcnow().isoformat(), + } + store.save(asset, final_model, meta) + return {"mean_auc": best_auc, "mean_logloss": best_logloss, "calibrated_p_long": calibrated_p_long} diff --git a/trader/risk/__init__.py b/trader/risk/__init__.py new file mode 100644 index 0000000..c60c363 --- /dev/null +++ b/trader/risk/__init__.py @@ -0,0 +1 @@ +"""Risk controls.""" diff --git a/trader/risk/caps.py b/trader/risk/caps.py new file mode 100644 index 0000000..72c7583 --- /dev/null +++ b/trader/risk/caps.py @@ -0,0 +1,13 @@ +from __future__ import annotations + + +def apply_crypto_cap(weights: dict[str, float], total_cap: float = 0.4) -> dict[str, float]: + crypto = [a for a in ["BTC-USD", "ETH-USD"] if a in weights] + s = sum(weights[a] for a in crypto) + if s <= total_cap or s <= 0: + return weights + scale = total_cap / s + out = weights.copy() + for a in crypto: + out[a] *= scale + return out diff --git a/trader/risk/regime.py b/trader/risk/regime.py new file mode 100644 index 0000000..0bfa9ed --- /dev/null +++ b/trader/risk/regime.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd + + +def regime_status(asset: str, feats: pd.DataFrame) -> dict[str, float | bool]: + """Return per-rule regime checks for the latest bar.""" + vol20 = feats["vol20"] + latest_vol20 = float(vol20.iloc[-1]) + p95 = float(vol20.rolling(252).quantile(0.95).iloc[-1]) + vol60 = float(feats["vol60"].iloc[-1]) + vol_ratio = float(feats["vol5"].iloc[-1] / max(vol60, 1e-9)) + close = feats["close"] + drawdown = float(1 - close.iloc[-1] / close.rolling(252).max().iloc[-1]) + dd_limit = 0.18 if asset 
== "SPY" else 0.25 + + vol_spike = bool(np.isfinite(p95) and latest_vol20 > p95) + vol_ratio_spike = bool(vol_ratio > 2.2) + drawdown_breach = bool(drawdown > dd_limit) + bad = bool(vol_spike or vol_ratio_spike or drawdown_breach) + return { + "bad": bad, + "vol20": latest_vol20, + "vol20_p95": p95, + "vol_spike": vol_spike, + "vol_ratio": vol_ratio, + "vol_ratio_spike": vol_ratio_spike, + "drawdown": drawdown, + "drawdown_limit": dd_limit, + "drawdown_breach": drawdown_breach, + } + + +def regime_is_bad(asset: str, feats: pd.DataFrame) -> bool: + return bool(regime_status(asset, feats)["bad"]) diff --git a/trader/risk/sizing.py b/trader/risk/sizing.py new file mode 100644 index 0000000..f1634ba --- /dev/null +++ b/trader/risk/sizing.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import numpy as np + + +def confidence_fraction(prob: float, p_long: float) -> float: + if prob < p_long: + return 0.0 + if prob < p_long + 0.04: + return 0.25 + if prob < p_long + 0.08: + return 0.50 + if prob < p_long + 0.12: + return 0.75 + return 1.0 + + +def target_weight(prob: float, p_long: float, sigma20: float, vol_target: float, cap: float) -> float: + raw = vol_target / max(sigma20, 1e-8) + return float(np.clip(raw, 0.0, cap) * confidence_fraction(prob, p_long)) diff --git a/trader/utils.py b/trader/utils.py new file mode 100644 index 0000000..8fb67b7 --- /dev/null +++ b/trader/utils.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +import hashlib +import json +from pathlib import Path +from typing import Any + +import pandas as pd + + +def ensure_dir(path: str | Path) -> Path: + out = Path(path) + out.mkdir(parents=True, exist_ok=True) + return out + + +def now_utc_ts() -> pd.Timestamp: + return pd.Timestamp.utcnow().tz_localize("UTC") if pd.Timestamp.utcnow().tzinfo is None else pd.Timestamp.utcnow() + + +def stable_hash(payload: dict[str, Any]) -> str: + blob = json.dumps(payload, sort_keys=True, default=str).encode("utf-8") + return 
hashlib.sha256(blob).hexdigest()[:16]