Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions backend/scripts/ingest/backfill_ecos_cycle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""ecos_timeseries.cycle 100% NULL 채움.

audit-null-orphan-2026-05-04 발견 — ECOS API ETL 이 ``cycle`` 컬럼 미적재.
3 stat_code (121Y006/722Y001/901Y009) × ECOS StatisticItemList API 호출 후
``(stat_code, item_code1)`` 매핑으로 cycle UPDATE.

ECOS API rate limit / pagination:
- 페이지당 1000 item (901Y009 는 페이지 2 추가 호출 필요)
- 기준: 2026-05-05 — 901Y009 = 1,743 items (페이지 1+2 합산), 121Y006 = 57, 722Y001 = 48

결과: 0% → 100% (2,783/2,783)

사용법:
cd backend && python scripts/ingest/backfill_ecos_cycle.py
"""

from __future__ import annotations

import sys
from pathlib import Path

import httpx
import sqlalchemy as sa

sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from src.config.settings import settings # noqa: E402

PAGE = 1000


def _fetch_meta(stat_code: str, key: str) -> dict[str, str]:
    """Fetch the ``ITEM_CODE -> CYCLE`` mapping for one ECOS statistic table.

    Walks the ``StatisticItemList`` endpoint in windows of ``PAGE`` rows and
    stops at the first empty page or the first page shorter than ``PAGE``.

    Args:
        stat_code: ECOS statistic table code, placed last in the URL path.
        key: ECOS API key, embedded in the URL path.

    Returns:
        Mapping of item code to cycle. Rows missing either field are skipped.
        When the same ``ITEM_CODE`` occurs on several rows the last row wins.

    Raises:
        RuntimeError: The API answered with a non-200 HTTP status, or with a
            ``RESULT`` error block whose code is not the "no data" code.
    """
    out: dict[str, str] = {}
    start = 1
    while True:
        # NOTE(review): plain http:// sends the API key in cleartext — confirm
        # whether the endpoint can be called over https instead.
        url = f"http://ecos.bok.or.kr/api/StatisticItemList/{key}/json/kr/{start}/{start + PAGE - 1}/{stat_code}"
        resp = httpx.get(url, timeout=30)
        if resp.status_code != 200:
            # Deliberately do not echo the URL: it contains the API key.
            raise RuntimeError(f"ECOS StatisticItemList HTTP {resp.status_code} for stat_code={stat_code}")
        payload = resp.json()
        block = payload.get("StatisticItemList")
        if not block or not block.get("row"):
            # ECOS reports problems as {"RESULT": {"CODE": ..., "MESSAGE": ...}}.
            # INFO-200 means "no data" (normal end of pagination); anything
            # else (bad key, rate limit, server error) must not be mistaken
            # for an empty result, or the backfill silently updates nothing.
            result = payload.get("RESULT") or {}
            code = result.get("CODE")
            if code and code != "INFO-200":
                raise RuntimeError(
                    f"ECOS StatisticItemList error for stat_code={stat_code}: {code} {result.get('MESSAGE', '')}"
                )
            break
        rows = block["row"]
        for row in rows:
            ic = row.get("ITEM_CODE")
            cy = row.get("CYCLE")
            if ic and cy:
                # NOTE(review): an item published at several cycles would be
                # collapsed to its last row here — confirm this is intended.
                out[ic] = cy
        if len(rows) < PAGE:
            # Short page: nothing further to request.
            break
        start += PAGE
    return out


def main() -> None:
    """Backfill NULL ``ecos_timeseries.cycle`` values from ECOS item metadata.

    For every distinct ``stat_code`` in ``ecos_timeseries`` the item metadata
    is fetched via ``_fetch_meta`` and rows matching ``(stat_code, item_code1)``
    whose ``cycle`` is still NULL are updated, one transaction per stat_code.
    A before/after style summary is printed at the end.

    Raises:
        RuntimeError: ``settings.ecos_api_key`` is empty.
    """
    api_key = settings.ecos_api_key
    if not api_key:
        raise RuntimeError("ECOS_API_KEY missing in settings")

    engine = sa.create_engine(settings.postgres_url)

    # Distinct stat_code values currently present in the table.
    distinct_sql = sa.text("SELECT DISTINCT stat_code FROM ecos_timeseries")
    with engine.connect() as conn:
        stat_codes = [record[0] for record in conn.execute(distinct_sql).fetchall()]
    print(f"ecos_timeseries stat_codes: {stat_codes}")

    # Only rows whose cycle is still NULL are touched, so re-runs are harmless.
    update_sql = sa.text(
        "UPDATE ecos_timeseries SET cycle=:cy WHERE stat_code=:sc AND item_code1=:ic AND cycle IS NULL"
    )

    grand_total = 0
    for stat_code in stat_codes:
        cycles = _fetch_meta(stat_code, api_key)
        print(f" {stat_code}: meta {len(cycles)} items")
        with engine.begin() as conn:
            touched = sum(
                conn.execute(update_sql, {"cy": cycle, "sc": stat_code, "ic": item_code}).rowcount
                for item_code, cycle in cycles.items()
            )
        grand_total += touched
        print(f" updated: {touched} rows")

    with engine.connect() as conn:
        row_count = conn.execute(sa.text("SELECT COUNT(*) FROM ecos_timeseries")).scalar()
        filled = conn.execute(sa.text("SELECT COUNT(*) FROM ecos_timeseries WHERE cycle IS NOT NULL")).scalar()

    # Guard the percentage against an empty table.
    if row_count:
        pct = filled / row_count * 100
    else:
        pct = 0
    print()
    print(f"=== AFTER ===\n cycle non-NULL: {filled}/{row_count} ({pct:.1f}%) — total update {grand_total}")


if __name__ == "__main__":
    main()
95 changes: 95 additions & 0 deletions backend/scripts/ingest/fill_ttareungi_dong_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""master_ttareungi_station.dong_code 채우기 (마포 한정).

PR #184 가 ttareungi station 의 lat/lon + sigungu_code 채움 (3,230 row API 매핑).
본 스크립트는 그 결과 위에서 마포(sigungu_code='11440') station 의 dong_code 를
``dong_centroid`` 16 동과의 haversine 거리 비교로 매핑.

- 마포 station: 가장 가까운 dong_centroid → dong_code 적용
- 마포 외 station: 서울 전체 dong_centroid 부재 (E4 한계) — skip
- ``opened_at`` 컬럼: 따릉이 API 응답에 없음 — skip

사용법:
cd backend && python scripts/ingest/fill_ttareungi_dong_code.py

idempotent — dong_code 이미 채워진 row 는 skip.
"""

from __future__ import annotations

import math
import sys
from pathlib import Path

import sqlalchemy as sa

sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from src.config.settings import settings # noqa: E402


def _haversine_m(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Return the great-circle distance between two points, in metres.

    Uses the haversine formula on a sphere of radius 6,371,000 m.

    Args:
        lat1: Latitude of the first point, in decimal degrees.
        lon1: Longitude of the first point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.

    Returns:
        Distance between the two points in metres.
    """
    radius = 6371000  # Earth radius m
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2) ** 2 + math.cos(phi1) * math.cos(phi2) * math.sin(dlon / 2) ** 2
    # Rounding can push ``a`` a hair above 1 for (near-)antipodal points, which
    # would make sqrt(1 - a) raise ValueError. Clamping leaves every a <= 1
    # result untouched.
    a = min(1.0, a)
    return radius * 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))


def main() -> None:
    """Fill ``master_ttareungi_station.dong_code`` for Mapo stations.

    Loads every ``dong_centroid`` row that has coordinates, then gives each
    station with ``sigungu_code='11440'``, a NULL ``dong_code`` and known
    coordinates the ``dong_code`` of the centroid nearest by haversine
    distance. All updates run in one transaction, followed by a printed
    summary.

    Raises:
        RuntimeError: Stations are waiting for a dong_code but
            ``dong_centroid`` holds no row with coordinates.
    """
    engine = sa.create_engine(settings.postgres_url)

    with engine.connect() as conn:
        # Require both coordinates: a NULL lon would crash math.radians().
        centroids = conn.execute(
            sa.text("SELECT dong_code, lat, lon FROM dong_centroid WHERE lat IS NOT NULL AND lon IS NOT NULL")
        ).fetchall()
        # float() at the DB boundary so NUMERIC (Decimal) and DOUBLE PRECISION
        # columns can be mixed in the distance arithmetic.
        # NOTE(review): centroids are not filtered to Mapo; this relies on
        # dong_centroid containing only Mapo dongs — confirm before the table
        # is extended to all of Seoul.
        cents = [(r._mapping["dong_code"], float(r._mapping["lat"]), float(r._mapping["lon"])) for r in centroids]
        print(f"dong_centroid: {len(cents)} 동")

        rows = conn.execute(
            sa.text(
                "SELECT station_id, lat, lon FROM master_ttareungi_station "
                "WHERE sigungu_code='11440' AND dong_code IS NULL AND lat IS NOT NULL AND lon IS NOT NULL"
            )
        ).fetchall()
        print(f"마포 ttareungi (dong_code NULL): {len(rows)}")

    # min() over an empty sequence raises an opaque ValueError; fail clearly.
    if rows and not cents:
        raise RuntimeError("dong_centroid has no rows with coordinates; cannot map dong_code")

    matched: list[tuple[str, str]] = []
    for r in rows:
        sid = r._mapping["station_id"]
        lat = float(r._mapping["lat"])
        lon = float(r._mapping["lon"])
        best = min(cents, key=lambda c: _haversine_m(lat, lon, c[1], c[2]))
        matched.append((sid, best[0]))

    print(f"matched: {len(matched)}")

    # Single transaction: either every matched station is updated or none.
    with engine.begin() as conn:
        for sid, dc in matched:
            conn.execute(
                sa.text("UPDATE master_ttareungi_station SET dong_code=:dc WHERE station_id=:sid"),
                {"dc": dc, "sid": sid},
            )

    with engine.connect() as conn:
        total = conn.execute(sa.text("SELECT COUNT(*) FROM master_ttareungi_station")).scalar()
        null_dc = conn.execute(
            sa.text("SELECT COUNT(*) FROM master_ttareungi_station WHERE dong_code IS NULL")
        ).scalar()
        mapo_filled = conn.execute(
            sa.text(
                "SELECT COUNT(*) FROM master_ttareungi_station WHERE sigungu_code='11440' AND dong_code IS NOT NULL"
            )
        ).scalar()

    # Guard against an empty station table (avoids ZeroDivisionError).
    null_pct = (null_dc / total * 100) if total else 0

    print()
    print("=== AFTER ===")
    print(f" total {total}, dong_code NULL {null_dc} ({null_pct:.1f}%)")
    print(f" 마포 dong_code 채워짐: {mapo_filled}")
    print()
    # NOTE(review): the row count in the next message is hard-coded, not
    # computed from the database — confirm it is still accurate.
    print("미적재 사유 (마포 외 5,298 row):")
    print(" 서울 전체 dong_centroid 부재 (E4 한계). DongCentroid 서울 확장 별 PR 필요.")


if __name__ == "__main__":
    main()
2 changes: 2 additions & 0 deletions backend/src/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class Settings(BaseSettings):
sgis_secret_key: str = os.getenv("SGIS_SECRET_KEY", "")
molit_api_key: str = os.getenv("MOLIT_API_KEY", "")
ftc_api_key: str = os.getenv("FTC_API_KEY", "")
kakao_api_key: str = os.getenv("KAKAO_API_KEY", "")
ecos_api_key: str = os.getenv("ECOS_API_KEY", "")
law_oc: str = os.getenv("LAW_OC", "")

# Naver DataLab API
Expand Down
60 changes: 60 additions & 0 deletions docs/retrospective/2026-05-05.md
Original file line number Diff line number Diff line change
Expand Up @@ -549,3 +549,63 @@
```

---

## 12:31:32 세션 완료

### 변경 파일
- backend/src/agents/legal/categories.py
- docs/retrospective/2026-05-05.md

### diff 요약
```
backend/src/agents/legal/categories.py | 18 +++++++++---------
docs/retrospective/2026-05-05.md | 9 +++++++++
2 files changed, 18 insertions(+), 9 deletions(-)
```

---

## 12:36:12 세션 완료

### 변경 파일
- backend/src/agents/legal/categories.py
- docs/retrospective/2026-05-05.md

### diff 요약
```
backend/src/agents/legal/categories.py | 18 +++++++++---------
docs/retrospective/2026-05-05.md | 24 ++++++++++++++++++++++++
2 files changed, 33 insertions(+), 9 deletions(-)
```

---

## 12:40:27 세션 완료

### 변경 파일
- backend/src/agents/legal/categories.py
- docs/retrospective/2026-05-05.md

### diff 요약
```
backend/src/agents/legal/categories.py | 18 ++++++++--------
docs/retrospective/2026-05-05.md | 39 ++++++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+), 9 deletions(-)
```

---

## 12:41:12 세션 완료

### 변경 파일
- backend/src/agents/legal/categories.py
- docs/retrospective/2026-05-05.md

### diff 요약
```
backend/src/agents/legal/categories.py | 18 ++++++------
docs/retrospective/2026-05-05.md | 54 ++++++++++++++++++++++++++++++++++
2 files changed, 63 insertions(+), 9 deletions(-)
```

---
Loading