diff --git a/backend/data/seed/master_subway_station_all.csv b/backend/data/seed/master_subway_station_all.csv index 930e4398..250dd7ce 100644 --- a/backend/data/seed/master_subway_station_all.csv +++ b/backend/data/seed/master_subway_station_all.csv @@ -46,8 +46,8 @@ fdd04d96df,구로디지털단지,2호선,,, 8aae3a90fb,문래,2호선,,, e0edb339bb,영등포구청,2호선,,, 8b2d06e78c,당산,2호선,,, -8da92f5b3a,합정,2호선,11440,, -ee2743c5bd,홍대입구,2호선,11440,, +8da92f5b3a,합정,2호선,11440,37.5501151,126.9146385 +ee2743c5bd,홍대입구,2호선,11440,37.5568904,126.9236745 17ab6b4d6b,신촌,2호선,,, 7318e44bc0,이대,2호선,,, 6443933cab,아현,2호선,,, @@ -136,9 +136,9 @@ b8ac94d347,양평,5호선,,, 2189142563,신길,5호선,,, 4e42ed8cf7,여의도,5호선,,, 02fac88a69,여의나루,5호선,,, -9ce960fea8,마포,5호선,11440,, -ba5c0f41c7,공덕,5호선,11440,, -151948d424,애오개,5호선,11440,, +9ce960fea8,마포,5호선,11440,37.5395838,126.9459206 +ba5c0f41c7,공덕,5호선,11440,37.5444902,126.9511944 +151948d424,애오개,5호선,11440,37.5533623,126.9566135 6a84a1a047,충정로(경기대입구),5호선,,, 0aa4652117,서대문,5호선,,, ad3ba52ffa,광화문(세종문화회관),5호선,,, @@ -181,15 +181,15 @@ a17b14787b,독바위,6호선,,, b48711e072,구산,6호선,,, 9f1d39eb80,새절(신사),6호선,,, 1c3112d189,증산(명지대앞),6호선,,, -613c87f8ff,디지털미디어시티,6호선,11440,, -be01e5a8aa,월드컵경기장(성산),6호선,,, -6eef83a448,마포구청,6호선,11440,, -3e79429c25,망원,6호선,11440,, -35dbc330ac,합정,6호선,11440,, -ed6616630b,상수,6호선,11440,, -b58415995f,광흥창(서강),6호선,11440,, -a1751d3fec,대흥(서강대앞),6호선,11440,, -4a4333f5b5,공덕,6호선,11440,, +613c87f8ff,디지털미디어시티,6호선,11440,37.5769528,126.9016341 +be01e5a8aa,월드컵경기장(성산),6호선,11440,37.5699425,126.8990336 +6eef83a448,마포구청,6호선,11440,37.5634262,126.9033573 +3e79429c25,망원,6호선,11440,37.5560572,126.9100342 +35dbc330ac,합정,6호선,11440,37.5491330,126.9133441 +ed6616630b,상수,6호선,11440,37.5479816,126.9226555 +b58415995f,광흥창(서강),6호선,11440,37.5475679,126.9324099 +a1751d3fec,대흥(서강대앞),6호선,11440,37.5477578,126.9423558 +4a4333f5b5,공덕,6호선,11440,37.5444902,126.9511944 3d4e876281,효창공원앞,6호선,,, fc04ea70c4,삼각지,6호선,,, 4d772422f1,녹사평(용산구청),6호선,,, diff --git a/backend/data/seed/raw/seoul_subway_stations_with_coords.csv 
b/backend/data/seed/raw/seoul_subway_stations_with_coords.csv new file mode 100644 index 00000000..fb3adc40 --- /dev/null +++ b/backend/data/seed/raw/seoul_subway_stations_with_coords.csv @@ -0,0 +1,255 @@ +lat,lon,name,no_line +37.580059,127.0477395,청량리역,1 +37.5783167,127.0387987,제기동역,1 +37.5761056,127.0245335,신설동역,1 +37.5736037,127.0171515,동묘앞역,1 +37.571762,127.0112532,동대문역,1 +37.5711368,127.0004054,종로5가역,1 +37.5704364,126.9908467,종로3가역,1 +37.5704393,126.9817831,종각역,1 +37.5665841,126.978236,시청역,1 +37.5578848,126.969503,서울역,1 +37.5639433,126.9753301,시청역,2 +37.5660574,126.982031,을지로입구역,2 +37.5662151,126.9902129,을지로3가역,2 +37.5664369,126.9972767,을지로4가역,2 +37.5680073,127.0105944,동대문역사문화공원역,2 +37.5656221,127.0202747,신당역,2 +37.563974,127.0298211,상왕십리역,2 +37.5598976,127.0362047,왕십리역,2 +37.5555778,127.0435848,한양대역,2 +37.5471882,127.0473655,뚝섬역,2 +37.5446953,127.0569957,성수역,2 +37.5407939,127.0690568,건대입구역,2 +37.5378419,127.0884874,구의역,2 +37.5353734,127.0945598,강변역,2 +37.5218958,127.0991262,잠실나루역,2 +37.5127271,127.0951166,잠실역,2 +37.5114774,127.089344,신천역,2 +37.5111054,127.0743133,종합운동장역,2 +37.5091759,127.0579099,삼성역,2 +37.5064317,127.0538834,선릉역,2 +37.4998063,127.0366235,역삼역,2 +37.4986144,127.0280696,강남역,2 +37.4948671,127.0173034,교대역,2 +37.491042,127.0068266,서초역,2 +37.4811928,126.9967368,방배역,2 +37.4808275,126.981678,사당역,2 +37.4794192,126.9575749,낙성대역,2 +37.4810756,126.953368,서울대입구역,2 +37.4821835,126.9416357,봉천역,2 +37.4841329,126.928721,신림역,2 +37.4875528,126.9130145,신대방역,2 +37.4852595,126.90123,구로디지털단지역,2 +37.5051807,126.8889041,대림역,2 +37.508716,126.8910861,신도림역,2 +37.5160389,126.896786,문래역,2 +37.5255161,126.8957813,영등포구청역,2 +37.5346563,126.9023953,당산역,2 +37.5501151,126.9146385,합정역,2 +37.5568904,126.9236745,홍대입구역,2 +37.5546845,126.9375524,신촌역,2 +37.5565895,126.9462461,이대역,2 +37.5572198,126.9540333,아현역,2 +37.5609771,126.9633433,충정로역,2 +37.5659035,127.0494423,용답역,2 +37.5704687,127.0471946,신답역,2 +37.5761056,127.0245335,용두역,2 +37.5145133,126.882391,신설동역,2 
+37.511286,126.8686227,도림천역,2 +37.5182982,126.8536312,양천구청역,2 +37.5735375,127.040034,신정네거리역,2 +37.5324143,126.8463323,까치산역,2 +37.6382313,126.918498,구파발역,3 +37.6246139,126.9159199,연신내역,3 +37.6116741,126.9315413,불광역,3 +37.6011808,126.935001,녹번역,3 +37.5887411,126.9438051,홍제역,3 +37.5824216,126.9505914,무악재역,3 +37.5739073,126.9565091,독립문역,3 +37.5738491,126.9651516,경복궁역,3 +37.5766131,126.9851511,안국역,3 +37.5711219,126.9919626,종로3가역,3 +37.5666832,126.9922314,을지로3가역,3 +37.5615795,126.9955938,충무로역,3 +37.5594809,127.0054604,동대입구역,3 +37.5544552,127.0110376,약수역,3 +37.5481124,127.015894,금호역,3 +37.5408406,127.0181266,옥수역,3 +37.5264114,127.0284389,압구정역,3 +37.51686,127.0198073,신사역,3 +37.5136512,127.0072863,잠원역,3 +37.5055717,127.0071375,고속터미널역,3 +37.493781,127.0130086,교대역,3 +37.4841908,127.0157291,남부터미널역,3 +37.4675894,127.0231988,양재역,3 +37.487408,127.0482109,매봉역,3 +37.4905609,127.0551271,도곡역,3 +37.4942415,127.0638569,대치역,3 +37.4955815,127.0717546,학여울역,3 +37.4936653,127.0795999,대청역,3 +37.4840029,127.0844491,일원역,3 +37.4881073,127.1006967,수서역,3 +37.4930574,127.118076,가락시장역,3 +37.4983182,127.1256891,경찰병원역,3 +37.5034295,127.1261623,오금역,3 +37.6706197,127.0793227,당고개역,4 +37.6603679,127.0719963,상계역,4 +37.6561088,127.0634683,노원역,4 +37.6532914,127.0477428,창동역,4 +37.6486248,127.0350659,쌍문역,4 +37.6374739,127.0249644,수유역,4 +37.6257206,127.0239903,미아역,4 +37.6133549,127.0304248,미아사거리역,4 +37.6032061,127.0248295,길음역,4 +37.5931105,127.0167884,성신여대입구역,4 +37.5880767,127.0058062,한성대입구역,4 +37.5834282,127.0013295,혜화역,4 +37.571152,127.0097126,동대문역,4 +37.5646949,127.0069763,동대문역사문화공원역,4 +37.5615795,126.9955938,충무로역,4 +37.5608497,126.9857347,명동역,4 +37.5598035,126.978484,회현역,4 +37.5503481,126.972209,서울역,4 +37.5451764,126.9718428,숙대입구역,4 +37.5345707,126.9729782,삼각지역,4 +37.5291812,126.9684234,신용산역,4 +37.522997,126.9757555,이촌역,4 +37.502884,126.9799113,동작역,4 +37.4886442,126.9823291,총신대입구역,4 +37.4808275,126.981678,사당역,4 +37.4643715,126.9892869,남태령역,4 +37.5771916,126.8126532,방화역,5 +37.572362,126.8057737,개화산역,5 
+37.5614233,126.8025687,김포공항역,5 +37.5614547,126.8105902,송정역,5 +37.5601985,126.825548,마곡역,5 +37.5589887,126.8373351,발산역,5 +37.5492431,126.8364155,우장산역,5 +37.5410891,126.8408492,화곡역,5 +37.5324143,126.8463323,까치산역,5 +37.5247776,126.8542782,신정역,5 +37.5259734,126.8662904,목동역,5 +37.5247144,126.8738322,오목교역,5 +37.5256583,126.8857681,양평역,5 +37.5244093,126.8943075,영등포구청역,5 +37.5221062,126.9059661,영등포시장역,5 +37.5177216,126.9145919,신길역,5 +37.5239459,126.9278442,여의도역,5 +37.5280839,126.9326299,여의나루역,5 +37.5395838,126.9459206,마포역,5 +37.5444902,126.9511944,공덕역 ,5 +37.5533623,126.9566135,애오개역,5 +37.5594073,126.9629996,충정로역,5 +37.5661195,126.9668778,서대문역,5 +37.5715854,126.9772199,광화문역,5 +37.5725947,126.9897587,종로3가역,5 +37.5671083,126.9980407,을지로4가역,5 +37.564835,127.0050117,동대문역사문화공원역,5 +37.5598849,127.014936,청구역,5 +37.5545799,127.0197774,신금호역,5 +37.5576232,127.0299107,행당역,5 +37.5613065,127.0374819,왕십리역,5 +37.5662827,127.0431638,마장역,5 +37.5664458,127.0531812,답십리역,5 +37.5621592,127.0606458,장한평역,5 +37.5567365,127.0805441,군자역 ,5 +37.5544481,127.0861676,아차산역,5 +37.5451736,127.1035258,광나루역,5 +37.5385067,127.1251276,천호역 ,5 +37.5357899,127.1341189,강동역,5 +37.5378114,127.1399992,길동역,5 +37.5458768,127.1431484,굽은다리역,5 +37.551257,127.1440039,명일역,5 +37.555613,127.1539213,고덕역,5 +37.5535428,127.1569041,상일동역,5 +37.5281346,127.136375,둔촌동역,5 +37.5164993,127.1311332,올림픽공원역,5 +37.508013,127.1255037,방이역,5 +37.5034295,127.1261623,오금역,5 +37.4986379,127.1340037,개롱역,5 +37.4942555,127.1420024,거여역,5 +37.4944351,127.1519703,마천역,5 +37.5993102,126.9153528,응암역,6 +37.6062424,126.9233076,역촌역,6 +37.6105576,126.9293107,불광역,6 +37.6183788,126.9328702,독바위역,6 +37.61897,126.9208595,연신내역,6 +37.6112121,126.9171827,구산역,6 +37.5911691,126.9133077,새절역,6 +37.5835518,126.9092813,증산역,6 +37.5769528,126.9016341,디지털미디어시티역,6 +37.5699425,126.8990336,월드컵경기장역,6 +37.5634262,126.9033573,마포구청역,6 +37.5560572,126.9100342,망원역,6 +37.549133,126.9133441,합정역,6 +37.5479816,126.9226555,상수역,6 +37.5475679,126.9324099,광흥창역,6 
+37.5477578,126.9423558,대흥역,6 +37.5394996,126.9610563,효창공원앞역,6 +37.5344036,126.9730304,삼각지역,6 +37.5344698,126.9856082,녹사평역,6 +37.5345461,126.9946761,이태원역,6 +37.5409028,127.0018677,한강진역,6 +37.548149,127.0070342,버티고개역,6 +37.5544036,127.0103967,약수역,6 +37.5671552,127.0161291,신당역,6 +37.571686,127.0158295,동묘앞역,6 +37.5794607,127.015221,창신역,6 +37.5854476,127.0197191,보문역,6 +37.5861006,127.0294109,안암역,6 +37.5896843,127.0359392,고려대역,6 +37.6018433,127.0414334,월곡역,6 +37.6070179,127.0497123,상월곡역,6 +37.6107345,127.0571227,돌곶이역,6 +37.6152745,127.0668606,석계역,6 +37.6198984,127.0832063,화랑대역,6 +37.616757,127.0926836,봉화산역,6 +37.6889484,127.0466752,도봉산역,7 +37.6763131,127.0553467,수락산역,7 +37.6677166,127.0570141,마들역,7 +37.653609,127.060791,노원역,7 +37.6426533,127.0652297,중계역,7 +37.6352529,127.0685664,하계역,7 +37.6254242,127.0725361,공릉역,7 +37.6191493,127.0751537,태릉입구역 ,7 +37.6108106,127.0777145,먹골역,7 +37.6015857,127.0786549,중화역,7 +37.5960515,127.084396,상봉역,7 +37.5892322,127.0873747,면목역,7 +37.5807618,127.0887198,사가정역,7 +37.57339,127.0865146,용마산역,7 +37.5657613,127.0842209,중곡역,7 +37.547149,127.0739671,어린이대공원역,7 +37.5402235,127.0701033,건대입구역,7 +37.5319437,127.0670183,뚝섬유원지역,7 +37.5187724,127.0493819,청담역,7 +37.518203,127.0471323,강남구청역,7 +37.5141778,127.03138,학동역,7 +37.5097151,127.0221516,논현역,7 +37.508299,127.0118276,반포역,7 +37.5050053,127.0048574,고속터미널역,7 +37.4875321,126.9931792,내방역,7 +37.48497,126.9806891,이수역,7 +37.4847692,126.9710282,남성역,7 +37.4958722,126.9540207,숭실대입구역,7 +37.5035231,126.9472973,상도역,7 +37.5051723,126.9418806,장승배기역,7 +37.4997572,126.9275869,신대방삼거리역,7 +37.4999159,126.9207594,보라매역,7 +37.50032,126.9094539,신풍역,7 +37.4929075,126.896874,대림역,7 +37.4849003,126.8868122,남구로역,7 +37.4806834,126.8829855,가산디지털단지역,7 +37.486829,126.8388736,천왕역,7 +37.4922518,126.8233145,온수역,7 +37.5511715,127.1282291,암사역,8 +37.5306697,127.1205768,강동구청역,8 +37.5169846,127.110759,몽촌토성역,8 +37.5148995,127.1040519,잠실역,8 +37.5069101,127.1054956,석촌역,8 +37.4997883,127.1122667,송파역,8 +37.4930574,127.118076,가락시장역,8 
+37.4877901,127.121335,문정역,8 +37.4777723,127.1265301,장지역,8 +37.4707366,127.1267262,복정역,8 diff --git a/backend/scripts/ingest/fill_subway_coords.py b/backend/scripts/ingest/fill_subway_coords.py new file mode 100644 index 00000000..f20bc76c --- /dev/null +++ b/backend/scripts/ingest/fill_subway_coords.py @@ -0,0 +1,185 @@ +"""master_subway_station 좌표 채우기 (마포 한정). + +소스: backend/data/seed/raw/seoul_subway_stations_with_coords.csv + - 서울 1~9호선 역사 좌표 CSV (yoon-gu gist, Naver API 기반) + - 컬럼: lat,lon,name(역명+'역' 접미),no_line(숫자) + +매칭: (정규화 station_name, line_name) → master_subway_station UPDATE. +정규화: 괄호 부속 명칭 제거('월드컵경기장(성산)' → '월드컵경기장'), '역' 접미 제거, trim. +범위: 마포 14역 (sigungu_code='11440' 인 행 + master 의 마포 환승역). + +사용법: + cd backend + python -m scripts.ingest.fill_subway_coords --dry-run # 매칭만 보고 DB 미수정 + python -m scripts.ingest.fill_subway_coords # 실제 UPDATE +""" + +from __future__ import annotations + +import argparse +import csv +import os +import re +import sys +from pathlib import Path + +import psycopg +from dotenv import load_dotenv + +# repo root .env auto-load (settings.py 와 동일 패턴) — backend/scripts/ingest/X.py → parents[3] +_REPO_ROOT_ENV = Path(__file__).resolve().parents[3] / ".env" +if _REPO_ROOT_ENV.exists(): + load_dotenv(_REPO_ROOT_ENV) + +_DEFAULT_DB_URL = os.environ.get( + "POSTGRES_URL", + "postgresql://postgres:postgres@localhost:5432/mapo_simulator", +) + +# 마포 14역 — master_subway_station 의 (station_name, line_name) 그대로 (괄호 포함). +# UPDATE 의 WHERE 절에 station_name 원본 그대로 사용해야 함. 
# The 14 Mapo stations as (station_name, line_name), spelled exactly as they are
# stored in master_subway_station (parenthesised sub-names included), because
# the UPDATE below matches on the raw station_name.
MAPO_STATIONS: list[tuple[str, str]] = [
    ("합정", "2호선"),
    ("홍대입구", "2호선"),
    ("마포", "5호선"),
    ("공덕", "5호선"),
    ("애오개", "5호선"),
    ("디지털미디어시티", "6호선"),
    ("월드컵경기장(성산)", "6호선"),
    ("마포구청", "6호선"),
    ("망원", "6호선"),
    ("합정", "6호선"),
    ("상수", "6호선"),
    ("광흥창(서강)", "6호선"),
    ("대흥(서강대앞)", "6호선"),
    ("공덕", "6호선"),
]

# Matches a parenthesised sub-name such as "(성산)" so it can be stripped.
_PAREN_RE = re.compile(r"\(.*?\)")

# Locations relative to the backend/ directory (see _backend_dir).
_RAW_CSV_RELPATH = Path("data") / "seed" / "raw" / "seoul_subway_stations_with_coords.csv"
_REJECT_RELDIR = Path("data") / "cleaned" / "reject"


def _backend_dir() -> Path:
    """Return the backend/ directory, derived from this file's location.

    This file lives at backend/scripts/ingest/<name>.py, so parents[2] is
    backend/. Anchoring on the file instead of the current working directory
    makes the defaults work both from the repository root and after
    ``cd backend`` (the documented way to run the script).
    """
    return Path(__file__).resolve().parents[2]


def _normalize(name: str) -> str:
    """Return a station name reduced to its matching key.

    Parenthesised sub-names are removed, surrounding whitespace is trimmed and
    a single trailing '역' suffix is dropped.
    """
    stripped = _PAREN_RE.sub("", name).strip()
    if stripped.endswith("역"):
        stripped = stripped[:-1]
    return stripped.strip()


def _load_raw(
    path: Path,
) -> tuple[dict[tuple[str, str], tuple[float, float]], dict[str, tuple[float, float]]]:
    """Load the raw coordinate CSV into two lookup tables.

    The CSV is read through its ``lat``, ``lon``, ``name`` and ``no_line``
    columns.

    Returns:
        ``(by_pair, by_name)`` where ``by_pair`` maps
        ``(normalized name, "<n>호선")`` to ``(lat, lon)`` (the last row wins on
        duplicates) and ``by_name`` maps the normalized name alone to the
        coordinates of the first row seen for that name.

    Rows that cannot be parsed (missing column, short row, non-numeric value,
    empty name) are skipped; the number of skipped rows is reported on stderr
    so that a broken input does not pass unnoticed.
    """
    by_pair: dict[tuple[str, str], tuple[float, float]] = {}
    by_name: dict[str, tuple[float, float]] = {}
    skipped = 0
    # utf-8-sig: a BOM would otherwise become part of the first header name
    # ("\ufefflat") and every row would be dropped with a KeyError.
    with path.open(encoding="utf-8-sig", newline="") as f:
        for row in csv.DictReader(f):
            try:
                norm = _normalize(row["name"])
                line = f"{int(row['no_line'].strip())}호선"
                lat = float(row["lat"])
                lon = float(row["lon"])
            except (KeyError, ValueError, TypeError, AttributeError):
                # TypeError / AttributeError: DictReader fills the missing
                # columns of a short row with None.
                skipped += 1
                continue
            if not norm:
                skipped += 1
                continue
            by_pair[(norm, line)] = (lat, lon)
            by_name.setdefault(norm, (lat, lon))
    if skipped:
        print(f"[fill] WARN: skipped {skipped} malformed raw row(s) in {path}", file=sys.stderr)
    return by_pair, by_name


def _match_stations(
    by_pair: dict[tuple[str, str], tuple[float, float]],
    by_name: dict[str, tuple[float, float]],
) -> tuple[list[dict], list[dict]]:
    """Resolve every MAPO_STATIONS entry to coordinates.

    An exact (name, line) hit is preferred; otherwise the coordinates of the
    same station name on any line are used ("name_fallback"). Entries with no
    hit at all are returned as rejects.
    """
    matched: list[dict] = []
    rejects: list[dict] = []
    for station_name, line_name in MAPO_STATIONS:
        norm = _normalize(station_name)
        coord = by_pair.get((norm, line_name))
        match_kind = "exact"
        if coord is None:
            coord = by_name.get(norm)
            match_kind = "name_fallback"
        if coord is None:
            rejects.append(
                {
                    "station_name": station_name,
                    "line_name": line_name,
                    "_reason": "no match in raw CSV",
                }
            )
            continue
        matched.append(
            {
                "station_name": station_name,
                "line_name": line_name,
                "lat": coord[0],
                "lon": coord[1],
                "_match": match_kind,
            }
        )
    return matched, rejects


def fill(raw_path: Path, db_url: str, dry_run: bool) -> int:
    """Fill lat/lon (and sigungu_code) of the Mapo rows in master_subway_station.

    Args:
        raw_path: Raw coordinate CSV to read.
        db_url: PostgreSQL URL; a "+asyncpg" / "+psycopg" driver suffix is removed.
        dry_run: When true, only print the matching result and leave the DB alone.

    Returns:
        2 if the raw CSV is missing; 0 for a dry run; otherwise 0 when the
        number of updated rows equals ``len(MAPO_STATIONS)`` and 1 if not.
    """
    if not raw_path.exists():
        print(f"[fill] ERROR: raw CSV not found: {raw_path}", file=sys.stderr)
        return 2

    by_pair, by_name = _load_raw(raw_path)
    print(f"[fill] raw stations loaded: pairs={len(by_pair)} unique_names={len(by_name)}")

    matched, rejects = _match_stations(by_pair, by_name)

    print(f"[fill] matched={len(matched)}/{len(MAPO_STATIONS)}, rejects={len(rejects)}")
    for m in matched:
        print(f" {m['station_name']:<14} {m['line_name']} lat={m['lat']:.6f} lon={m['lon']:.6f} ({m['_match']})")
    for r in rejects:
        print(f" REJECT: {r}")

    if dry_run:
        print("[fill] dry-run - DB not modified")
        return 0

    # psycopg expects a plain postgresql:// URL without a driver suffix.
    url = db_url.replace("+asyncpg", "").replace("+psycopg", "")
    updated = 0
    with psycopg.connect(url) as conn:
        with conn.cursor() as cur:
            for m in matched:
                cur.execute(
                    """
                    UPDATE master_subway_station
                    SET lat = %s, lon = %s, sigungu_code = '11440'
                    WHERE station_name = %s AND line_name = %s
                    """,
                    (m["lat"], m["lon"], m["station_name"], m["line_name"]),
                )
                if cur.rowcount == 0:
                    # Matched in the raw CSV but absent from the master table.
                    rejects.append(
                        {
                            "station_name": m["station_name"],
                            "line_name": m["line_name"],
                            "_reason": "DB row not found",
                        }
                    )
                else:
                    updated += cur.rowcount
        conn.commit()

    print(f"[fill] DB updated rows: {updated}")

    if rejects:
        # Anchored on backend/ so the report lands in backend/data/cleaned/reject
        # regardless of the current working directory.
        reject_dir = _backend_dir() / _REJECT_RELDIR
        reject_dir.mkdir(parents=True, exist_ok=True)
        reject_path = reject_dir / "subway_coords_unmatched.csv"
        with reject_path.open("w", encoding="utf-8", newline="") as f:
            w = csv.DictWriter(f, fieldnames=["station_name", "line_name", "_reason"])
            w.writeheader()
            w.writerows(rejects)
        print(f"[fill] rejects: {reject_path}")

    return 0 if updated == len(MAPO_STATIONS) else 1


def main() -> int:
    """Parse the command line and run fill(); the result is the process exit code."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw",
        type=Path,
        # Resolved from the script location, so the default is valid after
        # `cd backend` as well as from the repository root.
        default=_backend_dir() / _RAW_CSV_RELPATH,
    )
    parser.add_argument("--db-url", default=_DEFAULT_DB_URL)
    parser.add_argument("--dry-run", action="store_true")
    args = parser.parse_args()
    return fill(args.raw, args.db_url, args.dry_run)


if __name__ == "__main__":
    sys.exit(main())
/dev/null +++ b/backend/scripts/ingest/fill_subway_coords_csv.py @@ -0,0 +1,84 @@ +"""seed CSV (master_subway_station_all.csv) 의 마포 행에 좌표 in-place 업데이트. + +DB UPDATE 와 별개로 seed CSV truth 도 같이 갱신 — 재배포/DB 재생성 시 빈 좌표 재출하 방지. + +매칭 로직은 fill_subway_coords.py 와 동일. +""" + +from __future__ import annotations + +import argparse +import csv +import sys +from pathlib import Path + +from scripts.ingest.fill_subway_coords import MAPO_STATIONS, _load_raw, _normalize + + +def update_csv(seed_path: Path, raw_path: Path, dry_run: bool) -> int: + if not seed_path.exists(): + print(f"[csv] seed missing: {seed_path}", file=sys.stderr) + return 2 + if not raw_path.exists(): + print(f"[csv] raw missing: {raw_path}", file=sys.stderr) + return 2 + + by_pair, by_name = _load_raw(raw_path) + target = {(s, ln) for s, ln in MAPO_STATIONS} + + rows: list[dict] = [] + fieldnames: list[str] = [] + with seed_path.open(encoding="utf-8") as f: + reader = csv.DictReader(f) + fieldnames = list(reader.fieldnames or []) + rows = list(reader) + + updated = 0 + for row in rows: + key = (row["station_name"].strip(), row["line_name"].strip()) + if key not in target: + continue + norm = _normalize(key[0]) + coord = by_pair.get((norm, key[1])) or by_name.get(norm) + if coord is None: + continue + new_lat, new_lon = coord + if row.get("lat") != f"{new_lat:.7f}" or row.get("lon") != f"{new_lon:.7f}": + row["lat"] = f"{new_lat:.7f}" + row["lon"] = f"{new_lon:.7f}" + row["sigungu_code"] = "11440" + updated += 1 + + print(f"[csv] mapo rows updated in-memory: {updated}/14") + + if dry_run: + print("[csv] dry-run - file not written") + return 0 + + with seed_path.open("w", encoding="utf-8", newline="") as f: + w = csv.DictWriter(f, fieldnames=fieldnames) + w.writeheader() + w.writerows(rows) + print(f"[csv] wrote: {seed_path}") + return 0 + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument( + "--seed", + type=Path, + default=Path("data/seed/master_subway_station_all.csv"), + ) + 
parser.add_argument( + "--raw", + type=Path, + default=Path("data/seed/raw/seoul_subway_stations_with_coords.csv"), + ) + parser.add_argument("--dry-run", action="store_true") + args = parser.parse_args() + return update_csv(args.seed, args.raw, args.dry_run) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/backend/scripts/verify/verify_emerging_trend_data.py b/backend/scripts/verify/verify_emerging_trend_data.py index 6bbbee16..5059d446 100644 --- a/backend/scripts/verify/verify_emerging_trend_data.py +++ b/backend/scripts/verify/verify_emerging_trend_data.py @@ -15,9 +15,15 @@ import os import sys +from pathlib import Path import psycopg +from dotenv import load_dotenv +# repo root .env auto-load +_REPO_ROOT_ENV = Path(__file__).resolve().parents[3] / ".env" +if _REPO_ROOT_ENV.exists(): + load_dotenv(_REPO_ROOT_ENV) _DEFAULT_DB_URL = os.environ.get( "POSTGRES_URL", @@ -102,6 +108,29 @@ def main() -> int: print(f" ERROR: PK duplicates: {dup}") errors += 1 + # 마포 지하철역 좌표 coverage — fill_subway_coords 실행 후 필수. + with conn.cursor() as cur: + cur.execute("SELECT COUNT(*) FROM master_subway_station WHERE sigungu_code='11440'") + mapo_total = cur.fetchone()[0] + cur.execute( + "SELECT COUNT(*) FROM master_subway_station " + "WHERE sigungu_code='11440' AND lat IS NOT NULL AND lon IS NOT NULL" + ) + mapo_with_coord = cur.fetchone()[0] + cur.execute( + "SELECT COUNT(*) FROM master_subway_station " + "WHERE sigungu_code='11440' AND lat IS NOT NULL " + "AND (lat NOT BETWEEN 37.53 AND 37.59 OR lon NOT BETWEEN 126.87 AND 126.97)" + ) + out_of_bbox = cur.fetchone()[0] + print(f"[master_subway_station coords] mapo={mapo_total} with_coord={mapo_with_coord}") + if mapo_with_coord < mapo_total: + print(f" WARN: {mapo_total - mapo_with_coord} mapo stations missing coord") + warnings += 1 + if out_of_bbox > 0: + print(f" ERROR: {out_of_bbox} mapo stations outside Mapo bbox") + errors += 1 + print() print(f"errors={errors} warnings={warnings}") return 1 if errors else 0