-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
23 lines (20 loc) · 707 Bytes
/
main.py
File metadata and controls
23 lines (20 loc) · 707 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import asyncio
import json
from extractor_engine import extract_all
from smart_fetcher import smart_fetch
async def run_pipeline(urls):
    """Fetch each URL in turn, extract its data, and dump all results to JSON.

    URLs are awaited one at a time, in the order given. For a page whose
    ``"status"`` is ``"ok"`` the value returned by
    ``extract_all(page["content"], url)`` is recorded; for any other status a
    ``{"url": ..., "status": ...}`` record is stored instead, so the output
    holds exactly one entry per input URL.

    Args:
        urls: Iterable of URLs to hand to ``smart_fetch``.

    Side effects:
        Writes the collected results to ``data/final_output.json`` (relative
        to the current working directory) as UTF-8 JSON, creating the
        ``data`` directory first if it does not exist.
    """
    # Local import so this function stays self-contained without touching
    # the module's import block.
    import os

    output_path = "data/final_output.json"

    results = []
    for url in urls:
        page = await smart_fetch(url)
        if page["status"] == "ok":
            results.append(extract_all(page["content"], url))
        else:
            # Keep a placeholder for failed fetches so every URL is accounted for.
            results.append({"url": url, "status": page["status"]})

    # Without this, a missing "data" directory raises FileNotFoundError only
    # after all pages have been fetched, discarding the collected results.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2, ensure_ascii=False)
# Pages processed when this file is executed as a script.
TARGET_URLS = [
    "https://www.vsa-nj.com/about-us",
    "https://fcmalagacity.com/about/",
    "https://www.spanishprofootball.com/",
]

# Guard the entry point so importing this module (e.g. to reuse
# run_pipeline) does not start fetching pages and writing output.
if __name__ == "__main__":
    asyncio.run(run_pipeline(TARGET_URLS))