-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
98 lines (78 loc) · 3.31 KB
/
main.py
File metadata and controls
98 lines (78 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Main pipeline: load sources, fetch, generate 10 best post ideas from all (no grouping by source)."""
from __future__ import annotations
import logging
import os
import sys
from config import DEFAULT_MODEL, DEFAULT_URLS_FILE
from fetcher import fetch_entries_by_source
from langfuse import get_client
from llm import generate_post_ideas
from models import FeedEntry
from sources import load_sources
# Logger used by this pipeline; logging output is configured in main() via
# logging.basicConfig.
logger = logging.getLogger("content_engine")
# Number of post ideas requested from the LLM (passed as `count` in main()).
TOTAL_IDEAS = 10
# Cap on the total number of fetched entries sent to the LLM, to avoid token
# overflow; main() keeps only the first MAX_ENTRIES_FOR_LLM entries.
MAX_ENTRIES_FOR_LLM = 50
def build_content_block(
    entries: list[FeedEntry],
    include_links: bool = True,
    *,
    max_content_chars: int = 8000,
) -> str:
    """Build a single text block for the LLM from entries.

    Each entry becomes a numbered section (numbering starts at 1, in the order
    of ``entries``) with ``Title``, optional ``Link``, ``Source``,
    ``Published`` and ``Content`` lines. Sections are separated by a ``---``
    divider line followed by a blank line.

    Args:
        entries: Entries to render; each must expose ``title``, ``link``,
            ``source_feed``, ``published`` and ``content`` attributes.
        include_links: When true, a ``Link:`` line is emitted directly after
            the title line.
        max_content_chars: Maximum number of characters of each entry's
            content to include (expected to be non-negative). Defaults to
            8000.

    Returns:
        The concatenated text block, or an empty string when ``entries`` is
        empty.
    """
    parts: list[str] = []
    for i, e in enumerate(entries, 1):
        lines = [f"[{i}] Title: {e.title}"]
        if include_links:
            lines.append(f"Link: {e.link}")
        # An entry without a body (None) is rendered with empty content
        # instead of failing on the slice.
        content = (e.content or "")[:max_content_chars]
        lines += [
            f"Source: {e.source_feed}",
            f"Published: {e.published}",
            f"Content:\n{content}",
        ]
        parts.append("\n".join(lines))
    return "\n---\n\n".join(parts)
def build_sources_list(entries: list[FeedEntry]) -> str:
    """Render the numbered source index handed to the LLM.

    Produces one ``[N] <link>`` line per entry, where N counts from 1 in the
    order of ``entries``; the lines are joined with newlines. An empty
    ``entries`` list yields an empty string.
    """
    numbered_links: list[str] = []
    for position, entry in enumerate(entries, start=1):
        numbered_links.append(f"[{position}] {entry.link}")
    return "\n".join(numbered_links)
def main() -> None:
    """Run the pipeline: load sources, fetch entries, print post ideas.

    Reads ``URLS_FILE`` and ``OPENAI_MODEL`` from the environment (falling
    back to ``DEFAULT_URLS_FILE`` / ``DEFAULT_MODEL``) and requires
    ``OPENAI_API_KEY`` to be set. All fetched entries are pooled regardless of
    source, capped at ``MAX_ENTRIES_FOR_LLM``, and sent to the LLM in a single
    request for ``TOTAL_IDEAS`` ideas, which are printed to stdout.

    Exits with status 1 (message on stderr) when the API key is missing, no
    sources are configured, or no entries could be fetched.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        stream=sys.stdout,
    )
    urls_path = os.environ.get("URLS_FILE", DEFAULT_URLS_FILE)
    model = os.environ.get("OPENAI_MODEL", DEFAULT_MODEL)

    if not os.environ.get("OPENAI_API_KEY"):
        print("Set OPENAI_API_KEY in .env or environment.", file=sys.stderr)
        sys.exit(1)

    sources = load_sources(urls_path)
    if not sources:
        print(
            f"No sources in {urls_path}. Add lines: type\\turl (type: rss | html).",
            file=sys.stderr,
        )
        sys.exit(1)

    logger.info("Fetching entries from %s source(s)...", len(sources))
    by_source = fetch_entries_by_source(sources)

    # Pool entries from every source; the source key itself is not needed here.
    all_entries: list[FeedEntry] = []
    for _src, source_entries in by_source:
        all_entries.extend(source_entries)
    if not all_entries:
        print("No entries fetched.", file=sys.stderr)
        sys.exit(1)

    # NOTE(review): the cap keeps the first entries in fetch order, so sources
    # fetched later may be dropped entirely when earlier ones are large.
    entries = all_entries[:MAX_ENTRIES_FOR_LLM]
    logger.info(
        "Using %s entries (of %s) for LLM, generating %s best ideas total",
        len(entries),
        len(all_entries),
        TOTAL_IDEAS,
    )

    content_block = build_content_block(entries, include_links=True)
    sources_list = build_sources_list(entries)
    logger.info("Content block: %s chars, calling LLM...", len(content_block))

    try:
        ideas = generate_post_ideas(
            content_block, model, count=TOTAL_IDEAS, sources_list=sources_list
        )
        logger.info("Post ideas received: %s", len(ideas))
        _print_ideas(ideas, TOTAL_IDEAS)
    finally:
        # Flush the Langfuse client even when the LLM call or printing fails,
        # so pending events are not lost when the process exits.
        # Presumably the LLM call is instrumented with Langfuse; confirm in llm.
        get_client().flush()


def _print_ideas(ideas, total: int) -> None:
    """Print the ideas to stdout as numbered Markdown-style sections.

    ``total`` is the requested number of ideas and is only used in the header;
    each idea is expected to expose ``source_links``, ``source_insight``,
    ``post_idea``, ``description``, ``recommended_format`` and ``how_to_use``.
    """
    print(f"\n--- {total} best post ideas (all sources) ---\n")
    for i, idea in enumerate(ideas, 1):
        print(f"### Idea {i}")
        print("**Source(s):** " + (", ".join(idea.source_links) if idea.source_links else "—"))
        print(f"**Source insight:** {idea.source_insight}")
        print(f"**Post idea:** {idea.post_idea}")
        print(f"**Description:** {idea.description}")
        print(f"**Recommended format:** {idea.recommended_format}")
        print(f"**How to use:** {idea.how_to_use}")
        print()