declaude/exporter.py at main · jftuga/declaude · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Orchestrates the full export pipeline: loads the Claude conversations
# JSON, converts each conversation to a styled HTML file organized in
# date-based folders, and generates an index page for navigation.

import json
import sys
from pathlib import Path
from zoneinfo import ZoneInfo

from conversation import Conversation
from html_renderer import HtmlRenderer


def load_conversations(json_path: Path) -> list[Conversation]:
    """Read a Claude export file and return its conversations, oldest first.

    Each top-level item of the decoded JSON is handed to
    ``Conversation.from_dict``; the resulting objects are ordered by their
    ``created_at`` attribute.

    Args:
        json_path: Location of the conversations.json export.

    Returns:
        Conversation objects in ascending ``created_at`` order.

    Raises:
        FileNotFoundError: If json_path does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_path, encoding="utf-8") as handle:
        payload = json.load(handle)

    parsed = []
    for entry in payload:
        parsed.append(Conversation.from_dict(entry))

    # sorted() is stable, so items with equal created_at keep file order.
    return sorted(parsed, key=lambda conv: conv.created_at)


def resolve_filename(folder: Path, base_name: str, seen: dict[str, int]) -> str:
    """Generate a unique filename stem, appending a numeric suffix on collision.

    Names are compared case-insensitively (per folder) so that the result is
    safe on case-insensitive filesystems such as the macOS/Windows defaults.

    The first use of a stem is returned unchanged. Later uses get ``_2``,
    ``_3``, ... appended. Every name handed out, including the suffixed
    ones, is recorded in ``seen``, so a later conversation whose own stem
    happens to equal an already generated ``<stem>_<n>`` cannot silently
    overwrite that file.

    Args:
        folder: Target directory for the file.
        base_name: Desired filename stem (without extension).
        seen: Dict tracking previously used names (lowercased, prefixed with
            the folder) and the highest suffix tried for each. Mutated in
            place; pass the same dict for every call of one export run.

    Returns:
        A filename stem not previously returned for this folder.
    """
    prefix = f"{folder}/"
    key = (prefix + base_name).lower()

    if key not in seen:
        seen[key] = 1
        return base_name

    # Resume from the last suffix used for this stem and skip any candidate
    # that is already taken (e.g. by a conversation literally named "x_2").
    suffix = seen[key]
    while True:
        suffix += 1
        candidate = f"{base_name}_{suffix}"
        candidate_key = (prefix + candidate).lower()
        if candidate_key not in seen:
            break

    seen[key] = suffix
    # Reserve the generated name so it cannot be handed out again.
    seen[candidate_key] = 1
    return candidate


def export_conversations(
    json_path: Path,
    output_dir: Path,
    tz: ZoneInfo | None = None,
    source_file: str = "",
) -> int:
    """Export every non-empty conversation to HTML and build the index page.

    Conversations are loaded from ``json_path``; those with no chat messages
    are dropped. Each remaining conversation is rendered and written under
    ``output_dir`` in the subfolder given by ``conv.folder_path(tz)``, and an
    ``index.html`` listing all of them is written at the output root. A
    progress line is printed to stderr after every 25th conversation.

    Args:
        json_path: Path to the conversations.json file.
        output_dir: Root output directory for HTML files (created if missing).
        tz: Timezone for timestamps. Defaults to US/Eastern.
        source_file: Optional source filename passed to the index renderer.

    Returns:
        Number of conversations exported.
    """
    if not tz:
        tz = ZoneInfo("US/Eastern")
    renderer = HtmlRenderer(tz)

    # Conversations without any messages are skipped entirely.
    conversations = []
    for loaded in load_conversations(json_path):
        if loaded.chat_messages:
            conversations.append(loaded)
    total = len(conversations)

    output_dir.mkdir(parents=True, exist_ok=True)

    seen: dict[str, int] = {}
    index_entries: list[dict] = []

    for count, conv in enumerate(conversations, start=1):
        folder_rel = conv.folder_path(tz)
        folder = output_dir / folder_rel
        folder.mkdir(parents=True, exist_ok=True)

        # De-duplicate the stem so two conversations never share a file.
        unique_name = resolve_filename(folder, conv.filename_stem(tz), seen)

        html_content = renderer.render_conversation(conv)
        (folder / f"{unique_name}.html").write_text(html_content, encoding="utf-8")

        created_dt = conv.created_datetime(tz)
        entry = {
            "date": created_dt.strftime("%Y-%m-%d"),
            "title": conv.name,
            # Path is relative to output_dir, where index.html is written.
            "path": f"{folder_rel}/{unique_name}.html",
            "created_dt": created_dt,
            "has_attachments": any(msg.attachments for msg in conv.chat_messages),
        }
        index_entries.append(entry)

        if count % 25 == 0:
            print(f"Processing: {count}/{total}", file=sys.stderr)

    index_path = output_dir / "index.html"
    index_path.write_text(
        renderer.render_index(index_entries, source_file),
        encoding="utf-8",
    )

    # Every conversation in the filtered list was exported.
    return total