-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathaggregate_classification.py
More file actions
63 lines (51 loc) · 1.67 KB
/
aggregate_classification.py
File metadata and controls
63 lines (51 loc) · 1.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python3
import argparse
from pathlib import Path
import pandas as pd
def main(argv=None) -> None:
    """Merge every per-experiment ``classification_summary.csv`` into one CSV.

    Searches ``<root>/exp_*/classification_summary.csv``, loads each match
    with pandas, tags its rows with a ``source_file`` column holding the path
    of the file they came from, concatenates all rows, drops exact duplicate
    rows and writes the result to the output path.

    Args:
        argv: Command-line arguments to parse, without the program name.
            ``None`` (the default) lets argparse read ``sys.argv[1:]``, so a
            plain ``main()`` call behaves as before.

    Returns:
        None. Progress is reported on stdout. When no summary file is found,
        or none of them can be loaded, a message is printed and nothing is
        written.

    Raises:
        OSError: If the output directory or file cannot be created/written.
    """
    parser = argparse.ArgumentParser(
        description="Aggregate all classification_summary.csv files into one all_classification.csv"
    )
    parser.add_argument(
        "--root",
        type=str,
        default="~/IsoCLIP/exp_img_classification/clip_b32_classification",
        help="Root folder containing exp_* subfolders",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=None,
        help="Output CSV path. Default: <root>/all_classification.csv",
    )
    args = parser.parse_args(argv)

    root = Path(args.root).expanduser().resolve()
    output_path = (
        Path(args.output).expanduser().resolve()
        if args.output is not None
        else root / "all_classification.csv"
    )

    # Sorted so the row order of the aggregated file is reproducible.
    csv_files = sorted(root.glob("exp_*/classification_summary.csv"))
    if not csv_files:
        print(f"No classification_summary.csv found in: {root}")
        return

    dfs = []
    for csv_file in csv_files:
        # Best effort on purpose: one unreadable or malformed summary must not
        # abort the whole aggregation, so it is reported and skipped.
        try:
            df = pd.read_csv(csv_file)
            df["source_file"] = str(csv_file)
        except Exception as e:
            print(f"Skipping {csv_file} due to error: {e}")
        else:
            dfs.append(df)
            print(f"Loaded: {csv_file}")

    if not dfs:
        print("No valid CSV files could be loaded.")
        return

    all_df = pd.concat(dfs, ignore_index=True)

    # Remove exact duplicate rows. Because every row carries its own
    # source_file value, this only collapses rows repeated within one file.
    all_df = all_df.drop_duplicates()

    # A custom --output may point into a directory that does not exist yet;
    # create it instead of letting to_csv fail after all the loading work.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    all_df.to_csv(output_path, index=False)

    print(f"\nSaved aggregated CSV to: {output_path}")
    print(f"Total rows: {len(all_df)}")
# Run the aggregation only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()