-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata_loader.py
More file actions
148 lines (116 loc) · 6.11 KB
/
Copy pathdata_loader.py
File metadata and controls
148 lines (116 loc) · 6.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3
"""
Data loading utilities for Fantasy Football Draft Algorithm
Handles loading ESPN rankings and actual performance data
"""
import os
from typing import Any, Dict, Iterable, Optional

import pandas as pd
class DataLoader:
    """Load and preprocess fantasy football data from Excel workbooks.

    Two kinds of workbook are read from ``data_dir``, one per season:

    * ``fantasy_rankings_<year>.xlsx`` -- ESPN draft rankings.
    * ``<year>.xlsx`` -- actual end-of-season fantasy points.

    Loading is best-effort: a missing or unreadable workbook is reported on
    stdout and that season is simply left out of the result.
    """

    # Seasons loaded when the caller does not pass ``years``.
    DEFAULT_YEARS = (2020, 2021, 2022, 2023, 2024)

    # Class-level default so instances created without ``__init__`` still work.
    data_dir = 'data'

    # Source column -> canonical column; applied only when the canonical
    # column is not already present in the sheet.
    COLUMN_RENAMES = {
        'player_name': 'Player',
        'overall_rank': 'ADP_Rank',
        'position_rank': 'Position_Rank',
    }

    # Columns a rankings sheet is expected to provide after standardization.
    REQUIRED_COLUMNS = ('Player', 'position', 'team', 'ADP_Rank', 'auction_value')

    # Candidate names for the fantasy points column, in priority order.
    POINTS_COLUMNS = ('PPR', 'FantPt', 'Fantasy Points', 'FPTS', 'Points')

    # Marker characters ('*', '+') removed from player names in points sheets.
    _STAT_MARKERS = str.maketrans('', '', '*+')

    # Punctuation ignored when comparing names ("D.J. Moore" == "DJ Moore").
    _NAME_NOISE = str.maketrans('', '', ".,'")

    def __init__(self, years: Optional[Iterable[int]] = None, data_dir: str = 'data'):
        """Create a loader.

        Args:
            years: Seasons to load. Defaults to ``DEFAULT_YEARS``.
            data_dir: Directory containing the workbooks. Defaults to ``data``.
        """
        self.years = list(self.DEFAULT_YEARS if years is None else years)
        self.data_dir = data_dir

    def load_espn_rankings(self) -> Dict[int, pd.DataFrame]:
        """Load ESPN draft rankings for all years.

        Column names are standardized via ``COLUMN_RENAMES`` and an
        ``adp_rank`` column is copied to ``ADP_Rank`` when the latter is
        absent. A sheet that still lacks required columns is kept, but a
        warning listing the missing columns is printed.

        Returns:
            Mapping of year to the loaded DataFrame. Years whose workbook is
            missing or fails to load are omitted.
        """
        espn_data = {}

        for year in self.years:
            # Bound before the try block so the error message can always use it.
            filename = os.path.join(self.data_dir, f'fantasy_rankings_{year}.xlsx')

            if not os.path.exists(filename):
                print(f"❌ File not found: {filename}")
                continue

            try:
                df = pd.read_excel(filename)

                # Standardize column names if needed
                renames = {
                    old_col: new_col
                    for old_col, new_col in self.COLUMN_RENAMES.items()
                    if old_col in df.columns and new_col not in df.columns
                }
                if renames:
                    df = df.rename(columns=renames)

                # Resolve the lowercase alias *before* validating, so the
                # warning below only reports columns that are really absent.
                if 'ADP_Rank' not in df.columns and 'adp_rank' in df.columns:
                    df['ADP_Rank'] = df['adp_rank']

                # Ensure required columns exist
                missing_cols = [col for col in self.REQUIRED_COLUMNS
                                if col not in df.columns]
                if missing_cols:
                    print(f" Missing columns in {filename}: {missing_cols}")
            except Exception as e:
                # Deliberate best-effort boundary: one bad workbook must not
                # prevent the remaining seasons from loading.
                print(f"❌ Error loading {filename}: {str(e)}")
                continue

            espn_data[year] = df
            print(f"✅ Loaded {year} ESPN rankings: {len(df)} players")

        return espn_data

    def load_actual_points(self) -> Dict[int, Dict[str, float]]:
        """Load actual fantasy points for all years.

        The points column is the first entry of ``POINTS_COLUMNS`` present in
        the sheet. Player names have ``*`` and ``+`` removed and surrounding
        whitespace stripped.

        Returns:
            Mapping of year to ``{player name: points}``. Years whose workbook
            is missing, unreadable, or lacks a ``Player`` or points column are
            omitted.
        """
        actual_data = {}

        for year in self.years:
            # Bound before the try block so the error message can always use it.
            filename = os.path.join(self.data_dir, f'{year}.xlsx')

            if not os.path.exists(filename):
                print(f"❌ File not found: {filename}")
                continue

            try:
                # Skip first row which often contains merged headers
                df = pd.read_excel(filename, skiprows=1)

                if 'Player' not in df.columns:
                    print(f"❌ No 'Player' column found in {filename}")
                    continue

                # Find fantasy points column
                points_col = next(
                    (col for col in self.POINTS_COLUMNS if col in df.columns), None)
                if points_col is None:
                    print(f"❌ No fantasy points column found in {filename}")
                    print(f" Available columns: {df.columns.tolist()}")
                    continue

                player_points = self._build_points_map(df['Player'], df[points_col])
            except Exception as e:
                # Deliberate best-effort boundary: one bad workbook must not
                # prevent the remaining seasons from loading.
                print(f"❌ Error loading {filename}: {str(e)}")
                continue

            actual_data[year] = player_points
            print(f"✅ Loaded {year} actual points: {len(player_points)} players")

        return actual_data

    @classmethod
    def _build_points_map(cls, names, points) -> Dict[str, float]:
        """Pair cleaned player names with their numeric points.

        Rows with a missing or blank name, missing points, or points that
        cannot be converted to ``float`` are skipped. When a name occurs more
        than once, the last row wins.
        """
        player_points = {}
        for raw_name, raw_points in zip(names, points):
            if pd.isna(raw_name) or pd.isna(raw_points):
                continue
            # str() keeps a non-string cell from aborting the whole season.
            player_name = str(raw_name).translate(cls._STAT_MARKERS).strip()
            if not player_name:
                continue
            try:
                player_points[player_name] = float(raw_points)
            except (ValueError, TypeError):
                continue
        return player_points

    @classmethod
    def _normalize_name(cls, name: str) -> str:
        """Return ``name`` lowercased, without ``. , '`` and with single spaces."""
        return ' '.join(name.lower().translate(cls._NAME_NOISE).split())

    def match_player_points(self, player_name: str, actual_data: Dict[str, float]) -> float:
        """Match player name to actual points with fuzzy matching.

        Matching is attempted in decreasing order of confidence:

        1. exact key lookup;
        2. equality after normalization (case, punctuation, spacing);
        3. the first entry where one normalized name contains the other, or
           where ``_names_similar`` accepts the pair.

        Args:
            player_name: Name to look up.
            actual_data: Mapping of player name to points for one season.

        Returns:
            The matched points, or ``0.0`` when nothing matches or when
            ``player_name`` is blank or not a string.
        """
        # Direct match
        if player_name in actual_data:
            return actual_data[player_name]

        if not isinstance(player_name, str):
            return 0.0
        wanted = self._normalize_name(player_name)
        if not wanted:
            # An empty string is a substring of every name; never fuzzy-match it.
            return 0.0

        fuzzy_found = False
        fuzzy_points = 0.0
        for actual_name, points in actual_data.items():
            if not isinstance(actual_name, str):
                continue
            candidate = self._normalize_name(actual_name)
            if not candidate:
                continue
            if candidate == wanted:
                # A normalized exact match always beats an earlier fuzzy one.
                return points
            if not fuzzy_found and (wanted in candidate
                                    or candidate in wanted
                                    or self._names_similar(wanted, candidate)):
                fuzzy_found = True
                fuzzy_points = points

        return fuzzy_points if fuzzy_found else 0.0

    def _names_similar(self, name1: str, name2: str) -> bool:
        """Check if two names are similar (basic similarity check).

        Names are split on whitespace. They are similar when they share at
        least two parts, or -- when one of them consists of a single part --
        when that part is shared. One shared part between two multi-part
        names (e.g. the same first name) is not enough.
        """
        parts1 = set(name1.split())
        parts2 = set(name2.split())
        if not parts1 or not parts2:
            return False

        required = min(2, len(parts1), len(parts2))
        return len(parts1 & parts2) >= required