-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata_loader.py
More file actions
65 lines (51 loc) · 2.08 KB
/
Copy pathdata_loader.py
File metadata and controls
65 lines (51 loc) · 2.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import pandas as pd
import os
import glob
def load_stock_data(data_folder: str, *, history_days: int = 120,
                    min_days: int = 60, encoding: str = 'utf-8') -> dict:
    """Load every per-stock CSV file in a folder into a dictionary.

    Files are matched with the pattern ``*.csv`` directly inside
    ``data_folder`` and processed in sorted path order, so that when two
    files yield the same stock code the later path always wins.

    The stock code and name come from the file name without its extension:
    ``<code>_<name>.csv`` gives ``code`` and ``name`` (only the segment
    between the first and second underscore is used as the name), while a
    name without an underscore is used as both code and name.

    A file is skipped silently when it lacks any required column or when
    fewer than ``min_days`` rows remain after cleaning. Any exception raised
    while handling a file is reported on stdout and the file is skipped.

    Args:
        data_folder: Path of the folder containing the stock CSV files.
        history_days: Number of most recent rows (by date) to keep per stock.
        min_days: Minimum number of rows that must remain after dropping
            rows with missing values for the stock to be included.
        encoding: Text encoding passed to ``pandas.read_csv``.

    Returns:
        A dict mapping stock code to ``{'name': <stock name>,
        'data': <DataFrame sorted by date, ascending>}``.
    """
    # Loop-invariant column lists, built once instead of once per file.
    required_columns = ['日期', '收盘价', '总市值(元)', '流通市值(元)', '换手率(%)']
    cleaning_columns = ['收盘价', '总市值(元)', '换手率(%)']

    print("正在加载股票数据...")
    stock_data = {}

    # glob returns paths in arbitrary order; sort for reproducible results.
    csv_files = sorted(glob.glob(os.path.join(data_folder, "*.csv")))

    for file_path in csv_files:
        # Bound before the try so the error message below can always use it.
        filename = os.path.basename(file_path)
        try:
            # splitext removes only the real extension, whatever its case.
            stem = os.path.splitext(filename)[0]
            parts = stem.split('_')
            stock_code = parts[0]
            stock_name = parts[1] if len(parts) > 1 else stock_code

            df = pd.read_csv(file_path, encoding=encoding)

            # Normalise header names so stray whitespace does not hide a column.
            df.columns = df.columns.str.strip()
            if not all(col in df.columns for col in required_columns):
                continue

            df['日期'] = pd.to_datetime(df['日期'])
            df = df.sort_values('日期')

            # Take the most recent window first, then drop incomplete rows:
            # gaps inside the window reduce the usable row count on purpose.
            df = df.tail(history_days)
            df = df.dropna(subset=cleaning_columns)
            if len(df) < min_days:
                continue

            stock_data[stock_code] = {
                'name': stock_name,
                'data': df
            }
        except Exception as e:
            # Best-effort loading: one bad file must not abort the whole run.
            print(f"加载 {filename} 失败: {e}")
            continue

    print(f"成功加载 {len(stock_data)} 只股票数据")
    return stock_data