-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathproject_part_2.py
More file actions
164 lines (120 loc) · 6.1 KB
/
project_part_2.py
File metadata and controls
164 lines (120 loc) · 6.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
"""
This module contains the code required to operate the second part of the project
"""
import matplotlib.pyplot as plot
import pandas as p
from matplotlib.figure import Figure
import project_part_1 as proj1
# Mapping taken from project_part_1. In this module it is indexed by industry
# name and the result is used as a row position in both datasets (via ``iloc``).
industries_and_indexes = proj1.industry_and_its_index  # The dictionary corresponding to the industries
# Both CSV files are read once, when this module is imported. The paths are
# relative, so change the path strings below if the file names or the folder
# names are different.
pre_pandemic_data = p.read_csv('Datasets/pre_pandemic.csv')
during_the_pandemic_data = p.read_csv('Datasets/during_the_pandemic (1).csv')
# Maps every accepted ``time_frame`` label to its zero-based month position.
# NOTE: "Decemeber" is misspelled, but the label is part of the lookup
# interface, so it is kept exactly as callers already pass it.
_DATES_TO_INDEXES = {
    "January 2019- 20": 0, "February 2019- 20": 1,
    "March 2019- 20": 2, "April 2019- 20": 3,
    "May 2019- 20": 4, "June 2019- 20": 5,
    "July 2019- 20": 6, "August 2019- 20": 7,
    "September 2019- 20": 8, "October 2019- 20": 9,
    "November 2019- 20": 10, "Decemeber 2019- 20": 11,
}


def _format_industry_name(industry: str) -> str:
    """Return ``industry`` with its words capitalised for display.

    The first word is left as given. Every later word that starts with a
    lowercase letter gets that letter capitalised, except the word 'and'.
    When ``industry`` is longer than 30 characters a newline element is
    inserted halfway through the word list so the name wraps onto two lines.
    """
    words = industry.split(' ')
    formatted = words[:1]
    for word in words[1:]:
        # An empty ``word`` comes from consecutive spaces; it is kept as-is
        # (so the spacing is preserved) instead of being indexed.
        if word and word != 'and' and word[0].islower():
            formatted.append(word[0].capitalize() + word[1:])
        else:
            formatted.append(word)
    if len(industry) > 30:
        formatted.insert(len(words) // 2, '\n')
    return ' '.join(formatted)


def create_dataframe(industry: str, time_frame: str) -> p.DataFrame:
    """Create and return a one-row pandas DataFrame comparing both datasets.

    The row is labelled 'values' and has the columns "Industry",
    "Time Frame", "Pre-Pandemic Value", "Value During the Pandemic",
    "Increase- Decrease" (difference rounded to 2 decimals) and
    "Percentage of Increase - Decrease" (a string ending in '%', the change
    relative to the pre-pandemic value rounded to 2 decimals).

    Accepted ``time_frame`` labels are "<Month> 2019- 20" for January to
    November, plus "Decemeber 2019- 20" (spelled exactly like that).

    Raises KeyError when ``time_frame`` or ``industry`` is not a known key.

    Preconditions:
        - time_frame in _DATES_TO_INDEXES
        - industry in proj1.industry_and_its_index
    """
    # Resolve both keys first so that invalid input fails before any other work.
    month_index = _DATES_TO_INDEXES[time_frame]
    row = industries_and_indexes[industry]

    # ``iloc[row, column]`` selects one cell by position. The month columns
    # are read at ``month_index + 1``, i.e. the first column is skipped
    # (presumably it holds the industry labels -- confirm against the CSVs).
    column = month_index + 1
    value_during = during_the_pandemic_data.iloc[row, column]
    value_before = pre_pandemic_data.iloc[row, column]

    change = value_during - value_before
    percentage = str(round(100 * (change / value_before), 2)) + '%'

    return p.DataFrame(
        {
            "Industry": _format_industry_name(industry),
            "Time Frame": time_frame,
            "Pre-Pandemic Value": value_before,
            "Value During the Pandemic": value_during,
            "Increase- Decrease": round(change, 2),
            "Percentage of Increase - Decrease": percentage,
        },
        index=["values"],
    )
def create_table_value(df: p.DataFrame) -> list[tuple[str, object]]:
    """Return the 'values' row of ``df`` as a list of (column name, value) tuples.

    The tuples follow the column order of ``df``. Values keep whatever type
    the cell holds, so the list mixes strings and numbers.

    Raises KeyError when ``df`` has columns but no row labelled 'values'.

    >>> values = create_dataframe('Agriculture', 'January 2019- 20')
    >>> create_table_value(values)  # doctest: +NORMALIZE_WHITESPACE
    [('Industry', 'Agriculture'),
     ('Time Frame', 'January 2019- 20'),
     ('Pre-Pandemic Value', 40.2),
     ('Value During the Pandemic', 38.9),
     ('Increase- Decrease', -1.3),
     ('Percentage of Increase - Decrease', '-3.23%')]
    """
    # ``to_dict()`` yields {column: {row label: value}}; only the 'values' row is read.
    return [(column, cells['values']) for column, cells in df.to_dict().items()]
def create_table(industry: str, time_frame: str) -> Figure:
    """Draw the comparison for ``industry`` and ``time_frame`` as a table figure.

    The data comes from create_dataframe via create_table_value. Column names
    become the table's header row and the values, converted to strings, form
    its single data row. The returned figure is 16 by 12 inches with its axes
    hidden.

    >>> create_table('Agriculture', 'January 2019- 20')
    <Figure size 1600x1200 with 1 Axes>
    """
    pairs = create_table_value(create_dataframe(industry, time_frame))
    headers = [label for label, _ in pairs]
    cells = [str(value) for _, value in pairs]

    fig, axes = plot.subplots(facecolor='#73C2FB')
    axes.set_axis_off()

    rendered = axes.table(
        cellText=[cells],
        rowLabels=None,
        colLabels=headers,
        loc='upper center',
    )
    # Fix the font size by hand instead of letting matplotlib pick one.
    rendered.auto_set_font_size(False)
    rendered.set_fontsize(10)
    rendered.scale(1.25, 1.5)

    fig.set_size_inches(16, 12)
    axes.set_title('Difference in Percentage Pre and During Pandemic')
    # ``on=None`` toggles the frame state of pyplot's current axes.
    plot.box(on=None)
    return fig
if __name__ == '__main__':
    # Only when executed as a script: run the doctests found in this module's
    # docstrings first, then the python_ta checks.
    import doctest
    doctest.testmod()
    import python_ta
    import python_ta.contracts
    # NOTE(review): presumably silences python_ta's contract debug messages -- confirm.
    python_ta.contracts.DEBUG_CONTRACTS = False
    python_ta.contracts.check_all_contracts()
    # Run python_ta on this file. 'extra-imports' lists the modules this file
    # is allowed to import; the two disabled codes look like pylint's
    # no-else-return (R1705) and consider-using-enumerate (C0200) -- confirm.
    python_ta.check_all(config={
        'extra-imports': ['python_ta.contracts', 'pandas', 'matplotlib.pyplot', 'project_part_1',
                          'matplotlib.figure'],
        'max-line-length': 121,
        'max-nested-blocks': 4,
        'disable': ['R1705', 'C0200']
    }
    )