# WorkVis/project_part_2.py (FakeDeepLearner/WorkVis, main branch)
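"""
Part 2 of the project: build a one-row pandas DataFrame comparing an industry's
pre-pandemic and during-pandemic values for a chosen month, and render that row
as a matplotlib table.
"""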
import matplotlib.pyplot as plot
import pandas as p
from matplotlib.figure import Figure
import project_part_1 as proj1


# Lookup from an industry name to an integer position. create_dataframe uses that position
# as the row when indexing both datasets with .iloc.
industries_and_indexes = proj1.industry_and_its_index

# Both CSV files are read once, when this module is imported. The paths are relative, so they are
# resolved against the current working directory; edit the strings if the files or the folder
# are named differently.
pre_pandemic_data = p.read_csv('Datasets/pre_pandemic.csv')
during_the_pandemic_data = p.read_csv('Datasets/during_the_pandemic (1).csv')


# Month labels accepted as ``time_frame``, in calendar order; a label's position in this tuple
# is its 0-based month number.
# NOTE: "Decemeber" is misspelled, but the label is a runtime lookup key, so the spelling is kept
# exactly as it was to stay compatible with whatever callers already pass in.
_TIME_FRAME_LABELS = (
    "January 2019- 20", "February 2019- 20", "March 2019- 20",
    "April 2019- 20", "May 2019- 20", "June 2019- 20",
    "July 2019- 20", "August 2019- 20", "September 2019- 20",
    "October 2019- 20", "November 2019- 20", "Decemeber 2019- 20",
)
_TIME_FRAME_INDEXES = {label: month for month, label in enumerate(_TIME_FRAME_LABELS)}


def _format_industry_name(industry: str) -> str:
    """Return the display form of ``industry``.

    Every word after the first gets its first letter capitalized, except the word 'and'.
    The first word is left untouched. When ``industry`` is longer than 30 characters a
    newline token is inserted at the middle word position so the name wraps in a table cell.
    """
    words = industry.split(' ')
    formatted = [words[0]]

    for word in words[1:]:
        # ``word`` is empty when the name contains consecutive spaces; such tokens are passed
        # through unchanged instead of being indexed (which would raise IndexError).
        if word and word[0].islower() and word != 'and':
            formatted.append(word[0].capitalize() + word[1:])
        else:
            formatted.append(word)

    if len(industry) > 30:
        # The position is computed from the word count before the insertion.
        formatted.insert(len(words) // 2, '\n')

    return ' '.join(formatted)


def create_dataframe(industry: str, time_frame: str) -> p.DataFrame:
    """Return a one-row DataFrame comparing pre-pandemic and pandemic values for an industry.

    The row is labelled "values" and has these columns:
        - "Industry": the display form of ``industry`` (see _format_industry_name)
        - "Time Frame": ``time_frame`` unchanged
        - "Pre-Pandemic Value": the cell read from pre_pandemic_data
        - "Value During the Pandemic": the cell read from during_the_pandemic_data
        - "Increase- Decrease": pandemic value minus pre-pandemic value, rounded to 2 places
        - "Percentage of Increase - Decrease": that difference as a percentage of the
          pre-pandemic value, rounded to 2 places and formatted as a string ending in '%'

    A KeyError is raised when ``time_frame`` or ``industry`` is not a known key.
    A pre-pandemic value of 0 is not handled specially; the percentage is then whatever
    the division produces.

    Preconditions:
        - time_frame in _TIME_FRAME_INDEXES
        - industry in proj1.industry_and_its_index
    """
    month = _TIME_FRAME_INDEXES[time_frame]
    row = industries_and_indexes[industry]
    # Month values start at column 1 of each dataset, hence the +1
    # (column 0 presumably holds the industry label - confirm against the CSV files).
    column = month + 1

    # .iloc[row, column] with two integers returns the single cell at that position.
    value_during = during_the_pandemic_data.iloc[row, column]
    value_before = pre_pandemic_data.iloc[row, column]

    # A frame with one row labelled "values"; assigning a scalar to a column fills that row.
    new_dataframe = p.DataFrame(columns=["Industry", "Time Frame", "Pre-Pandemic Value", "Value During the Pandemic",
                                         "Increase- Decrease", "Percentage of Increase - Decrease"], index=["values"])

    new_dataframe["Industry"] = _format_industry_name(industry)
    new_dataframe["Time Frame"] = time_frame
    new_dataframe["Pre-Pandemic Value"] = value_before
    new_dataframe["Value During the Pandemic"] = value_during
    new_dataframe["Increase- Decrease"] = round(value_during - value_before, 2)

    percentage = round(100 * ((value_during - value_before) / value_before), 2)
    new_dataframe["Percentage of Increase - Decrease"] = str(percentage) + '%'

    return new_dataframe


def create_table_value(df: p.DataFrame) -> list[tuple[str, float]]:
    """Flatten the "values" row of df into (column name, cell) pairs, in column order.

    df must have a row labelled 'values'. The second element of each pair is whatever
    that cell holds, so text columns yield strings and numeric columns yield numbers.

    >>> values = create_dataframe('Agriculture', 'January 2019- 20')
    >>> create_table_value(values)
    [('Industry', 'Agriculture'),\
 ('Time Frame', 'January 2019- 20'),\
 ('Pre-Pandemic Value', 40.2),\
 ('Value During the Pandemic', 38.9),\
 ('Increase- Decrease', -1.3),\
 ('Percentage of Increase - Decrease', '-3.23%')]


    """
    # to_dict() maps each column name to a {row label: cell} dictionary.
    return [(column, cells['values']) for column, cells in df.to_dict().items()]


def create_table(industry: str, time_frame: str) -> Figure:
    """Return a figure showing the comparison row for industry and time_frame as a table.

    The column names become the table header and the cell values, converted to
    strings, become its single body row.

    >>> create_table('Agriculture', 'January 2019- 20')
    <Figure size 1600x1200 with 1 Axes>
    """
    pairs = create_table_value(create_dataframe(industry, time_frame))
    headers = [label for label, _ in pairs]
    cells = [str(cell) for _, cell in pairs]

    figure, axes = plot.subplots(facecolor='#73C2FB')
    # Only the table should be visible, so the axis lines and ticks are turned off.
    axes.set_axis_off()

    rendered = axes.table(cellText=[cells], rowLabels=None, colLabels=headers, loc='upper center')
    # A fixed font size is used instead of automatic sizing.
    rendered.auto_set_font_size(False)
    rendered.set_fontsize(10)
    rendered.scale(1.25, 1.5)

    figure.set_size_inches(16, 12)
    axes.set_title('Difference in Percentage Pre and During Pandemic')

    plot.box(on=None)

    return figure


# Self-checks that run only when this file is executed directly (not when it is imported).
if __name__ == '__main__':
    # Run the examples embedded in this module's docstrings.
    import doctest
    doctest.testmod()

    # python_ta is imported here because it is only needed for these checks.
    import python_ta
    import python_ta.contracts
    python_ta.contracts.DEBUG_CONTRACTS = False
    # NOTE(review): presumably this turns on checking of the docstring "Preconditions" sections
    # for the functions in this module - confirm against the python_ta documentation.
    python_ta.contracts.check_all_contracts()
    python_ta.check_all(config={
        # Modules this file imports beyond what python_ta allows by default.
        'extra-imports': ['python_ta.contracts', 'pandas', 'matplotlib.pyplot', 'project_part_1',
                        'matplotlib.figure'],
        'max-line-length': 121,
        'max-nested-blocks': 4,
        # Message codes switched off: R1705 (no-else-return) and C0200 (consider-using-enumerate).
        'disable': ['R1705', 'C0200']
    }
    )