forked from alanyuma/waedd-material-design
-
Notifications
You must be signed in to change notification settings - Fork 0
/
distress.py
200 lines (178 loc) · 8.08 KB
/
distress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""
A script designed to update and display the Distress page data for waedd.org/distress
By: Aaron Finocchiaro
"""
import datetime
import re
import pandas as pd
import plotly.graph_objects as go
from dateutil.relativedelta import relativedelta
from bea_data.bea_data import getData
from bls_data.bls import BlsData
from pyCensus.censusdata import censusData
def distress_table_fill_colors(df:pd.DataFrame) -> list:
"""
Takes a dataframe and determines the fill colors for the table that is going to be
created from the dataframe. It will color the column headers and the index column orange,
use white and light grey stripes for the regular columns, and highlight cells that are beyond
the threshold limit in the threshold column yellow.
"""
fill_color = ['orange']
for col in df.columns:
if col == "Threshold":
threshold_color = ['white'] #unemployment cell, will always be white
for val in list(df["Threshold"])[1:]:
if val < .8:
threshold_color.append('yellow')
elif val % 2 == 0:
threshold_color.append('lightgrey')
elif val % 2 != 0:
threshold_color.append('white')
fill_color.append(threshold_color)
else:
fill_color.append(['white', 'lightgrey']*len(df.index))
return fill_color
def distress_table(df:pd.DataFrame) -> go.Figure:
"""
Takes a pandas dataframe and constructs a plotly graph objects table based on
the dataframe.
"""
#determine the fill colors
fill_colors = distress_table_fill_colors(df)
#apply units ($ and %) to the appropriate data
df['Threshold'] = df['Threshold'].apply('{}%'.format)
df.iloc[0, df.columns != 'Threshold'] = df.iloc[0, df.columns != 'Threshold'].apply('{}%'.format)
df.loc['2019 Per Capita Money Income (5-year ACS)':, df.columns != 'Threshold'] = (
df.loc['2019 Per Capita Money Income (5-year ACS)':, df.columns != 'Threshold'].applymap('${:,.2f}'.format)
)
#create table
col_vals = [df[col].to_list() for col in df]
return go.Figure(data=[go.Table(
columnwidth=[80,50],
header=dict(values=["Criteria"] + df.columns.to_list(),
line_color="black",
fill_color="orange",
font=dict(color='black', size=12),
align='left'),
cells=dict(values=[df.index.to_list()] + col_vals,
line_color="black",
fill_color=fill_colors,
align='left'),
)])
def make_df(data:dict) -> pd.DataFrame:
"""
Makes a dataframe based on a dict of values passed in and creates the threshold column for that
data.
Arguments:
- data = dict; data to be added to dataframe
Returns dataframe
"""
df = pd.DataFrame(
data,
index=["24-month Average Unemployment Rate (BLS)",
"2019 Per Capita Money Income (5-year ACS)",
"2019 Per Capita Personal Income (BEA)"],
)
df = df.apply(pd.to_numeric)
df['Threshold'] = round(df.iloc[:,0]/df['United States'], 2)
return df
if __name__ == '__main__':
#gather BLS data for the past 3 years from most recent available month
bls_unemployment = BlsData(
["LAUST040000000000003", "LAUCN040120000000003", "LAUCN040270000000003", "LNU04000000"],
(datetime.date.today() - relativedelta(years=3)).year,
datetime.date.today().year,
)
bls_unemployment_df = bls_unemployment.clean_df(custom_column_names={"LNU04000000": "United States"})
# remove NaN entries for lines with national data and not state data (usually most recent month)
bls_unemployment_df = bls_unemployment_df.dropna()
# remove entries outside of 24 previous months
bls_unemployment_df = (
bls_unemployment_df[~(
pd.to_datetime(bls_unemployment_df.index) < pd.to_datetime(bls_unemployment_df.iloc[-1].name) -
pd.DateOffset(months=24))]
)
# gather Census ACS data
county_data = censusData(
['acs','acs5','profile'],
2019,
{
'get': "NAME,DP03_0088E",
'in' : "state:04",
'for': "county:012,027",
}
).df
county_data = county_data.set_index("NAME").transpose()
state_data = censusData(
['acs','acs5','profile'],
2019,
{
'get': "NAME,DP03_0088E",
'for': "state:04",
}
).df
state_data = state_data.set_index("NAME").transpose()
national_data = censusData(
['acs','acs5','profile'],
2019,
{
'get': "NAME,DP03_0088E",
'for': "us:1",
}
).df
national_data = national_data.set_index("NAME").transpose()
# merge census data into single dataframe
census_data = pd.concat([county_data, state_data, national_data], axis=1)
census_data = census_data.rename(columns=lambda x: re.sub(',.*', '', x))
#gather BEA data
bea_data = getData(
datasetname="Regional",
TableName="CAINC1",
method='getdata',
LineCode=3,
GeoFIPS="04027,04012,04000,00000",
Year=2019
).clean_df('TimePeriod','GeoName','DataValue')
bea_data = bea_data.rename(columns=lambda x: re.sub(',.*', ' County', x))
bea_data.iloc[0] = bea_data.replace(',','',regex=True)
#make graph
bls_graph = bls_unemployment.create_graph('24 month Unemployment Data (BLS)',
graph_type='line',
graph_labels={"date":"Date", "value": "Percent Unemployed"},
custom_column_names={"LNU04000000": "United States"})
bls_graph.update_traces(mode='markers+lines', hovertemplate='%{y}%')
bls_graph.update_layout(hovermode='x',
dragmode=False,
legend=dict(title={'text':""},yanchor="top", y=1.02, xanchor='left', x=1, font=dict(size=8)))
bls_graph.write_html("./graphs/region_distress_unemployment.html", include_plotlyjs='cdn')
#make a dataframe from the combined averages for all counties in the region
combined_data = {
'Region' : [round(bls_unemployment_df[['La Paz County', 'Yuma County']].mean().mean(), 2),
round(census_data.loc[:'DP03_0088E', 'Yuma County'].mean().mean(), 2),
round(bea_data[['La Paz County', 'Yuma County']].mean().mean(), 2)],
'Arizona' : [round(bls_unemployment_df['Arizona'].mean(), 2), census_data.loc['DP03_0088E']['Arizona'], bea_data['Arizona'][0]],
'United States' : [round(bls_unemployment_df['United States'].mean(), 2),
census_data.loc['DP03_0088E']['United States'],
bea_data['United States'][0]],
}
combined_region_df = make_df(combined_data)
#make the region distress table from the dataframe and write to html doc
combined_table = distress_table(combined_region_df)
combined_table.update_layout(height=275, margin=dict(l=0,r=0,t=0,b=0))
combined_table.write_html("./tables/region_combined_distress.html", include_plotlyjs='cdn')
# make county-based tables
for region in ['La Paz County', 'Yuma County']:
# gather data
county_data = {
region : [round(bls_unemployment_df[region].mean(), 2), census_data.loc['DP03_0088E'][region], bea_data[region][0]],
'Arizona' : [round(bls_unemployment_df['Arizona'].mean(), 2), census_data.loc['DP03_0088E']['Arizona'], bea_data['Arizona'][0]],
'United States' : [round(bls_unemployment_df['United States'].mean(), 2),
census_data.loc['DP03_0088E']['United States'],
bea_data['United States'][0]],
}
#create new df for data
county_df = make_df(county_data)
#create table and write to html doc
table = distress_table(county_df)
table.update_layout(height=275, margin=dict(l=0,r=0,t=0,b=0))
table.write_html(f"./tables/{'_'.join(region.lower().split()[:-1])}_distress.html", include_plotlyjs='cdn')