-
Notifications
You must be signed in to change notification settings - Fork 14
/
Makefile
173 lines (141 loc) · 8.16 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Targets that name an action rather than a file. Declaring them phony keeps
# a stray file or directory with the same name from disabling the target.
# The download-SFO-%, download-COAK-% and download-BRK-% pattern targets are
# intentionally NOT listed: make skips implicit (pattern) rule search for
# phony targets, so listing them would leave them without a recipe.
.PHONY: download clean import run \
        clean-spreadsheets process download-spreadsheets download-cached \
        upload-cache import-cached import-spreadsheets do-import-spreadsheets \
        import-data recreatedb reindex \
        496 497 A-Contributions B1-Loans B2-Loans C-Contributions \
        D-Expenditure E-Expenditure F-Expenses F461P5-Expenditure \
        F465P3-Expenditure F496P3-Contributions G-Expenditure H-Loans \
        I-Contributions Summary

# PostgreSQL database used by every psql/csvsql/pg_dump/createdb call below.
# Override from the environment or command line: make DATABASE_NAME=foo import
DATABASE_NAME?=disclosure-backend

# Directory the import steps read CSV files from.
CSV_PATH?=downloads/csv

# Working directory at parse time (not referenced by the rules in this file).
CD := $(shell pwd)

# Download command: the project's wget wrapper script plus common options.
WGET=bin/wget-wrapper --no-verbose --tries=3
# Remove the downloaded CSV files so the next download fetches them again.
clean-spreadsheets:
	rm -rf downloads/csv/*.csv \
		downloads/csv/office_elections.csv \
		downloads/csv/measure_committees.csv \
		downloads/csv/elections.csv
# Remove everything the download targets produce: raw archives and CSVs.
clean:
	rm -rf downloads/raw
	rm -rf downloads/csv
# Empty the calculations table, rebuild the build/ directory by running
# process.rb, then run the reporting/digest scripts and show how
# build/digests.json changed relative to git.
process: process.rb
	# todo: remove RUBYOPT variable when activerecord fixes deprecation warnings
	echo 'delete from calculations;'| psql $(DATABASE_NAME)
	rm -rf build \
		&& RUBYOPT="-W:no-deprecated -W:no-experimental" bundle exec ruby $<
	bin/report-schema $(DATABASE_NAME)
	bin/create-digests
	bin/report-candidates
	git --no-pager diff build/digests.json
# Aggregate target: make sure every spreadsheet-derived CSV has been fetched.
download-spreadsheets: \
        downloads/csv/candidates.csv \
        downloads/csv/committees.csv \
        downloads/csv/referendums.csv \
        downloads/csv/name_to_number.csv \
        downloads/csv/office_elections.csv \
        downloads/csv/elections.csv
# Fetch and unpack the cache tarball from S3. The tarball name is the date
# (the part of the ISO timestamp before "T") of the most recent commit
# authored by 'OpenDisclosure Deploybot'.
download-cached:
	$(WGET) -O- "https://s3-us-west-2.amazonaws.com/odca-data-cache/$(shell git log --author 'OpenDisclosure Deploybot' -n1 --pretty=format:%aI | cut -d"T" -f1).tar.gz" \
		| tar xz
# Dump the database next to the downloaded data, then stream a tarball of
# the CSVs, static files and the dump to S3 under today's date.
upload-cache:
	mkdir -p downloads/cached-db/
	pg_dump $(DATABASE_NAME) > downloads/cached-db/$(DATABASE_NAME).sql
	tar czf - downloads/csv downloads/static downloads/cached-db | \
		aws s3 cp - s3://odca-data-cache/$(shell date +%Y-%m-%d).tar.gz --acl public-read
# Aggregate target: the spreadsheets plus one COAK download per year.
download: \
        download-spreadsheets \
        download-COAK-2014 \
        download-COAK-2015 \
        download-COAK-2016 \
        download-COAK-2017 \
        download-COAK-2018 \
        download-COAK-2019 \
        download-COAK-2020 \
        download-COAK-2021 \
        download-COAK-2022 \
        download-COAK-2023 \
        download-COAK-2024
# download-SFO-<year>: fetch the SFO e-filing archive for <year>, extract the
# spreadsheet it contains and convert it to CSV files with ssconvert.rb.
# $* is the pattern stem, i.e. the text matched by % (the year).
# downloads/csv is created here as well so the target works on its own
# instead of relying on another target having created the directory.
# NOTE(review): ssconvert.rb is not shown here; '%{sheet}' is passed to it
# literally and is presumably replaced with each sheet's name - confirm.
download-SFO-%:
	mkdir -p downloads/raw downloads/csv
	$(WGET) http://public.netfile.com/pub2/excel/SFOBrowsable/efile_SFO_$*.zip -O \
		downloads/raw/efile_SFO_$*.zip
	unzip -p downloads/raw/efile_SFO_$*.zip > downloads/raw/efile_SFO_$*.xlsx
	ruby ssconvert.rb downloads/raw/efile_SFO_$*.xlsx 'downloads/csv/efile_SFO_$*_%{sheet}.csv'
# download-COAK-<year>: fetch the COAK e-filing archive for <year>, extract
# the spreadsheet it contains and convert it to CSV files with ssconvert.rb.
# $* is the pattern stem, i.e. the text matched by % (the year).
# downloads/csv is created here as well so the target works on its own
# instead of relying on another target having created the directory.
# NOTE(review): ssconvert.rb is not shown here; '%{sheet}' is passed to it
# literally and is presumably replaced with each sheet's name - confirm.
download-COAK-%:
	mkdir -p downloads/raw downloads/csv
	$(WGET) http://public.netfile.com/pub2/excel/COAKBrowsable/efile_newest_COAK_$*.zip -O \
		downloads/raw/efile_COAK_$*.zip
	unzip -p downloads/raw/efile_COAK_$*.zip > downloads/raw/efile_COAK_$*.xlsx
	ruby ssconvert.rb downloads/raw/efile_COAK_$*.xlsx 'downloads/csv/efile_COAK_$*_%{sheet}.csv'
# download-BRK-<year>: convert the spreadsheet already stored under
# downloads/static to CSV files (nothing is downloaded).
# $* is the pattern stem, i.e. the text matched by % (the year).
# downloads/csv is created first so the target works on its own.
download-BRK-%:
	mkdir -p downloads/csv
	ruby ssconvert.rb downloads/static/efile_BRK_$*.xlsx 'downloads/csv/efile_BRK_$*_%{sheet}.csv'
# Full import into a freshly recreated database: spreadsheets first, then the
# filing data. Each step is a separate sub-make, so they run in this order.
import: recreatedb
	$(MAKE) do-import-spreadsheets
	$(MAKE) import-data
# Load the SQL dump under downloads/cached-db into a freshly recreated
# database. The dump is fed in by redirection rather than `cat | psql`: with
# the pipe, a missing dump only failed `cat`, psql read empty input and the
# recipe still succeeded with an empty database. Now the recipe fails.
import-cached: recreatedb
	psql $(DATABASE_NAME) < downloads/cached-db/$(DATABASE_NAME).sql
# Re-import only the spreadsheet tables, then run bin/make_view.
import-spreadsheets: do-import-spreadsheets
	./bin/make_view
# Rebuild every spreadsheet-backed table from the CSV files in $(CSV_PATH).
# The sequence for each table is: drop it, recreate it with bin/create-table,
# bulk-insert the CSV with csvsql, then (for most tables) add a serial primary
# key and run bin/remove-whitespace on selected columns.
# All CSV paths go through $(CSV_PATH); office_elections and elections used
# to be hard-coded to downloads/csv, which disagreed with the create-table
# calls for the same tables whenever CSV_PATH was overridden.
do-import-spreadsheets:
	echo 'DROP TABLE IF EXISTS candidates CASCADE;' | psql $(DATABASE_NAME)
	./bin/create-table $(DATABASE_NAME) $(CSV_PATH) candidates
	csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/candidates.csv
	echo 'ALTER TABLE "candidates" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
	./bin/remove-whitespace $(DATABASE_NAME) candidates Candidate
	./bin/remove-whitespace $(DATABASE_NAME) candidates Committee_Name
	./bin/remove-whitespace $(DATABASE_NAME) candidates Facebook
	./bin/remove-whitespace $(DATABASE_NAME) candidates Instagram
	./bin/remove-whitespace $(DATABASE_NAME) candidates Twitter
	./bin/remove-whitespace $(DATABASE_NAME) candidates Bio
	echo 'DROP TABLE IF EXISTS referendums CASCADE;' | psql $(DATABASE_NAME)
	./bin/create-table $(DATABASE_NAME) $(CSV_PATH) referendums
	csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/referendums.csv
	echo 'ALTER TABLE "referendums" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
	./bin/remove-whitespace $(DATABASE_NAME) referendums Short_Title
	./bin/remove-whitespace $(DATABASE_NAME) referendums Summary
	echo 'DROP TABLE IF EXISTS name_to_number CASCADE;' | psql $(DATABASE_NAME)
	./bin/create-table $(DATABASE_NAME) $(CSV_PATH) name_to_number
	csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/name_to_number.csv
	echo 'DROP TABLE IF EXISTS committees CASCADE;' | psql $(DATABASE_NAME)
	./bin/create-table $(DATABASE_NAME) $(CSV_PATH) committees
	csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/committees.csv
	echo 'ALTER TABLE "committees" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
	./bin/remove-whitespace $(DATABASE_NAME) committees Filer_NamL
	echo 'DROP TABLE IF EXISTS office_elections CASCADE;' | psql $(DATABASE_NAME)
	./bin/create-table $(DATABASE_NAME) $(CSV_PATH) office_elections
	csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/office_elections.csv
	echo 'ALTER TABLE "office_elections" ALTER COLUMN title TYPE varchar(50);' | psql $(DATABASE_NAME)
	echo 'ALTER TABLE "office_elections" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
	echo 'DROP TABLE IF EXISTS elections CASCADE;' | psql $(DATABASE_NAME)
	./bin/create-table $(DATABASE_NAME) $(CSV_PATH) elections
	csvsql --db postgresql:///$(DATABASE_NAME) --insert --no-create --no-inference $(CSV_PATH)/elections.csv
	echo 'ALTER TABLE "elections" ADD COLUMN id SERIAL PRIMARY KEY;' | psql $(DATABASE_NAME)
# Import every filing table (one prerequisite per table), then make sure the
# calculations table exists, de-duplicate transactions and run bin/make_view.
import-data: \
        496 \
        497 \
        A-Contributions \
        B1-Loans \
        B2-Loans \
        C-Contributions \
        D-Expenditure \
        E-Expenditure \
        F-Expenses \
        F461P5-Expenditure \
        F465P3-Expenditure \
        F496P3-Contributions \
        G-Expenditure \
        H-Loans \
        I-Contributions \
        Summary
	echo 'CREATE TABLE IF NOT EXISTS "calculations" (id SERIAL PRIMARY KEY, subject_id integer, subject_type varchar(30), name varchar(40), value jsonb);' | psql $(DATABASE_NAME)
	./bin/remove_duplicate_transactions
	./bin/make_view
# Drop the database if it exists and create it again from template0 with the
# C collation. --if-exists makes "database does not exist" a non-error while
# still reporting real dropdb failures, which the former `|| true` swallowed
# (the recipe then only failed later, in createdb).
recreatedb:
	dropdb --if-exists $(DATABASE_NAME)
	createdb $(DATABASE_NAME) --lc-collate=C --template=template0
# Run the search_index.rb script.
reindex:
	ruby search_index.rb
# One target per filing table. Each runs bin/import-file with the CSV
# directory and its own target name ($@), passing the database name through
# the environment.
496 497 \
A-Contributions \
B1-Loans B2-Loans \
C-Contributions \
D-Expenditure E-Expenditure \
F-Expenses F461P5-Expenditure F465P3-Expenditure F496P3-Contributions \
G-Expenditure \
H-Loans \
I-Contributions \
Summary:
	DATABASE_NAME=$(DATABASE_NAME) ./bin/import-file $(CSV_PATH) $@
# Fetch the published spreadsheet tab (gid=0) as CSV and normalise its header
# row: on line 1 only, spaces become underscores and every character other
# than letters, commas and underscores is removed.
# The download goes to a scratch file and the result is moved into place only
# when every step succeeded. Previously the recipe was `wget | sed | sed > $@`,
# whose exit status is that of the last sed, so a failed download left an
# empty $@ that make then treated as up to date.
downloads/csv/candidates.csv:
	mkdir -p downloads/csv downloads/raw
	$(WGET) -O $@.download \
		'https://docs.google.com/spreadsheets/d/e/2PACX-1vRZNbqOzI3TlelO3OSh7QGC1Y4rofoRPs0TefWDLJvleFkaXq_6CSWgX89HfxLYrHhy0lr4QqUEryuc/pub?gid=0&single=true&output=csv' \
		&& sed -e '1s/ /_/g' -e '1s/[^a-zA-Z,_]//g' $@.download > $@.tmp \
		&& mv -f $@.tmp $@; \
	status=$$?; rm -f $@.download $@.tmp; exit $$status
# Fetch the published spreadsheet tab (gid=585313505) as CSV and normalise its
# header row: on line 1 only, spaces become underscores and every character
# other than letters, commas and underscores is removed.
# The download goes to a scratch file and the result is moved into place only
# when every step succeeded. Previously the recipe was `wget | sed | sed > $@`,
# whose exit status is that of the last sed, so a failed download left an
# empty $@ that make then treated as up to date.
downloads/csv/office_elections.csv:
	mkdir -p downloads/csv downloads/raw
	$(WGET) -O $@.download \
		'https://docs.google.com/spreadsheets/d/e/2PACX-1vRZNbqOzI3TlelO3OSh7QGC1Y4rofoRPs0TefWDLJvleFkaXq_6CSWgX89HfxLYrHhy0lr4QqUEryuc/pub?gid=585313505&single=true&output=csv' \
		&& sed -e '1s/ /_/g' -e '1s/[^a-zA-Z,_]//g' $@.download > $@.tmp \
		&& mv -f $@.tmp $@; \
	status=$$?; rm -f $@.download $@.tmp; exit $$status
# Fetch the published spreadsheet tab (gid=608094632) as CSV and normalise its
# header row: on line 1 only, spaces become underscores and every character
# other than letters, commas and underscores is removed.
# The download goes to a scratch file and the result is moved into place only
# when every step succeeded. Previously the recipe was `wget | sed | sed > $@`,
# whose exit status is that of the last sed, so a failed download left an
# empty $@ that make then treated as up to date.
downloads/csv/referendums.csv:
	mkdir -p downloads/csv downloads/raw
	$(WGET) -O $@.download \
		'https://docs.google.com/spreadsheets/d/e/2PACX-1vRZNbqOzI3TlelO3OSh7QGC1Y4rofoRPs0TefWDLJvleFkaXq_6CSWgX89HfxLYrHhy0lr4QqUEryuc/pub?gid=608094632&single=true&output=csv' \
		&& sed -e '1s/ /_/g' -e '1s/[^a-zA-Z,_]//g' $@.download > $@.tmp \
		&& mv -f $@.tmp $@; \
	status=$$?; rm -f $@.download $@.tmp; exit $$status
# Fetch the published spreadsheet tab (gid=102954444) as CSV and normalise its
# header row: on line 1 only, spaces become underscores and every character
# other than letters, commas and underscores is removed.
# The download goes to a scratch file and the result is moved into place only
# when every step succeeded. Previously the recipe was `wget | sed | sed > $@`,
# whose exit status is that of the last sed, so a failed download left an
# empty $@ that make then treated as up to date.
downloads/csv/name_to_number.csv:
	mkdir -p downloads/csv
	$(WGET) -O $@.download \
		'https://docs.google.com/spreadsheets/d/e/2PACX-1vRZNbqOzI3TlelO3OSh7QGC1Y4rofoRPs0TefWDLJvleFkaXq_6CSWgX89HfxLYrHhy0lr4QqUEryuc/pub?gid=102954444&single=true&output=csv' \
		&& sed -e '1s/ /_/g' -e '1s/[^a-zA-Z,_]//g' $@.download > $@.tmp \
		&& mv -f $@.tmp $@; \
	status=$$?; rm -f $@.download $@.tmp; exit $$status
# Fetch the published spreadsheet tab (gid=1015408103) as CSV and normalise
# its header row: on line 1 only, spaces become underscores and every
# character other than letters, commas and underscores is removed.
# The download goes to a scratch file and the result is moved into place only
# when every step succeeded. Previously the recipe was `wget | sed | sed > $@`,
# whose exit status is that of the last sed, so a failed download left an
# empty $@ that make then treated as up to date.
downloads/csv/committees.csv:
	mkdir -p downloads/csv
	$(WGET) -O $@.download \
		'https://docs.google.com/spreadsheets/d/e/2PACX-1vRZNbqOzI3TlelO3OSh7QGC1Y4rofoRPs0TefWDLJvleFkaXq_6CSWgX89HfxLYrHhy0lr4QqUEryuc/pub?gid=1015408103&single=true&output=csv' \
		&& sed -e '1s/ /_/g' -e '1s/[^a-zA-Z,_]//g' $@.download > $@.tmp \
		&& mv -f $@.tmp $@; \
	status=$$?; rm -f $@.download $@.tmp; exit $$status
# Fetch the published spreadsheet tab (gid=2138925841; a different document
# from the other CSV rules) as CSV and normalise its header row: on line 1
# only, spaces become underscores and every character other than letters,
# commas and underscores is removed.
# The download goes to a scratch file and the result is moved into place only
# when every step succeeded. Previously the recipe was `wget | sed | sed > $@`,
# whose exit status is that of the last sed, so a failed download left an
# empty $@ that make then treated as up to date.
downloads/csv/elections.csv:
	mkdir -p downloads/csv
	$(WGET) -O $@.download \
		'https://docs.google.com/spreadsheets/d/1vJR8GR5Bk3bUQXziPiQe7to1O-QEm-_5GfD7hPjp-Xc/pub?gid=2138925841&single=true&output=csv' \
		&& sed -e '1s/ /_/g' -e '1s/[^a-zA-Z,_]//g' $@.download > $@.tmp \
		&& mv -f $@.tmp $@; \
	status=$$?; rm -f $@.download $@.tmp; exit $$status
# Run server.rb through Bundler.
run:
	bundle exec ruby server.rb