-
Notifications
You must be signed in to change notification settings - Fork 0
/
reactions.py
193 lines (176 loc) · 4.96 KB
/
reactions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import pandas as pd
def split_reactions_file(path_to_csv):
    """Clean up and split the reactions file into separate tables.

    Parameters
    ----------
    path_to_csv : str or file-like
        The raw reactions CSV export; anything ``pandas.read_csv`` accepts.

    Returns
    -------
    dict of pandas.DataFrame
        - 'reactions':         UserID, Time, Reaction_who, Reaction_what
        - 'questionnaire':     all questionnaire answers, deduplicated
        - 'quest_demographic': demographic portion of the questionnaire
        - 'quest_political':   political portion of the questionnaire
    """
    # Column names as they appear (left to right) in the raw CSV export.
    raw_order = """
        UserID Reaction Time how_watching_25 economy_priority_10
        health_care_party_12 foreign_policy_party_13 abortion_party_14
        economy_party_15 health_care_priority_7 foreign_policy_priority_8
        abortion_priority_9 interested_23 news_sources_24 gender_16 age_17
        family_income_18 race_19 religion_20 christian_21 state_22
        tv_channel_26 economy_candidate_27 foreign_policy_candidate_28
        candidate_preferred_29 candidate_choice_3 confidence_in_choice_4
        likely_to_vote_5 political_views_2 ready immigration_priority_6
        immigration_party_11 party_1 next
        """.split()
    # The same columns rearranged into question-number order for readability.
    tidy_order = """
        UserID Time Reaction party_1 political_views_2 candidate_choice_3
        confidence_in_choice_4 likely_to_vote_5 immigration_priority_6
        health_care_priority_7 foreign_policy_priority_8 abortion_priority_9
        economy_priority_10 immigration_party_11 health_care_party_12
        foreign_policy_party_13 abortion_party_14 economy_party_15 gender_16
        age_17 family_income_18 race_19 religion_20 christian_21 state_22
        interested_23 news_sources_24 how_watching_25 tv_channel_26
        economy_candidate_27 foreign_policy_candidate_28
        candidate_preferred_29 ready next
        """.split()
    demographic_cols = """
        UserID gender_16 age_17 family_income_18 race_19 religion_20
        christian_21 state_22
        """.split()
    political_cols = """
        UserID party_1 political_views_2 candidate_choice_3
        confidence_in_choice_4 likely_to_vote_5 immigration_priority_6
        health_care_priority_7 foreign_policy_priority_8 abortion_priority_9
        economy_priority_10 immigration_party_11 health_care_party_12
        foreign_policy_party_13 abortion_party_14 economy_party_15
        interested_23 news_sources_24 economy_candidate_27
        foreign_policy_candidate_28 candidate_preferred_29
        """.split()

    a = pd.read_csv(path_to_csv)
    a.columns = raw_order
    a = a.reindex(columns=tidy_order)
    # 'Reaction' is stored as "<who>:<what>"; split it into two columns.
    who_what = a['Reaction'].str.split(':')
    a['Reaction_who'] = who_what.str.get(0)
    a['Reaction_what'] = who_what.str.get(1)
    a['Time'] = pd.to_datetime(a['Time'])
    reactions = a[['UserID', 'Time', 'Reaction_who', 'Reaction_what']]
    # The questionnaire is every column except the per-reaction ones; answers
    # repeat on every reaction row, so keep one row per distinct response set.
    quest_cols = [c for c in tidy_order if c not in ('Time', 'Reaction')]
    questionnaire = a[quest_cols].drop_duplicates()
    return {
        'reactions': reactions,
        'questionnaire': questionnaire,
        'quest_demographic': questionnaire[demographic_cols],
        'quest_political': questionnaire[political_cols],
    }
def link_reactions_to_transcript(path_to_reactions_file, path_to_transcript_file, truncate_after='2:33'):
    """Return a table with reactions data next to transcript entries.

    This will return a table with an entry for each reaction and columns
    from the reaction data, plus the most recent statement from the
    transcript.  It removes reactions before the debate started (about
    1/2 hour) and after the debate ended (about 1/2 hour).

    Parameters
    ----------
    path_to_reactions_file : str
        CSV consumed by ``split_reactions_file``.
    path_to_transcript_file : str
        Transcript CSV with a "Sync'd start" timestamp column and a
        numeric Speaker column (0=Moderator, 1=Romney, 2=Obama).
    truncate_after : str, optional
        Time of day (parseable by ``pandas.to_datetime``); reactions at or
        after this time are discarded.

    Returns
    -------
    pandas.DataFrame
    """
    parts = split_reactions_file(path_to_reactions_file)
    # Copy so we don't mutate the frame returned by split_reactions_file.
    r = parts['reactions'].copy()
    r['start'] = r['Time'].dt.time  # time-of-day key to align on
    c = pd.read_csv(path_to_transcript_file)
    c['start'] = pd.to_datetime(c["Sync'd start"]).dt.time
    c['statement'] = range(len(c))  # ordinal id of each transcript statement
    # Interleave transcript rows and reaction rows, ordered by time of day.
    # (DataFrame.append and DataFrame.sort(columns=...) were removed from
    # pandas; concat/sort_values are the modern equivalents.)
    m = pd.concat([c, r])
    m = m.sort_values('start')
    m = m.ffill()    # give last transcript info to subsequent reactions
    m = m.dropna()   # reactions before the first statement have no info: drop
    # Remove reactions after the debate ended.
    m = m[m['start'] < pd.to_datetime(truncate_after).time()]
    # A new 'turn' begins each time the speaker changes.
    m['turn'] = (m['Speaker'].shift(1) != m['Speaker']).astype(int).cumsum()
    m['Speaker_name'] = m['Speaker'].apply(lambda s: {0: 'Moderator', 1: 'Romney', 2: 'Obama'}[s])
    return m