Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

All Chicago Scrapers #16

Merged
merged 12 commits into from
Feb 3, 2015
Merged
280 changes: 168 additions & 112 deletions chicago/bills.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,28 @@
from .legistar import LegistarScraper
import lxml
import lxml.etree
import datetime
import pytz

from pupa.scrape import Bill
from pupa.scrape import Bill, Vote


class ChicagoBillScraper(LegistarScraper):
base_url = 'https://chicago.legistar.com/'
legislation_url = 'https://chicago.legistar.com/Legislation.aspx'
timezone = "US/Central"

def session(self, action_date):
    """Return the legislative-session label active on *action_date*.

    Chicago City Council terms turn over on May 18 of the inauguration
    year, so the session boundaries are 2011-05-18 and 2015-05-18.

    :param action_date: timezone-aware ``datetime.datetime``.
    :returns: ``"2007"``, ``"2011"``, or ``"2015"``.
    """
    # BUG FIX: passing a pytz zone as the ``tzinfo=`` constructor
    # argument attaches the zone's historical LMT offset (-05:51 for
    # US/Central) rather than CST/CDT.  ``localize`` computes the
    # correct UTC offset for the given wall-clock time.
    tz = pytz.timezone(self.timezone)
    if action_date < tz.localize(datetime.datetime(2011, 5, 18)):
        return "2007"
    elif action_date < tz.localize(datetime.datetime(2015, 5, 18)):
        return "2011"
    else:
        return "2015"



def searchLegislation(self, search_text='', created_before=None,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah! Any additional params will be passed to scrape!

On Fri, Jan 16, 2015 at 10:06 AM, Forest Gregg [email protected]
wrote:

In chicago/bills.py
#16 (diff)
:

class ChicagoBillScraper(LegistarScraper):
base_url = 'https://chicago.legistar.com/'
legislation_url = 'https://chicago.legistar.com/Legislation.aspx'

  • timezone = "US/Central"

def searchLegislation(self, search_text='', created_before=None,

Does pupa have any facilities for passing through command line args. It
would be great to be able to specify search parameters.


Reply to this email directly or view it on GitHub
https://github.com/opencivicdata/scrapers-us-municipal/pull/16/files#r23084807
.

Paul Tagliamonte
Software Developer | Sunlight Foundation

created_after=None, num_pages = None):
Expand Down Expand Up @@ -64,136 +79,177 @@ def parseSearchResults(self, page) :
continue
legislation_url = legislation[id_key]['url'].split(self.base_url)[-1]
legislation[id_key] = legislation_id
legislation['URL'] = self.base_url + legislation_url.split('&Options')[0]
legislation['url'] = self.base_url + legislation_url.split('&Options')[0]

yield legislation

def expandLegislationSummary(self, summary):
    """Fetch and parse the detail page behind a search-result row.

    *summary* is a row dict produced by ``searchLegislation``; its URL
    is followed and the resulting page is handed to
    ``parseLegislationDetail``.
    """
    return self.expandSummaryRow(summary, self.parseLegislationDetail)

def expandHistorySummary(self, action):
    """Fetch and parse the detail page behind a history-table row.

    *action* is a row dict produced by ``parseLegislationDetail``; its
    URL is followed and the resulting page is handed to
    ``parseHistoryDetail``.
    """
    return self.expandSummaryRow(action, self.parseHistoryDetail)

def expandSummaryRow(self, row, parse_function):
    """Follow the URL in a data-table *row* and parse the fetched page.

    :param row: dict with a ``'URL'`` key pointing at a detail page.
    :param parse_function: callable applied to the fetched lxml page.
    :returns: whatever *parse_function* returns.
    """
    # Removed a bare ``print(row['URL'])`` left over from debugging so
    # scraper output stays clean.
    page = self.lxmlize(row['URL'])

    return parse_function(page)

def _get_general_details(self, detail_div):
    """Scrape label/value pairs from the top section of a detail page.

    Labels live in ``lbl…`` spans whose ids do not contain ``'2'``;
    each matching value is the ``lbl…``/``hyp…`` element whose id does
    contain ``'2'``.  Returns a dict of cleaned label text to value
    text.
    """
    label_xpath = ".//span[contains(@id, 'ctl00_ContentPlaceHolder1_lbl') "\
                  " and not(contains(@id, '2'))]"

    value_xpath = ".//*[(contains(@id, 'ctl00_ContentPlaceHolder1_lbl') "\
                  " or contains(@id, 'ctl00_ContentPlaceHolder1_hyp')) "\
                  " and contains(@id, '2')]"

    labels = (span.text_content().replace(':', '').strip()
              for span in detail_div.xpath(label_xpath))
    values = (element.text_content().strip()
              for element in detail_div.xpath(value_xpath))

    return dict(zip(labels, values))


def parseLegislationDetail(self, page):
    """Extract the fields shown on a legislation detail page.

    Returns a dict of the page's general details, plus:
    - ``'Attachments'``: list of ``{'url', 'label'}`` dicts (possibly
      empty);
    - ``'Related files'``, ``'Sponsors'``, ``'Topics'``: split on
      commas into lists, when present.

    Example URL: http://chicago.legistar.com/LegislationDetail.aspx?ID=1050678&GUID=14361244-D12A-467F-B93D-E244CB281466&Options=ID|Text|&Search=zoning
    """
    # Top matter of the page.
    container = page.xpath("//div[@id='ctl00_ContentPlaceHolder1_pageDetails']")[0]
    details = self._get_general_details(container)

    anchors = container.xpath(".//span[@id='ctl00_ContentPlaceHolder1_lblAttachments2']/a")
    details[u'Attachments'] = [{'url': anchor.attrib['href'],
                                'label': anchor.text_content()}
                               for anchor in anchors]

    # These fields arrive as comma-separated strings.
    for field in (u'Related files', u'Sponsors', u'Topics'):
        if field in details:
            details[field] = details[field].split(',')

    return details


def scrape(self):
    """Yield a pupa ``Bill`` for each piece of legislation found by
    ``searchLegislation``, followed by any ``Vote`` objects gathered
    from that bill's action history.

    Rows with an empty title, and non-legislative record types
    (orders, claims, communications, reports, oaths of office), are
    skipped.  The span this replaces was corrupted by interleaved
    pre-refactor diff lines; this is the coherent post-diff version.
    """
    for i, page in enumerate(self.searchLegislation()):
        for legislation_summary in self.parseSearchResults(page):
            title = legislation_summary['Title'].strip()
            if title == "":
                continue

            if legislation_summary['Type'].lower() in ('order',
                                                       'claim',
                                                       'communication',
                                                       'report',
                                                       'oath of office'):
                continue
            else:
                bill_type = legislation_summary['Type'].lower()

            # Session is derived from the introduction date.
            bill_session = self.session(legislation_summary['Intro\xa0Date'])

            bill = Bill(identifier=legislation_summary['Record #'],
                        legislative_session=bill_session,
                        title=title,
                        classification=bill_type,
                        from_organization=self.jurisdiction.name)

            bill.add_source(legislation_summary['url'])

            bill, votes = self.addDetails(bill, legislation_summary['url'])

            yield bill
            for vote in votes:
                yield vote
def extractVotes(self, action_detail_url):
    """Scrape the roll call from an action-detail page.

    :param action_detail_url: URL of a Legistar action-detail page.
    :returns: ``(result, vote_list)`` where *result* is the
        lower-cased 'Result' field and *vote_list* is a list of
        ``(option, voter_name)`` tuples with options normalized via
        ``VOTE_OPTIONS`` (unknown raw options pass through unchanged).

    The span this replaces was corrupted by interleaved pre-refactor
    diff lines; this is the coherent post-diff version.
    """
    action_detail_page = self.lxmlize(action_detail_url)
    vote_table = action_detail_page.xpath("//table[@id='ctl00_ContentPlaceHolder1_gridVote_ctl00']")[0]
    votes = list(self.parseDataTable(vote_table))
    vote_list = []
    for vote, _, _ in votes:
        raw_option = vote['Vote'].lower()
        vote_list.append((VOTE_OPTIONS.get(raw_option, raw_option),
                          vote['Person Name']['label']))

    action_detail_div = action_detail_page.xpath(".//div[@id='ctl00_ContentPlaceHolder1_pageTop1']")[0]
    action_details = self.parseDetails(action_detail_div)
    result = action_details['Result'].lower()

    return result, vote_list
def addBillHistory(self, bill, history_table) :
    """Walk a bill's Legistar history table, adding each action to
    *bill* and collecting a Vote for every action with a roll call.

    :param bill: pupa Bill being built (mutated in place).
    :param history_table: lxml element for the history grid.
    :returns: list of pupa Vote objects (may be empty).
    """
    all_votes = []

    history = self.parseDataTable(history_table)

    for action, _, _ in history :
        action_description = action['Action']
        try :
            action_date = action['Date'].date().isoformat()
        except AttributeError : # https://chicago.legistar.com/LegislationDetail.aspx?ID=1424866&GUID=CEC53337-B991-4268-AE8A-D4D174F8D492
            # Rows with no parseable date are skipped entirely.
            continue

        if action_description :
            # NOTE(review): an action description missing from
            # ACTION_CLASSIFICATION raises KeyError here — confirm
            # fail-fast is intended rather than .get(...).
            bill.add_action(action_description,
                            action_date,
                            organization=action['Action\xa0By'],
                            classification=ACTION_CLASSIFICATION[action_description])
        # NOTE(review): indentation reconstructed from a flattened
        # diff — confirm this check is a sibling of the block above,
        # not nested inside it.
        if 'url' in action['Action\xa0Details'] :
            action_detail_url = action['Action\xa0Details']['url']
            result, votes = self.extractVotes(action_detail_url)

            if votes and result : # see https://github.com/datamade/municipal-scrapers-us/issues/15
                action_vote = Vote(legislative_session=bill.legislative_session,
                                   motion_text=action_description,
                                   classification=None,
                                   start_date=action_date,
                                   result=result,
                                   bill=bill.identifier)
                action_vote.add_source(action_detail_url)
                for option, voter in votes :
                    action_vote.vote(option, voter)

                all_votes.append(action_vote)

    return all_votes


def addDetails(self, bill, detail_url):
    """Augment *bill* with data from its Legistar detail page: related
    bills, sponsorships, subjects, and attachment version links.

    :param bill: pupa Bill being built (mutated in place).
    :param detail_url: URL of the legislation detail page.
    :returns: ``(bill, votes)`` where *votes* is the list of Vote
        objects gathered from the bill's action history.
    """
    detail_page = self.lxmlize(detail_url)
    detail_div = detail_page.xpath(".//div[@id='ctl00_ContentPlaceHolder1_pageDetails']")[0]

    legislation_details = self.parseDetails(detail_div)

    for related_bill in legislation_details.get('Related files', []):
        bill.add_related_bill(identifier=related_bill['label'],
                              legislative_session=bill.legislative_session,
                              relation_type='pending')

    # The first listed sponsor is treated as the primary sponsor.
    for i, sponsor in enumerate(legislation_details.get('Sponsors', [])):
        if i == 0:
            primary = True
            sponsorship_type = "Primary"
        else:
            primary = False
            sponsorship_type = "Regular"

        bill.add_sponsorship(sponsor['label'], sponsorship_type,
                             'person', primary)

    if u'Topics' in legislation_details:
        # BUG FIX: the loop variable was misspelled 'subjuct' while the
        # body referenced 'subject', raising NameError for any bill
        # that has Topics.
        for subject in legislation_details[u'Topics'].split(','):
            bill.add_subject(subject)

    for attachment in legislation_details.get(u'Attachments', []):
        if attachment['label']:
            bill.add_version_link(attachment['label'],
                                  attachment['url'],
                                  media_type="application/pdf")

    history_table = detail_page.xpath("//table[@id='ctl00_ContentPlaceHolder1_gridLegislation_ctl00']")[0]

    votes = self.addBillHistory(bill, history_table)

    return bill, votes




# Maps Legistar action descriptions to Open Civic Data action
# classification slugs.  Entries mapped to None are recorded as
# actions without a classification.
ACTION_CLASSIFICATION = {'Referred' : 'committee-referral',
                         'Re-Referred' : 'committee-referral',
                         'Recommended to Pass' : 'committee-passage-favorable',
                         'Passed as Substitute' : 'passage',
                         'Adopted' : 'passage',
                         'Approved' : 'passage',
                         'Passed' : 'passage',
                         'Substituted in Committee' : 'substitution',
                         'Failed to Pass' : 'failure',
                         'Recommended Do Not Pass' : 'committee-passage-unfavorable',
                         'Amended in Committee' : 'amendment-passage',
                         'Placed on File' : 'filing',
                         'Withdrawn' : 'withdrawal',
                         'Signed by Mayor' : 'executive-signature',
                         'Appointment' : 'appointment',
                         'Direct Introduction' : None,
                         'Remove Co-Sponsor(s)' : None,
                         'Add Co-Sponsor(s)' : None,
                         'Tabled' : None,
                         'Rules Suspended - Immediate Consideration' : None,
                         'Committee Discharged' : None,
                         'Held in Committee' : None,
                         'Recommended for Re-Referral' : None,
                         'Published in Special Pamphlet' : None,
                         'Adopted as Substitute' : None,
                         'Deferred and Published' : None,
                         }

# Normalizes raw Legistar roll-call strings (lower-cased) to pupa vote
# options; raw values absent from this map are passed through as-is in
# extractVotes.
VOTE_OPTIONS = {'yea' : 'yes',
                'rising vote' : 'yes',
                'nay' : 'no',
                'recused' : 'excused'}



Loading