diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2aea3241..5f6d4c99 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -2,7 +2,7 @@ ci:
   autoupdate_schedule: quarterly
   skip: [pip-compile]
 default_language_version:
-  python: python3.10
+  python: python3.11
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
     rev: v0.6.9
diff --git a/ca_qc_kirkland/people.py b/ca_qc_kirkland/people.py
index 3f0bab4b..6b64b6e5 100644
--- a/ca_qc_kirkland/people.py
+++ b/ca_qc_kirkland/people.py
@@ -8,9 +8,18 @@
 
 class KirklandPersonScraper(CanadianScraper):
     def scrape(self):
-        page = self.lxmlize(COUNCIL_PAGE)
+        def decode_email(e):
+            """Decode a hex string whose first byte is the XOR key for the remaining bytes.
+
+            Used for the obfuscated e-mail fragment found in each councillor's link.
+            """
+            k = int(e[:2], 16)
+            # Stopping at len(e) - 1 ignores a dangling half-byte at the end of the input.
+            return "".join(chr(int(e[i : i + 2], 16) ^ k) for i in range(2, len(e) - 1, 2))
 
-        councillors = page.xpath('//div[@class="container_content"]//tbody/tr')
+        page = self.lxmlize(COUNCIL_PAGE, "iso-8859-1")
+
+        councillors = page.xpath('//table/tbody[not(@id)]/tr/td[@valign="top"]')
         assert len(councillors), "No councillors found"
         for councillor in councillors:
             if councillor == councillors[0]:
@@ -23,19 +32,22 @@ def scrape(self):
 
             name = councillor.xpath(".//strong/text()")[0]
 
+            # Using self.get_phone does not include the extension #
             phone = (
                 councillor.xpath('.//div[contains(text(), "#")]/text()')[0]
                 .replace("T ", "")
                 .replace(" ", "-")
                 .replace(".", ",")  # correcting a typo
                 .replace(",-#-", " x")
             )
-            email = self.get_email(councillor)
+            # cloudflare encrypts the email data
+            encrypted_email = councillor.xpath('.//@href[contains(., "email")]')[0].split("#")[1]
+            email = decode_email(encrypted_email)
 
             p = Person(primary_org="legislature", name=name, district=district, role=role)
             p.add_source(COUNCIL_PAGE)
             p.add_contact("voice", phone, "legislature")
             p.add_contact("email", email)
             image = councillor.xpath(".//img/@src")
             if image:
                 p.image = image[0]
diff --git a/requirements.txt b/requirements.txt
index 080af981..9ec2ad3f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -69,8 +69,6 @@ sqlparse==0.5.1
     # via django
 text-unidecode==1.3
     # via python-slugify
-typing-extensions==4.12.2
-    # via asgiref
 unidecode==0.4.14
     # via -r requirements.in
 urllib3==1.26.20