Skip to content

Commit

Permalink
chore: Use Scraper.get instead of requests.get
Browse files — browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Sep 21, 2024
1 parent db3825c commit a7d4dd2
Show file tree
Hide file tree
Showing 10 changed files with 10 additions and 25 deletions.
4 changes: 1 addition & 3 deletions ca_mb_winnipeg/people.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import json

import requests

from utils import CanadianPerson as Person
from utils import CanadianScraper

Expand All @@ -12,7 +10,7 @@ class WinnipegPersonScraper(CanadianScraper):
def scrape(self):
# from https://data.winnipeg.ca/Council-Services/Council-Data/r4tk-7dip/about_data
api_url = "https://data.winnipeg.ca/resource/r4tk-7dip.json"
data = json.loads(requests.get(api_url).content)
data = json.loads(self.get(api_url).content)
assert len(data), "No councillors found via API"

page = self.lxmlize(COUNCIL_PAGE)
Expand Down
4 changes: 1 addition & 3 deletions ca_nb_moncton/people.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import json
from collections import defaultdict

import requests

from utils import CanadianPerson as Person
from utils import CanadianScraper

Expand All @@ -13,7 +11,7 @@
class MonctonPersonScraper(CanadianScraper):
def scrape(self):
seat_numbers = defaultdict(int)
data = json.loads(requests.get(API_URL).content)["features"]
data = json.loads(self.get(API_URL).content)["features"]
assert len(data), "No councillors found"

for item in data:
Expand Down
4 changes: 1 addition & 3 deletions ca_on_caledon/people.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import re

import requests

from utils import CanadianPerson as Person
from utils import CanadianScraper

Expand Down Expand Up @@ -33,7 +31,7 @@ def scrape(self):

# phone numbers populated by JS request
contact_num = page.xpath('//div[@class="contactBody"]/div/@id')[0].replace("contactEntry_", "")
contact_data = requests.get(
contact_data = self.get(
f"https://www.caledon.ca//Modules/Contact/services/GetContactHTML.ashx?isMobile=false&param={contact_num}&lang=en"
).text
voice = re.findall(r"(?<=tel://)\d+(?=\">)", contact_data)
Expand Down
3 changes: 1 addition & 2 deletions ca_on_chatham_kent/people.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re
from collections import defaultdict

import requests
from lxml import etree

from utils import CanadianPerson as Person
Expand All @@ -19,7 +18,7 @@ def scrape(self):
headers = {"content-type": "text/xml"}
body = '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"><soap:Body><GetListItems xmlns="http://schemas.microsoft.com/sharepoint/soap/"><listName>councillorsByWard</listName><viewName></viewName><query><Query><OrderBy Override="TRUE"><FieldRef Ascending="True" Name="Title" /></OrderBy></Query></query><viewFields><ViewFields Properties="True" /></viewFields><rowLimit>50</rowLimit><queryOptions><QueryOptions></QueryOptions></queryOptions></GetListItems></soap:Body></soap:Envelope>'

response = requests.post(url=COUNCIL_DATA_URL, data=body, headers=headers)
response = self.post(url=COUNCIL_DATA_URL, data=body, headers=headers)
page = etree.fromstring(response.content) # noqa: S320
namespace = {"z": "#RowsetSchema", "rs": "urn:schemas-microsoft-com:rowset"}

Expand Down
4 changes: 1 addition & 3 deletions ca_on_windsor/people.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import json

import requests

from utils import CanadianPerson as Person
from utils import CanadianScraper

Expand All @@ -12,7 +10,7 @@ class WindsorPersonScraper(CanadianScraper):
def scrape(self):
page = self.lxmlize(COUNCIL_PAGE)
data_url = page.xpath('//comment()[contains(., "SITE JS")]/following-sibling::script/@src')[0]
data = json.loads(requests.get(data_url).text.split(" = ")[1])
data = json.loads(self.get(data_url).text.split(" = ")[1])
nav_items = []
for item in data:
if item["RollupType"] == "SidebarNavigation":
Expand Down
4 changes: 1 addition & 3 deletions ca_qc_brossard/people.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import json
import re

import requests

from utils import CanadianPerson as Person
from utils import CanadianScraper

Expand Down Expand Up @@ -33,7 +31,7 @@ def get_children(parent_id, element_dict):
return return_list

# The whole page is rendered in javascript and stored as a massive json object
page = requests.get(DATA_PAGE)
page = self.get(DATA_PAGE)
page = json.loads(page.content)
containers = page["content"].values()
for container in containers:
Expand Down
3 changes: 1 addition & 2 deletions disabled/ca_nl_municipalities/people.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import subprocess
import tempfile

import requests
from pupa.scrape import Organization

from utils import CanadianPerson as Person
Expand All @@ -17,7 +16,7 @@ def scrape(self):
page = self.lxmlize(COUNCIL_PAGE)
url = page.xpath('//a[contains(text(),"Municipal Directory")]/@href')[0]

response = requests.get(url).content
response = self.get(url).content
with tempfile.NamedTemporaryFile(delete_on_close=False) as pdf:
pdf.write(response)

Expand Down
3 changes: 1 addition & 2 deletions disabled/ca_ns_municipalities/people.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import subprocess
import tempfile

import requests
from pupa.scrape import Organization

from utils import CanadianPerson as Person
Expand All @@ -14,7 +13,7 @@

class NovaScotiaMunicipalitiesPersonScraper(CanadianScraper):
def scrape(self):
response = requests.get(COUNCIL_PAGE).content
response = self.get(COUNCIL_PAGE).content
with tempfile.NamedTemporaryFile(delete_on_close=False) as pdf:
pdf.write(response)

Expand Down
3 changes: 1 addition & 2 deletions disabled/ca_sk_municipalities/people.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import subprocess
import tempfile

import requests
from pupa.scrape import Organization

from utils import CanadianPerson as Person
Expand All @@ -15,7 +14,7 @@

class SaskatchewanMunicipalitiesPersonScraper(CanadianScraper):
def scrape(self):
response = requests.get(COUNCIL_PAGE).read()
response = self.get(COUNCIL_PAGE).read()
with tempfile.NamedTemporaryFile(delete_on_close=False) as pdf:
pdf.write(response)

Expand Down
3 changes: 1 addition & 2 deletions disabled/ca_yt_municipalities/people.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import subprocess
import tempfile

import requests
from pupa.scrape import Organization

from utils import CanadianPerson as Person
Expand All @@ -14,7 +13,7 @@

class YukonMunicipalitiesPersonScraper(CanadianScraper):
def scrape(self):
response = requests.get(COUNCIL_PAGE).content
response = self.get(COUNCIL_PAGE).content
with tempfile.NamedTemporaryFile(delete_on_close=False) as pdf:
pdf.write(response)

Expand Down

0 comments on commit a7d4dd2

Please sign in to comment.