Skip to content

Commit

Permalink
Add proxy config to prefect flows
Browse files Browse the repository at this point in the history
  • Loading branch information
VincentAntoine committed Mar 24, 2021
1 parent 7ff7c07 commit 6239f04
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
10 changes: 10 additions & 0 deletions datascience/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,17 @@

# Directory containing this config module
ROOT_DIRECTORY = Path(__file__).parent
# "src" sub-directory of ROOT_DIRECTORY
LIBRARY_LOCATION = ROOT_DIRECTORY / "src"

# Absolute path of the ERS directory
# NOTE(review): presumably where ERS files are stored, judging by the name - confirm
ERS_FILES_LOCATION = Path("/opt2/monitorfish-data/ers")

# Proxies for pipeline flows requiring Internet access
# Maps URL scheme -> proxy URL; both schemes go through the same proxy endpoint.
# NOTE(review): the proxy address is hard-coded - confirm it is valid in every
# environment where the flows run.
PROXIES = {
"http": "http://172.27.229.197:8090",
"https": "http://172.27.229.197:8090",
}

# URLs to fetch data from
# NOTE(review): presumably the ports dataset on data.gouv.fr - confirm
PORTS_URL = (
"https://www.data.gouv.fr/fr/datasets/r/60fe965d-5888-493b-9321-24bc3b1f84db"
)
8 changes: 4 additions & 4 deletions datascience/src/pipeline/flows/ports.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from dotenv import load_dotenv
from prefect import Flow, Parameter, task

from config import LIBRARY_LOCATION, PORTS_URL
from config import LIBRARY_LOCATION, PORTS_URL, PROXIES
from src.db_config import create_engine
from src.pipeline.processing import combine_overlapping_columns
from src.pipeline.utils import delete
Expand Down Expand Up @@ -411,8 +411,8 @@ def load_ports(ports):


@task(checkpoint=False)
def extract_datagouv_ports(ports_url: str = PORTS_URL):
r = requests.get(ports_url)
def extract_datagouv_ports(ports_url: str = PORTS_URL, proxies: dict = None):
r = requests.get(ports_url, proxies=proxies)
f = io.StringIO(r.text)

dtype = {
Expand Down Expand Up @@ -467,5 +467,5 @@ def load_ports_to_monitorfish(ports):


with Flow("Extract ports from data.gouv.fr and load to Monitorfish") as flow:
ports = extract_datagouv_ports()
ports = extract_datagouv_ports(ports_url=PORTS_URL, proxies=PROXIES)
load_ports_to_monitorfish(ports)

0 comments on commit 6239f04

Please sign in to comment.