Skip to content

Commit

Permalink
Support the SHUB_APIURL and SHUB_STORAGE environment variables (#177)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio authored Dec 16, 2024
1 parent 6354a96 commit 2fd8f21
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 14 deletions.
7 changes: 3 additions & 4 deletions docs/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,14 @@ it provides better response time and improved bandwidth usage::
Basic usage
-----------

Instantiate a new client with your Scrapinghub API key::
Instantiate a new client with your Scrapy Cloud API key::

>>> from scrapinghub import ScrapinghubClient
>>> apikey = '84c87545607a4bc0****************' # your API key as a string
>>> client = ScrapinghubClient(apikey)

.. note::
Your Scrapinghub API key is available at https://app.scrapinghub.com/account/apikey
after you sign up with the service.
.. note:: Your Scrapy Cloud API key is available at the bottom of
https://app.zyte.com/o/settings after you sign up.

List your deployed projects::

Expand Down
13 changes: 8 additions & 5 deletions scrapinghub/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,20 @@ def request(self, *args, **kwargs):


class ScrapinghubClient(object):
"""Main class to work with Scrapinghub API.
"""Main class to work with the Scrapy Cloud API.
:param auth: (optional) Scrapinghub APIKEY or other SH auth credentials.
If not provided, it will read, respectively, from
:param auth: (optional) Scrapy Cloud API key or other Scrapy Cloud auth
credentials. If not provided, it will read, respectively, from
``SH_APIKEY`` or ``SHUB_JOBAUTH`` environment variables.
``SHUB_JOBAUTH`` is available by default in *Scrapy Cloud*. It does not
provide access to all endpoints (e.g. job scheduling), but it does allow
access to job data, collections and the crawl frontier.
If you need full access to *Scrapy Cloud* features, you'll need to
provide a Scrapinghub APIKEY through this argument or deploying ``SH_APIKEY``.
:param dash_endpoint: (optional) Scrapinghub Dash panel url.
provide a Scrapy Cloud API key through this argument or by setting the
``SH_APIKEY`` environment variable.
:param dash_endpoint: (optional) Scrapy Cloud API URL.
If not provided, it will be read from the ``SHUB_APIURL`` environment
variable, or fall back to ``"https://app.zyte.com/api/"``.
:param kwargs: (optional) Additional arguments for
:class:`~scrapinghub.hubstorage.HubstorageClient` constructor.
Expand Down
6 changes: 4 additions & 2 deletions scrapinghub/hubstorage/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
High level Hubstorage client
"""
import logging
import os

from requests import session, HTTPError, ConnectionError, Timeout
from retrying import Retrying
from .utils import xauth, urlpathjoin
Expand Down Expand Up @@ -71,14 +73,14 @@ def __init__(self, auth=None, endpoint=None, connection_timeout=None,
Args:
auth (str): The client authentication token
endpoint (str): The API root address
endpoint (str, optional): The API root address. If not provided, it will be read from the ``SHUB_STORAGE`` environment variable, or fall back to ``"https://storage.scrapinghub.com/"``.
connection_timeout (int): The connection timeout for a _single request_
max_retries (int): The number of time idempotent requests may be retried
max_retry_time (int): The time, in seconds, during which the client can retry a request
use_msgpack (bool): Flag to enable/disable msgpack use for serialization
"""
self.auth = xauth(auth)
self.endpoint = endpoint or self.DEFAULT_ENDPOINT
self.endpoint = endpoint or os.getenv("SHUB_STORAGE", self.DEFAULT_ENDPOINT)
self.connection_timeout = connection_timeout or self.DEFAULT_CONNECTION_TIMEOUT_S
self.user_agent = user_agent or self.DEFAULT_USER_AGENT
self.session = self._create_session()
Expand Down
4 changes: 2 additions & 2 deletions scrapinghub/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Connection(object):
"""Main class to access Scrapinghub API.
"""

DEFAULT_ENDPOINT = 'https://app.scrapinghub.com/api/'
DEFAULT_ENDPOINT = 'https://app.zyte.com/api/'

API_METHODS = {
'addversion': 'scrapyd/addversion',
Expand Down Expand Up @@ -66,7 +66,7 @@ def __init__(self, apikey=None, password='', _old_passwd='',
warnings.warn("A lot of endpoints support authentication only via apikey.", stacklevel=2)
self.apikey = apikey
self.password = password or ''
self.url = url or self.DEFAULT_ENDPOINT
self.url = url or os.getenv("SHUB_APIURL", self.DEFAULT_ENDPOINT)
self._session = self._create_session()
self._connection_timeout = connection_timeout

Expand Down
2 changes: 1 addition & 1 deletion tests/legacy/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def test_connection_class_attrs():
assert Connection.DEFAULT_ENDPOINT == 'https://app.scrapinghub.com/api/'
assert Connection.DEFAULT_ENDPOINT == 'https://app.zyte.com/api/'
assert isinstance(Connection.API_METHODS, dict)


Expand Down

0 comments on commit 2fd8f21

Please sign in to comment.