Skip to content

Commit

Permalink
Support the SHUB_APIURL and SHUB_STORAGE environment variables (#177)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gallaecio authored Dec 16, 2024
1 parent 6354a96 commit 2fd8f21
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 14 deletions.
7 changes: 3 additions & 4 deletions docs/quickstart.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,14 @@ it provides better response time and improved bandwidth usage::
Basic usage
-----------

Instantiate a new client with your Scrapinghub API key::
Instantiate a new client with your Scrapy Cloud API key::

>>> from scrapinghub import ScrapinghubClient
>>> apikey = '84c87545607a4bc0****************' # your API key as a string
>>> client = ScrapinghubClient(apikey)

.. note::
Your Scrapinghub API key is available at https://app.scrapinghub.com/account/apikey
after you sign up with the service.
.. note:: Your Scrapy Cloud API key is available at the bottom of
https://app.zyte.com/o/settings after you sign up.

List your deployed projects::

Expand Down
13 changes: 8 additions & 5 deletions scrapinghub/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,20 @@ def request(self, *args, **kwargs):


class ScrapinghubClient(object):
"""Main class to work with Scrapinghub API.
"""Main class to work with the Scrapy Cloud API.
:param auth: (optional) Scrapinghub APIKEY or other SH auth credentials.
If not provided, it will read, respectively, from
:param auth: (optional) Scrapy Cloud API key or other Scrapy Cloud auth
credentials. If not provided, it will read, respectively, from
``SH_APIKEY`` or ``SHUB_JOBAUTH`` environment variables.
``SHUB_JOBAUTH`` is available by default in *Scrapy Cloud*. It does not
provide access to all endpoints (e.g. job scheduling), but it does allow
access to job data, collections and the crawl frontier.
If you need full access to *Scrapy Cloud* features, you'll need to
provide a Scrapinghub APIKEY through this argument or deploying ``SH_APIKEY``.
:param dash_endpoint: (optional) Scrapinghub Dash panel url.
provide a Scrapy Cloud API key through this argument or by setting the
``SH_APIKEY`` environment variable.
:param dash_endpoint: (optional) Scrapy Cloud API URL.
If not provided, it will be read from the ``SHUB_APIURL`` environment
variable, or fall back to ``"https://app.zyte.com/api/"``.
:param kwargs: (optional) Additional arguments for
:class:`~scrapinghub.hubstorage.HubstorageClient` constructor.
Expand Down
6 changes: 4 additions & 2 deletions scrapinghub/hubstorage/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
High level Hubstorage client
"""
import logging
import os

from requests import session, HTTPError, ConnectionError, Timeout
from retrying import Retrying
from .utils import xauth, urlpathjoin
Expand Down Expand Up @@ -71,14 +73,14 @@ def __init__(self, auth=None, endpoint=None, connection_timeout=None,
Args:
auth (str): The client authentication token
endpoint (str): The API root address
endpoint (str, optional): The API root address. If not provided, it will be read from the ``SHUB_STORAGE`` environment variable, or fall back to ``"https://storage.scrapinghub.com/"``.
connection_timeout (int): The connection timeout for a _single request_
max_retries (int): The number of time idempotent requests may be retried
max_retry_time (int): The time, in seconds, during which the client can retry a request
use_msgpack (bool): Flag to enable/disable msgpack use for serialization
"""
self.auth = xauth(auth)
self.endpoint = endpoint or self.DEFAULT_ENDPOINT
self.endpoint = endpoint or os.getenv("SHUB_STORAGE", self.DEFAULT_ENDPOINT)
self.connection_timeout = connection_timeout or self.DEFAULT_CONNECTION_TIMEOUT_S
self.user_agent = user_agent or self.DEFAULT_USER_AGENT
self.session = self._create_session()
Expand Down
4 changes: 2 additions & 2 deletions scrapinghub/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class Connection(object):
"""Main class to access Scrapinghub API.
"""

DEFAULT_ENDPOINT = 'https://app.scrapinghub.com/api/'
DEFAULT_ENDPOINT = 'https://app.zyte.com/api/'

API_METHODS = {
'addversion': 'scrapyd/addversion',
Expand Down Expand Up @@ -66,7 +66,7 @@ def __init__(self, apikey=None, password='', _old_passwd='',
warnings.warn("A lot of endpoints support authentication only via apikey.", stacklevel=2)
self.apikey = apikey
self.password = password or ''
self.url = url or self.DEFAULT_ENDPOINT
self.url = url or os.getenv("SHUB_APIURL", self.DEFAULT_ENDPOINT)
self._session = self._create_session()
self._connection_timeout = connection_timeout

Expand Down
2 changes: 1 addition & 1 deletion tests/legacy/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


def test_connection_class_attrs():
assert Connection.DEFAULT_ENDPOINT == 'https://app.scrapinghub.com/api/'
assert Connection.DEFAULT_ENDPOINT == 'https://app.zyte.com/api/'
assert isinstance(Connection.API_METHODS, dict)


Expand Down

0 comments on commit 2fd8f21

Please sign in to comment.