diff --git a/requirements.txt b/requirements.txt index 2666c38c..4df335bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ lxml>=4.1.1 requests>=2.20.0 +html5lib==1.1 diff --git a/safaribooks.py b/safaribooks.py index 1d23bee3..ebe91957 100755 --- a/safaribooks.py +++ b/safaribooks.py @@ -593,9 +593,11 @@ def get_html(self, url): ) root = None + html_text = response.text try: - root = html.fromstring(response.text, base_url=SAFARI_BASE_URL) - + if not re.search("