Skip link preview embeds when web.Web.load raises AssertionError (for example
when an Object is already stored for the URL with a non-web source_protocol),
and add tests for link previews of non-web and blocklisted URLs.

NOTE(review): line breaks and leading whitespace were reconstructed from the
hunk headers and Python syntax; confirm context-line indentation against the
target files before applying.

diff --git a/atproto.py b/atproto.py
index bc19ebfb..c790ea82 100644
--- a/atproto.py
+++ b/atproto.py
@@ -879,9 +879,17 @@ def fetch_blob(url, blob_field, name, check_size=True, check_type=True):
             for facet in ret.get('facets', []):
                 if feats := facet.get('features'):
                     if feats[0]['$type'] == 'app.bsky.richtext.facet#link':
-                        if link := web.Web.load(feats[0]['uri'], metaformats=True,
+                        try:
+                            link = web.Web.load(feats[0]['uri'], metaformats=True,
                                                 authorship_fetch_mf2=False,
-                                                raise_=False):
+                                                raise_=False)
+                        except AssertionError as e:
+                            # we probably have an Object already stored for this URL
+                            # with source_protocol that's not web
+                            logger.warning(e)
+                            continue
+
+                        if link:
                             if img := util.get_url(link.as1, 'image'):
                                 props = appview.defs['app.bsky.embed.external#external']['properties']
                                 fetch_blob(img, props, name='thumb',
diff --git a/tests/test_atproto.py b/tests/test_atproto.py
index 85c2ba94..87dad606 100644
--- a/tests/test_atproto.py
+++ b/tests/test_atproto.py
@@ -1535,6 +1535,49 @@ def test_send_note_first_link_preview_embed(self, _, __):
                 },
             },
         }, repo.get_record('app.bsky.feed.post', last_tid), ignore=['facets'])
+    @patch.object(tasks_client, 'create_task', return_value=Task(name='my task'))
+    @patch('requests.get', side_effect=[
+        requests_response(f'A poast',
+                          url='http://orig.co/inal'),
+    ])
+    def test_send_note_link_preview_non_web_url(self, mock_get, mock_create_task):
+        user = self.make_user_and_repo()
+
+        obj = Object(id='fake:post', source_protocol='fake', our_as1={
+            **NOTE_AS,
+            'content': 'My original post',
+        })
+        self.assertTrue(ATProto.send(obj, 'https://bsky.brid.gy'))
+
+        # check repo, record
+        did = user.key.get().get_copy(ATProto)
+        repo = self.storage.load_repo(did)
+        last_tid = arroba.util.int_to_tid(arroba.util._tid_ts_last)
+        self.assert_equals({
+            **NOTE_BSKY,
+            'bridgyOriginalText': 'My original post',
+        }, repo.get_record('app.bsky.feed.post', last_tid), ignore=['facets'])
+
+    @patch.object(tasks_client, 'create_task', return_value=Task(name='my task'))
+    @patch('requests.get')
+    def test_send_note_link_preview_blocklisted_domain(self, mock_get, __):
+        user = self.make_user_and_repo()
+
+        obj = Object(id='fake:post', source_protocol='fake', our_as1={
+            **NOTE_AS,
+            'content': 'My original post',
+        })
+        self.assertTrue(ATProto.send(obj, 'https://bsky.brid.gy'))
+
+        # check repo, record
+        did = user.key.get().get_copy(ATProto)
+        repo = self.storage.load_repo(did)
+        last_tid = arroba.util.int_to_tid(arroba.util._tid_ts_last)
+        self.assert_equals({
+            **NOTE_BSKY,
+            'bridgyOriginalText': 'My original post',
+        }, repo.get_record('app.bsky.feed.post', last_tid), ignore=['facets'])
+
     @patch.object(tasks_client, 'create_task', return_value=Task(name='my task'))
     @patch('requests.get', side_effect=[
         requests_response(f"""\