Skip to content

Commit

Permalink
ATProto.send: generate external embeds for link previews, take 3
Browse files Browse the repository at this point in the history
one last time with feeling. for #1411
  • Loading branch information
snarfed committed Dec 1, 2024
1 parent bc3885a commit 36256ac
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 31 deletions.
55 changes: 33 additions & 22 deletions atproto.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from google.cloud import ndb
import googleapiclient.discovery
from granary import as1, bluesky
from granary.bluesky import Bluesky, FROM_AS1_TYPES
from granary.bluesky import Bluesky, FROM_AS1_TYPES, _to_external_embed
from granary.source import html_to_text, INCLUDE_LINK, Source
from lexrpc import Client, ValidationError
from requests import RequestException
Expand All @@ -53,6 +53,7 @@
import ids
from models import Follower, Object, PROTOCOLS, Target, User
from protocol import Protocol
import web

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -824,27 +825,21 @@ def _convert(cls, obj, fetch_blobs=False, from_user=None):

obj_as1 = obj.as1

# generate link preview attachment for first link in content, if any
# Source.postprocess_object(
# (as1.get_object(obj_as1) if obj_as1.get('objectType') == 'activity'
# else obj_as1),
# first_link_to_attachment=True)
# Fetch the remote blob at `url` and record it in the enclosing `blobs`
# dict, keyed by URL. Does nothing if `url` is empty or already in `blobs`.
#
# Args:
#   url: str, URL of the remote blob
#   blob_field: mapping; `blob_field[name]` is read for its optional
#     'maxSize' and 'accept' entries
#   name: str, key into `blob_field`
#   check_size: bool, whether to pass the 'maxSize' value as `max_size`
#   check_type: bool, whether to pass the 'accept' value as `accept_types`
def fetch_blob(url, blob_field, name, check_size=True, check_type=True):
if url and url not in blobs:
# blob_field[name] is only looked up when the matching check is enabled;
# otherwise the constraint is passed as None
max_size = blob_field[name].get('maxSize') if check_size else None
accept = blob_field[name].get('accept') if check_type else None
try:
blob = AtpRemoteBlob.get_or_create(
url=url, get_fn=util.requests_get, max_size=max_size,
accept_types=accept)
blobs[url] = blob.as_object()
except (RequestException, ValidationError) as e:
# best effort: log the failure and leave this URL out of `blobs`
logger.info(f'failed, skipping {url} : {e}')

blobs = {} # maps str URL to dict blob object
if fetch_blobs:
# Fetch the remote blob at `url` and record it in the enclosing `blobs`
# dict, keyed by URL. Does nothing if `url` is empty or already in `blobs`.
#
# Args:
#   url: str, URL of the remote blob
#   blob_field: mapping; `blob_field[name]` is read for its optional
#     'maxSize' and 'accept' entries
#   name: str, key into `blob_field`
#   check_size: bool, whether to pass the 'maxSize' value as `max_size`
#   check_type: bool, whether to pass the 'accept' value as `accept_types`
def fetch_blob(url, blob_field, name, check_size=True, check_type=True):
if url and url not in blobs:
# blob_field[name] is only looked up when the matching check is enabled;
# otherwise the constraint is passed as None
max_size = blob_field[name].get('maxSize') if check_size else None
accept = blob_field[name].get('accept') if check_type else None
try:
blob = AtpRemoteBlob.get_or_create(
url=url, get_fn=util.requests_get, max_size=max_size,
accept_types=accept)
blobs[url] = blob.as_object()
except (RequestException, ValidationError) as e:
# best effort: log the failure and leave this URL out of `blobs`
logger.info(f'failed, skipping {url} : {e}')

for o in obj_as1, as1.get_object(obj_as1):
for o in obj.as1, as1.get_object(obj.as1):
for url in util.get_urls(o, 'image'):
# TODO: maybe eventually check size and type? the current
# 1MB limit feels too small though, and the AppView doesn't
Expand All @@ -865,25 +860,41 @@ def fetch_blob(url, blob_field, name, check_size=True, check_type=True):
fetch_blob(url, props, name='thumb',
check_size=False, check_type=False)

inner_obj = as1.get_object(obj_as1) or obj_as1
inner_obj = as1.get_object(obj.as1) or obj.as1
orig_url = as1.get_url(inner_obj) or inner_obj.get('id')

# convert! using our records in the datastore and fetching code instead
# of granary's
client = DatastoreClient(f'https://{os.environ["APPVIEW_HOST"]}')
as_embed = obj.atom or obj.rss
try:
ret = bluesky.from_as1(cls.translate_ids(obj_as1), blobs=blobs,
ret = bluesky.from_as1(cls.translate_ids(obj.as1), blobs=blobs,
client=client, original_fields_prefix='bridgy',
as_embed=as_embed)
except (ValueError, RequestException):
logger.info(f"Couldn't convert to ATProto", exc_info=True)
return {}

# if there are any links, generate an external embed as a preview
# for the first link
if ret.get('$type') == 'app.bsky.feed.post' and not ret.get('embed'):
for facet in ret.get('facets', []):
if feats := facet.get('features'):
if feats[0]['$type'] == 'app.bsky.richtext.facet#link':
if link := web.Web.load(feats[0]['uri'], metaformats=True,
authorship_fetch_mf2=False,
raise_=False):
if img := util.get_url(link.as1, 'image'):
props = appview.defs['app.bsky.embed.external#external']['properties']
fetch_blob(img, props, name='thumb',
check_size=False, check_type=False)
ret['embed'] = _to_external_embed(link.as1, blobs=blobs)
break

if from_proto != ATProto:
if ret['$type'] == 'app.bsky.actor.profile':
# populated by Protocol.convert
if orig_summary := obj_as1.get('bridgyOriginalSummary'):
if orig_summary := obj.as1.get('bridgyOriginalSummary'):
ret['bridgyOriginalDescription'] = orig_summary
else:
# don't use granary's since it will include source links
Expand Down
13 changes: 6 additions & 7 deletions tests/test_atproto.py
Original file line number Diff line number Diff line change
Expand Up @@ -1475,7 +1475,6 @@ def test_send_note_existing_repo(self, mock_create_task):

mock_create_task.assert_called() # atproto-commit

@skip
@patch.object(tasks_client, 'create_task', return_value=Task(name='my task'))
@patch('requests.get', side_effect=[
requests_response(f"""\
Expand All @@ -1486,15 +1485,15 @@ def test_send_note_existing_repo(self, mock_create_task):
<meta property="og:title" content="Titull" />
<meta property="og:description" content="Descrypshun" />
</head>
</html>""", url='http://orig/inal'),
</html>""", url='http://orig.co/inal'),
requests_response('blob contents', content_type='image/png'),
])
def test_send_note_first_link_to_attachment(self, _, __):
def test_send_note_first_link_preview_embed(self, _, __):
user = self.make_user_and_repo()

obj = Object(id='fake:post', source_protocol='fake', our_as1={
**NOTE_AS,
'content': 'My <a href="http://orig/inal">original</a> post',
'content': 'My <a href="http://orig.co/inal">original</a> post',
})
self.assertTrue(ATProto.send(obj, 'https://bsky.brid.gy'))

Expand All @@ -1504,14 +1503,14 @@ def test_send_note_first_link_to_attachment(self, _, __):
last_tid = arroba.util.int_to_tid(arroba.util._tid_ts_last)
self.assertEqual({
**NOTE_BSKY,
'bridgyOriginalText': 'My <a href="http://orig/inal">original</a> post',
'bridgyOriginalText': 'My <a href="http://orig.co/inal">original</a> post',
'embed': {
'$type': 'app.bsky.embed.external',
'external': {
'$type': 'app.bsky.embed.external#external',
'description': 'Descrypshun',
'title': 'Titull',
'uri': 'http://orig/inal',
'uri': 'http://orig.co/inal',
'thumb': {
'$type': 'blob',
'mimeType': 'image/png',
Expand All @@ -1525,7 +1524,7 @@ def test_send_note_first_link_to_attachment(self, _, __):
'index': {'byteStart': 3, 'byteEnd': 11},
'features': [{
'$type': 'app.bsky.richtext.facet#link',
'uri': 'http://orig/inal',
'uri': 'http://orig.co/inal',
}],
}],
}, repo.get_record('app.bsky.feed.post', last_tid))
Expand Down
2 changes: 0 additions & 2 deletions tests/test_integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,8 +506,6 @@ def test_activitypub_follow_bsky_bot_bad_username_error(self, mock_get):
requests_response(PROFILE_GETRECORD), # alice profile
requests_response(DID_DOC), # alice DID
requests_response(PROFILE_GETRECORD), # alice profile
# TODO: uncomment for link preview embeds
# requests_response(''), # fed.brid.gy/docs for preview embed
requests_response({ # getConvoForMembers
'convo': {
'id': 'convo123',
Expand Down

0 comments on commit 36256ac

Please sign in to comment.