From a6fcfd5924d9da1246021cd4ee8ef9cf5502c8b1 Mon Sep 17 00:00:00 2001
From: StephanAkkerman
Date: Thu, 16 Nov 2023 16:19:37 +0100
Subject: [PATCH] Catch parse tweet bugs

Is there not a library for this?
---
 src/util/parse_tweet.py | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/src/util/parse_tweet.py b/src/util/parse_tweet.py
index c9e79e5a..6996e2bd 100644
--- a/src/util/parse_tweet.py
+++ b/src/util/parse_tweet.py
@@ -113,16 +113,18 @@ def parse_tweet(tweet: dict, update_tweet_id: bool = False):
     # Media
     media = []
     media_types = []
-    if "extended_entities" in tweet["legacy"].keys():
-        if "media" in tweet["legacy"]["extended_entities"].keys():
-            media = [
-                image["media_url_https"]
-                for image in tweet["legacy"]["extended_entities"]["media"]
-            ]
-            # photo, video
-            media_types = [
-                image["type"] for image in tweet["legacy"]["extended_entities"]["media"]
-            ]
+    if "legacy" in tweet.keys():
+        if "extended_entities" in tweet["legacy"].keys():
+            if "media" in tweet["legacy"]["extended_entities"].keys():
+                media = [
+                    image["media_url_https"]
+                    for image in tweet["legacy"]["extended_entities"]["media"]
+                ]
+                # photo, video
+                media_types = [
+                    image["type"]
+                    for image in tweet["legacy"]["extended_entities"]["media"]
+                ]
 
     # Remove t.co url from text
     text = remove_twitter_url_at_end(tweet["legacy"]["full_text"])