Skip to content

Commit

Permalink
Fixed long tweet support
Browse files (browse the repository at this point in the history)
close #474
  • Loading branch information
StephanAkkerman committed Apr 15, 2024
1 parent 1f78a7a commit 00e9ff9
Showing 1 changed file with 62 additions and 40 deletions.
102 changes: 62 additions & 40 deletions src/util/parse_tweet.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -66,6 +66,7 @@ def save_errored_tweet(tweet, error_msg: str):

def parse_tweet(tweet: dict, update_tweet_id: bool = False):
reply = None
is_long_tweet = False

## TODO: split the below logic up into functions

Expand All @@ -83,8 +84,14 @@ def parse_tweet(tweet: dict, update_tweet_id: bool = False):
)
return
# For long tweets
elif "note_results" in tweet.keys():
tweet = tweet["note_results"]["note_tweet_results"]
elif "note_tweet" in tweet.keys():
is_long_tweet = True
tweet_text = tweet["note_tweet"]["note_tweet_results"]["result"]["text"]
tweet_entities = tweet["note_tweet"]["note_tweet_results"]["result"][
"entity_set"
]
media = tweet["note_tweet"]["note_tweet_results"]["result"]["media"]
tweet_id = tweet["rest_id"]

try:
tweet = tweet["result"]
Expand All @@ -93,23 +100,24 @@ def parse_tweet(tweet: dict, update_tweet_id: bool = False):
return

# Ignore Tweets that are older than the latest tweet
if "legacy" not in tweet:
if "tweet" not in tweet:
save_errored_tweet(tweet, "Error getting tweet key in parse_tweet()")
return

tweet_id = int(tweet["tweet"]["rest_id"])
else:
tweet_id = int(tweet["legacy"]["id_str"])
if not is_long_tweet:
if "legacy" not in tweet:
if "tweet" not in tweet:
save_errored_tweet(tweet, "Error getting tweet key in parse_tweet()")
return

if "core" not in tweet:
if "tweet" in tweet:
tweet = tweet["tweet"]
tweet_id = int(tweet["tweet"]["rest_id"])
else:
save_errored_tweet(
tweet, "Error getting [core][tweet] key in parse_tweet()"
)
return
tweet_id = int(tweet["legacy"]["id_str"])

if "core" not in tweet:
if "tweet" in tweet:
tweet = tweet["tweet"]
else:
save_errored_tweet(
tweet, "Error getting [core][tweet] key in parse_tweet()"
)
return

# So we can use this function recursively
if update_tweet_id:
Expand All @@ -126,33 +134,47 @@ def parse_tweet(tweet: dict, update_tweet_id: bool = False):
# Media
media = []
media_types = []
if "legacy" in tweet.keys():
if "extended_entities" in tweet["legacy"].keys():
if "media" in tweet["legacy"]["extended_entities"].keys():
media = [
image["media_url_https"]
for image in tweet["legacy"]["extended_entities"]["media"]
]
# photo, video
media_types = [
image["type"]
for image in tweet["legacy"]["extended_entities"]["media"]
]

# Remove t.co url from text
text = remove_twitter_url_at_end(tweet["legacy"]["full_text"])

# Tweet url
tweet_url = f"https://twitter.com/user/status/{tweet_id}"
if not is_long_tweet:
if "legacy" in tweet.keys():
if "extended_entities" in tweet["legacy"].keys():
if "media" in tweet["legacy"]["extended_entities"].keys():
media = [
image["media_url_https"]
for image in tweet["legacy"]["extended_entities"]["media"]
]
# photo, video
media_types = [
image["type"]
for image in tweet["legacy"]["extended_entities"]["media"]
]

# Remove t.co url from text
text = remove_twitter_url_at_end(tweet["legacy"]["full_text"])

# Tickers
tickers = get_entities(tweet, "symbols")

# Hashtags
hashtags = get_entities(tweet, "hashtags")

# Tickers
tickers = get_entities(tweet, "symbols")
else:
text = tweet_text

# Hashtags
hashtags = get_entities(tweet, "hashtags")
# Is this correct?
if "inline_media" in media.keys():
media = [image["media_url_https"] for image in media["inline_media"]]
media_types = [image["type"] for image in media["inline_media"]]
tickers = tweet_entities["symbols"]
hashtags = tweet_entities["hashtags"]

quoted_status_result = tweet.get("quoted_status_result")
retweeted_status_result = tweet["legacy"].get("retweeted_status_result")
# Tweet url
tweet_url = f"https://twitter.com/user/status/{tweet_id}"

quoted_status_result = tweet.get("quoted_status_result", False)
retweeted_status_result = tweet.get("legacy", {}).get(
"retweeted_status_result", False
)

e_title = f"{user_name} tweeted"

Expand Down

0 comments on commit 00e9ff9

Please sign in to comment.