Improved error handling + new model

close #495
StephanAkkerman · Feb 12, 2024 · a3899d7 · a3899d7
1 parent 29ede31
commit a3899d7
Show file tree

Hide file tree

Showing 9 changed files with 109 additions and 105 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,7 +7,7 @@ models/*
 yield.png
 src/cogs/loops/option_alert.py
 .vscode/
-data/
+data/*
 scraper.py
 scraped/*
 logs/*

diff --git a/config.yaml b/config.yaml
@@ -116,16 +116,6 @@ LOOPS:
     FOREX:
       ENABLED: True
 
-  LISTINGS:
-    ENABLED: True
-    CHANNEL: 🆕┃listings
-
-    DELISTINGS:
-      ENABLED: True
-      CHANNEL: ⛔┃delistings
-
-    EXCHANGES: ['kucoin', 'binance', 'coinbase']
-
   LIQUIDATIONS:
     ENABLED: True
     CHANNEL: 💸┃liquidations
@@ -140,6 +130,10 @@ LOOPS:
     STOCKS:
       ENABLED: True
 
+  NEW_LISTINGS:
+    ENABLED: True
+    CHANNEL: 🆕┃listings
+
   NFTS:
     ENABLED: True
 

diff --git a/requirements.txt b/requirements.txt
@@ -10,7 +10,6 @@ tradingview-ta==3.3.0
 aiohttp>=3.8.0
 asyncpraw==7.7.1
 ccxt==4.1.76
-nltk==3.8.1
 transformers==4.35.0
 matplotlib==3.8.2
 scipy==1.11.4

diff --git a/src/cogs/loops/liquidations.py b/src/cogs/loops/liquidations.py
@@ -34,7 +34,7 @@ def __init__(self, bot: commands.Bot) -> None:
             )
             self.post_liquidations.start()
 
-    async def get_df(self):
+    async def get_df(self) -> pd.DataFrame:
         data = await get_json_data(
             "https://open-api.coinglass.com/public/v2/liquidation_history?time_type=all&symbol=all",
             headers={
@@ -43,6 +43,10 @@ async def get_df(self):
             },
         )
 
+        if "data" not in data:
+            print("Could not get liquidation data from coinglass")
+            return pd.DataFrame()
+
         df = pd.DataFrame(data["data"])
 
         df.rename(
@@ -69,6 +73,10 @@ async def post_liquidations(self):
 
         # Process dataframe
         df = await self.get_df()
+
+        if df is None or df.empty:
+            return
+
         df_price = df[["price"]].copy()
         df_without_price = df.drop("price", axis=1)
         df_without_price["Shorts"] = df_without_price["Shorts"] * -1

diff --git a/src/cogs/loops/nfts.py b/src/cogs/loops/nfts.py
@@ -374,37 +374,43 @@ async def top_cmc():
     nfts = []
 
     session = AsyncHTMLSession()
-    r = await session.get("https://coinmarketcap.com/nft/collections/")
-    rows = r.html.find("tbody tr")
-
-    for row in rows:
-        d = {}
-        columns = row.find("td")
-
-        if len(columns) < 6:
-            continue
 
-        if columns[1].find("div", first=True) is not None:
-            url = columns[1].find("a", first=True)
-            if url:
-                url = url.attrs["href"]
-
-            name_and_net = columns[1].find("span")
-            name = name_and_net[0].text.strip()
-            volume_and_change = columns[2].text.split("\n\n")
-            avg_price_and_change = columns[5].text.split("\n\n")
-            change = avg_price_and_change[1].replace("%", "")
-
-            if change != "-":
-                price = f"{avg_price_and_change[0]} ({format_change(float(change))})"
-            else:
-                price = avg_price_and_change[0]
-
-            d["symbol"] = f"[{name}]({url})"
-            d["volume"] = volume_and_change[0]
-            d["price"] = price
-
-            nfts.append(d)
+    try:
+        r = await session.get("https://coinmarketcap.com/nft/collections/")
+        rows = r.html.find("tbody tr")
+
+        for row in rows:
+            d = {}
+            columns = row.find("td")
+
+            if len(columns) < 6:
+                continue
+
+            if columns[1].find("div", first=True) is not None:
+                url = columns[1].find("a", first=True)
+                if url:
+                    url = url.attrs["href"]
+
+                name_and_net = columns[1].find("span")
+                name = name_and_net[0].text.strip()
+                volume_and_change = columns[2].text.split("\n\n")
+                avg_price_and_change = columns[5].text.split("\n\n")
+                change = avg_price_and_change[1].replace("%", "")
+
+                if change != "-":
+                    price = (
+                        f"{avg_price_and_change[0]} ({format_change(float(change))})"
+                    )
+                else:
+                    price = avg_price_and_change[0]
+
+                d["symbol"] = f"[{name}]({url})"
+                d["volume"] = volume_and_change[0]
+                d["price"] = price
+
+                nfts.append(d)
+    except Exception as e:
+        print("Error in top_cmc:", e)
 
     await session.close()
     return pd.DataFrame(nfts)

diff --git a/src/cogs/loops/trending.py b/src/cogs/loops/trending.py
@@ -107,6 +107,9 @@ async def crypto(self) -> None:
     async def crypto_categories(self) -> None:
         df = await get_top_categories()
 
+        if df is None or df.empty:
+            return
+
         # Only use top 10
         df = df.head(10)
 

diff --git a/src/main.py b/src/main.py
@@ -105,9 +105,10 @@ def load_folder(foldername: str) -> None:
     # Start by loading the database
     bot.load_extension("util.db")
 
-    # Ensure the logs directory exists
+    # Ensure all directories exist
     os.makedirs("logs", exist_ok=True)
     os.makedirs("temp", exist_ok=True)
+    os.makedirs("data", exist_ok=True)
 
     # Load commands
     load_folder("commands")
@@ -119,6 +120,10 @@ def load_folder(foldername: str) -> None:
         else os.getenv("DISCORD_TOKEN")
     )
 
+    if not TOKEN:
+        print("No Discord token found. Exiting...")
+        sys.exit(1)
+
     # Main event loop
     try:
         bot.loop.run_until_complete(bot.run(TOKEN))

diff --git a/src/util/cg_data.py b/src/util/cg_data.py
@@ -206,9 +206,11 @@ async def get_coin_info(
     # Return the information
     return (
         total_vol,
-        f"https://coingecko.com/en/coins/{id}"
-        if id
-        else "https://coingecko.com/en/coins/id_not_found",
+        (
+            f"https://coingecko.com/en/coins/{id}"
+            if id
+            else "https://coingecko.com/en/coins/id_not_found"
+        ),
         exchanges,
         price,
         format_change(change) if change else "N/A",
@@ -240,6 +242,10 @@ async def get_trending_coins() -> pd.DataFrame:
     try:
         table = soup.find("table")
 
+        if table is None:
+            print("Error getting trending coingecko coins")
+            return pd.DataFrame()
+
         # Try converting the table to pandas
         df = pd.read_html(StringIO(str(table)))[0]
 
@@ -283,13 +289,17 @@ async def get_trending_coins() -> pd.DataFrame:
         return pd.DataFrame()
 
 
-async def get_top_categories():
+async def get_top_categories() -> pd.DataFrame | None:
     html = scraper.get("https://www.coingecko.com/en/categories").text
 
     soup = BeautifulSoup(html, "html.parser")
 
     table = soup.find("table")
 
+    if table is None:
+        print("Error getting top categories from CoinGecko")
+        return
+
     data = []
     for tr in table.find_all("tr")[1:]:
         coin_data = {}

diff --git a/src/util/sentiment_analyis.py b/src/util/sentiment_analyis.py
@@ -4,23 +4,31 @@
 
 # > Third party libraries
 import discord
-from transformers import BertTokenizer, BertForSequenceClassification, pipeline
-import nltk
-import numpy as np
-from nltk.sentiment.vader import SentimentIntensityAnalyzer
-
+from transformers import AutoTokenizer, BertForSequenceClassification, pipeline
 
 # Load model
-try:
-    finbert = BertForSequenceClassification.from_pretrained('./models')
-    tokenizer = BertTokenizer.from_pretrained("yiyanghkust/finbert-tone")
-    nlp = pipeline("text-classification", model=finbert, tokenizer=tokenizer)
-    use_finbert = True
-except Exception as e:
-    use_finbert = False
-    print("Did not load premium model...")
-
-def classify_sentiment(text: str) -> tuple[str,str]:
+model = BertForSequenceClassification.from_pretrained(
+    "StephanAkkerman/FinTwitBERT-sentiment",
+    num_labels=3,
+    id2label={0: "NEUTRAL", 1: "BULLISH", 2: "BEARISH"},
+    label2id={"NEUTRAL": 0, "BULLISH": 1, "BEARISH": 2},
+    cache_dir="models/",
+)
+model.config.problem_type = "single_label_classification"
+tokenizer = AutoTokenizer.from_pretrained(
+    "StephanAkkerman/FinTwitBERT-sentiment", cache_dir="models/"
+)
+model.eval()
+pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
+
+label_to_emoji = {
+    "NEUTRAL": "🦆",
+    "BULLISH": "🐂",
+    "BEARISH": "🐻",
+}
+
+
+def classify_sentiment(text: str) -> tuple[str, str]:
     """
     Uses the text of a tweet to classify the sentiment of the tweet.
 
@@ -34,25 +42,16 @@ def classify_sentiment(text: str) -> tuple[str,str]:
     np.ndarray
         The probability of the tweet being bullish, neutral, or bearish.
     """
-
-    pred = nlp(text)[0]
-    label = pred['label']
-
-    if label == "Positive":
-        label = "🐂 - Bullish"
-        emoji = "🐂"
-    elif label == "Neutral":
-        label = "🦆 - Neutral"
-        emoji = "🦆"
-    elif label == "Negative":
-        label = "🐻 - Bearish"
-        emoji = "🐻"
-
-    #score = round(score*100, 2)
-
+
+    label = pipe(text)[0].get("label")
+    emoji = label_to_emoji[label]
+
+    label = f"{emoji} - {label.capitalize()}"
+
     return label, emoji
 
-def add_sentiment(e : discord.Embed, text: str) -> tuple[discord.Embed, str]:
+
+def add_sentiment(e: discord.Embed, text: str) -> tuple[discord.Embed, str]:
     """
     Adds sentiment to a discord embed, based on the given text.
 
@@ -71,34 +70,14 @@ def add_sentiment(e : discord.Embed, text: str) -> tuple[discord.Embed, str]:
         str
             The sentiment of the tweet.
     """
-    
+
     # Remove quote tweet formatting
-    if use_finbert:
-        prediction, emoji = classify_sentiment(text.split('\n\n> [@')[0])
-    else:
-        try:
-            analyzer = SentimentIntensityAnalyzer()
-            sentiment = analyzer.polarity_scores(text)
-        except LookupError:
-            # Download the NLTK packages
-            nltk.download("vader_lexicon")
-
-            # Try again
-            analyzer = SentimentIntensityAnalyzer()
-            sentiment = analyzer.polarity_scores(text)
-
-        neg = sentiment['neg']
-        neu = sentiment['neu']
-        pos = sentiment['pos']
-
-        # Pick the highest value
-        prediction = ['🐻 - Bearish', '🦆 - Neutral', '🐂 - Bullish'][np.argmax([neg, neu, pos])]
-        emoji = prediction[0]
-
+    prediction, emoji = classify_sentiment(text.split("\n\n> [@")[0])
+
     e.add_field(
         name="Sentiment",
         value=f"{prediction}",
         inline=False,
     )
-    
-    return e, emoji
+
+    return e, emoji