From 9f2c4ebe032c81c7dd15c8d84000cb52b26d341b Mon Sep 17 00:00:00 2001 From: Stephan Akkerman Date: Wed, 21 Feb 2024 14:15:01 +0100 Subject: [PATCH] Fixed coingecko scraper close #506 --- requirements.txt | 4 ++-- src/cogs/loops/assets.py | 2 -- src/cogs/loops/liquidations.py | 3 ++- src/main.py | 10 ++-------- src/util/cg_data.py | 21 ++++++++++++++------- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/requirements.txt b/requirements.txt index f2c66007..c6fc8bdc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,5 +14,5 @@ matplotlib==3.8.2 scipy==1.11.4 brotli==1.1.0 py-cord==2.4.1 -cloudscraper==1.2.71 -python-dotenv==1.0.0 \ No newline at end of file +python-dotenv==1.0.1 +tls-client==1.0.1 \ No newline at end of file diff --git a/src/cogs/loops/assets.py b/src/cogs/loops/assets.py index f5078528..0c51558f 100644 --- a/src/cogs/loops/assets.py +++ b/src/cogs/loops/assets.py @@ -186,8 +186,6 @@ async def format_exchange( # Necessary to prevent panda warnings new_df = exchange_df.copy() - print("Exchange df:", exchange_df) - # Add stock data to the DataFrame stock_df = util.vars.assets_db[util.vars.assets_db["exchange"] == "stock"] if not stock_df.empty: diff --git a/src/cogs/loops/liquidations.py b/src/cogs/loops/liquidations.py index eab20cd4..547d0edc 100644 --- a/src/cogs/loops/liquidations.py +++ b/src/cogs/loops/liquidations.py @@ -32,7 +32,8 @@ def __init__(self, bot: commands.Bot) -> None: self.channel = get_channel( self.bot, config["LOOPS"]["LIQUIDATIONS"]["CHANNEL"] ) - self.post_liquidations.start() + # Disabled for now + # self.post_liquidations.start() async def get_df(self) -> pd.DataFrame: data = await get_json_data( diff --git a/src/main.py b/src/main.py index bf9b62e1..6720429b 100644 --- a/src/main.py +++ b/src/main.py @@ -76,11 +76,7 @@ def load_folder(foldername: str) -> None: if filename.endswith(".py") and filename in enabled_cogs: try: # Do not start timeline if the -no_timeline argument is given - if ( - filename == "timeline.py" - and len(sys.argv) > 2 - and sys.argv[2] == "-no_timeline" - ): + if filename == "timeline.py" and "-no_timeline" in sys.argv: continue # Overview.py has no setup function, but should be considered as a loop / cog @@ -110,9 +106,7 @@ def load_folder(foldername: str) -> None: # Read the token from the config TOKEN = ( - os.getenv("DEBUG_TOKEN") - if len(sys.argv) > 1 and sys.argv[1] == "-test" - else os.getenv("DISCORD_TOKEN") + os.getenv("DEBUG_TOKEN") if "-test" in sys.argv else os.getenv("DISCORD_TOKEN") ) if not TOKEN: diff --git a/src/util/cg_data.py b/src/util/cg_data.py index 844a6f41..c3ed92e1 100644 --- a/src/util/cg_data.py +++ b/src/util/cg_data.py @@ -7,7 +7,7 @@ # > Third party libraries from pycoingecko import CoinGeckoAPI -import cloudscraper +import tls_client from bs4 import BeautifulSoup import pandas as pd @@ -18,7 +18,9 @@ from util.formatting import format_change cg = CoinGeckoAPI() -scraper = cloudscraper.create_scraper() +session = tls_client.Session( + client_identifier="chrome112", random_tls_extension_order=True +) def get_crypto_info(ids): @@ -236,9 +238,11 @@ async def get_trending_coins() -> pd.DataFrame: The volumes of the trending coins. """ - html = scraper.get("https://www.coingecko.com/en/watchlists/trending-crypto").text + html = session.get( + "https://www.coingecko.com/en/highlights/trending-crypto", + ) - soup = BeautifulSoup(html, "html.parser") + soup = BeautifulSoup(html.text, "html.parser") try: table = soup.find("table") @@ -250,6 +254,9 @@ async def get_trending_coins() -> pd.DataFrame: # Try converting the table to pandas df = pd.read_html(StringIO(str(table)))[0] + # Drop first row + df = df.drop(0) + # Split the "Coin" column into "Symbol" and "Name" # The last word is the symbol, the rest is the name df["Symbol"] = df["Coin"].apply(lambda x: x.split(" ")[-1]) @@ -264,10 +271,10 @@ async def get_trending_coins() -> pd.DataFrame: df["Symbol"] = "[" + df["Symbol"] + "](" + df["Website"] + ")" # Replace NaN values in '24h Volume' with values from 'Mkt Cap' - df["24h Volume"] = df["24h Volume"].fillna(df["Mkt Cap"]) + df["24h Volume"] = df["24h Volume"].fillna(df["Market Cap"]) # Fix volume if it contains a % - df.loc[df["24h Volume"].str.contains("%"), "24h Volume"] = df["Mkt Cap"] + df.loc[df["24h Volume"].str.contains("%"), "24h Volume"] = df["Market Cap"] # Rename 24h to % Change and 24h Volume to Volume df.rename(columns={"24h": "% Change", "24h Volume": "Volume"}, inplace=True) @@ -291,7 +298,7 @@ async def get_trending_coins() -> pd.DataFrame: async def get_top_categories() -> pd.DataFrame | None: - html = scraper.get("https://www.coingecko.com/en/categories").text + html = session.get("https://www.coingecko.com/en/categories").text soup = BeautifulSoup(html, "html.parser")