Skip to content

Commit

Permalink
Merge pull request #70 from ValueRaider/unblock-pandas-update
Browse files Browse the repository at this point in the history
Fix Pandas warnings and unblock Pandas update
  • Loading branch information
ValueRaider authored Jul 27, 2024
2 parents 48d5bc3 + 7e45458 commit 26af771
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 9 deletions.
4 changes: 2 additions & 2 deletions setup.cfg.template
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ packages = find:
python_requires = >=3.9
install_requires =
yfinance >= 0.2.36
numpy >= 1.26, <2.0 # Pandas<2.1 requires Numpy<2, see Github issue #59052
pandas >=1.5, <2.1 # Pandas 2.1 has datetime bug, see Github issue #55487
numpy >= 1.26
pandas >= 1.5
exchange_calendars >= 4.5.5
scipy >= 1.6.3
click
Expand Down
2 changes: 1 addition & 1 deletion yfinance_cache/yfc_financials_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ def _calc_release_dates(self, period, refresh=True, check=False):
# Drop dates that occurred just before another
edf = edf.sort_index(ascending=True)
d = edf.index.to_series().diff()
d[0] = pd.Timedelta(999, unit='d')
d.iloc[0] = pd.Timedelta(999, unit='d')
x_near = np.abs(d) < pd.Timedelta(5, "days")
if x_near.any():
edf = edf[~x_near]
Expand Down
16 changes: 14 additions & 2 deletions yfinance_cache/yfc_prices_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ def UpdateSplits(self, splits_df):
# Prepare 'splits_df' for append
splits_df["Superseded split"] = 0.0
splits_df["Superseded split FetchDate"] = pd.NaT
if splits_df['Superseded split FetchDate'].dt.tz is None and self.splits is not None:
splits_df['Superseded split FetchDate'] = splits_df['Superseded split FetchDate'].dt.tz_localize(self.splits['FetchDate'].dt.tz)
for dt in splits_df.index:
new_split = splits_df.loc[dt, "Stock Splits"]
if self.splits is not None and dt in self.splits.index:
Expand Down Expand Up @@ -240,6 +242,10 @@ def UpdateSplits(self, splits_df):
if self.splits is None:
self.splits = splits_df[cols].copy()
else:
f_na = self.splits['Superseded split FetchDate'].isna()
if f_na.all():
                    # Drop column. It breaks concat, and anyway 'splits_df' will restore it.
self.splits = self.splits.drop('Superseded split FetchDate', axis=1)
self.splits = pd.concat([self.splits, splits_df[cols]], sort=True).sort_index()
yfcm.StoreCacheDatum(self.ticker, "splits", self.splits)
elif self_splits_modified:
Expand Down Expand Up @@ -270,6 +276,8 @@ def UpdateDividends(self, divs_df):
divs_df["Superseded div"] = 0.0
divs_df["Superseded back adj."] = 0.0
divs_df["Superseded div FetchDate"] = pd.NaT
if divs_df['Superseded div FetchDate'].dt.tz is None and self.divs is not None:
divs_df['Superseded div FetchDate'] = divs_df['Superseded div FetchDate'].dt.tz_localize(self.divs['FetchDate'].dt.tz)
divs_df_dts = divs_df.index.copy()
for dt in divs_df_dts:
new_div = divs_df.loc[dt, "Dividends"]
Expand Down Expand Up @@ -340,6 +348,10 @@ def UpdateDividends(self, divs_df):
if self.divs is None:
self.divs = divs_df[cols].copy()
else:
f_na = self.divs['Superseded div FetchDate'].isna()
if f_na.all():
# Drop column. It breaks concat, and anyway 'divs_df' will restore it.
self.divs = self.divs.drop('Superseded div FetchDate', axis=1)
self.divs = pd.concat([self.divs, divs_df[cols]], sort=True).sort_index()
yfcm.StoreCacheDatum(self.ticker, "dividends", self.divs)
elif self_divs_modified:
Expand Down Expand Up @@ -417,7 +429,7 @@ def _getCachedPrices(self):

f_na = np.isnan(h["CDF"].to_numpy())
if f_na.any():
h["CDF"] = h["CDF"].fillna(method="bfill").fillna(method="ffill")
h["CDF"] = h["CDF"].bfill().ffill()
f_na = h["CDF"].isna()
if f_na.any():
raise Exception("CDF NaN repair failed")
Expand Down Expand Up @@ -3786,7 +3798,7 @@ def _reverseYahooAdjust(self, df):
else:
cdf = np.full(df.shape[0], np.nan)
cdf[f_nna] = df.loc[f_nna, "Adj Close"] / df.loc[f_nna, "Close"]
cdf = pd.Series(cdf).fillna(method="bfill").fillna(method="ffill").to_numpy()
cdf = pd.Series(cdf).bfill().ffill().to_numpy()

# In rare cases, Yahoo is not calculating 'Adj Close' correctly
if self.interday:
Expand Down
4 changes: 2 additions & 2 deletions yfinance_cache/yfc_ticker.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,9 +725,9 @@ def _fetch_shares(self, start, end):
df = pd.DataFrame(df, columns=['Shares'])

if start_d < df.index[0].date():
df.loc[start_dt] = np.nan
df.loc[start_dt, 'Shares'] = np.nan
if (end_d-td_1d) > df.index[-1].date():
df.loc[end_dt] = np.nan
df.loc[end_dt, 'Shares'] = np.nan
df = df.sort_index()

df['FetchDate'] = fetch_dt
Expand Down
6 changes: 4 additions & 2 deletions yfinance_cache/yfc_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ def _customModSchedule(cal):
cal, md = yfcm.ReadCacheDatum(cache_key, "cal", True)
if xcal.__version__ != md["version"]:
cal = None
elif 'np version' not in md or md['np version'] != np.__version__:
cal = None

# Calculate missing data
pre_range = None ; post_range = None
Expand Down Expand Up @@ -246,7 +248,7 @@ def _customModSchedule(cal):
# Write to cache
calCache[cal_name] = cal
if pre_range is not None or post_range is not None:
yfcm.StoreCacheDatum(cache_key, "cal", cal, metadata={"version": xcal.__version__})
yfcm.StoreCacheDatum(cache_key, "cal", cal, metadata={"version": xcal.__version__, 'np version': np.__version__})

return cal

Expand Down Expand Up @@ -645,7 +647,7 @@ def GetExchangeScheduleIntervals(exchange, interval, start, end, discardTimes=No
# Implemented by flooring then applying offset calculated from floored market open.
intervals_grp = intervals_df.groupby(intervals_df["interval_open"].dt.date)
# 1 - calculate offset
res = istr.replace('h', 'H') if istr.endswith('h') else istr.replace('m', 'T')
res = 'h' if istr.endswith('h') else istr.replace('m', 'T')
market_opens = intervals_grp.min()["interval_open"]
if len(market_opens.dt.time.unique()) == 1:
open0 = market_opens.iloc[0]
Expand Down
64 changes: 64 additions & 0 deletions yfinance_cache/yfc_upgrade.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import pickle as pkl
import pandas as pd

from . import yfc_cache_manager as yfcm

Expand Down Expand Up @@ -51,3 +53,65 @@ def _reset_cached_cals():
with open(state_fp, 'w'):
pass


def _fix_pickled_dt_col(fp, col):
    """Repair one pickled cache table in place.

    If the pickle at 'fp' exists and its DataFrame has column 'col',
    ensure missing values are pd.NaT and the column has a datetime dtype
    (older caches stored np.nan, which breaks datetime operations).
    The file is rewritten only when a fix was actually applied.
    """
    if not os.path.isfile(fp):
        return
    with open(fp, 'rb') as F:
        data = pkl.load(F)
    df = data['data']
    if col not in df.columns:
        return
    f_na = df[col].isna()
    # Only touch the file if there are NaNs AND the dtype is not already datetime.
    if f_na.any() and not pd.api.types.is_datetime64_any_dtype(df[col]):
        df.loc[f_na, col] = pd.NaT
        df[col] = pd.to_datetime(df[col])
        data['data'] = df
        with open(fp, 'wb') as F:
            # Protocol 4 kept for compatibility with existing cache files.
            pkl.dump(data, F, 4)


def _fix_dt_types_in_divs_splits():
    """One-time cache upgrade.

    Ensure the 'Superseded div/split FetchDate' columns in every ticker's
    cached dividends/splits tables use pd.NaT (datetime dtype) instead of
    np.nan. A state file under '_YFC_' records completion so the scan
    runs at most once.
    """
    dp = yfcm.GetCacheDirpath()
    yfc_dp = os.path.join(dp, "_YFC_")
    state_fp = os.path.join(yfc_dp, "have-fixed-types-in-divs-splits")
    if os.path.isfile(state_fp):
        # Upgrade already performed.
        return

    def _mark_done():
        # Create the (empty) state file so this upgrade is skipped next run.
        if not os.path.isdir(yfc_dp):
            os.makedirs(yfc_dp)
        with open(state_fp, 'w'):
            pass

    if not os.path.isdir(dp):
        # No cache directory yet - nothing to fix.
        _mark_done()
        return

    for entry in os.listdir(dp):
        if entry.startswith("exchange-"):
            # Exchange-calendar caches hold no dividend/split tables.
            continue
        _fix_pickled_dt_col(os.path.join(dp, entry, 'dividends.pkl'),
                            'Superseded div FetchDate')
        _fix_pickled_dt_col(os.path.join(dp, entry, 'splits.pkl'),
                            'Superseded split FetchDate')

    _mark_done()

0 comments on commit 26af771

Please sign in to comment.