Skip to content

Commit

Permalink
Merge pull request #70 from ValueRaider/unblock-pandas-update
Browse files Browse the repository at this point in the history
Fix Pandas warnings and unblock Pandas update
  • Loading branch information
ValueRaider authored Jul 27, 2024
2 parents 48d5bc3 + 7e45458 commit 26af771
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 9 deletions.
4 changes: 2 additions & 2 deletions setup.cfg.template
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ packages = find:
python_requires = >=3.9
install_requires =
yfinance >= 0.2.36
numpy >= 1.26, <2.0 # Pandas<2.1 requires Numpy<2, see Github issue #59052
pandas >=1.5, <2.1 # Pandas 2.1 has datetime bug, see Github issue #55487
numpy >= 1.26
pandas >= 1.5
exchange_calendars >= 4.5.5
scipy >= 1.6.3
click
Expand Down
2 changes: 1 addition & 1 deletion yfinance_cache/yfc_financials_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ def _calc_release_dates(self, period, refresh=True, check=False):
# Drop dates that occurred just before another
edf = edf.sort_index(ascending=True)
d = edf.index.to_series().diff()
d[0] = pd.Timedelta(999, unit='d')
d.iloc[0] = pd.Timedelta(999, unit='d')
x_near = np.abs(d) < pd.Timedelta(5, "days")
if x_near.any():
edf = edf[~x_near]
Expand Down
16 changes: 14 additions & 2 deletions yfinance_cache/yfc_prices_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ def UpdateSplits(self, splits_df):
# Prepare 'splits_df' for append
splits_df["Superseded split"] = 0.0
splits_df["Superseded split FetchDate"] = pd.NaT
if splits_df['Superseded split FetchDate'].dt.tz is None and self.splits is not None:
splits_df['Superseded split FetchDate'] = splits_df['Superseded split FetchDate'].dt.tz_localize(self.splits['FetchDate'].dt.tz)
for dt in splits_df.index:
new_split = splits_df.loc[dt, "Stock Splits"]
if self.splits is not None and dt in self.splits.index:
Expand Down Expand Up @@ -240,6 +242,10 @@ def UpdateSplits(self, splits_df):
if self.splits is None:
self.splits = splits_df[cols].copy()
else:
f_na = self.splits['Superseded split FetchDate'].isna()
if f_na.all():
                    # Drop column. It breaks concat, and anyway 'splits_df' will restore it.
self.splits = self.splits.drop('Superseded split FetchDate', axis=1)
self.splits = pd.concat([self.splits, splits_df[cols]], sort=True).sort_index()
yfcm.StoreCacheDatum(self.ticker, "splits", self.splits)
elif self_splits_modified:
Expand Down Expand Up @@ -270,6 +276,8 @@ def UpdateDividends(self, divs_df):
divs_df["Superseded div"] = 0.0
divs_df["Superseded back adj."] = 0.0
divs_df["Superseded div FetchDate"] = pd.NaT
if divs_df['Superseded div FetchDate'].dt.tz is None and self.divs is not None:
divs_df['Superseded div FetchDate'] = divs_df['Superseded div FetchDate'].dt.tz_localize(self.divs['FetchDate'].dt.tz)
divs_df_dts = divs_df.index.copy()
for dt in divs_df_dts:
new_div = divs_df.loc[dt, "Dividends"]
Expand Down Expand Up @@ -340,6 +348,10 @@ def UpdateDividends(self, divs_df):
if self.divs is None:
self.divs = divs_df[cols].copy()
else:
f_na = self.divs['Superseded div FetchDate'].isna()
if f_na.all():
# Drop column. It breaks concat, and anyway 'divs_df' will restore it.
self.divs = self.divs.drop('Superseded div FetchDate', axis=1)
self.divs = pd.concat([self.divs, divs_df[cols]], sort=True).sort_index()
yfcm.StoreCacheDatum(self.ticker, "dividends", self.divs)
elif self_divs_modified:
Expand Down Expand Up @@ -417,7 +429,7 @@ def _getCachedPrices(self):

f_na = np.isnan(h["CDF"].to_numpy())
if f_na.any():
h["CDF"] = h["CDF"].fillna(method="bfill").fillna(method="ffill")
h["CDF"] = h["CDF"].bfill().ffill()
f_na = h["CDF"].isna()
if f_na.any():
raise Exception("CDF NaN repair failed")
Expand Down Expand Up @@ -3786,7 +3798,7 @@ def _reverseYahooAdjust(self, df):
else:
cdf = np.full(df.shape[0], np.nan)
cdf[f_nna] = df.loc[f_nna, "Adj Close"] / df.loc[f_nna, "Close"]
cdf = pd.Series(cdf).fillna(method="bfill").fillna(method="ffill").to_numpy()
cdf = pd.Series(cdf).bfill().ffill().to_numpy()

# In rare cases, Yahoo is not calculating 'Adj Close' correctly
if self.interday:
Expand Down
4 changes: 2 additions & 2 deletions yfinance_cache/yfc_ticker.py
Original file line number Diff line number Diff line change
Expand Up @@ -725,9 +725,9 @@ def _fetch_shares(self, start, end):
df = pd.DataFrame(df, columns=['Shares'])

if start_d < df.index[0].date():
df.loc[start_dt] = np.nan
df.loc[start_dt, 'Shares'] = np.nan
if (end_d-td_1d) > df.index[-1].date():
df.loc[end_dt] = np.nan
df.loc[end_dt, 'Shares'] = np.nan
df = df.sort_index()

df['FetchDate'] = fetch_dt
Expand Down
6 changes: 4 additions & 2 deletions yfinance_cache/yfc_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,8 @@ def _customModSchedule(cal):
cal, md = yfcm.ReadCacheDatum(cache_key, "cal", True)
if xcal.__version__ != md["version"]:
cal = None
elif 'np version' not in md or md['np version'] != np.__version__:
cal = None

# Calculate missing data
pre_range = None ; post_range = None
Expand Down Expand Up @@ -246,7 +248,7 @@ def _customModSchedule(cal):
# Write to cache
calCache[cal_name] = cal
if pre_range is not None or post_range is not None:
yfcm.StoreCacheDatum(cache_key, "cal", cal, metadata={"version": xcal.__version__})
yfcm.StoreCacheDatum(cache_key, "cal", cal, metadata={"version": xcal.__version__, 'np version': np.__version__})

return cal

Expand Down Expand Up @@ -645,7 +647,7 @@ def GetExchangeScheduleIntervals(exchange, interval, start, end, discardTimes=No
# Implemented by flooring then applying offset calculated from floored market open.
intervals_grp = intervals_df.groupby(intervals_df["interval_open"].dt.date)
# 1 - calculate offset
res = istr.replace('h', 'H') if istr.endswith('h') else istr.replace('m', 'T')
res = 'h' if istr.endswith('h') else istr.replace('m', 'T')
market_opens = intervals_grp.min()["interval_open"]
if len(market_opens.dt.time.unique()) == 1:
open0 = market_opens.iloc[0]
Expand Down
64 changes: 64 additions & 0 deletions yfinance_cache/yfc_upgrade.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import pickle as pkl
import pandas as pd

from . import yfc_cache_manager as yfcm

Expand Down Expand Up @@ -51,3 +53,65 @@ def _reset_cached_cals():
with open(state_fp, 'w'):
pass


def _fix_pickled_dt_col(fp, col):
    """Repair one pickled cache table in place.

    If the pickle at 'fp' exists and its DataFrame has column 'col',
    ensure missing values are pd.NaT and the column has a datetime dtype
    (older caches stored np.nan, which breaks datetime operations).
    The file is rewritten only when a fix was actually applied.
    """
    if not os.path.isfile(fp):
        return
    with open(fp, 'rb') as F:
        data = pkl.load(F)
    df = data['data']
    if col not in df.columns:
        return
    f_na = df[col].isna()
    # Only touch the file if there are NaNs AND the dtype is not already datetime.
    if f_na.any() and not pd.api.types.is_datetime64_any_dtype(df[col]):
        df.loc[f_na, col] = pd.NaT
        df[col] = pd.to_datetime(df[col])
        data['data'] = df
        with open(fp, 'wb') as F:
            # Protocol 4 kept for compatibility with existing cache files.
            pkl.dump(data, F, 4)


def _fix_dt_types_in_divs_splits():
    """One-time cache upgrade.

    Ensure the 'Superseded div/split FetchDate' columns in every ticker's
    cached dividends/splits tables use pd.NaT (datetime dtype) instead of
    np.nan. A state file under '_YFC_' records completion so the scan
    runs at most once.
    """
    dp = yfcm.GetCacheDirpath()
    yfc_dp = os.path.join(dp, "_YFC_")
    state_fp = os.path.join(yfc_dp, "have-fixed-types-in-divs-splits")
    if os.path.isfile(state_fp):
        # Upgrade already performed.
        return

    def _mark_done():
        # Create the (empty) state file so this upgrade is skipped next run.
        if not os.path.isdir(yfc_dp):
            os.makedirs(yfc_dp)
        with open(state_fp, 'w'):
            pass

    if not os.path.isdir(dp):
        # No cache directory yet - nothing to fix.
        _mark_done()
        return

    for entry in os.listdir(dp):
        if entry.startswith("exchange-"):
            # Exchange-calendar caches hold no dividend/split tables.
            continue
        _fix_pickled_dt_col(os.path.join(dp, entry, 'dividends.pkl'),
                            'Superseded div FetchDate')
        _fix_pickled_dt_col(os.path.join(dp, entry, 'splits.pkl'),
                            'Superseded split FetchDate')

    _mark_done()

0 comments on commit 26af771

Please sign in to comment.