-
Notifications
You must be signed in to change notification settings - Fork 0
/
build.py
244 lines (202 loc) · 7.08 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
"""
Main utility methods for portfolio automation.
"""
import pandas_datareader as pdr
import yfinance as yf
import numpy as np
import sys
from urllib.request import urlopen
from lxml import etree
from tqdm import tqdm
import pandas as pd
import os
class Portfolio:
    """
    Equal-weighted starting portfolio built from a list of tickers.

    Constructing an instance reads the ticker file, scrapes a sector label
    for every ticker, downloads adjusted closing prices and fetches the
    average risk-free rate, i.e. it performs file and network I/O.

    NOTE: ``tickers``, ``sectors``, ``prices`` and ``riskfree`` are defined
    as methods on the class, but ``__init__`` rebinds each of those names on
    the instance to the value the method returned. After construction they
    are plain data attributes and must be read without parentheses.
    """

    def __init__(self, ticker_file, start, end):
        """
        :param ticker_file: .txt file with a list of tickers.
        :type ticker_file: str
        :param start, end: user-defined dates.
        :type start, end: str
        """
        self.ticker_file = ticker_file
        # From here on ``self.tickers`` is the list, not the method.
        self.tickers = self.tickers()
        self.n = len(self.tickers)
        self.start = start
        self.end = end
        self.w = self.init_weights()  # Starting portfolio weights in an array
        self.weights = self.weight_dict()  # Starting weights in a dictionary
        self.sectors = self.sectors()
        self.prices = self.prices()
        # Must come after ``self.prices``: the rate is averaged over the
        # date range of the computed returns.
        self.riskfree = self.riskfree()

    def tickers(self) -> list:
        """
        Create list for tickers on init.

        Each non-blank line of ``self.ticker_file`` is one ticker.
        Surrounding whitespace (including a trailing carriage return) is
        stripped so that it cannot end up inside a symbol.

        :return: ticker list.
        :rtype: list
        """
        # The context manager guarantees the handle is closed again.
        with open(self.ticker_file) as handle:
            stripped = (line.strip() for line in handle)
            return [symbol for symbol in stripped if symbol]

    def init_weights(self) -> np.ndarray:
        """
        Initial starting weights for the portfolio.

        :return: N array of ticker weights, each equal to 1 / N.
        :rtype: np.array
        """
        return np.ones(self.n, dtype='float64') / self.n

    def weight_dict(self, weights=None) -> dict:
        """
        Ticker weight dictionary.

        Also stores the resulting mapping on ``self.weights``.

        :param weights: one weight per ticker, in ticker order. When omitted
            the current weight array ``self.w`` is used.
        :type weights: iterable of float, optional
        :return: Ticker: weight dictionary
        :rtype: dictionary
        :raises ValueError: if the number of weights differs from the number
            of tickers.
        """
        w = list(self.w if weights is None else weights)
        # ``zip`` would silently drop the surplus entries of the longer
        # sequence, leaving a portfolio that no longer covers every ticker.
        if len(w) != len(self.tickers):
            raise ValueError(
                f"expected {len(self.tickers)} weights, got {len(w)}")
        self.weights = dict(zip(self.tickers, w))
        return self.weights

    def sectors(self) -> dict:
        """
        Get the sector id for each ticker.

        The label is read from each ticker's Yahoo Finance profile page, so
        this method issues one HTTP request per ticker.

        :return: ticker dictionary mapping ticker -> scraped sector text.
        :rtype: dictionary
        """
        tickers = self.tickers
        urls = [
            f"https://ca.finance.yahoo.com/quote/{ticker}/profile?p={ticker}"
            for ticker in tickers]
        sector_id = []
        for url in tqdm(urls, desc='parsing', total=len(urls)):
            # Close every HTTP response once its page has been parsed.
            with urlopen(url) as response:
                tree = etree.parse(response, etree.HTMLParser())
            sector = tree.xpath(
                "normalize-space(//span[@class='Fw(600)']/text()[last()])")
            sector_id.append(sector)
        return dict(zip(tickers, sector_id))

    def prices(self) -> pd.DataFrame:
        """
        Get ticker prices from a list in a text file.

        All tickers are fetched with a single download request; afterwards
        every column is checked for missing observations.

        :return: (daily) adjusted closing prices
        :rtype: pd.DataFrame
        :raises ValueError: if the ticker list is empty.
        :raises SystemExit: if more than one day of data is missing for a
            ticker.
        """
        tickers = self.tickers
        if not tickers:
            raise ValueError("cannot download prices: ticker list is empty")
        # One request covers the whole universe; there is no need to repeat
        # it for every ticker. ``auto_adjust=False`` is passed explicitly so
        # the 'Adj Close' column exists regardless of the library default.
        prices = yf.download(
            tickers, start=self.start, end=self.end,
            auto_adjust=False)['Adj Close']
        # A single-ticker request may come back as a Series; normalise it so
        # the per-ticker column lookup below works either way.
        if isinstance(prices, pd.Series):
            prices = prices.to_frame(name=tickers[0])
        for ticker in tickers:
            nan_sum = prices[ticker].isnull().sum(axis=0)
            if nan_sum > 1:
                sys.exit(
                    f"{nan_sum} days of data for {ticker} are missing.\n\n"
                    "Try to reconsider the time frame.")
        return prices

    def daily_returns(self) -> pd.DataFrame:
        """
        Create return matrix from price dataframe.

        :return: (daily) returns of stocks.
        :rtype: pd.DataFrame
        """
        # Rows where every ticker is NaN (e.g. the first row) are dropped.
        return self.prices.pct_change().dropna(how="all")

    def log_returns(self) -> pd.DataFrame:
        """
        Create log return matrix from price dataframe.

        :return: (daily) log returns of stocks.
        :rtype: pd.DataFrame
        """
        return np.log(self.prices).diff().dropna(how="all")

    def annualized_returns(self, freq=252) -> pd.Series:
        """
        Generate an annualized return matrix

        :param freq: number of trading days in a year.
        :type freq: int
        :return: annualized mean daily returns.
        :rtype: pd.Series
        """
        return self.daily_returns().mean() * freq

    def riskfree(self):
        """
        Calculate the average risk-free rate over the portfolio's time horizon.

        The FRED series ``DGS1`` is fetched for the date range spanned by the
        daily returns, averaged and divided by 100.

        :return: The average 1-year risk free rate over the portfolio's time
            horizon.
        :rtype: np.float64
        """
        r = self.daily_returns()
        start = r.index.min()
        end = r.index.max()
        quotes = pdr.get_data_fred('DGS1', start=start, end=end)
        rf = quotes.dropna(how="all").mean() / 100
        return rf.iloc[0]

    def excess_returns(self) -> pd.Series:
        """
        Calculate the annualized return less the risk free rate.

        :return: annualized excess returns
        :rtype: pd.Series
        """
        # The scalar rate broadcasts over the Series; no tiling is needed.
        return self.annualized_returns() - self.riskfree

    def ema_returns(self, freq=252, span=500) -> pd.Series:
        """
        The exponentially weighted moving average of the daily stock returns.

        A less noisy means for a price indicator.
        Using the ema returns gives more weight to recent data (parameterized
        by the span.)

        :param freq: number of trading days in a year.
        :type freq: int
        :param span: specify the decay for the span of the time period. (i.e.
            span=20 implies a 20-day ema) A higher value will imply a longer
            holding period and less rebalancing. A smaller value will imply a
            shorter time horizon for holdings.
        :type span: int
        :return: annualized mean ema returns.
        :rtype: pd.Series
        """
        r = self.daily_returns()
        # Only the most recent smoothed value is annualized.
        return r.ewm(span=span).mean().iloc[-1] * freq

    def excess_ema_returns(self) -> pd.Series:
        """
        Calculate the annualized ema return less the risk free rate.

        :return: annualized excess returns
        :rtype: pd.Series
        """
        return self.ema_returns() - self.riskfree

    @staticmethod
    def save_to_csv(array, name):
        """
        Save an array to csv format.

        The file is written to ``<current working directory>/<name>.csv``.

        :param array: A price or return dataframe / series, or a numpy array.
        :type array: pd.DataFrame, pd.Series, np.ndarray
        :param name: Desired filename (without the ``.csv`` extension).
        :type name: str
        :return: None; the csv file is created as a side effect.
        :rtype: None
        :raises TypeError: if ``array`` is not one of the supported types.
        """
        filename = os.path.join(os.getcwd(), f"{name}.csv")
        if isinstance(array, np.ndarray):
            return np.savetxt(filename, array, delimiter=",")
        # Series are accepted as well: several return helpers of this class
        # produce a Series, which used to be skipped without any output.
        if isinstance(array, (pd.DataFrame, pd.Series)):
            return array.to_csv(filename)
        raise TypeError(
            "save_to_csv expects a numpy array, pandas DataFrame or pandas "
            f"Series, got {type(array).__name__}")