ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
traditional_value_algorithm.py
(10587B)
1 """
2 Long/Short Traditional Value Factors (130/30)
3
4 Author: Gilbert Wassermann
5
6 This algorithm creates traditional value factors and standardizes
7 them using a synthetic S&P500. It then uses a 130/30 strategy to trade.
8
9 https://www.math.nyu.edu/faculty/avellane/Lo13030.pdf
10
11 Please direct any questions, feedback, or corrections to help@quantopian.com
12
13 The material on this website is provided for informational purposes only
14 and does not constitute an offer to sell, a solicitation to buy, or a
15 recommendation or endorsement for any security or strategy,
16 nor does it constitute an offer to provide investment advisory or other services by Quantopian.
17
18 In addition, the content of the website neither constitutes investment advice
19 nor offers any opinion with respect to the suitability of any security or any specific investment.
20 Quantopian makes no guarantees as to accuracy or completeness of the
21 views expressed in the website. The views are subject to change,
22 and may have become unreliable for various reasons,
23 including changes in market conditions or economic circumstances.
24 """
25
26 import numpy as np
27 import pandas as pd
28 from quantopian.pipeline import Pipeline
29 from quantopian.pipeline.data import morningstar
30 from quantopian.pipeline.factors import CustomFactor
31 from quantopian.algorithm import attach_pipeline, pipeline_output
32 from quantopian.pipeline.data.builtin import USEquityPricing
33 from quantopian.pipeline.factors import SimpleMovingAverage, AverageDollarVolume
34 from quantopian.pipeline.filters.morningstar import IsPrimaryShare
35 from quantopian.pipeline.data import morningstar as mstar
36
37 # Custom Factor 1 : Dividend Yield
class Div_Yield(CustomFactor):
    """Factor whose value is the most recent dividend yield in the window."""

    window_length = 1
    inputs = [morningstar.valuation_ratios.dividend_yield]

    def compute(self, today, assets, out, d_y):
        # the last row of the input window is the newest observation
        latest_yield = d_y[-1]
        out[:] = latest_yield
45
46
47 # Custom Factor 2 : P/B Ratio
class Price_to_Book(CustomFactor):
    """Factor whose value is the negated most recent price-to-book ratio."""

    window_length = 1
    inputs = [morningstar.valuation_ratios.pb_ratio]

    def compute(self, today, assets, out, p_b_r):
        # sign is flipped, so a lower ratio produces a larger factor value
        latest_ratio = p_b_r[-1]
        out[:] = -latest_ratio
55
56
57 # Custom Factor 3 : Price to Trailing 12 Month Sales
class Price_to_TTM_Sales(CustomFactor):
    """Factor whose value is the negated most recent price-to-sales ratio."""

    window_length = 1
    inputs = [morningstar.valuation_ratios.ps_ratio]

    def compute(self, today, assets, out, ps):
        # sign is flipped, so a lower ratio produces a larger factor value
        latest_ratio = ps[-1]
        out[:] = -latest_ratio
64
65
66 # Custom Factor 4 : Price to Trailing 12 Month Cashflow
class Price_to_TTM_Cashflows(CustomFactor):
    """Factor whose value is the negated most recent price-to-cashflow ratio."""

    window_length = 1
    inputs = [morningstar.valuation_ratios.pcf_ratio]

    def compute(self, today, assets, out, pcf):
        # sign is flipped, so a lower ratio produces a larger factor value
        latest_ratio = pcf[-1]
        out[:] = -latest_ratio
73
74
75 # This factor creates the synthetic S&P500
class SPY_proxy(CustomFactor):
    """Factor whose value is the most recent market capitalisation.

    The largest values of this column are later used to pick the
    synthetic S&P500 members.
    """

    window_length = 1
    inputs = [morningstar.valuation.market_cap]

    def compute(self, today, assets, out, mc):
        # the last row of the input window is the newest observation
        latest_cap = mc[-1]
        out[:] = latest_cap
82
83
84 # This pulls all necessary data in one step
def Data_Pull():
    """
    Build the pipeline that pulls all necessary data in one step.

    returns
    -------
    Pipeline
        pipeline with the SPY proxy column followed by the four
        value-factor columns
    """
    pipe = Pipeline()

    # (column label, factor) pairs, registered in this order
    columns = (
        ('SPY Proxy', SPY_proxy()),
        ('Dividend Yield', Div_Yield()),
        ('Price to Book', Price_to_Book()),
        ('Price / TTM Sales', Price_to_TTM_Sales()),
        ('Price / TTM Cashflow', Price_to_TTM_Cashflows()),
    )
    for label, factor in columns:
        pipe.add(factor, label)

    return pipe
106
107
108 # function to filter out unwanted values in the scores
def filter_fn(x):
    """
    Clamp a score to the interval [-10, 10].

    Values at or beyond a bound come back as the float bound; anything
    strictly inside the interval (and NaN) is returned unchanged.
    """
    if x >= 10:
        return 10.0
    if x <= -10:
        return -10.0
    return x
115
116
def standard_frame_compute(df):
    """
    Standardizes the Pipeline API data pull
    using the synthetic S&P500's means and standard deviations for
    each column.

    Rows holding NaN or infinite values are dropped first. The 500
    remaining rows with the largest 'SPY Proxy' value form the synthetic
    S&P500; their per-column mean and (population) standard deviation
    are used to z-score every column of the full frame. Each z-score is
    clamped to [-10, 10] and then divided by the number of columns.

    parameters
    ----------
    df: pandas.DataFrame
        full result of Data_Pull; must contain a 'SPY Proxy' column

    returns
    -------
    numpy.array
        standardized Data_Pull results, one row per column of df and
        one column per surviving equity

    numpy.array
        index of equities
    """
    # basic clean of dataset to remove infinite and missing values
    df = df.replace([np.inf, -np.inf], np.nan).dropna()

    # store index values and the raw numbers of the full universe
    index = df.index.values
    values = df.values
    n_columns = values.shape[1]

    # nothing survived the clean: return an empty score matrix instead of
    # computing statistics of an empty sample
    if values.shape[0] == 0:
        return (np.empty((n_columns, 0)), index)

    # need standardization params from synthetic S&P500;
    # DataFrame.sort(columns=...) only exists in old pandas releases,
    # sort_values is its replacement
    sort_values = getattr(df, 'sort_values', None)
    if sort_values is not None:
        by_cap = sort_values(by='SPY Proxy', ascending=False)
    else:
        by_cap = df.sort(columns='SPY Proxy', ascending=False)
    spy_values = by_cap.head(500).values

    # summary stats for S&P500, one value per column
    mu = spy_values.mean(axis=0)
    sigma = spy_values.std(axis=0)

    # z-score the full universe and cap outliers at +/-10
    scores = np.clip((values - mu) / sigma, -10.0, 10.0)

    # scale by the number of columns so the later sum is an average
    scores = scores / n_columns

    # callers expect one row per column of the original frame
    return (scores.T, index)
181
182
def composite_score(df, index):
    """
    Summarize standardized data in a single number.

    parameters
    ----------
    df: numpy.array
        standardized results, one row per factor and one column per equity

    index: numpy.array
        index of equities, aligned with the columns of df

    returns
    -------
    pandas.Series
        per-equity sum of the standardized values, indexed by equity and
        sorted from highest to lowest score
    """
    # sum up transformed data and connect the totals to equities
    totals = pd.Series(data=df.sum(axis=0), index=index)

    # sort descending; the in-place Series.sort only exists in old pandas
    # releases, sort_values is its replacement
    if hasattr(totals, 'sort_values'):
        return totals.sort_values(ascending=False)

    totals.sort(ascending=False)
    return totals
213
214
def initialize(context):
    """
    Attach the screened data pipeline, set the leverage targets and
    schedule the rebalance, clean-up and recording functions.
    """
    # register the data pipeline, then restrict it to the filtered universe
    pipe = Data_Pull()
    attach_pipeline(pipe, 'Data')
    pipe.set_screen(filter_universe())

    # leverage ratios for longs and shorts
    context.long_leverage = 1.3
    context.short_leverage = -0.3

    # at the start of each month, run the rebalancing function
    schedule_function(
        rebalance,
        date_rule=date_rules.month_start(),
        time_rule=time_rules.market_open(minutes=30),
    )

    # clean positions outside the current long/short set daily
    schedule_function(
        daily_clean,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_close(minutes=30),
    )

    # record variables
    schedule_function(
        record_vars,
        date_rule=date_rules.every_day(),
        time_rule=time_rules.market_close(),
    )
242
243
244 # called before every day of trading
def before_trading_start(context, data):
    """
    Turn the pipeline output into a ranked score series and pick the
    long and short candidates from its two ends.
    """
    # raw pipeline frame
    raw = pipeline_output('Data')
    context.output = raw

    # standardized factor matrix plus the matching equity index
    standardized, equities = standard_frame_compute(raw)
    context.output = standardized

    # single ranked score per equity, best first
    ranked = composite_score(standardized, equities)
    context.output = ranked

    # 26 highest scores go long, 6 lowest go short
    context.long_set = set(ranked.head(26).index)
    context.short_set = set(ranked.tail(6).index)
255
256 # log long and short equities and their corresponding composite scores
def handle_data(context, data):
    """
    Intentionally does nothing.

    For debugging, context.long_set and context.short_set can be logged
    from here.
    """
    return None
266
267
268 # called at the start of every month in order to rebalance the longs and shorts lists
def rebalance(context, data):
    """
    Re-target the portfolio to the current long and short sets.

    Each long receives an equal share of context.long_leverage and each
    short an equal share of context.short_leverage. A stock present in
    both sets is treated as a long. Positions outside the two sets are
    closed afterwards through daily_clean.

    If either set is empty the rebalance is skipped and
    context.security_set is left untouched, because the per-stock weight
    would otherwise require a division by zero.
    """
    longs = context.long_set
    shorts = context.short_set

    # no ranked candidates available: nothing sensible to trade
    if not longs or not shorts:
        return

    # calculate how much of each stock to buy or hold
    long_pct = context.long_leverage / len(longs)
    short_pct = context.short_leverage / len(shorts)

    # universe now contains just longs and shorts
    context.security_set = set(longs.union(shorts))

    for stock in context.security_set:
        if not data.can_trade(stock):
            continue
        if stock in longs:
            order_target_percent(stock, long_pct)
        elif stock in shorts:
            order_target_percent(stock, short_pct)

    # close out stale positions
    daily_clean(context, data)
287
288 # close every tradeable position that is no longer in the current long/short set
def daily_clean(context, data):
    """
    Close every held position that is not in context.security_set,
    provided the stock can currently be traded.
    """
    for stock in context.portfolio.positions:
        # still part of the current long/short set: keep it
        if stock in context.security_set:
            continue
        if data.can_trade(stock):
            order_target_percent(stock, 0)
294
def record_vars(context, data):
    """
    Record leverage, net exposure and the number of long and short
    positions.

    Even in minute mode, only the end-of-day leverage is plotted.
    """
    # .values() instead of the Python-2-only .itervalues() so the loop
    # runs on both Python 2 and Python 3
    amounts = [position.amount
               for position in context.portfolio.positions.values()]

    # positions with a zero amount count as neither long nor short
    longs = sum(1 for amount in amounts if amount > 0)
    shorts = sum(1 for amount in amounts if amount < 0)

    record(leverage=context.account.leverage,
           short_count=shorts,
           long_count=longs,
           exposure=context.account.net_leverage)
308
def filter_universe():
    """
    Build the screen that defines the tradeable universe.

    9 filters:
        1. common stock
        2 & 3. not limited partnership - name and database check
        4. database has fundamental data
        5. not over the counter
        6. not when issued
        7. not depository receipts
        8. primary share
        9. high dollar volume
    Check Scott's notebook for more details.

    returns
    -------
    filter
        stocks passing filters 1-8 whose 21-day average dollar volume
        lies between the 70th and 100th percentile of that group
    """
    common_stock = mstar.share_class_reference.security_type.latest.eq('ST00000001')
    # raw string: same pattern as before, without relying on Python
    # passing the unrecognised escape "\." through unchanged
    not_lp_name = ~mstar.company_reference.standard_name.latest.matches(r'.* L[\. ]?P\.?$')
    not_lp_balance_sheet = mstar.balance_sheet.limited_partnership.latest.isnull()
    have_data = mstar.valuation.market_cap.latest.notnull()
    not_otc = ~mstar.share_class_reference.exchange_id.latest.startswith('OTC')
    not_wi = ~mstar.share_class_reference.symbol.latest.endswith('.WI')
    not_depository = ~mstar.share_class_reference.is_depositary_receipt.latest
    primary_share = IsPrimaryShare()

    # Combine the above filters.
    tradable_filter = (common_stock & not_lp_name & not_lp_balance_sheet &
                       have_data & not_otc & not_wi & not_depository &
                       primary_share)

    # dollar volume is ranked only among the stocks passing the filters above
    dollar_volume = AverageDollarVolume(window_length=21, mask=tradable_filter)
    return dollar_volume.percentile_between(70, 100)