ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
kelly_kalman_pairs.py
(11530B)
1 """
2 Pairs Trading with Kalman Filters
3
4 Author: David Edwards
5
6 This algorithm extends the Kalman Filtering pairs trading algorithm from a
7 previous lecture to support multiple pairs. In order to extend the idea,
8 the previous algorithm was factored into a class so several instances can be
9 created with different assets.
10
11 We chose to apply the kelly criterion to this algo because we felt it would
12 be a good fit after looking at the beta and max drawdown values. One challenge
13 with applying Kelly in this case was the initial burn-in period over the course
14 of collecting returns data.
15
16 In this example we are updating leverage based on the kelly score using
17 historical returns. If the kelly score is less than 1, it uses a leverage
18 of 1. Since the kelly score can be negative we use max(1.0, kelly) to get the
19 portfolio's leverage.
20 """
21
22
23 import numpy as np
24 import pandas as pd
25 from pykalman import KalmanFilter
26 import statsmodels.api as sm
27
28
29 def initialize(context):
30 # Quantopian backtester specific variables
31 set_slippage(slippage.FixedSlippage(spread=0))
32 set_commission(commission.PerShare(cost=0.01, min_trade_cost=1.0))
33 set_symbol_lookup_date('2014-01-01')
34
35 context.pairs = [
36 KalmanPairTrade(symbol('STX'), symbol('WDC'),
37 initial_bars=300, freq='1m', delta=1e-3, maxlen=300),
38 KalmanPairTrade(symbol('CBI'), symbol('JEC'),
39 initial_bars=300, freq='1m', delta=1e-3, maxlen=300),
40 KalmanPairTrade(symbol('MAS'), symbol('VMC'),
41 initial_bars=300, freq='1m', delta=1e-3, maxlen=300),
42 KalmanPairTrade(symbol('JPM'), symbol('C'),
43 initial_bars=300, freq='1m', delta=1e-3, maxlen=300),
44 KalmanPairTrade(symbol('AON'), symbol('MMC'),
45 initial_bars=300, freq='1m', delta=1e-3, maxlen=300),
46 KalmanPairTrade(symbol('COP'), symbol('CVX'),
47 initial_bars=300, freq='1m', delta=1e-3, maxlen=300),
48
49 ]
50
51 weight = 1.8 / len(context.pairs)
52 for pair in context.pairs:
53 pair.leverage = weight
54
55 context.kelly = KellyLeverage()
56 schedule_function(context.kelly.update,
57 time_rule=time_rules.market_close())
58 schedule_function(update_leverage,
59 time_rule=time_rules.market_open())
60
61 for minute in range(10, 390, 120):
62 for pair in context.pairs:
63 schedule_function(pair.trading_logic,
64 time_rule=time_rules.market_open(minutes=minute))
65
66 class KellyLeverage(object):
67
68 def __init__(self, minlen=30, maxlen=500):
69 self.equity = []
70 self.minlen = minlen
71 self.maxlen = maxlen
72
73 def update(self, context, data):
74 self.equity.append(context.portfolio.portfolio_value)
75 if len(self.equity) > self.maxlen:
76 self.equity = self.equity[-self.maxlen::]
77
78 def kelly_score(self):
79 if len(self.equity) < self.minlen:
80 return np.nan
81 returns = np.diff(np.log(self.equity))[1:]
82 return np.mean(returns) / np.var(returns)
83
84
85 def handle_data(context, data):
86 record(market_exposure=context.account.net_leverage,
87 leverage=context.account.leverage,
88 kelly=context.kelly.kelly_score())
89
90
91 def update_leverage(context, data):
92 kelly = context.kelly.kelly_score()
93 if np.isnan(kelly):
94 return
95
96 weight = max(1.0, kelly) / len(context.pairs)
97 for pair in context.pairs:
98 pair.leverage = weight
99
100 class KalmanPairTrade(object):
101
102 def __init__(self, y, x, leverage=1.0, initial_bars=10,
103 freq='1d', delta=1e-3, maxlen=3000):
104 self._y = y
105 self._x = x
106 self.maxlen = maxlen
107 self.initial_bars = initial_bars
108 self.freq = freq
109 self.delta = delta
110 self.leverage = leverage
111 self.Y = KalmanMovingAverage(self._y, maxlen=self.maxlen)
112 self.X = KalmanMovingAverage(self._x, maxlen=self.maxlen)
113 self.kf = None
114 self.entry_dt = pd.Timestamp('1900-01-01', tz='utc')
115
116 @property
117 def name(self):
118 return "{}~{}".format(self._y.symbol, self._x.symbol)
119
120 def trading_logic(self, context, data):
121 try:
122 if self.kf is None:
123 self.initialize_filters(context, data)
124 return
125 self.update()
126 if get_open_orders(sid=self._x) or get_open_orders(sid=self._y):
127 return
128 spreads = self.mean_spread()
129
130 zscore = (spreads[-1] - spreads.mean()) / spreads.std()
131
132 reference_pos = context.portfolio.positions[self._y].amount
133
134 now = get_datetime()
135 if reference_pos:
136 if (now - self.entry_dt).days > 20:
137 order_target(self._y, 0.0)
138 order_target(self._x, 0.0)
139 return
140 # Do a PNL check to make sure a reversion at least covered trading costs
141 # I do this because parameter drift often causes trades to be exited
142 # before the original spread has become profitable.
143 pnl = self.get_pnl(context, data)
144 if zscore > -0.0 and reference_pos > 0 and pnl > 0:
145 order_target(self._y, 0.0)
146 order_target(self._x, 0.0)
147
148 elif zscore < 0.0 and reference_pos < 0 and pnl > 0:
149 order_target(self._y, 0.0)
150 order_target(self._x, 0.0)
151
152 else:
153 if zscore > 1.5:
154 order_target_percent(self._y, -self.leverage / 2.)
155 order_target_percent(self._x, self.leverage / 2.)
156 self.entry_dt = now
157 if zscore < -1.5:
158 order_target_percent(self._y, self.leverage / 2.)
159 order_target_percent(self._x, -self.leverage / 2.)
160 self.entry_dt = now
161 except Exception as e:
162 log.debug("[{}] {}".format(self.name, str(e)))
163
164 def update(self):
165 prices = np.log(history(bar_count=1, frequency='1m', field='price'))
166 self.X.update(prices)
167 self.Y.update(prices)
168 self.kf.update(self.means_frame().iloc[-1])
169
170 def mean_spread(self):
171 means = self.means_frame()
172 beta, alpha = self.kf.state_mean
173 return means[self._y] - (beta * means[self._x] + alpha)
174
175
176 def means_frame(self):
177 mu_Y = self.Y.state_means
178 mu_X = self.X.state_means
179 return pd.DataFrame([mu_Y, mu_X]).T
180
181
182 def initialize_filters(self, context, data):
183 prices = np.log(history(self.initial_bars, self.freq, 'price'))
184 self.X.update(prices)
185 self.Y.update(prices)
186
187 # Drops the initial 0 mean value from the kalman filter
188 self.X.state_means = self.X.state_means.iloc[-self.initial_bars:]
189 self.Y.state_means = self.Y.state_means.iloc[-self.initial_bars:]
190 self.kf = KalmanRegression(self.Y.state_means, self.X.state_means,
191 delta=self.delta, maxlen=self.maxlen)
192
193 def get_pnl(self, context, data):
194 x = self._x
195 y = self._y
196 prices = history(1, '1d', 'price').iloc[-1]
197 positions = context.portfolio.positions
198 dx = prices[x] - positions[x].cost_basis
199 dy = prices[y] - positions[y].cost_basis
200 return (positions[x].amount * dx +
201 positions[y].amount * dy)
202
203
204
205
206
207
208 class KalmanMovingAverage(object):
209 """
210 Estimates the moving average of a price process
211 via Kalman Filtering.
212
213 See http://pykalman.github.io/ for docs on the
214 filtering process.
215 """
216
217 def __init__(self, asset, observation_covariance=1.0, initial_value=0,
218 initial_state_covariance=1.0, transition_covariance=0.05,
219 initial_window=20, maxlen=3000, freq='1d'):
220
221 self.asset = asset
222 self.freq = freq
223 self.initial_window = initial_window
224 self.maxlen = maxlen
225 self.kf = KalmanFilter(transition_matrices=[1],
226 observation_matrices=[1],
227 initial_state_mean=initial_value,
228 initial_state_covariance=initial_state_covariance,
229 observation_covariance=observation_covariance,
230 transition_covariance=transition_covariance)
231 self.state_means = pd.Series([self.kf.initial_state_mean], name=self.asset)
232 self.state_vars = pd.Series([self.kf.initial_state_covariance], name=self.asset)
233
234
235 def update(self, observations):
236 for dt, observation in observations[self.asset].iterkv():
237 self._update(dt, observation)
238
239 def _update(self, dt, observation):
240 mu, cov = self.kf.filter_update(self.state_means.iloc[-1],
241 self.state_vars.iloc[-1],
242 observation)
243 self.state_means[dt] = mu.flatten()[0]
244 self.state_vars[dt] = cov.flatten()[0]
245 if self.state_means.shape[0] > self.maxlen:
246 self.state_means = self.state_means.iloc[-self.maxlen:]
247 if self.state_vars.shape[0] > self.maxlen:
248 self.state_vars = self.state_vars.iloc[-self.maxlen:]
249
250
251 class KalmanRegression(object):
252 """
253 Uses a Kalman Filter to estimate regression parameters
254 in an online fashion.
255
256 Estimated model: y ~ beta * x + alpha
257 """
258
259 def __init__(self, initial_y, initial_x, delta=1e-5, maxlen=3000):
260 self._x = initial_x.name
261 self._y = initial_y.name
262 self.maxlen = maxlen
263 trans_cov = delta / (1 - delta) * np.eye(2)
264 obs_mat = np.expand_dims(
265 np.vstack([[initial_x], [np.ones(initial_x.shape[0])]]).T, axis=1)
266
267 self.kf = KalmanFilter(n_dim_obs=1, n_dim_state=2,
268 initial_state_mean=np.zeros(2),
269 initial_state_covariance=np.ones((2, 2)),
270 transition_matrices=np.eye(2),
271 observation_matrices=obs_mat,
272 observation_covariance=1.0,
273 transition_covariance=trans_cov)
274 state_means, state_covs = self.kf.filter(initial_y.values)
275 self.means = pd.DataFrame(state_means,
276 index=initial_y.index,
277 columns=['beta', 'alpha'])
278 self.state_cov = state_covs[-1]
279
280 def update(self, observations):
281 x = observations[self._x]
282 y = observations[self._y]
283 mu, self.state_cov = self.kf.filter_update(
284 self.state_mean, self.state_cov, y,
285 observation_matrix=np.array([[x, 1.0]]))
286 mu = pd.Series(mu, index=['beta', 'alpha'],
287 name=observations.name)
288 self.means = self.means.append(mu)
289 if self.means.shape[0] > self.maxlen:
290 self.means = self.means.iloc[-self.maxlen:]
291
292 def get_spread(self, observations):
293 x = observations[self._x]
294 y = observations[self._y]
295 return y - (self.means.beta[-1] * x + self.means.alpha[-1])
296
297 @property
298 def state_mean(self):
299 return self.means.iloc[-1]
300
301