ml-finance-python
python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
trading_env.py
(11641B)
1 """
2 The MIT License (MIT)
3
4 Copyright (c) 2016 Tito Ingargiola
5 Copyright (c) 2019 Stefan Jansen
6
7 Permission is hereby granted, free of charge, to any person obtaining a copy
8 of this software and associated documentation files (the "Software"), to deal
9 in the Software without restriction, including without limitation the rights
10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 copies of the Software, and to permit persons to whom the Software is
12 furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice shall be included in all
15 copies or substantial portions of the Software.
16
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 SOFTWARE.
24 """
25
26 import logging
27 import tempfile
28
29 import gym
30 import numpy as np
31 import pandas as pd
32 from gym import spaces
33 from gym.utils import seeding
34 from sklearn.preprocessing import scale
35
# Module-level logger: make sure a root handler exists, then emit INFO and
# above for this module so data-loading progress is visible by default.
logging.basicConfig()
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
log.info('%s logger started.', __name__)
40
41
class DataSource:
    """
    Data source for TradingEnvironment

    Loads & preprocesses daily price & volume data
    Provides data for each new episode.
    Stocks with longest history:

    ticker  # obs
    KO      14155
    GE      14155
    BA      14155
    CAT     14155
    DIS     14155

    """

    def __init__(self, trading_days=252, ticker='AAPL', normalize=True, min_perc_days=100):
        """Load and preprocess the price history for ``ticker``.

        :param trading_days: number of simulator steps per episode
        :param ticker: symbol looked up in the HDF5 price store
        :param normalize: if True, standardize all features except returns
        :param min_perc_days: stored on the instance; not used by this class
        """
        self.ticker = ticker
        # One extra row per episode: TradingEnvironment.reset() consumes the
        # first observation before the simulator takes its first step.
        self.trading_days = trading_days + 1
        self.normalize = normalize
        self.min_perc_days = min_perc_days
        self.data = self.load_data()
        self.preprocess_data()
        self.min_values = self.data.min()
        self.max_values = self.data.max()
        self.step = 0
        self.idx = None

    def load_data(self):
        """Read adjusted close and volume for the ticker from the HDF5 store.

        :return: DataFrame with columns ``close`` and ``volume``
        """
        log.info('loading data for %s...', self.ticker)
        idx = pd.IndexSlice
        with pd.HDFStore('../data/assets.h5') as store:
            df = (store['quandl/wiki/prices']
                  .loc[idx[:, self.ticker], ['adj_close', 'adj_volume']]
                  .dropna())
        df.columns = ['close', 'volume']
        log.info('got data for %s...', self.ticker)
        return df

    @staticmethod
    def rsi(data, window=14):
        """Relative strength index based on simple rolling means.

        :param data: price series
        :param window: rolling window length
        :return: series of RSI values; NaN until ``window`` differences exist
        """
        diff = data.diff().dropna()

        # split the price changes into gains and losses
        up, down = diff.copy(), diff.copy()
        up[up < 0] = 0
        down[down > 0] = 0

        rolling_up = up.rolling(window).mean()
        rolling_down = down.abs().rolling(window).mean()

        # a window without losses yields an infinite ratio and hence RSI 100
        RS2 = rolling_up / rolling_down
        return 100 - (100 / (1 + RS2))

    def momentum(self, data, window=100):
        """Percentile rank of the latest value within each rolling window.

        :param data: series to rank
        :param window: rolling window length
        :return: series of ranks in (0, 1]; NaN until the window is full
        """
        def pct_rank(x):
            return pd.Series(x).rank(pct=True).iloc[-1]

        return data.rolling(window).apply(pct_rank, raw=True)

    def preprocess_data(self):
        """calculate returns and percentiles, then removes missing values"""
        import io  # local import: only needed to capture the info() summary

        # make volume positive and pre-scale
        self.data['volume'] = np.log(self.data['volume'].replace(0, 1))

        self.data['returns'] = self.data.close.pct_change()
        self.data['close_pct_100'] = self.momentum(self.data.close, window=100)
        self.data['volume_pct_100'] = self.momentum(self.data.volume, window=100)
        self.data['close_pct_20'] = self.momentum(self.data.close, window=20)
        self.data['volume_pct_20'] = self.momentum(self.data.volume, window=20)
        # NOTE(review): these are percent changes *of the daily returns*, not
        # multi-day price returns; presumably close.pct_change(n) was
        # intended - confirm before changing, as it alters the features.
        self.data['return_5'] = self.data.returns.pct_change(5)
        self.data['return_21'] = self.data.returns.pct_change(21)
        self.data['rsi'] = self.rsi(self.data.close)
        self.data = self.data.replace((np.inf, -np.inf), np.nan).dropna()

        r = self.data.returns.copy()
        if self.normalize:
            self.data = pd.DataFrame(scale(self.data),
                                     columns=self.data.columns,
                                     index=self.data.index)
            self.data['returns'] = r  # don't scale returns

        # DataFrame.info() writes to a buffer and returns None, so capture the
        # summary explicitly instead of logging its return value.
        summary = io.StringIO()
        self.data.info(buf=summary)
        log.info(summary.getvalue())

    def reset(self):
        """Provides starting index for time series and resets step

        :raises ValueError: if the data holds too few rows for one episode
        """
        high = len(self.data.index) - self.trading_days
        if high < 1:
            raise ValueError(
                'not enough data for an episode: {} rows available, more than {} required'.format(
                    len(self.data.index), self.trading_days))
        self.idx = np.random.randint(low=0, high=high)
        self.step = 0

    def take_step(self):
        """Returns data for current trading day and done signal"""
        obs = self.data.iloc[self.idx].values
        self.idx += 1
        self.step += 1
        done = self.step >= self.trading_days
        return obs, done
140
141
class TradingSimulator:
    """Implements core trading simulator for single-instrument univ

    Keeps one fixed-length array per tracked quantity (actions, positions,
    trades, costs, returns, NAVs) and fills one slot per call to take_step.
    """

    def __init__(self, steps, trading_cost_bps, time_cost_bps):
        """
        :param steps: number of steps per episode (length of all arrays)
        :param trading_cost_bps: cost per unit of position change; used as a
            plain fraction of NAV (e.g. 1e-3), despite the name
        :param time_cost_bps: cost charged on every step, as a plain fraction
        """
        # invariant for object life
        self.trading_cost_bps = trading_cost_bps
        self.time_cost_bps = time_cost_bps
        self.steps = steps

        # change every step
        self.step = 0
        self.actions = np.zeros(self.steps)
        self.navs = np.ones(self.steps)
        self.market_navs = np.ones(self.steps)
        # zeros, matching reset(): a fresh simulator must not report a 100%
        # strategy return for steps that have not been taken yet
        self.strategy_returns = np.zeros(self.steps)
        self.positions = np.zeros(self.steps)
        self.costs = np.zeros(self.steps)
        self.trades = np.zeros(self.steps)
        self.market_returns = np.zeros(self.steps)

    def reset(self):
        """Rewind to step 0 and restore all arrays to their initial values."""
        self.step = 0
        self.actions.fill(0)
        self.navs.fill(1)
        self.market_navs.fill(1)
        self.strategy_returns.fill(0)
        self.positions.fill(0)
        self.costs.fill(0)
        self.trades.fill(0)
        self.market_returns.fill(0)

    def take_step(self, action, market_return):
        """ Calculates NAVs, trading costs and reward
        based on an action and latest market return
        etc and returns the reward and a summary of the day's activity.

        :param action: 0, 1 or 2; mapped to a position of -1, 0 or +1
        :param market_return: market return for the current step
        :return: tuple ``(reward, info)`` where info holds reward, nav, costs
        """
        # beginning-of-day state carried over from the previous step
        bod_position = 0.0 if self.step == 0 else self.positions[self.step - 1]
        bod_nav = 1.0 if self.step == 0 else self.navs[self.step - 1]
        bod_market_nav = 1.0 if self.step == 0 else self.market_navs[self.step - 1]

        self.market_returns[self.step] = market_return
        self.actions[self.step] = action

        self.positions[self.step] = action - 1
        self.trades[self.step] = self.positions[self.step] - bod_position

        trade_costs_pct = abs(self.trades[self.step]) * self.trading_cost_bps
        self.costs[self.step] = trade_costs_pct + self.time_cost_bps
        # the position held coming into the step earns the market return
        reward = ((bod_position * market_return) - self.costs[self.step])
        self.strategy_returns[self.step] = reward

        # NAVs are start-of-step values: they compound the previous step's
        # returns, so the first step keeps the initial NAV of 1
        if self.step != 0:
            self.navs[self.step] = bod_nav * (1 + self.strategy_returns[self.step - 1])
            self.market_navs[self.step] = bod_market_nav * (1 + self.market_returns[self.step - 1])

        info = {'reward': reward,
                'nav'   : self.navs[self.step],
                'costs' : self.costs[self.step]}

        self.step += 1
        return reward, info

    def result(self):
        """returns current state as pd.DataFrame """
        return pd.DataFrame({'action'         : self.actions,  # current action
                             'nav'            : self.navs,  # starting Net Asset Value (NAV)
                             'market_nav'     : self.market_navs,
                             'market_return'  : self.market_returns,
                             'strategy_return': self.strategy_returns,
                             'position'       : self.positions,  # eod position
                             'cost'           : self.costs,  # eod costs
                             'trade'          : self.trades})  # eod trade
214
215
class TradingEnvironment(gym.Env):
    """A simple trading environment for reinforcement learning.

    Provides daily observations for a stock price series
    An episode is defined as a sequence of 252 trading days with random start
    Each day is a 'step' that allows the agent to choose from three actions:

    SHORT (0)
    FLAT (1)
    LONG (2)

    Trades cost 10bps of the change in position value.
    Going from short to long implies two trades.
    Every step also incurs a default time cost of 1bps.

    An episode begins with a starting Net Asset Value (NAV) of 1 unit of cash.
    If the NAV drops to 0, the episode ends with a loss.
    If the NAV hits 2.0, the agent wins.
    NOTE: these NAV-based terminations are not implemented in this class;
    the done signal comes solely from the data source's step count.

    The trading simulator tracks a buy-and-hold strategy as benchmark.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, trading_days=252, trading_cost_bps=1e-3, time_cost_bps=1e-4, ticker='AAPL'):
        """
        :param trading_days: number of steps per episode
        :param trading_cost_bps: cost per unit of position change (fraction)
        :param time_cost_bps: cost charged on every step (fraction)
        :param ticker: symbol whose price history drives the environment
        """
        self.trading_days = trading_days
        self.ticker = ticker
        self.trading_cost_bps = trading_cost_bps
        self.time_cost_bps = time_cost_bps
        self.src = DataSource(trading_days=self.trading_days, ticker=ticker)
        self.sim = TradingSimulator(steps=self.trading_days,
                                    trading_cost_bps=self.trading_cost_bps,
                                    time_cost_bps=self.time_cost_bps)
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.src.min_values,
                                            self.src.max_values)
        self.reset()

    def seed(self, seed=None):
        """Create the environment's random generator and return the seed used.

        NOTE(review): DataSource.reset() draws from the global np.random, so
        this generator does not currently affect episode start positions.
        """
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Returns state observation, reward, done and info"""
        assert self.action_space.contains(action), '{} {} invalid'.format(action, type(action))
        observation, done = self.src.take_step()
        # index 2 is the 'returns' column (close, volume, returns, ...)
        reward, info = self.sim.take_step(action=action,
                                          market_return=observation[2])
        return observation, reward, done, info

    def reset(self):
        """Resets DataSource and TradingSimulator; returns first observation"""
        self.src.reset()
        self.sim.reset()
        return self.src.take_step()[0]

    # TODO
    def render(self, mode='human'):
        """Not implemented"""
        pass

    def run_strategy(self, strategy, return_df=True):
        """Runs strategy, returns DataFrame with all steps

        :param strategy: callable ``strategy(observation, env)`` -> action
        :param return_df: if False, return None instead of the step summary
        """
        observation = self.reset()
        done = False
        while not done:
            action = strategy(observation, self)  # call strategy
            observation, reward, done, info = self.step(action)

        return self.sim.result() if return_df else None

    def run_strategy_episodes(self, strategy, episodes=1, write_log=True, return_df=True):
        """ run provided strategy the specified # of times, possibly
        writing a log and possibly returning a dataframe summarizing activity.

        Note that writing the log is expensive and returning the df is more so.
        For training purposes, you might not want to set both.

        :param strategy: callable ``strategy(observation, env)`` -> action
        :param episodes: number of episodes to run
        :param write_log: if True, append each episode as CSV to a temp file
            that is kept on disk (its path is logged)
        :param return_df: if True, return all episodes concatenated
        :return: concatenated DataFrame, or None if ``return_df`` is False
        """
        logfile = None
        if write_log:
            logfile = tempfile.NamedTemporaryFile(delete=False, mode='w+')
            log.info('writing log to %s', logfile.name)
        need_df = write_log or return_df

        alldf = None
        try:
            for _ in range(episodes):
                df = self.run_strategy(strategy, return_df=need_df)
                if write_log:
                    # the handle is already open in text mode, so no mode is
                    # passed; successive episodes are appended in sequence
                    df.to_csv(logfile)
                if return_df:
                    alldf = df if alldf is None else pd.concat([alldf, df], axis=0)
        finally:
            # always flush and release the log file, even if a strategy raises
            if logfile is not None:
                logfile.close()

        return alldf