ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

trading_env.py

(11641B)


      1 """
      2 The MIT License (MIT)
      3 
      4 Copyright (c) 2016 Tito Ingargiola
      5 Copyright (c) 2019 Stefan Jansen
      6 
      7 Permission is hereby granted, free of charge, to any person obtaining a copy
      8 of this software and associated documentation files (the "Software"), to deal
      9 in the Software without restriction, including without limitation the rights
     10 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     11 copies of the Software, and to permit persons to whom the Software is
     12 furnished to do so, subject to the following conditions:
     13 
     14 The above copyright notice and this permission notice shall be included in all
     15 copies or substantial portions of the Software.
     16 
     17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     18 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     19 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     20 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     21 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     22 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     23 SOFTWARE.
     24 """
     25 
     26 import logging
     27 import tempfile
     28 
     29 import gym
     30 import numpy as np
     31 import pandas as pd
     32 from gym import spaces
     33 from gym.utils import seeding
     34 from sklearn.preprocessing import scale
     35 
     36 logging.basicConfig()
     37 log = logging.getLogger(__name__)
     38 log.setLevel(logging.INFO)
     39 log.info('%s logger started.', __name__)
     40 
     41 
     42 class DataSource:
     43     """
     44     Data source for TradingEnvironment
     45 
     46     Loads & preprocesses daily price & volume data
     47     Provides data for each new episode.
     48     Stocks with longest history:
     49 
     50     ticker  # obs
     51     KO      14155
     52     GE      14155
     53     BA      14155
     54     CAT     14155
     55     DIS     14155
     56 
     57     """
     58 
     59     def __init__(self, trading_days=252, ticker='AAPL', normalize=True, min_perc_days=100):
     60         self.ticker = ticker
     61         self.trading_days = trading_days + 1
     62         self.normalize = normalize
     63         self.min_perc_days = min_perc_days
     64         self.data = self.load_data()
     65         self.preprocess_data()
     66         self.min_values = self.data.min()
     67         self.max_values = self.data.max()
     68         self.step = 0
     69         self.idx = None
     70 
     71     def load_data(self):
     72         log.info('loading data for {}...'.format(self.ticker))
     73         idx = pd.IndexSlice
     74         with pd.HDFStore('../data/assets.h5') as store:
     75             df = (store['quandl/wiki/prices']
     76                   .loc[idx[:, self.ticker],
     77                        ['adj_close', 'adj_volume']]
     78                   .dropna())
     79         df.columns = ['close', 'volume']
     80         log.info('got data for {}...'.format(self.ticker))
     81         return df
     82 
     83     @staticmethod
     84     def rsi(data, window=14):
     85         diff = data.diff().dropna()
     86 
     87         up, down = diff.copy(), diff.copy()
     88         up[up < 0] = 0
     89         down[down > 0] = 0
     90 
     91         rolling_up = up.rolling(window).mean()
     92         rolling_down = down.abs().rolling(window).mean()
     93 
     94         RS2 = rolling_up / rolling_down
     95         return 100 - (100 / (1 + RS2))
     96 
     97     def momentum(self, data, window=100):
     98         def pct_rank(x):
     99             return pd.Series(x).rank(pct=True).iloc[-1]
    100 
    101         return data.rolling(window).apply(pct_rank, raw=True)
    102 
    103     def preprocess_data(self):
    104         """calculate returns and percentiles, then removes missing values"""
    105 
    106         # make volume positive and pre-scale
    107         self.data.volume = np.log(self.data.volume.replace(0, 1))
    108 
    109         self.data['returns'] = self.data.close.pct_change()
    110         self.data['close_pct_100'] = self.momentum(self.data.close, window=100)
    111         self.data['volume_pct_100'] = self.momentum(self.data.volume, window=100)
    112         self.data['close_pct_20'] = self.momentum(self.data.close, window=20)
    113         self.data['volume_pct_20'] = self.momentum(self.data.volume, window=20)
    114         self.data['return_5'] = self.data.returns.pct_change(5)
    115         self.data['return_21'] = self.data.returns.pct_change(21)
    116         self.data['rsi'] = self.rsi(self.data.close)
    117         self.data = self.data.replace((np.inf, -np.inf), np.nan).dropna()
    118 
    119         r = self.data.returns.copy()
    120         if self.normalize:
    121             self.data = pd.DataFrame(scale(self.data),
    122                                      columns=self.data.columns,
    123                                      index=self.data.index)
    124         self.data['returns'] = r  # don't scale returns
    125         log.info(self.data.info())
    126 
    127     def reset(self):
    128         """Provides starting index for time series and resets step"""
    129         high = len(self.data.index) - self.trading_days
    130         self.idx = np.random.randint(low=0, high=high)
    131         self.step = 0
    132 
    133     def take_step(self):
    134         """Returns data for current trading day and done signal"""
    135         obs = self.data.iloc[self.idx].values
    136         self.idx += 1
    137         self.step += 1
    138         done = self.step >= self.trading_days
    139         return obs, done
    140 
    141 
    142 class TradingSimulator:
    143     """ Implements core trading simulator for single-instrument univ """
    144 
    145     def __init__(self, steps, trading_cost_bps, time_cost_bps):
    146         # invariant for object life
    147         self.trading_cost_bps = trading_cost_bps
    148         self.time_cost_bps = time_cost_bps
    149         self.steps = steps
    150 
    151         # change every step
    152         self.step = 0
    153         self.actions = np.zeros(self.steps)
    154         self.navs = np.ones(self.steps)
    155         self.market_navs = np.ones(self.steps)
    156         self.strategy_returns = np.ones(self.steps)
    157         self.positions = np.zeros(self.steps)
    158         self.costs = np.zeros(self.steps)
    159         self.trades = np.zeros(self.steps)
    160         self.market_returns = np.zeros(self.steps)
    161 
    162     def reset(self):
    163         self.step = 0
    164         self.actions.fill(0)
    165         self.navs.fill(1)
    166         self.market_navs.fill(1)
    167         self.strategy_returns.fill(0)
    168         self.positions.fill(0)
    169         self.costs.fill(0)
    170         self.trades.fill(0)
    171         self.market_returns.fill(0)
    172 
    173     def take_step(self, action, market_return):
    174         """ Calculates NAVs, trading costs and reward
    175         based on an action and latest market return
    176             etc and returns the reward and a summary of the day's activity. """
    177 
    178         bod_position = 0.0 if self.step == 0 else self.positions[self.step - 1]
    179         bod_nav = 1.0 if self.step == 0 else self.navs[self.step - 1]
    180         bod_market_nav = 1.0 if self.step == 0 else self.market_navs[self.step - 1]
    181 
    182         self.market_returns[self.step] = market_return
    183         self.actions[self.step] = action
    184 
    185         self.positions[self.step] = action - 1
    186         self.trades[self.step] = self.positions[self.step] - bod_position
    187 
    188         trade_costs_pct = abs(self.trades[self.step]) * self.trading_cost_bps
    189         self.costs[self.step] = trade_costs_pct + self.time_cost_bps
    190         reward = ((bod_position * market_return) - self.costs[self.step])
    191         self.strategy_returns[self.step] = reward
    192 
    193         if self.step != 0:
    194             self.navs[self.step] = bod_nav * (1 + self.strategy_returns[self.step - 1])
    195             self.market_navs[self.step] = bod_market_nav * (1 + self.market_returns[self.step - 1])
    196 
    197         info = {'reward': reward,
    198                 'nav'   : self.navs[self.step],
    199                 'costs' : self.costs[self.step]}
    200 
    201         self.step += 1
    202         return reward, info
    203 
    204     def result(self):
    205         """returns current state as pd.DataFrame """
    206         return pd.DataFrame({'action'         : self.actions,  # current action
    207                              'nav'            : self.navs,  # starting Net Asset Value (NAV)
    208                              'market_nav'     : self.market_navs,
    209                              'market_return'  : self.market_returns,
    210                              'strategy_return': self.strategy_returns,
    211                              'position'       : self.positions,  # eod position
    212                              'cost'           : self.costs,  # eod costs
    213                              'trade'          : self.trades})  # eod trade)
    214 
    215 
    216 class TradingEnvironment(gym.Env):
    217     """A simple trading environment for reinforcement learning.
    218 
    219     Provides daily observations for a stock price series
    220     An episode is defined as a sequence of 252 trading days with random start
    221     Each day is a 'step' that allows the agent from three actions:
    222 
    223     SHORT (0)
    224     FLAT (1)
    225     LONG (2)
    226 
    227     Trades cost 10bps of the change in position value.
    228     Going from short to long implies two trades.
    229     Not trading also a default time cost of 1bps per step.
    230 
    231     An episode begins with a starting Net Asset Value (NAV) of 1 unit of cash.
    232     If the NAV drops to 0, the episode is ends with a loss.
    233     If the NAV hits 2.0, the agent wins.
    234 
    235     The trading simulator tracks a buy-and-hold strategy as benchmark.
    236     """
    237     metadata = {'render.modes': ['human']}
    238 
    239     def __init__(self, trading_days=252, trading_cost_bps=1e-3, time_cost_bps=1e-4, ticker='AAPL'):
    240         self.trading_days = trading_days
    241         self.ticker = ticker
    242         self.trading_cost_bps = trading_cost_bps
    243         self.time_cost_bps = time_cost_bps
    244         self.src = DataSource(trading_days=self.trading_days, ticker=ticker)
    245         self.sim = TradingSimulator(steps=self.trading_days,
    246                                     trading_cost_bps=self.trading_cost_bps,
    247                                     time_cost_bps=self.time_cost_bps)
    248         self.action_space = spaces.Discrete(3)
    249         self.observation_space = spaces.Box(self.src.min_values,
    250                                             self.src.max_values)
    251         self.reset()
    252 
    253     def seed(self, seed=None):
    254         self.np_random, seed = seeding.np_random(seed)
    255         return [seed]
    256 
    257     def step(self, action):
    258         """Returns state observation, reward, done and info"""
    259         assert self.action_space.contains(action), '{} {} invalid'.format(action, type(action))
    260         observation, done = self.src.take_step()
    261         reward, info = self.sim.take_step(action=action,
    262                                           market_return=observation[2])
    263         return observation, reward, done, info
    264 
    265     def reset(self):
    266         """Resets DataSource and TradingSimulator; returns first observation"""
    267         self.src.reset()
    268         self.sim.reset()
    269         return self.src.take_step()[0]
    270 
    271     # TODO
    272     def render(self, mode='human'):
    273         """Not implemented"""
    274         pass
    275 
    276     def run_strategy(self, strategy, return_df=True):
    277         """Runs strategy, returns DataFrame with all steps"""
    278         observation = self.reset()
    279         done = False
    280         while not done:
    281             action = strategy(observation, self)  # call strategy
    282             observation, reward, done, info = self.step(action)
    283 
    284         return self.sim.result() if return_df else None
    285 
    286     def run_strategy_episodes(self, strategy, episodes=1, write_log=True, return_df=True):
    287         """ run provided strategy the specified # of times, possibly
    288             writing a log and possibly returning a dataframe summarizing activity.
    289 
    290             Note that writing the log is expensive and returning the df is more so.
    291             For training purposes, you might not want to set both.
    292         """
    293         logfile = None
    294         if write_log:
    295             logfile = tempfile.NamedTemporaryFile(delete=False, mode='w+')
    296             log.info('writing log to %s', logfile.name)
    297             need_df = write_log or return_df
    298 
    299         alldf = None
    300 
    301         for i in range(episodes):
    302             df = self.run_strategy(strategy, return_df=need_df)
    303             if write_log:
    304                 df.to_csv(logfile, mode='ab')
    305                 if return_df:
    306                     alldf = df if alldf is None else pd.concat([alldf, df], axis=0)
    307 
    308         return alldf