ml-finance-python

python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
traditional_value_algorithm.py

(10587B)
      1 """
      2 Long/Short Traditional Value (130/30)
      3 
      4 Author: Gilbert Wassermann
      5 
      6 This algorithm creates traditional value factors and standardizes
      7 them using a synthetic S&P500. It then uses a 130/30 strategy to trade.
      8 
      9     https://www.math.nyu.edu/faculty/avellane/Lo13030.pdf
     10     
     11 Please direct any questions, feedback, or corrections to help@quantopian.com
     12 
     13 The material on this website is provided for informational purposes only
     14 and does not constitute an offer to sell, a solicitation to buy, or a 
     15 recommendation or endorsement for any security or strategy, 
     16 nor does it constitute an offer to provide investment advisory or other services by Quantopian.
     17 
     18 In addition, the content of the website neither constitutes investment advice 
     19 nor offers any opinion with respect to the suitability of any security or any specific investment. 
     20 Quantopian makes no guarantees as to accuracy or completeness of the 
     21 views expressed in the website. The views are subject to change, 
     22 and may have become unreliable for various reasons, 
     23 including changes in market conditions or economic circumstances.
     24 """
     25 
     26 import numpy as np
     27 import pandas as pd
     28 from quantopian.pipeline import Pipeline
     29 from quantopian.pipeline.data import morningstar
     30 from quantopian.pipeline.factors import CustomFactor
     31 from quantopian.algorithm import attach_pipeline, pipeline_output
     32 from quantopian.pipeline.data.builtin import USEquityPricing
     33 from quantopian.pipeline.factors import SimpleMovingAverage, AverageDollarVolume
     34 from quantopian.pipeline.filters.morningstar import IsPrimaryShare
     35 from quantopian.pipeline.data import morningstar as mstar
     36 
     37 # Custom Factor 1 : Dividend Yield
     38 class Div_Yield(CustomFactor):
     39 
     40     inputs = [morningstar.valuation_ratios.dividend_yield]
     41     window_length = 1
     42 
     43     def compute(self, today, assets, out, d_y):
     44         out[:] = d_y[-1]
     45 
     46         
     47 # Custom Factor 2 : P/B Ratio
     48 class Price_to_Book(CustomFactor):
     49 
     50     inputs = [morningstar.valuation_ratios.pb_ratio]
     51     window_length = 1
     52 
     53     def compute(self, today, assets, out, p_b_r):
     54         out[:] = -p_b_r[-1]
     55 
     56         
     57 # Custom Factor 3 : Price to Trailing 12 Month Sales       
     58 class Price_to_TTM_Sales(CustomFactor):
     59     inputs = [morningstar.valuation_ratios.ps_ratio]
     60     window_length = 1
     61     
     62     def compute(self, today, assets, out, ps):
     63         out[:] = -ps[-1]
     64 
     65         
     66 # Custom Factor 4 : Price to Trailing 12 Month Cashflow
     67 class Price_to_TTM_Cashflows(CustomFactor):
     68     inputs = [morningstar.valuation_ratios.pcf_ratio]
     69     window_length = 1
     70     
     71     def compute(self, today, assets, out, pcf):
     72         out[:] = -pcf[-1] 
     73  
     74 
     75 # This factor creates the synthetic S&P500
     76 class SPY_proxy(CustomFactor):
     77     inputs = [morningstar.valuation.market_cap]
     78     window_length = 1
     79     
     80     def compute(self, today, assets, out, mc):
     81         out[:] = mc[-1]
     82         
     83         
     84 # This pulls all necessary data in one step
     85 def Data_Pull():
     86     
     87     # create the pipeline for the data pull
     88     Data_Pipe = Pipeline()
     89     
     90     # create SPY proxy
     91     Data_Pipe.add(SPY_proxy(), 'SPY Proxy')
     92 
     93     # Div Yield
     94     Data_Pipe.add(Div_Yield(), 'Dividend Yield') 
     95     
     96     # Price to Book
     97     Data_Pipe.add(Price_to_Book(), 'Price to Book')
     98     
     99     # Price / TTM Sales
    100     Data_Pipe.add(Price_to_TTM_Sales(), 'Price / TTM Sales')
    101     
    102     # Price / TTM Cashflows
    103     Data_Pipe.add(Price_to_TTM_Cashflows(), 'Price / TTM Cashflow')
    104         
    105     return Data_Pipe
    106 
    107 
    108 # function to filter out unwanted values in the scores
    109 def filter_fn(x):
    110     if x <= -10:
    111         x = -10.0
    112     elif x >= 10:
    113         x = 10.0
    114     return x   
    115 
    116 
    117 def standard_frame_compute(df):
    118     """
    119     Standardizes the Pipeline API data pull
    120     using the S&P500's means and standard deviations for
    121     particular CustomFactors.
    122 
    123     parameters
    124     ----------
    125     df: numpy.array
    126         full result of Data_Pull
    127 
    128     returns
    129     -------
    130     numpy.array
    131         standardized Data_Pull results
    132         
    133     numpy.array
    134         index of equities
    135     """
    136     
    137     # basic clean of dataset to remove infinite values
    138     df = df.replace([np.inf, -np.inf], np.nan)
    139     df = df.dropna()
    140     
    141     # need standardization params from synthetic S&P500
    142     df_SPY = df.sort(columns='SPY Proxy', ascending=False)
    143 
    144     # create separate dataframe for SPY
    145     # to store standardization values
    146     df_SPY = df_SPY.head(500)
    147     
    148     # get dataframes into numpy array
    149     df_SPY = df_SPY.as_matrix()
    150     
    151     # store index values
    152     index = df.index.values
    153     
    154     # turn iinto a numpy array for speed
    155     df = df.as_matrix()
    156     
    157     # create an empty vector on which to add standardized values
    158     df_standard = np.empty(df.shape[0])
    159     
    160     for col_SPY, col_full in zip(df_SPY.T, df.T):
    161         
    162         # summary stats for S&P500
    163         mu = np.mean(col_SPY)
    164         sigma = np.std(col_SPY)
    165         col_standard = np.array(((col_full - mu) / sigma)) 
    166 
    167         # create vectorized function (lambda equivalent)
    168         fltr = np.vectorize(filter_fn)
    169         col_standard = (fltr(col_standard))
    170         
    171         # make range between -10 and 10
    172         col_standard = (col_standard / df.shape[1])
    173         
    174         # attach calculated values as new row in df_standard
    175         df_standard = np.vstack((df_standard, col_standard))
    176      
    177     # get rid of first entry (empty scores)
    178     df_standard = np.delete(df_standard,0,0)
    179     
    180     return (df_standard, index)
    181 
    182 
    183 def composite_score(df, index):
    184     """
    185     Summarize standardized data in a single number.
    186 
    187     parameters
    188     ----------
    189     df: numpy.array
    190         standardized results
    191         
    192     index: numpy.array
    193         index of equities
    194         
    195     returns
    196     -------
    197     pandas.Series
    198         series of summarized, ranked results
    199 
    200     """
    201 
    202     # sum up transformed data
    203     df_composite = df.sum(axis=0)
    204     
    205     # put into a pandas dataframe and connect numbers
    206     # to equities via reindexing
    207     df_composite = pd.Series(data=df_composite,index=index)
    208     
    209     # sort descending
    210     df_composite.sort(ascending=False)
    211 
    212     return df_composite
    213 
    214 
    215 def initialize(context):   
    216     
    217     # get data from pipeline
    218     data_pull = Data_Pull()
    219     attach_pipeline(data_pull,'Data')
    220     
    221     # filter out bad stocks for universe
    222     mask = filter_universe()
    223     data_pull.set_screen(mask)
    224     
    225     # set leverage ratios for longs and shorts
    226     context.long_leverage = 1.3
    227     context.short_leverage = -0.3
    228     
    229     # at the start of each moth, run the rebalancing function
    230     schedule_function(rebalance, date_rules.month_start(), time_rules.market_open(minutes=30))
    231     
    232     # clean untradeable securities daily
    233     schedule_function(daily_clean,
    234                       date_rule=date_rules.every_day(),
    235                       time_rule=time_rules.market_close(minutes=30))    
    236     
    237     # record variables
    238     schedule_function(record_vars,
    239                       date_rule=date_rules.every_day(),
    240                       time_rule=time_rules.market_close())
    241     pass
    242 
    243 
    244 # called before every day of trading
    245 def before_trading_start(context, data):
    246     
    247     # apply the logic to the data pull in order to get a ranked list of equities
    248     context.output = pipeline_output('Data')
    249     context.output, index = standard_frame_compute(context.output)
    250     context.output = composite_score(context.output, index)
    251     
    252     # create lists of stocks on which to go long and short
    253     context.long_set = set(context.output.head(26).index)
    254     context.short_set =  set(context.output.tail(6).index)
    255     
    256 # log long and short equities and their corresponding composite scores
    257 def handle_data(context, data):
    258     """
    259     print "LONG LIST"
    260     log.info(context.long_set)  
    261     
    262     print "SHORT LIST"
    263     log.info(context.short_set)
    264     """
    265     pass
    266 
    267 
    268 # called at the start of every month in order to rebalance the longs and shorts lists
    269 def rebalance(context, data):
    270     
    271     # calculate how much of each stock to buy or hold
    272     long_pct = context.long_leverage / len(context.long_set)
    273     short_pct = context.short_leverage / len(context.short_set)
    274    
    275     # universe now contains just longs and shorts
    276     context.security_set = set(context.long_set.union(context.short_set))
    277 
    278     for stock in context.security_set:
    279         if data.can_trade(stock):
    280             if stock in context.long_set:
    281                 order_target_percent(stock, long_pct)
    282             elif stock in context.short_set:
    283                 order_target_percent(stock, short_pct)
    284 
    285     # close out stale positions    
    286     daily_clean(context, data)
    287 
    288 # close every tradable position that is no longer in the target long/short set
    289 def daily_clean(context, data):
    290     
    291     for stock in context.portfolio.positions:
    292         if stock not in context.security_set and data.can_trade(stock):
    293             order_target_percent(stock, 0)
    294     
    295 def record_vars(context, data):
    296 
    297     # number of long and short positions. Even in minute mode, only the end-of-day
    298     # leverage is plotted.
    299 
    300     shorts = longs = 0
    301     for position in context.portfolio.positions.itervalues():
    302         if position.amount < 0:
    303             shorts += 1
    304         elif position.amount > 0:
    305             longs += 1
    306     record(leverage=context.account.leverage, short_count=shorts, long_count=longs,
    307           exposure=context.account.net_leverage)
    308     
    309 def filter_universe():  
    310     """
    311     9 filters:
    312         1. common stock
    313         2 & 3. not limited partnership - name and database check
    314         4. database has fundamental data
    315         5. not over the counter
    316         6. not when issued
    317         7. not depository receipts
    318         8. primary share
    319         9. high dollar volume
    320     Check Scott's notebook for more details.
    321     """
    322     common_stock = mstar.share_class_reference.security_type.latest.eq('ST00000001')
    323     not_lp_name = ~mstar.company_reference.standard_name.latest.matches('.* L[\\. ]?P\.?$')
    324     not_lp_balance_sheet = mstar.balance_sheet.limited_partnership.latest.isnull()
    325     have_data = mstar.valuation.market_cap.latest.notnull()
    326     not_otc = ~mstar.share_class_reference.exchange_id.latest.startswith('OTC')
    327     not_wi = ~mstar.share_class_reference.symbol.latest.endswith('.WI')
    328     not_depository = ~mstar.share_class_reference.is_depositary_receipt.latest
    329     primary_share = IsPrimaryShare()
    330     
    331     # Combine the above filters.
    332     tradable_filter = (common_stock & not_lp_name & not_lp_balance_sheet &
    333                        have_data & not_otc & not_wi & not_depository & primary_share)
    334     
    335     high_volume_tradable = (AverageDollarVolume(window_length=21,
    336                                                 mask=tradable_filter).percentile_between(70, 100))
    337 
    338     screen = high_volume_tradable
    339     
    340     return screen