ml-finance-python

python scripts for finance machine learning

git clone https://9o.is/git/ml-finance-python.git

gbm_params.py

(4519B)


      1 #!/usr/bin/env python
      2 # -*- coding: utf-8 -*-
      3 __author__ = 'Stefan Jansen'
      4 
      5 """
      6 This file lists the hyperparameters 
      7 and their default settings for the three GBM libraries
      8 xgboost, lightgbm and catboost
      9 """
     10 
     11 
     12 def get_params(model='xgboost'):
     13     if model == 'xgboost':
     14         params = dict(
     15                 booster='gbtree',
     16                 objective='binary:logistic',
     17                 eval_metric=['logloss', 'auc'],
     18                 tree_method='hist',
     19                 silent=0,
     20                 seed=42,
     21                 learning_rate=0.3,
     22                 gamma=0,
     23                 max_depth=6,
     24                 min_child_weight=1,
     25                 max_delta_step=0,
     26                 subsample=1,
     27                 colsample_bytree=1,
     28                 colsample_bylevel=1,
     29                 alpha=0
     30         )
     31         params['lambda'] = 1  # reserved keyword
     32     elif model == 'lightgbm':
     33         params = dict(boosting='gbdt',
     34                       objective='binary',
     35                       task='train',
     36                       max_bin=63,
     37                       metric='auc',
     38                       learning_rate=0.1,
     39                       n_estimators=250,
     40                       early_stopping=25,
     41                       max_depth=8,
     42                       num_leaves=31,
     43                       colsample_bytree=1.0,
     44                       bagging_fraction=1.0,
     45                       bagging_freq=0,
     46                       gamma=0.0,
     47                       min_gain_to_split=0,
     48                       min_child_weight=0.001,
     49                       min_data_in_leaf=100,
     50                       reg_alpha=0.0,
     51                       reg_lambda=0.0,
     52                       is_unbalance=False,
     53                       # device='gpu',
     54                       n_jobs=-1,
     55                       verbose=-1,
     56                       random_state=42)
     57     elif model == 'catboost':
     58         params = dict(iterations=500,
     59                       learning_rate=0.03,
     60                       max_depth=6,
     61                       reg_lambda=3,
     62                       model_size_reg=None,
     63                       colsample_bylevel=1,
     64                       loss_function='Logloss',
     65                       max_bin=128,
     66                       feature_border_type='MinEntropy',
     67                       od_pval=None,
     68                       od_wait=20,
     69                       od_type='Iter',
     70                       nan_mode='Min',
     71                       counter_calc_method=None,
     72                       leaf_estimation_iterations=1,
     73                       leaf_estimation_method='Gradient',
     74                       thread_count=None,
     75                       random_seed=None,
     76                       use_best_model=None,
     77                       best_model_min_trees=None,
     78                       verbose=100,
     79                       logging_level='Verbose',
     80                       metric_period=20,
     81                       simple_ctr=None,
     82                       ctr_leaf_count_limit=None,
     83                       store_all_simple_ctr=None,
     84                       max_ctr_complexity=1,
     85                       has_time=None,
     86                       allow_const_label=None,
     87                       classes_count=None,
     88                       class_weights=None,
     89                       one_hot_max_size=None,
     90                       random_strength=None,
     91                       name=None,
     92                       ignored_features=None,
     93                       train_dir=None,
     94                       custom_loss=None,
     95                       custom_metric=None,
     96                       eval_metric='AUC',
     97                       bagging_temperature=None,
     98                       save_snapshot=None,
     99                       snapshot_file=None,
    100                       snapshot_interval=None,
    101                       fold_len_multiplier=None,
    102                       used_ram_limit=None,
    103                       gpu_ram_part=.95,
    104                       pinned_memory_size=None,
    105                       allow_writing_files=None,
    106                       final_ctr_computation_mode=None,
    107                       approx_on_full_history=None,
    108                       boosting_type='Ordered',
    109                       combinations_ctr=None,
    110                       per_feature_ctr=None,
    111                       ctr_description=None,
    112                       task_type='GPU',
    113                       bootstrap_type='Bayesian',
    114                       # subsample=.66,
    115                       dev_score_calc_obj_block_size=None,
    116                       gpu_cat_features_storage=None,
    117                       data_partition=None,
    118                       metadata=None)
    119     return params