ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

eval_experiments.py

(1730B)


      1 #!/usr/bin/env python
      2 # -*- coding: utf-8 -*-
      3 __author__ = 'Stefan Jansen'
      4 
      5 from pathlib import Path
      6 import numpy as np
      7 import pandas as pd
      8 
      9 
     10 def timing_results():
     11     path = Path('timings')
     12     df = pd.concat([pd.read_csv(f) for f in path.glob('*.csv')])
     13     print(df.info())
     14     print(df.sort_values('workers'))
     15 
     16 
     17 timing_results()
     18 exit()
     19 
     20 
     21 def experiment_results():
     22     experiment_path = Path('experiments')
     23 
     24     # dtm params
     25     min_dfs = [50, 100, 250, 500]
     26     max_dfs = [.1, .25, .5, 1.0]
     27     binarys = [True, False]
     28 
     29     perplexity = pd.DataFrame()
     30     coherence = pd.DataFrame()
     31     for min_df in min_dfs:
     32         for max_df in max_dfs:
     33             for binary in binarys:
     34                 vocab_path = experiment_path / str(min_df) / str(max_df) / str(int(binary))
     35                 try:
     36                     # perplexity = pd.concat([perplexity,
     37                     #                         pd.read_csv(vocab_path / 'perplexity.csv')])
     38                     df = (pd.melt(pd.read_csv(vocab_path / 'coherence.csv',
     39                                               header=[0, 1]),
     40                                   var_name=['num_topics', 'passes'],
     41                                   value_name='coherence')
     42                           .dropna()
     43                           .assign(min_df=min_df,
     44                                   max_df=max_df,
     45                                   binary=binary))
     46 
     47                     coherence = pd.concat([coherence,
     48                                            df])
     49                 except FileNotFoundError:
     50                     print('Missing:', min_df, max_df, binary)
     51 
     52     with pd.HDFStore('results.h5') as store:
     53         store.put('perplexity', perplexity)
     54         store.put('coherence', coherence)