ml-finance-python

Python scripts for machine learning in finance

git clone https://9o.is/git/ml-finance-python.git

collect_experiments.py

(1578B)


      1 #!/usr/bin/env python
      2 # -*- coding: utf-8 -*-
      3 __author__ = 'Stefan Jansen'
      4 
      5 from pathlib import Path
      6 import numpy as np
      7 import pandas as pd
      8 
      9 experiment_path = Path('experiments')
     10 
     11 # dtm params
     12 min_dfs = [50, 100, 250, 500]
     13 max_dfs = [.1, .25, .5, 1.0]
     14 binarys = [True, False]
     15 
     16 perplexity = pd.DataFrame()
     17 coherence = pd.DataFrame()
     18 for min_df in min_dfs:
     19     for max_df in max_dfs:
     20         for binary in binarys:
     21             vocab_path = experiment_path / str(min_df) / str(max_df) / str(int(binary))
     22             try:
     23                 # perplexity = pd.concat([perplexity,
     24                 #                         pd.read_csv(vocab_path / 'perplexity.csv')])
     25                 df = pd.read_csv(vocab_path / 'coherence.csv',
     26                                  header=[0, 1]).stack()
     27                 df.index.names = ['topic', 'passes']
     28                 df = (pd.melt(df.reset_index(),
     29                               id_vars=['topic', 'passes'],
     30                               var_name=['num_topics'],
     31                               value_name='coherence')
     32                       .dropna()
     33                       .assign(min_df=min_df,
     34                               max_df=max_df,
     35                               binary=binary))
     36                 coherence = pd.concat([coherence,
     37                                        df])
     38             except FileNotFoundError:
     39                 print('Missing:', min_df, max_df, binary)
     40 
     41 # print(perplexity.info())
     42 print(coherence.info())
     43 with pd.HDFStore('results.h5') as store:
     44     # store.put('perplexity', perplexity)
     45     store.put('coherence', coherence)