ml-finance-python
Python scripts for finance machine learning
git clone https://9o.is/git/ml-finance-python.git
collect_experiments.py
(1578B)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Collect per-experiment coherence results into a single HDF5 store.

Walks the ``experiments/<min_df>/<max_df>/<binary>`` directory grid, reads
every ``coherence.csv`` that exists, reshapes it to long format (one row per
topic / passes / num_topics combination), tags it with the grid parameters
and writes the combined table to ``results.h5`` under the key ``coherence``.
Grid cells without a ``coherence.csv`` are reported on stdout and skipped.
"""
__author__ = 'Stefan Jansen'

from pathlib import Path
import numpy as np
import pandas as pd

experiment_path = Path('experiments')

# dtm params
min_dfs = [50, 100, 250, 500]
max_dfs = [.1, .25, .5, 1.0]
binarys = [True, False]

# Perplexity collection is disabled (see the commented-out lines below); the
# empty frame is kept so the module-level name stays defined.
perplexity = pd.DataFrame()

# Gather the per-experiment frames and concatenate once after the loop rather
# than re-copying a growing frame on every iteration.
coherence_frames = []
for min_df in min_dfs:
    for max_df in max_dfs:
        for binary in binarys:
            vocab_path = (experiment_path
                          / str(min_df)
                          / str(max_df)
                          / str(int(binary)))
            # perplexity = pd.concat([perplexity,
            #                         pd.read_csv(vocab_path / 'perplexity.csv')])
            try:
                # Only the read can raise FileNotFoundError, so keep the
                # try body limited to it.
                raw = pd.read_csv(vocab_path / 'coherence.csv',
                                  header=[0, 1])
            except FileNotFoundError:
                print('Missing:', min_df, max_df, binary)
                continue

            # The CSV has a two-row header. stack() moves the second header
            # level into the index, next to the row position.
            df = raw.stack()
            df.index.names = ['topic', 'passes']

            # The remaining column labels (first header row) are melted into
            # 'num_topics'. var_name is passed as a scalar string: newer
            # pandas versions reject a list here for single-level columns.
            df = (pd.melt(df.reset_index(),
                          id_vars=['topic', 'passes'],
                          var_name='num_topics',
                          value_name='coherence')
                  .dropna()
                  .assign(min_df=min_df,
                          max_df=max_df,
                          binary=binary))
            coherence_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# experiment produced a coherence file.
coherence = pd.concat(coherence_frames) if coherence_frames else pd.DataFrame()

# print(perplexity.info())
# DataFrame.info() prints its summary itself and returns None; wrapping it in
# print() would emit an extra "None" line.
coherence.info()
with pd.HDFStore('results.h5') as store:
    # store.put('perplexity', perplexity)
    store.put('coherence', coherence)