ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
eval_experiments.py
(1730B)
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 __author__ = 'Stefan Jansen'
4
5 from pathlib import Path
6 import numpy as np
7 import pandas as pd
8
9
def timing_results(path='timings'):
    """Print a summary of all timing CSV files found in a directory.

    Every ``*.csv`` file directly inside *path* is read with
    ``pandas.read_csv`` and the resulting frames are concatenated. The
    combined frame's ``info()`` summary is printed, followed by its rows
    sorted by the ``workers`` column.

    Args:
        path: Directory holding the timing CSV files. Defaults to
            ``'timings'`` relative to the current working directory.

    Returns:
        None. All output goes to standard output.
    """
    # Sort the matches: glob order depends on the filesystem, which would
    # otherwise make the concatenation order differ between machines.
    csv_files = sorted(Path(path).glob('*.csv'))
    if not csv_files:
        # pd.concat raises ValueError on an empty sequence, so report the
        # missing data instead of crashing.
        print('No timing results found in', path)
        return

    df = pd.concat([pd.read_csv(f) for f in csv_files])
    # DataFrame.info() writes the summary itself and returns None; wrapping
    # it in print() would emit a stray "None" line.
    df.info()
    print(df.sort_values('workers'))
15
16
# Script entry point: print the timing summary, then stop.
# NOTE: execution ends here, so nothing below this point in the file runs.
timing_results()
# `exit` is injected by the `site` module for interactive sessions and is
# absent under `python -S`; raising SystemExit directly is equivalent.
raise SystemExit
19
20
def experiment_results(experiment_path='experiments', store_path='results.h5'):
    """Collect coherence scores from the experiment grid into an HDF5 store.

    For every combination of the document-term-matrix parameters
    (``min_df``, ``max_df``, ``binary``) the file
    ``<experiment_path>/<min_df>/<max_df>/<int(binary)>/coherence.csv`` is
    read with a two-row header, melted into long format with the columns
    ``num_topics``, ``passes`` and ``coherence``, stripped of rows with
    missing values, and tagged with the parameter values that produced it.
    Combinations whose file does not exist are reported on standard output
    and skipped.

    The combined frame is stored under the key ``'coherence'``. The key
    ``'perplexity'`` is also written, but always as an empty frame because
    perplexity results are not loaded here.

    Args:
        experiment_path: Root directory of the experiment outputs.
            Defaults to ``'experiments'``.
        store_path: HDF5 file that receives the results. Defaults to
            ``'results.h5'``.

    Returns:
        None. Results are written to *store_path*.
    """
    from itertools import product

    experiment_path = Path(experiment_path)

    # dtm params
    min_dfs = [50, 100, 250, 500]
    max_dfs = [.1, .25, .5, 1.0]
    binarys = [True, False]

    # NOTE: perplexity.csv files are not read; the key is still written
    # (as an empty frame) so the layout of the store stays unchanged.
    perplexity = pd.DataFrame()

    frames = []
    for min_df, max_df, binary in product(min_dfs, max_dfs, binarys):
        vocab_path = experiment_path / str(min_df) / str(max_df) / str(int(binary))
        # Keep the try body to the one call that can raise for a missing
        # file, so errors in the reshaping below are not misattributed.
        try:
            raw = pd.read_csv(vocab_path / 'coherence.csv', header=[0, 1])
        except FileNotFoundError:
            print('Missing:', min_df, max_df, binary)
            continue

        frames.append(pd.melt(raw,
                              var_name=['num_topics', 'passes'],
                              value_name='coherence')
                      .dropna()
                      .assign(min_df=min_df,
                              max_df=max_df,
                              binary=binary))

    # Concatenate once instead of growing a frame inside the loop: this
    # avoids re-copying the accumulated rows on every iteration and avoids
    # concatenating onto an empty, column-less frame.
    coherence = pd.concat(frames) if frames else pd.DataFrame()

    with pd.HDFStore(store_path) as store:
        store.put('perplexity', perplexity)
        store.put('coherence', coherence)