ml-finance-python
Python scripts for machine learning in finance
git clone https://9o.is/git/ml-finance-python.git
create_message_spec.py
(1629B)
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 __author__ = 'Stefan Jansen'
4
5 import pandas as pd
6
# Load the message field specification, order the rows by their 'id' column,
# then discard that column. No `encoding` argument is passed: `read_excel`
# does not accept one in current pandas (it raises TypeError).
df = pd.read_excel('message_types.xlsx', sheet_name='messages').sort_values('id').drop('id', axis=1)

# basic cleaning
df.columns = [c.lower().strip() for c in df.columns]
# Bracket access is used for column assignment so the columns can never be
# confused with DataFrame attributes; `regex=False` makes every replacement a
# literal one regardless of the installed pandas version's default.
df['value'] = df['value'].str.strip()
df['name'] = (df['name']
              .str.strip()
              .str.lower()
              .str.replace(' ', '_', regex=False)
              .str.replace('-', '_', regex=False)
              .str.replace('/', '_', regex=False))
df['notes'] = df['notes'].str.strip()
# Only rows whose name is 'message_type' get a value here (taken from their
# 'value' column); every other row is NaN until the forward fill below.
df['message_type'] = df.loc[df['name'] == 'message_type', 'value']

# Build the message_type -> label table from the rows that have both a
# message type and notes; the notes text becomes the label.
messages = df.loc[:, ['message_type', 'notes']].dropna().rename(columns={'notes': 'name'})
messages['name'] = messages['name'].str.lower().str.replace('message', '', regex=False)
messages['name'] = (messages['name']
                    .str.replace('.', '', regex=False)  # literal dot, not "any character"
                    .str.strip()
                    .str.replace(' ', '_', regex=False))
messages.to_csv('message_labels.csv', index=False)

# Propagate each message type down to the rows that follow it, then drop the
# 'message_type' marker rows themselves.
df['message_type'] = df['message_type'].ffill()
# .copy() so the assignment below writes to an independent frame rather than a
# slice of the original (avoids SettingWithCopyWarning).
df = df[df['name'] != 'message_type'].copy()
df['value'] = (df['value']
               .str.lower()
               .str.replace(' ', '_', regex=False)
               .str.replace('(', '', regex=False)
               .str.replace(')', '', regex=False))
25
def check_field_count(df):
    """Helper that validates file format.

    Reads the 'size' sheet of 'message_types.xlsx' (first column as index)
    and compares its 'size' column with the number of rows per
    'message_type' in ``df``.

    :param df: DataFrame with a 'message_type' column.
    :raises AssertionError: if the per-type row counts do not equal the
        'size' column of the template sheet.
    """
    message_size = pd.read_excel('message_types.xlsx', sheet_name='size', index_col=0)
    # Assignment aligns the group sizes on the sheet's index.
    message_size['check'] = df.groupby('message_type').size()
    # Raise explicitly instead of using an `assert` statement so the check
    # still runs when Python is started with -O (which strips asserts).
    if not message_size['size'].equals(message_size['check']):
        raise AssertionError('field count does not match template')
31
32
def check_field_specs():
    """Print one boolean per message type in the module-level ``df``.

    For each group, the value printed is whether the 'offset' column equals
    the previous row's offset plus length (0 for the first row).
    """
    for _, fields in df.groupby('message_type'):
        field_end = fields['offset'].add(fields['length'])
        # Shift down one row so each row sees where the preceding field ended.
        expected_offset = field_end.shift().fillna(0).astype(int)
        print(expected_offset.equals(fields['offset']))
37
38
# Write the cleaned field specification, restricted to these columns in this
# order, without the index.
df.loc[:, ['message_type', 'name', 'value', 'length', 'offset', 'notes']].to_csv(
    'message_types.csv',
    index=False,
)