import os
os.chdir('../src')
import pandas as pd
import numpy as np
from sklearn import metrics, model_selection
from sklearn import linear_model
import os
import math
import lightgbm as lgb
import luigi
import extramlutils
from data import DownloadPoloniex
# Download the data:
# Run the DownloadPoloniex task through luigi. local_scheduler=True asks luigi
# to schedule the task in this process instead of contacting a central
# scheduler. Presumably the task produces the JSON file under ../data/raw that
# is read further down - confirm against the `data` module.
luigi.build([DownloadPoloniex()], local_scheduler=True)
# Keyword arguments later unpacked into lgb.LGBMClassifier(**param).
# NOTE(review): LightGBM warns that early stopping is not available with
# boosting='dart' - confirm whether early_stopping_round has any effect here.
param = dict(
    n_estimators=10000,
    learning_rate=0.005,
    num_leaves=27,
    boosting='dart',
    # max_depth=8 is intentionally left disabled
    early_stopping_round=500,
)
# Load the raw BTC_ETH JSON and build the binary target column.
# Rows are assumed to be in chronological order - TODO confirm.
df = pd.read_json('../data/raw/BTC_ETH.json')
df = df.drop('date', axis=1)
# target is True when the next row's close is strictly greater than this one's.
df['target'] = df.close < df.close.shift(-1)
# The final row has no successor: shift(-1) yields NaN there and the comparison
# silently evaluates to False, which would be a made-up label. Drop that row.
# .copy() makes the result an independent frame so columns can be added to it
# in place later without acting on a slice.
df = df.iloc[:-1].copy()
def add_log_diff(df, key):
    """Add the first difference of the natural log of a column, in place.

    A new column named ``key + '_log_diff'`` is written into ``df``. Its first
    entry is NaN because the first row has no predecessor to difference with.

    Args:
        df: pandas DataFrame that contains the column ``key``; it is mutated.
        key: name of the source column.

    Returns:
        None. Callers rely on the in-place mutation of ``df``.
    """
    df[key + '_log_diff'] = np.log(df[key]).diff()
# Baseline: logistic regression on a single feature, the log-difference of
# close, scored with a 5-split time-series cross-validation.
add_log_diff(df, 'close')
# dropna removes the first row, whose log-difference is NaN.
df = df[['target', 'close_log_diff']].dropna()

cv = model_selection.TimeSeriesSplit(n_splits=5)
model = linear_model.LogisticRegression(solver='lbfgs')

# Selecting with a one-element list keeps the feature matrix 2-D: (n_rows, 1).
x = df[['close_log_diff']].to_numpy()
result = model_selection.cross_validate(
    model,
    x,
    df['target'].to_numpy(),
    cv=cv,
    scoring=['roc_auc', 'accuracy'],
    return_estimator=True,
)

print(result)
print('\ntest roc auc:', np.mean(result['test_roc_auc']))
print('\ntest accuracy:', np.mean(result['test_accuracy']))
# Second experiment: logistic regression on the current log-difference of close
# plus its lagged copies, scored with the same 5-split time-series CV.
df = pd.read_json('../data/raw/BTC_ETH.json')
df = df.drop('date', axis=1)
# target is True when the next row's close is strictly greater than this one's.
df['target'] = df.close < df.close.shift(-1)
shifts = list(range(1, 11))  # lags 1..10, in rows
# The final row has no successor, so its target would be a comparison against
# NaN (always False) rather than a real label; drop it. .copy() gives an
# independent frame so the column additions below do not act on a slice.
df = df[['target', 'close']].iloc[:-1].copy()
add_log_diff(df, 'close')
for i in shifts:
    # Positive shift moves earlier values down, so each lag column only holds
    # log-differences from previous rows.
    df["close_log_diff_{}".format(i)] = df.close_log_diff.shift(i)
# Remove the leading rows left incomplete by the diff and the lags.
df = df.dropna()
df = df.reset_index(drop=True)
df = df.drop('close', axis=1)
cv = model_selection.TimeSeriesSplit(n_splits=5)
model = linear_model.LogisticRegression(solver='lbfgs')
x = np.array(df.drop('target', axis=1))
result = model_selection.cross_validate(
    model,
    x,
    np.array(df.target),
    cv=cv,
    return_estimator=True,
    scoring=['roc_auc', 'accuracy'],
)
print(result)
print('\ntest roc auc:', np.mean(result['test_roc_auc']))
print('test accuracy:', np.mean(result['test_accuracy']))
# LightGBM classifier on the same feature columns, using the project's
# extramlutils helpers and the `cv` splitter defined earlier.
model = lgb.LGBMClassifier(**param)
# Every column except the label is a feature; the names are passed along so
# the helper receives them as feature_name.
key_names = [column for column in df.columns if column != 'target']
models = extramlutils.cross_validate_lgb(
    model,
    df[key_names].to_numpy(),
    df['target'].to_numpy(),
    cv,
    feature_name=key_names,
)
# NOTE(review): the return value is discarded when run as a script; presumably
# this was a notebook cell whose value was displayed - confirm.
extramlutils.get_best_iterations(models)