import os
os.chdir('../src')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors
import seaborn as sns
from sklearn import metrics
featureset = 'indicators1'
df = pd.read_hdf('../data/features/technical.h5', key=featureset)
df['preds'] = pd.read_hdf('../data/predictions/logistic_{}.h5'.format(featureset), key='predictions')
df.target = df.target > 0  # binarize the target: True when the price went up
df = df.dropna()
importances = pd.read_hdf('../data/predictions/logistic_{}.h5'.format(featureset), key='importances')
print('accuracy', metrics.accuracy_score(df.target, df.preds > 0.5))
print('AUC', metrics.roc_auc_score(df.target, df.preds))
importances.sort_values()
probs = df.preds  # the predicted up-probabilities from the logistic model
plt.plot(probs[10000:10100])
sns.kdeplot(probs[10000:], shade=True)
df['probs'] = probs
When trading, missed opportunities don't hurt as much as predictions that the market will rise when it actually falls. In the long run it might not matter in terms of expected value, but the less we trade, the less we are exposed to risk. This means we care more about being right when the model predicts an uptrend than when it predicts a downtrend (assuming we are not shorting). The fraction of up-predictions that turn out to be correct is called the precision, and the fraction of actual up-moves that the model catches is called the recall. Since we care more about precision than about recall, we can decide to predict an increase only when the model is confident about that increase beyond some probability threshold.
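One way to visualise this trade-off across all possible thresholds at once is sklearn's precision_recall_curve; a minimal sketch using the columns loaded above:
precision, recall, thresholds = metrics.precision_recall_curve(df.target, df.preds)
plt.plot(thresholds, precision[:-1], label='precision')
plt.plot(thresholds, recall[:-1], label='recall')
plt.xlabel('threshold')
plt.legend()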
One might consider using a weighted log loss that puts more emphasis on precision by setting pos_weight < 1 (values above 1 would instead favor recall):
targets * -log(sigmoid(logits)) * pos_weight +
(1 - targets) * -log(1 - sigmoid(logits))
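As a rough NumPy sketch (a hypothetical helper, not part of the training code used here), the weighted loss looks like this:
def weighted_log_loss(targets, logits, pos_weight=0.5):
    # class-weighted binary cross-entropy; pos_weight < 1 down-weights the positive term,
    # making false positives relatively more expensive and pushing the model towards precision
    probs = 1 / (1 + np.exp(-logits))  # sigmoid
    loss = targets * -np.log(probs) * pos_weight + (1 - targets) * -np.log(1 - probs)
    return loss.mean()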
Let's try some threshold values at which we look at the precision and recall, and also at how often the model would actually give positive predictions.
for threshold in [0.4, 0.5, 0.6, 0.65, 0.7, 0.75, 0.8, 0.9]:
    predictions = df.preds > threshold
    print(predictions.sum() / len(predictions), 'is the positive rate for the threshold {}'.format(threshold))
    print('precision', metrics.precision_score(df.target, predictions))
    print('recall', metrics.recall_score(df.target, predictions))
    print('\n')
For example, 0.6 seems like a nice threshold: we still get up-predictions 13.4% of the time and are right 57.7% of the time. (On second thought, maybe it is more profitable to just throw trades against a wall.)
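As a quick sanity check on that remark, we can compare the precision at the 0.6 threshold against the unconditional up-frequency (the "throw trades against a wall" baseline):
base_rate = df.target.mean()  # how often the market goes up at all
precision_at_06 = metrics.precision_score(df.target, df.preds > 0.6)
print('base up-rate {:.3f}, precision at threshold 0.6 {:.3f}'.format(base_rate, precision_at_06))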
df.preds.rolling(5000).mean().plot()
df.target.rolling(5000).mean().plot()
The preceding plot (target in orange, probabilities in blue) shows that the mean of the classifier's predictions varies significantly through time, so it has in a sense incorporated some longer-term trend detection. It also seems interesting that the mean of the probabilities looks like a version of the mean of the targets that lags behind (very significantly).
Maybe this model would therefore benefit from being ensembled with a longer-term model, because it seems horrendous that the model doesn't even track the expected value of the target. Let's look at a really dumb baseline classifier:
for tf in [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]:
    # predict "up" whenever the trailing mean of the (shifted) target is above 0.5
    probs = df.target.astype(float).shift(1).rolling(tf).mean()[tf:]
    print(tf, metrics.accuracy_score(df.target[tf:], probs > 0.5))
It looks like this is indeed a really dumb classifier. I will need a longer-term model that is more complex, but probably still a linear model because of the lack of data.
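For illustration, ensembling here could be as simple as a convex blend of the short-horizon logistic predictions with some longer-horizon probability series; the weight and the stand-in long-term model below are placeholders, not a fitted ensemble:
def blend_probs(short_term_probs, long_term_probs, w_short=0.7):
    # naive convex combination of two probability series; a real ensemble would
    # fit the weight (or stack a meta-model) on a held-out validation period
    return w_short * short_term_probs + (1 - w_short) * long_term_probs

blended = blend_probs(df.preds, df.target.astype(float).shift(1).rolling(2048).mean()).dropna()
print('blended AUC', metrics.roc_auc_score(df.target.loc[blended.index], blended))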
from sklearn import model_selection
def rolling_score(label, probs, time_interval=48):
    # score the predictions over consecutive time intervals, one AUC per interval
    n_intervals = int(len(probs) / time_interval)  # roughly 1000 for time_interval=48, i.e. one interval per day
    cv = model_selection.TimeSeriesSplit(n_splits=n_intervals)
    splits = cv.split(probs)
    performance_padded = np.zeros(len(probs))
    performance_padded[:] = np.nan
    performance = np.zeros(n_intervals)
    performance[:] = np.nan
    for i, (_, test_idx) in enumerate(splits):
        # skip intervals with missing predictions or with only one class in the label
        if probs.iloc[test_idx].isnull().sum() == 0 and label.iloc[test_idx].nunique() > 1:
            perf = metrics.roc_auc_score(label.iloc[test_idx], probs.iloc[test_idx])
            performance_padded[test_idx] = perf
            performance[i] = perf
    return performance, performance_padded
performance, _ = rolling_score(df.target, df.probs, time_interval=48)
s = pd.Series(performance).dropna()
sns.kdeplot(s, label='daily auc distribution', shade=True)
plt.axvline(s.quantile(0.05), c='red')
performance, _ = rolling_score(df.target, df.probs, time_interval=7*48)
s = pd.Series(performance).dropna()
sns.kdeplot(s, label='weekly auc distribution', shade=True)
plt.axvline(s.quantile(0.05), c='red')
# rolling mean of the squared predictions: a rough proxy for how aggressively the model predicts high probabilities over time
pd.Series(np.square(df.preds)).rolling(500).mean().plot()