flake8 passing, use pathlib in lieu of os.path to accommodate windows/mac OS
This commit is contained in:
parent
2600ba4e74
commit
99f7e44c30
@ -6,7 +6,7 @@
|
||||
"fiat_display_currency": "USD",
|
||||
"dry_run": true,
|
||||
"timeframe": "5m",
|
||||
"dry_run_wallet":1000,
|
||||
"dry_run_wallet": 1000,
|
||||
"cancel_open_orders_on_exit": true,
|
||||
"unfilledtimeout": {
|
||||
"entry": 10,
|
||||
@ -16,7 +16,9 @@
|
||||
"name": "ftx",
|
||||
"key": "",
|
||||
"secret": "",
|
||||
"ccxt_config": {"enableRateLimit": true},
|
||||
"ccxt_config": {
|
||||
"enableRateLimit": true
|
||||
},
|
||||
"ccxt_async_config": {
|
||||
"enableRateLimit": true,
|
||||
"rateLimit": 200
|
||||
@ -24,8 +26,7 @@
|
||||
"pair_whitelist": [
|
||||
"BTC/USDT"
|
||||
],
|
||||
"pair_blacklist": [
|
||||
]
|
||||
"pair_blacklist": []
|
||||
},
|
||||
"entry_pricing": {
|
||||
"price_side": "same",
|
||||
@ -43,16 +44,20 @@
|
||||
"order_book_top": 1
|
||||
},
|
||||
"pairlists": [
|
||||
{"method": "StaticPairList"}
|
||||
{
|
||||
"method": "StaticPairList"
|
||||
}
|
||||
],
|
||||
|
||||
"freqai": {
|
||||
"btc_pair" : "BTC/USDT",
|
||||
"timeframes" : ["5m","15m","1h"],
|
||||
"full_timerange" : "20210601-20220101",
|
||||
"train_period" : 30,
|
||||
"backtest_period" : 7,
|
||||
"identifier" : "example",
|
||||
"btc_pair": "BTC/USDT",
|
||||
"timeframes": [
|
||||
"5m",
|
||||
"15m"
|
||||
],
|
||||
"full_timerange": "20210601-20210901",
|
||||
"train_period": 30,
|
||||
"backtest_period": 7,
|
||||
"identifier": "example",
|
||||
"base_features": [
|
||||
"rsi",
|
||||
"close_over_20sma",
|
||||
@ -69,22 +74,21 @@
|
||||
"LINK/USDT",
|
||||
"DOT/USDT"
|
||||
],
|
||||
"training_timerange" : "20211220-20220117",
|
||||
|
||||
"feature_parameters" : {
|
||||
"training_timerange": "20211220-20220117",
|
||||
"feature_parameters": {
|
||||
"period": 12,
|
||||
"shift": 2,
|
||||
"shift": 1,
|
||||
"drop_features": false,
|
||||
"DI_threshold": 1,
|
||||
"weight_factor": 0,
|
||||
"principal_component_analysis": false,
|
||||
"remove_outliers": false
|
||||
},
|
||||
"data_split_parameters" : {
|
||||
"data_split_parameters": {
|
||||
"test_size": 0.25,
|
||||
"random_state": 1
|
||||
},
|
||||
"model_training_parameters" : {
|
||||
"model_training_parameters": {
|
||||
"n_estimators": 2000,
|
||||
"random_state": 1,
|
||||
"learning_rate": 0.02,
|
||||
|
@ -1,20 +1,23 @@
|
||||
import json
|
||||
import os
|
||||
import copy
|
||||
import datetime
|
||||
import json
|
||||
import pickle as pk
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from joblib import dump, load
|
||||
from pandas import DataFrame
|
||||
from joblib import dump
|
||||
from joblib import load
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.metrics.pairwise import pairwise_distances
|
||||
import datetime
|
||||
from typing import Any, Dict, List, Tuple
|
||||
import pickle as pk
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
|
||||
|
||||
SECONDS_IN_DAY = 86400
|
||||
|
||||
|
||||
class DataHandler:
|
||||
"""
|
||||
Class designed to handle all the data for the IFreqaiModel class model.
|
||||
@ -22,21 +25,22 @@ class DataHandler:
|
||||
author: Robert Caulk, rob.caulk@gmail.com
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict[str, Any], dataframe: DataFrame, data: List):
|
||||
def __init__(self, config: Dict[str, Any], dataframe: DataFrame):
|
||||
self.full_dataframe = dataframe
|
||||
(self.training_timeranges,
|
||||
self.backtesting_timeranges) = self.split_timerange(
|
||||
config['freqai']['full_timerange'],
|
||||
config['freqai']['train_period'],
|
||||
config['freqai']['backtest_period'])
|
||||
self.data = data
|
||||
self.data_dictionary = {}
|
||||
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
|
||||
config["freqai"]["full_timerange"],
|
||||
config["freqai"]["train_period"],
|
||||
config["freqai"]["backtest_period"],
|
||||
)
|
||||
self.data: Dict[Any, Any] = {}
|
||||
self.config = config
|
||||
self.freq_config = config['freqai']
|
||||
self.freq_config = config["freqai"]
|
||||
self.predictions = np.array([])
|
||||
self.do_predict = np.array([])
|
||||
self.target_mean = np.array([])
|
||||
self.target_std = np.array([])
|
||||
self.model_path = Path()
|
||||
self.model_filename = ""
|
||||
|
||||
def save_data(self, model: Any) -> None:
|
||||
"""
|
||||
@ -46,19 +50,28 @@ class DataHandler:
|
||||
predictions
|
||||
"""
|
||||
|
||||
if not os.path.exists(self.model_path): os.mkdir(self.model_path)
|
||||
save_path = self.model_path + self.model_filename
|
||||
if not self.model_path.is_dir():
|
||||
self.model_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
save_path = Path(self.model_path)
|
||||
|
||||
# if not os.path.exists(self.model_path):
|
||||
# os.mkdir(self.model_path)
|
||||
# save_path = self.model_path + self.model_filename
|
||||
|
||||
# Save the trained model
|
||||
dump(model, save_path+"_model.joblib")
|
||||
self.data['model_path'] = self.model_path
|
||||
self.data['model_filename'] = self.model_filename
|
||||
self.data['training_features_list'] = list(self.data_dictionary['train_features'].columns)
|
||||
dump(model, save_path / str(self.model_filename + "_model.joblib"))
|
||||
self.data["model_path"] = self.model_path
|
||||
self.data["model_filename"] = self.model_filename
|
||||
self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns)
|
||||
# store the metadata
|
||||
with open(save_path+"_metadata.json", 'w') as fp:
|
||||
with open(save_path / str(self.model_filename + "_metadata.json"), "w") as fp:
|
||||
json.dump(self.data, fp, default=self.np_encoder)
|
||||
|
||||
# save the train data to file so we can check preds for area of applicability later
|
||||
self.data_dictionary['train_features'].to_pickle(save_path+"_trained_df.pkl")
|
||||
self.data_dictionary["train_features"].to_pickle(
|
||||
save_path / str(self.model_filename + "_trained_df.pkl")
|
||||
)
|
||||
|
||||
return
|
||||
|
||||
@ -68,62 +81,77 @@ class DataHandler:
|
||||
:returns:
|
||||
:model: User trained model which can be inferenced for new predictions
|
||||
"""
|
||||
model = load(self.model_path+self.model_filename+"_model.joblib")
|
||||
model = load(self.model_path / str(self.model_filename + "_model.joblib"))
|
||||
|
||||
with open(self.model_path+self.model_filename+"_metadata.json", 'r') as fp:
|
||||
with open(self.model_path / str(self.model_filename + "_metadata.json"), "r") as fp:
|
||||
self.data = json.load(fp)
|
||||
if self.data.get('training_features_list'):
|
||||
self.training_features_list = [*self.data.get('training_features_list')]
|
||||
self.training_features_list = self.data["training_features_list"]
|
||||
# if self.data.get("training_features_list"):
|
||||
# self.training_features_list = [*self.data.get("training_features_list")]
|
||||
|
||||
self.data_dictionary['train_features'] = pd.read_pickle(self.model_path+
|
||||
self.model_filename+"_trained_df.pkl")
|
||||
self.data_dictionary["train_features"] = pd.read_pickle(
|
||||
self.model_path / str(self.model_filename + "_trained_df.pkl")
|
||||
)
|
||||
|
||||
self.model_path = self.data['model_path']
|
||||
self.model_filename = self.data['model_filename']
|
||||
if self.config['freqai']['feature_parameters']['principal_component_analysis']:
|
||||
self.pca = pk.load(open(self.model_path+self.model_filename+"_pca_object.pkl","rb"))
|
||||
self.model_path = self.data["model_path"]
|
||||
self.model_filename = self.data["model_filename"]
|
||||
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
|
||||
self.pca = pk.load(
|
||||
open(self.model_path / str(self.model_filename + "_pca_object.pkl"), "rb")
|
||||
)
|
||||
|
||||
return model
|
||||
|
||||
def make_train_test_datasets(self, filtered_dataframe: DataFrame, labels: DataFrame) -> None:
|
||||
'''
|
||||
def make_train_test_datasets(
|
||||
self, filtered_dataframe: DataFrame, labels: DataFrame
|
||||
) -> Dict[Any, Any]:
|
||||
"""
|
||||
Given the dataframe for the full history for training, split the data into
|
||||
training and test data according to user specified parameters in configuration
|
||||
file.
|
||||
:filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:labels: cleaned labels ready to be split.
|
||||
'''
|
||||
"""
|
||||
|
||||
if self.config['freqai']['feature_parameters']['weight_factor'] > 0:
|
||||
if self.config["freqai"]["feature_parameters"]["weight_factor"] > 0:
|
||||
weights = self.set_weights_higher_recent(len(filtered_dataframe))
|
||||
else: weights = np.ones(len(filtered_dataframe))
|
||||
else:
|
||||
weights = np.ones(len(filtered_dataframe))
|
||||
|
||||
(train_features, test_features, train_labels,
|
||||
test_labels, train_weights, test_weights) = train_test_split(
|
||||
filtered_dataframe[:filtered_dataframe.shape[0]],
|
||||
(
|
||||
train_features,
|
||||
test_features,
|
||||
train_labels,
|
||||
test_labels,
|
||||
train_weights,
|
||||
test_weights,
|
||||
) = train_test_split(
|
||||
filtered_dataframe[: filtered_dataframe.shape[0]],
|
||||
labels,
|
||||
weights,
|
||||
**self.config['freqai']['data_split_parameters']
|
||||
**self.config["freqai"]["data_split_parameters"]
|
||||
)
|
||||
|
||||
return self.build_data_dictionary(
|
||||
train_features,test_features,
|
||||
train_labels,test_labels,
|
||||
train_weights,test_weights)
|
||||
train_features, test_features, train_labels, test_labels, train_weights, test_weights
|
||||
)
|
||||
|
||||
|
||||
|
||||
def filter_features(self, unfiltered_dataframe: DataFrame, training_feature_list: List,
|
||||
labels: DataFrame = None, training_filter: bool=True) -> Tuple[DataFrame, DataFrame]:
|
||||
'''
|
||||
def filter_features(
|
||||
self,
|
||||
unfiltered_dataframe: DataFrame,
|
||||
training_feature_list: List,
|
||||
labels: DataFrame = pd.DataFrame(),
|
||||
training_filter: bool = True,
|
||||
) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
Filter the unfiltered dataframe to extract the user requested features and properly
|
||||
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
|
||||
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
|
||||
row that had a NaN and will shield user from that prediction.
|
||||
:params:
|
||||
:unfiltered_dataframe: the full dataframe for the present training period
|
||||
:training_feature_list: list, the training feature list constructed by self.build_feature_list()
|
||||
according to user specified parameters in the configuration file.
|
||||
:training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified parameters in the configuration file.
|
||||
:labels: the labels for the dataset
|
||||
:training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
@ -131,93 +159,132 @@ class DataHandler:
|
||||
:filtered_dataframe: dataframe cleaned of NaNs and only containing the user
|
||||
requested feature set.
|
||||
:labels: labels cleaned of NaNs.
|
||||
'''
|
||||
"""
|
||||
filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
|
||||
drop_index = pd.isnull(filtered_dataframe).any(1) # get the rows that have NaNs,
|
||||
|
||||
if training_filter: # we don't care about total row number (total no. datapoints) in training, we only care about removing any row with NaNs
|
||||
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
|
||||
if (
|
||||
training_filter
|
||||
): # we don't care about total row number (total no. datapoints) in training, we only care
|
||||
# about removing any row with NaNs
|
||||
drop_index_labels = pd.isnull(labels)
|
||||
filtered_dataframe = filtered_dataframe[(drop_index==False) & (drop_index_labels==False)] # dropping values
|
||||
labels = labels[(drop_index==False) & (drop_index_labels==False)] # assuming the labels depend entirely on the dataframe here.
|
||||
print('dropped',len(unfiltered_dataframe)-len(filtered_dataframe),
|
||||
'training data points due to NaNs, ensure you have downloaded all historical training data')
|
||||
self.data['filter_drop_index_training'] = drop_index
|
||||
drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0)
|
||||
filtered_dataframe = filtered_dataframe[
|
||||
(drop_index == 0) & (drop_index_labels == 0)
|
||||
] # dropping values
|
||||
labels = labels[
|
||||
(drop_index == 0) & (drop_index_labels == 0)
|
||||
] # assuming the labels depend entirely on the dataframe here.
|
||||
print(
|
||||
"dropped",
|
||||
len(unfiltered_dataframe) - len(filtered_dataframe),
|
||||
"training data points due to NaNs, ensure you have downloaded",
|
||||
"all historical training data",
|
||||
)
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else: # we are backtesting so we need to preserve row number to send back to strategy, so now we use do_predict to avoid any prediction based on a NaN
|
||||
else:
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_dataframe).any(1)
|
||||
self.data['filter_drop_index_prediction'] = drop_index
|
||||
filtered_dataframe.fillna(0, inplace=True) # replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction that was based on a single NaN is ultimately protected from buys with do_predict
|
||||
self.data["filter_drop_index_prediction"] = drop_index
|
||||
filtered_dataframe.fillna(0, inplace=True)
|
||||
# replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction
|
||||
# that was based on a single NaN is ultimately protected from buys with do_predict
|
||||
drop_index = ~drop_index
|
||||
self.do_predict = np.array(drop_index.replace(True,1).replace(False,0))
|
||||
print('dropped',len(self.do_predict) - self.do_predict.sum(),'of',len(filtered_dataframe),
|
||||
'prediction data points due to NaNs. These are protected from prediction with do_predict vector returned to strategy.')
|
||||
|
||||
self.do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
|
||||
print(
|
||||
"dropped",
|
||||
len(self.do_predict) - self.do_predict.sum(),
|
||||
"of",
|
||||
len(filtered_dataframe),
|
||||
"prediction data points due to NaNs. These are protected from prediction",
|
||||
"with do_predict vector returned to strategy.",
|
||||
)
|
||||
|
||||
return filtered_dataframe, labels
|
||||
|
||||
def build_data_dictionary(self, train_df: DataFrame, test_df: DataFrame,
|
||||
train_labels: DataFrame, test_labels: DataFrame,
|
||||
train_weights: Any, test_weights: Any) -> Dict:
|
||||
def build_data_dictionary(
|
||||
self,
|
||||
train_df: DataFrame,
|
||||
test_df: DataFrame,
|
||||
train_labels: DataFrame,
|
||||
test_labels: DataFrame,
|
||||
train_weights: Any,
|
||||
test_weights: Any,
|
||||
) -> Dict:
|
||||
|
||||
self.data_dictionary = {'train_features': train_df,
|
||||
'test_features': test_df,
|
||||
'train_labels': train_labels,
|
||||
'test_labels': test_labels,
|
||||
'train_weights': train_weights,
|
||||
'test_weights': test_weights}
|
||||
self.data_dictionary = {
|
||||
"train_features": train_df,
|
||||
"test_features": test_df,
|
||||
"train_labels": train_labels,
|
||||
"test_labels": test_labels,
|
||||
"train_weights": train_weights,
|
||||
"test_weights": test_weights,
|
||||
}
|
||||
|
||||
return self.data_dictionary
|
||||
|
||||
def standardize_data(self, data_dictionary: Dict) -> None:
|
||||
'''
|
||||
def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||
"""
|
||||
Standardize all data in the data_dictionary according to the training dataset
|
||||
:params:
|
||||
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
|
||||
:returns:
|
||||
:data_dictionary: updated dictionary with standardized values.
|
||||
'''
|
||||
"""
|
||||
# standardize the data by training stats
|
||||
train_mean = data_dictionary['train_features'].mean()
|
||||
train_std = data_dictionary['train_features'].std()
|
||||
data_dictionary['train_features'] = (data_dictionary['train_features'] - train_mean) / train_std
|
||||
data_dictionary['test_features'] = (data_dictionary['test_features'] - train_mean) / train_std
|
||||
train_mean = data_dictionary["train_features"].mean()
|
||||
train_std = data_dictionary["train_features"].std()
|
||||
data_dictionary["train_features"] = (
|
||||
data_dictionary["train_features"] - train_mean
|
||||
) / train_std
|
||||
data_dictionary["test_features"] = (
|
||||
data_dictionary["test_features"] - train_mean
|
||||
) / train_std
|
||||
|
||||
train_labels_std = data_dictionary['train_labels'].std()
|
||||
train_labels_mean = data_dictionary['train_labels'].mean()
|
||||
data_dictionary['train_labels'] = (data_dictionary['train_labels'] - train_labels_mean) / train_labels_std
|
||||
data_dictionary['test_labels'] = (data_dictionary['test_labels'] - train_labels_mean) / train_labels_std
|
||||
train_labels_std = data_dictionary["train_labels"].std()
|
||||
train_labels_mean = data_dictionary["train_labels"].mean()
|
||||
data_dictionary["train_labels"] = (
|
||||
data_dictionary["train_labels"] - train_labels_mean
|
||||
) / train_labels_std
|
||||
data_dictionary["test_labels"] = (
|
||||
data_dictionary["test_labels"] - train_labels_mean
|
||||
) / train_labels_std
|
||||
|
||||
for item in train_std.keys():
|
||||
self.data[item+'_std'] = train_std[item]
|
||||
self.data[item+'_mean'] = train_mean[item]
|
||||
self.data[item + "_std"] = train_std[item]
|
||||
self.data[item + "_mean"] = train_mean[item]
|
||||
|
||||
self.data['labels_std'] = train_labels_std
|
||||
self.data['labels_mean'] = train_labels_mean
|
||||
self.data["labels_std"] = train_labels_std
|
||||
self.data["labels_mean"] = train_labels_mean
|
||||
|
||||
return data_dictionary
|
||||
|
||||
def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
|
||||
'''
|
||||
"""
|
||||
Standardizes a set of data using the mean and standard deviation from
|
||||
the associated training data.
|
||||
:params:
|
||||
:df: Dataframe to be standardized
|
||||
'''
|
||||
"""
|
||||
|
||||
for item in df.keys():
|
||||
df[item] = (df[item] - self.data[item+'_mean']) / self.data[item+'_std']
|
||||
df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
|
||||
|
||||
return df
|
||||
|
||||
def split_timerange(self, tr: Dict, train_split: int=28, bt_split: int=7) -> list:
|
||||
'''
|
||||
def split_timerange(
|
||||
self, tr: str, train_split: int = 28, bt_split: int = 7
|
||||
) -> Tuple[list, list]:
|
||||
"""
|
||||
Function which takes a single time range (tr) and splits it
|
||||
into sub timeranges to train and backtest on based on user input
|
||||
tr: str, full timerange to train on
|
||||
train_split: the length of each training period (days). Specified in user
|
||||
configuration file
|
||||
bt_split: the backtesting length (days). Specified in user configuration file
|
||||
'''
|
||||
"""
|
||||
|
||||
train_period = train_split * SECONDS_IN_DAY
|
||||
bt_period = bt_split * SECONDS_IN_DAY
|
||||
@ -230,22 +297,24 @@ class DataHandler:
|
||||
tr_backtesting_list = []
|
||||
first = True
|
||||
while True:
|
||||
if not first: timerange_train.startts = timerange_train.startts + bt_period
|
||||
if not first:
|
||||
timerange_train.startts = timerange_train.startts + bt_period
|
||||
timerange_train.stopts = timerange_train.startts + train_period
|
||||
|
||||
# if a full training period doesn't fit, we stop
|
||||
if timerange_train.stopts > full_timerange.stopts: break
|
||||
if timerange_train.stopts > full_timerange.stopts:
|
||||
break
|
||||
first = False
|
||||
start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
|
||||
stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts)
|
||||
tr_training_list.append(start.strftime("%Y%m%d")+'-'+stop.strftime("%Y%m%d"))
|
||||
tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
|
||||
|
||||
## associated backtest period
|
||||
# associated backtest period
|
||||
timerange_backtest.startts = timerange_train.stopts
|
||||
timerange_backtest.stopts = timerange_backtest.startts + bt_period
|
||||
start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts)
|
||||
stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts)
|
||||
tr_backtesting_list.append(start.strftime("%Y%m%d")+'-'+stop.strftime("%Y%m%d"))
|
||||
tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
|
||||
|
||||
return tr_training_list, tr_backtesting_list
|
||||
|
||||
@ -260,8 +329,8 @@ class DataHandler:
|
||||
timerange = TimeRange.parse_timerange(tr)
|
||||
start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
|
||||
stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
|
||||
df = df.loc[df['date'] >= start, :]
|
||||
df = df.loc[df['date'] <= stop, :]
|
||||
df = df.loc[df["date"] >= start, :]
|
||||
df = df.loc[df["date"] <= stop, :]
|
||||
|
||||
return df
|
||||
|
||||
@ -274,42 +343,48 @@ class DataHandler:
|
||||
|
||||
from sklearn.decomposition import PCA # avoid importing if we don't need it
|
||||
|
||||
n_components = self.data_dictionary['train_features'].shape[1]
|
||||
n_components = self.data_dictionary["train_features"].shape[1]
|
||||
pca = PCA(n_components=n_components)
|
||||
pca = pca.fit(self.data_dictionary['train_features'])
|
||||
pca = pca.fit(self.data_dictionary["train_features"])
|
||||
n_keep_components = np.argmin(pca.explained_variance_ratio_.cumsum() < 0.999)
|
||||
pca2 = PCA(n_components=n_keep_components)
|
||||
self.data['n_kept_components'] = n_keep_components
|
||||
pca2 = pca2.fit(self.data_dictionary['train_features'])
|
||||
print('reduced feature dimension by',n_components-n_keep_components)
|
||||
print("explained variance",np.sum(pca2.explained_variance_ratio_))
|
||||
train_components = pca2.transform(self.data_dictionary['train_features'])
|
||||
test_components = pca2.transform(self.data_dictionary['test_features'])
|
||||
self.data["n_kept_components"] = n_keep_components
|
||||
pca2 = pca2.fit(self.data_dictionary["train_features"])
|
||||
print("reduced feature dimension by", n_components - n_keep_components)
|
||||
print("explained variance", np.sum(pca2.explained_variance_ratio_))
|
||||
train_components = pca2.transform(self.data_dictionary["train_features"])
|
||||
test_components = pca2.transform(self.data_dictionary["test_features"])
|
||||
|
||||
self.data_dictionary['train_features'] = pd.DataFrame(data=train_components,
|
||||
columns = ['PC'+str(i) for i in range(0,n_keep_components)],
|
||||
index = self.data_dictionary['train_features'].index)
|
||||
self.data_dictionary["train_features"] = pd.DataFrame(
|
||||
data=train_components,
|
||||
columns=["PC" + str(i) for i in range(0, n_keep_components)],
|
||||
index=self.data_dictionary["train_features"].index,
|
||||
)
|
||||
|
||||
self.data_dictionary['test_features'] = pd.DataFrame(data=test_components,
|
||||
columns = ['PC'+str(i) for i in range(0,n_keep_components)],
|
||||
index = self.data_dictionary['test_features'].index)
|
||||
self.data_dictionary["test_features"] = pd.DataFrame(
|
||||
data=test_components,
|
||||
columns=["PC" + str(i) for i in range(0, n_keep_components)],
|
||||
index=self.data_dictionary["test_features"].index,
|
||||
)
|
||||
|
||||
self.data['n_kept_components'] = n_keep_components
|
||||
self.data["n_kept_components"] = n_keep_components
|
||||
self.pca = pca2
|
||||
if not os.path.exists(self.model_path): os.mkdir(self.model_path)
|
||||
pk.dump(pca2, open(self.model_path + self.model_filename+"_pca_object.pkl","wb"))
|
||||
|
||||
if not self.model_path.is_dir():
|
||||
self.model_path.mkdir(parents=True, exist_ok=True)
|
||||
pk.dump(pca2, open(self.model_path / str(self.model_filename + "_pca_object.pkl"), "wb"))
|
||||
|
||||
return None
|
||||
|
||||
def compute_distances(self) -> float:
|
||||
print('computing average mean distance for all training points')
|
||||
pairwise = pairwise_distances(self.data_dictionary['train_features'],n_jobs=-1)
|
||||
print("computing average mean distance for all training points")
|
||||
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1)
|
||||
avg_mean_dist = pairwise.mean(axis=1).mean()
|
||||
print('avg_mean_dist',avg_mean_dist)
|
||||
print("avg_mean_dist", avg_mean_dist)
|
||||
|
||||
return avg_mean_dist
|
||||
|
||||
def remove_outliers(self,predict: bool) -> None:
|
||||
def remove_outliers(self, predict: bool) -> None:
|
||||
"""
|
||||
Remove data that looks like an outlier based on the distribution of each
|
||||
variable.
|
||||
@ -318,40 +393,66 @@ class DataHandler:
|
||||
training data coming in.
|
||||
"""
|
||||
|
||||
lower_quantile = self.data_dictionary['train_features'].quantile(0.001)
|
||||
upper_quantile = self.data_dictionary['train_features'].quantile(0.999)
|
||||
lower_quantile = self.data_dictionary["train_features"].quantile(0.001)
|
||||
upper_quantile = self.data_dictionary["train_features"].quantile(0.999)
|
||||
|
||||
if predict:
|
||||
|
||||
df = self.data_dictionary['prediction_features'][(self.data_dictionary['prediction_features']<upper_quantile) & (self.data_dictionary['prediction_features']>lower_quantile)]
|
||||
df = self.data_dictionary["prediction_features"][
|
||||
(self.data_dictionary["prediction_features"] < upper_quantile)
|
||||
& (self.data_dictionary["prediction_features"] > lower_quantile)
|
||||
]
|
||||
drop_index = pd.isnull(df).any(1)
|
||||
self.data_dictionary['prediction_features'].fillna(0,inplace=True)
|
||||
self.data_dictionary["prediction_features"].fillna(0, inplace=True)
|
||||
drop_index = ~drop_index
|
||||
do_predict = np.array(drop_index.replace(True,1).replace(False,0))
|
||||
do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
|
||||
|
||||
print('remove_outliers() tossed',len(do_predict)-do_predict.sum(),'predictions because they were beyond 3 std deviations from training data.')
|
||||
print(
|
||||
"remove_outliers() tossed",
|
||||
len(do_predict) - do_predict.sum(),
|
||||
"predictions because they were beyond 3 std deviations from training data.",
|
||||
)
|
||||
self.do_predict += do_predict
|
||||
self.do_predict -= 1
|
||||
|
||||
else:
|
||||
|
||||
filter_train_df = self.data_dictionary['train_features'][(self.data_dictionary['train_features']<upper_quantile) & (self.data_dictionary['train_features']>lower_quantile)]
|
||||
filter_train_df = self.data_dictionary["train_features"][
|
||||
(self.data_dictionary["train_features"] < upper_quantile)
|
||||
& (self.data_dictionary["train_features"] > lower_quantile)
|
||||
]
|
||||
drop_index = pd.isnull(filter_train_df).any(1)
|
||||
self.data_dictionary['train_features'] = self.data_dictionary['train_features'][(drop_index==False)]
|
||||
self.data_dictionary['train_labels'] = self.data_dictionary['train_labels'][(drop_index==False)]
|
||||
self.data_dictionary['train_weights'] = self.data_dictionary['train_weights'][(drop_index==False)]
|
||||
drop_index = drop_index.replace(True, 1).replace(False, 0)
|
||||
self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
|
||||
(drop_index == 0)
|
||||
]
|
||||
self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][
|
||||
(drop_index == 0)
|
||||
]
|
||||
self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][
|
||||
(drop_index == 0)
|
||||
]
|
||||
|
||||
# do the same for the test data
|
||||
filter_test_df = self.data_dictionary['test_features'][(self.data_dictionary['test_features']<upper_quantile) & (self.data_dictionary['test_features']>lower_quantile)]
|
||||
filter_test_df = self.data_dictionary["test_features"][
|
||||
(self.data_dictionary["test_features"] < upper_quantile)
|
||||
& (self.data_dictionary["test_features"] > lower_quantile)
|
||||
]
|
||||
drop_index = pd.isnull(filter_test_df).any(1)
|
||||
#pdb.set_trace()
|
||||
self.data_dictionary['test_labels'] = self.data_dictionary['test_labels'][(drop_index==False)]
|
||||
self.data_dictionary['test_features'] = self.data_dictionary['test_features'][(drop_index==False)]
|
||||
self.data_dictionary['test_weights'] = self.data_dictionary['test_weights'][(drop_index==False)]
|
||||
drop_index = drop_index.replace(True, 1).replace(False, 0)
|
||||
self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][
|
||||
(drop_index == 0)
|
||||
]
|
||||
self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
|
||||
(drop_index == 0)
|
||||
]
|
||||
self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
|
||||
(drop_index == 0)
|
||||
]
|
||||
|
||||
return
|
||||
|
||||
def build_feature_list(self, config: dict) -> int:
|
||||
def build_feature_list(self, config: dict) -> list:
|
||||
"""
|
||||
Build the list of features that will be used to filter
|
||||
the full dataframe. Feature list is constructed from the
|
||||
@ -361,16 +462,17 @@ class DataHandler:
|
||||
user defined input in config['freqai'] dictionary.
|
||||
"""
|
||||
features = []
|
||||
for tf in config['freqai']['timeframes']:
|
||||
for ft in config['freqai']['base_features']:
|
||||
for n in range(config['freqai']['feature_parameters']['shift']+1):
|
||||
shift=''
|
||||
if n>0: shift = '_shift-'+str(n)
|
||||
features.append(ft+shift+'_'+tf)
|
||||
for p in config['freqai']['corr_pairlist']:
|
||||
features.append(p.split("/")[0]+'-'+ft+shift+'_'+tf)
|
||||
for tf in config["freqai"]["timeframes"]:
|
||||
for ft in config["freqai"]["base_features"]:
|
||||
for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
|
||||
shift = ""
|
||||
if n > 0:
|
||||
shift = "_shift-" + str(n)
|
||||
features.append(ft + shift + "_" + tf)
|
||||
for p in config["freqai"]["corr_pairlist"]:
|
||||
features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
|
||||
|
||||
print('number of features',len(features))
|
||||
print("number of features", len(features))
|
||||
return features
|
||||
|
||||
def check_if_pred_in_training_spaces(self) -> None:
|
||||
@ -381,15 +483,25 @@ class DataHandler:
|
||||
from the training data set.
|
||||
"""
|
||||
|
||||
print('checking if prediction features are in AOA')
|
||||
distance = pairwise_distances(self.data_dictionary['train_features'],
|
||||
self.data_dictionary['prediction_features'],n_jobs=-1)
|
||||
print("checking if prediction features are in AOA")
|
||||
distance = pairwise_distances(
|
||||
self.data_dictionary["train_features"],
|
||||
self.data_dictionary["prediction_features"],
|
||||
n_jobs=-1,
|
||||
)
|
||||
|
||||
do_predict = np.where(distance.min(axis=0) /
|
||||
self.data['avg_mean_dist'] < self.config['freqai']['feature_parameters']['DI_threshold'],1,0)
|
||||
do_predict = np.where(
|
||||
distance.min(axis=0) / self.data["avg_mean_dist"]
|
||||
< self.config["freqai"]["feature_parameters"]["DI_threshold"],
|
||||
1,
|
||||
0,
|
||||
)
|
||||
|
||||
print('Distance checker tossed',len(do_predict)-do_predict.sum(),
|
||||
'predictions for being too far from training data')
|
||||
print(
|
||||
"Distance checker tossed",
|
||||
len(do_predict) - do_predict.sum(),
|
||||
"predictions for being too far from training data",
|
||||
)
|
||||
|
||||
self.do_predict += do_predict
|
||||
self.do_predict -= 1
|
||||
@ -401,8 +513,9 @@ class DataHandler:
|
||||
"""
|
||||
weights = np.zeros(num_weights)
|
||||
for i in range(1, len(weights)):
|
||||
weights[len(weights) - i] = np.exp(-i/
|
||||
(self.config['freqai']['feature_parameters']['weight_factor']*num_weights))
|
||||
weights[len(weights) - i] = np.exp(
|
||||
-i / (self.config["freqai"]["feature_parameters"]["weight_factor"] * num_weights)
|
||||
)
|
||||
return weights
|
||||
|
||||
def append_predictions(self, predictions, do_predict, len_dataframe):
|
||||
@ -411,12 +524,12 @@ class DataHandler:
|
||||
"""
|
||||
|
||||
ones = np.ones(len_dataframe)
|
||||
s_mean, s_std = ones*self.data['s_mean'], ones*self.data['s_std']
|
||||
s_mean, s_std = ones * self.data["s_mean"], ones * self.data["s_std"]
|
||||
|
||||
self.predictions = np.append(self.predictions,predictions)
|
||||
self.do_predict = np.append(self.do_predict,do_predict)
|
||||
self.target_mean = np.append(self.target_mean,s_mean)
|
||||
self.target_std = np.append(self.target_std,s_std)
|
||||
self.predictions = np.append(self.predictions, predictions)
|
||||
self.do_predict = np.append(self.do_predict, do_predict)
|
||||
self.target_mean = np.append(self.target_mean, s_mean)
|
||||
self.target_std = np.append(self.target_std, s_std)
|
||||
|
||||
return
|
||||
|
||||
@ -426,11 +539,11 @@ class DataHandler:
|
||||
when it goes back to the strategy. These rows are not included in the backtest.
|
||||
"""
|
||||
|
||||
filler = np.zeros(len_dataframe -len(self.predictions)) # startup_candle_count
|
||||
self.predictions = np.append(filler,self.predictions)
|
||||
self.do_predict = np.append(filler,self.do_predict)
|
||||
self.target_mean = np.append(filler,self.target_mean)
|
||||
self.target_std = np.append(filler,self.target_std)
|
||||
filler = np.zeros(len_dataframe - len(self.predictions)) # startup_candle_count
|
||||
self.predictions = np.append(filler, self.predictions)
|
||||
self.do_predict = np.append(filler, self.do_predict)
|
||||
self.target_mean = np.append(filler, self.target_mean)
|
||||
self.target_std = np.append(filler, self.target_std)
|
||||
|
||||
return
|
||||
|
||||
|
@ -1,16 +1,19 @@
|
||||
import gc
|
||||
import shutil
|
||||
from abc import ABC
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
import os
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
import shutil
|
||||
import gc
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from abc import ABC
|
||||
|
||||
from freqtrade.freqai.data_handler import DataHandler
|
||||
|
||||
|
||||
pd.options.mode.chained_assignment = None
|
||||
|
||||
|
||||
class IFreqaiModel(ABC):
|
||||
"""
|
||||
Class containing all tools for training and prediction in the strategy.
|
||||
@ -23,23 +26,27 @@ class IFreqaiModel(ABC):
|
||||
def __init__(self, config: Dict[str, Any]) -> None:
|
||||
|
||||
self.config = config
|
||||
self.freqai_info = config['freqai']
|
||||
self.data_split_parameters = config['freqai']['data_split_parameters']
|
||||
self.model_training_parameters = config['freqai']['model_training_parameters']
|
||||
self.feature_parameters = config['freqai']['feature_parameters']
|
||||
self.full_path = (str(config['user_data_dir'])+
|
||||
"/models/"+self.freqai_info['full_timerange']+
|
||||
'-'+self.freqai_info['identifier'])
|
||||
self.metadata = {}
|
||||
self.data = {}
|
||||
self.freqai_info = config["freqai"]
|
||||
self.data_split_parameters = config["freqai"]["data_split_parameters"]
|
||||
self.model_training_parameters = config["freqai"]["model_training_parameters"]
|
||||
self.feature_parameters = config["freqai"]["feature_parameters"]
|
||||
self.full_path = Path(
|
||||
config["user_data_dir"]
|
||||
/ "models"
|
||||
/ str(self.freqai_info["full_timerange"] + self.freqai_info["identifier"])
|
||||
)
|
||||
|
||||
self.time_last_trained = None
|
||||
self.current_time = None
|
||||
self.model = None
|
||||
self.predictions = None
|
||||
|
||||
if not os.path.exists(self.full_path):
|
||||
os.mkdir(self.full_path)
|
||||
shutil.copy(self.config['config_files'][0],self.full_path+"/"+self.config['config_files'][0])
|
||||
if not self.full_path.is_dir():
|
||||
self.full_path.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(
|
||||
self.config["config_files"][0],
|
||||
Path(self.full_path / self.config["config_files"][0]),
|
||||
)
|
||||
|
||||
def start(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
|
||||
"""
|
||||
@ -57,27 +64,33 @@ class IFreqaiModel(ABC):
|
||||
the model.
|
||||
:metadata: pair metadataa coming from strategy.
|
||||
"""
|
||||
self.pair = metadata['pair']
|
||||
self.dh = DataHandler(self.config, dataframe, self.data)
|
||||
self.pair = metadata["pair"]
|
||||
self.dh = DataHandler(self.config, dataframe)
|
||||
|
||||
print('going to train',len(self.dh.training_timeranges),
|
||||
'timeranges:',self.dh.training_timeranges)
|
||||
print(
|
||||
"going to train",
|
||||
len(self.dh.training_timeranges),
|
||||
"timeranges:",
|
||||
self.dh.training_timeranges,
|
||||
)
|
||||
|
||||
# Loop enforcing the sliding window training/backtesting paragigm
|
||||
# tr_train is the training time range e.g. 1 historical month
|
||||
# tr_backtest is the backtesting time range e.g. the week directly
|
||||
# following tr_train. Both of these windows slide through the
|
||||
# entire backtest
|
||||
for tr_train, tr_backtest in zip(self.dh.training_timeranges,
|
||||
self.dh.backtesting_timeranges):
|
||||
for tr_train, tr_backtest in zip(
|
||||
self.dh.training_timeranges, self.dh.backtesting_timeranges
|
||||
):
|
||||
gc.collect()
|
||||
#self.config['timerange'] = tr_train
|
||||
# self.config['timerange'] = tr_train
|
||||
self.dh.data = {} # clean the pair specific data between models
|
||||
self.freqai_info['training_timerange'] = tr_train
|
||||
self.freqai_info["training_timerange"] = tr_train
|
||||
dataframe_train = self.dh.slice_dataframe(tr_train, dataframe)
|
||||
dataframe_backtest = self.dh.slice_dataframe(tr_backtest, dataframe)
|
||||
print("training",self.pair,"for",tr_train)
|
||||
self.dh.model_path = self.full_path+"/"+ 'sub-train'+'-'+str(tr_train)+'/'
|
||||
print("training", self.pair, "for", tr_train)
|
||||
# self.dh.model_path = self.full_path + "/" + "sub-train" + "-" + str(tr_train) + "/"
|
||||
self.dh.model_path = Path(self.full_path / str("sub-train" + "-" + str(tr_train)))
|
||||
if not self.model_exists(self.pair, training_timerange=tr_train):
|
||||
self.model = self.train(dataframe_train, metadata)
|
||||
self.dh.save_data(self.model)
|
||||
@ -86,7 +99,7 @@ class IFreqaiModel(ABC):
|
||||
|
||||
preds, do_preds = self.predict(dataframe_backtest)
|
||||
|
||||
self.dh.append_predictions(preds,do_preds,len(dataframe_backtest))
|
||||
self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))
|
||||
|
||||
self.dh.fill_predictions(len(dataframe))
|
||||
|
||||
@ -124,9 +137,9 @@ class IFreqaiModel(ABC):
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
return None
|
||||
return Any
|
||||
|
||||
def predict(self) -> Optional[Tuple[DataFrame, DataFrame]]:
|
||||
def predict(self, dataframe: DataFrame) -> Tuple[np.array, np.array]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
@ -136,20 +149,20 @@ class IFreqaiModel(ABC):
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
"""
|
||||
|
||||
return None
|
||||
return np.array([]), np.array([])
|
||||
|
||||
def model_exists(self, pair: str, training_timerange: str = None) -> bool:
|
||||
def model_exists(self, pair: str, training_timerange: str) -> bool:
|
||||
"""
|
||||
Given a pair and path, check if a model already exists
|
||||
:param pair: pair e.g. BTC/USD
|
||||
:param path: path to model
|
||||
"""
|
||||
coin,_ = pair.split('/')
|
||||
self.dh.model_filename = f"cb_"+coin.lower()+"_"+training_timerange
|
||||
file_exists = os.path.isfile(self.dh.model_path+
|
||||
self.dh.model_filename+"_model.joblib")
|
||||
coin, _ = pair.split("/")
|
||||
self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange
|
||||
path_to_modelfile = Path(self.dh.model_path / str(self.dh.model_filename + "_model.joblib"))
|
||||
file_exists = path_to_modelfile.is_file()
|
||||
if file_exists:
|
||||
print("Found model at", self.dh.model_path+self.dh.model_filename)
|
||||
else: print("Could not find model at",
|
||||
self.dh.model_path+self.dh.model_filename)
|
||||
print("Found model at", self.dh.model_path / self.dh.model_filename)
|
||||
else:
|
||||
print("Could not find model at", self.dh.model_path / self.dh.model_filename)
|
||||
return file_exists
|
||||
|
@ -7,6 +7,6 @@ class CustomModel:
|
||||
and the strategy.
|
||||
"""
|
||||
|
||||
def __init__(self,config):
|
||||
def __init__(self, config):
|
||||
|
||||
self.bridge = FreqaiModelResolver.load_freqaimodel(config)
|
||||
|
@ -12,6 +12,7 @@ from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.freqai.freqai_interface import IFreqaiModel
|
||||
from freqtrade.resolvers import IResolver
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@ -19,10 +20,11 @@ class FreqaiModelResolver(IResolver):
|
||||
"""
|
||||
This class contains all the logic to load custom hyperopt loss class
|
||||
"""
|
||||
|
||||
object_type = IFreqaiModel
|
||||
object_type_str = "FreqaiModel"
|
||||
user_subdir = USERPATH_FREQAIMODELS
|
||||
initial_search_path = Path(__file__).parent.parent.joinpath('optimize').resolve()
|
||||
initial_search_path = Path(__file__).parent.parent.joinpath("optimize").resolve()
|
||||
|
||||
@staticmethod
|
||||
def load_freqaimodel(config: Dict) -> IFreqaiModel:
|
||||
@ -31,15 +33,17 @@ class FreqaiModelResolver(IResolver):
|
||||
:param config: configuration dictionary
|
||||
"""
|
||||
|
||||
freqaimodel_name = config.get('freqaimodel')
|
||||
freqaimodel_name = config.get("freqaimodel")
|
||||
if not freqaimodel_name:
|
||||
raise OperationalException(
|
||||
"No freqaimodel set. Please use `--freqaimodel` to "
|
||||
"specify the FreqaiModel class to use.\n"
|
||||
)
|
||||
freqaimodel = FreqaiModelResolver.load_object(freqaimodel_name,
|
||||
config, kwargs={'config': config},
|
||||
extra_dir=config.get('freqaimodel_path'))
|
||||
|
||||
freqaimodel = FreqaiModelResolver.load_object(
|
||||
freqaimodel_name,
|
||||
config,
|
||||
kwargs={"config": config},
|
||||
extra_dir=config.get("freqaimodel_path"),
|
||||
)
|
||||
|
||||
return freqaimodel
|
||||
|
@ -1,10 +1,12 @@
|
||||
import numpy as np
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
import pandas as pd
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
from pandas import DataFrame
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
from freqtrade.freqai.freqai_interface import IFreqaiModel
|
||||
|
||||
|
||||
class ExamplePredictionModel(IFreqaiModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
@ -19,15 +21,20 @@ class ExamplePredictionModel(IFreqaiModel):
|
||||
:dataframe: the full dataframe for the present training period
|
||||
"""
|
||||
|
||||
dataframe['s'] = (dataframe['close'].shift(-self.feature_parameters['period']).rolling(
|
||||
self.feature_parameters['period']).max() / dataframe['close'] - 1)
|
||||
self.dh.data['s_mean'] = dataframe['s'].mean()
|
||||
self.dh.data['s_std'] = dataframe['s'].std()
|
||||
dataframe["s"] = (
|
||||
dataframe["close"]
|
||||
.shift(-self.feature_parameters["period"])
|
||||
.rolling(self.feature_parameters["period"])
|
||||
.max()
|
||||
/ dataframe["close"]
|
||||
- 1
|
||||
)
|
||||
self.dh.data["s_mean"] = dataframe["s"].mean()
|
||||
self.dh.data["s_std"] = dataframe["s"].std()
|
||||
|
||||
print('label mean',self.dh.data['s_mean'],'label std',self.dh.data['s_std'])
|
||||
|
||||
return dataframe['s']
|
||||
print("label mean", self.dh.data["s_mean"], "label std", self.dh.data["s_std"])
|
||||
|
||||
return dataframe["s"]
|
||||
|
||||
def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame, DataFrame]:
|
||||
"""
|
||||
@ -46,8 +53,12 @@ class ExamplePredictionModel(IFreqaiModel):
|
||||
unfiltered_labels = self.make_labels(unfiltered_dataframe)
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = self.dh.filter_features(unfiltered_dataframe,
|
||||
self.dh.training_features_list, unfiltered_labels, training_filter=True)
|
||||
features_filtered, labels_filtered = self.dh.filter_features(
|
||||
unfiltered_dataframe,
|
||||
self.dh.training_features_list,
|
||||
unfiltered_labels,
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
# split data into train/test data.
|
||||
data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
@ -55,18 +66,18 @@ class ExamplePredictionModel(IFreqaiModel):
|
||||
data_dictionary = self.dh.standardize_data(data_dictionary)
|
||||
|
||||
# optional additional data cleaning
|
||||
if self.feature_parameters['principal_component_analysis']:
|
||||
if self.feature_parameters["principal_component_analysis"]:
|
||||
self.dh.principal_component_analysis()
|
||||
if self.feature_parameters["remove_outliers"]:
|
||||
self.dh.remove_outliers(predict=False)
|
||||
if self.feature_parameters['DI_threshold']:
|
||||
self.dh.data['avg_mean_dist'] = self.dh.compute_distances()
|
||||
if self.feature_parameters["DI_threshold"]:
|
||||
self.dh.data["avg_mean_dist"] = self.dh.compute_distances()
|
||||
|
||||
print("length of train data", len(data_dictionary['train_features']))
|
||||
print("length of train data", len(data_dictionary["train_features"]))
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
|
||||
print('Finished training')
|
||||
print("Finished training")
|
||||
print(f'--------------------done training {metadata["pair"]}--------------------')
|
||||
|
||||
return model
|
||||
@ -82,19 +93,20 @@ class ExamplePredictionModel(IFreqaiModel):
|
||||
"""
|
||||
|
||||
train_data = Pool(
|
||||
data=data_dictionary['train_features'],
|
||||
label=data_dictionary['train_labels'],
|
||||
weight=data_dictionary['train_weights']
|
||||
data=data_dictionary["train_features"],
|
||||
label=data_dictionary["train_labels"],
|
||||
weight=data_dictionary["train_weights"],
|
||||
)
|
||||
|
||||
test_data = Pool(
|
||||
data=data_dictionary['test_features'],
|
||||
label=data_dictionary['test_labels'],
|
||||
weight=data_dictionary['test_weights']
|
||||
data=data_dictionary["test_features"],
|
||||
label=data_dictionary["test_labels"],
|
||||
weight=data_dictionary["test_weights"],
|
||||
)
|
||||
|
||||
model = CatBoostRegressor(verbose=100, early_stopping_rounds=400,
|
||||
**self.model_training_parameters)
|
||||
model = CatBoostRegressor(
|
||||
verbose=100, early_stopping_rounds=400, **self.model_training_parameters
|
||||
)
|
||||
model.fit(X=train_data, eval_set=test_data)
|
||||
|
||||
return model
|
||||
@ -112,27 +124,31 @@ class ExamplePredictionModel(IFreqaiModel):
|
||||
print("--------------------Starting prediction--------------------")
|
||||
|
||||
original_feature_list = self.dh.build_feature_list(self.config)
|
||||
filtered_dataframe, _ = self.dh.filter_features(unfiltered_dataframe, original_feature_list, training_filter=False)
|
||||
filtered_dataframe, _ = self.dh.filter_features(
|
||||
unfiltered_dataframe, original_feature_list, training_filter=False
|
||||
)
|
||||
filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
|
||||
self.dh.data_dictionary['prediction_features'] = filtered_dataframe
|
||||
self.dh.data_dictionary["prediction_features"] = filtered_dataframe
|
||||
|
||||
# optional additional data cleaning
|
||||
if self.feature_parameters['principal_component_analysis']:
|
||||
if self.feature_parameters["principal_component_analysis"]:
|
||||
pca_components = self.dh.pca.transform(filtered_dataframe)
|
||||
self.dh.data_dictionary['prediction_features'] = pd.DataFrame(data=pca_components,
|
||||
columns = ['PC'+str(i) for i in range(0,self.dh.data['n_kept_components'])],
|
||||
index = filtered_dataframe.index)
|
||||
self.dh.data_dictionary["prediction_features"] = pd.DataFrame(
|
||||
data=pca_components,
|
||||
columns=["PC" + str(i) for i in range(0, self.dh.data["n_kept_components"])],
|
||||
index=filtered_dataframe.index,
|
||||
)
|
||||
|
||||
if self.feature_parameters["remove_outliers"]:
|
||||
self.dh.remove_outliers(predict=True) # creates dropped index
|
||||
|
||||
if self.feature_parameters['DI_threshold']:
|
||||
if self.feature_parameters["DI_threshold"]:
|
||||
self.dh.check_if_pred_in_training_spaces() # sets do_predict
|
||||
|
||||
predictions = self.model.predict(self.dh.data_dictionary['prediction_features'])
|
||||
predictions = self.model.predict(self.dh.data_dictionary["prediction_features"])
|
||||
|
||||
# compute the non-standardized predictions
|
||||
predictions = predictions * self.dh.data['labels_std'] + self.dh.data['labels_mean']
|
||||
predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"]
|
||||
|
||||
print("--------------------Finished prediction--------------------")
|
||||
|
||||
|
@ -1,15 +1,20 @@
|
||||
import logging
|
||||
from functools import reduce
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import talib.abstract as ta
|
||||
from pandas import DataFrame
|
||||
import pandas as pd
|
||||
from technical import qtpylib
|
||||
import numpy as np
|
||||
from freqtrade.strategy import (merge_informative_pair)
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
|
||||
from freqtrade.freqai.strategy_bridge import CustomModel
|
||||
from functools import reduce
|
||||
from freqtrade.strategy import merge_informative_pair
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class FreqaiExampleStrategy(IStrategy):
|
||||
"""
|
||||
Example strategy showing how the user connects their own
|
||||
@ -22,40 +27,33 @@ class FreqaiExampleStrategy(IStrategy):
|
||||
canonical freqtrade configuration file under config['freqai'].
|
||||
"""
|
||||
|
||||
minimal_roi = {
|
||||
"0": 0.01,
|
||||
"240": -1
|
||||
}
|
||||
minimal_roi = {"0": 0.01, "240": -1}
|
||||
|
||||
plot_config = {
|
||||
'main_plot': {
|
||||
"main_plot": {},
|
||||
"subplots": {
|
||||
"prediction": {"prediction": {"color": "blue"}},
|
||||
"target_roi": {
|
||||
"target_roi": {"color": "brown"},
|
||||
},
|
||||
'subplots': {
|
||||
"prediction":{
|
||||
'prediction':{'color':'blue'}
|
||||
"do_predict": {
|
||||
"do_predict": {"color": "brown"},
|
||||
},
|
||||
"target_roi":{
|
||||
'target_roi':{'color':'brown'},
|
||||
},
|
||||
"do_predict":{
|
||||
'do_predict':{'color':'brown'},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
stoploss = -0.05
|
||||
use_sell_signal = True
|
||||
startup_candle_count: int = 1000
|
||||
|
||||
|
||||
def informative_pairs(self):
|
||||
pairs = self.freqai_info['corr_pairlist']
|
||||
pairs = self.freqai_info["corr_pairlist"]
|
||||
informative_pairs = []
|
||||
for tf in self.timeframes:
|
||||
informative_pairs.append([(pair, tf) for pair in pairs])
|
||||
return informative_pairs
|
||||
|
||||
def populate_any_indicators(self, pair, df, tf, informative=None,coin=''):
|
||||
def populate_any_indicators(self, pair, df, tf, informative=None, coin=""):
|
||||
"""
|
||||
Function designed to automatically generate, name and merge features
|
||||
from user indicated timeframes in the configuration file. User can add
|
||||
@ -70,110 +68,116 @@ class FreqaiExampleStrategy(IStrategy):
|
||||
if informative is None:
|
||||
informative = self.dp.get_pair_dataframe(pair, tf)
|
||||
|
||||
informative[coin+'rsi'] = ta.RSI(informative, timeperiod=14)
|
||||
informative[coin+'mfi'] = ta.MFI(informative, timeperiod=25)
|
||||
informative[coin+'adx'] = ta.ADX(informative, window=20)
|
||||
informative[coin + "rsi"] = ta.RSI(informative, timeperiod=14)
|
||||
informative[coin + "mfi"] = ta.MFI(informative, timeperiod=25)
|
||||
informative[coin + "adx"] = ta.ADX(informative, window=20)
|
||||
|
||||
informative[coin+'20sma'] = ta.SMA(informative,timeperiod=20)
|
||||
informative[coin+'21ema'] = ta.EMA(informative,timeperiod=21)
|
||||
informative[coin+'bmsb'] = np.where(informative[coin+'20sma'].lt(informative[coin+'21ema']),1,0)
|
||||
informative[coin+'close_over_20sma'] = informative['close']/informative[coin+'20sma']
|
||||
informative[coin + "20sma"] = ta.SMA(informative, timeperiod=20)
|
||||
informative[coin + "21ema"] = ta.EMA(informative, timeperiod=21)
|
||||
informative[coin + "bmsb"] = np.where(
|
||||
informative[coin + "20sma"].lt(informative[coin + "21ema"]), 1, 0
|
||||
)
|
||||
informative[coin + "close_over_20sma"] = informative["close"] / informative[coin + "20sma"]
|
||||
|
||||
informative[coin+'mfi'] = ta.MFI(informative, timeperiod=25)
|
||||
informative[coin + "mfi"] = ta.MFI(informative, timeperiod=25)
|
||||
|
||||
informative[coin+'ema21'] = ta.EMA(informative, timeperiod=21)
|
||||
informative[coin+'sma20'] = ta.SMA(informative, timeperiod=20)
|
||||
informative[coin + "ema21"] = ta.EMA(informative, timeperiod=21)
|
||||
informative[coin + "sma20"] = ta.SMA(informative, timeperiod=20)
|
||||
stoch = ta.STOCHRSI(informative, 15, 20, 2, 2)
|
||||
informative[coin+'srsi-fk'] = stoch['fastk']
|
||||
informative[coin+'srsi-fd'] = stoch['fastd']
|
||||
informative[coin + "srsi-fk"] = stoch["fastk"]
|
||||
informative[coin + "srsi-fd"] = stoch["fastd"]
|
||||
|
||||
bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=14, stds=2.2)
|
||||
informative[coin+'bb_lowerband'] = bollinger['lower']
|
||||
informative[coin+'bb_middleband'] = bollinger['mid']
|
||||
informative[coin+'bb_upperband'] = bollinger['upper']
|
||||
informative[coin+'bb_width'] = ((informative[coin+"bb_upperband"] - informative[coin+"bb_lowerband"]) / informative[coin+"bb_middleband"])
|
||||
informative[coin+'close-bb_lower'] = informative['close'] / informative[coin+'bb_lowerband']
|
||||
informative[coin + "bb_lowerband"] = bollinger["lower"]
|
||||
informative[coin + "bb_middleband"] = bollinger["mid"]
|
||||
informative[coin + "bb_upperband"] = bollinger["upper"]
|
||||
informative[coin + "bb_width"] = (
|
||||
informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"]
|
||||
) / informative[coin + "bb_middleband"]
|
||||
informative[coin + "close-bb_lower"] = (
|
||||
informative["close"] / informative[coin + "bb_lowerband"]
|
||||
)
|
||||
|
||||
informative[coin+'roc'] = ta.ROC(informative, timeperiod=3)
|
||||
informative[coin+'adx'] = ta.ADX(informative, window=14)
|
||||
informative[coin + "roc"] = ta.ROC(informative, timeperiod=3)
|
||||
informative[coin + "adx"] = ta.ADX(informative, window=14)
|
||||
|
||||
macd = ta.MACD(informative)
|
||||
informative[coin+'macd'] = macd['macd']
|
||||
informative[coin+'pct-change'] = informative['close'].pct_change()
|
||||
informative[coin+'relative_volume'] = informative['volume'] / informative['volume'].rolling(10).mean()
|
||||
informative[coin + "macd"] = macd["macd"]
|
||||
informative[coin + "pct-change"] = informative["close"].pct_change()
|
||||
informative[coin + "relative_volume"] = (
|
||||
informative["volume"] / informative["volume"].rolling(10).mean()
|
||||
)
|
||||
|
||||
informative[coin+'pct-change'] = informative['close'].pct_change()
|
||||
informative[coin + "pct-change"] = informative["close"].pct_change()
|
||||
|
||||
indicators = [col for col in informative if col.startswith(coin)]
|
||||
|
||||
for n in range(self.freqai_info['feature_parameters']['shift']+1):
|
||||
if n==0: continue
|
||||
for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
|
||||
if n == 0:
|
||||
continue
|
||||
informative_shift = informative[indicators].shift(n)
|
||||
informative_shift = informative_shift.add_suffix('_shift-'+str(n))
|
||||
informative = pd.concat((informative,informative_shift),axis=1)
|
||||
informative_shift = informative_shift.add_suffix("_shift-" + str(n))
|
||||
informative = pd.concat((informative, informative_shift), axis=1)
|
||||
|
||||
df = merge_informative_pair(df, informative, self.config['timeframe'], tf, ffill=True)
|
||||
skip_columns = [(s + '_'+tf) for s in
|
||||
['date', 'open', 'high', 'low', 'close', 'volume']]
|
||||
df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
|
||||
skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]]
|
||||
df = df.drop(columns=skip_columns)
|
||||
|
||||
return df
|
||||
|
||||
|
||||
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
|
||||
|
||||
# the configuration file parameters are stored here
|
||||
self.freqai_info = self.config['freqai']
|
||||
self.freqai_info = self.config["freqai"]
|
||||
|
||||
# the model is instantiated here
|
||||
self.model = CustomModel(self.config)
|
||||
|
||||
print('Populating indicators...')
|
||||
print("Populating indicators...")
|
||||
|
||||
# the following loops are necessary for building the features
|
||||
# indicated by the user in the configuration file.
|
||||
for tf in self.freqai_info['timeframes']:
|
||||
dataframe = self.populate_any_indicators(metadata['pair'],
|
||||
dataframe.copy(), tf)
|
||||
for i in self.freqai_info['corr_pairlist']:
|
||||
dataframe = self.populate_any_indicators(i,
|
||||
dataframe.copy(), tf, coin=i.split("/")[0]+'-')
|
||||
for tf in self.freqai_info["timeframes"]:
|
||||
dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf)
|
||||
for i in self.freqai_info["corr_pairlist"]:
|
||||
dataframe = self.populate_any_indicators(
|
||||
i, dataframe.copy(), tf, coin=i.split("/")[0] + "-"
|
||||
)
|
||||
|
||||
# the model will return 4 values, its prediction, an indication of whether or not the prediction
|
||||
# should be accepted, the target mean/std values from the labels used during each training period.
|
||||
(dataframe['prediction'], dataframe['do_predict'],
|
||||
dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)
|
||||
# the model will return 4 values, its prediction, an indication of whether or not the
|
||||
# prediction should be accepted, the target mean/std values from the labels used during
|
||||
# each training period.
|
||||
(
|
||||
dataframe["prediction"],
|
||||
dataframe["do_predict"],
|
||||
dataframe["target_mean"],
|
||||
dataframe["target_std"],
|
||||
) = self.model.bridge.start(dataframe, metadata)
|
||||
|
||||
dataframe['target_roi'] = dataframe['target_mean']+dataframe['target_std']*0.5
|
||||
dataframe['sell_roi'] = dataframe['target_mean']-dataframe['target_std']*1.5
|
||||
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 0.5
|
||||
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.5
|
||||
return dataframe
|
||||
|
||||
|
||||
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
|
||||
|
||||
buy_conditions = [
|
||||
(dataframe['prediction'] > dataframe['target_roi'])
|
||||
&
|
||||
(dataframe['do_predict'] == 1)
|
||||
(dataframe["prediction"] > dataframe["target_roi"]) & (dataframe["do_predict"] == 1)
|
||||
]
|
||||
|
||||
if buy_conditions:
|
||||
dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), 'buy'] = 1
|
||||
dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), "buy"] = 1
|
||||
|
||||
return dataframe
|
||||
|
||||
|
||||
def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
|
||||
# sell_goal = eval('self.'+metadata['pair'].split("/")[0]+'_sell_goal.value')
|
||||
sell_conditions = [
|
||||
(dataframe['prediction'] < dataframe['sell_roi'])
|
||||
&
|
||||
(dataframe['do_predict'] == 1)
|
||||
(dataframe["prediction"] < dataframe["sell_roi"]) & (dataframe["do_predict"] == 1)
|
||||
]
|
||||
if sell_conditions:
|
||||
dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), 'sell'] = 1
|
||||
dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), "sell"] = 1
|
||||
|
||||
return dataframe
|
||||
|
||||
def get_ticker_indicator(self):
|
||||
return int(self.config['timeframe'][:-1])
|
||||
return int(self.config["timeframe"][:-1])
|
||||
|
Loading…
Reference in New Issue
Block a user