Merge branch 'develop' into dev-merge-rl
This commit is contained in:
@@ -21,12 +21,12 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
"""
|
||||
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_dataframe: Full dataframe for the current training period
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:return:
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
@@ -36,14 +36,14 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
unfiltered_dataframe,
|
||||
unfiltered_df,
|
||||
dk.training_features_list,
|
||||
dk.label_list,
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
# split data into train/test data.
|
||||
@@ -61,32 +61,32 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
f' features and {len(data_dictionary["train_features"])} data points'
|
||||
)
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
||||
return model
|
||||
|
||||
def predict(
|
||||
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
"""
|
||||
|
||||
dk.find_features(unfiltered_dataframe)
|
||||
filtered_dataframe, _ = dk.filter_features(
|
||||
unfiltered_dataframe, dk.training_features_list, training_filter=False
|
||||
dk.find_features(unfiltered_df)
|
||||
filtered_df, _ = dk.filter_features(
|
||||
unfiltered_df, dk.training_features_list, training_filter=False
|
||||
)
|
||||
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
|
||||
dk.data_dictionary["prediction_features"] = filtered_dataframe
|
||||
filtered_df = dk.normalize_data_from_metadata(filtered_df)
|
||||
dk.data_dictionary["prediction_features"] = filtered_df
|
||||
|
||||
self.data_cleaning_predict(dk, filtered_dataframe)
|
||||
self.data_cleaning_predict(dk, filtered_df)
|
||||
|
||||
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
|
||||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
@@ -20,12 +20,12 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
"""
|
||||
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_dataframe: Full dataframe for the current training period
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:return:
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
@@ -35,14 +35,14 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
unfiltered_dataframe,
|
||||
unfiltered_df,
|
||||
dk.training_features_list,
|
||||
dk.label_list,
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
# split data into train/test data.
|
||||
@@ -60,33 +60,33 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
f' features and {len(data_dictionary["train_features"])} data points'
|
||||
)
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
||||
return model
|
||||
|
||||
def predict(
|
||||
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
"""
|
||||
|
||||
dk.find_features(unfiltered_dataframe)
|
||||
filtered_dataframe, _ = dk.filter_features(
|
||||
unfiltered_dataframe, dk.training_features_list, training_filter=False
|
||||
dk.find_features(unfiltered_df)
|
||||
filtered_df, _ = dk.filter_features(
|
||||
unfiltered_df, dk.training_features_list, training_filter=False
|
||||
)
|
||||
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
|
||||
dk.data_dictionary["prediction_features"] = filtered_dataframe
|
||||
filtered_df = dk.normalize_data_from_metadata(filtered_df)
|
||||
dk.data_dictionary["prediction_features"] = filtered_df
|
||||
|
||||
# optional additional data cleaning/analysis
|
||||
self.data_cleaning_predict(dk, filtered_dataframe)
|
||||
self.data_cleaning_predict(dk, filtered_df)
|
||||
|
||||
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
|
||||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
@@ -17,12 +17,12 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
"""
|
||||
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_dataframe: Full dataframe for the current training period
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:return:
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
@@ -32,14 +32,14 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
unfiltered_dataframe,
|
||||
unfiltered_df,
|
||||
dk.training_features_list,
|
||||
dk.label_list,
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
# split data into train/test data.
|
||||
@@ -57,7 +57,7 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
f' features and {len(data_dictionary["train_features"])} data points'
|
||||
)
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
65
freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py
Normal file
65
freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py
Normal file
@@ -0,0 +1,65 @@
|
||||
|
||||
from joblib import Parallel
|
||||
from sklearn.multioutput import MultiOutputRegressor, _fit_estimator
|
||||
from sklearn.utils.fixes import delayed
|
||||
from sklearn.utils.validation import has_fit_parameter
|
||||
|
||||
|
||||
class FreqaiMultiOutputRegressor(MultiOutputRegressor):
|
||||
|
||||
def fit(self, X, y, sample_weight=None, fit_params=None):
|
||||
"""Fit the model to data, separately for each output variable.
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
The input data.
|
||||
y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
|
||||
Multi-output targets. An indicator matrix turns on multilabel
|
||||
estimation.
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Sample weights. If `None`, then samples are equally weighted.
|
||||
Only supported if the underlying regressor supports sample
|
||||
weights.
|
||||
fit_params : A list of dicts for the fit_params
|
||||
Parameters passed to the ``estimator.fit`` method of each step.
|
||||
Each dict may contain same or different values (e.g. different
|
||||
eval_sets or init_models)
|
||||
.. versionadded:: 0.23
|
||||
Returns
|
||||
-------
|
||||
self : object
|
||||
Returns a fitted instance.
|
||||
"""
|
||||
|
||||
if not hasattr(self.estimator, "fit"):
|
||||
raise ValueError("The base estimator should implement a fit method")
|
||||
|
||||
y = self._validate_data(X="no_validation", y=y, multi_output=True)
|
||||
|
||||
if y.ndim == 1:
|
||||
raise ValueError(
|
||||
"y must have at least two dimensions for "
|
||||
"multi-output regression but has only one."
|
||||
)
|
||||
|
||||
if sample_weight is not None and not has_fit_parameter(
|
||||
self.estimator, "sample_weight"
|
||||
):
|
||||
raise ValueError("Underlying estimator does not support sample weights.")
|
||||
|
||||
if not fit_params:
|
||||
fit_params = [None] * y.shape[1]
|
||||
|
||||
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
|
||||
delayed(_fit_estimator)(
|
||||
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
|
||||
)
|
||||
for i in range(y.shape[1])
|
||||
)
|
||||
|
||||
if hasattr(self.estimators_[0], "n_features_in_"):
|
||||
self.n_features_in_ = self.estimators_[0].n_features_in_
|
||||
if hasattr(self.estimators_[0], "feature_names_in_"):
|
||||
self.feature_names_in_ = self.estimators_[0].feature_names_in_
|
||||
|
||||
return
|
@@ -76,6 +76,8 @@ class FreqaiDataDrawer:
|
||||
self.full_path / f"follower_dictionary-{self.follower_name}.json"
|
||||
)
|
||||
self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
|
||||
self.historic_predictions_bkp_path = Path(
|
||||
self.full_path / "historic_predictions.backup.pkl")
|
||||
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
|
||||
self.follow_mode = follow_mode
|
||||
if follow_mode:
|
||||
@@ -119,13 +121,21 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
exists = self.historic_predictions_path.is_file()
|
||||
if exists:
|
||||
with open(self.historic_predictions_path, "rb") as fp:
|
||||
self.historic_predictions = cloudpickle.load(fp)
|
||||
logger.info(
|
||||
f"Found existing historic predictions at {self.full_path}, but beware "
|
||||
"that statistics may be inaccurate if the bot has been offline for "
|
||||
"an extended period of time."
|
||||
)
|
||||
try:
|
||||
with open(self.historic_predictions_path, "rb") as fp:
|
||||
self.historic_predictions = cloudpickle.load(fp)
|
||||
logger.info(
|
||||
f"Found existing historic predictions at {self.full_path}, but beware "
|
||||
"that statistics may be inaccurate if the bot has been offline for "
|
||||
"an extended period of time."
|
||||
)
|
||||
except EOFError:
|
||||
logger.warning(
|
||||
'Historical prediction file was corrupted. Trying to load backup file.')
|
||||
with open(self.historic_predictions_bkp_path, "rb") as fp:
|
||||
self.historic_predictions = cloudpickle.load(fp)
|
||||
logger.warning('FreqAI successfully loaded the backup historical predictions file.')
|
||||
|
||||
elif not self.follow_mode:
|
||||
logger.info("Could not find existing historic_predictions, starting from scratch")
|
||||
else:
|
||||
@@ -143,6 +153,9 @@ class FreqaiDataDrawer:
|
||||
with open(self.historic_predictions_path, "wb") as fp:
|
||||
cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
|
||||
|
||||
# create a backup
|
||||
shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path)
|
||||
|
||||
def save_drawer_to_disk(self):
|
||||
"""
|
||||
Save data drawer full of all pair model metadata in present model folder.
|
||||
|
@@ -1,7 +1,8 @@
|
||||
import copy
|
||||
import datetime
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import datetime, timezone
|
||||
from math import cos, sin
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Tuple
|
||||
|
||||
@@ -9,6 +10,7 @@ import numpy as np
|
||||
import numpy.typing as npt
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from scipy import stats
|
||||
from sklearn import linear_model
|
||||
from sklearn.cluster import DBSCAN
|
||||
from sklearn.metrics.pairwise import pairwise_distances
|
||||
@@ -182,7 +184,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
def filter_features(
|
||||
self,
|
||||
unfiltered_dataframe: DataFrame,
|
||||
unfiltered_df: DataFrame,
|
||||
training_feature_list: List,
|
||||
label_list: List = list(),
|
||||
training_filter: bool = True,
|
||||
@@ -193,31 +195,35 @@ class FreqaiDataKitchen:
|
||||
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
|
||||
row that had a NaN and will shield user from that prediction.
|
||||
:params:
|
||||
:unfiltered_dataframe: the full dataframe for the present training period
|
||||
:unfiltered_df: the full dataframe for the present training period
|
||||
:training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified parameters in the configuration file.
|
||||
:labels: the labels for the dataset
|
||||
:training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
:returns:
|
||||
:filtered_dataframe: dataframe cleaned of NaNs and only containing the user
|
||||
:filtered_df: dataframe cleaned of NaNs and only containing the user
|
||||
requested feature set.
|
||||
:labels: labels cleaned of NaNs.
|
||||
"""
|
||||
filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
|
||||
filtered_dataframe = filtered_dataframe.replace([np.inf, -np.inf], np.nan)
|
||||
filtered_df = unfiltered_df.filter(training_feature_list, axis=1)
|
||||
filtered_df = filtered_df.replace([np.inf, -np.inf], np.nan)
|
||||
|
||||
drop_index = pd.isnull(filtered_dataframe).any(1) # get the rows that have NaNs,
|
||||
drop_index = pd.isnull(filtered_df).any(1) # get the rows that have NaNs,
|
||||
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
|
||||
if (training_filter):
|
||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||
if const_cols:
|
||||
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
|
||||
logger.warning(f"Removed features {const_cols} with constant values.")
|
||||
# we don't care about total row number (total no. datapoints) in training, we only care
|
||||
# about removing any row with NaNs
|
||||
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
||||
labels = unfiltered_dataframe.filter(label_list, axis=1)
|
||||
labels = unfiltered_df.filter(label_list, axis=1)
|
||||
drop_index_labels = pd.isnull(labels).any(1)
|
||||
drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0)
|
||||
dates = unfiltered_dataframe['date']
|
||||
filtered_dataframe = filtered_dataframe[
|
||||
dates = unfiltered_df['date']
|
||||
filtered_df = filtered_df[
|
||||
(drop_index == 0) & (drop_index_labels == 0)
|
||||
] # dropping values
|
||||
labels = labels[
|
||||
@@ -227,13 +233,13 @@ class FreqaiDataKitchen:
|
||||
(drop_index == 0) & (drop_index_labels == 0)
|
||||
]
|
||||
logger.info(
|
||||
f"dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points"
|
||||
f" due to NaNs in populated dataset {len(unfiltered_dataframe)}."
|
||||
f"dropped {len(unfiltered_df) - len(filtered_df)} training points"
|
||||
f" due to NaNs in populated dataset {len(unfiltered_df)}."
|
||||
)
|
||||
if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
|
||||
worst_indicator = str(unfiltered_dataframe.count().idxmin())
|
||||
if (1 - len(filtered_df) / len(unfiltered_df)) > 0.1 and self.live:
|
||||
worst_indicator = str(unfiltered_df.count().idxmin())
|
||||
logger.warning(
|
||||
f" {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.0f} percent "
|
||||
f" {(1 - len(filtered_df)/len(unfiltered_df)) * 100:.0f} percent "
|
||||
" of training data dropped due to NaNs, model may perform inconsistent "
|
||||
f"with expectations. Verify {worst_indicator}"
|
||||
)
|
||||
@@ -242,9 +248,9 @@ class FreqaiDataKitchen:
|
||||
else:
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_dataframe).any(1)
|
||||
drop_index = pd.isnull(filtered_df).any(1)
|
||||
self.data["filter_drop_index_prediction"] = drop_index
|
||||
filtered_dataframe.fillna(0, inplace=True)
|
||||
filtered_df.fillna(0, inplace=True)
|
||||
# replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction
|
||||
# that was based on a single NaN is ultimately protected from buys with do_predict
|
||||
drop_index = ~drop_index
|
||||
@@ -253,11 +259,11 @@ class FreqaiDataKitchen:
|
||||
logger.info(
|
||||
"dropped %s of %s prediction data points due to NaNs.",
|
||||
len(self.do_predict) - self.do_predict.sum(),
|
||||
len(filtered_dataframe),
|
||||
len(filtered_df),
|
||||
)
|
||||
labels = []
|
||||
|
||||
return filtered_dataframe, labels
|
||||
return filtered_df, labels
|
||||
|
||||
def build_data_dictionary(
|
||||
self,
|
||||
@@ -360,7 +366,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Normalize a set of data using the mean and standard deviation from
|
||||
Denormalize a set of data using the mean and standard deviation from
|
||||
the associated training data.
|
||||
:param df: Dataframe of predictions to be denormalized
|
||||
"""
|
||||
@@ -399,7 +405,7 @@ class FreqaiDataKitchen:
|
||||
config_timerange = TimeRange.parse_timerange(self.config["timerange"])
|
||||
if config_timerange.stopts == 0:
|
||||
config_timerange.stopts = int(
|
||||
datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
datetime.now(tz=timezone.utc).timestamp()
|
||||
)
|
||||
timerange_train = copy.deepcopy(full_timerange)
|
||||
timerange_backtest = copy.deepcopy(full_timerange)
|
||||
@@ -416,8 +422,8 @@ class FreqaiDataKitchen:
|
||||
timerange_train.stopts = timerange_train.startts + train_period_days
|
||||
|
||||
first = False
|
||||
start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
|
||||
stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts)
|
||||
start = datetime.fromtimestamp(timerange_train.startts, tz=timezone.utc)
|
||||
stop = datetime.fromtimestamp(timerange_train.stopts, tz=timezone.utc)
|
||||
tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
|
||||
tr_training_list_timerange.append(copy.deepcopy(timerange_train))
|
||||
|
||||
@@ -430,8 +436,8 @@ class FreqaiDataKitchen:
|
||||
if timerange_backtest.stopts > config_timerange.stopts:
|
||||
timerange_backtest.stopts = config_timerange.stopts
|
||||
|
||||
start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts)
|
||||
stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts)
|
||||
start = datetime.fromtimestamp(timerange_backtest.startts, tz=timezone.utc)
|
||||
stop = datetime.fromtimestamp(timerange_backtest.stopts, tz=timezone.utc)
|
||||
tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
|
||||
tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest))
|
||||
|
||||
@@ -451,13 +457,35 @@ class FreqaiDataKitchen:
|
||||
it is sliced down to just the present training period.
|
||||
"""
|
||||
|
||||
start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
|
||||
stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
|
||||
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
|
||||
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
|
||||
df = df.loc[df["date"] >= start, :]
|
||||
df = df.loc[df["date"] <= stop, :]
|
||||
if not self.live:
|
||||
df = df.loc[df["date"] < stop, :]
|
||||
|
||||
return df
|
||||
|
||||
def remove_training_from_backtesting(
|
||||
self
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Function which takes the backtesting time range and
|
||||
remove training data from dataframe, keeping only the
|
||||
startup_candle_count candles
|
||||
"""
|
||||
startup_candle_count = self.config.get('startup_candle_count', 0)
|
||||
tf = self.config['timeframe']
|
||||
tr = self.config["timerange"]
|
||||
|
||||
backtesting_timerange = TimeRange.parse_timerange(tr)
|
||||
if startup_candle_count > 0 and backtesting_timerange:
|
||||
backtesting_timerange.subtract_start(timeframe_to_seconds(tf) * startup_candle_count)
|
||||
|
||||
start = datetime.fromtimestamp(backtesting_timerange.startts, tz=timezone.utc)
|
||||
df = self.return_dataframe
|
||||
df = df.loc[df["date"] >= start, :]
|
||||
return df
|
||||
|
||||
def principal_component_analysis(self) -> None:
|
||||
"""
|
||||
Performs Principal Component Analysis on the data for dimensionality reduction
|
||||
@@ -652,8 +680,6 @@ class FreqaiDataKitchen:
|
||||
is an outlier.
|
||||
"""
|
||||
|
||||
from math import cos, sin
|
||||
|
||||
if predict:
|
||||
if not self.data['DBSCAN_eps']:
|
||||
return
|
||||
@@ -746,6 +772,111 @@ class FreqaiDataKitchen:
|
||||
|
||||
return
|
||||
|
||||
def compute_inlier_metric(self, set_='train') -> None:
|
||||
"""
|
||||
|
||||
Compute inlier metric from backwards distance distributions.
|
||||
This metric defines how well features from a timepoint fit
|
||||
into previous timepoints.
|
||||
"""
|
||||
|
||||
no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
|
||||
|
||||
if set_ == 'train':
|
||||
compute_df = copy.deepcopy(self.data_dictionary['train_features'])
|
||||
elif set_ == 'test':
|
||||
compute_df = copy.deepcopy(self.data_dictionary['test_features'])
|
||||
else:
|
||||
compute_df = copy.deepcopy(self.data_dictionary['prediction_features'])
|
||||
|
||||
compute_df_reindexed = compute_df.reindex(
|
||||
index=np.flip(compute_df.index)
|
||||
)
|
||||
|
||||
pairwise = pd.DataFrame(
|
||||
np.triu(
|
||||
pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count)
|
||||
),
|
||||
columns=compute_df_reindexed.index,
|
||||
index=compute_df_reindexed.index
|
||||
)
|
||||
pairwise = pairwise.round(5)
|
||||
|
||||
column_labels = [
|
||||
'{}{}'.format('d', i) for i in range(1, no_prev_pts + 1)
|
||||
]
|
||||
distances = pd.DataFrame(
|
||||
columns=column_labels, index=compute_df.index
|
||||
)
|
||||
|
||||
for index in compute_df.index[no_prev_pts:]:
|
||||
current_row = pairwise.loc[[index]]
|
||||
current_row_no_zeros = current_row.loc[
|
||||
:, (current_row != 0).any(axis=0)
|
||||
]
|
||||
distances.loc[[index]] = current_row_no_zeros.iloc[
|
||||
:, :no_prev_pts
|
||||
]
|
||||
distances = distances.replace([np.inf, -np.inf], np.nan)
|
||||
drop_index = pd.isnull(distances).any(1)
|
||||
distances = distances[drop_index == 0]
|
||||
|
||||
inliers = pd.DataFrame(index=distances.index)
|
||||
for key in distances.keys():
|
||||
current_distances = distances[key].dropna()
|
||||
fit_params = stats.weibull_min.fit(current_distances)
|
||||
quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
|
||||
|
||||
df_inlier = pd.DataFrame(
|
||||
{key: quantiles}, index=distances.index
|
||||
)
|
||||
inliers = pd.concat(
|
||||
[inliers, df_inlier], axis=1
|
||||
)
|
||||
|
||||
inlier_metric = pd.DataFrame(
|
||||
data=inliers.sum(axis=1) / no_prev_pts,
|
||||
columns=['inlier_metric'],
|
||||
index=compute_df.index
|
||||
)
|
||||
|
||||
inlier_metric = (2 * (inlier_metric - inlier_metric.min()) /
|
||||
(inlier_metric.max() - inlier_metric.min()) - 1)
|
||||
|
||||
if set_ in ('train', 'test'):
|
||||
inlier_metric = inlier_metric.iloc[no_prev_pts:]
|
||||
compute_df = compute_df.iloc[no_prev_pts:]
|
||||
self.remove_beginning_points_from_data_dict(set_, no_prev_pts)
|
||||
self.data_dictionary[f'{set_}_features'] = pd.concat(
|
||||
[compute_df, inlier_metric], axis=1)
|
||||
else:
|
||||
self.data_dictionary['prediction_features'] = pd.concat(
|
||||
[compute_df, inlier_metric], axis=1)
|
||||
self.data_dictionary['prediction_features'].fillna(0, inplace=True)
|
||||
|
||||
logger.info('Inlier metric computed and added to features.')
|
||||
|
||||
return None
|
||||
|
||||
def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10):
|
||||
features = self.data_dictionary[f'{set_}_features']
|
||||
weights = self.data_dictionary[f'{set_}_weights']
|
||||
labels = self.data_dictionary[f'{set_}_labels']
|
||||
self.data_dictionary[f'{set_}_weights'] = weights[no_prev_pts:]
|
||||
self.data_dictionary[f'{set_}_features'] = features.iloc[no_prev_pts:]
|
||||
self.data_dictionary[f'{set_}_labels'] = labels.iloc[no_prev_pts:]
|
||||
|
||||
def add_noise_to_training_features(self) -> None:
|
||||
"""
|
||||
Add noise to train features to reduce the risk of overfitting.
|
||||
"""
|
||||
mu = 0 # no shift
|
||||
sigma = self.freqai_config["feature_parameters"]["noise_standard_deviation"]
|
||||
compute_df = self.data_dictionary['train_features']
|
||||
noise = np.random.normal(mu, sigma, [compute_df.shape[0], compute_df.shape[1]])
|
||||
self.data_dictionary['train_features'] += noise
|
||||
return
|
||||
|
||||
def find_features(self, dataframe: DataFrame) -> None:
|
||||
"""
|
||||
Find features in the strategy provided dataframe
|
||||
@@ -848,6 +979,7 @@ class FreqaiDataKitchen:
|
||||
to_keep = [col for col in dataframe.columns if not col.startswith("&")]
|
||||
self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1)
|
||||
|
||||
self.return_dataframe = self.remove_training_from_backtesting()
|
||||
self.full_df = DataFrame()
|
||||
|
||||
return
|
||||
@@ -871,14 +1003,14 @@ class FreqaiDataKitchen:
|
||||
"Please indicate the end date of your desired backtesting. "
|
||||
"timerange.")
|
||||
# backtest_timerange.stopts = int(
|
||||
# datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
# datetime.now(tz=timezone.utc).timestamp()
|
||||
# )
|
||||
|
||||
backtest_timerange.startts = (
|
||||
backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY
|
||||
)
|
||||
start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
|
||||
stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts)
|
||||
start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc)
|
||||
stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc)
|
||||
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
|
||||
|
||||
self.full_path = Path(
|
||||
@@ -904,7 +1036,7 @@ class FreqaiDataKitchen:
|
||||
:return:
|
||||
bool = If the model is expired or not.
|
||||
"""
|
||||
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
time = datetime.now(tz=timezone.utc).timestamp()
|
||||
elapsed_time = (time - trained_timestamp) / 3600 # hours
|
||||
max_time = self.freqai_config.get("expiration_hours", 0)
|
||||
if max_time > 0:
|
||||
@@ -916,7 +1048,7 @@ class FreqaiDataKitchen:
|
||||
self, trained_timestamp: int
|
||||
) -> Tuple[bool, TimeRange, TimeRange]:
|
||||
|
||||
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
time = datetime.now(tz=timezone.utc).timestamp()
|
||||
trained_timerange = TimeRange()
|
||||
data_load_timerange = TimeRange()
|
||||
|
||||
@@ -1094,7 +1226,6 @@ class FreqaiDataKitchen:
|
||||
def save_backtesting_prediction(
|
||||
self, append_df: DataFrame
|
||||
) -> None:
|
||||
|
||||
"""
|
||||
Save prediction dataframe from backtesting to h5 file format
|
||||
:param append_df: dataframe for backtesting period
|
||||
@@ -1108,7 +1239,6 @@ class FreqaiDataKitchen:
|
||||
def get_backtesting_prediction(
|
||||
self
|
||||
) -> DataFrame:
|
||||
|
||||
"""
|
||||
Get prediction dataframe from h5 file format
|
||||
"""
|
||||
|
@@ -1,13 +1,12 @@
|
||||
# import contextlib
|
||||
import datetime
|
||||
import logging
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from threading import Lock
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
@@ -15,6 +14,7 @@ from numpy.typing import NDArray
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import DATETIME_PRINT_FORMAT
|
||||
from freqtrade.enums import RunMode
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
@@ -27,13 +27,6 @@ pd.options.mode.chained_assignment = None
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def threaded(fn):
|
||||
def wrapper(*args, **kwargs):
|
||||
threading.Thread(target=fn, args=args, kwargs=kwargs).start()
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
class IFreqaiModel(ABC):
|
||||
"""
|
||||
Class containing all tools for training and prediction in the strategy.
|
||||
@@ -66,7 +59,6 @@ class IFreqaiModel(ABC):
|
||||
"data_split_parameters", {})
|
||||
self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
|
||||
"model_training_parameters", {})
|
||||
self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
|
||||
self.retrain = False
|
||||
self.first = True
|
||||
self.set_full_path()
|
||||
@@ -77,11 +69,14 @@ class IFreqaiModel(ABC):
|
||||
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
|
||||
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
|
||||
self.scanning = False
|
||||
self.ft_params = self.freqai_info["feature_parameters"]
|
||||
self.keras: bool = self.freqai_info.get("keras", False)
|
||||
if self.keras and self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0):
|
||||
self.freqai_info["feature_parameters"]["DI_threshold"] = 0
|
||||
if self.keras and self.ft_params.get("DI_threshold", 0):
|
||||
self.ft_params["DI_threshold"] = 0
|
||||
logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
|
||||
self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
|
||||
if self.ft_params.get("inlier_metric_window", 0):
|
||||
self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
|
||||
self.pair_it = 0
|
||||
self.pair_it_train = 0
|
||||
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
|
||||
@@ -93,6 +88,16 @@ class IFreqaiModel(ABC):
|
||||
self.begin_time: float = 0
|
||||
self.begin_time_train: float = 0
|
||||
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
|
||||
self.continual_learning = self.freqai_info.get('continual_learning', False)
|
||||
|
||||
self._threads: List[threading.Thread] = []
|
||||
self._stop_event = threading.Event()
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Return an empty state to be pickled in hyperopt
|
||||
"""
|
||||
return ({})
|
||||
self.strategy: Optional[IStrategy] = None
|
||||
|
||||
def assert_config(self, config: Dict[str, Any]) -> None:
|
||||
@@ -148,15 +153,34 @@ class IFreqaiModel(ABC):
|
||||
self.model = None
|
||||
self.dk = None
|
||||
|
||||
@threaded
|
||||
def start_scanning(self, strategy: IStrategy) -> None:
|
||||
def shutdown(self):
|
||||
"""
|
||||
Cleans up threads on Shutdown, set stop event. Join threads to wait
|
||||
for current training iteration.
|
||||
"""
|
||||
logger.info("Stopping FreqAI")
|
||||
self._stop_event.set()
|
||||
|
||||
logger.info("Waiting on Training iteration")
|
||||
for _thread in self._threads:
|
||||
_thread.join()
|
||||
|
||||
def start_scanning(self, *args, **kwargs) -> None:
|
||||
"""
|
||||
Start `self._start_scanning` in a separate thread
|
||||
"""
|
||||
_thread = threading.Thread(target=self._start_scanning, args=args, kwargs=kwargs)
|
||||
self._threads.append(_thread)
|
||||
_thread.start()
|
||||
|
||||
def _start_scanning(self, strategy: IStrategy) -> None:
|
||||
"""
|
||||
Function designed to constantly scan pairs for retraining on a separate thread (intracandle)
|
||||
to improve model youth. This function is agnostic to data preparation/collection/storage,
|
||||
it simply trains on what ever data is available in the self.dd.
|
||||
:param strategy: IStrategy = The user defined strategy class
|
||||
"""
|
||||
while 1:
|
||||
while not self._stop_event.is_set():
|
||||
time.sleep(1)
|
||||
for pair in self.config.get("exchange", {}).get("pair_whitelist"):
|
||||
|
||||
@@ -175,7 +199,7 @@ class IFreqaiModel(ABC):
|
||||
|
||||
if retrain:
|
||||
self.train_timer('start')
|
||||
self.train_model_in_series(
|
||||
self.extract_data_and_train_model(
|
||||
new_trained_timerange, pair, strategy, dk, data_load_timerange
|
||||
)
|
||||
self.train_timer('stop')
|
||||
@@ -215,12 +239,12 @@ class IFreqaiModel(ABC):
|
||||
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
|
||||
|
||||
trained_timestamp = tr_train
|
||||
tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime(
|
||||
"%Y-%m-%d %H:%M:%S"
|
||||
)
|
||||
tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime(
|
||||
"%Y-%m-%d %H:%M:%S"
|
||||
)
|
||||
tr_train_startts_str = datetime.fromtimestamp(
|
||||
tr_train.startts,
|
||||
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
|
||||
tr_train_stopts_str = datetime.fromtimestamp(
|
||||
tr_train.stopts,
|
||||
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
|
||||
logger.info(
|
||||
f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
|
||||
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
|
||||
@@ -405,24 +429,30 @@ class IFreqaiModel(ABC):
|
||||
|
||||
def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Base data cleaning method for train
|
||||
Any function inside this method should drop training data points from the filtered_dataframe
|
||||
based on user decided logic. See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an
|
||||
example of how outlier data points are dropped from the dataframe used for training.
|
||||
Base data cleaning method for train.
|
||||
Functions here improve/modify the input data by identifying outliers,
|
||||
computing additional metrics, adding noise, reducing dimensionality etc.
|
||||
"""
|
||||
|
||||
if self.freqai_info["feature_parameters"].get(
|
||||
ft_params = self.freqai_info["feature_parameters"]
|
||||
|
||||
if ft_params.get('inlier_metric_window', 0):
|
||||
dk.compute_inlier_metric(set_='train')
|
||||
if self.freqai_info["data_split_parameters"]["test_size"] > 0:
|
||||
dk.compute_inlier_metric(set_='test')
|
||||
|
||||
if ft_params.get(
|
||||
"principal_component_analysis", False
|
||||
):
|
||||
dk.principal_component_analysis()
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
|
||||
if ft_params.get("use_SVM_to_remove_outliers", False):
|
||||
dk.use_SVM_to_remove_outliers(predict=False)
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
|
||||
if ft_params.get("DI_threshold", 0):
|
||||
dk.data["avg_mean_dist"] = dk.compute_distances()
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
|
||||
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
|
||||
if dk.pair in self.dd.old_DBSCAN_eps:
|
||||
eps = self.dd.old_DBSCAN_eps[dk.pair]
|
||||
else:
|
||||
@@ -430,29 +460,31 @@ class IFreqaiModel(ABC):
|
||||
dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps)
|
||||
self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps']
|
||||
|
||||
if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
|
||||
dk.add_noise_to_training_features()
|
||||
|
||||
def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
|
||||
"""
|
||||
Base data cleaning method for predict.
|
||||
These functions each modify dk.do_predict, which is a dataframe with equal length
|
||||
to the number of candles coming from and returning to the strategy. Inside do_predict,
|
||||
1 allows prediction and < 0 signals to the strategy that the model is not confident in
|
||||
the prediction.
|
||||
See FreqaiDataKitchen::remove_outliers() for an example
|
||||
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
|
||||
for buy signals.
|
||||
Functions here are complementary to the functions of data_cleaning_train.
|
||||
"""
|
||||
if self.freqai_info["feature_parameters"].get(
|
||||
ft_params = self.freqai_info["feature_parameters"]
|
||||
|
||||
if ft_params.get('inlier_metric_window', 0):
|
||||
dk.compute_inlier_metric(set_='predict')
|
||||
|
||||
if ft_params.get(
|
||||
"principal_component_analysis", False
|
||||
):
|
||||
dk.pca_transform(dataframe)
|
||||
dk.pca_transform(self.dk.data_dictionary['prediction_features'])
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
|
||||
if ft_params.get("use_SVM_to_remove_outliers", False):
|
||||
dk.use_SVM_to_remove_outliers(predict=True)
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
|
||||
if ft_params.get("DI_threshold", 0):
|
||||
dk.check_if_pred_in_training_spaces()
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
|
||||
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
|
||||
dk.use_DBSCAN_to_remove_outliers(predict=True)
|
||||
|
||||
def model_exists(
|
||||
@@ -488,7 +520,7 @@ class IFreqaiModel(ABC):
|
||||
Path(self.full_path, Path(self.config["config_files"][0]).name),
|
||||
)
|
||||
|
||||
def train_model_in_series(
|
||||
def extract_data_and_train_model(
|
||||
self,
|
||||
new_trained_timerange: TimeRange,
|
||||
pair: str,
|
||||
@@ -580,7 +612,7 @@ class IFreqaiModel(ABC):
|
||||
|
||||
# # for keras type models, the conv_window needs to be prepended so
|
||||
# # viewing is correct in frequi
|
||||
if self.freqai_info.get('keras', False):
|
||||
if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0):
|
||||
n_lost_points = self.freqai_info.get('conv_width', 2)
|
||||
zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))),
|
||||
columns=hist_preds_df.columns)
|
||||
@@ -646,21 +678,30 @@ class IFreqaiModel(ABC):
|
||||
self.train_time = 0
|
||||
return
|
||||
|
||||
def get_init_model(self, pair: str) -> Any:
|
||||
if pair not in self.dd.model_dictionary or not self.continual_learning:
|
||||
init_model = None
|
||||
else:
|
||||
init_model = self.dd.model_dictionary[pair]
|
||||
|
||||
return init_model
|
||||
|
||||
# Following methods which are overridden by user made prediction models.
|
||||
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
|
||||
|
||||
@abstractmethod
|
||||
def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any:
|
||||
def train(self, unfiltered_df: DataFrame, pair: str,
|
||||
dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datahandler
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_dataframe: Full dataframe for the current training period
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:return: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
|
||||
def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
Most regressors use the same function names and arguments e.g. user
|
||||
can drop in LGBMRegressor in place of CatBoostRegressor and all data
|
||||
@@ -673,11 +714,11 @@ class IFreqaiModel(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def predict(
|
||||
self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
|
||||
:param first: boolean = whether this is the first prediction or not.
|
||||
:return:
|
||||
|
@@ -3,7 +3,8 @@ from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostClassifier, Pool
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -16,7 +17,7 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
@@ -36,6 +37,8 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
cbr.fit(train_data)
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
cbr.fit(train_data, init_model=init_model)
|
||||
|
||||
return cbr
|
||||
|
@@ -1,10 +1,10 @@
|
||||
import gc
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -17,7 +17,7 @@ class CatboostRegressor(BaseRegressionModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
@@ -38,16 +38,13 @@ class CatboostRegressor(BaseRegressionModel):
|
||||
weight=data_dictionary["test_weights"],
|
||||
)
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
model.fit(X=train_data, eval_set=test_data)
|
||||
|
||||
# some evidence that catboost pools have memory leaks:
|
||||
# https://github.com/catboost/catboost/issues/1835
|
||||
del train_data, test_data
|
||||
gc.collect()
|
||||
model.fit(X=train_data, eval_set=test_data, init_model=init_model)
|
||||
|
||||
return model
|
||||
|
@@ -1,10 +1,11 @@
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor # , Pool
|
||||
from sklearn.multioutput import MultiOutputRegressor
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -17,7 +18,7 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
@@ -31,14 +32,37 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
|
||||
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
model = MultiOutputRegressor(estimator=cbr)
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
|
||||
eval_sets = [None] * y.shape[1]
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
train_score = model.score(X, y)
|
||||
test_score = model.score(*eval_set)
|
||||
logger.info(f"Train score {train_score}, Test score {test_score}")
|
||||
eval_sets = [None] * data_dictionary['test_labels'].shape[1]
|
||||
|
||||
for i in range(data_dictionary['test_labels'].shape[1]):
|
||||
eval_sets[i] = Pool(
|
||||
data=data_dictionary["test_features"],
|
||||
label=data_dictionary["test_labels"].iloc[:, i],
|
||||
weight=data_dictionary["test_weights"],
|
||||
)
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
if init_model:
|
||||
init_models = init_model.estimators_
|
||||
else:
|
||||
init_models = [None] * y.shape[1]
|
||||
|
||||
fit_params = []
|
||||
for i in range(len(eval_sets)):
|
||||
fit_params.append(
|
||||
{'eval_set': eval_sets[i], 'init_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=cbr)
|
||||
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
|
||||
if thread_training:
|
||||
model.n_jobs = y.shape[1]
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
return model
|
||||
|
@@ -3,7 +3,8 @@ from typing import Any, Dict
|
||||
|
||||
from lightgbm import LGBMClassifier
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -16,7 +17,7 @@ class LightGBMClassifier(BaseClassifierModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
@@ -35,9 +36,11 @@ class LightGBMClassifier(BaseClassifierModel):
|
||||
y = data_dictionary["train_labels"].to_numpy()[:, 0]
|
||||
train_weights = data_dictionary["train_weights"]
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = LGBMClassifier(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
|
||||
eval_sample_weight=[test_weights])
|
||||
eval_sample_weight=[test_weights], init_model=init_model)
|
||||
|
||||
return model
|
||||
|
@@ -3,7 +3,8 @@ from typing import Any, Dict
|
||||
|
||||
from lightgbm import LGBMRegressor
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -16,7 +17,7 @@ class LightGBMRegressor(BaseRegressionModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
Most regressors use the same function names and arguments e.g. user
|
||||
can drop in LGBMRegressor in place of CatBoostRegressor and all data
|
||||
@@ -35,9 +36,11 @@ class LightGBMRegressor(BaseRegressionModel):
|
||||
y = data_dictionary["train_labels"]
|
||||
train_weights = data_dictionary["train_weights"]
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = LGBMRegressor(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
|
||||
eval_sample_weight=[eval_weights])
|
||||
eval_sample_weight=[eval_weights], init_model=init_model)
|
||||
|
||||
return model
|
||||
|
@@ -2,9 +2,10 @@ import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from lightgbm import LGBMRegressor
|
||||
from sklearn.multioutput import MultiOutputRegressor
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -17,7 +18,7 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
@@ -28,12 +29,36 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
model = MultiOutputRegressor(estimator=lgb)
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
|
||||
train_score = model.score(X, y)
|
||||
test_score = model.score(*eval_set)
|
||||
logger.info(f"Train score {train_score}, Test score {test_score}")
|
||||
eval_weights = None
|
||||
eval_sets = [None] * y.shape[1]
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
eval_weights = [data_dictionary["test_weights"]]
|
||||
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore
|
||||
for i in range(data_dictionary['test_labels'].shape[1]):
|
||||
eval_sets[i] = ( # type: ignore
|
||||
data_dictionary["test_features"],
|
||||
data_dictionary["test_labels"].iloc[:, i]
|
||||
)
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
if init_model:
|
||||
init_models = init_model.estimators_
|
||||
else:
|
||||
init_models = [None] * y.shape[1]
|
||||
|
||||
fit_params = []
|
||||
for i in range(len(eval_sets)):
|
||||
fit_params.append(
|
||||
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
|
||||
'init_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=lgb)
|
||||
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
|
||||
if thread_training:
|
||||
model.n_jobs = y.shape[1]
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
return model
|
||||
|
45
freqtrade/freqai/prediction_models/XGBoostRegressor.py
Normal file
45
freqtrade/freqai/prediction_models/XGBoostRegressor.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from xgboost import XGBRegressor
|
||||
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostRegressor(BaseRegressionModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
functions, predict(), train(), fit(). The class inherits ModelHandler which
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
eval_set = None
|
||||
else:
|
||||
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
|
||||
eval_weights = [data_dictionary['test_weights']]
|
||||
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
xgb_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = XGBRegressor(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
|
||||
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
|
||||
|
||||
return model
|
@@ -0,0 +1,63 @@
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from xgboost import XGBRegressor
|
||||
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostRegressorMultiTarget(BaseRegressionModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
functions, predict(), train(), fit(). The class inherits ModelHandler which
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
xgb = XGBRegressor(**self.model_training_parameters)
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
eval_weights = None
|
||||
eval_sets = [None] * y.shape[1]
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
eval_weights = [data_dictionary["test_weights"]]
|
||||
for i in range(data_dictionary['test_labels'].shape[1]):
|
||||
eval_sets[i] = [( # type: ignore
|
||||
data_dictionary["test_features"],
|
||||
data_dictionary["test_labels"].iloc[:, i]
|
||||
)]
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
if init_model:
|
||||
init_models = init_model.estimators_
|
||||
else:
|
||||
init_models = [None] * y.shape[1]
|
||||
|
||||
fit_params = []
|
||||
for i in range(len(eval_sets)):
|
||||
fit_params.append(
|
||||
{'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights,
|
||||
'xgb_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=xgb)
|
||||
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
|
||||
if thread_training:
|
||||
model.n_jobs = y.shape[1]
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
return model
|
Reference in New Issue
Block a user