Merge pull request #7367 from freqtrade/add-continual-learning

add continual learning to catboost and friends
This commit is contained in:
Matthias 2022-09-10 20:17:28 +02:00 committed by GitHub
commit e4caccc353
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 383 additions and 76 deletions

View File

@ -98,6 +98,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
| `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> Defaults set to 0, which means models never expire. <br> **Datatype:** Positive integer.
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training data set. <br> **Datatype:** Positive integer.
| `follow_mode` | If true, this instance of FreqAI will look for models associated with `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. Default: `False`.
| `continual_learning` | If true, FreqAI will start training new models from the final state of the most recently trained model. <br> **Datatype:** Boolean. Default: `False`.
| | **Feature parameters**
| `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](#feature-engineering). <br> **Datatype:** Dictionary.
| `include_timeframes` | A list of timeframes that all indicators in `populate_any_indicators` will be created for. The list is added as features to the base asset feature set. <br> **Datatype:** List of timeframes (strings).

View File

@ -21,12 +21,12 @@ class BaseClassifierModel(IFreqaiModel):
"""
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param unfiltered_df: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:return:
:model: Trained model which can be used to inference (self.predict)
@ -36,14 +36,14 @@ class BaseClassifierModel(IFreqaiModel):
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
unfiltered_df,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
logger.info(f"-------------------- Training on data from {start_date} to "
f"{end_date}--------------------")
# split data into train/test data.
@ -61,32 +61,32 @@ class BaseClassifierModel(IFreqaiModel):
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary)
model = self.fit(data_dictionary, dk)
logger.info(f"--------------------done training {pair}--------------------")
return model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:param: unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
dk.find_features(unfiltered_df)
filtered_df, _ = dk.filter_features(
unfiltered_df, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df
self.data_cleaning_predict(dk, filtered_dataframe)
self.data_cleaning_predict(dk, filtered_df)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list)

View File

@ -20,12 +20,12 @@ class BaseRegressionModel(IFreqaiModel):
"""
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param unfiltered_df: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:return:
:model: Trained model which can be used to inference (self.predict)
@ -35,14 +35,14 @@ class BaseRegressionModel(IFreqaiModel):
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
unfiltered_df,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
logger.info(f"-------------------- Training on data from {start_date} to "
f"{end_date}--------------------")
# split data into train/test data.
@ -60,33 +60,33 @@ class BaseRegressionModel(IFreqaiModel):
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary)
model = self.fit(data_dictionary, dk)
logger.info(f"--------------------done training {pair}--------------------")
return model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:param: unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
dk.find_features(unfiltered_df)
filtered_df, _ = dk.filter_features(
unfiltered_df, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df
# optional additional data cleaning/analysis
self.data_cleaning_predict(dk, filtered_dataframe)
self.data_cleaning_predict(dk, filtered_df)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list)

View File

@ -17,12 +17,12 @@ class BaseTensorFlowModel(IFreqaiModel):
"""
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param unfiltered_df: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:return:
:model: Trained model which can be used to inference (self.predict)
@ -32,14 +32,14 @@ class BaseTensorFlowModel(IFreqaiModel):
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
unfiltered_df,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
logger.info(f"-------------------- Training on data from {start_date} to "
f"{end_date}--------------------")
# split data into train/test data.
@ -57,7 +57,7 @@ class BaseTensorFlowModel(IFreqaiModel):
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary)
model = self.fit(data_dictionary, dk)
logger.info(f"--------------------done training {pair}--------------------")

View File

@ -0,0 +1,75 @@
from joblib import Parallel
from sklearn.multioutput import MultiOutputRegressor, _fit_estimator
from sklearn.utils.fixes import delayed
from sklearn.utils.validation import has_fit_parameter
class FreqaiMultiOutputRegressor(MultiOutputRegressor):
def fit(self, X, y, sample_weight=None, fit_params=None):
"""Fit the model to data, separately for each output variable.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The input data.
y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
Multi-output targets. An indicator matrix turns on multilabel
estimation.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If `None`, then samples are equally weighted.
Only supported if the underlying regressor supports sample
weights.
fit_params : A list of dicts for the fit_params
Parameters passed to the ``estimator.fit`` method of each step.
Each dict may contain same or different values (e.g. different
eval_sets or init_models)
.. versionadded:: 0.23
Returns
-------
self : object
Returns a fitted instance.
"""
if not hasattr(self.estimator, "fit"):
raise ValueError("The base estimator should implement a fit method")
y = self._validate_data(X="no_validation", y=y, multi_output=True)
# if is_classifier(self):
# check_classification_targets(y)
if y.ndim == 1:
raise ValueError(
"y must have at least two dimensions for "
"multi-output regression but has only one."
)
if sample_weight is not None and not has_fit_parameter(
self.estimator, "sample_weight"
):
raise ValueError("Underlying estimator does not support sample weights.")
# fit_params_validated = _check_fit_params(X, fit_params)
if not fit_params:
fit_params = [None] * y.shape[1]
# if not init_models:
# init_models = [None] * y.shape[1]
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_estimator)(
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
# init_model=init_models[i], eval_set=eval_sets[i],
# **fit_params_validated
)
for i in range(y.shape[1])
)
if hasattr(self.estimators_[0], "n_features_in_"):
self.n_features_in_ = self.estimators_[0].n_features_in_
if hasattr(self.estimators_[0], "feature_names_in_"):
self.feature_names_in_ = self.estimators_[0].feature_names_in_
return

View File

@ -88,6 +88,7 @@ class IFreqaiModel(ABC):
self.begin_time: float = 0
self.begin_time_train: float = 0
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
self.continual_learning = self.freqai_info.get('continual_learning', False)
self._threads: List[threading.Thread] = []
self._stop_event = threading.Event()
@ -676,21 +677,30 @@ class IFreqaiModel(ABC):
self.train_time = 0
return
def get_init_model(self, pair: str) -> Any:
if pair not in self.dd.model_dictionary or not self.continual_learning:
init_model = None
else:
init_model = self.dd.model_dictionary[pair]
return init_model
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
@abstractmethod
def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str,
dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datahandler
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param unfiltered_df: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:return: Trained model which can be used to inference (self.predict)
"""
@abstractmethod
def fit(self, data_dictionary: Dict[str, Any]) -> Any:
def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Most regressors use the same function names and arguments e.g. user
can drop in LGBMRegressor in place of CatBoostRegressor and all data
@ -703,11 +713,11 @@ class IFreqaiModel(ABC):
@abstractmethod
def predict(
self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param unfiltered_dataframe: Full dataframe for the current backtest period.
:param unfiltered_df: Full dataframe for the current backtest period.
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
:param first: boolean = whether this is the first prediction or not.
:return:

View File

@ -3,7 +3,8 @@ from typing import Any, Dict
from catboost import CatBoostClassifier, Pool
from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@ -16,7 +17,7 @@ class CatboostClassifier(BaseClassifierModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:params:
@ -36,6 +37,8 @@ class CatboostClassifier(BaseClassifierModel):
**self.model_training_parameters,
)
cbr.fit(train_data)
init_model = self.get_init_model(dk.pair)
cbr.fit(train_data, init_model=init_model)
return cbr

View File

@ -1,10 +1,10 @@
import gc
import logging
from typing import Any, Dict
from catboost import CatBoostRegressor, Pool
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@ -17,7 +17,7 @@ class CatboostRegressor(BaseRegressionModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
@ -38,16 +38,13 @@ class CatboostRegressor(BaseRegressionModel):
weight=data_dictionary["test_weights"],
)
init_model = self.get_init_model(dk.pair)
model = CatBoostRegressor(
allow_writing_files=False,
**self.model_training_parameters,
)
model.fit(X=train_data, eval_set=test_data)
# some evidence that catboost pools have memory leaks:
# https://github.com/catboost/catboost/issues/1835
del train_data, test_data
gc.collect()
model.fit(X=train_data, eval_set=test_data, init_model=init_model)
return model

View File

@ -1,10 +1,11 @@
import logging
from typing import Any, Dict
from catboost import CatBoostRegressor # , Pool
from sklearn.multioutput import MultiOutputRegressor
from catboost import CatBoostRegressor, Pool
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@ -17,7 +18,7 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
@ -31,14 +32,34 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
sample_weight = data_dictionary["train_weights"]
model = MultiOutputRegressor(estimator=cbr)
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
train_score = model.score(X, y)
test_score = model.score(*eval_set)
logger.info(f"Train score {train_score}, Test score {test_score}")
eval_sets = [None] * data_dictionary['test_labels'].shape[1]
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"].iloc[:, i],
weight=data_dictionary["test_weights"],
)
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'init_model': init_models[i]})
model = FreqaiMultiOutputRegressor(estimator=cbr)
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model

View File

@ -3,7 +3,8 @@ from typing import Any, Dict
from lightgbm import LGBMClassifier
from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@ -16,7 +17,7 @@ class LightGBMClassifier(BaseClassifierModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:params:
@ -35,9 +36,11 @@ class LightGBMClassifier(BaseClassifierModel):
y = data_dictionary["train_labels"].to_numpy()[:, 0]
train_weights = data_dictionary["train_weights"]
init_model = self.get_init_model(dk.pair)
model = LGBMClassifier(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
eval_sample_weight=[test_weights])
eval_sample_weight=[test_weights], init_model=init_model)
return model

View File

@ -3,7 +3,8 @@ from typing import Any, Dict
from lightgbm import LGBMRegressor
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@ -16,7 +17,7 @@ class LightGBMRegressor(BaseRegressionModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Most regressors use the same function names and arguments e.g. user
can drop in LGBMRegressor in place of CatBoostRegressor and all data
@ -35,9 +36,11 @@ class LightGBMRegressor(BaseRegressionModel):
y = data_dictionary["train_labels"]
train_weights = data_dictionary["train_weights"]
init_model = self.get_init_model(dk.pair)
model = LGBMRegressor(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
eval_sample_weight=[eval_weights])
eval_sample_weight=[eval_weights], init_model=init_model)
return model

View File

@ -2,9 +2,10 @@ import logging
from typing import Any, Dict
from lightgbm import LGBMRegressor
from sklearn.multioutput import MultiOutputRegressor
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@ -17,7 +18,7 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
@ -28,12 +29,36 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
sample_weight = data_dictionary["train_weights"]
model = MultiOutputRegressor(estimator=lgb)
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
train_score = model.score(X, y)
test_score = model.score(*eval_set)
logger.info(f"Train score {train_score}, Test score {test_score}")
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = ( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
)
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
'init_model': init_models[i]})
model = FreqaiMultiOutputRegressor(estimator=lgb)
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
# model = FreqaiMultiOutputRegressor(estimator=lgb)
# model.fit(X=X, y=y, sample_weight=sample_weight, init_models=init_models,
# eval_sets=eval_sets, eval_sample_weight=eval_weights)
return model

View File

@ -0,0 +1,45 @@
import logging
from typing import Any, Dict
from xgboost import XGBRegressor
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class XGBoostRegressor(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
else:
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
eval_weights = [data_dictionary['test_weights']]
sample_weight = data_dictionary["train_weights"]
xgb_model = self.get_init_model(dk.pair)
model = XGBRegressor(**self.model_training_parameters)
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
return model

View File

@ -0,0 +1,60 @@
import logging
from typing import Any, Dict
from xgboost import XGBRegressor
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class XGBoostRegressorMultiTarget(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
xgb = XGBRegressor(**self.model_training_parameters)
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
sample_weight = data_dictionary["train_weights"]
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = [( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
)]
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights,
'xgb_model': init_models[i]})
model = FreqaiMultiOutputRegressor(estimator=xgb)
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model

View File

@ -6,3 +6,4 @@ scikit-learn==1.1.2
joblib==1.1.0
catboost==1.0.6; platform_machine != 'aarch64'
lightgbm==3.3.2
xgboost==1.6.2

View File

@ -174,6 +174,69 @@ def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path))
def test_extract_data_and_train_model_XGBoostRegressor(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "XGBoostRegressor"})
freqai_conf.update({"strategy": "freqai_test_strat"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock()
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file()
shutil.rmtree(Path(freqai.dk.full_path))
def test_extract_data_and_train_model_XGBoostRegressorMultiModel(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "XGBoostRegressorMultiTarget"})
freqai_conf.update({"strategy": "freqai_test_multimodel_strat"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock()
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert len(freqai.dk.label_list) == 2
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file()
assert len(freqai.dk.data['training_features_list']) == 26
shutil.rmtree(Path(freqai.dk.full_path))
def test_start_backtesting(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180120-20180130"})
freqai_conf.get("freqai", {}).update({"save_backtest_models": True})