diff --git a/docs/freqai.md b/docs/freqai.md index 09a51c713..5f523f58a 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -98,6 +98,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old.
Defaults set to 0, which means models never expire.
**Datatype:** Positive integer. | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training data set.
**Datatype:** Positive integer. | `follow_mode` | If true, this instance of FreqAI will look for models associated with `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean. Default: `False`. +| `continual_learning` | If true, FreqAI will start training new models from the final state of the most recently trained model.
**Datatype:** Boolean. Default: `False`. | | **Feature parameters** | `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](#feature-engineering).
**Datatype:** Dictionary. | `include_timeframes` | A list of timeframes that all indicators in `populate_any_indicators` will be created for. The list is added as features to the base asset feature set.
**Datatype:** List of timeframes (strings). diff --git a/freqtrade/freqai/prediction_models/BaseClassifierModel.py b/freqtrade/freqai/base_models/BaseClassifierModel.py similarity index 76% rename from freqtrade/freqai/prediction_models/BaseClassifierModel.py rename to freqtrade/freqai/base_models/BaseClassifierModel.py index 2edbf3b51..5142ffb0d 100644 --- a/freqtrade/freqai/prediction_models/BaseClassifierModel.py +++ b/freqtrade/freqai/base_models/BaseClassifierModel.py @@ -21,12 +21,12 @@ class BaseClassifierModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period + :param unfiltered_df: Full dataframe for the current training period :param metadata: pair metadata from strategy. :return: :model: Trained model which can be used to inference (self.predict) @@ -36,14 +36,14 @@ class BaseClassifierModel(IFreqaiModel): # filter the features requested by user in the configuration file and elegantly handle NaNs features_filtered, labels_filtered = dk.filter_features( - unfiltered_dataframe, + unfiltered_df, dk.training_features_list, dk.label_list, training_filter=True, ) - start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d") - end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d") + start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d") + end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d") logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date}--------------------") # split data into train/test data. @@ -61,32 +61,32 @@ class BaseClassifierModel(IFreqaiModel): ) logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - model = self.fit(data_dictionary) + model = self.fit(data_dictionary, dk) logger.info(f"--------------------done training {pair}--------------------") return model def predict( - self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: """ Filter the prediction features data and predict with it. - :param: unfiltered_dataframe: Full dataframe for the current backtest period. + :param: unfiltered_df: Full dataframe for the current backtest period. :return: :pred_df: dataframe containing the predictions :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove data (NaNs) or felt uncertain about data (PCA and DI index) """ - dk.find_features(unfiltered_dataframe) - filtered_dataframe, _ = dk.filter_features( - unfiltered_dataframe, dk.training_features_list, training_filter=False + dk.find_features(unfiltered_df) + filtered_df, _ = dk.filter_features( + unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) - dk.data_dictionary["prediction_features"] = filtered_dataframe + filtered_df = dk.normalize_data_from_metadata(filtered_df) + dk.data_dictionary["prediction_features"] = filtered_df - self.data_cleaning_predict(dk, filtered_dataframe) + self.data_cleaning_predict(dk, filtered_df) predictions = self.model.predict(dk.data_dictionary["prediction_features"]) pred_df = DataFrame(predictions, columns=dk.label_list) diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/base_models/BaseRegressionModel.py similarity index 75% rename from freqtrade/freqai/prediction_models/BaseRegressionModel.py rename to freqtrade/freqai/base_models/BaseRegressionModel.py index 2ef175a2e..1d87e42c0 100644 --- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py +++ b/freqtrade/freqai/base_models/BaseRegressionModel.py @@ -20,12 +20,12 @@ class BaseRegressionModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period + :param unfiltered_df: Full dataframe for the current training period :param metadata: pair metadata from strategy. :return: :model: Trained model which can be used to inference (self.predict) @@ -35,14 +35,14 @@ class BaseRegressionModel(IFreqaiModel): # filter the features requested by user in the configuration file and elegantly handle NaNs features_filtered, labels_filtered = dk.filter_features( - unfiltered_dataframe, + unfiltered_df, dk.training_features_list, dk.label_list, training_filter=True, ) - start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d") - end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d") + start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d") + end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d") logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date}--------------------") # split data into train/test data. @@ -60,33 +60,33 @@ class BaseRegressionModel(IFreqaiModel): ) logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - model = self.fit(data_dictionary) + model = self.fit(data_dictionary, dk) logger.info(f"--------------------done training {pair}--------------------") return model def predict( - self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: """ Filter the prediction features data and predict with it. - :param: unfiltered_dataframe: Full dataframe for the current backtest period. + :param: unfiltered_df: Full dataframe for the current backtest period. :return: :pred_df: dataframe containing the predictions :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove data (NaNs) or felt uncertain about data (PCA and DI index) """ - dk.find_features(unfiltered_dataframe) - filtered_dataframe, _ = dk.filter_features( - unfiltered_dataframe, dk.training_features_list, training_filter=False + dk.find_features(unfiltered_df) + filtered_df, _ = dk.filter_features( + unfiltered_df, dk.training_features_list, training_filter=False ) - filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) - dk.data_dictionary["prediction_features"] = filtered_dataframe + filtered_df = dk.normalize_data_from_metadata(filtered_df) + dk.data_dictionary["prediction_features"] = filtered_df # optional additional data cleaning/analysis - self.data_cleaning_predict(dk, filtered_dataframe) + self.data_cleaning_predict(dk, filtered_df) predictions = self.model.predict(dk.data_dictionary["prediction_features"]) pred_df = DataFrame(predictions, columns=dk.label_list) diff --git a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py b/freqtrade/freqai/base_models/BaseTensorFlowModel.py similarity index 83% rename from freqtrade/freqai/prediction_models/BaseTensorFlowModel.py rename to freqtrade/freqai/base_models/BaseTensorFlowModel.py index 04eff045f..eea80f3a2 100644 --- a/freqtrade/freqai/prediction_models/BaseTensorFlowModel.py +++ b/freqtrade/freqai/base_models/BaseTensorFlowModel.py @@ -17,12 +17,12 @@ class BaseTensorFlowModel(IFreqaiModel): """ def train( - self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen + self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs ) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datakitchen for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period + :param unfiltered_df: Full dataframe for the current training period :param metadata: pair metadata from strategy. :return: :model: Trained model which can be used to inference (self.predict) @@ -32,14 +32,14 @@ class BaseTensorFlowModel(IFreqaiModel): # filter the features requested by user in the configuration file and elegantly handle NaNs features_filtered, labels_filtered = dk.filter_features( - unfiltered_dataframe, + unfiltered_df, dk.training_features_list, dk.label_list, training_filter=True, ) - start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d") - end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d") + start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d") + end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d") logger.info(f"-------------------- Training on data from {start_date} to " f"{end_date}--------------------") # split data into train/test data. @@ -57,7 +57,7 @@ class BaseTensorFlowModel(IFreqaiModel): ) logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') - model = self.fit(data_dictionary) + model = self.fit(data_dictionary, dk) logger.info(f"--------------------done training {pair}--------------------") diff --git a/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py new file mode 100644 index 000000000..aa5dbe629 --- /dev/null +++ b/freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py @@ -0,0 +1,75 @@ + +from joblib import Parallel +from sklearn.multioutput import MultiOutputRegressor, _fit_estimator +from sklearn.utils.fixes import delayed +from sklearn.utils.validation import has_fit_parameter + + +class FreqaiMultiOutputRegressor(MultiOutputRegressor): + + def fit(self, X, y, sample_weight=None, fit_params=None): + """Fit the model to data, separately for each output variable. + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + y : {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets. An indicator matrix turns on multilabel + estimation. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying regressor supports sample + weights. + fit_params : A list of dicts for the fit_params + Parameters passed to the ``estimator.fit`` method of each step. + Each dict may contain same or different values (e.g. different + eval_sets or init_models) + .. versionadded:: 0.23 + Returns + ------- + self : object + Returns a fitted instance. + """ + + if not hasattr(self.estimator, "fit"): + raise ValueError("The base estimator should implement a fit method") + + y = self._validate_data(X="no_validation", y=y, multi_output=True) + + # if is_classifier(self): + # check_classification_targets(y) + + if y.ndim == 1: + raise ValueError( + "y must have at least two dimensions for " + "multi-output regression but has only one." + ) + + if sample_weight is not None and not has_fit_parameter( + self.estimator, "sample_weight" + ): + raise ValueError("Underlying estimator does not support sample weights.") + + # fit_params_validated = _check_fit_params(X, fit_params) + + if not fit_params: + fit_params = [None] * y.shape[1] + + # if not init_models: + # init_models = [None] * y.shape[1] + + self.estimators_ = Parallel(n_jobs=self.n_jobs)( + delayed(_fit_estimator)( + self.estimator, X, y[:, i], sample_weight, **fit_params[i] + # init_model=init_models[i], eval_set=eval_sets[i], + # **fit_params_validated + ) + for i in range(y.shape[1]) + ) + + if hasattr(self.estimators_[0], "n_features_in_"): + self.n_features_in_ = self.estimators_[0].n_features_in_ + if hasattr(self.estimators_[0], "feature_names_in_"): + self.feature_names_in_ = self.estimators_[0].feature_names_in_ + + return diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index ee2978330..78931bed4 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -88,6 +88,7 @@ class IFreqaiModel(ABC): self.begin_time: float = 0 self.begin_time_train: float = 0 self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe']) + self.continual_learning = self.freqai_info.get('continual_learning', False) self._threads: List[threading.Thread] = [] self._stop_event = threading.Event() @@ -676,21 +677,30 @@ class IFreqaiModel(ABC): self.train_time = 0 return + def get_init_model(self, pair: str) -> Any: + if pair not in self.dd.model_dictionary or not self.continual_learning: + init_model = None + else: + init_model = self.dd.model_dictionary[pair] + + return init_model + # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModel.py for an example. @abstractmethod - def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any: + def train(self, unfiltered_df: DataFrame, pair: str, + dk: FreqaiDataKitchen, **kwargs) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datahandler for storing, saving, loading, and analyzing the data. - :param unfiltered_dataframe: Full dataframe for the current training period + :param unfiltered_df: Full dataframe for the current training period :param metadata: pair metadata from strategy. :return: Trained model which can be used to inference (self.predict) """ @abstractmethod - def fit(self, data_dictionary: Dict[str, Any]) -> Any: + def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any: """ Most regressors use the same function names and arguments e.g. user can drop in LGBMRegressor in place of CatBoostRegressor and all data @@ -703,11 +713,11 @@ class IFreqaiModel(ABC): @abstractmethod def predict( - self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs ) -> Tuple[DataFrame, NDArray[np.int_]]: """ Filter the prediction features data and predict with it. - :param unfiltered_dataframe: Full dataframe for the current backtest period. + :param unfiltered_df: Full dataframe for the current backtest period. :param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only :param first: boolean = whether this is the first prediction or not. :return: diff --git a/freqtrade/freqai/prediction_models/CatboostClassifier.py b/freqtrade/freqai/prediction_models/CatboostClassifier.py index b88b28b25..60536e6de 100644 --- a/freqtrade/freqai/prediction_models/CatboostClassifier.py +++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py @@ -3,7 +3,8 @@ from typing import Any, Dict from catboost import CatBoostClassifier, Pool -from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -16,7 +17,7 @@ class CatboostClassifier(BaseClassifierModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :params: @@ -36,6 +37,8 @@ class CatboostClassifier(BaseClassifierModel): **self.model_training_parameters, ) - cbr.fit(train_data) + init_model = self.get_init_model(dk.pair) + + cbr.fit(train_data, init_model=init_model) return cbr diff --git a/freqtrade/freqai/prediction_models/CatboostRegressor.py b/freqtrade/freqai/prediction_models/CatboostRegressor.py index d93569c91..73cf6c88a 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressor.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressor.py @@ -1,10 +1,10 @@ -import gc import logging from typing import Any, Dict from catboost import CatBoostRegressor, Pool -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -17,7 +17,7 @@ class CatboostRegressor(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold @@ -38,16 +38,13 @@ class CatboostRegressor(BaseRegressionModel): weight=data_dictionary["test_weights"], ) + init_model = self.get_init_model(dk.pair) + model = CatBoostRegressor( allow_writing_files=False, **self.model_training_parameters, ) - model.fit(X=train_data, eval_set=test_data) - - # some evidence that catboost pools have memory leaks: - # https://github.com/catboost/catboost/issues/1835 - del train_data, test_data - gc.collect() + model.fit(X=train_data, eval_set=test_data, init_model=init_model) return model diff --git a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py index 9894decd1..a376b2c33 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressorMultiTarget.py @@ -1,10 +1,11 @@ import logging from typing import Any, Dict -from catboost import CatBoostRegressor # , Pool -from sklearn.multioutput import MultiOutputRegressor +from catboost import CatBoostRegressor, Pool -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -17,7 +18,7 @@ class CatboostRegressorMultiTarget(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold @@ -31,14 +32,34 @@ class CatboostRegressorMultiTarget(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] - eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) + sample_weight = data_dictionary["train_weights"] - model = MultiOutputRegressor(estimator=cbr) - model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) + eval_sets = [None] * y.shape[1] if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: - train_score = model.score(X, y) - test_score = model.score(*eval_set) - logger.info(f"Train score {train_score}, Test score {test_score}") + eval_sets = [None] * data_dictionary['test_labels'].shape[1] + + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = Pool( + data=data_dictionary["test_features"], + label=data_dictionary["test_labels"].iloc[:, i], + weight=data_dictionary["test_weights"], + ) + + init_model = self.get_init_model(dk.pair) + + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] + + fit_params = [] + for i in range(len(eval_sets)): + fit_params.append( + {'eval_set': eval_sets[i], 'init_model': init_models[i]}) + + model = FreqaiMultiOutputRegressor(estimator=cbr) + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) + return model diff --git a/freqtrade/freqai/prediction_models/LightGBMClassifier.py b/freqtrade/freqai/prediction_models/LightGBMClassifier.py index 4ac2c448b..3eec516ba 100644 --- a/freqtrade/freqai/prediction_models/LightGBMClassifier.py +++ b/freqtrade/freqai/prediction_models/LightGBMClassifier.py @@ -3,7 +3,8 @@ from typing import Any, Dict from lightgbm import LGBMClassifier -from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -16,7 +17,7 @@ class LightGBMClassifier(BaseClassifierModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :params: @@ -35,9 +36,11 @@ class LightGBMClassifier(BaseClassifierModel): y = data_dictionary["train_labels"].to_numpy()[:, 0] train_weights = data_dictionary["train_weights"] + init_model = self.get_init_model(dk.pair) + model = LGBMClassifier(**self.model_training_parameters) model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, - eval_sample_weight=[test_weights]) + eval_sample_weight=[test_weights], init_model=init_model) return model diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressor.py b/freqtrade/freqai/prediction_models/LightGBMRegressor.py index 2431fd2ad..85c9b691c 100644 --- a/freqtrade/freqai/prediction_models/LightGBMRegressor.py +++ b/freqtrade/freqai/prediction_models/LightGBMRegressor.py @@ -3,7 +3,8 @@ from typing import Any, Dict from lightgbm import LGBMRegressor -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -16,7 +17,7 @@ class LightGBMRegressor(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ Most regressors use the same function names and arguments e.g. user can drop in LGBMRegressor in place of CatBoostRegressor and all data @@ -35,9 +36,11 @@ class LightGBMRegressor(BaseRegressionModel): y = data_dictionary["train_labels"] train_weights = data_dictionary["train_weights"] + init_model = self.get_init_model(dk.pair) + model = LGBMRegressor(**self.model_training_parameters) model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, - eval_sample_weight=[eval_weights]) + eval_sample_weight=[eval_weights], init_model=init_model) return model diff --git a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py index ecd405369..7a9b5c36a 100644 --- a/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py +++ b/freqtrade/freqai/prediction_models/LightGBMRegressorMultiTarget.py @@ -2,9 +2,10 @@ import logging from typing import Any, Dict from lightgbm import LGBMRegressor -from sklearn.multioutput import MultiOutputRegressor -from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -17,7 +18,7 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold @@ -28,12 +29,36 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel): X = data_dictionary["train_features"] y = data_dictionary["train_labels"] - eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) sample_weight = data_dictionary["train_weights"] - model = MultiOutputRegressor(estimator=lgb) - model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) - train_score = model.score(X, y) - test_score = model.score(*eval_set) - logger.info(f"Train score {train_score}, Test score {test_score}") + eval_weights = None + eval_sets = [None] * y.shape[1] + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + eval_weights = [data_dictionary["test_weights"]] + eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = ( # type: ignore + data_dictionary["test_features"], + data_dictionary["test_labels"].iloc[:, i] + ) + + init_model = self.get_init_model(dk.pair) + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] + + fit_params = [] + for i in range(len(eval_sets)): + fit_params.append( + {'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights, + 'init_model': init_models[i]}) + + model = FreqaiMultiOutputRegressor(estimator=lgb) + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) + + # model = FreqaiMultiOutputRegressor(estimator=lgb) + # model.fit(X=X, y=y, sample_weight=sample_weight, init_models=init_models, + # eval_sets=eval_sets, eval_sample_weight=eval_weights) return model diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRegressor.py new file mode 100644 index 000000000..c9be9ce74 --- /dev/null +++ b/freqtrade/freqai/prediction_models/XGBoostRegressor.py @@ -0,0 +1,45 @@ +import logging +from typing import Any, Dict + +from xgboost import XGBRegressor + +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + + +logger = logging.getLogger(__name__) + + +class XGBoostRegressor(BaseRegressionModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :param data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + + if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0: + eval_set = None + else: + eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])] + eval_weights = [data_dictionary['test_weights']] + + sample_weight = data_dictionary["train_weights"] + + xgb_model = self.get_init_model(dk.pair) + + model = XGBRegressor(**self.model_training_parameters) + + model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set, + sample_weight_eval_set=eval_weights, xgb_model=xgb_model) + + return model diff --git a/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py new file mode 100644 index 000000000..38c478c0b --- /dev/null +++ b/freqtrade/freqai/prediction_models/XGBoostRegressorMultiTarget.py @@ -0,0 +1,60 @@ +import logging +from typing import Any, Dict + +from xgboost import XGBRegressor + +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + + +logger = logging.getLogger(__name__) + + +class XGBoostRegressorMultiTarget(BaseRegressionModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :param data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + xgb = XGBRegressor(**self.model_training_parameters) + + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + sample_weight = data_dictionary["train_weights"] + + eval_weights = None + eval_sets = [None] * y.shape[1] + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + eval_weights = [data_dictionary["test_weights"]] + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = [( # type: ignore + data_dictionary["test_features"], + data_dictionary["test_labels"].iloc[:, i] + )] + + init_model = self.get_init_model(dk.pair) + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] + + fit_params = [] + for i in range(len(eval_sets)): + fit_params.append( + {'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights, + 'xgb_model': init_models[i]}) + + model = FreqaiMultiOutputRegressor(estimator=xgb) + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) + + return model diff --git a/requirements-freqai.txt b/requirements-freqai.txt index 26e4617af..e8d950382 100644 --- a/requirements-freqai.txt +++ b/requirements-freqai.txt @@ -6,3 +6,4 @@ scikit-learn==1.1.2 joblib==1.1.0 catboost==1.0.6; platform_machine != 'aarch64' lightgbm==3.3.2 +xgboost==1.6.2 diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 6364cb04f..5f8eeb086 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -174,6 +174,69 @@ def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) +def test_extract_data_and_train_model_XGBoostRegressor(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"freqaimodel": "XGBoostRegressor"}) + freqai_conf.update({"strategy": "freqai_test_strat"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file() + + shutil.rmtree(Path(freqai.dk.full_path)) + + +def test_extract_data_and_train_model_XGBoostRegressorMultiModel(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"freqaimodel": "XGBoostRegressorMultiTarget"}) + freqai_conf.update({"strategy": "freqai_test_multimodel_strat"}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + + assert len(freqai.dk.label_list) == 2 + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file() + assert len(freqai.dk.data['training_features_list']) == 26 + + shutil.rmtree(Path(freqai.dk.full_path)) + + def test_start_backtesting(mocker, freqai_conf): freqai_conf.update({"timerange": "20180120-20180130"}) freqai_conf.get("freqai", {}).update({"save_backtest_models": True})