Merge pull request #7367 from freqtrade/add-continual-learning
add continual learning to catboost and friends
This commit is contained in:
commit
e4caccc353
@ -98,6 +98,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
|
||||
| `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> Defaults set to 0, which means models never expire. <br> **Datatype:** Positive integer.
|
||||
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training data set. <br> **Datatype:** Positive integer.
|
||||
| `follow_mode` | If true, this instance of FreqAI will look for models associated with `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. Default: `False`.
|
||||
| `continual_learning` | If true, FreqAI will start training new models from the final state of the most recently trained model. <br> **Datatype:** Boolean. Default: `False`.
|
||||
| | **Feature parameters**
|
||||
| `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](#feature-engineering). <br> **Datatype:** Dictionary.
|
||||
| `include_timeframes` | A list of timeframes that all indicators in `populate_any_indicators` will be created for. The list is added as features to the base asset feature set. <br> **Datatype:** List of timeframes (strings).
|
||||
|
@ -21,12 +21,12 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
"""
|
||||
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_dataframe: Full dataframe for the current training period
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:return:
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
@ -36,14 +36,14 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
unfiltered_dataframe,
|
||||
unfiltered_df,
|
||||
dk.training_features_list,
|
||||
dk.label_list,
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
# split data into train/test data.
|
||||
@ -61,32 +61,32 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
||||
return model
|
||||
|
||||
def predict(
|
||||
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
"""
|
||||
|
||||
dk.find_features(unfiltered_dataframe)
|
||||
filtered_dataframe, _ = dk.filter_features(
|
||||
unfiltered_dataframe, dk.training_features_list, training_filter=False
|
||||
dk.find_features(unfiltered_df)
|
||||
filtered_df, _ = dk.filter_features(
|
||||
unfiltered_df, dk.training_features_list, training_filter=False
|
||||
)
|
||||
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
|
||||
dk.data_dictionary["prediction_features"] = filtered_dataframe
|
||||
filtered_df = dk.normalize_data_from_metadata(filtered_df)
|
||||
dk.data_dictionary["prediction_features"] = filtered_df
|
||||
|
||||
self.data_cleaning_predict(dk, filtered_dataframe)
|
||||
self.data_cleaning_predict(dk, filtered_df)
|
||||
|
||||
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
|
||||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
@ -20,12 +20,12 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
"""
|
||||
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_dataframe: Full dataframe for the current training period
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:return:
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
@ -35,14 +35,14 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
unfiltered_dataframe,
|
||||
unfiltered_df,
|
||||
dk.training_features_list,
|
||||
dk.label_list,
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
# split data into train/test data.
|
||||
@ -60,33 +60,33 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
||||
return model
|
||||
|
||||
def predict(
|
||||
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
"""
|
||||
|
||||
dk.find_features(unfiltered_dataframe)
|
||||
filtered_dataframe, _ = dk.filter_features(
|
||||
unfiltered_dataframe, dk.training_features_list, training_filter=False
|
||||
dk.find_features(unfiltered_df)
|
||||
filtered_df, _ = dk.filter_features(
|
||||
unfiltered_df, dk.training_features_list, training_filter=False
|
||||
)
|
||||
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
|
||||
dk.data_dictionary["prediction_features"] = filtered_dataframe
|
||||
filtered_df = dk.normalize_data_from_metadata(filtered_df)
|
||||
dk.data_dictionary["prediction_features"] = filtered_df
|
||||
|
||||
# optional additional data cleaning/analysis
|
||||
self.data_cleaning_predict(dk, filtered_dataframe)
|
||||
self.data_cleaning_predict(dk, filtered_df)
|
||||
|
||||
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
|
||||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
@ -17,12 +17,12 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
"""
|
||||
|
||||
def train(
|
||||
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
|
||||
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_dataframe: Full dataframe for the current training period
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:return:
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
@ -32,14 +32,14 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
unfiltered_dataframe,
|
||||
unfiltered_df,
|
||||
dk.training_features_list,
|
||||
dk.label_list,
|
||||
training_filter=True,
|
||||
)
|
||||
|
||||
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
# split data into train/test data.
|
||||
@ -57,7 +57,7 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
)
|
||||
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
|
||||
|
||||
model = self.fit(data_dictionary)
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
|
75
freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py
Normal file
75
freqtrade/freqai/base_models/FreqaiMultiOutputRegressor.py
Normal file
@ -0,0 +1,75 @@
|
||||
|
||||
from joblib import Parallel
|
||||
from sklearn.multioutput import MultiOutputRegressor, _fit_estimator
|
||||
from sklearn.utils.fixes import delayed
|
||||
from sklearn.utils.validation import has_fit_parameter
|
||||
|
||||
|
||||
class FreqaiMultiOutputRegressor(MultiOutputRegressor):
|
||||
|
||||
def fit(self, X, y, sample_weight=None, fit_params=None):
|
||||
"""Fit the model to data, separately for each output variable.
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
||||
The input data.
|
||||
y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
|
||||
Multi-output targets. An indicator matrix turns on multilabel
|
||||
estimation.
|
||||
sample_weight : array-like of shape (n_samples,), default=None
|
||||
Sample weights. If `None`, then samples are equally weighted.
|
||||
Only supported if the underlying regressor supports sample
|
||||
weights.
|
||||
fit_params : A list of dicts for the fit_params
|
||||
Parameters passed to the ``estimator.fit`` method of each step.
|
||||
Each dict may contain same or different values (e.g. different
|
||||
eval_sets or init_models)
|
||||
.. versionadded:: 0.23
|
||||
Returns
|
||||
-------
|
||||
self : object
|
||||
Returns a fitted instance.
|
||||
"""
|
||||
|
||||
if not hasattr(self.estimator, "fit"):
|
||||
raise ValueError("The base estimator should implement a fit method")
|
||||
|
||||
y = self._validate_data(X="no_validation", y=y, multi_output=True)
|
||||
|
||||
# if is_classifier(self):
|
||||
# check_classification_targets(y)
|
||||
|
||||
if y.ndim == 1:
|
||||
raise ValueError(
|
||||
"y must have at least two dimensions for "
|
||||
"multi-output regression but has only one."
|
||||
)
|
||||
|
||||
if sample_weight is not None and not has_fit_parameter(
|
||||
self.estimator, "sample_weight"
|
||||
):
|
||||
raise ValueError("Underlying estimator does not support sample weights.")
|
||||
|
||||
# fit_params_validated = _check_fit_params(X, fit_params)
|
||||
|
||||
if not fit_params:
|
||||
fit_params = [None] * y.shape[1]
|
||||
|
||||
# if not init_models:
|
||||
# init_models = [None] * y.shape[1]
|
||||
|
||||
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
|
||||
delayed(_fit_estimator)(
|
||||
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
|
||||
# init_model=init_models[i], eval_set=eval_sets[i],
|
||||
# **fit_params_validated
|
||||
)
|
||||
for i in range(y.shape[1])
|
||||
)
|
||||
|
||||
if hasattr(self.estimators_[0], "n_features_in_"):
|
||||
self.n_features_in_ = self.estimators_[0].n_features_in_
|
||||
if hasattr(self.estimators_[0], "feature_names_in_"):
|
||||
self.feature_names_in_ = self.estimators_[0].feature_names_in_
|
||||
|
||||
return
|
@ -88,6 +88,7 @@ class IFreqaiModel(ABC):
|
||||
self.begin_time: float = 0
|
||||
self.begin_time_train: float = 0
|
||||
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
|
||||
self.continual_learning = self.freqai_info.get('continual_learning', False)
|
||||
|
||||
self._threads: List[threading.Thread] = []
|
||||
self._stop_event = threading.Event()
|
||||
@ -676,21 +677,30 @@ class IFreqaiModel(ABC):
|
||||
self.train_time = 0
|
||||
return
|
||||
|
||||
def get_init_model(self, pair: str) -> Any:
|
||||
if pair not in self.dd.model_dictionary or not self.continual_learning:
|
||||
init_model = None
|
||||
else:
|
||||
init_model = self.dd.model_dictionary[pair]
|
||||
|
||||
return init_model
|
||||
|
||||
# Following methods which are overridden by user made prediction models.
|
||||
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
|
||||
|
||||
@abstractmethod
|
||||
def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any:
|
||||
def train(self, unfiltered_df: DataFrame, pair: str,
|
||||
dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
Filter the training data and train a model to it. Train makes heavy use of the datahandler
|
||||
for storing, saving, loading, and analyzing the data.
|
||||
:param unfiltered_dataframe: Full dataframe for the current training period
|
||||
:param unfiltered_df: Full dataframe for the current training period
|
||||
:param metadata: pair metadata from strategy.
|
||||
:return: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def fit(self, data_dictionary: Dict[str, Any]) -> Any:
|
||||
def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
Most regressors use the same function names and arguments e.g. user
|
||||
can drop in LGBMRegressor in place of CatBoostRegressor and all data
|
||||
@ -703,11 +713,11 @@ class IFreqaiModel(ABC):
|
||||
|
||||
@abstractmethod
|
||||
def predict(
|
||||
self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
|
||||
:param first: boolean = whether this is the first prediction or not.
|
||||
:return:
|
||||
|
@ -3,7 +3,8 @@ from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostClassifier, Pool
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -16,7 +17,7 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
@ -36,6 +37,8 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
cbr.fit(train_data)
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
cbr.fit(train_data, init_model=init_model)
|
||||
|
||||
return cbr
|
||||
|
@ -1,10 +1,10 @@
|
||||
import gc
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -17,7 +17,7 @@ class CatboostRegressor(BaseRegressionModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
@ -38,16 +38,13 @@ class CatboostRegressor(BaseRegressionModel):
|
||||
weight=data_dictionary["test_weights"],
|
||||
)
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
model.fit(X=train_data, eval_set=test_data)
|
||||
|
||||
# some evidence that catboost pools have memory leaks:
|
||||
# https://github.com/catboost/catboost/issues/1835
|
||||
del train_data, test_data
|
||||
gc.collect()
|
||||
model.fit(X=train_data, eval_set=test_data, init_model=init_model)
|
||||
|
||||
return model
|
||||
|
@ -1,10 +1,11 @@
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor # , Pool
|
||||
from sklearn.multioutput import MultiOutputRegressor
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -17,7 +18,7 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
@ -31,14 +32,34 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
|
||||
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
model = MultiOutputRegressor(estimator=cbr)
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
|
||||
eval_sets = [None] * y.shape[1]
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
train_score = model.score(X, y)
|
||||
test_score = model.score(*eval_set)
|
||||
logger.info(f"Train score {train_score}, Test score {test_score}")
|
||||
eval_sets = [None] * data_dictionary['test_labels'].shape[1]
|
||||
|
||||
for i in range(data_dictionary['test_labels'].shape[1]):
|
||||
eval_sets[i] = Pool(
|
||||
data=data_dictionary["test_features"],
|
||||
label=data_dictionary["test_labels"].iloc[:, i],
|
||||
weight=data_dictionary["test_weights"],
|
||||
)
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
if init_model:
|
||||
init_models = init_model.estimators_
|
||||
else:
|
||||
init_models = [None] * y.shape[1]
|
||||
|
||||
fit_params = []
|
||||
for i in range(len(eval_sets)):
|
||||
fit_params.append(
|
||||
{'eval_set': eval_sets[i], 'init_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=cbr)
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
return model
|
||||
|
@ -3,7 +3,8 @@ from typing import Any, Dict
|
||||
|
||||
from lightgbm import LGBMClassifier
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -16,7 +17,7 @@ class LightGBMClassifier(BaseClassifierModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
@ -35,9 +36,11 @@ class LightGBMClassifier(BaseClassifierModel):
|
||||
y = data_dictionary["train_labels"].to_numpy()[:, 0]
|
||||
train_weights = data_dictionary["train_weights"]
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = LGBMClassifier(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
|
||||
eval_sample_weight=[test_weights])
|
||||
eval_sample_weight=[test_weights], init_model=init_model)
|
||||
|
||||
return model
|
||||
|
@ -3,7 +3,8 @@ from typing import Any, Dict
|
||||
|
||||
from lightgbm import LGBMRegressor
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -16,7 +17,7 @@ class LightGBMRegressor(BaseRegressionModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
Most regressors use the same function names and arguments e.g. user
|
||||
can drop in LGBMRegressor in place of CatBoostRegressor and all data
|
||||
@ -35,9 +36,11 @@ class LightGBMRegressor(BaseRegressionModel):
|
||||
y = data_dictionary["train_labels"]
|
||||
train_weights = data_dictionary["train_weights"]
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = LGBMRegressor(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
|
||||
eval_sample_weight=[eval_weights])
|
||||
eval_sample_weight=[eval_weights], init_model=init_model)
|
||||
|
||||
return model
|
||||
|
@ -2,9 +2,10 @@ import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from lightgbm import LGBMRegressor
|
||||
from sklearn.multioutput import MultiOutputRegressor
|
||||
|
||||
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@ -17,7 +18,7 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict) -> Any:
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
@ -28,12 +29,36 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
model = MultiOutputRegressor(estimator=lgb)
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
|
||||
train_score = model.score(X, y)
|
||||
test_score = model.score(*eval_set)
|
||||
logger.info(f"Train score {train_score}, Test score {test_score}")
|
||||
eval_weights = None
|
||||
eval_sets = [None] * y.shape[1]
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
eval_weights = [data_dictionary["test_weights"]]
|
||||
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore
|
||||
for i in range(data_dictionary['test_labels'].shape[1]):
|
||||
eval_sets[i] = ( # type: ignore
|
||||
data_dictionary["test_features"],
|
||||
data_dictionary["test_labels"].iloc[:, i]
|
||||
)
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
if init_model:
|
||||
init_models = init_model.estimators_
|
||||
else:
|
||||
init_models = [None] * y.shape[1]
|
||||
|
||||
fit_params = []
|
||||
for i in range(len(eval_sets)):
|
||||
fit_params.append(
|
||||
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
|
||||
'init_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=lgb)
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
# model = FreqaiMultiOutputRegressor(estimator=lgb)
|
||||
# model.fit(X=X, y=y, sample_weight=sample_weight, init_models=init_models,
|
||||
# eval_sets=eval_sets, eval_sample_weight=eval_weights)
|
||||
return model
|
||||
|
45
freqtrade/freqai/prediction_models/XGBoostRegressor.py
Normal file
45
freqtrade/freqai/prediction_models/XGBoostRegressor.py
Normal file
@ -0,0 +1,45 @@
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from xgboost import XGBRegressor
|
||||
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostRegressor(BaseRegressionModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
functions, predict(), train(), fit(). The class inherits ModelHandler which
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
eval_set = None
|
||||
else:
|
||||
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
|
||||
eval_weights = [data_dictionary['test_weights']]
|
||||
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
xgb_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = XGBRegressor(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
|
||||
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
|
||||
|
||||
return model
|
@ -0,0 +1,60 @@
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from xgboost import XGBRegressor
|
||||
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostRegressorMultiTarget(BaseRegressionModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
functions, predict(), train(), fit(). The class inherits ModelHandler which
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
xgb = XGBRegressor(**self.model_training_parameters)
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
eval_weights = None
|
||||
eval_sets = [None] * y.shape[1]
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
eval_weights = [data_dictionary["test_weights"]]
|
||||
for i in range(data_dictionary['test_labels'].shape[1]):
|
||||
eval_sets[i] = [( # type: ignore
|
||||
data_dictionary["test_features"],
|
||||
data_dictionary["test_labels"].iloc[:, i]
|
||||
)]
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
if init_model:
|
||||
init_models = init_model.estimators_
|
||||
else:
|
||||
init_models = [None] * y.shape[1]
|
||||
|
||||
fit_params = []
|
||||
for i in range(len(eval_sets)):
|
||||
fit_params.append(
|
||||
{'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights,
|
||||
'xgb_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=xgb)
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
return model
|
@ -6,3 +6,4 @@ scikit-learn==1.1.2
|
||||
joblib==1.1.0
|
||||
catboost==1.0.6; platform_machine != 'aarch64'
|
||||
lightgbm==3.3.2
|
||||
xgboost==1.6.2
|
||||
|
@ -174,6 +174,69 @@ def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf):
|
||||
shutil.rmtree(Path(freqai.dk.full_path))
|
||||
|
||||
|
||||
def test_extract_data_and_train_model_XGBoostRegressor(mocker, freqai_conf):
|
||||
freqai_conf.update({"timerange": "20180110-20180130"})
|
||||
freqai_conf.update({"freqaimodel": "XGBoostRegressor"})
|
||||
freqai_conf.update({"strategy": "freqai_test_strat"})
|
||||
|
||||
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
|
||||
exchange = get_patched_exchange(mocker, freqai_conf)
|
||||
strategy.dp = DataProvider(freqai_conf, exchange)
|
||||
strategy.freqai_info = freqai_conf.get("freqai", {})
|
||||
freqai = strategy.freqai
|
||||
freqai.live = True
|
||||
freqai.dk = FreqaiDataKitchen(freqai_conf)
|
||||
timerange = TimeRange.parse_timerange("20180110-20180130")
|
||||
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
|
||||
|
||||
freqai.dd.pair_dict = MagicMock()
|
||||
|
||||
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
|
||||
new_timerange = TimeRange.parse_timerange("20180120-20180130")
|
||||
|
||||
freqai.extract_data_and_train_model(
|
||||
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
|
||||
|
||||
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
|
||||
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
|
||||
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file()
|
||||
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file()
|
||||
|
||||
shutil.rmtree(Path(freqai.dk.full_path))
|
||||
|
||||
|
||||
def test_extract_data_and_train_model_XGBoostRegressorMultiModel(mocker, freqai_conf):
|
||||
freqai_conf.update({"timerange": "20180110-20180130"})
|
||||
freqai_conf.update({"freqaimodel": "XGBoostRegressorMultiTarget"})
|
||||
freqai_conf.update({"strategy": "freqai_test_multimodel_strat"})
|
||||
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
|
||||
exchange = get_patched_exchange(mocker, freqai_conf)
|
||||
strategy.dp = DataProvider(freqai_conf, exchange)
|
||||
strategy.freqai_info = freqai_conf.get("freqai", {})
|
||||
freqai = strategy.freqai
|
||||
freqai.live = True
|
||||
freqai.dk = FreqaiDataKitchen(freqai_conf)
|
||||
timerange = TimeRange.parse_timerange("20180110-20180130")
|
||||
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
|
||||
|
||||
freqai.dd.pair_dict = MagicMock()
|
||||
|
||||
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
|
||||
new_timerange = TimeRange.parse_timerange("20180120-20180130")
|
||||
|
||||
freqai.extract_data_and_train_model(
|
||||
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
|
||||
|
||||
assert len(freqai.dk.label_list) == 2
|
||||
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
|
||||
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
|
||||
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").is_file()
|
||||
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").is_file()
|
||||
assert len(freqai.dk.data['training_features_list']) == 26
|
||||
|
||||
shutil.rmtree(Path(freqai.dk.full_path))
|
||||
|
||||
|
||||
def test_start_backtesting(mocker, freqai_conf):
|
||||
freqai_conf.update({"timerange": "20180120-20180130"})
|
||||
freqai_conf.get("freqai", {}).update({"save_backtest_models": True})
|
||||
|
Loading…
Reference in New Issue
Block a user