2022-05-17 15:13:38 +00:00
|
|
|
import logging
|
2022-07-11 09:33:59 +00:00
|
|
|
from typing import Any, Dict
|
2022-07-26 15:29:29 +00:00
|
|
|
import gc
|
2022-05-17 15:13:38 +00:00
|
|
|
from catboost import CatBoostRegressor, Pool
|
|
|
|
|
2022-07-11 09:33:59 +00:00
|
|
|
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
|
2022-05-17 15:13:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2022-07-11 09:33:59 +00:00
|
|
|
class CatboostPredictionModel(BaseRegressionModel):
    """
    User created prediction model backed by a CatBoostRegressor.

    The class derives from BaseRegressionModel and only overrides fit();
    everything else is inherited from the base class.
    """

    def fit(self, data_dictionary: Dict) -> Any:
        """
        User sets up the training and test data to fit their desired model here

        :param data_dictionary: the dictionary holding all the training and
            test data/labels. The keys "train_features", "train_labels" and
            "train_weights" are always read; "test_features", "test_labels"
            and "test_weights" are read unless the configured test_size is 0.
        :return: the fitted CatBoostRegressor
        """

        train_data = Pool(
            data=data_dictionary["train_features"],
            label=data_dictionary["train_labels"],
            weight=data_dictionary["train_weights"],
        )

        # With test_size set to 0 there is no held-out data, so no evaluation
        # set is handed to catboost. A missing setting falls back to 0.1.
        test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1)
        if test_size == 0:
            test_data = None
        else:
            test_data = Pool(
                data=data_dictionary["test_features"],
                label=data_dictionary["test_labels"],
                weight=data_dictionary["test_weights"],
            )

        # Merge into a single dict instead of mixing an explicit keyword with
        # **kwargs: if the user parameters also contain "allow_writing_files",
        # the explicit-keyword form raises TypeError (duplicate keyword).
        # Here False is only the default and a user-supplied value wins.
        model_parameters = {
            "allow_writing_files": False,
            **self.model_training_parameters,
        }
        model = CatBoostRegressor(**model_parameters)

        model.fit(X=train_data, eval_set=test_data)

        # some evidence that catboost pools have memory leaks:
        # https://github.com/catboost/catboost/issues/1835
        del train_data, test_data
        gc.collect()

        return model