import logging
from collections import Counter
from typing import Any, Dict

import numpy as np
import pandas as pd
from catboost import CatBoostClassifier  # , Pool
from sklearn.multioutput import MultiOutputClassifier

from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


logger = logging.getLogger(__name__)


class CatboostPredictionBinaryMultiModel(BaseClassifierModel):
    """
    User created prediction model. The class needs to override three necessary
    functions, predict(), train(), fit(). The class inherits ModelHandler which
    has its own DataHandler where data is held, saved, loaded, and managed.
    """

    def fit_augmented(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any:
        """
        User sets up the training and test data to fit their desired model here.
        :param data_dictionary: the dictionary constructed by DataHandler to hold
                                all the training and test data/labels.
        """

        cbr = CatBoostClassifier(
            allow_writing_files=False,
            gpu_ram_part=0.5,
            verbose=100,
            early_stopping_rounds=400,
            **self.model_training_parameters,
        )
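
        # NOTE (assumption, not enforced here): with several binary label columns
        # passed to a single CatBoostClassifier, a multilabel objective such as
        # loss_function="MultiLogloss" would normally be supplied via
        # "model_training_parameters" in the freqai config.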

        X = data_dictionary["train_features"]
        y = data_dictionary["train_labels"]
        if data_dictionary["test_features"].size:
            # eval_set keeps the original (un-augmented) test split for scoring below
            eval_set = (data_dictionary["test_features"],
                        data_dictionary["test_labels"])
        sample_weight = data_dictionary["train_weights"]

        # Augment the training data with a jittered copy of itself: every feature
        # receives zero-mean Gaussian noise scaled to 1% of that feature's std.
        # mu = 0
        # sigma = 0.01
        # noise = np.random.normal(mu, sigma, [X.shape[0], X.shape[1]])
        # Xaugmented = X + noise
        Xaugmented = X + np.random.randn(*X.shape) * X.std(axis=0).to_numpy() / 100
        X = pd.concat([X, Xaugmented], ignore_index=True)
        y = pd.concat([y, y], ignore_index=True)
        sample_weight = np.tile(sample_weight, 2)

        # Per-column inverse-frequency class weights: within each label column,
        # rarer classes get proportionally larger weights (len(y) / class count).
        weights = pd.DataFrame(0.0, index=y.index, columns=y.columns)
        for col_name in y:
            cnt = Counter(y[col_name])
            for k, v in cnt.items():
                weights.loc[y[col_name] == k, col_name] = len(y) / v

        # model = MultiOutputClassifier(estimator=cbr)
        model = cbr

        # Previously trained model (when continued learning is enabled) so that
        # CatBoost resumes from it instead of starting from scratch.
        init_model = self.get_init_model(dk.pair)

        model.fit(X=X, y=y,
                  sample_weight=sample_weight * weights.sum(1),
                  init_model=init_model)  # , eval_set=eval_set
        train_score = model.score(X, y)
        test_score = "Empty"
        if data_dictionary["test_features"].size:
            test_score = model.score(*eval_set)
        logger.info(f"Augmented Train score {train_score}, Augmented Test score {test_score}")

        return model

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen) -> Any:
        """
        User sets up the training and test data to fit their desired model here.
        :param data_dictionary: the dictionary constructed by DataHandler to hold
                                all the training and test data/labels.
        """

        # cbr = CatBoostClassifier(
        #     allow_writing_files=False,
        #     gpu_ram_part=0.5,
        #     verbose=100,
        #     early_stopping_rounds=400,
        #     **self.model_training_parameters,
        # )

        # X = data_dictionary["train_features"]
        # y = data_dictionary["train_labels"]
        # if data_dictionary["test_features"].size:
        #     eval_set = (data_dictionary["test_features"],
        #                 data_dictionary["test_labels"].values.astype(np.float32))
        # sample_weight = data_dictionary["train_weights"]

        # from collections import Counter
        # weights = y.copy()
        # for col_name in y:
        #     cnt = Counter(y[col_name])
        #     for k, v in cnt.items():
        #         weights[col_name][y[col_name] == k] = len(y) / v

        # model = MultiOutputClassifier(estimator=cbr)
        # model.fit(X=X, Y=y.values.astype(np.float32),
        #           sample_weight=sample_weight * weights.sum(1))  # , eval_set=eval_set
        # train_score = model.score(X, y.values.astype(np.float32))
        # test_score = "Empty"
        # if data_dictionary["test_features"].size:
        #     test_score = model.score(*eval_set)
        # logger.info(f"Train score {train_score}, Test score {test_score}")

        # Active implementation: delegate to the noise-augmented fit above.
        model = self.fit_augmented(data_dictionary, dk)

        return model
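

# Rough usage sketch (paths and config keys assumed to follow the standard FreqAI
# layout, not taken from this file): place this module under
# user_data/freqaimodels/ and select it either on the command line,
#
#     freqtrade trade --freqaimodel CatboostPredictionBinaryMultiModel ...
#
# or with "freqaimodel": "CatboostPredictionBinaryMultiModel" in the configuration.
# CatBoost arguments (loss function, depth, iterations, ...) are forwarded through
# freqai.model_training_parameters, which this model unpacks into CatBoostClassifier.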