Merge remote-tracking branch 'origin/develop' into add-single-precision-freqai

This commit is contained in:
robcaulk
2022-11-12 10:21:38 +01:00
8 changed files with 384 additions and 11 deletions

View File

@@ -0,0 +1,93 @@
import numpy as np
from joblib import Parallel
from sklearn.base import is_classifier
from sklearn.multioutput import MultiOutputClassifier, _fit_estimator
from sklearn.utils.fixes import delayed
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import has_fit_parameter
from freqtrade.exceptions import OperationalException
class FreqaiMultiOutputClassifier(MultiOutputClassifier):
def fit(self, X, y, sample_weight=None, fit_params=None):
"""Fit the model to data, separately for each output variable.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The input data.
y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
Multi-output targets. An indicator matrix turns on multilabel
estimation.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If `None`, then samples are equally weighted.
Only supported if the underlying classifier supports sample
weights.
fit_params : A list of dicts for the fit_params
Parameters passed to the ``estimator.fit`` method of each step.
Each dict may contain same or different values (e.g. different
eval_sets or init_models)
.. versionadded:: 0.23
Returns
-------
self : object
Returns a fitted instance.
"""
if not hasattr(self.estimator, "fit"):
raise ValueError("The base estimator should implement a fit method")
y = self._validate_data(X="no_validation", y=y, multi_output=True)
if is_classifier(self):
check_classification_targets(y)
if y.ndim == 1:
raise ValueError(
"y must have at least two dimensions for "
"multi-output regression but has only one."
)
if sample_weight is not None and not has_fit_parameter(
self.estimator, "sample_weight"
):
raise ValueError("Underlying estimator does not support sample weights.")
if not fit_params:
fit_params = [None] * y.shape[1]
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_estimator)(
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
)
for i in range(y.shape[1])
)
self.classes_ = []
for estimator in self.estimators_:
self.classes_.extend(estimator.classes_)
if len(set(self.classes_)) != len(self.classes_):
raise OperationalException(f"Class labels must be unique across targets: "
f"{self.classes_}")
if hasattr(self.estimators_[0], "n_features_in_"):
self.n_features_in_ = self.estimators_[0].n_features_in_
if hasattr(self.estimators_[0], "feature_names_in_"):
self.feature_names_in_ = self.estimators_[0].feature_names_in_
return self
def predict_proba(self, X):
"""
Get predict_proba and stack arrays horizontally
"""
results = np.hstack(super().predict_proba(X))
return np.squeeze(results)
def predict(self, X):
"""
Get predict and squeeze into 2D array
"""
results = super().predict(X)
return np.squeeze(results)

View File

@@ -87,6 +87,7 @@ class FreqaiDataDrawer:
self.create_follower_dict()
self.load_drawer_from_disk()
self.load_historic_predictions_from_disk()
self.metric_tracker: Dict[str, Dict[str, Dict[str, list]]] = {}
self.load_metric_tracker_from_disk()
self.training_queue: Dict[str, int] = {}
self.history_lock = threading.Lock()
@@ -97,7 +98,6 @@ class FreqaiDataDrawer:
self.empty_pair_dict: pair_info = {
"model_filename": "", "trained_timestamp": 0,
"data_path": "", "extras": {}}
self.metric_tracker: Dict[str, Dict[str, Dict[str, list]]] = {}
def update_metric_tracker(self, metric: str, value: float, pair: str) -> None:
"""
@@ -153,6 +153,7 @@ class FreqaiDataDrawer:
if exists:
with open(self.metric_tracker_path, "r") as fp:
self.metric_tracker = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
logger.info("Loading existing metric tracker from disk.")
else:
logger.info("Could not find existing metric tracker, starting from scratch")

View File

@@ -0,0 +1,74 @@
import logging
import sys
from pathlib import Path
from typing import Any, Dict
from catboost import CatBoostClassifier, Pool
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class CatboostClassifierMultiTarget(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
cbc = CatBoostClassifier(
allow_writing_files=True,
loss_function='MultiClass',
train_dir=Path(dk.data_path),
**self.model_training_parameters,
)
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
sample_weight = data_dictionary["train_weights"]
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_sets = [None] * data_dictionary['test_labels'].shape[1]
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"].iloc[:, i],
weight=data_dictionary["test_weights"],
)
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append({
'eval_set': eval_sets[i], 'init_model': init_models[i],
'log_cout': sys.stdout, 'log_cerr': sys.stderr,
})
model = FreqaiMultiOutputClassifier(estimator=cbc)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model

View File

@@ -0,0 +1,64 @@
import logging
from typing import Any, Dict
from lightgbm import LGBMClassifier
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class LightGBMClassifierMultiTarget(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
lgb = LGBMClassifier(**self.model_training_parameters)
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
sample_weight = data_dictionary["train_weights"]
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = ( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
)
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
'init_model': init_models[i]})
model = FreqaiMultiOutputClassifier(estimator=lgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model