Merge pull request #7644 from markdregan/multi-target-classifier

Support for multi target multi-class classification (FreqaiMultiOutputRegressor for Classification)
This commit is contained in:
Robert Caulk 2022-11-11 18:48:38 +01:00 committed by GitHub
commit 833578716c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 382 additions and 10 deletions

View File

@ -0,0 +1,93 @@
import numpy as np
from joblib import Parallel
from sklearn.base import is_classifier
from sklearn.multioutput import MultiOutputClassifier, _fit_estimator
from sklearn.utils.fixes import delayed
from sklearn.utils.multiclass import check_classification_targets
from sklearn.utils.validation import has_fit_parameter
from freqtrade.exceptions import OperationalException
class FreqaiMultiOutputClassifier(MultiOutputClassifier):
def fit(self, X, y, sample_weight=None, fit_params=None):
"""Fit the model to data, separately for each output variable.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The input data.
y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
Multi-output targets. An indicator matrix turns on multilabel
estimation.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If `None`, then samples are equally weighted.
Only supported if the underlying classifier supports sample
weights.
fit_params : A list of dicts for the fit_params
Parameters passed to the ``estimator.fit`` method of each step.
Each dict may contain same or different values (e.g. different
eval_sets or init_models)
.. versionadded:: 0.23
Returns
-------
self : object
Returns a fitted instance.
"""
if not hasattr(self.estimator, "fit"):
raise ValueError("The base estimator should implement a fit method")
y = self._validate_data(X="no_validation", y=y, multi_output=True)
if is_classifier(self):
check_classification_targets(y)
if y.ndim == 1:
raise ValueError(
"y must have at least two dimensions for "
"multi-output regression but has only one."
)
if sample_weight is not None and not has_fit_parameter(
self.estimator, "sample_weight"
):
raise ValueError("Underlying estimator does not support sample weights.")
if not fit_params:
fit_params = [None] * y.shape[1]
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_estimator)(
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
)
for i in range(y.shape[1])
)
self.classes_ = []
for estimator in self.estimators_:
self.classes_.extend(estimator.classes_)
if len(set(self.classes_)) != len(self.classes_):
raise OperationalException(f"Class labels must be unique across targets: "
f"{self.classes_}")
if hasattr(self.estimators_[0], "n_features_in_"):
self.n_features_in_ = self.estimators_[0].n_features_in_
if hasattr(self.estimators_[0], "feature_names_in_"):
self.feature_names_in_ = self.estimators_[0].feature_names_in_
return self
def predict_proba(self, X):
"""
Get predict_proba and stack arrays horizontally
"""
results = np.hstack(super().predict_proba(X))
return np.squeeze(results)
def predict(self, X):
"""
Get predict and squeeze into 2D array
"""
results = super().predict(X)
return np.squeeze(results)

View File

@ -0,0 +1,74 @@
import logging
import sys
from pathlib import Path
from typing import Any, Dict
from catboost import CatBoostClassifier, Pool
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class CatboostClassifierMultiTarget(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
cbc = CatBoostClassifier(
allow_writing_files=True,
loss_function='MultiClass',
train_dir=Path(dk.data_path),
**self.model_training_parameters,
)
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
sample_weight = data_dictionary["train_weights"]
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_sets = [None] * data_dictionary['test_labels'].shape[1]
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"].iloc[:, i],
weight=data_dictionary["test_weights"],
)
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append({
'eval_set': eval_sets[i], 'init_model': init_models[i],
'log_cout': sys.stdout, 'log_cerr': sys.stderr,
})
model = FreqaiMultiOutputClassifier(estimator=cbc)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model

View File

@ -0,0 +1,64 @@
import logging
from typing import Any, Dict
from lightgbm import LGBMClassifier
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class LightGBMClassifierMultiTarget(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
lgb = LGBMClassifier(**self.model_training_parameters)
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
sample_weight = data_dictionary["train_weights"]
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = ( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
)
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
'init_model': init_models[i]})
model = FreqaiMultiOutputClassifier(estimator=lgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model

View File

@ -77,17 +77,19 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
shutil.rmtree(Path(freqai.dk.full_path))
@pytest.mark.parametrize('model', [
'LightGBMRegressorMultiTarget',
'XGBoostRegressorMultiTarget',
'CatboostRegressorMultiTarget',
@pytest.mark.parametrize('model, strat', [
('LightGBMRegressorMultiTarget', "freqai_test_multimodel_strat"),
('XGBoostRegressorMultiTarget', "freqai_test_multimodel_strat"),
('CatboostRegressorMultiTarget', "freqai_test_multimodel_strat"),
('LightGBMClassifierMultiTarget', "freqai_test_multimodel_classifier_strat"),
('CatboostClassifierMultiTarget', "freqai_test_multimodel_classifier_strat")
])
def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model):
if is_arm() and model == 'CatboostRegressorMultiTarget':
def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model, strat):
if is_arm() and 'Catboost' in model:
pytest.skip("CatBoost is not supported on ARM")
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"strategy": "freqai_test_multimodel_strat"})
freqai_conf.update({"strategy": strat})
freqai_conf.update({"freqaimodel": model})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)

View File

@ -1460,6 +1460,7 @@ def test_api_strategies(botclient, tmpdir):
'StrategyTestV3CustomEntryPrice',
'StrategyTestV3Futures',
'freqai_test_classifier',
'freqai_test_multimodel_classifier_strat',
'freqai_test_multimodel_strat',
'freqai_test_strat'
]}

View File

@ -0,0 +1,138 @@
import logging
from functools import reduce
import numpy as np
import pandas as pd
import talib.abstract as ta
from pandas import DataFrame
from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair
logger = logging.getLogger(__name__)
class freqai_test_multimodel_classifier_strat(IStrategy):
"""
Test strategy - used for testing freqAI multimodel functionalities.
DO not use in production.
"""
minimal_roi = {"0": 0.1, "240": -1}
plot_config = {
"main_plot": {},
"subplots": {
"prediction": {"prediction": {"color": "blue"}},
"target_roi": {
"target_roi": {"color": "brown"},
},
"do_predict": {
"do_predict": {"color": "brown"},
},
},
}
process_only_new_candles = True
stoploss = -0.05
use_exit_signal = True
startup_candle_count: int = 300
can_short = False
linear_roi_offset = DecimalParameter(
0.00, 0.02, default=0.005, space="sell", optimize=False, load=True
)
max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True)
def populate_any_indicators(
self, pair, df, tf, informative=None, set_generalized_indicators=False
):
coin = pair.split('/')[0]
if informative is None:
informative = self.dp.get_pair_dataframe(pair, tf)
# first loop is automatically duplicating indicators for time periods
for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:
t = int(t)
informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t)
informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
informative[f"%-{coin}raw_volume"] = informative["volume"]
informative[f"%-{coin}raw_price"] = informative["close"]
indicators = [col for col in informative if col.startswith("%")]
# This loop duplicates and shifts all indicators to add a sense of recency to data
for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1):
if n == 0:
continue
informative_shift = informative[indicators].shift(n)
informative_shift = informative_shift.add_suffix("_shift-" + str(n))
informative = pd.concat((informative, informative_shift), axis=1)
df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
skip_columns = [
(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]
]
df = df.drop(columns=skip_columns)
# Add generalized indicators here (because in live, it will call this
# function to populate indicators during training). Notice how we ensure not to
# add them multiple times
if set_generalized_indicators:
df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25
# user adds targets here by prepending them with &- (see convention below)
# If user wishes to use multiple targets, a multioutput prediction model
# needs to be used such as templates/CatboostPredictionMultiModel.py
df['&s-up_or_down'] = np.where(df["close"].shift(-50) >
df["close"], 'up', 'down')
df['&s-up_or_down2'] = np.where(df["close"].shift(-50) >
df["close"], 'up2', 'down2')
return df
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
self.freqai_info = self.config["freqai"]
dataframe = self.freqai.start(dataframe, metadata, self)
dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25
dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25
return dataframe
def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
enter_long_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"]]
if enter_long_conditions:
df.loc[
reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
] = (1, "long")
enter_short_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"]]
if enter_short_conditions:
df.loc[
reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"]
] = (1, "short")
return df
def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
exit_long_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"] * 0.25]
if exit_long_conditions:
df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1
exit_short_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"] * 0.25]
if exit_short_conditions:
df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1
return df

View File

@ -34,7 +34,7 @@ def test_search_all_strategies_no_failed():
directory = Path(__file__).parent / "strats"
strategies = StrategyResolver._search_all_objects(directory, enum_failed=False)
assert isinstance(strategies, list)
assert len(strategies) == 10
assert len(strategies) == 11
assert isinstance(strategies[0], dict)
@ -42,10 +42,10 @@ def test_search_all_strategies_with_failed():
directory = Path(__file__).parent / "strats"
strategies = StrategyResolver._search_all_objects(directory, enum_failed=True)
assert isinstance(strategies, list)
assert len(strategies) == 11
assert len(strategies) == 12
# with enum_failed=True search_all_objects() shall find 2 good strategies
# and 1 which fails to load
assert len([x for x in strategies if x['class'] is not None]) == 10
assert len([x for x in strategies if x['class'] is not None]) == 11
assert len([x for x in strategies if x['class'] is None]) == 1