Merge pull request #7644 from markdregan/multi-target-classifier
Support for multi target multi-class classification (FreqaiMultiOutputRegressor for Classification)
This commit is contained in:
commit
833578716c
93
freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py
Normal file
93
freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py
Normal file
@ -0,0 +1,93 @@
|
|||||||
|
import numpy as np
|
||||||
|
from joblib import Parallel
|
||||||
|
from sklearn.base import is_classifier
|
||||||
|
from sklearn.multioutput import MultiOutputClassifier, _fit_estimator
|
||||||
|
from sklearn.utils.fixes import delayed
|
||||||
|
from sklearn.utils.multiclass import check_classification_targets
|
||||||
|
from sklearn.utils.validation import has_fit_parameter
|
||||||
|
|
||||||
|
from freqtrade.exceptions import OperationalException
|
||||||
|
|
||||||
|
|
||||||
|
class FreqaiMultiOutputClassifier(MultiOutputClassifier):
    """Multi-output classifier that accepts per-target fit parameters.

    ``fit`` calls sklearn's ``_fit_estimator`` once per column of ``y`` and
    forwards a separate keyword dict to each call, so every target can get
    its own ``eval_set`` / ``init_model`` (or any other fit keyword).
    """

    def fit(self, X, y, sample_weight=None, fit_params=None):
        """Fit the model to data, separately for each output variable.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input data.
        y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
            Multi-output targets. An indicator matrix turns on multilabel
            estimation.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If `None`, then samples are equally weighted.
            Only supported if the underlying classifier supports sample
            weights.
        fit_params : list of dict, default=None
            One dict per target column; entry ``i`` is passed as keyword
            arguments to the fit of target ``i``. Each dict may contain the
            same or different values (e.g. different eval_sets or
            init_models). ``None`` / an empty list, or a ``None`` entry,
            means "no extra keyword arguments".

        Returns
        -------
        self : object
            Returns a fitted instance.

        Raises
        ------
        ValueError
            If the base estimator has no ``fit`` method, if ``y`` is
            one-dimensional, or if ``sample_weight`` is given but the base
            estimator does not accept it.
        OperationalException
            If the same class label appears in more than one target.
        """
        if not hasattr(self.estimator, "fit"):
            raise ValueError("The base estimator should implement a fit method")

        y = self._validate_data(X="no_validation", y=y, multi_output=True)

        if is_classifier(self):
            check_classification_targets(y)

        if y.ndim == 1:
            raise ValueError(
                "y must have at least two dimensions for "
                "multi-output classification but has only one."
            )

        if sample_weight is not None and not has_fit_parameter(
            self.estimator, "sample_weight"
        ):
            raise ValueError("Underlying estimator does not support sample weights.")

        n_outputs = y.shape[1]
        if not fit_params:
            # Must be dicts (not None): each entry is unpacked with ** below.
            fit_params = [{} for _ in range(n_outputs)]

        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_estimator)(
                self.estimator, X, y[:, i], sample_weight, **(fit_params[i] or {})
            )
            for i in range(n_outputs)
        )

        # classes_ is the flat concatenation of every estimator's classes_, so
        # a label shared by two targets would be ambiguous and is rejected.
        self.classes_ = []
        for estimator in self.estimators_:
            self.classes_.extend(estimator.classes_)
        if len(set(self.classes_)) != len(self.classes_):
            raise OperationalException(f"Class labels must be unique across targets: "
                                       f"{self.classes_}")

        if hasattr(self.estimators_[0], "n_features_in_"):
            self.n_features_in_ = self.estimators_[0].n_features_in_
        if hasattr(self.estimators_[0], "feature_names_in_"):
            self.feature_names_in_ = self.estimators_[0].feature_names_in_

        return self

    def predict_proba(self, X):
        """
        Get predict_proba and stack arrays horizontally

        The per-target probability arrays returned by the parent class are
        concatenated column-wise and then passed through ``np.squeeze``,
        which drops any axis of length one (e.g. a single-sample input).
        """
        results = np.hstack(super().predict_proba(X))
        return np.squeeze(results)

    def predict(self, X):
        """
        Get predict and squeeze the result

        ``np.squeeze`` drops any axis of length one from the parent's
        prediction array (e.g. a single-sample input).
        """
        results = super().predict(X)
        return np.squeeze(results)
|
@ -0,0 +1,74 @@
|
|||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from catboost import CatBoostClassifier, Pool
|
||||||
|
|
||||||
|
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||||
|
from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier
|
||||||
|
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class CatboostClassifierMultiTarget(BaseClassifierModel):
    """
    Multi-target CatBoost classification model.

    A single CatBoostClassifier is wrapped in FreqaiMultiOutputClassifier and
    fitted once per label column, each fit receiving its own evaluation set
    and initial model.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Build and train the multi-target model.
        :param data_dictionary: dictionary holding the training and test
                                features, labels and weights.
        :param dk: data kitchen of the current pair; supplies the CatBoost
                   train directory and the pair used to look up an init model.
        :return: the fitted FreqaiMultiOutputClassifier.
        """
        base_estimator = CatBoostClassifier(
            allow_writing_files=True,
            loss_function='MultiClass',
            train_dir=Path(dk.data_path),
            **self.model_training_parameters,
        )

        train_features = data_dictionary["train_features"]
        train_labels = data_dictionary["train_labels"]
        train_weights = data_dictionary["train_weights"]
        n_targets = train_labels.shape[1]

        # Without a test split every target trains with no evaluation set.
        eval_sets = [None] * n_targets
        split_params = self.freqai_info.get('data_split_parameters', {})
        if split_params.get('test_size', 0.1) != 0:
            eval_sets = [
                Pool(
                    data=data_dictionary["test_features"],
                    label=data_dictionary["test_labels"].iloc[:, column],
                    weight=data_dictionary["test_weights"],
                )
                for column in range(data_dictionary['test_labels'].shape[1])
            ]

        previous_model = self.get_init_model(dk.pair)
        init_models = previous_model.estimators_ if previous_model else [None] * n_targets

        # One keyword dict per target, consumed by FreqaiMultiOutputClassifier.fit.
        fit_params = [
            {
                'eval_set': eval_set, 'init_model': init_models[idx],
                'log_cout': sys.stdout, 'log_cerr': sys.stderr,
            }
            for idx, eval_set in enumerate(eval_sets)
        ]

        model = FreqaiMultiOutputClassifier(estimator=base_estimator)
        if self.freqai_info.get('multitarget_parallel_training', False):
            # One job per target column.
            model.n_jobs = n_targets
        model.fit(
            X=train_features, y=train_labels, sample_weight=train_weights, fit_params=fit_params
        )

        return model
|
@ -0,0 +1,64 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from lightgbm import LGBMClassifier
|
||||||
|
|
||||||
|
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||||
|
from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier
|
||||||
|
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class LightGBMClassifierMultiTarget(BaseClassifierModel):
    """
    Multi-target LightGBM classification model.

    A single LGBMClassifier is wrapped in FreqaiMultiOutputClassifier and
    fitted once per label column, each fit receiving its own evaluation set
    and initial model.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Build and train the multi-target model.
        :param data_dictionary: dictionary holding the training and test
                                features, labels and weights.
        :param dk: data kitchen of the current pair; its pair is used to look
                   up an init model.
        :return: the fitted FreqaiMultiOutputClassifier.
        """
        base_estimator = LGBMClassifier(**self.model_training_parameters)

        train_features = data_dictionary["train_features"]
        train_labels = data_dictionary["train_labels"]
        train_weights = data_dictionary["train_weights"]
        n_targets = train_labels.shape[1]

        # Defaults used when no test split is configured.
        eval_weights = None
        eval_sets = [None] * n_targets

        split_params = self.freqai_info.get('data_split_parameters', {})
        if split_params.get('test_size', 0.1) != 0:
            eval_weights = [data_dictionary["test_weights"]]
            eval_sets = [  # type: ignore
                (
                    data_dictionary["test_features"],
                    data_dictionary["test_labels"].iloc[:, column],
                )
                for column in range(data_dictionary['test_labels'].shape[1])
            ]

        previous_model = self.get_init_model(dk.pair)
        init_models = previous_model.estimators_ if previous_model else [None] * n_targets

        # One keyword dict per target, consumed by FreqaiMultiOutputClassifier.fit.
        fit_params = [
            {
                'eval_set': eval_set,
                'eval_sample_weight': eval_weights,
                'init_model': init_models[idx],
            }
            for idx, eval_set in enumerate(eval_sets)
        ]

        model = FreqaiMultiOutputClassifier(estimator=base_estimator)
        if self.freqai_info.get('multitarget_parallel_training', False):
            # One job per target column.
            model.n_jobs = n_targets
        model.fit(
            X=train_features, y=train_labels, sample_weight=train_weights, fit_params=fit_params
        )

        return model
|
@ -77,17 +77,19 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
|
|||||||
shutil.rmtree(Path(freqai.dk.full_path))
|
shutil.rmtree(Path(freqai.dk.full_path))
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('model', [
|
@pytest.mark.parametrize('model, strat', [
|
||||||
'LightGBMRegressorMultiTarget',
|
('LightGBMRegressorMultiTarget', "freqai_test_multimodel_strat"),
|
||||||
'XGBoostRegressorMultiTarget',
|
('XGBoostRegressorMultiTarget', "freqai_test_multimodel_strat"),
|
||||||
'CatboostRegressorMultiTarget',
|
('CatboostRegressorMultiTarget', "freqai_test_multimodel_strat"),
|
||||||
|
('LightGBMClassifierMultiTarget', "freqai_test_multimodel_classifier_strat"),
|
||||||
|
('CatboostClassifierMultiTarget', "freqai_test_multimodel_classifier_strat")
|
||||||
])
|
])
|
||||||
def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model):
|
def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model, strat):
|
||||||
if is_arm() and model == 'CatboostRegressorMultiTarget':
|
if is_arm() and 'Catboost' in model:
|
||||||
pytest.skip("CatBoost is not supported on ARM")
|
pytest.skip("CatBoost is not supported on ARM")
|
||||||
|
|
||||||
freqai_conf.update({"timerange": "20180110-20180130"})
|
freqai_conf.update({"timerange": "20180110-20180130"})
|
||||||
freqai_conf.update({"strategy": "freqai_test_multimodel_strat"})
|
freqai_conf.update({"strategy": strat})
|
||||||
freqai_conf.update({"freqaimodel": model})
|
freqai_conf.update({"freqaimodel": model})
|
||||||
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
|
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
|
||||||
exchange = get_patched_exchange(mocker, freqai_conf)
|
exchange = get_patched_exchange(mocker, freqai_conf)
|
||||||
|
@ -1460,6 +1460,7 @@ def test_api_strategies(botclient, tmpdir):
|
|||||||
'StrategyTestV3CustomEntryPrice',
|
'StrategyTestV3CustomEntryPrice',
|
||||||
'StrategyTestV3Futures',
|
'StrategyTestV3Futures',
|
||||||
'freqai_test_classifier',
|
'freqai_test_classifier',
|
||||||
|
'freqai_test_multimodel_classifier_strat',
|
||||||
'freqai_test_multimodel_strat',
|
'freqai_test_multimodel_strat',
|
||||||
'freqai_test_strat'
|
'freqai_test_strat'
|
||||||
]}
|
]}
|
||||||
|
138
tests/strategy/strats/freqai_test_multimodel_classifier_strat.py
Normal file
138
tests/strategy/strats/freqai_test_multimodel_classifier_strat.py
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
import logging
|
||||||
|
from functools import reduce
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import talib.abstract as ta
|
||||||
|
from pandas import DataFrame
|
||||||
|
|
||||||
|
from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class freqai_test_multimodel_classifier_strat(IStrategy):
    """
    Test strategy - used for testing freqAI multimodel functionalities.
    DO not use in production.
    """

    minimal_roi = {"0": 0.1, "240": -1}

    plot_config = {
        "main_plot": {},
        "subplots": {
            "prediction": {"prediction": {"color": "blue"}},
            "target_roi": {
                "target_roi": {"color": "brown"},
            },
            "do_predict": {
                "do_predict": {"color": "brown"},
            },
        },
    }

    process_only_new_candles = True
    stoploss = -0.05
    use_exit_signal = True
    startup_candle_count: int = 300
    can_short = False

    linear_roi_offset = DecimalParameter(
        0.00, 0.02, default=0.005, space="sell", optimize=False, load=True
    )
    max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True)

    def populate_any_indicators(
        self, pair, df, tf, informative=None, set_generalized_indicators=False
    ):
        """
        Add the feature columns (prefixed with "%") and the two classification
        targets (prefixed with "&s-") for one pair/timeframe combination.
        :param pair: pair name; the part before "/" is used in column names.
        :param df: strategy dataframe the informative features are merged into.
        :param tf: timeframe of the informative data.
        :param informative: informative dataframe; fetched from the data
                            provider for (pair, tf) when None.
        :param set_generalized_indicators: when True, also add the
                            day-of-week and hour-of-day features.
        :return: df with the feature and target columns added.
        """

        coin = pair.split('/')[0]

        if informative is None:
            informative = self.dp.get_pair_dataframe(pair, tf)

        # first loop is automatically duplicating indicators for time periods
        for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:

            t = int(t)
            informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
            informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
            # TA-Lib's ADX period keyword is `timeperiod` (as for RSI/MFI above).
            informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, timeperiod=t)

        informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
        informative[f"%-{coin}raw_volume"] = informative["volume"]
        informative[f"%-{coin}raw_price"] = informative["close"]

        indicators = [col for col in informative if col.startswith("%")]
        # This loop duplicates and shifts all indicators to add a sense of recency to data
        shifted_candles = self.freqai_info["feature_parameters"]["include_shifted_candles"]
        for n in range(1, shifted_candles + 1):
            informative_shift = informative[indicators].shift(n)
            informative_shift = informative_shift.add_suffix("_shift-" + str(n))
            informative = pd.concat((informative, informative_shift), axis=1)

        df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
        # Drop the merged raw candle columns of the informative timeframe.
        skip_columns = [
            (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]
        ]
        df = df.drop(columns=skip_columns)

        # Add generalized indicators here (because in live, it will call this
        # function to populate indicators during training). Notice how we ensure not to
        # add them multiple times
        if set_generalized_indicators:
            df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
            df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25

            # user adds targets here by prepending them with &s- (as done below)
            # If user wishes to use multiple targets, a multioutput prediction model
            # needs to be used such as templates/CatboostPredictionMultiModel.py
            # Both targets compare the close 50 candles ahead with the current
            # close; their label sets are deliberately distinct strings.
            df['&s-up_or_down'] = np.where(df["close"].shift(-50) >
                                           df["close"], 'up', 'down')

            df['&s-up_or_down2'] = np.where(df["close"].shift(-50) >
                                            df["close"], 'up2', 'down2')

        return df

    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Run freqAI on the dataframe and derive the ROI threshold columns.
        """

        self.freqai_info = self.config["freqai"]

        dataframe = self.freqai.start(dataframe, metadata, self)

        # NOTE(review): the "&-s_close*" columns read here and in the entry/exit
        # methods are not created anywhere in this strategy (its targets are
        # "&s-up_or_down" / "&s-up_or_down2") - confirm before running these paths.
        dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25
        dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25
        return dataframe

    def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
        """
        Flag long/short entries where do_predict == 1 and the prediction is
        above target_roi (long) or below sell_roi (short).
        """

        enter_long_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"]]

        if enter_long_conditions:
            df.loc[
                reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
            ] = (1, "long")

        enter_short_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"]]

        if enter_short_conditions:
            df.loc[
                reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"]
            ] = (1, "short")

        return df

    def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
        """
        Flag long/short exits where do_predict == 1 and the prediction crosses
        a quarter of sell_roi (long exit) or target_roi (short exit).
        """
        exit_long_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"] * 0.25]
        if exit_long_conditions:
            df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1

        exit_short_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"] * 0.25]
        if exit_short_conditions:
            df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1

        return df
|
@ -34,7 +34,7 @@ def test_search_all_strategies_no_failed():
|
|||||||
directory = Path(__file__).parent / "strats"
|
directory = Path(__file__).parent / "strats"
|
||||||
strategies = StrategyResolver._search_all_objects(directory, enum_failed=False)
|
strategies = StrategyResolver._search_all_objects(directory, enum_failed=False)
|
||||||
assert isinstance(strategies, list)
|
assert isinstance(strategies, list)
|
||||||
assert len(strategies) == 10
|
assert len(strategies) == 11
|
||||||
assert isinstance(strategies[0], dict)
|
assert isinstance(strategies[0], dict)
|
||||||
|
|
||||||
|
|
||||||
@ -42,10 +42,10 @@ def test_search_all_strategies_with_failed():
|
|||||||
directory = Path(__file__).parent / "strats"
|
directory = Path(__file__).parent / "strats"
|
||||||
strategies = StrategyResolver._search_all_objects(directory, enum_failed=True)
|
strategies = StrategyResolver._search_all_objects(directory, enum_failed=True)
|
||||||
assert isinstance(strategies, list)
|
assert isinstance(strategies, list)
|
||||||
assert len(strategies) == 11
|
assert len(strategies) == 12
|
||||||
# with enum_failed=True search_all_objects() shall find 2 good strategies
|
# with enum_failed=True search_all_objects() shall find 2 good strategies
|
||||||
# and 1 which fails to load
|
# and 1 which fails to load
|
||||||
assert len([x for x in strategies if x['class'] is not None]) == 10
|
assert len([x for x in strategies if x['class'] is not None]) == 11
|
||||||
|
|
||||||
assert len([x for x in strategies if x['class'] is None]) == 1
|
assert len([x for x in strategies if x['class'] is None]) == 1
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user