diff --git a/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py b/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py new file mode 100644 index 000000000..435c0e646 --- /dev/null +++ b/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py @@ -0,0 +1,93 @@ +import numpy as np +from joblib import Parallel +from sklearn.base import is_classifier +from sklearn.multioutput import MultiOutputClassifier, _fit_estimator +from sklearn.utils.fixes import delayed +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import has_fit_parameter + +from freqtrade.exceptions import OperationalException + + +class FreqaiMultiOutputClassifier(MultiOutputClassifier): + + def fit(self, X, y, sample_weight=None, fit_params=None): + """Fit the model to data, separately for each output variable. + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + y : {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets. An indicator matrix turns on multilabel + estimation. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights. If `None`, then samples are equally weighted. + Only supported if the underlying classifier supports sample + weights. + fit_params : A list of dicts for the fit_params + Parameters passed to the ``estimator.fit`` method of each step. + Each dict may contain same or different values (e.g. different + eval_sets or init_models) + .. versionadded:: 0.23 + Returns + ------- + self : object + Returns a fitted instance. + """ + + if not hasattr(self.estimator, "fit"): + raise ValueError("The base estimator should implement a fit method") + + y = self._validate_data(X="no_validation", y=y, multi_output=True) + + if is_classifier(self): + check_classification_targets(y) + + if y.ndim == 1: + raise ValueError( + "y must have at least two dimensions for " + "multi-output regression but has only one." + ) + + if sample_weight is not None and not has_fit_parameter( + self.estimator, "sample_weight" + ): + raise ValueError("Underlying estimator does not support sample weights.") + + if not fit_params: + fit_params = [None] * y.shape[1] + + self.estimators_ = Parallel(n_jobs=self.n_jobs)( + delayed(_fit_estimator)( + self.estimator, X, y[:, i], sample_weight, **fit_params[i] + ) + for i in range(y.shape[1]) + ) + + self.classes_ = [] + for estimator in self.estimators_: + self.classes_.extend(estimator.classes_) + if len(set(self.classes_)) != len(self.classes_): + raise OperationalException(f"Class labels must be unique across targets: " + f"{self.classes_}") + + if hasattr(self.estimators_[0], "n_features_in_"): + self.n_features_in_ = self.estimators_[0].n_features_in_ + if hasattr(self.estimators_[0], "feature_names_in_"): + self.feature_names_in_ = self.estimators_[0].feature_names_in_ + + return self + + def predict_proba(self, X): + """ + Get predict_proba and stack arrays horizontally + """ + results = np.hstack(super().predict_proba(X)) + return np.squeeze(results) + + def predict(self, X): + """ + Get predict and squeeze into 2D array + """ + results = super().predict(X) + return np.squeeze(results) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index dda8ebdbf..038ddaf2e 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -87,6 +87,7 @@ class FreqaiDataDrawer: self.create_follower_dict() self.load_drawer_from_disk() self.load_historic_predictions_from_disk() + self.metric_tracker: Dict[str, Dict[str, Dict[str, list]]] = {} self.load_metric_tracker_from_disk() self.training_queue: Dict[str, int] = {} self.history_lock = threading.Lock() @@ -97,7 +98,6 @@ class FreqaiDataDrawer: self.empty_pair_dict: pair_info = { "model_filename": "", "trained_timestamp": 0, "data_path": "", "extras": {}} - self.metric_tracker: Dict[str, Dict[str, Dict[str, list]]] = {} def update_metric_tracker(self, metric: str, value: float, pair: str) -> None: """ @@ -153,6 +153,7 @@ class FreqaiDataDrawer: if exists: with open(self.metric_tracker_path, "r") as fp: self.metric_tracker = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE) + logger.info("Loading existing metric tracker from disk.") else: logger.info("Could not find existing metric tracker, starting from scratch") diff --git a/freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py b/freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py new file mode 100644 index 000000000..c6f900fad --- /dev/null +++ b/freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py @@ -0,0 +1,74 @@ +import logging +import sys +from pathlib import Path +from typing import Any, Dict + +from catboost import CatBoostClassifier, Pool + +from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + + +logger = logging.getLogger(__name__) + + +class CatboostClassifierMultiTarget(BaseClassifierModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :param data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + cbc = CatBoostClassifier( + allow_writing_files=True, + loss_function='MultiClass', + train_dir=Path(dk.data_path), + **self.model_training_parameters, + ) + + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + + sample_weight = data_dictionary["train_weights"] + + eval_sets = [None] * y.shape[1] + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + eval_sets = [None] * data_dictionary['test_labels'].shape[1] + + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = Pool( + data=data_dictionary["test_features"], + label=data_dictionary["test_labels"].iloc[:, i], + weight=data_dictionary["test_weights"], + ) + + init_model = self.get_init_model(dk.pair) + + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] + + fit_params = [] + for i in range(len(eval_sets)): + fit_params.append({ + 'eval_set': eval_sets[i], 'init_model': init_models[i], + 'log_cout': sys.stdout, 'log_cerr': sys.stderr, + }) + + model = FreqaiMultiOutputClassifier(estimator=cbc) + thread_training = self.freqai_info.get('multitarget_parallel_training', False) + if thread_training: + model.n_jobs = y.shape[1] + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) + + return model diff --git a/freqtrade/freqai/prediction_models/LightGBMClassifierMultiTarget.py b/freqtrade/freqai/prediction_models/LightGBMClassifierMultiTarget.py new file mode 100644 index 000000000..d1eb6daa2 --- /dev/null +++ b/freqtrade/freqai/prediction_models/LightGBMClassifierMultiTarget.py @@ -0,0 +1,64 @@ +import logging +from typing import Any, Dict + +from lightgbm import LGBMClassifier + +from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + + +logger = logging.getLogger(__name__) + + +class LightGBMClassifierMultiTarget(BaseClassifierModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :param data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + lgb = LGBMClassifier(**self.model_training_parameters) + + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + sample_weight = data_dictionary["train_weights"] + + eval_weights = None + eval_sets = [None] * y.shape[1] + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + eval_weights = [data_dictionary["test_weights"]] + eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = ( # type: ignore + data_dictionary["test_features"], + data_dictionary["test_labels"].iloc[:, i] + ) + + init_model = self.get_init_model(dk.pair) + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] + + fit_params = [] + for i in range(len(eval_sets)): + fit_params.append( + {'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights, + 'init_model': init_models[i]}) + + model = FreqaiMultiOutputClassifier(estimator=lgb) + thread_training = self.freqai_info.get('multitarget_parallel_training', False) + if thread_training: + model.n_jobs = y.shape[1] + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) + + return model diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 4c503e4de..eab92c7f1 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -78,17 +78,19 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, shutil.rmtree(Path(freqai.dk.full_path)) -@pytest.mark.parametrize('model', [ - 'LightGBMRegressorMultiTarget', - 'XGBoostRegressorMultiTarget', - 'CatboostRegressorMultiTarget', +@pytest.mark.parametrize('model, strat', [ + ('LightGBMRegressorMultiTarget', "freqai_test_multimodel_strat"), + ('XGBoostRegressorMultiTarget', "freqai_test_multimodel_strat"), + ('CatboostRegressorMultiTarget', "freqai_test_multimodel_strat"), + ('LightGBMClassifierMultiTarget', "freqai_test_multimodel_classifier_strat"), + ('CatboostClassifierMultiTarget', "freqai_test_multimodel_classifier_strat") ]) -def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model): - if is_arm() and model == 'CatboostRegressorMultiTarget': +def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model, strat): + if is_arm() and 'Catboost' in model: pytest.skip("CatBoost is not supported on ARM") freqai_conf.update({"timerange": "20180110-20180130"}) - freqai_conf.update({"strategy": "freqai_test_multimodel_strat"}) + freqai_conf.update({"strategy": strat}) freqai_conf.update({"freqaimodel": model}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) diff --git a/tests/rpc/test_rpc_apiserver.py b/tests/rpc/test_rpc_apiserver.py index 6c28c1cac..019b8fc82 100644 --- a/tests/rpc/test_rpc_apiserver.py +++ b/tests/rpc/test_rpc_apiserver.py @@ -1460,6 +1460,7 @@ def test_api_strategies(botclient, tmpdir): 'StrategyTestV3CustomEntryPrice', 'StrategyTestV3Futures', 'freqai_test_classifier', + 'freqai_test_multimodel_classifier_strat', 'freqai_test_multimodel_strat', 'freqai_test_strat' ]} diff --git a/tests/strategy/strats/freqai_test_multimodel_classifier_strat.py b/tests/strategy/strats/freqai_test_multimodel_classifier_strat.py new file mode 100644 index 000000000..9188fa331 --- /dev/null +++ b/tests/strategy/strats/freqai_test_multimodel_classifier_strat.py @@ -0,0 +1,138 @@ +import logging +from functools import reduce + +import numpy as np +import pandas as pd +import talib.abstract as ta +from pandas import DataFrame + +from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair + + +logger = logging.getLogger(__name__) + + +class freqai_test_multimodel_classifier_strat(IStrategy): + """ + Test strategy - used for testing freqAI multimodel functionalities. + DO not use in production. + """ + + minimal_roi = {"0": 0.1, "240": -1} + + plot_config = { + "main_plot": {}, + "subplots": { + "prediction": {"prediction": {"color": "blue"}}, + "target_roi": { + "target_roi": {"color": "brown"}, + }, + "do_predict": { + "do_predict": {"color": "brown"}, + }, + }, + } + + process_only_new_candles = True + stoploss = -0.05 + use_exit_signal = True + startup_candle_count: int = 300 + can_short = False + + linear_roi_offset = DecimalParameter( + 0.00, 0.02, default=0.005, space="sell", optimize=False, load=True + ) + max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True) + + def populate_any_indicators( + self, pair, df, tf, informative=None, set_generalized_indicators=False + ): + + coin = pair.split('/')[0] + + if informative is None: + informative = self.dp.get_pair_dataframe(pair, tf) + + # first loop is automatically duplicating indicators for time periods + for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + + t = int(t) + informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) + informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) + informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t) + + informative[f"%-{coin}pct-change"] = informative["close"].pct_change() + informative[f"%-{coin}raw_volume"] = informative["volume"] + informative[f"%-{coin}raw_price"] = informative["close"] + + indicators = [col for col in informative if col.startswith("%")] + # This loop duplicates and shifts all indicators to add a sense of recency to data + for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): + if n == 0: + continue + informative_shift = informative[indicators].shift(n) + informative_shift = informative_shift.add_suffix("_shift-" + str(n)) + informative = pd.concat((informative, informative_shift), axis=1) + + df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) + skip_columns = [ + (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] + ] + df = df.drop(columns=skip_columns) + + # Add generalized indicators here (because in live, it will call this + # function to populate indicators during training). Notice how we ensure not to + # add them multiple times + if set_generalized_indicators: + df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 + df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 + + # user adds targets here by prepending them with &- (see convention below) + # If user wishes to use multiple targets, a multioutput prediction model + # needs to be used such as templates/CatboostPredictionMultiModel.py + df['&s-up_or_down'] = np.where(df["close"].shift(-50) > + df["close"], 'up', 'down') + + df['&s-up_or_down2'] = np.where(df["close"].shift(-50) > + df["close"], 'up2', 'down2') + + return df + + def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + + self.freqai_info = self.config["freqai"] + + dataframe = self.freqai.start(dataframe, metadata, self) + + dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25 + dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25 + return dataframe + + def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame: + + enter_long_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"]] + + if enter_long_conditions: + df.loc[ + reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"] + ] = (1, "long") + + enter_short_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"]] + + if enter_short_conditions: + df.loc[ + reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"] + ] = (1, "short") + + return df + + def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame: + exit_long_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"] * 0.25] + if exit_long_conditions: + df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1 + + exit_short_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"] * 0.25] + if exit_short_conditions: + df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1 + + return df diff --git a/tests/strategy/test_strategy_loading.py b/tests/strategy/test_strategy_loading.py index 2d13fc380..6b831c116 100644 --- a/tests/strategy/test_strategy_loading.py +++ b/tests/strategy/test_strategy_loading.py @@ -34,7 +34,7 @@ def test_search_all_strategies_no_failed(): directory = Path(__file__).parent / "strats" strategies = StrategyResolver._search_all_objects(directory, enum_failed=False) assert isinstance(strategies, list) - assert len(strategies) == 10 + assert len(strategies) == 11 assert isinstance(strategies[0], dict) @@ -42,10 +42,10 @@ def test_search_all_strategies_with_failed(): directory = Path(__file__).parent / "strats" strategies = StrategyResolver._search_all_objects(directory, enum_failed=True) assert isinstance(strategies, list) - assert len(strategies) == 11 + assert len(strategies) == 12 # with enum_failed=True search_all_objects() shall find 2 good strategies # and 1 which fails to load - assert len([x for x in strategies if x['class'] is not None]) == 10 + assert len([x for x in strategies if x['class'] is not None]) == 11 assert len([x for x in strategies if x['class'] is None]) == 1