diff --git a/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py b/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py index 54136d5e0..a4a8ddfcb 100644 --- a/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py +++ b/freqtrade/freqai/base_models/FreqaiMultiOutputClassifier.py @@ -1,10 +1,13 @@ +import numpy as np from joblib import Parallel -from sklearn.multioutput import MultiOutputRegressor, _fit_estimator +from sklearn.base import is_classifier +from sklearn.multioutput import MultiOutputClassifier, _fit_estimator from sklearn.utils.fixes import delayed -from sklearn.utils.validation import has_fit_parameter +from sklearn.utils.multiclass import check_classification_targets +from sklearn.utils.validation import check_is_fitted, has_fit_parameter -class FreqaiMultiOutputRegressor(MultiOutputRegressor): +class FreqaiMultiOutputClassifier(MultiOutputClassifier): def fit(self, X, y, sample_weight=None, fit_params=None): """Fit the model to data, separately for each output variable. @@ -17,7 +20,7 @@ class FreqaiMultiOutputRegressor(MultiOutputRegressor): estimation. sample_weight : array-like of shape (n_samples,), default=None Sample weights. If `None`, then samples are equally weighted. - Only supported if the underlying regressor supports sample + Only supported if the underlying classifier supports sample weights. fit_params : A list of dicts for the fit_params Parameters passed to the ``estimator.fit`` method of each step. @@ -35,6 +38,9 @@ class FreqaiMultiOutputRegressor(MultiOutputRegressor): y = self._validate_data(X="no_validation", y=y, multi_output=True) + if is_classifier(self): + check_classification_targets(y) + if y.ndim == 1: raise ValueError( "y must have at least two dimensions for " @@ -56,9 +62,66 @@ class FreqaiMultiOutputRegressor(MultiOutputRegressor): for i in range(y.shape[1]) ) + self.classes_ = [] + for estimator in self.estimators_: + self.classes_.extend(estimator.classes_) + if hasattr(self.estimators_[0], "n_features_in_"): self.n_features_in_ = self.estimators_[0].n_features_in_ if hasattr(self.estimators_[0], "feature_names_in_"): self.feature_names_in_ = self.estimators_[0].feature_names_in_ - return + return self + + def predict_proba(self, X): + """Return prediction probabilities for each class of each output. + + This method will raise a ``ValueError`` if any of the + estimators do not have ``predict_proba``. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The input data. + + Returns + ------- + p : array of shape (n_samples, n_classes), or a list of n_outputs \ + such arrays if n_outputs > 1. + The class probabilities of the input samples. The order of the + classes corresponds to that in the attribute :term:`classes_`. + + .. versionchanged:: 0.19 + This function now returns a list of arrays where the length of + the list is ``n_outputs``, and each array is (``n_samples``, + ``n_classes``) for that particular output. + """ + check_is_fitted(self) + results = np.hstack([estimator.predict_proba(X) for estimator in self.estimators_]) + return np.squeeze(results) + + def predict(self, X): + """Predict multi-output variable using model for each target variable. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + The input data. + + Returns + ------- + y : {array-like, sparse matrix} of shape (n_samples, n_outputs) + Multi-output targets predicted across multiple predictors. + Note: Separate models are generated for each predictor. + """ + check_is_fitted(self) + if not hasattr(self.estimators_[0], "predict"): + raise ValueError("The base estimator should implement a predict method") + + y = Parallel(n_jobs=self.n_jobs)( + delayed(e.predict)(X) for e in self.estimators_ + ) + + results = np.asarray(y).T + + return np.squeeze(results) diff --git a/freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py b/freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py index ca1d8ece0..c6f900fad 100644 --- a/freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py +++ b/freqtrade/freqai/prediction_models/CatboostClassifierMultiTarget.py @@ -6,13 +6,14 @@ from typing import Any, Dict from catboost import CatBoostClassifier, Pool from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.base_models.FreqaiMultiOutputClassifier import FreqaiMultiOutputClassifier from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) -class CatboostClassifier(BaseClassifierModel): +class CatboostClassifierMultiTarget(BaseClassifierModel): """ User created prediction model. The class needs to override three necessary functions, predict(), train(), fit(). The class inherits ModelHandler which @@ -26,30 +27,48 @@ class CatboostClassifier(BaseClassifierModel): all the training and test data/labels. """ - train_data = Pool( - data=data_dictionary["train_features"], - label=data_dictionary["train_labels"], - weight=data_dictionary["train_weights"], - ) - if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0: - test_data = None - else: - test_data = Pool( - data=data_dictionary["test_features"], - label=data_dictionary["test_labels"], - weight=data_dictionary["test_weights"], - ) - - cbr = CatBoostClassifier( + cbc = CatBoostClassifier( allow_writing_files=True, loss_function='MultiClass', train_dir=Path(dk.data_path), **self.model_training_parameters, ) + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + + sample_weight = data_dictionary["train_weights"] + + eval_sets = [None] * y.shape[1] + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + eval_sets = [None] * data_dictionary['test_labels'].shape[1] + + for i in range(data_dictionary['test_labels'].shape[1]): + eval_sets[i] = Pool( + data=data_dictionary["test_features"], + label=data_dictionary["test_labels"].iloc[:, i], + weight=data_dictionary["test_weights"], + ) + init_model = self.get_init_model(dk.pair) - cbr.fit(X=train_data, eval_set=test_data, init_model=init_model, - log_cout=sys.stdout, log_cerr=sys.stderr) + if init_model: + init_models = init_model.estimators_ + else: + init_models = [None] * y.shape[1] - return cbr + fit_params = [] + for i in range(len(eval_sets)): + fit_params.append({ + 'eval_set': eval_sets[i], 'init_model': init_models[i], + 'log_cout': sys.stdout, 'log_cerr': sys.stderr, + }) + + model = FreqaiMultiOutputClassifier(estimator=cbc) + thread_training = self.freqai_info.get('multitarget_parallel_training', False) + if thread_training: + model.n_jobs = y.shape[1] + model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params) + + return model diff --git a/user_data/strategies/MultiTargetClassifierTestStrategy.py b/user_data/strategies/MultiTargetClassifierTestStrategy.py new file mode 100644 index 000000000..6ca2567c3 --- /dev/null +++ b/user_data/strategies/MultiTargetClassifierTestStrategy.py @@ -0,0 +1,244 @@ +import logging +from functools import reduce + +import numpy as np +import pandas as pd +import talib.abstract as ta +from pandas import DataFrame +from technical import qtpylib + +from freqtrade.strategy import CategoricalParameter, IStrategy, merge_informative_pair + + +logger = logging.getLogger(__name__) + + +class MultiTargetClassifierTestStrategy(IStrategy): + """ + Example strategy showing how the user connects their own + IFreqaiModel to the strategy. Namely, the user uses: + self.freqai.start(dataframe, metadata) + + to make predictions on their data. populate_any_indicators() automatically + generates the variety of features indicated by the user in the + canonical freqtrade configuration file under config['freqai']. + """ + + minimal_roi = {"0": 0.1, "240": -1} + + plot_config = { + "main_plot": {}, + "subplots": { + "prediction": {"prediction": {"color": "blue"}}, + "do_predict": { + "do_predict": {"color": "brown"}, + }, + }, + } + + process_only_new_candles = True + stoploss = -0.05 + use_exit_signal = True + # this is the maximum period fed to talib (timeframe independent) + startup_candle_count: int = 40 + can_short = False + + std_dev_multiplier_buy = CategoricalParameter( + [0.75, 1, 1.25, 1.5, 1.75], default=1.25, space="buy", optimize=True) + std_dev_multiplier_sell = CategoricalParameter( + [0.75, 1, 1.25, 1.5, 1.75], space="sell", default=1.25, optimize=True) + + def populate_any_indicators( + self, pair, df, tf, informative=None, set_generalized_indicators=False + ): + """ + Function designed to automatically generate, name and merge features + from user indicated timeframes in the configuration file. User controls the indicators + passed to the training/prediction by prepending indicators with `'%-' + coin ` + (see convention below). I.e. user should not prepend any supporting metrics + (e.g. bb_lowerband below) with % unless they explicitly want to pass that metric to the + model. + :param pair: pair to be used as informative + :param df: strategy dataframe which will receive merges from informatives + :param tf: timeframe of the dataframe which will modify the feature names + :param informative: the dataframe associated with the informative pair + """ + + coin = pair.split('/')[0] + + if informative is None: + informative = self.dp.get_pair_dataframe(pair, tf) + + # first loop is automatically duplicating indicators for time periods + for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]: + + t = int(t) + informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t) + informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t) + informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, timeperiod=t) + informative[f"%-{coin}sma-period_{t}"] = ta.SMA(informative, timeperiod=t) + informative[f"%-{coin}ema-period_{t}"] = ta.EMA(informative, timeperiod=t) + + bollinger = qtpylib.bollinger_bands( + qtpylib.typical_price(informative), window=t, stds=2.2 + ) + informative[f"{coin}bb_lowerband-period_{t}"] = bollinger["lower"] + informative[f"{coin}bb_middleband-period_{t}"] = bollinger["mid"] + informative[f"{coin}bb_upperband-period_{t}"] = bollinger["upper"] + + informative[f"%-{coin}bb_width-period_{t}"] = ( + informative[f"{coin}bb_upperband-period_{t}"] + - informative[f"{coin}bb_lowerband-period_{t}"] + ) / informative[f"{coin}bb_middleband-period_{t}"] + informative[f"%-{coin}close-bb_lower-period_{t}"] = ( + informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"] + ) + + informative[f"%-{coin}roc-period_{t}"] = ta.ROC(informative, timeperiod=t) + + informative[f"%-{coin}relative_volume-period_{t}"] = ( + informative["volume"] / informative["volume"].rolling(t).mean() + ) + + informative[f"%-{coin}pct-change"] = informative["close"].pct_change() + informative[f"%-{coin}raw_volume"] = informative["volume"] + informative[f"%-{coin}raw_price"] = informative["close"] + + indicators = [col for col in informative if col.startswith("%")] + # This loop duplicates and shifts all indicators to add a sense of recency to data + for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1): + if n == 0: + continue + informative_shift = informative[indicators].shift(n) + informative_shift = informative_shift.add_suffix("_shift-" + str(n)) + informative = pd.concat((informative, informative_shift), axis=1) + + df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True) + skip_columns = [ + (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"] + ] + df = df.drop(columns=skip_columns) + + # Add generalized indicators here (because in live, it will call this + # function to populate indicators during training). Notice how we ensure not to + # add them multiple times + if set_generalized_indicators: + df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 + df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 + + # Classifiers are typically set up with strings as targets: + df['&s-up_or_down_long'] = np.where( + df["close"].shift(-100) > df["close"], 'up_long', 'down_long') + df['&s-up_or_down_medium'] = np.where( + df["close"].shift(-50) > df["close"], 'up_medium', 'down_medium') + df['&s-up_or_down_short'] = np.where( + df["close"].shift(-20) > df["close"], 'up_short', 'down_short') + + # If user wishes to use multiple targets, they can add more by + # appending more columns with '&'. User should keep in mind that multi targets + # requires a multioutput prediction model such as + # templates/CatboostPredictionMultiModel.py, + + # df["&-s_range"] = ( + # df["close"] + # .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + # .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + # .max() + # - + # df["close"] + # .shift(-self.freqai_info["feature_parameters"]["label_period_candles"]) + # .rolling(self.freqai_info["feature_parameters"]["label_period_candles"]) + # .min() + # ) + + return df + + def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + + # All indicators must be populated by populate_any_indicators() for live functionality + # to work correctly. + + # the model will return all labels created by user in `populate_any_indicators` + # (& appended targets), an indication of whether or not the prediction should be accepted, + # the target mean/std values for each of the labels created by user in + # `populate_any_indicators()` for each training period. + + dataframe = self.freqai.start(dataframe, metadata, self) + for val in self.std_dev_multiplier_buy.range: + dataframe[f'target_roi_{val}'] = ( + dataframe["up_long_mean"] + dataframe["up_long_std"] * val + ) + for val in self.std_dev_multiplier_sell.range: + dataframe[f'sell_roi_{val}'] = ( + dataframe["down_long_mean"] - dataframe["down_long_std"] * val + ) + return dataframe + + def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame: + + enter_long_conditions = [ + df["do_predict"] == 1, + df["up_long"] > df[f"target_roi_{self.std_dev_multiplier_buy.value}"], + ] + + if enter_long_conditions: + df.loc[ + reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"] + ] = (1, "long") + + enter_short_conditions = [ + df["do_predict"] == 1, + df["down_long"] < df[f"sell_roi_{self.std_dev_multiplier_sell.value}"], + ] + + if enter_short_conditions: + df.loc[ + reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"] + ] = (1, "short") + + return df + + def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame: + exit_long_conditions = [ + df["do_predict"] == 1, + df["down_long"] < df[f"sell_roi_{self.std_dev_multiplier_sell.value}"] * 0.25, + ] + if exit_long_conditions: + df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1 + + exit_short_conditions = [ + df["do_predict"] == 1, + df["up_long"] > df[f"target_roi_{self.std_dev_multiplier_buy.value}"] * 0.25, + ] + if exit_short_conditions: + df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1 + + return df + + def get_ticker_indicator(self): + return int(self.config["timeframe"][:-1]) + + def confirm_trade_entry( + self, + pair: str, + order_type: str, + amount: float, + rate: float, + time_in_force: str, + current_time, + entry_tag, + side: str, + **kwargs, + ) -> bool: + + df, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe) + last_candle = df.iloc[-1].squeeze() + + if side == "long": + if rate > (last_candle["close"] * (1 + 0.0025)): + return False + else: + if rate < (last_candle["close"] * (1 - 0.0025)): + return False + + return True diff --git a/user_data/strategies/config_test.json b/user_data/strategies/config_test.json new file mode 100644 index 000000000..5e508096d --- /dev/null +++ b/user_data/strategies/config_test.json @@ -0,0 +1,105 @@ +{ + "trading_mode": "futures", + "margin_mode": "isolated", + "max_open_trades": 5, + "stake_currency": "USDT", + "stake_amount": 200, + "tradable_balance_ratio": 1, + "fiat_display_currency": "USD", + "dry_run": true, + "timeframe": "3m", + "dry_run_wallet": 1000, + "cancel_open_orders_on_exit": true, + "unfilledtimeout": { + "entry": 10, + "exit": 30 + }, + "exchange": { + "name": "binance", + "key": "", + "secret": "", + "ccxt_config": {}, + "ccxt_async_config": {}, + "pair_whitelist": [ + "1INCH/USDT", + "ALGO/USDT" + ], + "pair_blacklist": [] + }, + "entry_pricing": { + "price_side": "same", + "use_order_book": true, + "order_book_top": 1, + "price_last_balance": 0.0, + "check_depth_of_market": { + "enabled": false, + "bids_to_ask_delta": 1 + } + }, + "exit_pricing": { + "price_side": "other", + "use_order_book": true, + "order_book_top": 1 + }, + "pairlists": [ + { + "method": "StaticPairList" + } + ], + "freqai": { + "enabled": true, + "purge_old_models": true, + "train_period_days": 15, + "backtest_period_days": 7, + "live_retrain_hours": 0, + "identifier": "uniqe-id", + "multitarget_parallel_training": true, + "feature_parameters": { + "include_timeframes": [ + "3m", + "15m", + "1h" + ], + "include_corr_pairlist": [ + "BTC/USDT", + "ETH/USDT" + ], + "label_period_candles": 20, + "include_shifted_candles": 2, + "DI_threshold": 0.9, + "weight_factor": 0.9, + "principal_component_analysis": false, + "use_SVM_to_remove_outliers": true, + "indicator_periods_candles": [ + 10, + 20 + ], + "plot_feature_importances": 0 + }, + "data_split_parameters": { + "test_size": 0.33, + "random_state": 1 + }, + "model_training_parameters": { + "n_estimators": 1000, + "early_stopping_rounds": 100 + } + }, + "api_server": { + "enabled": true, + "listen_ip_address": "127.0.0.1", + "listen_port": 8081, + "verbosity": "error", + "enable_openapi": false, + "jwt_secret_key": "test", + "CORS_origins": [], + "username": "test", + "password": "test" + }, + "bot_name": "", + "force_entry_enable": true, + "initial_state": "running", + "internals": { + "process_throttle_secs": 5 + } +}