From eb8bde37c16d201cae16fa9f13883924cd8e697a Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Sat, 6 Aug 2022 17:51:21 +0200
Subject: [PATCH] Add lightgbm classifier, add classifier check test, fix classifier bug.

---
 freqtrade/freqai/freqai_interface.py          |   2 +
 .../prediction_models/CatboostClassifier.py   |   3 -
 .../prediction_models/LightGBMClassifier.py   |  38 +++++
 freqtrade/templates/FreqaiExampleStrategy.py  |   4 +
 tests/freqai/test_freqai_interface.py         |  63 ++++++++
 tests/rpc/test_rpc_apiserver.py               |   1 +
 .../strategy/strats/freqai_test_classifier.py | 138 ++++++++++++++++++
 tests/strategy/test_strategy_loading.py       |   6 +-
 8 files changed, 249 insertions(+), 6 deletions(-)
 create mode 100644 freqtrade/freqai/prediction_models/LightGBMClassifier.py
 create mode 100644 tests/strategy/strats/freqai_test_classifier.py

diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 248484b05..097ecf553 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -566,6 +566,8 @@ class IFreqaiModel(ABC):
         num_candles = self.freqai_info.get("fit_live_predictions_candles", 100)
         dk.data["labels_mean"], dk.data["labels_std"] = {}, {}
         for label in dk.label_list:
+            if self.dd.historic_predictions[dk.pair][label].dtype == object:
+                continue
             f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles))
             dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]
 
diff --git a/freqtrade/freqai/prediction_models/CatboostClassifier.py b/freqtrade/freqai/prediction_models/CatboostClassifier.py
index d003744fb..ac1386eee 100644
--- a/freqtrade/freqai/prediction_models/CatboostClassifier.py
+++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py
@@ -32,9 +32,6 @@ class CatboostClassifier(BaseRegressionModel):
 
         cbr = CatBoostClassifier(
             allow_writing_files=False,
-            gpu_ram_part=0.5,
-            verbose=100,
-            early_stopping_rounds=400,
             loss_function='MultiClass',
             **self.model_training_parameters,
         )
diff --git a/freqtrade/freqai/prediction_models/LightGBMClassifier.py b/freqtrade/freqai/prediction_models/LightGBMClassifier.py
new file mode 100644
index 000000000..782dbce35
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/LightGBMClassifier.py
@@ -0,0 +1,38 @@
+import logging
+from typing import Any, Dict
+
+from lightgbm import LGBMClassifier
+
+from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
+
+
+logger = logging.getLogger(__name__)
+
+
+class LightGBMClassifier(BaseRegressionModel):
+    """
+    User created prediction model. The class needs to override three necessary
+    functions, predict(), train(), fit(). The class inherits ModelHandler which
+    has its own DataHandler where data is held, saved, loaded, and managed.
+    """
+
+    def fit(self, data_dictionary: Dict) -> Any:
+        """
+        User sets up the training and test data to fit their desired model here
+        :params:
+        :data_dictionary: the dictionary constructed by DataHandler to hold
+        all the training and test data/labels.
+        """
+
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            eval_set = None
+        else:
+            eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+        X = data_dictionary["train_features"]
+        y = data_dictionary["train_labels"]
+
+        model = LGBMClassifier(**self.model_training_parameters)
+
+        model.fit(X=X, y=y, eval_set=eval_set)
+
+        return model
diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index 4f632f907..ee1602725 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -155,6 +155,10 @@ class FreqaiExampleStrategy(IStrategy):
                 - 1
             )
 
+            # Classifiers are typically set up with strings as targets:
+            # df['&s-up_or_down'] = np.where( df["close"].shift(-100) >
+            #                                 df["close"], 'up', 'down')
+
             # If user wishes to use multiple targets, they can add more by
             # appending more columns with '&'. User should keep in mind that multi targets
             # requires a multioutput prediction model such as
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index 1f96cf6df..676f451ab 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -103,6 +103,69 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
     shutil.rmtree(Path(freqai.dk.full_path))
 
 
+@pytest.mark.skipif("arm" in platform.uname()[-1], reason="no ARM for Catboost ...")
+def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
+    freqai_conf.update({"timerange": "20180110-20180130"})
+    freqai_conf.update({"freqaimodel": "CatboostClassifier"})
+    freqai_conf.update({"strategy": "freqai_test_classifier"})
+    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
+    exchange = get_patched_exchange(mocker, freqai_conf)
+    strategy.dp = DataProvider(freqai_conf, exchange)
+
+    strategy.freqai_info = freqai_conf.get("freqai", {})
+    freqai = strategy.freqai
+    freqai.live = True
+    freqai.dk = FreqaiDataKitchen(freqai_conf)
+    timerange = TimeRange.parse_timerange("20180110-20180130")
+    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
+
+    freqai.dd.pair_dict = MagicMock()
+
+    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
+    new_timerange = TimeRange.parse_timerange("20180120-20180130")
+
+    freqai.train_model_in_series(new_timerange, "ADA/BTC",
+                                 strategy, freqai.dk, data_load_timerange)
+
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists()
+
+    shutil.rmtree(Path(freqai.dk.full_path))
+
+
+def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
+    freqai_conf.update({"timerange": "20180110-20180130"})
+    freqai_conf.update({"freqaimodel": "LightGBMClassifier"})
+    freqai_conf.update({"strategy": "freqai_test_classifier"})
+    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
+    exchange = get_patched_exchange(mocker, freqai_conf)
+    strategy.dp = DataProvider(freqai_conf, exchange)
+
+    strategy.freqai_info = freqai_conf.get("freqai", {})
+    freqai = strategy.freqai
+    freqai.live = True
+    freqai.dk = FreqaiDataKitchen(freqai_conf)
+    timerange = TimeRange.parse_timerange("20180110-20180130")
+    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
+
+    freqai.dd.pair_dict = MagicMock()
+
+    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
+    new_timerange = TimeRange.parse_timerange("20180120-20180130")
+
+    freqai.train_model_in_series(new_timerange, "ADA/BTC",
+                                 strategy, freqai.dk, data_load_timerange)
+
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists()
+
+    shutil.rmtree(Path(freqai.dk.full_path))
+
+
 def test_start_backtesting(mocker, freqai_conf):
     freqai_conf.update({"timerange": "20180120-20180130"})
     strategy = get_patched_freqai_strategy(mocker, freqai_conf)
diff --git a/tests/rpc/test_rpc_apiserver.py b/tests/rpc/test_rpc_apiserver.py
index e6b7c4dd2..2b66ed332 100644
--- a/tests/rpc/test_rpc_apiserver.py
+++ b/tests/rpc/test_rpc_apiserver.py
@@ -1403,6 +1403,7 @@ def test_api_strategies(botclient):
         'StrategyTestV2',
         'StrategyTestV3',
         'StrategyTestV3Futures',
+        'freqai_test_classifier',
         'freqai_test_multimodel_strat',
         'freqai_test_strat'
     ]}
diff --git a/tests/strategy/strats/freqai_test_classifier.py b/tests/strategy/strats/freqai_test_classifier.py
new file mode 100644
index 000000000..c333ac818
--- /dev/null
+++ b/tests/strategy/strats/freqai_test_classifier.py
@@ -0,0 +1,138 @@
+import logging
+from functools import reduce
+
+import pandas as pd
+import talib.abstract as ta
+from pandas import DataFrame
+import numpy as np
+from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair
+
+
+logger = logging.getLogger(__name__)
+
+
+class freqai_test_classifier(IStrategy):
+    """
+    Test strategy - used for testing freqAI functionalities.
+    DO not use in production.
+    """
+
+    minimal_roi = {"0": 0.1, "240": -1}
+
+    plot_config = {
+        "main_plot": {},
+        "subplots": {
+            "prediction": {"prediction": {"color": "blue"}},
+            "target_roi": {
+                "target_roi": {"color": "brown"},
+            },
+            "do_predict": {
+                "do_predict": {"color": "brown"},
+            },
+        },
+    }
+
+    process_only_new_candles = True
+    stoploss = -0.05
+    use_exit_signal = True
+    startup_candle_count: int = 300
+    can_short = False
+
+    linear_roi_offset = DecimalParameter(
+        0.00, 0.02, default=0.005, space="sell", optimize=False, load=True
+    )
+    max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True)
+
+    def informative_pairs(self):
+        whitelist_pairs = self.dp.current_whitelist()
+        corr_pairs = self.config["freqai"]["feature_parameters"]["include_corr_pairlist"]
+        informative_pairs = []
+        for tf in self.config["freqai"]["feature_parameters"]["include_timeframes"]:
+            for pair in whitelist_pairs:
+                informative_pairs.append((pair, tf))
+            for pair in corr_pairs:
+                if pair in whitelist_pairs:
+                    continue  # avoid duplication
+                informative_pairs.append((pair, tf))
+        return informative_pairs
+
+    def populate_any_indicators(
+        self, pair, df, tf, informative=None, set_generalized_indicators=False
+    ):
+
+        coin = pair.split('/')[0]
+
+        with self.freqai.lock:
+            if informative is None:
+                informative = self.dp.get_pair_dataframe(pair, tf)
+
+            # first loop is automatically duplicating indicators for time periods
+            for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:
+
+                t = int(t)
+                informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
+                informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
+                informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, window=t)
+
+            informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
+            informative[f"%-{coin}raw_volume"] = informative["volume"]
+            informative[f"%-{coin}raw_price"] = informative["close"]
+
+            indicators = [col for col in informative if col.startswith("%")]
+            # This loop duplicates and shifts all indicators to add a sense of recency to data
+            for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1):
+                if n == 0:
+                    continue
+                informative_shift = informative[indicators].shift(n)
+                informative_shift = informative_shift.add_suffix("_shift-" + str(n))
+                informative = pd.concat((informative, informative_shift), axis=1)
+
+            df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
+            skip_columns = [
+                (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]
+            ]
+            df = df.drop(columns=skip_columns)
+
+            # Add generalized indicators here (because in live, it will call this
+            # function to populate indicators during training). Notice how we ensure not to
+            # add them multiple times
+            if set_generalized_indicators:
+                df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
+                df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25
+
+                # user adds targets here by prepending them with &- (see convention below)
+                # If user wishes to use multiple targets, a multioutput prediction model
+                # needs to be used such as templates/CatboostPredictionMultiModel.py
+                df['&s-up_or_down'] = np.where(df["close"].shift(-100) > df["close"], 'up', 'down')
+
+        return df
+
+    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+
+        self.freqai_info = self.config["freqai"]
+
+        dataframe = self.freqai.start(dataframe, metadata, self)
+
+        return dataframe
+
+    def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
+
+        enter_long_conditions = [df['&s-up_or_down'] == 'up']
+
+        if enter_long_conditions:
+            df.loc[
+                reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
+            ] = (1, "long")
+
+        enter_short_conditions = [df['&s-up_or_down'] == 'down']
+
+        if enter_short_conditions:
+            df.loc[
+                reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"]
+            ] = (1, "short")
+
+        return df
+
+    def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
+
+        return df
diff --git a/tests/strategy/test_strategy_loading.py b/tests/strategy/test_strategy_loading.py
index aaad26e5b..5b6f15d11 100644
--- a/tests/strategy/test_strategy_loading.py
+++ b/tests/strategy/test_strategy_loading.py
@@ -34,7 +34,7 @@ def test_search_all_strategies_no_failed():
     directory = Path(__file__).parent / "strats"
     strategies = StrategyResolver.search_all_objects(directory, enum_failed=False)
     assert isinstance(strategies, list)
-    assert len(strategies) == 8
+    assert len(strategies) == 9
     assert isinstance(strategies[0], dict)
 
 
@@ -42,10 +42,10 @@ def test_search_all_strategies_with_failed():
     directory = Path(__file__).parent / "strats"
     strategies = StrategyResolver.search_all_objects(directory, enum_failed=True)
     assert isinstance(strategies, list)
-    assert len(strategies) == 9
+    assert len(strategies) == 10
     # with enum_failed=True search_all_objects() shall find 2 good strategies
     # and 1 which fails to load
-    assert len([x for x in strategies if x['class'] is not None]) == 8
+    assert len([x for x in strategies if x['class'] is not None]) == 9
     assert len([x for x in strategies if x['class'] is None]) == 1
 
 