From 60eb02bb62f5952695a69d9eef531d33d91727bf Mon Sep 17 00:00:00 2001 From: Emre Date: Sat, 10 Sep 2022 20:13:16 +0300 Subject: [PATCH 1/8] Add XGBoostClassifier --- .../prediction_models/XGBoostClassifier.py | 85 +++++++++++++++++++ tests/freqai/test_freqai_interface.py | 30 +++++++ 2 files changed, 115 insertions(+) create mode 100644 freqtrade/freqai/prediction_models/XGBoostClassifier.py diff --git a/freqtrade/freqai/prediction_models/XGBoostClassifier.py b/freqtrade/freqai/prediction_models/XGBoostClassifier.py new file mode 100644 index 000000000..8bf5d6281 --- /dev/null +++ b/freqtrade/freqai/prediction_models/XGBoostClassifier.py @@ -0,0 +1,85 @@ +import logging +from typing import Any, Dict, Tuple + +import numpy as np +import numpy.typing as npt +import pandas as pd +from pandas import DataFrame +from pandas.api.types import is_integer_dtype +from sklearn.preprocessing import LabelEncoder +from xgboost import XGBClassifier + +from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + + +logger = logging.getLogger(__name__) + + +class XGBoostClassifier(BaseClassifierModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :params: + :data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. 
+ """ + + X = data_dictionary["train_features"].to_numpy() + y = data_dictionary["train_labels"].to_numpy()[:, 0] + + le = LabelEncoder() + if not is_integer_dtype(y): + y = pd.Series(le.fit_transform(y), dtype="int64") + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: + eval_set = None + else: + test_features = data_dictionary["test_features"].to_numpy() + test_labels = data_dictionary["test_labels"].to_numpy()[:, 0] + + if not is_integer_dtype(test_labels): + test_labels = pd.Series(le.transform(test_labels), dtype="int64") + + eval_set = [(test_features, test_labels)] + + train_weights = data_dictionary["train_weights"] + + init_model = self.get_init_model(dk.pair) + + model = XGBClassifier(**self.model_training_parameters) + + model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, + xgb_model=init_model) + + return model + + def predict( + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs + ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: + """ + Filter the prediction features data and predict with it. + :param: unfiltered_df: Full dataframe for the current backtest period. 
+ :return: + :pred_df: dataframe containing the predictions + :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove + data (NaNs) or felt uncertain about data (PCA and DI index) + """ + + (pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs) + + le = LabelEncoder() + label = dk.label_list[0] + labels_before = list(dk.data['labels_std'].keys()) + labels_after = le.fit_transform(labels_before).tolist() + pred_df[label] = le.inverse_transform(pred_df[label]) + pred_df = pred_df.rename( + columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}) + + return (pred_df, dk.do_predict) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 5f8eeb086..afcc4fd37 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -174,6 +174,36 @@ def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) +def test_extract_data_and_train_model_XGBoostClassifier(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + freqai_conf.update({"freqaimodel": "XGBoostClassifier"}) + freqai_conf.update({"strategy": "freqai_test_classifier"}) + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) + + 
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists() + assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists() + + def test_extract_data_and_train_model_XGBoostRegressor(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "XGBoostRegressor"}) From 72d197a99d0570ae9d3607042d7ec5a533907c8e Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 11 Sep 2022 15:42:27 +0200 Subject: [PATCH 2/8] Run first epoch in non-parallel mode this allows dataprovider to load its cache. closes #7384 --- freqtrade/data/dataprovider.py | 4 +++- freqtrade/optimize/hyperopt.py | 21 +++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/freqtrade/data/dataprovider.py b/freqtrade/data/dataprovider.py index e639b3ae7..c6519d2b8 100644 --- a/freqtrade/data/dataprovider.py +++ b/freqtrade/data/dataprovider.py @@ -196,7 +196,9 @@ class DataProvider: Clear pair dataframe cache. """ self.__cached_pairs = {} - self.__cached_pairs_backtesting = {} + # Don't reset backtesting pairs - + # otherwise they're reloaded each time during hyperopt due to with analyze_per_epoch + # self.__cached_pairs_backtesting = {} self.__slice_index = 0 # Exchange functions diff --git a/freqtrade/optimize/hyperopt.py b/freqtrade/optimize/hyperopt.py index 76fc84609..b0119368f 100644 --- a/freqtrade/optimize/hyperopt.py +++ b/freqtrade/optimize/hyperopt.py @@ -580,11 +580,24 @@ class Hyperopt: max_value=self.total_epochs, redirect_stdout=False, redirect_stderr=False, widgets=widgets ) as pbar: - EVALS = ceil(self.total_epochs / jobs) - for i in range(EVALS): + start = 0 + + if self.analyze_per_epoch: + # First analysis not in parallel mode when using --analyze-per-epoch. 
+ # This allows dataprovider to load it's informative cache. + asked, is_random = self.get_asked_points(n_points=1) + # print(asked) + f_val = self.generate_optimizer(asked[0]) + self.opt.tell(asked, [f_val['loss']]) + self.evaluate_result(f_val, 1, is_random[0]) + pbar.update(1) + start += 1 + + evals = ceil((self.total_epochs - start) / jobs) + for i in range(evals): # Correct the number of epochs to be processed for the last # iteration (should not exceed self.total_epochs in total) - n_rest = (i + 1) * jobs - self.total_epochs + n_rest = (i + 1) * jobs - (self.total_epochs - start) current_jobs = jobs - n_rest if n_rest > 0 else jobs asked, is_random = self.get_asked_points(n_points=current_jobs) @@ -594,7 +607,7 @@ class Hyperopt: # Calculate progressbar outputs for j, val in enumerate(f_val): # Use human-friendly indexes here (starting from 1) - current = i * jobs + j + 1 + current = i * jobs + j + 1 + start self.evaluate_result(val, current, is_random[j]) From 816c1f760373f6fc55710cb2e3f09ae39eb14fc5 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 11 Sep 2022 17:51:30 +0200 Subject: [PATCH 3/8] add test for per epoch hyperopt --- tests/optimize/test_hyperopt.py | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/tests/optimize/test_hyperopt.py b/tests/optimize/test_hyperopt.py index 0f615b7a3..eaea8aee7 100644 --- a/tests/optimize/test_hyperopt.py +++ b/tests/optimize/test_hyperopt.py @@ -922,6 +922,45 @@ def test_in_strategy_auto_hyperopt_with_parallel(mocker, hyperopt_conf, tmpdir, hyperopt.start() +def test_in_strategy_auto_hyperopt_per_epoch(mocker, hyperopt_conf, tmpdir, fee) -> None: + patch_exchange(mocker) + mocker.patch('freqtrade.exchange.Exchange.get_fee', fee) + (Path(tmpdir) / 'hyperopt_results').mkdir(parents=True) + + hyperopt_conf.update({ + 'strategy': 'HyperoptableStrategy', + 'user_data_dir': Path(tmpdir), + 'hyperopt_random_state': 42, + 'spaces': ['all'], + 'epochs': 3, + 'analyze_per_epoch': True, + 
}) + go = mocker.patch('freqtrade.optimize.hyperopt.Hyperopt.generate_optimizer', + return_value={ + 'loss': 0.05, + 'results_explanation': 'foo result', 'params': {}, + 'results_metrics': generate_result_metrics(), + }) + hyperopt = Hyperopt(hyperopt_conf) + hyperopt.backtesting.exchange.get_max_leverage = MagicMock(return_value=1.0) + assert isinstance(hyperopt.custom_hyperopt, HyperOptAuto) + assert isinstance(hyperopt.backtesting.strategy.buy_rsi, IntParameter) + assert hyperopt.backtesting.strategy.bot_loop_started is True + + assert hyperopt.backtesting.strategy.buy_rsi.in_space is True + assert hyperopt.backtesting.strategy.buy_rsi.value == 35 + assert hyperopt.backtesting.strategy.sell_rsi.value == 74 + assert hyperopt.backtesting.strategy.protection_cooldown_lookback.value == 30 + buy_rsi_range = hyperopt.backtesting.strategy.buy_rsi.range + assert isinstance(buy_rsi_range, range) + # Range from 0 - 50 (inclusive) + assert len(list(buy_rsi_range)) == 51 + + hyperopt.start() + # backtesting should be called 3 times (once per epoch) + assert go.call_count == 3 + + def test_SKDecimal(): space = SKDecimal(1, 2, decimals=2) assert 1.5 in space From 982c0315fa2ba909362173c2e892aa7bca2c836b Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 11 Sep 2022 19:31:11 +0200 Subject: [PATCH 4/8] Rename variable --- freqtrade/optimize/hyperopt.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/freqtrade/optimize/hyperopt.py b/freqtrade/optimize/hyperopt.py index b0119368f..f15e0b7d8 100644 --- a/freqtrade/optimize/hyperopt.py +++ b/freqtrade/optimize/hyperopt.py @@ -586,10 +586,9 @@ class Hyperopt: # First analysis not in parallel mode when using --analyze-per-epoch. # This allows dataprovider to load it's informative cache. 
asked, is_random = self.get_asked_points(n_points=1) - # print(asked) - f_val = self.generate_optimizer(asked[0]) - self.opt.tell(asked, [f_val['loss']]) - self.evaluate_result(f_val, 1, is_random[0]) + f_val0 = self.generate_optimizer(asked[0]) + self.opt.tell(asked, [f_val0['loss']]) + self.evaluate_result(f_val0, 1, is_random[0]) pbar.update(1) start += 1 From 7b6e465d57e392114d8cca2df24c18c5d4c3e4fe Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 13 Sep 2022 21:28:11 +0200 Subject: [PATCH 5/8] Remove gate live test skip --- tests/exchange/test_ccxt_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/exchange/test_ccxt_compat.py b/tests/exchange/test_ccxt_compat.py index f57b0b366..82be6196a 100644 --- a/tests/exchange/test_ccxt_compat.py +++ b/tests/exchange/test_ccxt_compat.py @@ -297,7 +297,7 @@ class TestCCXTExchange(): def test_ccxt__async_get_candle_history(self, exchange): exchange, exchangename = exchange # For some weired reason, this test returns random lengths for bittrex. 
- if not exchange._ft_has['ohlcv_has_history'] or exchangename in ('bittrex', 'gateio'): + if not exchange._ft_has['ohlcv_has_history'] or exchangename in ('bittrex'): return pair = EXCHANGES[exchangename]['pair'] timeframe = EXCHANGES[exchangename]['timeframe'] From 37dd22c89e2e50bce49ffce17277dc06d2abec8b Mon Sep 17 00:00:00 2001 From: initrv Date: Wed, 14 Sep 2022 03:40:13 +0300 Subject: [PATCH 6/8] Fixed a bug that prevents clearing old models Corrects the error of clearing old models when the model directory contains directories with names that do not match a regular expression --- freqtrade/freqai/data_drawer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 9eeabef8f..1c091f1be 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -355,7 +355,7 @@ class FreqaiDataDrawer: for dir in model_folders: result = pattern.match(str(dir.name)) if result is None: - break + continue coin = result.group(1) timestamp = result.group(2) From 49800e4cc300eb39b412287a05480a98962c354b Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 14 Sep 2022 06:55:05 +0200 Subject: [PATCH 7/8] pin ci python to 3.10.6 for now --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bb5bc209e..91d53044d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: os: [ ubuntu-18.04, ubuntu-20.04, ubuntu-22.04 ] - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10.6"] steps: - uses: actions/checkout@v3 @@ -121,7 +121,7 @@ jobs: strategy: matrix: os: [ macos-latest ] - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10.6"] steps: - uses: actions/checkout@v3 @@ -205,7 +205,7 @@ jobs: strategy: matrix: os: [ windows-latest ] - python-version: ["3.8", "3.9", "3.10"] + 
python-version: ["3.8", "3.9", "3.10.6"] steps: - uses: actions/checkout@v3 From 91bc3d11618c3f5bf47a8a71166544cf4e0b7a0f Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 14 Sep 2022 07:04:14 +0200 Subject: [PATCH 8/8] Update docs around use_exit_signal close #7413 --- docs/strategy-customization.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/strategy-customization.md b/docs/strategy-customization.md index 2b6e1fb2f..b97bd6d23 100644 --- a/docs/strategy-customization.md +++ b/docs/strategy-customization.md @@ -264,7 +264,8 @@ def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFram ### Exit signal rules Edit the method `populate_exit_trend()` into your strategy file to update your exit strategy. -Please note that the exit-signal is only used if `use_exit_signal` is set to true in the configuration. +The exit-signal is only used for exits if `use_exit_signal` is set to true in the configuration. +`use_exit_signal` will not influence [signal collision rules](#colliding-signals) - which will still apply and can prevent entries. It's important to always return the dataframe without removing/modifying the columns `"open", "high", "low", "close", "volume"`, otherwise these fields would contain something unexpected.