From 2ad086dd7a3d9ba9af01edc2a2f8100c5972ee89 Mon Sep 17 00:00:00 2001 From: silur Date: Mon, 10 Oct 2022 14:35:07 +0200 Subject: [PATCH 1/7] add XGBoost random forest predictors to freqai --- .../prediction_models/XGBoostRFClassifier.py | 85 +++++++++++++++++++ .../prediction_models/XGBoostRFRegressor.py | 45 ++++++++++ 2 files changed, 130 insertions(+) create mode 100644 freqtrade/freqai/prediction_models/XGBoostRFClassifier.py create mode 100644 freqtrade/freqai/prediction_models/XGBoostRFRegressor.py diff --git a/freqtrade/freqai/prediction_models/XGBoostRFClassifier.py b/freqtrade/freqai/prediction_models/XGBoostRFClassifier.py new file mode 100644 index 000000000..1aba8df85 --- /dev/null +++ b/freqtrade/freqai/prediction_models/XGBoostRFClassifier.py @@ -0,0 +1,85 @@ +import logging +from typing import Any, Dict, Tuple + +import numpy as np +import numpy.typing as npt +import pandas as pd +from pandas import DataFrame +from pandas.api.types import is_integer_dtype +from sklearn.preprocessing import LabelEncoder +from xgboost import XGBRFClassifier + +from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + + +logger = logging.getLogger(__name__) + + +class XGBoostRFClassifier(BaseClassifierModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :params: + :data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + X = data_dictionary["train_features"].to_numpy() + y = data_dictionary["train_labels"].to_numpy()[:, 0] + + le = LabelEncoder() + if not is_integer_dtype(y): + y = pd.Series(le.fit_transform(y), dtype="int64") + + if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0: + eval_set = None + else: + test_features = data_dictionary["test_features"].to_numpy() + test_labels = data_dictionary["test_labels"].to_numpy()[:, 0] + + if not is_integer_dtype(test_labels): + test_labels = pd.Series(le.transform(test_labels), dtype="int64") + + eval_set = [(test_features, test_labels)] + + train_weights = data_dictionary["train_weights"] + + init_model = self.get_init_model(dk.pair) + + model = XGBRFClassifier(**self.model_training_parameters) + + model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights, + xgb_model=init_model) + + return model + + def predict( + self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs + ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: + """ + Filter the prediction features data and predict with it. + :param: unfiltered_df: Full dataframe for the current backtest period. + :return: + :pred_df: dataframe containing the predictions + :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove + data (NaNs) or felt uncertain about data (PCA and DI index) + """ + + (pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs) + + le = LabelEncoder() + label = dk.label_list[0] + labels_before = list(dk.data['labels_std'].keys()) + labels_after = le.fit_transform(labels_before).tolist() + pred_df[label] = le.inverse_transform(pred_df[label]) + pred_df = pred_df.rename( + columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))}) + + return (pred_df, dk.do_predict) diff --git a/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py b/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py new file mode 100644 index 000000000..4c18d594d --- /dev/null +++ b/freqtrade/freqai/prediction_models/XGBoostRFRegressor.py @@ -0,0 +1,45 @@ +import logging +from typing import Any, Dict + +from xgboost import XGBRFRegressor + +from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + + +logger = logging.getLogger(__name__) + + +class XGBoostRFRegressor(BaseRegressionModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any: + """ + User sets up the training and test data to fit their desired model here + :param data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + + if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0: + eval_set = None + else: + eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])] + eval_weights = [data_dictionary['test_weights']] + + sample_weight = data_dictionary["train_weights"] + + xgb_model = self.get_init_model(dk.pair) + + model = XGBRFRegressor(**self.model_training_parameters) + + model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set, + sample_weight_eval_set=eval_weights, xgb_model=xgb_model) + + return model From 30a45bb59746a658e8f9fc970ab2467fdb00443d Mon Sep 17 00:00:00 2001 From: silur Date: Tue, 11 Oct 2022 13:17:21 +0200 Subject: [PATCH 2/7] add XGBoostRF models to freqai test interface --- tests/freqai/test_freqai_interface.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index a61853c47..2f5ada90a 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -30,6 +30,7 @@ def is_mac() -> bool: @pytest.mark.parametrize('model', [ 'LightGBMRegressor', 'XGBoostRegressor', + 'XGBoostRFRegressor', 'CatboostRegressor', ]) def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model): @@ -111,6 +112,7 @@ def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model): 'LightGBMClassifier', 'CatboostClassifier', 'XGBoostClassifier', + 'XGBoostRFClassifier', ]) def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model): if is_arm() and model == 'CatboostClassifier': From 7f05b44376bd62afd019e124b5931568ddcd5190 Mon Sep 17 00:00:00 2001 From: Emre Date: Thu, 13 Oct 2022 23:01:09 +0300 Subject: [PATCH 3/7] Add eval set to CatboostClassifier --- .../freqai/prediction_models/CatboostClassifier.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/freqtrade/freqai/prediction_models/CatboostClassifier.py b/freqtrade/freqai/prediction_models/CatboostClassifier.py index 2aebc3ebf..063c76d1b 100644 --- a/freqtrade/freqai/prediction_models/CatboostClassifier.py +++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py @@ -30,6 +30,14 @@ class CatboostClassifier(BaseClassifierModel): label=data_dictionary["train_labels"], weight=data_dictionary["train_weights"], ) + if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0: + test_data = None + else: + test_data = Pool( + data=data_dictionary["test_features"], + label=data_dictionary["test_labels"], + weight=data_dictionary["test_weights"], + ) cbr = CatBoostClassifier( allow_writing_files=True, @@ -40,6 +48,6 @@ class CatboostClassifier(BaseClassifierModel): init_model = self.get_init_model(dk.pair) - cbr.fit(train_data, init_model=init_model) + cbr.fit(X=train_data, eval_set=test_data, init_model=init_model) return cbr From 498289728d0cc08e8821d83098cddb82bef9b19a Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 15 Oct 2022 08:48:41 +0200 Subject: [PATCH 4/7] Fix catboost tests polluting CWD --- tests/freqai/test_freqai_interface.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 445b718d2..6917bc64b 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -55,6 +55,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model): data_load_timerange = TimeRange.parse_timerange("20180125-20180130") new_timerange = TimeRange.parse_timerange("20180127-20180130") + freqai.dk.set_paths('ADA/BTC', None) freqai.extract_data_and_train_model( new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) @@ -93,6 +94,7 @@ def test_extract_data_and_train_model_MultiTargets(mocker, freqai_conf, model): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") + freqai.dk.set_paths('ADA/BTC', None) freqai.extract_data_and_train_model( new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) @@ -134,6 +136,7 @@ def test_extract_data_and_train_model_Classifiers(mocker, freqai_conf, model): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") + freqai.dk.set_paths('ADA/BTC', None) freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) From c8e6dad9cdec45493b999363032765b5fc5e0e8e Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 15 Oct 2022 12:00:20 +0200 Subject: [PATCH 5/7] use exit_reason to determine left open trades --- freqtrade/optimize/optimize_reports.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/freqtrade/optimize/optimize_reports.py b/freqtrade/optimize/optimize_reports.py index 8dafe2e41..c406f866b 100644 --- a/freqtrade/optimize/optimize_reports.py +++ b/freqtrade/optimize/optimize_reports.py @@ -408,10 +408,10 @@ def generate_strategy_stats(pairlist: List[str], exit_reason_stats = generate_exit_reason_stats(max_open_trades=max_open_trades, results=results) - left_open_results = generate_pair_metrics(pairlist, stake_currency=stake_currency, - starting_balance=start_balance, - results=results.loc[results['is_open']], - skip_nan=True) + left_open_results = generate_pair_metrics( + pairlist, stake_currency=stake_currency, starting_balance=start_balance, + results=results.loc[results['exit_reason'] == 'force_exit'], skip_nan=True) + daily_stats = generate_daily_stats(results) trade_stats = generate_trading_stats(results) best_pair = max([pair for pair in pair_results if pair['key'] != 'TOTAL'], From 05ca725e4def506dd5727cd831b5ee3353eca932 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 15 Oct 2022 12:07:22 +0200 Subject: [PATCH 6/7] Remove no longer needed local state --- freqtrade/optimize/backtesting.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 720069f84..0d5910a62 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -919,11 +919,10 @@ class Backtesting: return trade def handle_left_open(self, open_trades: Dict[str, List[LocalTrade]], - data: Dict[str, List[Tuple]]) -> List[LocalTrade]: + data: Dict[str, List[Tuple]]) -> None: """ Handling of left open trades at the end of backtesting """ - trades = [] for pair in open_trades.keys(): if len(open_trades[pair]) > 0: for trade in open_trades[pair]: @@ -938,11 +937,6 @@ class Backtesting: trade.exit_reason = ExitType.FORCE_EXIT.value trade.close(exit_row[OPEN_IDX], show_msg=False) LocalTrade.close_bt_trade(trade) - # Deepcopy object to have wallets update correctly - trade1 = deepcopy(trade) - trade1.is_open = True - trades.append(trade1) - return trades def trade_slot_available(self, max_open_trades: int, open_trade_count: int) -> bool: # Always allow trades when max_open_trades is enabled. @@ -1094,7 +1088,6 @@ class Backtesting: :param enable_protections: Should protections be enabled? :return: DataFrame with trades (results of backtesting) """ - trades: List[LocalTrade] = [] self.prepare_backtest(enable_protections) # Ensure wallets are uptodate (important for --strategy-list) self.wallets.update() @@ -1188,7 +1181,6 @@ class Backtesting: open_trade_count -= 1 open_trades[pair].remove(trade) LocalTrade.close_bt_trade(trade) - trades.append(trade) self.wallets.update() self.run_protections( enable_protections, pair, current_time, trade.trade_direction) @@ -1197,10 +1189,10 @@ class Backtesting: self.progress.increment() current_time += timedelta(minutes=self.timeframe_min) - trades += self.handle_left_open(open_trades, data=data) + self.handle_left_open(open_trades, data=data) self.wallets.update() - results = trade_list_to_dataframe(trades) + results = trade_list_to_dataframe(LocalTrade.trades) return { 'results': results, 'config': self.strategy.config, From b6c096d3bc5aaf10edb08c40aa895f5612c1bd5c Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 15 Oct 2022 12:08:58 +0200 Subject: [PATCH 7/7] Simplify backtest condition --- freqtrade/optimize/backtesting.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 0d5910a62..0dd99aea3 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -924,19 +924,18 @@ class Backtesting: Handling of left open trades at the end of backtesting """ for pair in open_trades.keys(): - if len(open_trades[pair]) > 0: - for trade in open_trades[pair]: - if trade.open_order_id and trade.nr_of_successful_entries == 0: - # Ignore trade if entry-order did not fill yet - continue - exit_row = data[pair][-1] - self._exit_trade(trade, exit_row, exit_row[OPEN_IDX], trade.amount) - trade.orders[-1].close_bt_order(exit_row[DATE_IDX].to_pydatetime(), trade) + for trade in open_trades[pair]: + if trade.open_order_id and trade.nr_of_successful_entries == 0: + # Ignore trade if entry-order did not fill yet + continue + exit_row = data[pair][-1] + self._exit_trade(trade, exit_row, exit_row[OPEN_IDX], trade.amount) + trade.orders[-1].close_bt_order(exit_row[DATE_IDX].to_pydatetime(), trade) - trade.close_date = exit_row[DATE_IDX].to_pydatetime() - trade.exit_reason = ExitType.FORCE_EXIT.value - trade.close(exit_row[OPEN_IDX], show_msg=False) - LocalTrade.close_bt_trade(trade) + trade.close_date = exit_row[DATE_IDX].to_pydatetime() + trade.exit_reason = ExitType.FORCE_EXIT.value + trade.close(exit_row[OPEN_IDX], show_msg=False) + LocalTrade.close_bt_trade(trade) def trade_slot_available(self, max_open_trades: int, open_trade_count: int) -> bool: # Always allow trades when max_open_trades is enabled.