From 5a4e99b413f84f818662cc3012819db76aec47c1 Mon Sep 17 00:00:00 2001 From: froggleston Date: Mon, 5 Dec 2022 15:34:31 +0000 Subject: [PATCH] Add support for collating and analysing rejected trades in backtest --- docs/advanced-backtesting.md | 37 ++++++- docs/utils.md | 23 +++-- freqtrade/commands/arguments.py | 3 +- freqtrade/commands/cli_options.py | 17 +++- freqtrade/configuration/configuration.py | 9 ++ freqtrade/data/entryexitanalysis.py | 118 +++++++++++++++++------ freqtrade/optimize/backtesting.py | 64 +++++++++--- freqtrade/optimize/optimize_reports.py | 28 ++++-- tests/data/test_entryexitanalysis.py | 12 ++- 9 files changed, 254 insertions(+), 57 deletions(-) diff --git a/docs/advanced-backtesting.md b/docs/advanced-backtesting.md index ae3eb2e4e..779844cfe 100644 --- a/docs/advanced-backtesting.md +++ b/docs/advanced-backtesting.md @@ -29,7 +29,7 @@ If all goes well, you should now see a `backtest-result-{timestamp}_signals.pkl` `user_data/backtest_results` folder. To analyze the entry/exit tags, we now need to use the `freqtrade backtesting-analysis` command -with `--analysis-groups` option provided with space-separated arguments (default `0 1 2`): +with `--analysis-groups` option provided with space-separated arguments: ``` bash freqtrade backtesting-analysis -c --analysis-groups 0 1 2 3 4 @@ -39,6 +39,7 @@ This command will read from the last backtesting results. 
The `--analysis-groups` option is used to specify the various tabular outputs showing the profit of each group or trade, ranging from the simplest (0) to the most detailed per pair, per buy and per sell tag (4): +* 0: overall winrate and profit summary by enter_tag * 1: profit summaries grouped by enter_tag * 2: profit summaries grouped by enter_tag and exit_tag * 3: profit summaries grouped by pair and enter_tag @@ -114,3 +115,37 @@ For example, if your backtest timerange was `20220101-20221231` but you only wan ```bash freqtrade backtesting-analysis -c --timerange 20220101-20220201 ``` + +### Printing out rejected trades + +Use the `--rejected` option to print out rejected trades. + +```bash +freqtrade backtesting-analysis -c --rejected +``` + +### Writing tables to CSV + +Some of the tabular outputs can become large, so printing them out to the terminal is not preferable. +Use the `--analysis-to-csv` option to disable printing out of tables to standard out and write them to CSV files. + +```bash +freqtrade backtesting-analysis -c --analysis-to-csv +``` + +By default this will write one file per output table you specified in the `backtesting-analysis` command, e.g. + +```bash +freqtrade backtesting-analysis -c --analysis-to-csv --rejected --analysis-groups 0 1 +``` + +This will write to `user_data/backtest_results`: +* rejected_trades.csv +* group_0.csv +* group_1.csv + +To override where the files will be written, also specify the `--analysis-csv-path` option. 
+ +```bash +freqtrade backtesting-analysis -c --analysis-to-csv --analysis-csv-path another/data/path/ +``` diff --git a/docs/utils.md b/docs/utils.md index e717a0f9c..24639e81e 100644 --- a/docs/utils.md +++ b/docs/utils.md @@ -723,6 +723,9 @@ usage: freqtrade backtesting-analysis [-h] [-v] [--logfile FILE] [-V] [--exit-reason-list EXIT_REASON_LIST [EXIT_REASON_LIST ...]] [--indicator-list INDICATOR_LIST [INDICATOR_LIST ...]] [--timerange YYYYMMDD-[YYYYMMDD]] + [--rejected] + [--analysis-to-csv] + [--analysis-csv-path PATH] optional arguments: -h, --help show this help message and exit @@ -736,19 +739,27 @@ optional arguments: pair and enter_tag, 4: by pair, enter_ and exit_tag (this can get quite large) --enter-reason-list ENTER_REASON_LIST [ENTER_REASON_LIST ...] - Comma separated list of entry signals to analyse. - Default: all. e.g. 'entry_tag_a,entry_tag_b' + Space separated list of entry signals to analyse. + Default: all. e.g. 'entry_tag_a entry_tag_b' --exit-reason-list EXIT_REASON_LIST [EXIT_REASON_LIST ...] - Comma separated list of exit signals to analyse. + Space separated list of exit signals to analyse. Default: all. e.g. - 'exit_tag_a,roi,stop_loss,trailing_stop_loss' + 'exit_tag_a roi stop_loss trailing_stop_loss' --indicator-list INDICATOR_LIST [INDICATOR_LIST ...] - Comma separated list of indicators to analyse. e.g. - 'close,rsi,bb_lowerband,profit_abs' + Space separated list of indicators to analyse. e.g. + 'close rsi bb_lowerband profit_abs' --timerange YYYYMMDD-[YYYYMMDD] Timerange to filter trades for analysis, start inclusive, end exclusive. e.g. 20220101-20220201 + --rejected + Print out rejected trades table + --analysis-to-csv + Write out tables to individual CSVs, by default to + 'user_data/backtest_results' unless '--analysis-csv-path' is given. + --analysis-csv-path [PATH] + Optional path where individual CSVs will be written. If not used, + CSVs will be written to 'user_data/backtest_results'. 
Common arguments: -v, --verbose Verbose mode (-vv for more, -vvv to get all messages). diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py index b53a1022d..b53aec1fb 100644 --- a/freqtrade/commands/arguments.py +++ b/freqtrade/commands/arguments.py @@ -106,7 +106,8 @@ ARGS_HYPEROPT_SHOW = ["hyperopt_list_best", "hyperopt_list_profitable", "hyperop "disableparamexport", "backtest_breakdown"] ARGS_ANALYZE_ENTRIES_EXITS = ["exportfilename", "analysis_groups", "enter_reason_list", - "exit_reason_list", "indicator_list", "timerange"] + "exit_reason_list", "indicator_list", "timerange", + "analysis_rejected", "analysis_to_csv", "analysis_csv_path"] NO_CONF_REQURIED = ["convert-data", "convert-trade-data", "download-data", "list-timeframes", "list-markets", "list-pairs", "list-strategies", "list-freqaimodels", diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py index 91ac16365..6b019b0d6 100644 --- a/freqtrade/commands/cli_options.py +++ b/freqtrade/commands/cli_options.py @@ -634,7 +634,7 @@ AVAILABLE_CLI_OPTIONS = { "3: by pair and enter_tag, " "4: by pair, enter_ and exit_tag (this can get quite large)"), nargs='+', - default=['0', '1', '2'], + default=[], choices=['0', '1', '2', '3', '4'], ), "enter_reason_list": Arg( @@ -658,6 +658,21 @@ AVAILABLE_CLI_OPTIONS = { nargs='+', default=[], ), + "analysis_rejected": Arg( + '--rejected', + help='Analyse rejected trades', + action='store_true', + ), + "analysis_to_csv": Arg( + '--analysis-to-csv', + help='Save selected analysis tables to individual CSVs', + action='store_true', + ), + "analysis_csv_path": Arg( + '--analysis-csv-path', + help=("Specify a path to save the analysis CSVs " + "if --analysis-to-csv is enabled. 
Default: user_data/basktesting_results/"), + ), "freqaimodel": Arg( '--freqaimodel', help='Specify a custom freqaimodels.', diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py index 664610f33..6698008cc 100644 --- a/freqtrade/configuration/configuration.py +++ b/freqtrade/configuration/configuration.py @@ -465,6 +465,15 @@ class Configuration: self._args_to_config(config, argname='timerange', logstring='Filter trades by timerange: {}') + self._args_to_config(config, argname='analysis_rejected', + logstring='Analyse rejected trades: {}') + + self._args_to_config(config, argname='analysis_to_csv', + logstring='Store analysis tables to CSV: {}') + + self._args_to_config(config, argname='analysis_csv_path', + logstring='Path to store analysis CSVs: {}') + def _process_runmode(self, config: Config) -> None: self._args_to_config(config, argname='dry_run', diff --git a/freqtrade/data/entryexitanalysis.py b/freqtrade/data/entryexitanalysis.py index 565a279b1..0343da6e6 100755 --- a/freqtrade/data/entryexitanalysis.py +++ b/freqtrade/data/entryexitanalysis.py @@ -15,22 +15,30 @@ from freqtrade.exceptions import OperationalException logger = logging.getLogger(__name__) -def _load_signal_candles(backtest_dir: Path): +def _load_backtest_analysis_data(backtest_dir: Path, name: str): if backtest_dir.is_dir(): scpf = Path(backtest_dir, - Path(get_latest_backtest_filename(backtest_dir)).stem + "_signals.pkl" + Path(get_latest_backtest_filename(backtest_dir)).stem + "_" + name + ".pkl" ) else: - scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_signals.pkl") + scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl") try: scp = open(scpf, "rb") - signal_candles = joblib.load(scp) - logger.info(f"Loaded signal candles: {str(scpf)}") + rejected_trades = joblib.load(scp) + logger.info(f"Loaded {name} data: {str(scpf)}") except Exception as e: - logger.error("Cannot load signal candles from pickled results: ", e) + 
logger.error(f"Cannot load {name} data from pickled results: ", e) - return signal_candles + return rejected_trades + + +def _load_rejected_trades(backtest_dir: Path): + return _load_backtest_analysis_data(backtest_dir, "rejected") + + +def _load_signal_candles(backtest_dir: Path): + return _load_backtest_analysis_data(backtest_dir, "signals") def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_candles): @@ -43,9 +51,9 @@ def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_cand for pair in pairlist: if pair in signal_candles[strategy_name]: analysed_trades_dict[strategy_name][pair] = _analyze_candles_and_indicators( - pair, - trades, - signal_candles[strategy_name][pair]) + pair, + trades, + signal_candles[strategy_name][pair]) except Exception as e: print(f"Cannot process entry/exit reasons for {strategy_name}: ", e) @@ -85,7 +93,7 @@ def _analyze_candles_and_indicators(pair, trades, signal_candles): return pd.DataFrame() -def _do_group_table_output(bigdf, glist): +def _do_group_table_output(bigdf, glist, to_csv=False, csv_path=None): for g in glist: # 0: summary wins/losses grouped by enter tag if g == "0": @@ -116,7 +124,8 @@ def _do_group_table_output(bigdf, glist): sortcols = ['total_num_buys'] - _print_table(new, sortcols, show_index=True) + _print_table(new, sortcols, show_index=True, name="Group 0:", + to_csv=to_csv, csv_path=csv_path) else: agg_mask = {'profit_abs': ['count', 'sum', 'median', 'mean'], @@ -148,11 +157,23 @@ def _do_group_table_output(bigdf, glist): new['mean_profit_pct'] = new['mean_profit_pct'] * 100 new['total_profit_pct'] = new['total_profit_pct'] * 100 - _print_table(new, sortcols) + _print_table(new, sortcols, name=f"Group {g}:", + to_csv=to_csv, csv_path=csv_path) else: logger.warning("Invalid group mask specified.") +def _do_rejected_trades_output(rejected_trades_df, to_csv=False, csv_path=None): + cols = ['pair', 'date', 'enter_tag'] + sortcols = ['date', 'pair', 'enter_tag'] + 
_print_table(rejected_trades_df[cols], + sortcols, + show_index=False, + name="Rejected Trades:", + to_csv=to_csv, + csv_path=csv_path) + + def _select_rows_within_dates(df, timerange=None, df_date_col: str = 'date'): if timerange: if timerange.starttype == 'date': @@ -186,38 +207,65 @@ def prepare_results(analysed_trades, stratname, return res_df -def print_results(res_df, analysis_groups, indicator_list): +def print_results(res_df, analysis_groups, indicator_list, + rejected_trades=None, to_csv=False, csv_path=None): if res_df.shape[0] > 0: if analysis_groups: - _do_group_table_output(res_df, analysis_groups) + _do_group_table_output(res_df, analysis_groups, to_csv=to_csv, csv_path=csv_path) + if rejected_trades is not None and not rejected_trades.empty: + _do_rejected_trades_output(rejected_trades, to_csv=to_csv, csv_path=csv_path) + + # NB this can be large for big dataframes! if "all" in indicator_list: - print(res_df) - elif indicator_list is not None: + _print_table(res_df, + show_index=False, + name="Indicators:", + to_csv=to_csv, + csv_path=csv_path) + elif indicator_list is not None and indicator_list: available_inds = [] for ind in indicator_list: if ind in res_df: available_inds.append(ind) ilist = ["pair", "enter_reason", "exit_reason"] + available_inds - _print_table(res_df[ilist], sortcols=['exit_reason'], show_index=False) + _print_table(res_df[ilist], + sortcols=['exit_reason'], + show_index=False, + name="Indicators:", + to_csv=to_csv, + csv_path=csv_path) else: print("\\No trades to show") -def _print_table(df, sortcols=None, show_index=False): +def _print_table(df, sortcols=None, show_index=False, name=None, to_csv=False, csv_path=None): if (sortcols is not None): data = df.sort_values(sortcols) else: data = df - print( - tabulate( - data, - headers='keys', - tablefmt='psql', - showindex=show_index + if to_csv: + if csv_path is not None: + safe_name = Path(csv_path, + name.lower().replace(" ", "_").replace(":", "")) + else: + safe_name = 
Path("user_data", + "backtest_results", + name.lower().replace(" ", "_").replace(":", "")) + data.to_csv(f"{str(safe_name)}.csv") + else: + if name is not None: + print(name) + + print( + tabulate( + data, + headers='keys', + tablefmt='psql', + showindex=show_index + ) ) - ) def process_entry_exit_reasons(config: Config): @@ -226,6 +274,9 @@ def process_entry_exit_reasons(config: Config): enter_reason_list = config.get('enter_reason_list', ["all"]) exit_reason_list = config.get('exit_reason_list', ["all"]) indicator_list = config.get('indicator_list', []) + do_rejected = config.get('analysis_rejected', False) + to_csv = config.get('analysis_to_csv', False) + csv_path = config.get('analysis_csv_path', config['exportfilename']) timerange = TimeRange.parse_timerange(None if config.get( 'timerange') is None else str(config.get('timerange'))) @@ -235,8 +286,16 @@ def process_entry_exit_reasons(config: Config): for strategy_name, results in backtest_stats['strategy'].items(): trades = load_backtest_data(config['exportfilename'], strategy_name) - if not trades.empty: + if trades is not None and not trades.empty: signal_candles = _load_signal_candles(config['exportfilename']) + + rej_df = None + if do_rejected: + rejected_trades_dict = _load_rejected_trades(config['exportfilename']) + rej_df = prepare_results(rejected_trades_dict, strategy_name, + enter_reason_list, exit_reason_list, + timerange=timerange) + analysed_trades_dict = _process_candles_and_indicators( config['exchange']['pair_whitelist'], strategy_name, trades, signal_candles) @@ -247,7 +306,10 @@ def process_entry_exit_reasons(config: Config): print_results(res_df, analysis_groups, - indicator_list) + indicator_list, + rejected_trades=rej_df, + to_csv=to_csv, + csv_path=csv_path) except ValueError as e: raise OperationalException(e) from e diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 82aa2b3e9..9e866fda5 100644 --- a/freqtrade/optimize/backtesting.py +++ 
b/freqtrade/optimize/backtesting.py @@ -29,6 +29,7 @@ from freqtrade.mixins import LoggingMixin from freqtrade.optimize.backtest_caching import get_strategy_run_id from freqtrade.optimize.bt_progress import BTProgress from freqtrade.optimize.optimize_reports import (generate_backtest_stats, show_backtest_results, + store_backtest_rejected_trades, store_backtest_signal_candles, store_backtest_stats) from freqtrade.persistence import LocalTrade, Order, PairLocks, Trade @@ -83,6 +84,8 @@ class Backtesting: self.strategylist: List[IStrategy] = [] self.all_results: Dict[str, Dict] = {} self.processed_dfs: Dict[str, Dict] = {} + self.rejected_dict: Dict[str, List] = {} + self.rejected_df: Dict[str, Dict] = {} self._exchange_name = self.config['exchange']['name'] self.exchange = ExchangeResolver.load_exchange( @@ -1048,6 +1051,18 @@ class Backtesting: return None return row + def _collate_rejected(self, pair, row): + """ + Temporarily store rejected trade information for downstream use in backtesting_analysis + """ + # It could be fun to enable hyperopt mode to write + # a loss function to reduce rejected signals + if (self.config.get('export', 'none') == 'signals' and + self.dataprovider.runmode == RunMode.BACKTEST): + if pair not in self.rejected_dict: + self.rejected_dict[pair] = [] + self.rejected_dict[pair].append([row[DATE_IDX], row[ENTER_TAG_IDX]]) + def backtest_loop( self, row: Tuple, pair: str, current_time: datetime, end_date: datetime, max_open_trades: int, open_trade_count_start: int, is_first: bool = True) -> int: @@ -1073,20 +1088,22 @@ class Backtesting: if ( (self._position_stacking or len(LocalTrade.bt_trades_open_pp[pair]) == 0) and is_first - and self.trade_slot_available(max_open_trades, open_trade_count_start) and current_time != end_date and trade_dir is not None and not PairLocks.is_pair_locked(pair, row[DATE_IDX], trade_dir) ): - trade = self._enter_trade(pair, row, trade_dir) - if trade: - # TODO: hacky workaround to avoid opening > 
max_open_trades - # This emulates previous behavior - not sure if this is correct - # Prevents entering if the trade-slot was freed in this candle - open_trade_count_start += 1 - # logger.debug(f"{pair} - Emulate creation of new trade: {trade}.") - LocalTrade.add_bt_trade(trade) - self.wallets.update() + if (self.trade_slot_available(max_open_trades, open_trade_count_start)): + trade = self._enter_trade(pair, row, trade_dir) + if trade: + # TODO: hacky workaround to avoid opening > max_open_trades + # This emulates previous behavior - not sure if this is correct + # Prevents entering if the trade-slot was freed in this candle + open_trade_count_start += 1 + # logger.debug(f"{pair} - Emulate creation of new trade: {trade}.") + LocalTrade.add_bt_trade(trade) + self.wallets.update() + else: + self._collate_rejected(pair, row) for trade in list(LocalTrade.bt_trades_open_pp[pair]): # 3. Process entry orders. @@ -1266,6 +1283,7 @@ class Backtesting: if (self.config.get('export', 'none') == 'signals' and self.dataprovider.runmode == RunMode.BACKTEST): self._generate_trade_signal_candles(preprocessed_tmp, results) + self._generate_rejected_trades(preprocessed_tmp, self.rejected_dict) return min_date, max_date @@ -1282,12 +1300,33 @@ class Backtesting: for t, v in pairresults.open_date.items(): allinds = pairdf.loc[(pairdf['date'] < v)] signal_inds = allinds.iloc[[-1]] - signal_candles_only_df = pd.concat([signal_candles_only_df, signal_inds]) + signal_candles_only_df = pd.concat([ + signal_candles_only_df.infer_objects(), + signal_inds.infer_objects()]) signal_candles_only[pair] = signal_candles_only_df self.processed_dfs[self.strategy.get_strategy_name()] = signal_candles_only + def _generate_rejected_trades(self, preprocessed_df, rejected_dict): + rejected_candles_only = {} + for pair, trades in rejected_dict.items(): + rejected_trades_only_df = DataFrame() + pairdf = preprocessed_df[pair] + + for t in trades: + data_df_row = pairdf.loc[(pairdf['date'] == t[0])].copy() + 
data_df_row['pair'] = pair + data_df_row['enter_tag'] = t[1] + + rejected_trades_only_df = pd.concat([ + rejected_trades_only_df.infer_objects(), + data_df_row.infer_objects()]) + + rejected_candles_only[pair] = rejected_trades_only_df + + self.rejected_df[self.strategy.get_strategy_name()] = rejected_candles_only + def _get_min_cached_backtest_date(self): min_backtest_date = None backtest_cache_age = self.config.get('backtest_cache', constants.BACKTEST_CACHE_DEFAULT) @@ -1353,6 +1392,9 @@ class Backtesting: store_backtest_signal_candles( self.config['exportfilename'], self.processed_dfs, dt_appendix) + store_backtest_rejected_trades( + self.config['exportfilename'], self.rejected_df, dt_appendix) + # Results may be mixed up now. Sort them so they follow --strategy-list order. if 'strategy_list' in self.config and len(self.results) > 0: self.results['strategy_comparison'] = sorted( diff --git a/freqtrade/optimize/optimize_reports.py b/freqtrade/optimize/optimize_reports.py index 8ad37e7d8..2ac0eed3d 100644 --- a/freqtrade/optimize/optimize_reports.py +++ b/freqtrade/optimize/optimize_reports.py @@ -45,29 +45,41 @@ def store_backtest_stats( file_dump_json(latest_filename, {'latest_backtest': str(filename.name)}) -def store_backtest_signal_candles( - recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path: +def _store_backtest_analysis_data( + recordfilename: Path, data: Dict[str, Dict], + dtappendix: str, name: str) -> Path: """ - Stores backtest trade signal candles + Stores backtest trade candles for analysis :param recordfilename: Path object, which can either be a filename or a directory. 
Filenames will be appended with a timestamp right before the suffix - while for directories, /backtest-result-_signals.pkl will be used + while for directories, /backtest-result-_.pkl will be used as filename - :param stats: Dict containing the backtesting signal candles + :param candles: Dict containing the backtesting data for analysis :param dtappendix: Datetime to use for the filename + :param name: Name to use for the file, e.g. signals, rejected """ if recordfilename.is_dir(): - filename = (recordfilename / f'backtest-result-{dtappendix}_signals.pkl') + filename = (recordfilename / f'backtest-result-{dtappendix}_{name}.pkl') else: filename = Path.joinpath( - recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_signals.pkl' + recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_{name}.pkl' ) - file_dump_joblib(filename, candles) + file_dump_joblib(filename, data) return filename +def store_backtest_signal_candles( + recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path: + return _store_backtest_analysis_data(recordfilename, candles, dtappendix, "signals") + + +def store_backtest_rejected_trades( + recordfilename: Path, trades: Dict[str, Dict], dtappendix: str) -> Path: + return _store_backtest_analysis_data(recordfilename, trades, dtappendix, "rejected") + + def _get_line_floatfmt(stake_currency: str) -> List[str]: """ Generate floatformat (goes in line with _generate_result_line()) diff --git a/tests/data/test_entryexitanalysis.py b/tests/data/test_entryexitanalysis.py index e33ed4955..96ab7b1ca 100755 --- a/tests/data/test_entryexitanalysis.py +++ b/tests/data/test_entryexitanalysis.py @@ -191,8 +191,18 @@ def test_backtest_analysis_nomock(default_conf, mocker, caplog, testdatadir, tmp assert '2.5' in captured.out # test date filtering - args = get_args(base_args + ['--timerange', "20180129-20180130"]) + args = get_args(base_args + + ['--analysis-groups', "0", "1", "2", + '--timerange', "20180129-20180130"] + ) 
start_analysis_entries_exits(args) captured = capsys.readouterr() assert 'enter_tag_long_a' in captured.out assert 'enter_tag_long_b' not in captured.out + + # test rejected - how to mock this? + # args = get_args(base_args + ['--rejected']) + # start_analysis_entries_exits(args) + # captured = capsys.readouterr() + # assert 'Rejected Trades:' in captured.out + # assert False