Add support for collating and analysing rejected trades in backtest

froggleston 2022-12-05 15:34:31 +00:00
parent f28b314266
commit 5a4e99b413
9 changed files with 254 additions and 57 deletions

View File

@@ -29,7 +29,7 @@ If all goes well, you should now see a `backtest-result-{timestamp}_signals.pkl`
`user_data/backtest_results` folder.
To analyze the entry/exit tags, we now need to use the `freqtrade backtesting-analysis` command
with `--analysis-groups` option provided with space-separated arguments (default `0 1 2`):
with `--analysis-groups` option provided with space-separated arguments:
``` bash
freqtrade backtesting-analysis -c <config.json> --analysis-groups 0 1 2 3 4
@@ -39,6 +39,7 @@ This command will read from the last backtesting results. The `--analysis-groups`
used to specify the various tabular outputs showing the profit for each group or trade,
ranging from the simplest (0) to the most detailed per pair, per buy and per sell tag (4):
* 0: overall winrate and profit summary by enter_tag
* 1: profit summaries grouped by enter_tag
* 2: profit summaries grouped by enter_tag and exit_tag
* 3: profit summaries grouped by pair and enter_tag
* 4: profit summaries grouped by pair, enter_ and exit_tag (this can get quite large)
@@ -114,3 +115,37 @@ For example, if your backtest timerange was `20220101-20221231` but you only wan
```bash
freqtrade backtesting-analysis -c <config.json> --timerange 20220101-20220201
```
### Printing out rejected trades
Use the `--rejected` option to print out a table of rejected trades, i.e. entry signals that could not be turned into trades (for example because no free trade slot was available).
```bash
freqtrade backtesting-analysis -c <config.json> --rejected
```
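The rejected trades are also stored on disk as a joblib pickle alongside the other backtest results. As a minimal sketch (the timestamp in the filename is hypothetical; use the one from your own backtest run), the file can be inspected directly:
```python
from pathlib import Path

import joblib

# Hypothetical filename - the timestamp comes from your own backtest run
rejected_path = Path("user_data/backtest_results",
                     "backtest-result-2022-12-05_15-34-31_rejected.pkl")
with rejected_path.open("rb") as f:
    # Dict of {strategy_name: {pair: DataFrame of candles at rejection time}}
    rejected = joblib.load(f)

for strategy_name, pairs in rejected.items():
    for pair, df in pairs.items():
        print(f"{strategy_name} {pair}: {len(df)} rejected entries")
```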
### Writing tables to CSV
Some of the tabular outputs can become quite large, so printing them to the terminal is not always practical.
Use the `--analysis-to-csv` option to suppress the table output to standard out and write the tables to CSV files instead.
```bash
freqtrade backtesting-analysis -c <config.json> --analysis-to-csv
```
By default this will write one file per output table you specified in the `backtesting-analysis` command, e.g.
```bash
freqtrade backtesting-analysis -c <config.json> --analysis-to-csv --rejected --analysis-groups 0 1
```
This will write to `user_data/backtest_results`:
* `rejected_trades.csv`
* `group_0.csv`
* `group_1.csv`
To override where the files will be written, also specify the `--analysis-csv-path` option.
```bash
freqtrade backtesting-analysis -c <config.json> --analysis-to-csv --analysis-csv-path another/data/path/
```
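The written files are plain CSVs, so they can be post-processed with any tooling. A minimal sketch using pandas, assuming the default output location and the `group_0.csv` produced by the example above:
```python
import pandas as pd

# Load one of the per-group tables written by --analysis-to-csv
group0 = pd.read_csv("user_data/backtest_results/group_0.csv")
print(group0.head())
```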

View File

@@ -723,6 +723,9 @@ usage: freqtrade backtesting-analysis [-h] [-v] [--logfile FILE] [-V]
[--exit-reason-list EXIT_REASON_LIST [EXIT_REASON_LIST ...]]
[--indicator-list INDICATOR_LIST [INDICATOR_LIST ...]]
[--timerange YYYYMMDD-[YYYYMMDD]]
[--rejected]
[--analysis-to-csv]
[--analysis-csv-path PATH]
optional arguments:
-h, --help show this help message and exit
@@ -736,19 +739,27 @@ optional arguments:
pair and enter_tag, 4: by pair, enter_ and exit_tag
(this can get quite large)
--enter-reason-list ENTER_REASON_LIST [ENTER_REASON_LIST ...]
Comma separated list of entry signals to analyse.
Default: all. e.g. 'entry_tag_a,entry_tag_b'
Space separated list of entry signals to analyse.
Default: all. e.g. 'entry_tag_a entry_tag_b'
--exit-reason-list EXIT_REASON_LIST [EXIT_REASON_LIST ...]
Comma separated list of exit signals to analyse.
Space separated list of exit signals to analyse.
Default: all. e.g.
'exit_tag_a,roi,stop_loss,trailing_stop_loss'
'exit_tag_a roi stop_loss trailing_stop_loss'
--indicator-list INDICATOR_LIST [INDICATOR_LIST ...]
Comma separated list of indicators to analyse. e.g.
'close,rsi,bb_lowerband,profit_abs'
Space separated list of indicators to analyse. e.g.
'close rsi bb_lowerband profit_abs'
--timerange YYYYMMDD-[YYYYMMDD]
Timerange to filter trades for analysis,
start inclusive, end exclusive. e.g.
20220101-20220201
--rejected
Print out rejected trades table
--analysis-to-csv
Write out tables to individual CSVs, by default to
'user_data/backtest_results' unless '--analysis-csv-path' is given.
--analysis-csv-path PATH
Optional path where individual CSVs will be written. If not used,
CSVs will be written to 'user_data/backtest_results'.
Common arguments:
-v, --verbose Verbose mode (-vv for more, -vvv to get all messages).

View File

@@ -106,7 +106,8 @@ ARGS_HYPEROPT_SHOW = ["hyperopt_list_best", "hyperopt_list_profitable", "hyperop
"disableparamexport", "backtest_breakdown"]
ARGS_ANALYZE_ENTRIES_EXITS = ["exportfilename", "analysis_groups", "enter_reason_list",
"exit_reason_list", "indicator_list", "timerange"]
"exit_reason_list", "indicator_list", "timerange",
"analysis_rejected", "analysis_to_csv", "analysis_csv_path"]
NO_CONF_REQURIED = ["convert-data", "convert-trade-data", "download-data", "list-timeframes",
"list-markets", "list-pairs", "list-strategies", "list-freqaimodels",

View File

@@ -634,7 +634,7 @@ AVAILABLE_CLI_OPTIONS = {
"3: by pair and enter_tag, "
"4: by pair, enter_ and exit_tag (this can get quite large)"),
nargs='+',
default=['0', '1', '2'],
default=[],
choices=['0', '1', '2', '3', '4'],
),
"enter_reason_list": Arg(
@@ -658,6 +658,21 @@ AVAILABLE_CLI_OPTIONS = {
nargs='+',
default=[],
),
"analysis_rejected": Arg(
'--rejected',
help='Analyse rejected trades',
action='store_true',
),
"analysis_to_csv": Arg(
'--analysis-to-csv',
help='Save selected analysis tables to individual CSVs',
action='store_true',
),
"analysis_csv_path": Arg(
'--analysis-csv-path',
help=("Specify a path to save the analysis CSVs "
"if --analysis-to-csv is enabled. Default: user_data/basktesting_results/"),
),
"freqaimodel": Arg(
'--freqaimodel',
help='Specify a custom freqaimodel.',

View File

@@ -465,6 +465,15 @@ class Configuration:
self._args_to_config(config, argname='timerange',
logstring='Filter trades by timerange: {}')
self._args_to_config(config, argname='analysis_rejected',
logstring='Analyse rejected trades: {}')
self._args_to_config(config, argname='analysis_to_csv',
logstring='Store analysis tables to CSV: {}')
self._args_to_config(config, argname='analysis_csv_path',
logstring='Path to store analysis CSVs: {}')
def _process_runmode(self, config: Config) -> None:
self._args_to_config(config, argname='dry_run',

View File

@@ -15,22 +15,30 @@ from freqtrade.exceptions import OperationalException
logger = logging.getLogger(__name__)
def _load_signal_candles(backtest_dir: Path):
def _load_backtest_analysis_data(backtest_dir: Path, name: str):
if backtest_dir.is_dir():
scpf = Path(backtest_dir,
Path(get_latest_backtest_filename(backtest_dir)).stem + "_signals.pkl"
Path(get_latest_backtest_filename(backtest_dir)).stem + "_" + name + ".pkl"
)
else:
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_signals.pkl")
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl")
try:
scp = open(scpf, "rb")
signal_candles = joblib.load(scp)
logger.info(f"Loaded signal candles: {str(scpf)}")
loaded_data = joblib.load(scp)
logger.info(f"Loaded {name} data: {str(scpf)}")
except Exception as e:
logger.error("Cannot load signal candles from pickled results: ", e)
logger.error(f"Cannot load {name} data from pickled results: ", e)
return signal_candles
return loaded_data
def _load_rejected_trades(backtest_dir: Path):
return _load_backtest_analysis_data(backtest_dir, "rejected")
def _load_signal_candles(backtest_dir: Path):
return _load_backtest_analysis_data(backtest_dir, "signals")
def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_candles):
@@ -43,9 +51,9 @@ def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_cand
for pair in pairlist:
if pair in signal_candles[strategy_name]:
analysed_trades_dict[strategy_name][pair] = _analyze_candles_and_indicators(
pair,
trades,
signal_candles[strategy_name][pair])
pair,
trades,
signal_candles[strategy_name][pair])
except Exception as e:
print(f"Cannot process entry/exit reasons for {strategy_name}: ", e)
@@ -85,7 +93,7 @@ def _analyze_candles_and_indicators(pair, trades, signal_candles):
return pd.DataFrame()
def _do_group_table_output(bigdf, glist):
def _do_group_table_output(bigdf, glist, to_csv=False, csv_path=None):
for g in glist:
# 0: summary wins/losses grouped by enter tag
if g == "0":
@@ -116,7 +124,8 @@
sortcols = ['total_num_buys']
_print_table(new, sortcols, show_index=True)
_print_table(new, sortcols, show_index=True, name="Group 0:",
to_csv=to_csv, csv_path=csv_path)
else:
agg_mask = {'profit_abs': ['count', 'sum', 'median', 'mean'],
@@ -148,11 +157,23 @@
new['mean_profit_pct'] = new['mean_profit_pct'] * 100
new['total_profit_pct'] = new['total_profit_pct'] * 100
_print_table(new, sortcols)
_print_table(new, sortcols, name=f"Group {g}:",
to_csv=to_csv, csv_path=csv_path)
else:
logger.warning("Invalid group mask specified.")
def _do_rejected_trades_output(rejected_trades_df, to_csv=False, csv_path=None):
cols = ['pair', 'date', 'enter_tag']
sortcols = ['date', 'pair', 'enter_tag']
_print_table(rejected_trades_df[cols],
sortcols,
show_index=False,
name="Rejected Trades:",
to_csv=to_csv,
csv_path=csv_path)
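A minimal usage sketch for the helper above, with illustrative values; it assumes it is run from within this module, since it relies on `_print_table`:
```python
import pandas as pd

demo = pd.DataFrame({
    'pair': ['BTC/USDT', 'ETH/USDT'],
    'date': ['2022-01-01 00:00:00', '2022-01-01 00:05:00'],
    'enter_tag': ['enter_tag_long_a', 'enter_tag_long_b'],
})
# Prints a "Rejected Trades:" table sorted by date, pair and enter_tag;
# with to_csv=True it writes rejected_trades.csv instead.
_do_rejected_trades_output(demo)
```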
def _select_rows_within_dates(df, timerange=None, df_date_col: str = 'date'):
if timerange:
if timerange.starttype == 'date':
@@ -186,38 +207,65 @@ def prepare_results(analysed_trades, stratname,
return res_df
def print_results(res_df, analysis_groups, indicator_list):
def print_results(res_df, analysis_groups, indicator_list,
rejected_trades=None, to_csv=False, csv_path=None):
if res_df.shape[0] > 0:
if analysis_groups:
_do_group_table_output(res_df, analysis_groups)
_do_group_table_output(res_df, analysis_groups, to_csv=to_csv, csv_path=csv_path)
if rejected_trades is not None and not rejected_trades.empty:
_do_rejected_trades_output(rejected_trades, to_csv=to_csv, csv_path=csv_path)
# NB this can be large for big dataframes!
if "all" in indicator_list:
print(res_df)
elif indicator_list is not None:
_print_table(res_df,
show_index=False,
name="Indicators:",
to_csv=to_csv,
csv_path=csv_path)
elif indicator_list:
available_inds = []
for ind in indicator_list:
if ind in res_df:
available_inds.append(ind)
ilist = ["pair", "enter_reason", "exit_reason"] + available_inds
_print_table(res_df[ilist], sortcols=['exit_reason'], show_index=False)
_print_table(res_df[ilist],
sortcols=['exit_reason'],
show_index=False,
name="Indicators:",
to_csv=to_csv,
csv_path=csv_path)
else:
print("\\No trades to show")
def _print_table(df, sortcols=None, show_index=False):
def _print_table(df, sortcols=None, show_index=False, name=None, to_csv=False, csv_path=None):
if (sortcols is not None):
data = df.sort_values(sortcols)
else:
data = df
print(
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
if to_csv:
if csv_path is not None:
safe_name = Path(csv_path,
name.lower().replace(" ", "_").replace(":", ""))
else:
safe_name = Path("user_data",
"backtest_results",
name.lower().replace(" ", "_").replace(":", ""))
data.to_csv(f"{str(safe_name)}.csv")
else:
if name is not None:
print(name)
print(
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
)
)
)
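For reference, a short sketch of the filename derivation used in the CSV branch above, which is where names like `group_0.csv` and `rejected_trades.csv` come from:
```python
from pathlib import Path

name = "Group 0:"
safe = name.lower().replace(" ", "_").replace(":", "")
safe_name = Path("user_data", "backtest_results", safe)
print(f"{safe_name}.csv")  # -> user_data/backtest_results/group_0.csv
```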
def process_entry_exit_reasons(config: Config):
@@ -226,6 +274,9 @@ def process_entry_exit_reasons(config: Config):
enter_reason_list = config.get('enter_reason_list', ["all"])
exit_reason_list = config.get('exit_reason_list', ["all"])
indicator_list = config.get('indicator_list', [])
do_rejected = config.get('analysis_rejected', False)
to_csv = config.get('analysis_to_csv', False)
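# With no explicit CSV path, fall back to the export location (default: user_data/backtest_results)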
csv_path = config.get('analysis_csv_path', config['exportfilename'])
timerange = TimeRange.parse_timerange(None if config.get(
'timerange') is None else str(config.get('timerange')))
@@ -235,8 +286,16 @@
for strategy_name, results in backtest_stats['strategy'].items():
trades = load_backtest_data(config['exportfilename'], strategy_name)
if not trades.empty:
if trades is not None and not trades.empty:
signal_candles = _load_signal_candles(config['exportfilename'])
rej_df = None
if do_rejected:
rejected_trades_dict = _load_rejected_trades(config['exportfilename'])
rej_df = prepare_results(rejected_trades_dict, strategy_name,
enter_reason_list, exit_reason_list,
timerange=timerange)
analysed_trades_dict = _process_candles_and_indicators(
config['exchange']['pair_whitelist'], strategy_name,
trades, signal_candles)
@@ -247,7 +306,10 @@
print_results(res_df,
analysis_groups,
indicator_list)
indicator_list,
rejected_trades=rej_df,
to_csv=to_csv,
csv_path=csv_path)
except ValueError as e:
raise OperationalException(e) from e

View File

@@ -29,6 +29,7 @@ from freqtrade.mixins import LoggingMixin
from freqtrade.optimize.backtest_caching import get_strategy_run_id
from freqtrade.optimize.bt_progress import BTProgress
from freqtrade.optimize.optimize_reports import (generate_backtest_stats, show_backtest_results,
store_backtest_rejected_trades,
store_backtest_signal_candles,
store_backtest_stats)
from freqtrade.persistence import LocalTrade, Order, PairLocks, Trade
@@ -83,6 +84,8 @@ class Backtesting:
self.strategylist: List[IStrategy] = []
self.all_results: Dict[str, Dict] = {}
self.processed_dfs: Dict[str, Dict] = {}
self.rejected_dict: Dict[str, List] = {}
self.rejected_df: Dict[str, Dict] = {}
self._exchange_name = self.config['exchange']['name']
self.exchange = ExchangeResolver.load_exchange(
@@ -1048,6 +1051,18 @@
return None
return row
def _collate_rejected(self, pair, row):
"""
Temporarily store rejected trade information for downstream use in backtesting_analysis
"""
# It could be fun to enable hyperopt mode to write
# a loss function to reduce rejected signals
if (self.config.get('export', 'none') == 'signals' and
self.dataprovider.runmode == RunMode.BACKTEST):
if pair not in self.rejected_dict:
self.rejected_dict[pair] = []
self.rejected_dict[pair].append([row[DATE_IDX], row[ENTER_TAG_IDX]])
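# Illustrative shape of self.rejected_dict after a few rejections
# (hypothetical pair and tag values):
#   {'BTC/USDT': [[Timestamp('2022-01-01 00:00:00'), 'enter_tag_long_a'], ...]}
# _generate_rejected_trades later joins these rows back onto the candle data.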
def backtest_loop(
self, row: Tuple, pair: str, current_time: datetime, end_date: datetime,
max_open_trades: int, open_trade_count_start: int, is_first: bool = True) -> int:
@@ -1073,20 +1088,22 @@
if (
(self._position_stacking or len(LocalTrade.bt_trades_open_pp[pair]) == 0)
and is_first
and self.trade_slot_available(max_open_trades, open_trade_count_start)
and current_time != end_date
and trade_dir is not None
and not PairLocks.is_pair_locked(pair, row[DATE_IDX], trade_dir)
):
trade = self._enter_trade(pair, row, trade_dir)
if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
# logger.debug(f"{pair} - Emulate creation of new trade: {trade}.")
LocalTrade.add_bt_trade(trade)
self.wallets.update()
if (self.trade_slot_available(max_open_trades, open_trade_count_start)):
trade = self._enter_trade(pair, row, trade_dir)
if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
# logger.debug(f"{pair} - Emulate creation of new trade: {trade}.")
LocalTrade.add_bt_trade(trade)
self.wallets.update()
else:
self._collate_rejected(pair, row)
for trade in list(LocalTrade.bt_trades_open_pp[pair]):
# 3. Process entry orders.
@@ -1266,6 +1283,7 @@
if (self.config.get('export', 'none') == 'signals' and
self.dataprovider.runmode == RunMode.BACKTEST):
self._generate_trade_signal_candles(preprocessed_tmp, results)
self._generate_rejected_trades(preprocessed_tmp, self.rejected_dict)
return min_date, max_date
@@ -1282,12 +1300,33 @@
for t, v in pairresults.open_date.items():
allinds = pairdf.loc[(pairdf['date'] < v)]
signal_inds = allinds.iloc[[-1]]
signal_candles_only_df = pd.concat([signal_candles_only_df, signal_inds])
signal_candles_only_df = pd.concat([
signal_candles_only_df.infer_objects(),
signal_inds.infer_objects()])
signal_candles_only[pair] = signal_candles_only_df
self.processed_dfs[self.strategy.get_strategy_name()] = signal_candles_only
def _generate_rejected_trades(self, preprocessed_df, rejected_dict):
rejected_candles_only = {}
for pair, trades in rejected_dict.items():
rejected_trades_only_df = DataFrame()
pairdf = preprocessed_df[pair]
for t in trades:
data_df_row = pairdf.loc[(pairdf['date'] == t[0])].copy()
data_df_row['pair'] = pair
data_df_row['enter_tag'] = t[1]
rejected_trades_only_df = pd.concat([
rejected_trades_only_df.infer_objects(),
data_df_row.infer_objects()])
rejected_candles_only[pair] = rejected_trades_only_df
self.rejected_df[self.strategy.get_strategy_name()] = rejected_candles_only
def _get_min_cached_backtest_date(self):
min_backtest_date = None
backtest_cache_age = self.config.get('backtest_cache', constants.BACKTEST_CACHE_DEFAULT)
@@ -1353,6 +1392,9 @@
store_backtest_signal_candles(
self.config['exportfilename'], self.processed_dfs, dt_appendix)
store_backtest_rejected_trades(
self.config['exportfilename'], self.rejected_df, dt_appendix)
# Results may be mixed up now. Sort them so they follow --strategy-list order.
if 'strategy_list' in self.config and len(self.results) > 0:
self.results['strategy_comparison'] = sorted(

View File

@@ -45,29 +45,41 @@ def store_backtest_stats(
file_dump_json(latest_filename, {'latest_backtest': str(filename.name)})
def store_backtest_signal_candles(
recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path:
def _store_backtest_analysis_data(
recordfilename: Path, data: Dict[str, Dict],
dtappendix: str, name: str) -> Path:
"""
Stores backtest trade signal candles
Stores backtest trade candles for analysis
:param recordfilename: Path object, which can either be a filename or a directory.
Filenames will be appended with a timestamp right before the suffix
while for directories, <directory>/backtest-result-<datetime>_signals.pkl will be used
while for directories, <directory>/backtest-result-<datetime>_<name>.pkl will be used
as filename
:param stats: Dict containing the backtesting signal candles
:param data: Dict containing the backtesting data for analysis
:param dtappendix: Datetime to use for the filename
:param name: Name to use for the file, e.g. signals, rejected
"""
if recordfilename.is_dir():
filename = (recordfilename / f'backtest-result-{dtappendix}_signals.pkl')
filename = (recordfilename / f'backtest-result-{dtappendix}_{name}.pkl')
else:
filename = Path.joinpath(
recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_signals.pkl'
recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_{name}.pkl'
)
file_dump_joblib(filename, candles)
file_dump_joblib(filename, data)
return filename
def store_backtest_signal_candles(
recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path:
return _store_backtest_analysis_data(recordfilename, candles, dtappendix, "signals")
def store_backtest_rejected_trades(
recordfilename: Path, trades: Dict[str, Dict], dtappendix: str) -> Path:
return _store_backtest_analysis_data(recordfilename, trades, dtappendix, "rejected")
def _get_line_floatfmt(stake_currency: str) -> List[str]:
"""
Generate floatformat (goes in line with _generate_result_line())

View File

@@ -191,8 +191,18 @@ def test_backtest_analysis_nomock(default_conf, mocker, caplog, testdatadir, tmp
assert '2.5' in captured.out
# test date filtering
args = get_args(base_args + ['--timerange', "20180129-20180130"])
args = get_args(base_args +
['--analysis-groups', "0", "1", "2",
'--timerange', "20180129-20180130"]
)
start_analysis_entries_exits(args)
captured = capsys.readouterr()
assert 'enter_tag_long_a' in captured.out
assert 'enter_tag_long_b' not in captured.out
# test rejected - how to mock this?
# args = get_args(base_args + ['--rejected'])
# start_analysis_entries_exits(args)
# captured = capsys.readouterr()
# assert 'Rejected Trades:' in captured.out
# assert False