Add support for collating and analysing rejected trades in backtest

froggleston
2022-12-05 15:34:31 +00:00
parent f28b314266
commit 5a4e99b413
9 changed files with 254 additions and 57 deletions
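The new options slot into the existing backtesting-analysis configuration. A minimal sketch of how they might appear in a config dict, with the CLI flags added by this commit noted alongside (paths and group choices here are illustrative, not defaults):

# Illustrative only: the three keys added by this commit alongside the existing
# analysis options they extend. Paths and group choices are examples.
from pathlib import Path

analysis_config = {
    "exportfilename": Path("user_data/backtest_results"),
    "analysis_groups": ["0", "1"],                        # existing option
    "analysis_rejected": True,                            # new: --rejected
    "analysis_to_csv": True,                              # new: --analysis-to-csv
    "analysis_csv_path": "user_data/backtest_results",    # new: --analysis-csv-path
}

# Roughly the CLI equivalent of the three new keys:
#   freqtrade backtesting-analysis --analysis-groups 0 1 \
#       --rejected --analysis-to-csv --analysis-csv-path user_data/backtest_results

for key, value in analysis_config.items():
    print(f"{key}: {value}")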

View File: freqtrade/commands/arguments.py

@@ -106,7 +106,8 @@ ARGS_HYPEROPT_SHOW = ["hyperopt_list_best", "hyperopt_list_profitable", "hyperop
"disableparamexport", "backtest_breakdown"]
ARGS_ANALYZE_ENTRIES_EXITS = ["exportfilename", "analysis_groups", "enter_reason_list",
"exit_reason_list", "indicator_list", "timerange"]
"exit_reason_list", "indicator_list", "timerange",
"analysis_rejected", "analysis_to_csv", "analysis_csv_path"]
NO_CONF_REQURIED = ["convert-data", "convert-trade-data", "download-data", "list-timeframes",
"list-markets", "list-pairs", "list-strategies", "list-freqaimodels",

View File: freqtrade/commands/cli_options.py

@@ -634,7 +634,7 @@ AVAILABLE_CLI_OPTIONS = {
"3: by pair and enter_tag, "
"4: by pair, enter_ and exit_tag (this can get quite large)"),
nargs='+',
default=['0', '1', '2'],
default=[],
choices=['0', '1', '2', '3', '4'],
),
"enter_reason_list": Arg(
@@ -658,6 +658,21 @@ AVAILABLE_CLI_OPTIONS = {
nargs='+',
default=[],
),
"analysis_rejected": Arg(
'--rejected',
help='Analyse rejected trades',
action='store_true',
),
"analysis_to_csv": Arg(
'--analysis-to-csv',
help='Save selected analysis tables to individual CSVs',
action='store_true',
),
"analysis_csv_path": Arg(
'--analysis-csv-path',
help=("Specify a path to save the analysis CSVs "
"if --analysis-to-csv is enabled. Default: user_data/basktesting_results/"),
),
"freqaimodel": Arg(
'--freqaimodel',
help='Specify a custom freqaimodel.',

View File: freqtrade/configuration/configuration.py

@@ -465,6 +465,15 @@ class Configuration:
self._args_to_config(config, argname='timerange',
logstring='Filter trades by timerange: {}')
self._args_to_config(config, argname='analysis_rejected',
logstring='Analyse rejected trades: {}')
self._args_to_config(config, argname='analysis_to_csv',
logstring='Store analysis tables to CSV: {}')
self._args_to_config(config, argname='analysis_csv_path',
logstring='Path to store analysis CSVs: {}')
def _process_runmode(self, config: Config) -> None:
self._args_to_config(config, argname='dry_run',

View File: freqtrade/data/entryexitanalysis.py

@@ -15,22 +15,30 @@ from freqtrade.exceptions import OperationalException
logger = logging.getLogger(__name__)
def _load_signal_candles(backtest_dir: Path):
def _load_backtest_analysis_data(backtest_dir: Path, name: str):
if backtest_dir.is_dir():
scpf = Path(backtest_dir,
Path(get_latest_backtest_filename(backtest_dir)).stem + "_signals.pkl"
Path(get_latest_backtest_filename(backtest_dir)).stem + "_" + name + ".pkl"
)
else:
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_signals.pkl")
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl")
try:
scp = open(scpf, "rb")
signal_candles = joblib.load(scp)
logger.info(f"Loaded signal candles: {str(scpf)}")
rejected_trades = joblib.load(scp)
logger.info(f"Loaded {name} data: {str(scpf)}")
except Exception as e:
logger.error("Cannot load signal candles from pickled results: ", e)
logger.error(f"Cannot load {name} data from pickled results: ", e)
return signal_candles
return rejected_trades
def _load_rejected_trades(backtest_dir: Path):
return _load_backtest_analysis_data(backtest_dir, "rejected")
def _load_signal_candles(backtest_dir: Path):
return _load_backtest_analysis_data(backtest_dir, "signals")
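A standalone sketch of the path resolution the shared loader performs; latest_stem stands in for whatever get_latest_backtest_filename() would return for the directory, and the filenames are invented:

# Standalone sketch of the "<latest backtest>_<name>.pkl" path resolution used by
# _load_backtest_analysis_data. latest_stem is a placeholder for the stem of the
# most recent backtest result file.
from pathlib import Path

def analysis_pickle_path(backtest_dir: Path, name: str, latest_stem: str) -> Path:
    if backtest_dir.is_dir():
        # directory given: append the analysis suffix to the latest backtest result
        return backtest_dir / f"{latest_stem}_{name}.pkl"
    # explicit file given: derive the sibling analysis file from its stem
    return backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl"

print(analysis_pickle_path(Path("user_data/backtest_results"),
                           "rejected", "backtest-result-2022-12-05_15-34-31"))
print(analysis_pickle_path(Path("user_data/backtest_results/backtest-result-2022-12-05_15-34-31.json"),
                           "signals", ""))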
def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_candles):
@@ -43,9 +51,9 @@ def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_cand
for pair in pairlist:
if pair in signal_candles[strategy_name]:
analysed_trades_dict[strategy_name][pair] = _analyze_candles_and_indicators(
pair,
trades,
signal_candles[strategy_name][pair])
pair,
trades,
signal_candles[strategy_name][pair])
except Exception as e:
print(f"Cannot process entry/exit reasons for {strategy_name}: ", e)
@@ -85,7 +93,7 @@ def _analyze_candles_and_indicators(pair, trades, signal_candles):
return pd.DataFrame()
def _do_group_table_output(bigdf, glist):
def _do_group_table_output(bigdf, glist, to_csv=False, csv_path=None):
for g in glist:
# 0: summary wins/losses grouped by enter tag
if g == "0":
@@ -116,7 +124,8 @@ def _do_group_table_output(bigdf, glist):
sortcols = ['total_num_buys']
_print_table(new, sortcols, show_index=True)
_print_table(new, sortcols, show_index=True, name="Group 0:",
to_csv=to_csv, csv_path=csv_path)
else:
agg_mask = {'profit_abs': ['count', 'sum', 'median', 'mean'],
@@ -148,11 +157,23 @@ def _do_group_table_output(bigdf, glist):
new['mean_profit_pct'] = new['mean_profit_pct'] * 100
new['total_profit_pct'] = new['total_profit_pct'] * 100
_print_table(new, sortcols)
_print_table(new, sortcols, name=f"Group {g}:",
to_csv=to_csv, csv_path=csv_path)
else:
logger.warning("Invalid group mask specified.")
def _do_rejected_trades_output(rejected_trades_df, to_csv=False, csv_path=None):
cols = ['pair', 'date', 'enter_tag']
sortcols = ['date', 'pair', 'enter_tag']
_print_table(rejected_trades_df[cols],
sortcols,
show_index=False,
name="Rejected Trades:",
to_csv=to_csv,
csv_path=csv_path)
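For reference, a toy frame showing what the rejected-trades table contains, rendered the same way _print_table would print it (the rows are invented):

# Toy data only: the "Rejected Trades:" table holds one row per signal that
# found no free trade slot, keyed by pair, candle date and enter_tag.
import pandas as pd
from tabulate import tabulate

rejected = pd.DataFrame({
    "pair": ["BTC/USDT", "ETH/USDT"],
    "date": pd.to_datetime(["2022-11-01 04:00:00", "2022-11-01 05:00:00"], utc=True),
    "enter_tag": ["enter_long_rsi", "enter_long_ema"],
})
print("Rejected Trades:")
print(tabulate(rejected.sort_values(["date", "pair", "enter_tag"]),
               headers="keys", tablefmt="psql", showindex=False))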
def _select_rows_within_dates(df, timerange=None, df_date_col: str = 'date'):
if timerange:
if timerange.starttype == 'date':
@@ -186,38 +207,65 @@ def prepare_results(analysed_trades, stratname,
return res_df
def print_results(res_df, analysis_groups, indicator_list):
def print_results(res_df, analysis_groups, indicator_list,
rejected_trades=None, to_csv=False, csv_path=None):
if res_df.shape[0] > 0:
if analysis_groups:
_do_group_table_output(res_df, analysis_groups)
_do_group_table_output(res_df, analysis_groups, to_csv=to_csv, csv_path=csv_path)
if rejected_trades is not None and not rejected_trades.empty:
_do_rejected_trades_output(rejected_trades, to_csv=to_csv, csv_path=csv_path)
# NB this can be large for big dataframes!
if "all" in indicator_list:
print(res_df)
elif indicator_list is not None:
_print_table(res_df,
show_index=False,
name="Indicators:",
to_csv=to_csv,
csv_path=csv_path)
elif indicator_list is not None and indicator_list:
available_inds = []
for ind in indicator_list:
if ind in res_df:
available_inds.append(ind)
ilist = ["pair", "enter_reason", "exit_reason"] + available_inds
_print_table(res_df[ilist], sortcols=['exit_reason'], show_index=False)
_print_table(res_df[ilist],
sortcols=['exit_reason'],
show_index=False,
name="Indicators:",
to_csv=to_csv,
csv_path=csv_path)
else:
print("\\No trades to show")
def _print_table(df, sortcols=None, show_index=False):
def _print_table(df, sortcols=None, show_index=False, name=None, to_csv=False, csv_path=None):
if (sortcols is not None):
data = df.sort_values(sortcols)
else:
data = df
print(
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
if to_csv:
if csv_path is not None:
safe_name = Path(csv_path,
name.lower().replace(" ", "_").replace(":", ""))
else:
safe_name = Path("user_data",
"backtest_results",
name.lower().replace(" ", "_").replace(":", ""))
data.to_csv(f"{str(safe_name)}.csv")
else:
if name is not None:
print(name)
print(
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
)
)
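The CSV filename is derived from the table name; a quick sketch of that transformation for a few of the table names used above (the output comments assume the fallback directory):

# Sketch of the CSV naming used when to_csv is enabled: the table name is
# lower-cased, spaces become underscores and the trailing colon is dropped.
from pathlib import Path

def csv_target(name: str, csv_path: str = "user_data/backtest_results") -> Path:
    safe_name = Path(csv_path, name.lower().replace(" ", "_").replace(":", ""))
    return Path(f"{safe_name}.csv")

for table in ("Group 0:", "Rejected Trades:", "Indicators:"):
    print(table, "->", csv_target(table))
# Group 0:         -> user_data/backtest_results/group_0.csv
# Rejected Trades: -> user_data/backtest_results/rejected_trades.csv
# Indicators:      -> user_data/backtest_results/indicators.csv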
def process_entry_exit_reasons(config: Config):
@@ -226,6 +274,9 @@ def process_entry_exit_reasons(config: Config):
enter_reason_list = config.get('enter_reason_list', ["all"])
exit_reason_list = config.get('exit_reason_list', ["all"])
indicator_list = config.get('indicator_list', [])
do_rejected = config.get('analysis_rejected', False)
to_csv = config.get('analysis_to_csv', False)
csv_path = config.get('analysis_csv_path', config['exportfilename'])
timerange = TimeRange.parse_timerange(None if config.get(
'timerange') is None else str(config.get('timerange')))
@@ -235,8 +286,16 @@ def process_entry_exit_reasons(config: Config):
for strategy_name, results in backtest_stats['strategy'].items():
trades = load_backtest_data(config['exportfilename'], strategy_name)
if not trades.empty:
if trades is not None and not trades.empty:
signal_candles = _load_signal_candles(config['exportfilename'])
rej_df = None
if do_rejected:
rejected_trades_dict = _load_rejected_trades(config['exportfilename'])
rej_df = prepare_results(rejected_trades_dict, strategy_name,
enter_reason_list, exit_reason_list,
timerange=timerange)
analysed_trades_dict = _process_candles_and_indicators(
config['exchange']['pair_whitelist'], strategy_name,
trades, signal_candles)
@@ -247,7 +306,10 @@ def process_entry_exit_reasons(config: Config):
print_results(res_df,
analysis_groups,
indicator_list)
indicator_list,
rejected_trades=rej_df,
to_csv=to_csv,
csv_path=csv_path)
except ValueError as e:
raise OperationalException(e) from e

View File: freqtrade/optimize/backtesting.py

@@ -29,6 +29,7 @@ from freqtrade.mixins import LoggingMixin
from freqtrade.optimize.backtest_caching import get_strategy_run_id
from freqtrade.optimize.bt_progress import BTProgress
from freqtrade.optimize.optimize_reports import (generate_backtest_stats, show_backtest_results,
store_backtest_rejected_trades,
store_backtest_signal_candles,
store_backtest_stats)
from freqtrade.persistence import LocalTrade, Order, PairLocks, Trade
@@ -83,6 +84,8 @@ class Backtesting:
self.strategylist: List[IStrategy] = []
self.all_results: Dict[str, Dict] = {}
self.processed_dfs: Dict[str, Dict] = {}
self.rejected_dict: Dict[str, List] = {}
self.rejected_df: Dict[str, Dict] = {}
self._exchange_name = self.config['exchange']['name']
self.exchange = ExchangeResolver.load_exchange(
@@ -1048,6 +1051,18 @@ class Backtesting:
return None
return row
def _collate_rejected(self, pair, row):
"""
Temporarily store rejected trade information for downstream use in backtesting_analysis
"""
# It could be fun to enable hyperopt mode to write
# a loss function to reduce rejected signals
if (self.config.get('export', 'none') == 'signals' and
self.dataprovider.runmode == RunMode.BACKTEST):
if pair not in self.rejected_dict:
self.rejected_dict[pair] = []
self.rejected_dict[pair].append([row[DATE_IDX], row[ENTER_TAG_IDX]])
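What _collate_rejected accumulates, sketched with plain values in place of a backtest row (dates and tags are made up; collate_rejected here is a stand-in for the method):

# Illustrative structure of self.rejected_dict after a few rejected signals:
# one list per pair, each entry holding the candle date and the enter_tag
# taken from the signal row (row[DATE_IDX], row[ENTER_TAG_IDX]).
from datetime import datetime, timezone

rejected_dict: dict[str, list[list]] = {}

def collate_rejected(pair: str, date: datetime, enter_tag: str) -> None:
    rejected_dict.setdefault(pair, []).append([date, enter_tag])

collate_rejected("BTC/USDT", datetime(2022, 11, 1, 4, tzinfo=timezone.utc), "enter_long_rsi")
collate_rejected("BTC/USDT", datetime(2022, 11, 1, 5, tzinfo=timezone.utc), "enter_long_rsi")
collate_rejected("ETH/USDT", datetime(2022, 11, 1, 5, tzinfo=timezone.utc), "enter_long_ema")
print(rejected_dict)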
def backtest_loop(
self, row: Tuple, pair: str, current_time: datetime, end_date: datetime,
max_open_trades: int, open_trade_count_start: int, is_first: bool = True) -> int:
@@ -1073,20 +1088,22 @@ class Backtesting:
if (
(self._position_stacking or len(LocalTrade.bt_trades_open_pp[pair]) == 0)
and is_first
and self.trade_slot_available(max_open_trades, open_trade_count_start)
and current_time != end_date
and trade_dir is not None
and not PairLocks.is_pair_locked(pair, row[DATE_IDX], trade_dir)
):
trade = self._enter_trade(pair, row, trade_dir)
if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
# logger.debug(f"{pair} - Emulate creation of new trade: {trade}.")
LocalTrade.add_bt_trade(trade)
self.wallets.update()
if (self.trade_slot_available(max_open_trades, open_trade_count_start)):
trade = self._enter_trade(pair, row, trade_dir)
if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
# logger.debug(f"{pair} - Emulate creation of new trade: {trade}.")
LocalTrade.add_bt_trade(trade)
self.wallets.update()
else:
self._collate_rejected(pair, row)
for trade in list(LocalTrade.bt_trades_open_pp[pair]):
# 3. Process entry orders.
@@ -1266,6 +1283,7 @@ class Backtesting:
if (self.config.get('export', 'none') == 'signals' and
self.dataprovider.runmode == RunMode.BACKTEST):
self._generate_trade_signal_candles(preprocessed_tmp, results)
self._generate_rejected_trades(preprocessed_tmp, self.rejected_dict)
return min_date, max_date
@@ -1282,12 +1300,33 @@ class Backtesting:
for t, v in pairresults.open_date.items():
allinds = pairdf.loc[(pairdf['date'] < v)]
signal_inds = allinds.iloc[[-1]]
signal_candles_only_df = pd.concat([signal_candles_only_df, signal_inds])
signal_candles_only_df = pd.concat([
signal_candles_only_df.infer_objects(),
signal_inds.infer_objects()])
signal_candles_only[pair] = signal_candles_only_df
self.processed_dfs[self.strategy.get_strategy_name()] = signal_candles_only
def _generate_rejected_trades(self, preprocessed_df, rejected_dict):
rejected_candles_only = {}
for pair, trades in rejected_dict.items():
rejected_trades_only_df = DataFrame()
pairdf = preprocessed_df[pair]
for t in trades:
data_df_row = pairdf.loc[(pairdf['date'] == t[0])].copy()
data_df_row['pair'] = pair
data_df_row['enter_tag'] = t[1]
rejected_trades_only_df = pd.concat([
rejected_trades_only_df.infer_objects(),
data_df_row.infer_objects()])
rejected_candles_only[pair] = rejected_trades_only_df
self.rejected_df[self.strategy.get_strategy_name()] = rejected_candles_only
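A condensed, synthetic sketch of the enrichment loop above: each rejected (date, enter_tag) entry is matched to its candle in the preprocessed dataframe and tagged with the pair and enter_tag (the candle data is invented):

# Synthetic example of the core of _generate_rejected_trades: look up the candle
# for each rejected (date, enter_tag) entry and annotate it for later analysis.
import pandas as pd

candles = pd.DataFrame({
    "date": pd.to_datetime(["2022-11-01 04:00", "2022-11-01 05:00"], utc=True),
    "open": [20000.0, 20100.0],
    "close": [20100.0, 20050.0],
})
rejected_entries = [[pd.Timestamp("2022-11-01 05:00", tz="UTC"), "enter_long_rsi"]]

rejected_rows = pd.DataFrame()
for date, enter_tag in rejected_entries:
    row = candles.loc[candles["date"] == date].copy()
    row["pair"] = "BTC/USDT"
    row["enter_tag"] = enter_tag
    rejected_rows = pd.concat([rejected_rows, row])
print(rejected_rows)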
def _get_min_cached_backtest_date(self):
min_backtest_date = None
backtest_cache_age = self.config.get('backtest_cache', constants.BACKTEST_CACHE_DEFAULT)
@@ -1353,6 +1392,9 @@ class Backtesting:
store_backtest_signal_candles(
self.config['exportfilename'], self.processed_dfs, dt_appendix)
store_backtest_rejected_trades(
self.config['exportfilename'], self.rejected_df, dt_appendix)
# Results may be mixed up now. Sort them so they follow --strategy-list order.
if 'strategy_list' in self.config and len(self.results) > 0:
self.results['strategy_comparison'] = sorted(

View File: freqtrade/optimize/optimize_reports.py

@@ -45,29 +45,41 @@ def store_backtest_stats(
file_dump_json(latest_filename, {'latest_backtest': str(filename.name)})
def store_backtest_signal_candles(
recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path:
def _store_backtest_analysis_data(
recordfilename: Path, data: Dict[str, Dict],
dtappendix: str, name: str) -> Path:
"""
Stores backtest trade signal candles
Stores backtest trade candles for analysis
:param recordfilename: Path object, which can either be a filename or a directory.
Filenames will be appended with a timestamp right before the suffix
while for directories, <directory>/backtest-result-<datetime>_signals.pkl will be used
while for directories, <directory>/backtest-result-<datetime>_<name>.pkl will be used
as filename
:param stats: Dict containing the backtesting signal candles
:param data: Dict containing the backtesting data for analysis
:param dtappendix: Datetime to use for the filename
:param name: Name to use for the file, e.g. signals, rejected
"""
if recordfilename.is_dir():
filename = (recordfilename / f'backtest-result-{dtappendix}_signals.pkl')
filename = (recordfilename / f'backtest-result-{dtappendix}_{name}.pkl')
else:
filename = Path.joinpath(
recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_signals.pkl'
recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_{name}.pkl'
)
file_dump_joblib(filename, candles)
file_dump_joblib(filename, data)
return filename
def store_backtest_signal_candles(
recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path:
return _store_backtest_analysis_data(recordfilename, candles, dtappendix, "signals")
def store_backtest_rejected_trades(
recordfilename: Path, trades: Dict[str, Dict], dtappendix: str) -> Path:
return _store_backtest_analysis_data(recordfilename, trades, dtappendix, "rejected")
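The two wrappers differ only in the name suffix. A sketch of the filenames _store_backtest_analysis_data would produce for a directory target and a file target (timestamps and paths are examples; the is_dir() check is replaced by a suffix check so the snippet runs without the directory existing):

# Sketch of the output paths for the two suffixes now in use ("signals" and
# "rejected"). dtappendix is an example timestamp, not a computed value.
from pathlib import Path

def analysis_filename(recordfilename: Path, dtappendix: str, name: str) -> Path:
    if recordfilename.suffix == "":  # stand-in for recordfilename.is_dir()
        return recordfilename / f"backtest-result-{dtappendix}_{name}.pkl"
    return recordfilename.parent / f"{recordfilename.stem}-{dtappendix}_{name}.pkl"

for name in ("signals", "rejected"):
    print(analysis_filename(Path("user_data/backtest_results"), "2022-12-05_15-34-31", name))
    print(analysis_filename(Path("user_data/backtest_results/mystrategy.json"),
                            "2022-12-05_15-34-31", name))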
def _get_line_floatfmt(stake_currency: str) -> List[str]:
"""
Generate floatformat (goes in line with _generate_result_line())