Add support for collating and analysing rejected trades in backtest

froggleston 2022-12-05 15:34:31 +00:00
parent f28b314266
commit 5a4e99b413
9 changed files with 254 additions and 57 deletions

View File

@@ -29,7 +29,7 @@ If all goes well, you should now see a `backtest-result-{timestamp}_signals.pkl`
`user_data/backtest_results` folder.
To analyze the entry/exit tags, we now need to use the `freqtrade backtesting-analysis` command
with `--analysis-groups` option provided with space-separated arguments (default `0 1 2`):
with `--analysis-groups` option provided with space-separated arguments:
``` bash
freqtrade backtesting-analysis -c <config.json> --analysis-groups 0 1 2 3 4
@@ -39,6 +39,7 @@ This command will read from the last backtesting results. The `--analysis-groups`
used to specify the various tabular outputs showing the profit for each group or trade,
ranging from the simplest (0) to the most detailed per pair, per buy and per sell tag (4):
* 0: overall winrate and profit summary by enter_tag
* 1: profit summaries grouped by enter_tag
* 2: profit summaries grouped by enter_tag and exit_tag
* 3: profit summaries grouped by pair and enter_tag
* 4: profit summaries grouped by pair, enter_ and exit_tag (this can get quite large)
@@ -114,3 +115,37 @@ For example, if your backtest timerange was `20220101-20221231` but you only wan
```bash
freqtrade backtesting-analysis -c <config.json> --timerange 20220101-20220201
```
### Printing out rejected trades
Use the `--rejected` option to print out a table of rejected trades, i.e. entry signals that could not be turned into trades (for example because no free trade slot was available).
```bash
freqtrade backtesting-analysis -c <config.json> --rejected
```
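The rejected trades are also stored on disk as a joblib pickle alongside the other backtest results. As a minimal sketch (the timestamp in the filename is hypothetical; use the one from your own backtest run), the file can be inspected directly:
```python
from pathlib import Path

import joblib

# Hypothetical filename - the timestamp comes from your own backtest run
rejected_path = Path("user_data/backtest_results",
                     "backtest-result-2022-12-05_15-34-31_rejected.pkl")
with rejected_path.open("rb") as f:
    # Dict of {strategy_name: {pair: DataFrame of candles at rejection time}}
    rejected = joblib.load(f)

for strategy_name, pairs in rejected.items():
    for pair, df in pairs.items():
        print(f"{strategy_name} {pair}: {len(df)} rejected entries")
```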
### Writing tables to CSV
Some of the tabular outputs can become quite large, so printing them to the terminal is not always practical.
Use the `--analysis-to-csv` option to suppress the table output to standard out and write the tables to CSV files instead.
```bash
freqtrade backtesting-analysis -c <config.json> --analysis-to-csv
```
By default this will write one file per output table you specified in the `backtesting-analysis` command, e.g.
```bash
freqtrade backtesting-analysis -c <config.json> --analysis-to-csv --rejected --analysis-groups 0 1
```
This will write to `user_data/backtest_results`:
* `rejected_trades.csv`
* `group_0.csv`
* `group_1.csv`
To override where the files will be written, also specify the `--analysis-csv-path` option.
```bash
freqtrade backtesting-analysis -c <config.json> --analysis-to-csv --analysis-csv-path another/data/path/
```
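The written files are plain CSVs, so they can be post-processed with any tooling. A minimal sketch using pandas, assuming the default output location and the `group_0.csv` produced by the example above:
```python
import pandas as pd

# Load one of the per-group tables written by --analysis-to-csv
group0 = pd.read_csv("user_data/backtest_results/group_0.csv")
print(group0.head())
```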

View File

@@ -723,6 +723,9 @@ usage: freqtrade backtesting-analysis [-h] [-v] [--logfile FILE] [-V]
[--exit-reason-list EXIT_REASON_LIST [EXIT_REASON_LIST ...]]
[--indicator-list INDICATOR_LIST [INDICATOR_LIST ...]]
[--timerange YYYYMMDD-[YYYYMMDD]]
[--rejected]
[--analysis-to-csv]
[--analysis-csv-path PATH]
optional arguments:
-h, --help show this help message and exit
@@ -736,19 +739,27 @@ optional arguments:
pair and enter_tag, 4: by pair, enter_ and exit_tag
(this can get quite large)
--enter-reason-list ENTER_REASON_LIST [ENTER_REASON_LIST ...]
Comma separated list of entry signals to analyse.
Default: all. e.g. 'entry_tag_a,entry_tag_b'
Space separated list of entry signals to analyse.
Default: all. e.g. 'entry_tag_a entry_tag_b'
--exit-reason-list EXIT_REASON_LIST [EXIT_REASON_LIST ...]
Comma separated list of exit signals to analyse.
Space separated list of exit signals to analyse.
Default: all. e.g.
'exit_tag_a,roi,stop_loss,trailing_stop_loss'
'exit_tag_a roi stop_loss trailing_stop_loss'
--indicator-list INDICATOR_LIST [INDICATOR_LIST ...]
Comma separated list of indicators to analyse. e.g.
'close,rsi,bb_lowerband,profit_abs'
Space separated list of indicators to analyse. e.g.
'close rsi bb_lowerband profit_abs'
--timerange YYYYMMDD-[YYYYMMDD]
Timerange to filter trades for analysis,
start inclusive, end exclusive. e.g.
20220101-20220201
--rejected
Print out rejected trades table
--analysis-to-csv
Write out tables to individual CSVs, by default to
'user_data/backtest_results' unless '--analysis-csv-path' is given.
--analysis-csv-path PATH
Optional path where individual CSVs will be written. If not used,
CSVs will be written to 'user_data/backtest_results'.
Common arguments:
-v, --verbose Verbose mode (-vv for more, -vvv to get all messages).

View File

@@ -106,7 +106,8 @@ ARGS_HYPEROPT_SHOW = ["hyperopt_list_best", "hyperopt_list_profitable", "hyperop
"disableparamexport", "backtest_breakdown"]
ARGS_ANALYZE_ENTRIES_EXITS = ["exportfilename", "analysis_groups", "enter_reason_list",
"exit_reason_list", "indicator_list", "timerange"]
"exit_reason_list", "indicator_list", "timerange",
"analysis_rejected", "analysis_to_csv", "analysis_csv_path"]
NO_CONF_REQURIED = ["convert-data", "convert-trade-data", "download-data", "list-timeframes",
"list-markets", "list-pairs", "list-strategies", "list-freqaimodels",

View File

@@ -634,7 +634,7 @@ AVAILABLE_CLI_OPTIONS = {
"3: by pair and enter_tag, "
"4: by pair, enter_ and exit_tag (this can get quite large)"),
nargs='+',
default=['0', '1', '2'],
default=[],
choices=['0', '1', '2', '3', '4'],
),
"enter_reason_list": Arg(
@@ -658,6 +658,21 @@ AVAILABLE_CLI_OPTIONS = {
nargs='+',
default=[],
),
"analysis_rejected": Arg(
'--rejected',
help='Analyse rejected trades',
action='store_true',
),
"analysis_to_csv": Arg(
'--analysis-to-csv',
help='Save selected analysis tables to individual CSVs',
action='store_true',
),
"analysis_csv_path": Arg(
'--analysis-csv-path',
help=("Specify a path to save the analysis CSVs "
"if --analysis-to-csv is enabled. Default: user_data/basktesting_results/"),
),
"freqaimodel": Arg(
'--freqaimodel',
help='Specify a custom freqaimodel.',

View File

@@ -465,6 +465,15 @@ class Configuration:
self._args_to_config(config, argname='timerange',
logstring='Filter trades by timerange: {}')
self._args_to_config(config, argname='analysis_rejected',
logstring='Analyse rejected trades: {}')
self._args_to_config(config, argname='analysis_to_csv',
logstring='Store analysis tables to CSV: {}')
self._args_to_config(config, argname='analysis_csv_path',
logstring='Path to store analysis CSVs: {}')
def _process_runmode(self, config: Config) -> None:
self._args_to_config(config, argname='dry_run',

View File

@@ -15,22 +15,30 @@ from freqtrade.exceptions import OperationalException
logger = logging.getLogger(__name__)
def _load_signal_candles(backtest_dir: Path):
def _load_backtest_analysis_data(backtest_dir: Path, name: str):
if backtest_dir.is_dir():
scpf = Path(backtest_dir,
Path(get_latest_backtest_filename(backtest_dir)).stem + "_signals.pkl"
Path(get_latest_backtest_filename(backtest_dir)).stem + "_" + name + ".pkl"
)
else:
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_signals.pkl")
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl")
try:
scp = open(scpf, "rb")
signal_candles = joblib.load(scp)
logger.info(f"Loaded signal candles: {str(scpf)}")
loaded_data = joblib.load(scp)
logger.info(f"Loaded {name} data: {str(scpf)}")
except Exception as e:
logger.error("Cannot load signal candles from pickled results: ", e)
logger.error(f"Cannot load {name} data from pickled results: ", e)
return signal_candles
return loaded_data
def _load_rejected_trades(backtest_dir: Path):
return _load_backtest_analysis_data(backtest_dir, "rejected")
def _load_signal_candles(backtest_dir: Path):
return _load_backtest_analysis_data(backtest_dir, "signals")
def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_candles):
@@ -43,9 +51,9 @@ def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_cand
for pair in pairlist:
if pair in signal_candles[strategy_name]:
analysed_trades_dict[strategy_name][pair] = _analyze_candles_and_indicators(
pair,
trades,
signal_candles[strategy_name][pair])
pair,
trades,
signal_candles[strategy_name][pair])
except Exception as e:
print(f"Cannot process entry/exit reasons for {strategy_name}: ", e)
@@ -85,7 +93,7 @@ def _analyze_candles_and_indicators(pair, trades, signal_candles):
return pd.DataFrame()
def _do_group_table_output(bigdf, glist):
def _do_group_table_output(bigdf, glist, to_csv=False, csv_path=None):
for g in glist:
# 0: summary wins/losses grouped by enter tag
if g == "0":
@@ -116,7 +124,8 @@
sortcols = ['total_num_buys']
_print_table(new, sortcols, show_index=True)
_print_table(new, sortcols, show_index=True, name="Group 0:",
to_csv=to_csv, csv_path=csv_path)
else:
agg_mask = {'profit_abs': ['count', 'sum', 'median', 'mean'],
@@ -148,11 +157,23 @@
new['mean_profit_pct'] = new['mean_profit_pct'] * 100
new['total_profit_pct'] = new['total_profit_pct'] * 100
_print_table(new, sortcols)
_print_table(new, sortcols, name=f"Group {g}:",
to_csv=to_csv, csv_path=csv_path)
else:
logger.warning("Invalid group mask specified.")
def _do_rejected_trades_output(rejected_trades_df, to_csv=False, csv_path=None):
cols = ['pair', 'date', 'enter_tag']
sortcols = ['date', 'pair', 'enter_tag']
_print_table(rejected_trades_df[cols],
sortcols,
show_index=False,
name="Rejected Trades:",
to_csv=to_csv,
csv_path=csv_path)
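A minimal usage sketch for the helper above, with illustrative values; it assumes it is run from within this module, since it relies on `_print_table`:
```python
import pandas as pd

demo = pd.DataFrame({
    'pair': ['BTC/USDT', 'ETH/USDT'],
    'date': ['2022-01-01 00:00:00', '2022-01-01 00:05:00'],
    'enter_tag': ['enter_tag_long_a', 'enter_tag_long_b'],
})
# Prints a "Rejected Trades:" table sorted by date, pair and enter_tag;
# with to_csv=True it writes rejected_trades.csv instead.
_do_rejected_trades_output(demo)
```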
def _select_rows_within_dates(df, timerange=None, df_date_col: str = 'date'):
if timerange:
if timerange.starttype == 'date':
@@ -186,38 +207,65 @@ def prepare_results(analysed_trades, stratname,
return res_df
def print_results(res_df, analysis_groups, indicator_list):
def print_results(res_df, analysis_groups, indicator_list,
rejected_trades=None, to_csv=False, csv_path=None):
if res_df.shape[0] > 0:
if analysis_groups:
_do_group_table_output(res_df, analysis_groups)
_do_group_table_output(res_df, analysis_groups, to_csv=to_csv, csv_path=csv_path)
if rejected_trades is not None and not rejected_trades.empty:
_do_rejected_trades_output(rejected_trades, to_csv=to_csv, csv_path=csv_path)
# NB this can be large for big dataframes!
if "all" in indicator_list:
print(res_df)
elif indicator_list is not None:
_print_table(res_df,
show_index=False,
name="Indicators:",
to_csv=to_csv,
csv_path=csv_path)
elif indicator_list:
available_inds = []
for ind in indicator_list:
if ind in res_df:
available_inds.append(ind)
ilist = ["pair", "enter_reason", "exit_reason"] + available_inds
_print_table(res_df[ilist], sortcols=['exit_reason'], show_index=False)
_print_table(res_df[ilist],
sortcols=['exit_reason'],
show_index=False,
name="Indicators:",
to_csv=to_csv,
csv_path=csv_path)
else:
print("\\No trades to show")
def _print_table(df, sortcols=None, show_index=False):
def _print_table(df, sortcols=None, show_index=False, name=None, to_csv=False, csv_path=None):
if (sortcols is not None):
data = df.sort_values(sortcols)
else:
data = df
print(
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
if to_csv:
if csv_path is not None:
safe_name = Path(csv_path,
name.lower().replace(" ", "_").replace(":", ""))
else:
safe_name = Path("user_data",
"backtest_results",
name.lower().replace(" ", "_").replace(":", ""))
data.to_csv(f"{str(safe_name)}.csv")
else:
if name is not None:
print(name)
print(
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
)
)
)
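For reference, a short sketch of the filename derivation used in the CSV branch above, which is where names like `group_0.csv` and `rejected_trades.csv` come from:
```python
from pathlib import Path

name = "Group 0:"
safe = name.lower().replace(" ", "_").replace(":", "")
safe_name = Path("user_data", "backtest_results", safe)
print(f"{safe_name}.csv")  # -> user_data/backtest_results/group_0.csv
```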
def process_entry_exit_reasons(config: Config):
@@ -226,6 +274,9 @@ def process_entry_exit_reasons(config: Config):
enter_reason_list = config.get('enter_reason_list', ["all"])
exit_reason_list = config.get('exit_reason_list', ["all"])
indicator_list = config.get('indicator_list', [])
do_rejected = config.get('analysis_rejected', False)
to_csv = config.get('analysis_to_csv', False)
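# With no explicit CSV path, fall back to the export location (default: user_data/backtest_results)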
csv_path = config.get('analysis_csv_path', config['exportfilename'])
timerange = TimeRange.parse_timerange(None if config.get(
'timerange') is None else str(config.get('timerange')))
@@ -235,8 +286,16 @@
for strategy_name, results in backtest_stats['strategy'].items():
trades = load_backtest_data(config['exportfilename'], strategy_name)
if not trades.empty:
if trades is not None and not trades.empty:
signal_candles = _load_signal_candles(config['exportfilename'])
rej_df = None
if do_rejected:
rejected_trades_dict = _load_rejected_trades(config['exportfilename'])
rej_df = prepare_results(rejected_trades_dict, strategy_name,
enter_reason_list, exit_reason_list,
timerange=timerange)
analysed_trades_dict = _process_candles_and_indicators(
config['exchange']['pair_whitelist'], strategy_name,
trades, signal_candles)
@@ -247,7 +306,10 @@
print_results(res_df,
analysis_groups,
indicator_list)
indicator_list,
rejected_trades=rej_df,
to_csv=to_csv,
csv_path=csv_path)
except ValueError as e:
raise OperationalException(e) from e

View File

@@ -29,6 +29,7 @@ from freqtrade.mixins import LoggingMixin
from freqtrade.optimize.backtest_caching import get_strategy_run_id
from freqtrade.optimize.bt_progress import BTProgress
from freqtrade.optimize.optimize_reports import (generate_backtest_stats, show_backtest_results,
store_backtest_rejected_trades,
store_backtest_signal_candles,
store_backtest_stats)
from freqtrade.persistence import LocalTrade, Order, PairLocks, Trade
@@ -83,6 +84,8 @@ class Backtesting:
self.strategylist: List[IStrategy] = []
self.all_results: Dict[str, Dict] = {}
self.processed_dfs: Dict[str, Dict] = {}
self.rejected_dict: Dict[str, List] = {}
self.rejected_df: Dict[str, Dict] = {}
self._exchange_name = self.config['exchange']['name']
self.exchange = ExchangeResolver.load_exchange(
@@ -1048,6 +1051,18 @@
return None
return row
def _collate_rejected(self, pair, row):
"""
Temporarily store rejected trade information for downstream use in backtesting_analysis
"""
# It could be fun to enable hyperopt mode to write
# a loss function to reduce rejected signals
if (self.config.get('export', 'none') == 'signals' and
self.dataprovider.runmode == RunMode.BACKTEST):
if pair not in self.rejected_dict:
self.rejected_dict[pair] = []
self.rejected_dict[pair].append([row[DATE_IDX], row[ENTER_TAG_IDX]])
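# Illustrative shape of self.rejected_dict after a few rejections
# (hypothetical pair and tag values):
#   {'BTC/USDT': [[Timestamp('2022-01-01 00:00:00'), 'enter_tag_long_a'], ...]}
# _generate_rejected_trades later joins these rows back onto the candle data.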
def backtest_loop(
self, row: Tuple, pair: str, current_time: datetime, end_date: datetime,
max_open_trades: int, open_trade_count_start: int, is_first: bool = True) -> int:
@@ -1073,20 +1088,22 @@
if (
(self._position_stacking or len(LocalTrade.bt_trades_open_pp[pair]) == 0)
and is_first
and self.trade_slot_available(max_open_trades, open_trade_count_start)
and current_time != end_date
and trade_dir is not None
and not PairLocks.is_pair_locked(pair, row[DATE_IDX], trade_dir)
):
trade = self._enter_trade(pair, row, trade_dir)
if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
# logger.debug(f"{pair} - Emulate creation of new trade: {trade}.")
LocalTrade.add_bt_trade(trade)
self.wallets.update()
if (self.trade_slot_available(max_open_trades, open_trade_count_start)):
trade = self._enter_trade(pair, row, trade_dir)
if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
# logger.debug(f"{pair} - Emulate creation of new trade: {trade}.")
LocalTrade.add_bt_trade(trade)
self.wallets.update()
else:
self._collate_rejected(pair, row)
for trade in list(LocalTrade.bt_trades_open_pp[pair]):
# 3. Process entry orders.
@@ -1266,6 +1283,7 @@
if (self.config.get('export', 'none') == 'signals' and
self.dataprovider.runmode == RunMode.BACKTEST):
self._generate_trade_signal_candles(preprocessed_tmp, results)
self._generate_rejected_trades(preprocessed_tmp, self.rejected_dict)
return min_date, max_date
@@ -1282,12 +1300,33 @@
for t, v in pairresults.open_date.items():
allinds = pairdf.loc[(pairdf['date'] < v)]
signal_inds = allinds.iloc[[-1]]
signal_candles_only_df = pd.concat([signal_candles_only_df, signal_inds])
signal_candles_only_df = pd.concat([
signal_candles_only_df.infer_objects(),
signal_inds.infer_objects()])
signal_candles_only[pair] = signal_candles_only_df
self.processed_dfs[self.strategy.get_strategy_name()] = signal_candles_only
def _generate_rejected_trades(self, preprocessed_df, rejected_dict):
rejected_candles_only = {}
for pair, trades in rejected_dict.items():
rejected_trades_only_df = DataFrame()
pairdf = preprocessed_df[pair]
for t in trades:
data_df_row = pairdf.loc[(pairdf['date'] == t[0])].copy()
data_df_row['pair'] = pair
data_df_row['enter_tag'] = t[1]
rejected_trades_only_df = pd.concat([
rejected_trades_only_df.infer_objects(),
data_df_row.infer_objects()])
rejected_candles_only[pair] = rejected_trades_only_df
self.rejected_df[self.strategy.get_strategy_name()] = rejected_candles_only
def _get_min_cached_backtest_date(self):
min_backtest_date = None
backtest_cache_age = self.config.get('backtest_cache', constants.BACKTEST_CACHE_DEFAULT)
@@ -1353,6 +1392,9 @@
store_backtest_signal_candles(
self.config['exportfilename'], self.processed_dfs, dt_appendix)
store_backtest_rejected_trades(
self.config['exportfilename'], self.rejected_df, dt_appendix)
# Results may be mixed up now. Sort them so they follow --strategy-list order.
if 'strategy_list' in self.config and len(self.results) > 0:
self.results['strategy_comparison'] = sorted(

View File

@@ -45,29 +45,41 @@ def store_backtest_stats(
file_dump_json(latest_filename, {'latest_backtest': str(filename.name)})
def store_backtest_signal_candles(
recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path:
def _store_backtest_analysis_data(
recordfilename: Path, data: Dict[str, Dict],
dtappendix: str, name: str) -> Path:
"""
Stores backtest trade signal candles
Stores backtest trade candles for analysis
:param recordfilename: Path object, which can either be a filename or a directory.
Filenames will be appended with a timestamp right before the suffix
while for directories, <directory>/backtest-result-<datetime>_signals.pkl will be used
while for directories, <directory>/backtest-result-<datetime>_<name>.pkl will be used
as filename
:param stats: Dict containing the backtesting signal candles
:param data: Dict containing the backtesting data for analysis
:param dtappendix: Datetime to use for the filename
:param name: Name to use for the file, e.g. signals, rejected
"""
if recordfilename.is_dir():
filename = (recordfilename / f'backtest-result-{dtappendix}_signals.pkl')
filename = (recordfilename / f'backtest-result-{dtappendix}_{name}.pkl')
else:
filename = Path.joinpath(
recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_signals.pkl'
recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_{name}.pkl'
)
file_dump_joblib(filename, candles)
file_dump_joblib(filename, data)
return filename
def store_backtest_signal_candles(
recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path:
return _store_backtest_analysis_data(recordfilename, candles, dtappendix, "signals")
def store_backtest_rejected_trades(
recordfilename: Path, trades: Dict[str, Dict], dtappendix: str) -> Path:
return _store_backtest_analysis_data(recordfilename, trades, dtappendix, "rejected")
def _get_line_floatfmt(stake_currency: str) -> List[str]:
"""
Generate floatformat (goes in line with _generate_result_line())

View File

@@ -191,8 +191,18 @@ def test_backtest_analysis_nomock(default_conf, mocker, caplog, testdatadir, tmp
assert '2.5' in captured.out
# test date filtering
args = get_args(base_args + ['--timerange', "20180129-20180130"])
args = get_args(base_args +
['--analysis-groups', "0", "1", "2",
'--timerange', "20180129-20180130"]
)
start_analysis_entries_exits(args)
captured = capsys.readouterr()
assert 'enter_tag_long_a' in captured.out
assert 'enter_tag_long_b' not in captured.out
# test rejected - how to mock this?
# args = get_args(base_args + ['--rejected'])
# start_analysis_entries_exits(args)
# captured = capsys.readouterr()
# assert 'Rejected Trades:' in captured.out
# assert False