Add support for collating and analysing rejected trades in backtest

froggleston
2022-12-05 15:34:31 +00:00
parent f28b314266
commit 5a4e99b413
9 changed files with 254 additions and 57 deletions
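The new options slot into the existing backtesting-analysis configuration. A minimal sketch of how they might appear in a config dict, with the CLI flags added by this commit noted alongside (paths and group choices here are illustrative, not defaults):

# Illustrative only: the three keys added by this commit alongside the existing
# analysis options they extend. Paths and group choices are examples.
from pathlib import Path

analysis_config = {
    "exportfilename": Path("user_data/backtest_results"),
    "analysis_groups": ["0", "1"],                        # existing option
    "analysis_rejected": True,                            # new: --rejected
    "analysis_to_csv": True,                              # new: --analysis-to-csv
    "analysis_csv_path": "user_data/backtest_results",    # new: --analysis-csv-path
}

# Roughly the CLI equivalent of the three new keys:
#   freqtrade backtesting-analysis --analysis-groups 0 1 \
#       --rejected --analysis-to-csv --analysis-csv-path user_data/backtest_results

for key, value in analysis_config.items():
    print(f"{key}: {value}")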

View File: freqtrade/commands/arguments.py

@@ -106,7 +106,8 @@ ARGS_HYPEROPT_SHOW = ["hyperopt_list_best", "hyperopt_list_profitable", "hyperop
"disableparamexport", "backtest_breakdown"]
ARGS_ANALYZE_ENTRIES_EXITS = ["exportfilename", "analysis_groups", "enter_reason_list",
"exit_reason_list", "indicator_list", "timerange"]
"exit_reason_list", "indicator_list", "timerange",
"analysis_rejected", "analysis_to_csv", "analysis_csv_path"]
NO_CONF_REQURIED = ["convert-data", "convert-trade-data", "download-data", "list-timeframes",
"list-markets", "list-pairs", "list-strategies", "list-freqaimodels",

View File: freqtrade/commands/cli_options.py

@@ -634,7 +634,7 @@ AVAILABLE_CLI_OPTIONS = {
"3: by pair and enter_tag, "
"4: by pair, enter_ and exit_tag (this can get quite large)"),
nargs='+',
default=['0', '1', '2'],
default=[],
choices=['0', '1', '2', '3', '4'],
),
"enter_reason_list": Arg(
@@ -658,6 +658,21 @@ AVAILABLE_CLI_OPTIONS = {
nargs='+',
default=[],
),
"analysis_rejected": Arg(
'--rejected',
help='Analyse rejected trades',
action='store_true',
),
"analysis_to_csv": Arg(
'--analysis-to-csv',
help='Save selected analysis tables to individual CSVs',
action='store_true',
),
"analysis_csv_path": Arg(
'--analysis-csv-path',
help=("Specify a path to save the analysis CSVs "
"if --analysis-to-csv is enabled. Default: user_data/basktesting_results/"),
),
"freqaimodel": Arg(
'--freqaimodel',
help='Specify a custom freqaimodel.',

View File: freqtrade/configuration/configuration.py

@@ -465,6 +465,15 @@ class Configuration:
self._args_to_config(config, argname='timerange',
logstring='Filter trades by timerange: {}')
self._args_to_config(config, argname='analysis_rejected',
logstring='Analyse rejected trades: {}')
self._args_to_config(config, argname='analysis_to_csv',
logstring='Store analysis tables to CSV: {}')
self._args_to_config(config, argname='analysis_csv_path',
logstring='Path to store analysis CSVs: {}')
def _process_runmode(self, config: Config) -> None:
self._args_to_config(config, argname='dry_run',

View File: freqtrade/data/entryexitanalysis.py

@@ -15,22 +15,30 @@ from freqtrade.exceptions import OperationalException
logger = logging.getLogger(__name__)
def _load_signal_candles(backtest_dir: Path):
def _load_backtest_analysis_data(backtest_dir: Path, name: str):
if backtest_dir.is_dir():
scpf = Path(backtest_dir,
Path(get_latest_backtest_filename(backtest_dir)).stem + "_signals.pkl"
Path(get_latest_backtest_filename(backtest_dir)).stem + "_" + name + ".pkl"
)
else:
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_signals.pkl")
scpf = Path(backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl")
try:
scp = open(scpf, "rb")
signal_candles = joblib.load(scp)
logger.info(f"Loaded signal candles: {str(scpf)}")
rejected_trades = joblib.load(scp)
logger.info(f"Loaded {name} data: {str(scpf)}")
except Exception as e:
logger.error("Cannot load signal candles from pickled results: ", e)
logger.error(f"Cannot load {name} data from pickled results: ", e)
return signal_candles
return rejected_trades
def _load_rejected_trades(backtest_dir: Path):
return _load_backtest_analysis_data(backtest_dir, "rejected")
def _load_signal_candles(backtest_dir: Path):
return _load_backtest_analysis_data(backtest_dir, "signals")
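A standalone sketch of the path resolution the shared loader performs; latest_stem stands in for whatever get_latest_backtest_filename() would return for the directory, and the filenames are invented:

# Standalone sketch of the "<latest backtest>_<name>.pkl" path resolution used by
# _load_backtest_analysis_data. latest_stem is a placeholder for the stem of the
# most recent backtest result file.
from pathlib import Path

def analysis_pickle_path(backtest_dir: Path, name: str, latest_stem: str) -> Path:
    if backtest_dir.is_dir():
        # directory given: append the analysis suffix to the latest backtest result
        return backtest_dir / f"{latest_stem}_{name}.pkl"
    # explicit file given: derive the sibling analysis file from its stem
    return backtest_dir.parent / f"{backtest_dir.stem}_{name}.pkl"

print(analysis_pickle_path(Path("user_data/backtest_results"),
                           "rejected", "backtest-result-2022-12-05_15-34-31"))
print(analysis_pickle_path(Path("user_data/backtest_results/backtest-result-2022-12-05_15-34-31.json"),
                           "signals", ""))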
def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_candles):
@@ -43,9 +51,9 @@ def _process_candles_and_indicators(pairlist, strategy_name, trades, signal_cand
for pair in pairlist:
if pair in signal_candles[strategy_name]:
analysed_trades_dict[strategy_name][pair] = _analyze_candles_and_indicators(
pair,
trades,
signal_candles[strategy_name][pair])
pair,
trades,
signal_candles[strategy_name][pair])
except Exception as e:
print(f"Cannot process entry/exit reasons for {strategy_name}: ", e)
@@ -85,7 +93,7 @@ def _analyze_candles_and_indicators(pair, trades, signal_candles):
return pd.DataFrame()
def _do_group_table_output(bigdf, glist):
def _do_group_table_output(bigdf, glist, to_csv=False, csv_path=None):
for g in glist:
# 0: summary wins/losses grouped by enter tag
if g == "0":
@@ -116,7 +124,8 @@ def _do_group_table_output(bigdf, glist):
sortcols = ['total_num_buys']
_print_table(new, sortcols, show_index=True)
_print_table(new, sortcols, show_index=True, name="Group 0:",
to_csv=to_csv, csv_path=csv_path)
else:
agg_mask = {'profit_abs': ['count', 'sum', 'median', 'mean'],
@@ -148,11 +157,23 @@ def _do_group_table_output(bigdf, glist):
new['mean_profit_pct'] = new['mean_profit_pct'] * 100
new['total_profit_pct'] = new['total_profit_pct'] * 100
_print_table(new, sortcols)
_print_table(new, sortcols, name=f"Group {g}:",
to_csv=to_csv, csv_path=csv_path)
else:
logger.warning("Invalid group mask specified.")
def _do_rejected_trades_output(rejected_trades_df, to_csv=False, csv_path=None):
cols = ['pair', 'date', 'enter_tag']
sortcols = ['date', 'pair', 'enter_tag']
_print_table(rejected_trades_df[cols],
sortcols,
show_index=False,
name="Rejected Trades:",
to_csv=to_csv,
csv_path=csv_path)
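For reference, a toy frame showing what the rejected-trades table contains, rendered the same way _print_table would print it (the rows are invented):

# Toy data only: the "Rejected Trades:" table holds one row per signal that
# found no free trade slot, keyed by pair, candle date and enter_tag.
import pandas as pd
from tabulate import tabulate

rejected = pd.DataFrame({
    "pair": ["BTC/USDT", "ETH/USDT"],
    "date": pd.to_datetime(["2022-11-01 04:00:00", "2022-11-01 05:00:00"], utc=True),
    "enter_tag": ["enter_long_rsi", "enter_long_ema"],
})
print("Rejected Trades:")
print(tabulate(rejected.sort_values(["date", "pair", "enter_tag"]),
               headers="keys", tablefmt="psql", showindex=False))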
def _select_rows_within_dates(df, timerange=None, df_date_col: str = 'date'):
if timerange:
if timerange.starttype == 'date':
@@ -186,38 +207,65 @@ def prepare_results(analysed_trades, stratname,
return res_df
def print_results(res_df, analysis_groups, indicator_list):
def print_results(res_df, analysis_groups, indicator_list,
rejected_trades=None, to_csv=False, csv_path=None):
if res_df.shape[0] > 0:
if analysis_groups:
_do_group_table_output(res_df, analysis_groups)
_do_group_table_output(res_df, analysis_groups, to_csv=to_csv, csv_path=csv_path)
if rejected_trades is not None and not rejected_trades.empty:
_do_rejected_trades_output(rejected_trades, to_csv=to_csv, csv_path=csv_path)
# NB this can be large for big dataframes!
if "all" in indicator_list:
print(res_df)
elif indicator_list is not None:
_print_table(res_df,
show_index=False,
name="Indicators:",
to_csv=to_csv,
csv_path=csv_path)
elif indicator_list is not None and indicator_list:
available_inds = []
for ind in indicator_list:
if ind in res_df:
available_inds.append(ind)
ilist = ["pair", "enter_reason", "exit_reason"] + available_inds
_print_table(res_df[ilist], sortcols=['exit_reason'], show_index=False)
_print_table(res_df[ilist],
sortcols=['exit_reason'],
show_index=False,
name="Indicators:",
to_csv=to_csv,
csv_path=csv_path)
else:
print("\\No trades to show")
def _print_table(df, sortcols=None, show_index=False):
def _print_table(df, sortcols=None, show_index=False, name=None, to_csv=False, csv_path=None):
if (sortcols is not None):
data = df.sort_values(sortcols)
else:
data = df
print(
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
if to_csv:
if csv_path is not None:
safe_name = Path(csv_path,
name.lower().replace(" ", "_").replace(":", ""))
else:
safe_name = Path("user_data",
"backtest_results",
name.lower().replace(" ", "_").replace(":", ""))
data.to_csv(f"{str(safe_name)}.csv")
else:
if name is not None:
print(name)
print(
tabulate(
data,
headers='keys',
tablefmt='psql',
showindex=show_index
)
)
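The CSV filename is derived from the table name; a quick sketch of that transformation for a few of the table names used above (the output comments assume the fallback directory):

# Sketch of the CSV naming used when to_csv is enabled: the table name is
# lower-cased, spaces become underscores and the trailing colon is dropped.
from pathlib import Path

def csv_target(name: str, csv_path: str = "user_data/backtest_results") -> Path:
    safe_name = Path(csv_path, name.lower().replace(" ", "_").replace(":", ""))
    return Path(f"{safe_name}.csv")

for table in ("Group 0:", "Rejected Trades:", "Indicators:"):
    print(table, "->", csv_target(table))
# Group 0:         -> user_data/backtest_results/group_0.csv
# Rejected Trades: -> user_data/backtest_results/rejected_trades.csv
# Indicators:      -> user_data/backtest_results/indicators.csv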
def process_entry_exit_reasons(config: Config):
@@ -226,6 +274,9 @@ def process_entry_exit_reasons(config: Config):
enter_reason_list = config.get('enter_reason_list', ["all"])
exit_reason_list = config.get('exit_reason_list', ["all"])
indicator_list = config.get('indicator_list', [])
do_rejected = config.get('analysis_rejected', False)
to_csv = config.get('analysis_to_csv', False)
csv_path = config.get('analysis_csv_path', config['exportfilename'])
timerange = TimeRange.parse_timerange(None if config.get(
'timerange') is None else str(config.get('timerange')))
@@ -235,8 +286,16 @@ def process_entry_exit_reasons(config: Config):
for strategy_name, results in backtest_stats['strategy'].items():
trades = load_backtest_data(config['exportfilename'], strategy_name)
if not trades.empty:
if trades is not None and not trades.empty:
signal_candles = _load_signal_candles(config['exportfilename'])
rej_df = None
if do_rejected:
rejected_trades_dict = _load_rejected_trades(config['exportfilename'])
rej_df = prepare_results(rejected_trades_dict, strategy_name,
enter_reason_list, exit_reason_list,
timerange=timerange)
analysed_trades_dict = _process_candles_and_indicators(
config['exchange']['pair_whitelist'], strategy_name,
trades, signal_candles)
@@ -247,7 +306,10 @@ def process_entry_exit_reasons(config: Config):
print_results(res_df,
analysis_groups,
indicator_list)
indicator_list,
rejected_trades=rej_df,
to_csv=to_csv,
csv_path=csv_path)
except ValueError as e:
raise OperationalException(e) from e

View File: freqtrade/optimize/backtesting.py

@@ -29,6 +29,7 @@ from freqtrade.mixins import LoggingMixin
from freqtrade.optimize.backtest_caching import get_strategy_run_id
from freqtrade.optimize.bt_progress import BTProgress
from freqtrade.optimize.optimize_reports import (generate_backtest_stats, show_backtest_results,
store_backtest_rejected_trades,
store_backtest_signal_candles,
store_backtest_stats)
from freqtrade.persistence import LocalTrade, Order, PairLocks, Trade
@@ -83,6 +84,8 @@ class Backtesting:
self.strategylist: List[IStrategy] = []
self.all_results: Dict[str, Dict] = {}
self.processed_dfs: Dict[str, Dict] = {}
self.rejected_dict: Dict[str, List] = {}
self.rejected_df: Dict[str, Dict] = {}
self._exchange_name = self.config['exchange']['name']
self.exchange = ExchangeResolver.load_exchange(
@@ -1048,6 +1051,18 @@ class Backtesting:
return None
return row
def _collate_rejected(self, pair, row):
"""
Temporarily store rejected trade information for downstream use in backtesting_analysis
"""
# It could be fun to enable hyperopt mode to write
# a loss function to reduce rejected signals
if (self.config.get('export', 'none') == 'signals' and
self.dataprovider.runmode == RunMode.BACKTEST):
if pair not in self.rejected_dict:
self.rejected_dict[pair] = []
self.rejected_dict[pair].append([row[DATE_IDX], row[ENTER_TAG_IDX]])
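What _collate_rejected accumulates, sketched with plain values in place of a backtest row (dates and tags are made up; collate_rejected here is a stand-in for the method):

# Illustrative structure of self.rejected_dict after a few rejected signals:
# one list per pair, each entry holding the candle date and the enter_tag
# taken from the signal row (row[DATE_IDX], row[ENTER_TAG_IDX]).
from datetime import datetime, timezone

rejected_dict: dict[str, list[list]] = {}

def collate_rejected(pair: str, date: datetime, enter_tag: str) -> None:
    rejected_dict.setdefault(pair, []).append([date, enter_tag])

collate_rejected("BTC/USDT", datetime(2022, 11, 1, 4, tzinfo=timezone.utc), "enter_long_rsi")
collate_rejected("BTC/USDT", datetime(2022, 11, 1, 5, tzinfo=timezone.utc), "enter_long_rsi")
collate_rejected("ETH/USDT", datetime(2022, 11, 1, 5, tzinfo=timezone.utc), "enter_long_ema")
print(rejected_dict)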
def backtest_loop(
self, row: Tuple, pair: str, current_time: datetime, end_date: datetime,
max_open_trades: int, open_trade_count_start: int, is_first: bool = True) -> int:
@@ -1073,20 +1088,22 @@ class Backtesting:
if (
(self._position_stacking or len(LocalTrade.bt_trades_open_pp[pair]) == 0)
and is_first
and self.trade_slot_available(max_open_trades, open_trade_count_start)
and current_time != end_date
and trade_dir is not None
and not PairLocks.is_pair_locked(pair, row[DATE_IDX], trade_dir)
):
trade = self._enter_trade(pair, row, trade_dir)
if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
# logger.debug(f"{pair} - Emulate creation of new trade: {trade}.")
LocalTrade.add_bt_trade(trade)
self.wallets.update()
if (self.trade_slot_available(max_open_trades, open_trade_count_start)):
trade = self._enter_trade(pair, row, trade_dir)
if trade:
# TODO: hacky workaround to avoid opening > max_open_trades
# This emulates previous behavior - not sure if this is correct
# Prevents entering if the trade-slot was freed in this candle
open_trade_count_start += 1
# logger.debug(f"{pair} - Emulate creation of new trade: {trade}.")
LocalTrade.add_bt_trade(trade)
self.wallets.update()
else:
self._collate_rejected(pair, row)
for trade in list(LocalTrade.bt_trades_open_pp[pair]):
# 3. Process entry orders.
@@ -1266,6 +1283,7 @@ class Backtesting:
if (self.config.get('export', 'none') == 'signals' and
self.dataprovider.runmode == RunMode.BACKTEST):
self._generate_trade_signal_candles(preprocessed_tmp, results)
self._generate_rejected_trades(preprocessed_tmp, self.rejected_dict)
return min_date, max_date
@@ -1282,12 +1300,33 @@ class Backtesting:
for t, v in pairresults.open_date.items():
allinds = pairdf.loc[(pairdf['date'] < v)]
signal_inds = allinds.iloc[[-1]]
signal_candles_only_df = pd.concat([signal_candles_only_df, signal_inds])
signal_candles_only_df = pd.concat([
signal_candles_only_df.infer_objects(),
signal_inds.infer_objects()])
signal_candles_only[pair] = signal_candles_only_df
self.processed_dfs[self.strategy.get_strategy_name()] = signal_candles_only
def _generate_rejected_trades(self, preprocessed_df, rejected_dict):
rejected_candles_only = {}
for pair, trades in rejected_dict.items():
rejected_trades_only_df = DataFrame()
pairdf = preprocessed_df[pair]
for t in trades:
data_df_row = pairdf.loc[(pairdf['date'] == t[0])].copy()
data_df_row['pair'] = pair
data_df_row['enter_tag'] = t[1]
rejected_trades_only_df = pd.concat([
rejected_trades_only_df.infer_objects(),
data_df_row.infer_objects()])
rejected_candles_only[pair] = rejected_trades_only_df
self.rejected_df[self.strategy.get_strategy_name()] = rejected_candles_only
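A condensed, synthetic sketch of the enrichment loop above: each rejected (date, enter_tag) entry is matched to its candle in the preprocessed dataframe and tagged with the pair and enter_tag (the candle data is invented):

# Synthetic example of the core of _generate_rejected_trades: look up the candle
# for each rejected (date, enter_tag) entry and annotate it for later analysis.
import pandas as pd

candles = pd.DataFrame({
    "date": pd.to_datetime(["2022-11-01 04:00", "2022-11-01 05:00"], utc=True),
    "open": [20000.0, 20100.0],
    "close": [20100.0, 20050.0],
})
rejected_entries = [[pd.Timestamp("2022-11-01 05:00", tz="UTC"), "enter_long_rsi"]]

rejected_rows = pd.DataFrame()
for date, enter_tag in rejected_entries:
    row = candles.loc[candles["date"] == date].copy()
    row["pair"] = "BTC/USDT"
    row["enter_tag"] = enter_tag
    rejected_rows = pd.concat([rejected_rows, row])
print(rejected_rows)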
def _get_min_cached_backtest_date(self):
min_backtest_date = None
backtest_cache_age = self.config.get('backtest_cache', constants.BACKTEST_CACHE_DEFAULT)
@@ -1353,6 +1392,9 @@ class Backtesting:
store_backtest_signal_candles(
self.config['exportfilename'], self.processed_dfs, dt_appendix)
store_backtest_rejected_trades(
self.config['exportfilename'], self.rejected_df, dt_appendix)
# Results may be mixed up now. Sort them so they follow --strategy-list order.
if 'strategy_list' in self.config and len(self.results) > 0:
self.results['strategy_comparison'] = sorted(

View File: freqtrade/optimize/optimize_reports.py

@@ -45,29 +45,41 @@ def store_backtest_stats(
file_dump_json(latest_filename, {'latest_backtest': str(filename.name)})
def store_backtest_signal_candles(
recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path:
def _store_backtest_analysis_data(
recordfilename: Path, data: Dict[str, Dict],
dtappendix: str, name: str) -> Path:
"""
Stores backtest trade signal candles
Stores backtest trade candles for analysis
:param recordfilename: Path object, which can either be a filename or a directory.
Filenames will be appended with a timestamp right before the suffix
while for directories, <directory>/backtest-result-<datetime>_signals.pkl will be used
while for directories, <directory>/backtest-result-<datetime>_<name>.pkl will be used
as filename
:param stats: Dict containing the backtesting signal candles
:param data: Dict containing the backtesting data for analysis
:param dtappendix: Datetime to use for the filename
:param name: Name to use for the file, e.g. signals, rejected
"""
if recordfilename.is_dir():
filename = (recordfilename / f'backtest-result-{dtappendix}_signals.pkl')
filename = (recordfilename / f'backtest-result-{dtappendix}_{name}.pkl')
else:
filename = Path.joinpath(
recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_signals.pkl'
recordfilename.parent, f'{recordfilename.stem}-{dtappendix}_{name}.pkl'
)
file_dump_joblib(filename, candles)
file_dump_joblib(filename, data)
return filename
def store_backtest_signal_candles(
recordfilename: Path, candles: Dict[str, Dict], dtappendix: str) -> Path:
return _store_backtest_analysis_data(recordfilename, candles, dtappendix, "signals")
def store_backtest_rejected_trades(
recordfilename: Path, trades: Dict[str, Dict], dtappendix: str) -> Path:
return _store_backtest_analysis_data(recordfilename, trades, dtappendix, "rejected")
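The two wrappers differ only in the name suffix. A sketch of the filenames _store_backtest_analysis_data would produce for a directory target and a file target (timestamps and paths are examples; the is_dir() check is replaced by a suffix check so the snippet runs without the directory existing):

# Sketch of the output paths for the two suffixes now in use ("signals" and
# "rejected"). dtappendix is an example timestamp, not a computed value.
from pathlib import Path

def analysis_filename(recordfilename: Path, dtappendix: str, name: str) -> Path:
    if recordfilename.suffix == "":  # stand-in for recordfilename.is_dir()
        return recordfilename / f"backtest-result-{dtappendix}_{name}.pkl"
    return recordfilename.parent / f"{recordfilename.stem}-{dtappendix}_{name}.pkl"

for name in ("signals", "rejected"):
    print(analysis_filename(Path("user_data/backtest_results"), "2022-12-05_15-34-31", name))
    print(analysis_filename(Path("user_data/backtest_results/mystrategy.json"),
                            "2022-12-05_15-34-31", name))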
def _get_line_floatfmt(stake_currency: str) -> List[str]:
"""
Generate floatformat (goes in line with _generate_result_line())