Merge pull request #3845 from freqtrade/feat/backtest_speedup_serialize

Backtesting should not double-loop for sell signals
2020-10-19 07:52:33 +02:00
parent 340f25bd42 cf2ae788d7
commit 667f1b8b8c
2 changed files with 135 additions and 124 deletions
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@@ -4,11 +4,11 @@
 This module contains the backtesting logic
 """
 import logging
 from collections import defaultdict
 from copy import deepcopy
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, NamedTuple, Optional, Tuple
 import arrow
 from pandas import DataFrame
 from freqtrade.configuration import TimeRange, remove_credentials, validate_config_consistency
@@ -28,6 +28,15 @@ from freqtrade.strategy.interface import IStrategy, SellCheckTuple, SellType
 logger = logging.getLogger(__name__)
 # Indexes for backtest tuples
 DATE_IDX = 0
 BUY_IDX = 1
 OPEN_IDX = 2
 CLOSE_IDX = 3
 SELL_IDX = 4
 LOW_IDX = 5
 HIGH_IDX = 6
 class BacktestResult(NamedTuple):
    """
@@ -115,7 +124,7 @@ class Backtesting:
        """
        Load strategy into backtesting
        """
-        self.strategy = strategy
+        self.strategy: IStrategy = strategy
        # Set stoploss_on_exchange to false for backtesting,
        # since a "perfect" stoploss-sell is assumed anyway
        # And the regular "stoploss" function would not apply to that case
@@ -147,12 +156,14 @@ class Backtesting:
        return data, timerange
-    def _get_ohlcv_as_lists(self, processed: Dict) -> Dict[str, DataFrame]:
+    def _get_ohlcv_as_lists(self, processed: Dict[str, DataFrame]) -> Dict[str, Tuple]:
        """
        Helper function to convert processed dataframes into lists for performance reasons.
        Used by backtest() - so keep this optimized for performance.
        """
        # Any change to this headers list requires checking all usages of the resulting tuple
        # and, where necessary, updating the index constants at the top
        headers = ['date', 'buy', 'open', 'close', 'sell', 'low', 'high']
        data: Dict = {}
        # Create dict with data
@@ -172,10 +183,10 @@ class Backtesting:
            # Convert from Pandas to list for performance reasons
            # (Looping Pandas is slow.)
-            data[pair] = [x for x in df_analyzed.itertuples()]
+            data[pair] = [x for x in df_analyzed.itertuples(index=False, name=None)]
        return data
-    def _get_close_rate(self, sell_row, trade: Trade, sell: SellCheckTuple,
+    def _get_close_rate(self, sell_row: Tuple, trade: Trade, sell: SellCheckTuple,
                        trade_dur: int) -> float:
        """
        Get close rate for backtesting result
@@ -186,12 +197,12 @@ class Backtesting:
            return trade.stop_loss
        elif sell.sell_type == (SellType.ROI):
            roi_entry, roi = self.strategy.min_roi_reached_entry(trade_dur)
-            if roi is not None:
+            if roi is not None and roi_entry is not None:
                if roi == -1 and roi_entry % self.timeframe_min == 0:
                    # When forceselling with ROI=-1, the roi time will always be equal to trade_dur.
                    # If that entry is a multiple of the timeframe (so on candle open)
                    # - we'll use open instead of close
-                    return sell_row.open
+                    return sell_row[OPEN_IDX]
                # - (Expected abs profit + open_rate + open_fee) / (fee_close -1)
                close_rate = - (trade.open_rate * roi + trade.open_rate *
@@ -199,91 +210,79 @@ class Backtesting:
                if (trade_dur > 0 and trade_dur == roi_entry
                        and roi_entry % self.timeframe_min == 0
-                        and sell_row.open > close_rate):
+                        and sell_row[OPEN_IDX] > close_rate):
                    # new ROI entry came into effect.
                    # use Open rate if open_rate > calculated sell rate
-                    return sell_row.open
+                    return sell_row[OPEN_IDX]
                # Use the maximum between close_rate and low as we
                # cannot sell outside of a candle.
                # Applies when a new ROI setting comes in place and the whole candle is above that.
-                return max(close_rate, sell_row.low)
+                return max(close_rate, sell_row[LOW_IDX])
            else:
                # This should not be reached...
-                return sell_row.open
+                return sell_row[OPEN_IDX]
        else:
-            return sell_row.open
+            return sell_row[OPEN_IDX]
-    def _get_sell_trade_entry(
+    def _get_sell_trade_entry(self, trade: Trade, sell_row: Tuple) -> Optional[BacktestResult]:
            self, pair: str, buy_row: DataFrame,
            partial_ohlcv: List, trade_count_lock: Dict,
            stake_amount: float, max_open_trades: int) -> Optional[BacktestResult]:
-        trade = Trade(
+        sell = self.strategy.should_sell(trade, sell_row[OPEN_IDX], sell_row[DATE_IDX],
-            pair=pair,
+                                         sell_row[BUY_IDX], sell_row[SELL_IDX],
-            open_rate=buy_row.open,
+                                         low=sell_row[LOW_IDX], high=sell_row[HIGH_IDX])
-            open_date=buy_row.date,
+        if sell.sell_flag:
-            stake_amount=stake_amount,
+            trade_dur = int((sell_row[DATE_IDX] - trade.open_date).total_seconds() // 60)
-            amount=round(stake_amount / buy_row.open, 8),
+            closerate = self._get_close_rate(sell_row, trade, sell, trade_dur)
            fee_open=self.fee,
            fee_close=self.fee,
            is_open=True,
        )
        logger.debug(f"{pair} - Backtesting emulates creation of new trade: {trade}.")
        # calculate win/lose forwards from buy point
        for sell_row in partial_ohlcv:
            if max_open_trades > 0:
                # Increase trade_count_lock for every iteration
                trade_count_lock[sell_row.date] = trade_count_lock.get(sell_row.date, 0) + 1
-            sell = self.strategy.should_sell(trade, sell_row.open, sell_row.date, sell_row.buy,
+            return BacktestResult(pair=trade.pair,
-                                             sell_row.sell, low=sell_row.low, high=sell_row.high)
+                                  profit_percent=trade.calc_profit_ratio(rate=closerate),
-            if sell.sell_flag:
+                                  profit_abs=trade.calc_profit(rate=closerate),
-                trade_dur = int((sell_row.date - buy_row.date).total_seconds() // 60)
+                                  open_date=trade.open_date,
-                closerate = self._get_close_rate(sell_row, trade, sell, trade_dur)
+                                  open_rate=trade.open_rate,
-
+                                  open_fee=self.fee,
-                return BacktestResult(pair=pair,
+                                  close_date=sell_row[DATE_IDX],
-                                      profit_percent=trade.calc_profit_ratio(rate=closerate),
+                                  close_rate=closerate,
-                                      profit_abs=trade.calc_profit(rate=closerate),
+                                  close_fee=self.fee,
-                                      open_date=buy_row.date,
+                                  amount=trade.amount,
-                                      open_rate=buy_row.open,
+                                  trade_duration=trade_dur,
-                                      open_fee=self.fee,
+                                  open_at_end=False,
-                                      close_date=sell_row.date,
+                                  sell_reason=sell.sell_type
-                                      close_rate=closerate,
+                                  )
                                      close_fee=self.fee,
                                      amount=trade.amount,
                                      trade_duration=trade_dur,
                                      open_at_end=False,
                                      sell_reason=sell.sell_type
                                      )
        if partial_ohlcv:
            # no sell condition found - trade still open at end of backtest period
            sell_row = partial_ohlcv[-1]
            bt_res = BacktestResult(pair=pair,
                                    profit_percent=trade.calc_profit_ratio(rate=sell_row.open),
                                    profit_abs=trade.calc_profit(rate=sell_row.open),
                                    open_date=buy_row.date,
                                    open_rate=buy_row.open,
                                    open_fee=self.fee,
                                    close_date=sell_row.date,
                                    close_rate=sell_row.open,
                                    close_fee=self.fee,
                                    amount=trade.amount,
                                    trade_duration=int((
                                        sell_row.date - buy_row.date).total_seconds() // 60),
                                    open_at_end=True,
                                    sell_reason=SellType.FORCE_SELL
                                    )
            logger.debug(f"{pair} - Force selling still open trade, "
                         f"profit percent: {bt_res.profit_percent}, "
                         f"profit abs: {bt_res.profit_abs}")
            return bt_res
        return None
    def handle_left_open(self, open_trades: Dict[str, List[Trade]],
                         data: Dict[str, List[Tuple]]) -> List[BacktestResult]:
        """
        Handling of left open trades at the end of backtesting.
        Each remaining trade is closed at the open rate of the last candle of its pair
        and reported as a forced sell.
        :param open_trades: Open trades per pair. Pairs with an empty list are skipped.
        :param data: Candle tuples per pair, addressed through the *_IDX constants.
        :return: One BacktestResult (open_at_end=True, sell_reason=FORCE_SELL) per open trade.
        """
        trades = []
        for pair, pair_trades in open_trades.items():
            if not pair_trades:
                # Nothing to close - data[pair] is only read when there is a trade to close.
                continue
            # The closing candle is identical for all trades of a pair, so look it up once.
            sell_row = data[pair][-1]
            close_rate = sell_row[OPEN_IDX]
            close_date = sell_row[DATE_IDX]
            for trade in pair_trades:
                trades.append(BacktestResult(
                    pair=trade.pair,
                    profit_percent=trade.calc_profit_ratio(rate=close_rate),
                    profit_abs=trade.calc_profit(rate=close_rate),
                    open_date=trade.open_date,
                    open_rate=trade.open_rate,
                    open_fee=self.fee,
                    close_date=close_date,
                    close_rate=close_rate,
                    close_fee=self.fee,
                    amount=trade.amount,
                    # Trade duration in whole minutes.
                    trade_duration=int((close_date - trade.open_date).total_seconds() // 60),
                    open_at_end=True,
                    sell_reason=SellType.FORCE_SELL,
                ))
        return trades
    def backtest(self, processed: Dict, stake_amount: float,
-                 start_date: arrow.Arrow, end_date: arrow.Arrow,
+                 start_date: datetime, end_date: datetime,
                 max_open_trades: int = 0, position_stacking: bool = False) -> DataFrame:
        """
        Implement backtesting functionality
@@ -305,19 +304,21 @@ class Backtesting:
                     f"max_open_trades: {max_open_trades}, position_stacking: {position_stacking}"
                     )
        trades = []
        trade_count_lock: Dict = {}
        # Use dict of lists with data for performance
        # (looping lists is a lot faster than pandas DataFrames)
        data: Dict = self._get_ohlcv_as_lists(processed)
        lock_pair_until: Dict = {}
        # Indexes per pair, so some pairs are allowed to have a missing start.
        indexes: Dict = {}
        tmp = start_date + timedelta(minutes=self.timeframe_min)
        open_trades: Dict[str, List] = defaultdict(list)
        open_trade_count = 0
        # Loop timerange and get candle for each pair at that point in time
-        while tmp < end_date:
+        while tmp <= end_date:
            open_trade_count_start = open_trade_count
            for i, pair in enumerate(data):
                if pair not in indexes:
@@ -331,42 +332,52 @@ class Backtesting:
                    continue
                # Waits until the time-counter reaches the start of the data for this pair.
-                if row.date > tmp.datetime:
+                if row[DATE_IDX] > tmp:
                    continue
                indexes[pair] += 1
-                if row.buy == 0 or row.sell == 1:
+                # without positionstacking, we can only have one open trade per pair.
-                    continue  # skip rows where no buy signal or that would immediately sell off
+                # max_open_trades must be respected
                # don't open on the last row
                if ((position_stacking or len(open_trades[pair]) == 0)
                        and max_open_trades > 0 and open_trade_count_start < max_open_trades
                        and tmp != end_date
                        and row[BUY_IDX] == 1 and row[SELL_IDX] != 1):
                    # Enter trade
                    trade = Trade(
                        pair=pair,
                        open_rate=row[OPEN_IDX],
                        open_date=row[DATE_IDX],
                        stake_amount=stake_amount,
                        amount=round(stake_amount / row[OPEN_IDX], 8),
                        fee_open=self.fee,
                        fee_close=self.fee,
                        is_open=True,
                    )
                    # TODO: hacky workaround to avoid opening > max_open_trades
                    # This emulates previous behaviour - not sure if this is correct
                    # Prevents buying if the trade-slot was freed in this candle
                    open_trade_count_start += 1
                    open_trade_count += 1
                    # logger.debug(f"{pair} - Backtesting emulates creation of new trade: {trade}.")
                    open_trades[pair].append(trade)
-                if (not position_stacking and pair in lock_pair_until
+                for trade in open_trades[pair]:
-                        and row.date <= lock_pair_until[pair]):
+                    # since indexes has been incremented before, we need to go one step back to
-                    # without positionstacking, we can only have one open trade per pair.
+                    # also check the buying candle for sell conditions.
-                    continue
+                    trade_entry = self._get_sell_trade_entry(trade, row)
-
+                    # Sell occurred
-                if max_open_trades > 0:
+                    if trade_entry:
-                    # Check if max_open_trades has already been reached for the given date
+                        # logger.debug(f"{pair} - Backtesting sell {trade}")
-                    if not trade_count_lock.get(row.date, 0) < max_open_trades:
+                        open_trade_count -= 1
-                        continue
+                        open_trades[pair].remove(trade)
-                    trade_count_lock[row.date] = trade_count_lock.get(row.date, 0) + 1
+                        trades.append(trade_entry)
                # since indexes has been incremented before, we need to go one step back to
                # also check the buying candle for sell conditions.
                trade_entry = self._get_sell_trade_entry(pair, row, data[pair][indexes[pair]-1:],
                                                         trade_count_lock, stake_amount,
                                                         max_open_trades)
                if trade_entry:
                    logger.debug(f"{pair} - Locking pair till "
                                 f"close_date={trade_entry.close_date}")
                    lock_pair_until[pair] = trade_entry.close_date
                    trades.append(trade_entry)
                else:
                    # Set lock_pair_until to end of testing period if trade could not be closed
                    lock_pair_until[pair] = end_date.datetime
            # Move time one configured time_interval ahead.
            tmp += timedelta(minutes=self.timeframe_min)
        trades += self.handle_left_open(open_trades, data=data)
        return DataFrame.from_records(trades, columns=BacktestResult._fields)
    def start(self) -> None:
@@ -412,8 +423,8 @@ class Backtesting:
            results = self.backtest(
                processed=preprocessed,
                stake_amount=self.config['stake_amount'],
-                start_date=min_date,
+                start_date=min_date.datetime,
-                end_date=max_date,
+                end_date=max_date.datetime,
                max_open_trades=max_open_trades,
                position_stacking=position_stacking,
            )
--- a/freqtrade/optimize/hyperopt.py
+++ b/freqtrade/optimize/hyperopt.py
@@ -94,14 +94,14 @@ class Hyperopt:
        # Populate functions here (hasattr is slow so should not be run during "regular" operations)
        if hasattr(self.custom_hyperopt, 'populate_indicators'):
-            self.backtesting.strategy.advise_indicators = \
+            self.backtesting.strategy.advise_indicators = (  # type: ignore
-                self.custom_hyperopt.populate_indicators  # type: ignore
+                self.custom_hyperopt.populate_indicators)  # type: ignore
        if hasattr(self.custom_hyperopt, 'populate_buy_trend'):
-            self.backtesting.strategy.advise_buy = \
+            self.backtesting.strategy.advise_buy = (  # type: ignore
-                self.custom_hyperopt.populate_buy_trend  # type: ignore
+                self.custom_hyperopt.populate_buy_trend)  # type: ignore
        if hasattr(self.custom_hyperopt, 'populate_sell_trend'):
-            self.backtesting.strategy.advise_sell = \
+            self.backtesting.strategy.advise_sell = (  # type: ignore
-                self.custom_hyperopt.populate_sell_trend  # type: ignore
+                self.custom_hyperopt.populate_sell_trend)  # type: ignore
        # Use max_open_trades for hyperopt as well, except --disable-max-market-positions is set
        if self.config.get('use_max_market_positions', True):
@@ -508,16 +508,16 @@ class Hyperopt:
        params_details = self._get_params_details(params_dict)
        if self.has_space('roi'):
-            self.backtesting.strategy.minimal_roi = \
+            self.backtesting.strategy.minimal_roi = (  # type: ignore
-                self.custom_hyperopt.generate_roi_table(params_dict)
+                self.custom_hyperopt.generate_roi_table(params_dict))
        if self.has_space('buy'):
-            self.backtesting.strategy.advise_buy = \
+            self.backtesting.strategy.advise_buy = (  # type: ignore
-                self.custom_hyperopt.buy_strategy_generator(params_dict)
+                self.custom_hyperopt.buy_strategy_generator(params_dict))
        if self.has_space('sell'):
-            self.backtesting.strategy.advise_sell = \
+            self.backtesting.strategy.advise_sell = (  # type: ignore
-                self.custom_hyperopt.sell_strategy_generator(params_dict)
+                self.custom_hyperopt.sell_strategy_generator(params_dict))
        if self.has_space('stoploss'):
            self.backtesting.strategy.stoploss = params_dict['stoploss']
@@ -538,8 +538,8 @@ class Hyperopt:
        backtesting_results = self.backtesting.backtest(
            processed=processed,
            stake_amount=self.config['stake_amount'],
-            start_date=min_date,
+            start_date=min_date.datetime,
-            end_date=max_date,
+            end_date=max_date.datetime,
            max_open_trades=self.max_open_trades,
            position_stacking=self.position_stacking,
        )