Merge pull request #3845 from freqtrade/feat/backtest_speedup_serialize

Backtesting should not double-loop for sell signals
2020-10-19 07:52:33 +02:00
parent 340f25bd42 cf2ae788d7
commit 667f1b8b8c
2 changed files with 135 additions and 124 deletions
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@@ -4,11 +4,11 @@
 This module contains the backtesting logic
 """
 import logging
+from collections import defaultdict
 from copy import deepcopy
 from datetime import datetime, timedelta
 from typing import Any, Dict, List, NamedTuple, Optional, Tuple

-import arrow
 from pandas import DataFrame

 from freqtrade.configuration import TimeRange, remove_credentials, validate_config_consistency
@@ -28,6 +28,15 @@ from freqtrade.strategy.interface import IStrategy, SellCheckTuple, SellType

 logger = logging.getLogger(__name__)

+# Indexes for backtest tuples
+DATE_IDX = 0
+BUY_IDX = 1
+OPEN_IDX = 2
+CLOSE_IDX = 3
+SELL_IDX = 4
+LOW_IDX = 5
+HIGH_IDX = 6
+

 class BacktestResult(NamedTuple):
    """
@@ -115,7 +124,7 @@ class Backtesting:
        """
        Load strategy into backtesting
        """
-        self.strategy = strategy
+        self.strategy: IStrategy = strategy
        # Set stoploss_on_exchange to false for backtesting,
        # since a "perfect" stoploss-sell is assumed anyway
        # And the regular "stoploss" function would not apply to that case
@@ -147,12 +156,14 @@ class Backtesting:

        return data, timerange

-    def _get_ohlcv_as_lists(self, processed: Dict) -> Dict[str, DataFrame]:
+    def _get_ohlcv_as_lists(self, processed: Dict[str, DataFrame]) -> Dict[str, Tuple]:
        """
        Helper function to convert a processed dataframes into lists for performance reasons.

        Used by backtest() - so keep this optimized for performance.
        """
+        # Every change to this headers list must evaluate further usages of the resulting tuple
+        # and eventually change the constants for indexes at the top
        headers = ['date', 'buy', 'open', 'close', 'sell', 'low', 'high']
        data: Dict = {}
        # Create dict with data
@@ -172,10 +183,10 @@ class Backtesting:

            # Convert from Pandas to list for performance reasons
            # (Looping Pandas is slow.)
-            data[pair] = [x for x in df_analyzed.itertuples()]
+            data[pair] = [x for x in df_analyzed.itertuples(index=False, name=None)]
        return data

-    def _get_close_rate(self, sell_row, trade: Trade, sell: SellCheckTuple,
+    def _get_close_rate(self, sell_row: Tuple, trade: Trade, sell: SellCheckTuple,
                        trade_dur: int) -> float:
        """
        Get close rate for backtesting result
@@ -186,12 +197,12 @@ class Backtesting:
            return trade.stop_loss
        elif sell.sell_type == (SellType.ROI):
            roi_entry, roi = self.strategy.min_roi_reached_entry(trade_dur)
-            if roi is not None:
+            if roi is not None and roi_entry is not None:
                if roi == -1 and roi_entry % self.timeframe_min == 0:
                    # When forceselling with ROI=-1, the roi time will always be equal to trade_dur.
                    # If that entry is a multiple of the timeframe (so on candle open)
                    # - we'll use open instead of close
-                    return sell_row.open
+                    return sell_row[OPEN_IDX]

                # - (Expected abs profit + open_rate + open_fee) / (fee_close -1)
                close_rate = - (trade.open_rate * roi + trade.open_rate *
@@ -199,91 +210,79 @@ class Backtesting:

                if (trade_dur > 0 and trade_dur == roi_entry
                        and roi_entry % self.timeframe_min == 0
-                        and sell_row.open > close_rate):
+                        and sell_row[OPEN_IDX] > close_rate):
                    # new ROI entry came into effect.
                    # use Open rate if open_rate > calculated sell rate
-                    return sell_row.open
+                    return sell_row[OPEN_IDX]

                # Use the maximum between close_rate and low as we
                # cannot sell outside of a candle.
                # Applies when a new ROI setting comes in place and the whole candle is above that.
-                return max(close_rate, sell_row.low)
+                return max(close_rate, sell_row[LOW_IDX])

            else:
                # This should not be reached...
-                return sell_row.open
+                return sell_row[OPEN_IDX]
        else:
-            return sell_row.open
+            return sell_row[OPEN_IDX]

-    def _get_sell_trade_entry(
-            self, pair: str, buy_row: DataFrame,
-            partial_ohlcv: List, trade_count_lock: Dict,
-            stake_amount: float, max_open_trades: int) -> Optional[BacktestResult]:
+    def _get_sell_trade_entry(self, trade: Trade, sell_row: Tuple) -> Optional[BacktestResult]:

-        trade = Trade(
-            pair=pair,
-            open_rate=buy_row.open,
-            open_date=buy_row.date,
-            stake_amount=stake_amount,
-            amount=round(stake_amount / buy_row.open, 8),
-            fee_open=self.fee,
-            fee_close=self.fee,
-            is_open=True,
-        )
-        logger.debug(f"{pair} - Backtesting emulates creation of new trade: {trade}.")
-        # calculate win/lose forwards from buy point
-        for sell_row in partial_ohlcv:
-            if max_open_trades > 0:
-                # Increase trade_count_lock for every iteration
-                trade_count_lock[sell_row.date] = trade_count_lock.get(sell_row.date, 0) + 1
+        sell = self.strategy.should_sell(trade, sell_row[OPEN_IDX], sell_row[DATE_IDX],
+                                         sell_row[BUY_IDX], sell_row[SELL_IDX],
+                                         low=sell_row[LOW_IDX], high=sell_row[HIGH_IDX])
+        if sell.sell_flag:
+            trade_dur = int((sell_row[DATE_IDX] - trade.open_date).total_seconds() // 60)
+            closerate = self._get_close_rate(sell_row, trade, sell, trade_dur)

-            sell = self.strategy.should_sell(trade, sell_row.open, sell_row.date, sell_row.buy,
-                                             sell_row.sell, low=sell_row.low, high=sell_row.high)
-            if sell.sell_flag:
-                trade_dur = int((sell_row.date - buy_row.date).total_seconds() // 60)
-                closerate = self._get_close_rate(sell_row, trade, sell, trade_dur)
-
-                return BacktestResult(pair=pair,
-                                      profit_percent=trade.calc_profit_ratio(rate=closerate),
-                                      profit_abs=trade.calc_profit(rate=closerate),
-                                      open_date=buy_row.date,
-                                      open_rate=buy_row.open,
-                                      open_fee=self.fee,
-                                      close_date=sell_row.date,
-                                      close_rate=closerate,
-                                      close_fee=self.fee,
-                                      amount=trade.amount,
-                                      trade_duration=trade_dur,
-                                      open_at_end=False,
-                                      sell_reason=sell.sell_type
-                                      )
-        if partial_ohlcv:
-            # no sell condition found - trade stil open at end of backtest period
-            sell_row = partial_ohlcv[-1]
-            bt_res = BacktestResult(pair=pair,
-                                    profit_percent=trade.calc_profit_ratio(rate=sell_row.open),
-                                    profit_abs=trade.calc_profit(rate=sell_row.open),
-                                    open_date=buy_row.date,
-                                    open_rate=buy_row.open,
-                                    open_fee=self.fee,
-                                    close_date=sell_row.date,
-                                    close_rate=sell_row.open,
-                                    close_fee=self.fee,
-                                    amount=trade.amount,
-                                    trade_duration=int((
-                                        sell_row.date - buy_row.date).total_seconds() // 60),
-                                    open_at_end=True,
-                                    sell_reason=SellType.FORCE_SELL
-                                    )
-            logger.debug(f"{pair} - Force selling still open trade, "
-                         f"profit percent: {bt_res.profit_percent}, "
-                         f"profit abs: {bt_res.profit_abs}")
-
-            return bt_res
+            return BacktestResult(pair=trade.pair,
+                                  profit_percent=trade.calc_profit_ratio(rate=closerate),
+                                  profit_abs=trade.calc_profit(rate=closerate),
+                                  open_date=trade.open_date,
+                                  open_rate=trade.open_rate,
+                                  open_fee=self.fee,
+                                  close_date=sell_row[DATE_IDX],
+                                  close_rate=closerate,
+                                  close_fee=self.fee,
+                                  amount=trade.amount,
+                                  trade_duration=trade_dur,
+                                  open_at_end=False,
+                                  sell_reason=sell.sell_type
+                                  )
        return None

+    def handle_left_open(self, open_trades: Dict[str, List[Trade]],
+                         data: Dict[str, List[Tuple]]) -> List[BacktestResult]:
+        """
+        Handling of left open trades at the end of backtesting
+        """
+        trades = []
+        for pair in open_trades.keys():
+            if len(open_trades[pair]) > 0:
+                for trade in open_trades[pair]:
+                    sell_row = data[pair][-1]
+                    trade_entry = BacktestResult(pair=trade.pair,
+                                                 profit_percent=trade.calc_profit_ratio(
+                                                     rate=sell_row[OPEN_IDX]),
+                                                 profit_abs=trade.calc_profit(sell_row[OPEN_IDX]),
+                                                 open_date=trade.open_date,
+                                                 open_rate=trade.open_rate,
+                                                 open_fee=self.fee,
+                                                 close_date=sell_row[DATE_IDX],
+                                                 close_rate=sell_row[OPEN_IDX],
+                                                 close_fee=self.fee,
+                                                 amount=trade.amount,
+                                                 trade_duration=int((
+                                                     sell_row[DATE_IDX] - trade.open_date
+                                                 ).total_seconds() // 60),
+                                                 open_at_end=True,
+                                                 sell_reason=SellType.FORCE_SELL
+                                                 )
+                    trades.append(trade_entry)
+        return trades
+
    def backtest(self, processed: Dict, stake_amount: float,
-                 start_date: arrow.Arrow, end_date: arrow.Arrow,
+                 start_date: datetime, end_date: datetime,
                 max_open_trades: int = 0, position_stacking: bool = False) -> DataFrame:
        """
        Implement backtesting functionality
@@ -305,19 +304,21 @@ class Backtesting:
                     f"max_open_trades: {max_open_trades}, position_stacking: {position_stacking}"
                     )
        trades = []
-        trade_count_lock: Dict = {}

        # Use dict of lists with data for performance
        # (looping lists is a lot faster than pandas DataFrames)
        data: Dict = self._get_ohlcv_as_lists(processed)

-        lock_pair_until: Dict = {}
        # Indexes per pair, so some pairs are allowed to have a missing start.
        indexes: Dict = {}
        tmp = start_date + timedelta(minutes=self.timeframe_min)

+        open_trades: Dict[str, List] = defaultdict(list)
+        open_trade_count = 0
+
        # Loop timerange and get candle for each pair at that point in time
-        while tmp < end_date:
+        while tmp <= end_date:
+            open_trade_count_start = open_trade_count

            for i, pair in enumerate(data):
                if pair not in indexes:
@@ -331,42 +332,52 @@ class Backtesting:
                    continue

                # Waits until the time-counter reaches the start of the data for this pair.
-                if row.date > tmp.datetime:
+                if row[DATE_IDX] > tmp:
                    continue
-
                indexes[pair] += 1

-                if row.buy == 0 or row.sell == 1:
-                    continue  # skip rows where no buy signal or that would immediately sell off
+                # without positionstacking, we can only have one open trade per pair.
+                # max_open_trades must be respected
+                # don't open on the last row
+                if ((position_stacking or len(open_trades[pair]) == 0)
+                        and max_open_trades > 0 and open_trade_count_start < max_open_trades
+                        and tmp != end_date
+                        and row[BUY_IDX] == 1 and row[SELL_IDX] != 1):
+                    # Enter trade
+                    trade = Trade(
+                        pair=pair,
+                        open_rate=row[OPEN_IDX],
+                        open_date=row[DATE_IDX],
+                        stake_amount=stake_amount,
+                        amount=round(stake_amount / row[OPEN_IDX], 8),
+                        fee_open=self.fee,
+                        fee_close=self.fee,
+                        is_open=True,
+                    )
+                    # TODO: hacky workaround to avoid opening > max_open_trades
+                    # This emulates previous behaviour - not sure if this is correct
+                    # Prevents buying if the trade-slot was freed in this candle
+                    open_trade_count_start += 1
+                    open_trade_count += 1
+                    # logger.debug(f"{pair} - Backtesting emulates creation of new trade: {trade}.")
+                    open_trades[pair].append(trade)

-                if (not position_stacking and pair in lock_pair_until
-                        and row.date <= lock_pair_until[pair]):
-                    # without positionstacking, we can only have one open trade per pair.
-                    continue
-
-                if max_open_trades > 0:
-                    # Check if max_open_trades has already been reached for the given date
-                    if not trade_count_lock.get(row.date, 0) < max_open_trades:
-                        continue
-                    trade_count_lock[row.date] = trade_count_lock.get(row.date, 0) + 1
-
-                # since indexes has been incremented before, we need to go one step back to
-                # also check the buying candle for sell conditions.
-                trade_entry = self._get_sell_trade_entry(pair, row, data[pair][indexes[pair]-1:],
-                                                         trade_count_lock, stake_amount,
-                                                         max_open_trades)
-
-                if trade_entry:
-                    logger.debug(f"{pair} - Locking pair till "
-                                 f"close_date={trade_entry.close_date}")
-                    lock_pair_until[pair] = trade_entry.close_date
-                    trades.append(trade_entry)
-                else:
-                    # Set lock_pair_until to end of testing period if trade could not be closed
-                    lock_pair_until[pair] = end_date.datetime
+                for trade in open_trades[pair]:
+                    # since indexes has been incremented before, we need to go one step back to
+                    # also check the buying candle for sell conditions.
+                    trade_entry = self._get_sell_trade_entry(trade, row)
+                    # Sell occured
+                    if trade_entry:
+                        # logger.debug(f"{pair} - Backtesting sell {trade}")
+                        open_trade_count -= 1
+                        open_trades[pair].remove(trade)
+                        trades.append(trade_entry)

            # Move time one configured time_interval ahead.
            tmp += timedelta(minutes=self.timeframe_min)
+
+        trades += self.handle_left_open(open_trades, data=data)
+
        return DataFrame.from_records(trades, columns=BacktestResult._fields)

    def start(self) -> None:
@@ -412,8 +423,8 @@ class Backtesting:
            results = self.backtest(
                processed=preprocessed,
                stake_amount=self.config['stake_amount'],
-                start_date=min_date,
-                end_date=max_date,
+                start_date=min_date.datetime,
+                end_date=max_date.datetime,
                max_open_trades=max_open_trades,
                position_stacking=position_stacking,
            )
--- a/freqtrade/optimize/hyperopt.py
+++ b/freqtrade/optimize/hyperopt.py
@@ -94,14 +94,14 @@ class Hyperopt:

        # Populate functions here (hasattr is slow so should not be run during "regular" operations)
        if hasattr(self.custom_hyperopt, 'populate_indicators'):
-            self.backtesting.strategy.advise_indicators = \
-                self.custom_hyperopt.populate_indicators  # type: ignore
+            self.backtesting.strategy.advise_indicators = (  # type: ignore
+                self.custom_hyperopt.populate_indicators)  # type: ignore
        if hasattr(self.custom_hyperopt, 'populate_buy_trend'):
-            self.backtesting.strategy.advise_buy = \
-                self.custom_hyperopt.populate_buy_trend  # type: ignore
+            self.backtesting.strategy.advise_buy = (  # type: ignore
+                self.custom_hyperopt.populate_buy_trend)  # type: ignore
        if hasattr(self.custom_hyperopt, 'populate_sell_trend'):
-            self.backtesting.strategy.advise_sell = \
-                self.custom_hyperopt.populate_sell_trend  # type: ignore
+            self.backtesting.strategy.advise_sell = (  # type: ignore
+                self.custom_hyperopt.populate_sell_trend)  # type: ignore

        # Use max_open_trades for hyperopt as well, except --disable-max-market-positions is set
        if self.config.get('use_max_market_positions', True):
@@ -508,16 +508,16 @@ class Hyperopt:
        params_details = self._get_params_details(params_dict)

        if self.has_space('roi'):
-            self.backtesting.strategy.minimal_roi = \
-                self.custom_hyperopt.generate_roi_table(params_dict)
+            self.backtesting.strategy.minimal_roi = (  # type: ignore
+                self.custom_hyperopt.generate_roi_table(params_dict))

        if self.has_space('buy'):
-            self.backtesting.strategy.advise_buy = \
-                self.custom_hyperopt.buy_strategy_generator(params_dict)
+            self.backtesting.strategy.advise_buy = (  # type: ignore
+                self.custom_hyperopt.buy_strategy_generator(params_dict))

        if self.has_space('sell'):
-            self.backtesting.strategy.advise_sell = \
-                self.custom_hyperopt.sell_strategy_generator(params_dict)
+            self.backtesting.strategy.advise_sell = (  # type: ignore
+                self.custom_hyperopt.sell_strategy_generator(params_dict))

        if self.has_space('stoploss'):
            self.backtesting.strategy.stoploss = params_dict['stoploss']
@@ -538,8 +538,8 @@ class Hyperopt:
        backtesting_results = self.backtesting.backtest(
            processed=processed,
            stake_amount=self.config['stake_amount'],
-            start_date=min_date,
-            end_date=max_date,
+            start_date=min_date.datetime,
+            end_date=max_date.datetime,
            max_open_trades=self.max_open_trades,
            position_stacking=self.position_stacking,
        )