Move Backtesting to a class and add unit tests

2018-02-08 23:35:38 -08:00
parent db67b10605
commit 1d251d6151
9 changed files with 942 additions and 427 deletions
--- a/freqtrade/optimize/init.py
+++ b/freqtrade/optimize/init.py
@@ -1,18 +1,16 @@
 # pragma pylint: disable=missing-docstring

-import logging
 import json
 import os
 from typing import Optional, List, Dict
-from pandas import DataFrame
+import gzip
 from freqtrade.exchange import get_ticker_history
-from freqtrade.analyze import populate_indicators, parse_ticker_dataframe

 from freqtrade import misc
+from freqtrade.logger import Logger
 from user_data.hyperopt_conf import hyperopt_optimize_conf
-import gzip

-logger = logging.getLogger(__name__)
+logger = Logger(name=__name__).get_logger()


 def trim_tickerlist(tickerlist, timerange):
@@ -84,21 +82,13 @@ def load_data(datadir: str, ticker_interval: int, pairs: Optional[List[str]] = N
    return result


-def tickerdata_to_dataframe(data):
-    preprocessed = preprocess(data)
-    return preprocessed
-
-
-def preprocess(tickerdata: Dict[str, List]) -> Dict[str, DataFrame]:
-    """Creates a dataframe and populates indicators for given ticker data"""
-    return {pair: populate_indicators(parse_ticker_dataframe(pair_data))
-            for pair, pair_data in tickerdata.items()}
-
-
 def make_testdata_path(datadir: str) -> str:
    """Return the path where testdata files are stored"""
-    return datadir or os.path.abspath(os.path.join(os.path.dirname(__file__),
-                                                   '..', 'tests', 'testdata'))
+    return datadir or os.path.abspath(
+        os.path.join(
+            os.path.dirname(__file__), '..', 'tests', 'testdata'
+        )
+    )


 def download_pairs(datadir, pairs: List[str], ticker_interval: int) -> bool:
@@ -115,11 +105,6 @@ def download_pairs(datadir, pairs: List[str], ticker_interval: int) -> bool:
    return True


-def file_dump_json(filename, data):
-    with open(filename, "wt") as fp:
-        json.dump(data, fp)
-
-
 # FIX: 20180110, suggest rename interval to tick_interval
 def download_backtesting_testdata(datadir: str, pair: str, interval: int = 5) -> bool:
    """
@@ -142,8 +127,8 @@ def download_backtesting_testdata(datadir: str, pair: str, interval: int = 5) ->
    ))

    if os.path.isfile(filename):
-        with open(filename, "rt") as fp:
-            data = json.load(fp)
+        with open(filename, "rt") as file:
+            data = json.load(file)
        logger.debug("Current Start: {}".format(data[1]['T']))
        logger.debug("Current End: {}".format(data[-1:][0]['T']))
    else:
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@@ -1,235 +1,321 @@
-# pragma pylint: disable=missing-docstring,W0212
+# pragma pylint: disable=missing-docstring, W0212, too-many-arguments

+"""
+This module contains the backtesting logic
+"""
+
+from typing import Dict, Tuple, Any
 import logging
-from typing import Dict, Tuple
-
 import arrow
 from pandas import DataFrame, Series
 from tabulate import tabulate

-import freqtrade.misc as misc
 import freqtrade.optimize as optimize
-from freqtrade import exchange
-from freqtrade.analyze import populate_buy_trend, populate_sell_trend
+from freqtrade.arguments import Arguments
 from freqtrade.exchange import Bittrex
-from freqtrade.main import should_sell
+from freqtrade.configuration import Configuration
+from freqtrade import exchange
+from freqtrade.analyze import Analyze
+from freqtrade.logger import Logger
+from freqtrade.misc import file_dump_json
 from freqtrade.persistence import Trade
-from freqtrade.strategy.strategy import Strategy
-
-logger = logging.getLogger(__name__)


-def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow]:
+class Backtesting(object):
    """
-    Get the maximum timeframe for the given backtest data
-    :param data: dictionary with preprocessed backtesting data
-    :return: tuple containing min_date, max_date
-    """
-    all_dates = Series([])
-    for pair_data in data.values():
-        all_dates = all_dates.append(pair_data['date'])
-    all_dates.sort_values(inplace=True)
-    return arrow.get(all_dates.iloc[0]), arrow.get(all_dates.iloc[-1])
+    Backtesting class, this class contains all the logic to run a backtest

+    To run a backtest:
+    backtesting = Backtesting(config)
+    backtesting.start()
+    """
+    def __init__(self, config: Dict[str, Any]) -> None:
+        self.logging = Logger(name=__name__)
+        self.logger = self.logging.get_logger()

-def generate_text_table(
-        data: Dict[str, Dict], results: DataFrame, stake_currency, ticker_interval) -> str:
-    """
-    Generates and returns a text table for the given backtest data and the results dataframe
-    :return: pretty printed table with tabulate as str
-    """
-    floatfmt = ('s', 'd', '.2f', '.8f', '.1f')
-    tabular_data = []
-    headers = ['pair', 'buy count', 'avg profit %',
-               'total profit ' + stake_currency, 'avg duration', 'profit', 'loss']
-    for pair in data:
-        result = results[results.currency == pair]
+        self.config = config
+        self.analyze = None
+        self.ticker_interval = None
+        self.tickerdata_to_dataframe = None
+        self.populate_buy_trend = None
+        self.populate_sell_trend = None
+        self._init()
+
+    def _init(self) -> None:
+        """
+        Init objects required for backtesting
+        :return: None
+        """
+        self.analyze = Analyze(self.config)
+        self.ticker_interval = self.analyze.strategy.ticker_interval
+        self.tickerdata_to_dataframe = self.analyze.tickerdata_to_dataframe
+        self.populate_buy_trend = self.analyze.populate_buy_trend
+        self.populate_sell_trend = self.analyze.populate_sell_trend
+        exchange._API = Bittrex({'key': '', 'secret': ''})
+
+    @staticmethod
+    def get_timeframe(data: Dict[str, DataFrame]) -> Tuple[arrow.Arrow, arrow.Arrow]:
+        """
+        Get the maximum timeframe for the given backtest data
+        :param data: dictionary with preprocessed backtesting data
+        :return: tuple containing min_date, max_date
+        """
+        all_dates = Series([])
+        for pair_data in data.values():
+            all_dates = all_dates.append(pair_data['date'])
+        all_dates.sort_values(inplace=True)
+        return arrow.get(all_dates.iloc[0]), arrow.get(all_dates.iloc[-1])
+
+    def _generate_text_table(self, data: Dict[str, Dict], results: DataFrame) -> str:
+        """
+        Generates and returns a text table for the given backtest data and the results dataframe
+        :return: pretty printed table with tabulate as str
+        """
+        stake_currency = self.config.get('stake_currency')
+        ticker_interval = self.ticker_interval
+
+        floatfmt = ('s', 'd', '.2f', '.8f', '.1f')
+        tabular_data = []
+        headers = ['pair', 'buy count', 'avg profit %',
+                   'total profit ' + stake_currency, 'avg duration', 'profit', 'loss']
+        for pair in data:
+            result = results[results.currency == pair]
+            tabular_data.append([
+                pair,
+                len(result.index),
+                result.profit_percent.mean() * 100.0,
+                result.profit_BTC.sum(),
+                result.duration.mean() * ticker_interval,
+                len(result[result.profit_BTC > 0]),
+                len(result[result.profit_BTC < 0])
+            ])
+
+        # Append Total
        tabular_data.append([
-            pair,
-            len(result.index),
-            result.profit_percent.mean() * 100.0,
-            result.profit_BTC.sum(),
-            result.duration.mean() * ticker_interval,
-            len(result[result.profit_BTC > 0]),
-            len(result[result.profit_BTC < 0])
+            'TOTAL',
+            len(results.index),
+            results.profit_percent.mean() * 100.0,
+            results.profit_BTC.sum(),
+            results.duration.mean() * ticker_interval,
+            len(results[results.profit_BTC > 0]),
+            len(results[results.profit_BTC < 0])
        ])
+        return tabulate(tabular_data, headers=headers, floatfmt=floatfmt)

-    # Append Total
-    tabular_data.append([
-        'TOTAL',
-        len(results.index),
-        results.profit_percent.mean() * 100.0,
-        results.profit_BTC.sum(),
-        results.duration.mean() * ticker_interval,
-        len(results[results.profit_BTC > 0]),
-        len(results[results.profit_BTC < 0])
-    ])
-    return tabulate(tabular_data, headers=headers, floatfmt=floatfmt)
+    def _get_sell_trade_entry(self, pair, row, buy_subset, ticker, trade_count_lock, args):
+        stake_amount = args['stake_amount']
+        max_open_trades = args.get('max_open_trades', 0)
+        trade = Trade(
+            open_rate=row.close,
+            open_date=row.date,
+            stake_amount=stake_amount,
+            amount=stake_amount / row.open,
+            fee=exchange.get_fee()
+        )

-
-def get_sell_trade_entry(pair, row, buy_subset, ticker, trade_count_lock, args):
-    stake_amount = args['stake_amount']
-    max_open_trades = args.get('max_open_trades', 0)
-    trade = Trade(open_rate=row.close,
-                  open_date=row.date,
-                  stake_amount=stake_amount,
-                  amount=stake_amount / row.open,
-                  fee=exchange.get_fee()
-                  )
-
-    # calculate win/lose forwards from buy point
-    sell_subset = ticker[ticker.date > row.date][['close', 'date', 'sell']]
-    for row2 in sell_subset.itertuples(index=True):
-        if max_open_trades > 0:
-            # Increase trade_count_lock for every iteration
-            trade_count_lock[row2.date] = trade_count_lock.get(row2.date, 0) + 1
-
-        # Buy is on is in the buy_subset there is a row that matches the date
-        # of the sell event
-        buy_signal = not buy_subset[buy_subset.date == row2.date].empty
-        if(should_sell(trade, row2.close, row2.date, buy_signal, row2.sell)):
-            return row2, (pair,
-                          trade.calc_profit_percent(rate=row2.close),
-                          trade.calc_profit(rate=row2.close),
-                          row2.Index - row.Index
-                          ), row2.date
-    return None
-
-
-def backtest(args) -> DataFrame:
-    """
-    Implements backtesting functionality
-    :param args: a dict containing:
-        stake_amount: btc amount to use for each trade
-        processed: a processed dictionary with format {pair, data}
-        max_open_trades: maximum number of concurrent trades (default: 0, disabled)
-        realistic: do we try to simulate realistic trades? (default: True)
-        sell_profit_only: sell if profit only
-        use_sell_signal: act on sell-signal
-        stoploss: use stoploss
-    :return: DataFrame
-    """
-    processed = args['processed']
-    max_open_trades = args.get('max_open_trades', 0)
-    realistic = args.get('realistic', True)
-    record = args.get('record', None)
-    records = []
-    trades = []
-    trade_count_lock: dict = {}
-    exchange._API = Bittrex({'key': '', 'secret': ''})
-    for pair, pair_data in processed.items():
-        pair_data['buy'], pair_data['sell'] = 0, 0
-        ticker = populate_sell_trend(populate_buy_trend(pair_data))
-        # for each buy point
-        lock_pair_until = None
-        headers = ['buy', 'open', 'close', 'date', 'sell']
-        buy_subset = ticker[(ticker.buy == 1) & (ticker.sell == 0)][headers]
-        for row in buy_subset.itertuples(index=True):
-            if realistic:
-                if lock_pair_until is not None and row.date <= lock_pair_until:
-                    continue
+        # calculate win/lose forwards from buy point
+        sell_subset = ticker[ticker.date > row.date][['close', 'date', 'sell']]
+        for row2 in sell_subset.itertuples(index=True):
            if max_open_trades > 0:
-                # Check if max_open_trades has already been reached for the given date
-                if not trade_count_lock.get(row.date, 0) < max_open_trades:
-                    continue
+                # Increase trade_count_lock for every iteration
+                trade_count_lock[row2.date] = trade_count_lock.get(row2.date, 0) + 1

-            if max_open_trades > 0:
-                # Increase lock
-                trade_count_lock[row.date] = trade_count_lock.get(row.date, 0) + 1
+            # Buy signal is on if the buy_subset contains a row that matches the date
+            # of the sell event
+            buy_signal = not buy_subset[buy_subset.date == row2.date].empty
+            if(
+                    self.analyze.should_sell(
+                        trade=trade,
+                        rate=row2.close,
+                        date=row2.date,
+                        buy=buy_signal,
+                        sell=row2.sell
+                    )
+            ):
+                return \
+                    row2, \
+                    (
+                        pair,
+                        trade.calc_profit_percent(rate=row2.close),
+                        trade.calc_profit(rate=row2.close),
+                        row2.Index - row.Index
+                    ),\
+                    row2.date
+        return None

-            ret = get_sell_trade_entry(pair, row, buy_subset, ticker,
-                                       trade_count_lock, args)
-            if ret:
-                row2, trade_entry, next_date = ret
-                lock_pair_until = next_date
-                trades.append(trade_entry)
-                if record:
-                    # Note, need to be json.dump friendly
-                    # record a tuple of pair, current_profit_percent,
-                    # entry-date, duration
-                    records.append((pair, trade_entry[1],
-                                    row.date.strftime('%s'),
-                                    row2.date.strftime('%s'),
-                                    row.Index, trade_entry[3]))
-    # For now export inside backtest(), maybe change so that backtest()
-    # returns a tuple like: (dataframe, records, logs, etc)
-    if record and record.find('trades') >= 0:
-        logger.info('Dumping backtest results')
-        misc.file_dump_json('backtest-result.json', records)
-    labels = ['currency', 'profit_percent', 'profit_BTC', 'duration']
-    return DataFrame.from_records(trades, columns=labels)
+    def backtest(self, args) -> DataFrame:
+        """
+        Implements backtesting functionality
+
+        NOTE: This method is used by Hyperopt at each iteration. Please keep it optimized.
+        Of course, avoid ugly code. But some accessors are sometimes slower than functions.
+        Avoid logging in this method.
+
+        :param args: a dict containing:
+            stake_amount: btc amount to use for each trade
+            processed: a processed dictionary with format {pair, data}
+            max_open_trades: maximum number of concurrent trades (default: 0, disabled)
+            realistic: do we try to simulate realistic trades? (default: True)
+            sell_profit_only: sell if profit only
+            use_sell_signal: act on sell-signal
+            stoploss: use stoploss
+        :return: DataFrame
+        """
+        processed = args['processed']
+        max_open_trades = args.get('max_open_trades', 0)
+        realistic = args.get('realistic', True)
+        record = args.get('record', None)
+        records = []
+        trades = []
+        trade_count_lock = {}
+        for pair, pair_data in processed.items():
+            pair_data['buy'], pair_data['sell'] = 0, 0
+            ticker = self.populate_sell_trend(
+                self.populate_buy_trend(pair_data)
+            )
+            # for each buy point
+            lock_pair_until = None
+            headers = ['buy', 'open', 'close', 'date', 'sell']
+            buy_subset = ticker[(ticker.buy == 1) & (ticker.sell == 0)][headers]
+            for row in buy_subset.itertuples(index=True):
+                if realistic:
+                    if lock_pair_until is not None and row.date <= lock_pair_until:
+                        continue
+                if max_open_trades > 0:
+                    # Check if max_open_trades has already been reached for the given date
+                    if not trade_count_lock.get(row.date, 0) < max_open_trades:
+                        continue
+
+                if max_open_trades > 0:
+                    # Increase lock
+                    trade_count_lock[row.date] = trade_count_lock.get(row.date, 0) + 1
+
+                ret = self._get_sell_trade_entry(
+                    pair=pair,
+                    row=row,
+                    buy_subset=buy_subset,
+                    ticker=ticker,
+                    trade_count_lock=trade_count_lock,
+                    args=args
+                )
+
+                if ret:
+                    row2, trade_entry, next_date = ret
+                    lock_pair_until = next_date
+                    trades.append(trade_entry)
+                    if record:
+                        # Note, need to be json.dump friendly
+                        # record a tuple of pair, current_profit_percent,
+                        # entry-date, duration
+                        records.append((pair, trade_entry[1],
+                                        row.date.strftime('%s'),
+                                        row2.date.strftime('%s'),
+                                        row.Index, trade_entry[3]))
+        # For now export inside backtest(), maybe change so that backtest()
+        # returns a tuple like: (dataframe, records, logs, etc)
+        if record and record.find('trades') >= 0:
+            self.logger.info('Dumping backtest results')
+            file_dump_json('backtest-result.json', records)
+        labels = ['currency', 'profit_percent', 'profit_BTC', 'duration']
+        return DataFrame.from_records(trades, columns=labels)
+
+    def start(self) -> None:
+        """
+        Run a backtest end-to-end
+        :return: None
+        """
+        data = {}
+        pairs = self.config['exchange']['pair_whitelist']
+
+        if self.config.get('live'):
+            self.logger.info('Downloading data for all pairs in whitelist ...')
+            for pair in pairs:
+                data[pair] = exchange.get_ticker_history(pair, self.ticker_interval)
+        else:
+            self.logger.info('Using local backtesting data (using whitelist in given config) ...')
+            self.logger.info('Using stake_currency: %s ...', self.config['stake_currency'])
+            self.logger.info('Using stake_amount: %s ...', self.config['stake_amount'])
+
+            timerange = Arguments.parse_timerange(self.config.get('timerange'))
+            data = optimize.load_data(
+                self.config['datadir'],
+                pairs=pairs,
+                ticker_interval=self.ticker_interval,
+                refresh_pairs=self.config.get('refresh_pairs', False),
+                timerange=timerange
+            )
+
+        max_open_trades = self.config.get('max_open_trades', 0)
+
+        preprocessed = self.tickerdata_to_dataframe(data)
+        # Print timeframe
+        min_date, max_date = self.get_timeframe(preprocessed)
+        self.logger.info(
+            'Measuring data from %s up to %s (%s days)..',
+            min_date.isoformat(),
+            max_date.isoformat(),
+            (max_date - min_date).days
+        )
+
+        # Execute backtest and print results
+        sell_profit_only = self.config.get('experimental', {}).get('sell_profit_only', False)
+        use_sell_signal = self.config.get('experimental', {}).get('use_sell_signal', False)
+        results = self.backtest(
+            {
+                'stake_amount': self.config.get('stake_amount'),
+                'processed': preprocessed,
+                'max_open_trades': max_open_trades,
+                'realistic': self.config.get('realistic_simulation', False),
+                'sell_profit_only': sell_profit_only,
+                'use_sell_signal': use_sell_signal,
+                'stoploss': self.analyze.strategy.stoploss,
+                'record': self.config.get('export')
+            }
+        )
+
+        self.logging.set_format('%(message)s')
+        self.logger.info(
+            '\n==================================== '
+            'BACKTESTING REPORT'
+            ' ====================================\n'
+            '%s',
+            self._generate_text_table(
+                data,
+                results
+            )
+        )


-def start(args):
+def setup_configuration(args) -> Dict[str, Any]:
+    """
+    Prepare the configuration for the backtesting
+    :param args: Cli args from Arguments()
+    :return: Configuration
+    """
+    configuration = Configuration(args)
+    config = configuration.get_config()
+
+    # Ensure we do not use Exchange credentials
+    config['exchange']['key'] = ''
+    config['exchange']['secret'] = ''
+
+    return config
+
+
+def start(args) -> None:
+    """
+    Start Backtesting script
+    :param args: Cli args from Arguments()
+    :return: None
+    """
+
    # Initialize logger
-    logging.basicConfig(
-        level=args.loglevel,
-        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    )
+    logger = Logger(name=__name__).get_logger()
+    logger.info('Starting freqtrade in Backtesting mode')

-    exchange._API = Bittrex({'key': '', 'secret': ''})
+    # Initialize configuration
+    config = setup_configuration(args)

-    logger.info('Using config: %s ...', args.config)
-    config = misc.load_config(args.config)
-
-    # If -i/--ticker-interval is use we override the configuration parameter
-    # (that will override the strategy configuration)
-    if args.ticker_interval:
-        config.update({'ticker_interval': args.ticker_interval})
-
-    # init the strategy to use
-    config.update({'strategy': args.strategy})
-    strategy = Strategy()
-    strategy.init(config)
-
-    logger.info('Using ticker_interval: %d ...', strategy.ticker_interval)
-
-    data = {}
-    pairs = config['exchange']['pair_whitelist']
-    if args.live:
-        logger.info('Downloading data for all pairs in whitelist ...')
-        for pair in pairs:
-            data[pair] = exchange.get_ticker_history(pair, strategy.ticker_interval)
-    else:
-        logger.info('Using local backtesting data (using whitelist in given config) ...')
-        logger.info('Using stake_currency: %s ...', config['stake_currency'])
-        logger.info('Using stake_amount: %s ...', config['stake_amount'])
-
-        timerange = misc.parse_timerange(args.timerange)
-        data = optimize.load_data(args.datadir,
-                                  pairs=pairs,
-                                  ticker_interval=strategy.ticker_interval,
-                                  refresh_pairs=args.refresh_pairs,
-                                  timerange=timerange)
-    max_open_trades = 0
-    if args.realistic_simulation:
-        logger.info('Using max_open_trades: %s ...', config['max_open_trades'])
-        max_open_trades = config['max_open_trades']
-
-    # Monkey patch config
-    from freqtrade import main
-    main._CONF = config
-
-    preprocessed = optimize.tickerdata_to_dataframe(data)
-    # Print timeframe
-    min_date, max_date = get_timeframe(preprocessed)
-    logger.info('Measuring data from %s up to %s (%s days)..',
-                min_date.isoformat(),
-                max_date.isoformat(),
-                (max_date-min_date).days)
-    # Execute backtest and print results
-    sell_profit_only = config.get('experimental', {}).get('sell_profit_only', False)
-    use_sell_signal = config.get('experimental', {}).get('use_sell_signal', False)
-    results = backtest({'stake_amount': config['stake_amount'],
-                        'processed': preprocessed,
-                        'max_open_trades': max_open_trades,
-                        'realistic': args.realistic_simulation,
-                        'sell_profit_only': sell_profit_only,
-                        'use_sell_signal': use_sell_signal,
-                        'stoploss': strategy.stoploss,
-                        'record': args.export
-                        })
-    logger.info(
-        '\n==================================== BACKTESTING REPORT ====================================\n%s',  # noqa
-        generate_text_table(data, results, config['stake_currency'], strategy.ticker_interval)
-    )
+    # Initialize backtesting object
+    backtesting = Backtesting(config)
+    backtesting.start()