deprecate indicator_max_period_candles, automatically compute startup candles for FreqAI backtesting.

This commit is contained in:
robcaulk 2022-08-22 18:19:07 +02:00
parent 88dd9920ea
commit ac42c0153d
10 changed files with 61 additions and 38 deletions

View File

@ -9,6 +9,7 @@
"dry_run": true, "dry_run": true,
"timeframe": "3m", "timeframe": "3m",
"dry_run_wallet": 1000, "dry_run_wallet": 1000,
"startup_candle_count": 20,
"cancel_open_orders_on_exit": true, "cancel_open_orders_on_exit": true,
"unfilledtimeout": { "unfilledtimeout": {
"entry": 10, "entry": 10,
@ -53,7 +54,6 @@
], ],
"freqai": { "freqai": {
"enabled": true, "enabled": true,
"startup_candles": 10000,
"purge_old_models": true, "purge_old_models": true,
"train_period_days": 15, "train_period_days": 15,
"backtest_period_days": 7, "backtest_period_days": 7,

View File

@ -113,7 +113,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
| `use_SVM_to_remove_outliers` | Ask FreqAI to train a support vector machine to detect and remove outliers from the training data set as well as from incoming data points. <br> **Datatype:** boolean. | `use_SVM_to_remove_outliers` | Ask FreqAI to train a support vector machine to detect and remove outliers from the training data set as well as from incoming data points. <br> **Datatype:** boolean.
| `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. E.g. `nu` *Very* broadly, is the percentage of data points that should be considered outliers. `shuffle` is by default false to maintain reproducibility. But these and all others can be added/changed in this dictionary. <br> **Datatype:** dictionary. | `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. E.g. `nu` *Very* broadly, is the percentage of data points that should be considered outliers. `shuffle` is by default false to maintain reproducibility. But these and all others can be added/changed in this dictionary. <br> **Datatype:** dictionary.
| `stratify_training_data` | This value is used to indicate the stratification of the data. e.g. 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing. <br> **Datatype:** positive integer. | `stratify_training_data` | This value is used to indicate the stratification of the data. e.g. 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing. <br> **Datatype:** positive integer.
| `indicator_max_period_candles` | The maximum *period* used in `populate_any_indicators()` for indicator creation. FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN <br> **Datatype:** positive integer. | `indicator_max_period_candles` | **Deprecated in favor of** the `startup_candle_count` set in the strategy. Both configuration parameters provide the same functionality: the maximum *period* used in `populate_any_indicators()` for indicator creation (timeframe independent). FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN. <br> **Datatype:** positive integer.
| `indicator_periods_candles` | A list of integers used to duplicate all indicators according to a set of periods and add them to the feature set. <br> **Datatype:** list of positive integers. | `indicator_periods_candles` | A list of integers used to duplicate all indicators according to a set of periods and add them to the feature set. <br> **Datatype:** list of positive integers.
| `use_DBSCAN_to_remove_outliers` | Inactive by default. If true, FreqAI clusters data using DBSCAN to identify and remove outliers from training and prediction data. <br> **Datatype:** float (fraction of 1). | `use_DBSCAN_to_remove_outliers` | Inactive by default. If true, FreqAI clusters data using DBSCAN to identify and remove outliers from training and prediction data. <br> **Datatype:** float (fraction of 1).
| | **Data split parameters** | | **Data split parameters**

View File

@ -166,7 +166,7 @@ Additional technical libraries can be installed as necessary, or custom indicato
Most indicators have an instable startup period, in which they are either not available (NaN), or the calculation is incorrect. This can lead to inconsistencies, since Freqtrade does not know how long this instable period should be. Most indicators have an instable startup period, in which they are either not available (NaN), or the calculation is incorrect. This can lead to inconsistencies, since Freqtrade does not know how long this instable period should be.
To account for this, the strategy can be assigned the `startup_candle_count` attribute. To account for this, the strategy can be assigned the `startup_candle_count` attribute.
This should be set to the maximum number of candles that the strategy requires to calculate stable indicators. This should be set to the maximum number of candles that the strategy requires to calculate stable indicators. In the case where a user includes higher timeframes with informative pairs, the `startup_candle_count` does not necessarily change. The value is the maximum period (in candles) that any of the informative timeframes needs to compute stable indicators.
In this example strategy, this should be set to 100 (`startup_candle_count = 100`), since the longest needed history is 100 candles. In this example strategy, this should be set to 100 (`startup_candle_count = 100`), since the longest needed history is 100 candles.

View File

@ -92,7 +92,7 @@ class DataProvider:
'timerange') is None else str(self._config.get('timerange'))) 'timerange') is None else str(self._config.get('timerange')))
# Move informative start time respecting startup_candle_count # Move informative start time respecting startup_candle_count
timerange.subtract_start( timerange.subtract_start(
timeframe_to_seconds(str(timeframe)) * self._config.get('startup_candle_count', 0) self.get_required_startup_seconds(str(timeframe))
) )
self.__cached_pairs_backtesting[saved_pair] = load_pair_history( self.__cached_pairs_backtesting[saved_pair] = load_pair_history(
pair=pair, pair=pair,
@ -105,6 +105,17 @@ class DataProvider:
) )
return self.__cached_pairs_backtesting[saved_pair].copy() return self.__cached_pairs_backtesting[saved_pair].copy()
def get_required_startup_seconds(self, timeframe: str) -> int:
    """
    Return the number of seconds by which the start of the requested
    timerange must be moved back for the given timeframe.

    Without FreqAI this is `startup_candle_count` candles of `timeframe`.
    With FreqAI enabled, that startup window is doubled (safety factor) and
    the configured `train_period_days` is added on top.

    :param timeframe: Timeframe string accepted by `timeframe_to_seconds`.
    :return: Seconds of additional history required before the timerange start.
    """
    tf_seconds = timeframe_to_seconds(timeframe)
    base_seconds = tf_seconds * self._config.get('startup_candle_count', 0)

    # The 'freqai' section is optional: treat a missing section (or a missing
    # 'enabled' key) as "disabled" instead of raising KeyError.
    freqai_config = self._config.get('freqai', {})
    if not freqai_config.get('enabled', False):
        return base_seconds

    train_seconds = freqai_config['train_period_days'] * 86400
    # multiplied by safety factor of 2 because FreqAI users
    # typically do not know the correct window.
    return base_seconds * 2 + int(train_seconds)
def get_pair_dataframe( def get_pair_dataframe(
self, self,
pair: str, pair: str,

View File

@ -20,6 +20,8 @@ from freqtrade.data.dataprovider import DataProvider
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds from freqtrade.exchange import timeframe_to_seconds
from freqtrade.exchange.exchange import market_is_active
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
from freqtrade.strategy.interface import IStrategy from freqtrade.strategy.interface import IStrategy
@ -834,9 +836,7 @@ class FreqaiDataKitchen:
# We notice that users like to use exotic indicators where # We notice that users like to use exotic indicators where
# they do not know the required timeperiod. Here we include a factor # they do not know the required timeperiod. Here we include a factor
# of safety by multiplying the user considered "max" by 2. # of safety by multiplying the user considered "max" by 2.
max_period = self.freqai_config["feature_parameters"].get( max_period = self.config.get('startup_candle_count', 20) * 2
"indicator_max_period_candles", 20
) * 2
additional_seconds = max_period * max_tf_seconds additional_seconds = max_period * max_tf_seconds
if trained_timestamp != 0: if trained_timestamp != 0:
@ -1015,12 +1015,15 @@ def download_all_data_for_training(timerange: TimeRange,
and training the model. and training the model.
:param dp: DataProvider instance attached to the strategy :param dp: DataProvider instance attached to the strategy
""" """
all_pairs = copy.deepcopy(
config["freqai"]["feature_parameters"].get("include_corr_pairlist", []) if dp._exchange is not None:
) markets = [p for p, m in dp._exchange.markets.items() if market_is_active(m)
for pair in config.get("exchange", "").get("pair_whitelist"): or config.get('include_inactive')]
if pair not in all_pairs: else:
all_pairs.append(pair) # This should not occur:
raise OperationalException('No exchange object found.')
all_pairs = dynamic_expand_pairlist(config, markets)
new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY) new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
if not dp._exchange: if not dp._exchange:
@ -1048,7 +1051,6 @@ def get_required_data_timerange(
user. user.
""" """
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
trained_timerange = TimeRange()
data_load_timerange = TimeRange() data_load_timerange = TimeRange()
timeframes = config["freqai"]["feature_parameters"].get("include_timeframes") timeframes = config["freqai"]["feature_parameters"].get("include_timeframes")
@ -1059,15 +1061,9 @@ def get_required_data_timerange(
if secs > max_tf_seconds: if secs > max_tf_seconds:
max_tf_seconds = secs max_tf_seconds = secs
max_period = config["freqai"]["feature_parameters"].get( max_period = config.get('startup_candle_count', 20) * 2
"indicator_max_period_candles", 20
) * 2
additional_seconds = max_period * max_tf_seconds
trained_timerange.startts = int( additional_seconds = max_period * max_tf_seconds
time - config["freqai"].get("train_period_days", 0) * SECONDS_IN_DAY
)
trained_timerange.stopts = int(time)
data_load_timerange.startts = int( data_load_timerange.startts = int(
time time

View File

@ -211,21 +211,21 @@ class Backtesting:
""" """
self.progress.init_step(BacktestState.DATALOAD, 1) self.progress.init_step(BacktestState.DATALOAD, 1)
if self.config.get('freqai', {}).get('enabled', False): # if self.config.get('freqai', {}).get('enabled', False):
startup_candles = int(self.config.get('freqai', {}).get('startup_candles', 0)) # startup_candles = int(self.config.get('freqai', {}).get('startup_candles', 0))
if not startup_candles: # if not startup_candles:
raise OperationalException('FreqAI backtesting module requires user set ' # raise OperationalException('FreqAI backtesting module requires user set '
'startup_candles in config.') # 'startup_candles in config.')
self.required_startup += int(self.config.get('freqai', {}).get('startup_candles', 0)) # self.required_startup += int(self.config.get('freqai', {}).get('startup_candles', 0))
logger.info(f'Increasing startup_candle_count for freqai to {self.required_startup}') # logger.info(f'Increasing startup_candle_count for freqai to {self.required_startup}')
self.config['startup_candle_count'] = self.required_startup # self.config['startup_candle_count'] = self.required_startup
data = history.load_data( data = history.load_data(
datadir=self.config['datadir'], datadir=self.config['datadir'],
pairs=self.pairlists.whitelist, pairs=self.pairlists.whitelist,
timeframe=self.timeframe, timeframe=self.timeframe,
timerange=self.timerange, timerange=self.timerange,
startup_candles=self.required_startup, startup_candles=self.get_required_startup(self.timeframe),
fail_without_data=True, fail_without_data=True,
data_format=self.config.get('dataformat_ohlcv', 'json'), data_format=self.config.get('dataformat_ohlcv', 'json'),
candle_type=self.config.get('candle_type_def', CandleType.SPOT) candle_type=self.config.get('candle_type_def', CandleType.SPOT)
@ -244,6 +244,21 @@ class Backtesting:
self.progress.set_new_value(1) self.progress.set_new_value(1)
return data, self.timerange return data, self.timerange
def get_required_startup(self, timeframe: str) -> int:
    """
    Return the number of startup candles to load for backtesting.

    Without FreqAI this is `self.required_startup`. With FreqAI enabled, the
    startup window is doubled (safety factor) and the number of candles that
    cover `train_period_days` on the given timeframe is added.

    :param timeframe: Timeframe string accepted by `timeframe_to_seconds`.
    :return: Number of startup candles, as a whole number.
    :raises OperationalException: If FreqAI is enabled but
        `startup_candle_count` is missing from the config or set to 0.
    """
    # The 'freqai' section is optional: treat a missing section (or a missing
    # 'enabled' key) as "disabled" instead of raising KeyError.
    freqai_config = self.config.get('freqai', {})
    if not freqai_config.get('enabled', False):
        return self.required_startup

    # .get() so that an absent key produces the intended OperationalException
    # rather than a bare KeyError.
    if not self.config.get('startup_candle_count'):
        raise OperationalException('FreqAI backtesting module requires strategy '
                                   'set startup_candle_count.')

    tf_seconds = timeframe_to_seconds(timeframe)
    train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds
    # multiplied by safety factor of 2 because FreqAI users
    # typically do not know the correct window.
    # The division above yields a float; candle counts must be whole numbers,
    # so truncate to honour the declared int return type.
    total_candles = int(self.required_startup * 2 + train_candles)
    logger.info(f'Increasing startup_candle_count for freqai to {total_candles}')
    return total_candles
def load_bt_data_detail(self) -> None: def load_bt_data_detail(self) -> None:
""" """
Loads backtest detail data (smaller timeframe) if necessary. Loads backtest detail data (smaller timeframe) if necessary.

View File

@ -163,6 +163,7 @@ class IStrategy(ABC, HyperStrategyMixin):
) )
data_load_timerange = get_required_data_timerange(self.config) data_load_timerange = get_required_data_timerange(self.config)
download_all_data_for_training(data_load_timerange, self.dp, self.config) download_all_data_for_training(data_load_timerange, self.dp, self.config)
else: else:
# Gracious failures if freqAI is disabled but "start" is called. # Gracious failures if freqAI is disabled but "start" is called.
class DummyClass(): class DummyClass():

View File

@ -43,7 +43,8 @@ class FreqaiExampleStrategy(IStrategy):
process_only_new_candles = True process_only_new_candles = True
stoploss = -0.05 stoploss = -0.05
use_exit_signal = True use_exit_signal = True
startup_candle_count: int = 300 # this is the maximum period fed to talib (timeframe independent)
startup_candle_count: int = 20
can_short = False can_short = False
linear_roi_offset = DecimalParameter( linear_roi_offset = DecimalParameter(

View File

@ -44,7 +44,6 @@ def freqai_conf(default_conf, tmpdir):
"principal_component_analysis": False, "principal_component_analysis": False,
"use_SVM_to_remove_outliers": True, "use_SVM_to_remove_outliers": True,
"stratify_training_data": 0, "stratify_training_data": 0,
"indicator_max_period_candles": 10,
"indicator_periods_candles": [10], "indicator_periods_candles": [10],
}, },
"data_split_parameters": {"test_size": 0.33, "random_state": 1}, "data_split_parameters": {"test_size": 0.33, "random_state": 1},

View File

@ -48,10 +48,10 @@ def test_freqai_backtest_load_data(freqai_conf, mocker, caplog):
assert log_has_re('Increasing startup_candle_count for freqai to.*', caplog) assert log_has_re('Increasing startup_candle_count for freqai to.*', caplog)
del freqai_conf['freqai']['startup_candles'] # del freqai_conf['freqai']['startup_candles']
backtesting = Backtesting(freqai_conf) # backtesting = Backtesting(freqai_conf)
with pytest.raises(OperationalException, # with pytest.raises(OperationalException,
match=r'FreqAI backtesting module.*startup_candles in config.'): # match=r'FreqAI backtesting module.*startup_candles in config.'):
backtesting.load_bt_data() # backtesting.load_bt_data()
Backtesting.cleanup() Backtesting.cleanup()