deprecate indicator_max_period_candles, automatically compute startup candles for FreqAI backtesting.

This commit is contained in:
robcaulk 2022-08-22 18:19:07 +02:00
parent 88dd9920ea
commit ac42c0153d
10 changed files with 61 additions and 38 deletions

View File

@ -9,6 +9,7 @@
"dry_run": true,
"timeframe": "3m",
"dry_run_wallet": 1000,
"startup_candle_count": 20,
"cancel_open_orders_on_exit": true,
"unfilledtimeout": {
"entry": 10,
@ -53,7 +54,6 @@
],
"freqai": {
"enabled": true,
"startup_candles": 10000,
"purge_old_models": true,
"train_period_days": 15,
"backtest_period_days": 7,

View File

@ -113,7 +113,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
| `use_SVM_to_remove_outliers` | Ask FreqAI to train a support vector machine to detect and remove outliers from the training data set as well as from incoming data points. <br> **Datatype:** boolean.
| `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. E.g. `nu` *Very* broadly, is the percentage of data points that should be considered outliers. `shuffle` is by default false to maintain reproducibility. But these and all others can be added/changed in this dictionary. <br> **Datatype:** dictionary.
| `stratify_training_data` | This value is used to indicate the stratification of the data. e.g. 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing. <br> **Datatype:** positive integer.
| `indicator_max_period_candles` | The maximum *period* used in `populate_any_indicators()` for indicator creation. FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN <br> **Datatype:** positive integer.
| `indicator_max_period_candles` | **Deprecated in favor of** the strategy attribute `startup_candle_count`. Both parameters provide the same functionality: the maximum *period* used in `populate_any_indicators()` for indicator creation (timeframe independent). FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN. <br> **Datatype:** positive integer.
| `indicator_periods_candles` | A list of integers used to duplicate all indicators according to a set of periods and add them to the feature set. <br> **Datatype:** list of positive integers.
| `use_DBSCAN_to_remove_outliers` | Inactive by default. If true, FreqAI clusters data using DBSCAN to identify and remove outliers from training and prediction data. <br> **Datatype:** float (fraction of 1).
| | **Data split parameters**

View File

@ -166,7 +166,7 @@ Additional technical libraries can be installed as necessary, or custom indicato
Most indicators have an unstable startup period, in which they are either not available (NaN), or the calculation is incorrect. This can lead to inconsistencies, since Freqtrade does not know how long this unstable period should be.
To account for this, the strategy can be assigned the `startup_candle_count` attribute.
This should be set to the maximum number of candles that the strategy requires to calculate stable indicators.
This should be set to the maximum number of candles that the strategy requires to calculate stable indicators. When a strategy includes higher timeframes through informative pairs, the `startup_candle_count` does not necessarily change. The value is the maximum period (in candles) that any of the informative timeframes needs to compute stable indicators.
In this example strategy, this should be set to 100 (`startup_candle_count = 100`), since the longest needed history is 100 candles.

View File

@ -92,7 +92,7 @@ class DataProvider:
'timerange') is None else str(self._config.get('timerange')))
# Move informative start time respecting startup_candle_count
timerange.subtract_start(
timeframe_to_seconds(str(timeframe)) * self._config.get('startup_candle_count', 0)
self.get_required_startup_seconds(str(timeframe))
)
self.__cached_pairs_backtesting[saved_pair] = load_pair_history(
pair=pair,
@ -105,6 +105,17 @@ class DataProvider:
)
return self.__cached_pairs_backtesting[saved_pair].copy()
def get_required_startup_seconds(self, timeframe: str) -> int:
    """
    Return the number of seconds of history to load ahead of the requested
    timerange for the given timeframe.

    Without FreqAI this is `startup_candle_count` candles of `timeframe`.
    With FreqAI enabled, the startup window is doubled and the configured
    training period (`train_period_days`) is added on top.

    :param timeframe: Timeframe string, converted with timeframe_to_seconds().
    :return: Seconds to subtract from the start of the timerange.
    :raises KeyError: If FreqAI is enabled but `train_period_days` is not set.
    """
    tf_seconds = timeframe_to_seconds(timeframe)
    base_seconds = tf_seconds * self._config.get('startup_candle_count', 0)

    # A configuration without a 'freqai' section must be treated as
    # "FreqAI disabled" instead of failing with a KeyError.
    freqai_config = self._config.get('freqai', {})
    if not freqai_config.get('enabled', False):
        return base_seconds

    train_seconds = freqai_config['train_period_days'] * 86400
    # multiplied by safety factor of 2 because FreqAI users
    # typically do not know the correct window.
    return base_seconds * 2 + int(train_seconds)
def get_pair_dataframe(
self,
pair: str,

View File

@ -20,6 +20,8 @@ from freqtrade.data.dataprovider import DataProvider
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.exchange.exchange import market_is_active
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
from freqtrade.strategy.interface import IStrategy
@ -834,9 +836,7 @@ class FreqaiDataKitchen:
# We notice that users like to use exotic indicators where
# they do not know the required timeperiod. Here we include a factor
# of safety by multiplying the user considered "max" by 2.
max_period = self.freqai_config["feature_parameters"].get(
"indicator_max_period_candles", 20
) * 2
max_period = self.config.get('startup_candle_count', 20) * 2
additional_seconds = max_period * max_tf_seconds
if trained_timestamp != 0:
@ -1015,12 +1015,15 @@ def download_all_data_for_training(timerange: TimeRange,
and training the model.
:param dp: DataProvider instance attached to the strategy
"""
all_pairs = copy.deepcopy(
config["freqai"]["feature_parameters"].get("include_corr_pairlist", [])
)
for pair in config.get("exchange", "").get("pair_whitelist"):
if pair not in all_pairs:
all_pairs.append(pair)
if dp._exchange is not None:
markets = [p for p, m in dp._exchange.markets.items() if market_is_active(m)
or config.get('include_inactive')]
else:
# This should not occur:
raise OperationalException('No exchange object found.')
all_pairs = dynamic_expand_pairlist(config, markets)
new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
if not dp._exchange:
@ -1048,7 +1051,6 @@ def get_required_data_timerange(
user.
"""
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
trained_timerange = TimeRange()
data_load_timerange = TimeRange()
timeframes = config["freqai"]["feature_parameters"].get("include_timeframes")
@ -1059,15 +1061,9 @@ def get_required_data_timerange(
if secs > max_tf_seconds:
max_tf_seconds = secs
max_period = config["freqai"]["feature_parameters"].get(
"indicator_max_period_candles", 20
) * 2
additional_seconds = max_period * max_tf_seconds
max_period = config.get('startup_candle_count', 20) * 2
trained_timerange.startts = int(
time - config["freqai"].get("train_period_days", 0) * SECONDS_IN_DAY
)
trained_timerange.stopts = int(time)
additional_seconds = max_period * max_tf_seconds
data_load_timerange.startts = int(
time

View File

@ -211,21 +211,21 @@ class Backtesting:
"""
self.progress.init_step(BacktestState.DATALOAD, 1)
if self.config.get('freqai', {}).get('enabled', False):
startup_candles = int(self.config.get('freqai', {}).get('startup_candles', 0))
if not startup_candles:
raise OperationalException('FreqAI backtesting module requires user set '
'startup_candles in config.')
self.required_startup += int(self.config.get('freqai', {}).get('startup_candles', 0))
logger.info(f'Increasing startup_candle_count for freqai to {self.required_startup}')
self.config['startup_candle_count'] = self.required_startup
# if self.config.get('freqai', {}).get('enabled', False):
# startup_candles = int(self.config.get('freqai', {}).get('startup_candles', 0))
# if not startup_candles:
# raise OperationalException('FreqAI backtesting module requires user set '
# 'startup_candles in config.')
# self.required_startup += int(self.config.get('freqai', {}).get('startup_candles', 0))
# logger.info(f'Increasing startup_candle_count for freqai to {self.required_startup}')
# self.config['startup_candle_count'] = self.required_startup
data = history.load_data(
datadir=self.config['datadir'],
pairs=self.pairlists.whitelist,
timeframe=self.timeframe,
timerange=self.timerange,
startup_candles=self.required_startup,
startup_candles=self.get_required_startup(self.timeframe),
fail_without_data=True,
data_format=self.config.get('dataformat_ohlcv', 'json'),
candle_type=self.config.get('candle_type_def', CandleType.SPOT)
@ -244,6 +244,21 @@ class Backtesting:
self.progress.set_new_value(1)
return data, self.timerange
def get_required_startup(self, timeframe: str) -> int:
    """
    Return the number of startup candles to load for backtesting.

    Without FreqAI this is simply `self.required_startup`. With FreqAI
    enabled, the startup window is doubled and the number of candles covering
    the configured training period (`train_period_days`) is added.

    :param timeframe: Timeframe string, converted with timeframe_to_seconds().
    :return: Number of startup candles as an integer.
    :raises OperationalException: If FreqAI is enabled but
        `startup_candle_count` is missing or 0 in the configuration.
    """
    # A configuration without a 'freqai' section must be treated as
    # "FreqAI disabled" instead of failing with a KeyError.
    freqai_config = self.config.get('freqai', {})
    if not freqai_config.get('enabled', False):
        return self.required_startup

    if not self.config.get('startup_candle_count'):
        raise OperationalException('FreqAI backtesting module requires strategy '
                                   'set startup_candle_count.')
    tf_seconds = timeframe_to_seconds(timeframe)
    train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds
    # multiplied by safety factor of 2 because FreqAI users
    # typically do not know the correct window.
    # The division above yields a float; a candle count must be an int.
    total_candles = int(self.required_startup * 2 + train_candles)
    logger.info(f'Increasing startup_candle_count for freqai to {total_candles}')
    return total_candles
def load_bt_data_detail(self) -> None:
"""
Loads backtest detail data (smaller timeframe) if necessary.

View File

@ -163,6 +163,7 @@ class IStrategy(ABC, HyperStrategyMixin):
)
data_load_timerange = get_required_data_timerange(self.config)
download_all_data_for_training(data_load_timerange, self.dp, self.config)
else:
# Gracious failures if freqAI is disabled but "start" is called.
class DummyClass():

View File

@ -43,7 +43,8 @@ class FreqaiExampleStrategy(IStrategy):
process_only_new_candles = True
stoploss = -0.05
use_exit_signal = True
startup_candle_count: int = 300
# this is the maximum period fed to talib (timeframe independent)
startup_candle_count: int = 20
can_short = False
linear_roi_offset = DecimalParameter(

View File

@ -44,7 +44,6 @@ def freqai_conf(default_conf, tmpdir):
"principal_component_analysis": False,
"use_SVM_to_remove_outliers": True,
"stratify_training_data": 0,
"indicator_max_period_candles": 10,
"indicator_periods_candles": [10],
},
"data_split_parameters": {"test_size": 0.33, "random_state": 1},

View File

@ -48,10 +48,10 @@ def test_freqai_backtest_load_data(freqai_conf, mocker, caplog):
assert log_has_re('Increasing startup_candle_count for freqai to.*', caplog)
del freqai_conf['freqai']['startup_candles']
backtesting = Backtesting(freqai_conf)
with pytest.raises(OperationalException,
match=r'FreqAI backtesting module.*startup_candles in config.'):
backtesting.load_bt_data()
# del freqai_conf['freqai']['startup_candles']
# backtesting = Backtesting(freqai_conf)
# with pytest.raises(OperationalException,
# match=r'FreqAI backtesting module.*startup_candles in config.'):
# backtesting.load_bt_data()
Backtesting.cleanup()