Merge pull request #7244 from freqtrade/move_datadownload

extract download-data from freqai to prepare for future async changes
This commit is contained in:
Matthias 2022-08-31 19:46:14 +02:00 committed by GitHub
commit 3d4ad1de4c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 201 additions and 90 deletions

View File

@ -53,7 +53,6 @@
],
"freqai": {
"enabled": true,
"startup_candles": 10000,
"purge_old_models": true,
"train_period_days": 15,
"backtest_period_days": 7,
@ -76,7 +75,10 @@
"principal_component_analysis": false,
"use_SVM_to_remove_outliers": true,
"indicator_max_period_candles": 20,
"indicator_periods_candles": [10, 20]
"indicator_periods_candles": [
10,
20
]
},
"data_split_parameters": {
"test_size": 0.33,

View File

@ -105,7 +105,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
| `label_period_candles` | Number of candles into the future that the labels are created for. This is used in `populate_any_indicators` (see `templates/FreqaiExampleStrategy.py` for detailed usage). The user can create custom labels, making use of this parameter or not. <br> **Datatype:** Positive integer.
| `include_shifted_candles` | Add features from previous candles to subsequent candles to add historical information. FreqAI takes all features from the `include_shifted_candles` previous candles, duplicates and shifts them so that the information is available for the subsequent candle. <br> **Datatype:** Positive integer.
| `weight_factor` | Used to set weights for training data points according to their recency. See details about how it works [here](#controlling-the-model-learning-process). <br> **Datatype:** Positive float (typically < 1).
| `indicator_max_period_candles` | The maximum period used in `populate_any_indicators()` for indicator creation. FreqAI uses this information in combination with the maximum timeframe to calculate how many data points that should be downloaded so that the first data point does not have a NaN. <br> **Datatype:** Positive integer.
| `indicator_max_period_candles` | **No longer used**. User must use the strategy set `startup_candle_count` which defines the maximum *period* used in `populate_any_indicators()` for indicator creation (timeframe independent). FreqAI uses this information in combination with the maximum timeframe to calculate how many data points it should download so that the first data point does not have a NaN <br> **Datatype:** positive integer.
| `indicator_periods_candles` | Calculate indicators for `indicator_periods_candles` time periods and add them to the feature set. <br> **Datatype:** List of positive integers.
| `stratify_training_data` | This value is used to indicate the grouping of the data. For example, 2 would set every 2nd data point into a separate dataset to be pulled from during training/testing. See details about how it works [here](#stratifying-the-data-for-training-and-testing-the-model) <br> **Datatype:** Positive integer.
| `principal_component_analysis` | Automatically reduce the dimensionality of the data set using Principal Component Analysis. See details about how it works [here](#reducing-data-dimensionality-with-principal-component-analysis) <br> **Datatype:** Boolean.
@ -167,7 +167,6 @@ The user interface is isolated to the typical Freqtrade config file. A FreqAI co
],
"label_period_candles": 24,
"include_shifted_candles": 2,
"indicator_max_period_candles": 20,
"indicator_periods_candles": [10, 20]
},
"data_split_parameters" : {
@ -184,6 +183,9 @@ The user interface is isolated to the typical Freqtrade config file. A FreqAI co
The FreqAI strategy requires the user to include the following lines of code in the standard Freqtrade strategy:
```python
# user should define the maximum startup candle count (the largest number of candles
# passed to any single indicator)
startup_candle_count: int = 20
def informative_pairs(self):
whitelist_pairs = self.dp.current_whitelist()

View File

@ -166,7 +166,7 @@ Additional technical libraries can be installed as necessary, or custom indicato
Most indicators have an instable startup period, in which they are either not available (NaN), or the calculation is incorrect. This can lead to inconsistencies, since Freqtrade does not know how long this instable period should be.
To account for this, the strategy can be assigned the `startup_candle_count` attribute.
This should be set to the maximum number of candles that the strategy requires to calculate stable indicators.
This should be set to the maximum number of candles that the strategy requires to calculate stable indicators. In the case where a user includes higher timeframes with informative pairs, the `startup_candle_count` does not necessarily change. The value is the maximum period (in candles) that any of the informatives timeframes need to compute stable indicators.
In this example strategy, this should be set to 100 (`startup_candle_count = 100`), since the longest needed history is 100 candles.

View File

@ -91,9 +91,9 @@ class DataProvider:
timerange = TimeRange.parse_timerange(None if self._config.get(
'timerange') is None else str(self._config.get('timerange')))
# Move informative start time respecting startup_candle_count
timerange.subtract_start(
timeframe_to_seconds(str(timeframe)) * self._config.get('startup_candle_count', 0)
)
startup_candles = self.get_required_startup(str(timeframe))
tf_seconds = timeframe_to_seconds(str(timeframe))
timerange.subtract_start(tf_seconds * startup_candles)
self.__cached_pairs_backtesting[saved_pair] = load_pair_history(
pair=pair,
timeframe=timeframe or self._config['timeframe'],
@ -105,6 +105,21 @@ class DataProvider:
)
return self.__cached_pairs_backtesting[saved_pair].copy()
def get_required_startup(self, timeframe: str) -> int:
freqai_config = self._config.get('freqai', {})
if not freqai_config.get('enabled', False):
return self._config.get('startup_candle_count', 0)
else:
startup_candles = self._config.get('startup_candle_count', 0)
indicator_periods = freqai_config['feature_parameters']['indicator_periods_candles']
# make sure the startupcandles is at least the set maximum indicator periods
self._config['startup_candle_count'] = max(startup_candles, max(indicator_periods))
tf_seconds = timeframe_to_seconds(timeframe)
train_candles = freqai_config['train_period_days'] * 86400 / tf_seconds
total_candles = int(self._config['startup_candle_count'] + train_candles)
logger.info(f'Increasing startup_candle_count for freqai to {total_candles}')
return total_candles
def get_pair_dataframe(
self,
pair: str,

View File

@ -16,8 +16,6 @@ from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.strategy.interface import IStrategy
@ -903,9 +901,7 @@ class FreqaiDataKitchen:
# We notice that users like to use exotic indicators where
# they do not know the required timeperiod. Here we include a factor
# of safety by multiplying the user considered "max" by 2.
max_period = self.freqai_config["feature_parameters"].get(
"indicator_max_period_candles", 20
) * 2
max_period = self.config.get('startup_candle_count', 20) * 2
additional_seconds = max_period * max_tf_seconds
if trained_timestamp != 0:
@ -951,31 +947,6 @@ class FreqaiDataKitchen:
self.model_filename = f"cb_{coin.lower()}_{int(trained_timerange.stopts)}"
def download_all_data_for_training(self, timerange: TimeRange, dp: DataProvider) -> None:
"""
Called only once upon start of bot to download the necessary data for
populating indicators and training the model.
:param timerange: TimeRange = The full data timerange for populating the indicators
and training the model.
:param dp: DataProvider instance attached to the strategy
"""
new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
if not dp._exchange:
# Not realistic - this is only called in live mode.
raise OperationalException("Dataprovider did not have an exchange attached.")
refresh_backtest_ohlcv_data(
dp._exchange,
pairs=self.all_pairs,
timeframes=self.freqai_config["feature_parameters"].get("include_timeframes"),
datadir=self.config["datadir"],
timerange=timerange,
new_pairs_days=new_pairs_days,
erase=False,
data_format=self.config.get("dataformat_ohlcv", "json"),
trading_mode=self.config.get("trading_mode", "spot"),
prepend=self.config.get("prepend_data", False),
)
def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(

View File

@ -290,14 +290,8 @@ class IFreqaiModel(ABC):
)
dk.set_paths(metadata["pair"], new_trained_timerange.stopts)
# download candle history if it is not already in memory
# load candle history into memory if it is not yet.
if not self.dd.historic_data:
logger.info(
"Downloading all training data for all pairs in whitelist and "
"corr_pairlist, this may take a while if you do not have the "
"data saved"
)
dk.download_all_data_for_training(data_load_timerange, strategy.dp)
self.dd.load_all_pair_histories(data_load_timerange, dk)
if not self.scanning:

134
freqtrade/freqai/utils.py Normal file
View File

@ -0,0 +1,134 @@
import logging
from datetime import datetime, timezone
from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.exchange.exchange import market_is_active
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
logger = logging.getLogger(__name__)
def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
"""
Called only once upon start of bot to download the necessary data for
populating indicators and training the model.
:param timerange: TimeRange = The full data timerange for populating the indicators
and training the model.
:param dp: DataProvider instance attached to the strategy
"""
if dp._exchange is None:
raise OperationalException('No exchange object found.')
markets = [p for p, m in dp._exchange.markets.items() if market_is_active(m)
or config.get('include_inactive')]
all_pairs = dynamic_expand_pairlist(config, markets)
timerange = get_required_data_timerange(config)
new_pairs_days = int((timerange.stopts - timerange.startts) / 86400)
refresh_backtest_ohlcv_data(
dp._exchange,
pairs=all_pairs,
timeframes=config["freqai"]["feature_parameters"].get("include_timeframes"),
datadir=config["datadir"],
timerange=timerange,
new_pairs_days=new_pairs_days,
erase=False,
data_format=config.get("dataformat_ohlcv", "json"),
trading_mode=config.get("trading_mode", "spot"),
prepend=config.get("prepend_data", False),
)
def get_required_data_timerange(
config: dict
) -> TimeRange:
"""
Used to compute the required data download time range
for auto data-download in FreqAI
"""
time = datetime.now(tz=timezone.utc).timestamp()
timeframes = config["freqai"]["feature_parameters"].get("include_timeframes")
max_tf_seconds = 0
for tf in timeframes:
secs = timeframe_to_seconds(tf)
if secs > max_tf_seconds:
max_tf_seconds = secs
startup_candles = config.get('startup_candle_count', 0)
indicator_periods = config["freqai"]["feature_parameters"]["indicator_periods_candles"]
# factor the max_period as a factor of safety.
max_period = int(max(startup_candles, max(indicator_periods)) * 1.5)
config['startup_candle_count'] = max_period
logger.info(f'FreqAI auto-downloader using {max_period} startup candles.')
additional_seconds = max_period * max_tf_seconds
startts = int(
time
- config["freqai"].get("train_period_days", 0) * 86400
- additional_seconds
)
stopts = int(time)
data_load_timerange = TimeRange('date', 'date', startts, stopts)
return data_load_timerange
# Keep below for when we wish to download heterogeneously lengthed data for FreqAI.
# def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
# """
# Called only once upon start of bot to download the necessary data for
# populating indicators and training a FreqAI model.
# :param timerange: TimeRange = The full data timerange for populating the indicators
# and training the model.
# :param dp: DataProvider instance attached to the strategy
# """
# if dp._exchange is not None:
# markets = [p for p, m in dp._exchange.markets.items() if market_is_active(m)
# or config.get('include_inactive')]
# else:
# # This should not occur:
# raise OperationalException('No exchange object found.')
# all_pairs = dynamic_expand_pairlist(config, markets)
# if not dp._exchange:
# # Not realistic - this is only called in live mode.
# raise OperationalException("Dataprovider did not have an exchange attached.")
# time = datetime.now(tz=timezone.utc).timestamp()
# for tf in config["freqai"]["feature_parameters"].get("include_timeframes"):
# timerange = TimeRange()
# timerange.startts = int(time)
# timerange.stopts = int(time)
# startup_candles = dp.get_required_startup(str(tf))
# tf_seconds = timeframe_to_seconds(str(tf))
# timerange.subtract_start(tf_seconds * startup_candles)
# new_pairs_days = int((timerange.stopts - timerange.startts) / 86400)
# # FIXME: now that we are looping on `refresh_backtest_ohlcv_data`, the function
# # redownloads the funding rate for each pair.
# refresh_backtest_ohlcv_data(
# dp._exchange,
# pairs=all_pairs,
# timeframes=[tf],
# datadir=config["datadir"],
# timerange=timerange,
# new_pairs_days=new_pairs_days,
# erase=False,
# data_format=config.get("dataformat_ohlcv", "json"),
# trading_mode=config.get("trading_mode", "spot"),
# prepend=config.get("prepend_data", False),
# )

View File

@ -212,21 +212,12 @@ class Backtesting:
"""
self.progress.init_step(BacktestState.DATALOAD, 1)
if self.config.get('freqai', {}).get('enabled', False):
startup_candles = int(self.config.get('freqai', {}).get('startup_candles', 0))
if not startup_candles:
raise OperationalException('FreqAI backtesting module requires user set '
'startup_candles in config.')
self.required_startup += int(self.config.get('freqai', {}).get('startup_candles', 0))
logger.info(f'Increasing startup_candle_count for freqai to {self.required_startup}')
self.config['startup_candle_count'] = self.required_startup
data = history.load_data(
datadir=self.config['datadir'],
pairs=self.pairlists.whitelist,
timeframe=self.timeframe,
timerange=self.timerange,
startup_candles=self.required_startup,
startup_candles=self.dataprovider.get_required_startup(self.timeframe),
fail_without_data=True,
data_format=self.config.get('dataformat_ohlcv', 'json'),
candle_type=self.config.get('candle_type_def', CandleType.SPOT)

View File

@ -148,10 +148,19 @@ class IStrategy(ABC, HyperStrategyMixin):
def load_freqAI_model(self) -> None:
if self.config.get('freqai', {}).get('enabled', False):
# Import here to avoid importing this if freqAI is disabled
from freqtrade.freqai.utils import download_all_data_for_training
from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver
self.freqai = FreqaiModelResolver.load_freqaimodel(self.config)
self.freqai_info = self.config["freqai"]
# download the desired data in dry/live
if self.config.get('runmode') in (RunMode.DRY_RUN, RunMode.LIVE):
logger.info(
"Downloading all training data for all pairs in whitelist and "
"corr_pairlist, this may take a while if the data is not "
"already on disk."
)
download_all_data_for_training(self.dp, self.config)
else:
# Gracious failures if freqAI is disabled but "start" is called.
class DummyClass():

View File

@ -43,7 +43,8 @@ class FreqaiExampleStrategy(IStrategy):
process_only_new_candles = True
stoploss = -0.05
use_exit_signal = True
startup_candle_count: int = 300
# this is the maximum period fed to talib (timeframe independent)
startup_candle_count: int = 20
can_short = False
linear_roi_offset = DecimalParameter(

View File

@ -45,7 +45,6 @@ class FreqaiExampleHybridStrategy(IStrategy):
"weight_factor": 0.9,
"principal_component_analysis": false,
"use_SVM_to_remove_outliers": true,
"indicator_max_period_candles": 20,
"indicator_periods_candles": [10, 20]
},
"data_split_parameters": {

View File

@ -45,7 +45,6 @@ def freqai_conf(default_conf, tmpdir):
"principal_component_analysis": False,
"use_SVM_to_remove_outliers": True,
"stratify_training_data": 0,
"indicator_max_period_candles": 10,
"indicator_periods_candles": [10],
},
"data_split_parameters": {"test_size": 0.33, "random_state": 1},

View File

@ -48,10 +48,4 @@ def test_freqai_backtest_load_data(freqai_conf, mocker, caplog):
assert log_has_re('Increasing startup_candle_count for freqai to.*', caplog)
del freqai_conf['freqai']['startup_candles']
backtesting = Backtesting(freqai_conf)
with pytest.raises(OperationalException,
match=r'FreqAI backtesting module.*startup_candles in config.'):
backtesting.load_bt_data()
Backtesting.cleanup()