download data homogeneously across timeframes

This commit is contained in:
robcaulk 2022-08-26 18:51:42 +02:00
parent e7261cf515
commit bb3523f383
2 changed files with 113 additions and 33 deletions

View File

@ -1,17 +1,22 @@
from freqtrade.data.dataprovider import DataProvider
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
from freqtrade.exchange.exchange import market_is_active
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
import logging
from datetime import datetime, timezone
from freqtrade.exceptions import OperationalException
from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.exchange.exchange import market_is_active
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
logger = logging.getLogger(__name__)
def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
"""
Called only once upon start of bot to download the necessary data for
populating indicators and training a FreqAI model.
populating indicators and training the model.
:param timerange: TimeRange = The full data timerange for populating the indicators
and training the model.
:param dp: DataProvider instance attached to the strategy
@ -26,26 +31,16 @@ def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
all_pairs = dynamic_expand_pairlist(config, markets)
timerange = get_required_data_timerange(config)
new_pairs_days = int((timerange.stopts - timerange.startts) / 86400)
if not dp._exchange:
# Not realistic - this is only called in live mode.
raise OperationalException("Dataprovider did not have an exchange attached.")
time = datetime.now(tz=timezone.utc).timestamp()
for tf in config["freqai"]["feature_parameters"].get("include_timeframes"):
timerange = TimeRange()
timerange.startts = int(time)
timerange.stopts = int(time)
startup_candles = dp.get_required_startup(str(tf))
tf_seconds = timeframe_to_seconds(str(tf))
timerange.subtract_start(tf_seconds * startup_candles)
new_pairs_days = int((timerange.stopts - timerange.startts) / 86400)
# FIXME: now that we are looping on `refresh_backtest_ohlcv_data`, the function
# redownloads the funding rate for each pair.
refresh_backtest_ohlcv_data(
dp._exchange,
pairs=all_pairs,
timeframes=[tf],
timeframes=config["freqai"]["feature_parameters"].get("include_timeframes"),
datadir=config["datadir"],
timerange=timerange,
new_pairs_days=new_pairs_days,
@ -54,3 +49,90 @@ def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
trading_mode=config.get("trading_mode", "spot"),
prepend=config.get("prepend_data", False),
)
def get_required_data_timerange(
config: dict
) -> TimeRange:
"""
Used to compute the required data download time range
for auto data-download in FreqAI
"""
time = datetime.now(tz=timezone.utc).timestamp()
data_load_timerange = TimeRange()
timeframes = config["freqai"]["feature_parameters"].get("include_timeframes")
max_tf_seconds = 0
for tf in timeframes:
secs = timeframe_to_seconds(tf)
if secs > max_tf_seconds:
max_tf_seconds = secs
startup_candles = config.get('startup_candle_count', 0)
indicator_periods = config["freqai"]["feature_parameters"]["indicator_periods_candles"]
# factor the max_period as a factor of safety.
max_period = int(max(startup_candles, max(indicator_periods)) * 1.5)
config['startup_candle_count'] = max_period
logger.info(f'FreqAI auto-downloader using {max_period} startup candles.')
additional_seconds = max_period * max_tf_seconds
data_load_timerange.startts = int(
time
- config["freqai"].get("train_period_days", 0) * 86400
- additional_seconds
)
data_load_timerange.stopts = int(time)
return data_load_timerange
# Keep below for when we wish to download heterogeneously lengthed data for FreqAI.
# def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
# """
# Called only once upon start of bot to download the necessary data for
# populating indicators and training a FreqAI model.
# :param timerange: TimeRange = The full data timerange for populating the indicators
# and training the model.
# :param dp: DataProvider instance attached to the strategy
# """
# if dp._exchange is not None:
# markets = [p for p, m in dp._exchange.markets.items() if market_is_active(m)
# or config.get('include_inactive')]
# else:
# # This should not occur:
# raise OperationalException('No exchange object found.')
# all_pairs = dynamic_expand_pairlist(config, markets)
# if not dp._exchange:
# # Not realistic - this is only called in live mode.
# raise OperationalException("Dataprovider did not have an exchange attached.")
# time = datetime.now(tz=timezone.utc).timestamp()
# for tf in config["freqai"]["feature_parameters"].get("include_timeframes"):
# timerange = TimeRange()
# timerange.startts = int(time)
# timerange.stopts = int(time)
# startup_candles = dp.get_required_startup(str(tf))
# tf_seconds = timeframe_to_seconds(str(tf))
# timerange.subtract_start(tf_seconds * startup_candles)
# new_pairs_days = int((timerange.stopts - timerange.startts) / 86400)
# # FIXME: now that we are looping on `refresh_backtest_ohlcv_data`, the function
# # redownloads the funding rate for each pair.
# refresh_backtest_ohlcv_data(
# dp._exchange,
# pairs=all_pairs,
# timeframes=[tf],
# datadir=config["datadir"],
# timerange=timerange,
# new_pairs_days=new_pairs_days,
# erase=False,
# data_format=config.get("dataformat_ohlcv", "json"),
# trading_mode=config.get("trading_mode", "spot"),
# prepend=config.get("prepend_data", False),
# )

View File

@ -157,12 +157,10 @@ class IStrategy(ABC, HyperStrategyMixin):
if self.config.get('runmode') in (RunMode.DRY_RUN, RunMode.LIVE):
logger.info(
"Downloading all training data for all pairs in whitelist and "
"corr_pairlist, this may take a while if you do not have the "
"data saved"
"corr_pairlist, this may take a while if the data is not "
"already on disk."
)
# data_load_timerange = get_required_data_timerange(self.config)
download_all_data_for_training(self.dp, self.config)
else:
# Gracious failures if freqAI is disabled but "start" is called.
class DummyClass():