backtest_live_models - refactoring after PR review

This commit is contained in:
Wagner Costa Santos 2022-09-29 01:48:38 -03:00
parent df0927cdee
commit 6845a5c6ea
6 changed files with 70 additions and 76 deletions

View File

@ -671,7 +671,6 @@ AVAILABLE_CLI_OPTIONS = {
"freqai_backtest_live_models": Arg( "freqai_backtest_live_models": Arg(
'--freqai-backtest-live-models', '--freqai-backtest-live-models',
help='Run backtest with ready models.', help='Run backtest with ready models.',
action='store_true', action='store_true'
default=False,
), ),
} }

View File

@ -84,7 +84,7 @@ class FreqaiDataKitchen:
self.backtest_live_models = config.get("freqai_backtest_live_models", False) self.backtest_live_models = config.get("freqai_backtest_live_models", False)
if not self.live: if not self.live:
self.full_path = freqai_util.get_full_model_path(self.config) self.full_path = freqai_util.get_full_models_path(self.config)
self.full_timerange = self.create_fulltimerange( self.full_timerange = self.create_fulltimerange(
self.config["timerange"], self.freqai_config.get("train_period_days", 0) self.config["timerange"], self.freqai_config.get("train_period_days", 0)
) )
@ -118,7 +118,7 @@ class FreqaiDataKitchen:
metadata: dict = strategy furnished pair metadata metadata: dict = strategy furnished pair metadata
trained_timestamp: int = timestamp of most recent training trained_timestamp: int = timestamp of most recent training
""" """
self.full_path = freqai_util.get_full_model_path(self.config) self.full_path = freqai_util.get_full_models_path(self.config)
self.data_path = Path( self.data_path = Path(
self.full_path self.full_path
/ f"sub-train-{pair.split('/')[0]}_{trained_timestamp}" / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
@ -459,17 +459,15 @@ class FreqaiDataKitchen:
) -> Tuple[list, list]: ) -> Tuple[list, list]:
tr_backtesting_list_timerange = [] tr_backtesting_list_timerange = []
pair = self.pair.split("/")[0].split(":")[0] asset = self.pair.split("/")[0]
if pair not in self.backtest_live_models_data["pairs_end_dates"]: if asset not in self.backtest_live_models_data["assets_end_dates"]:
raise OperationalException( raise OperationalException(
f"Model not available for pair {self.pair}. " f"Model not available for pair {self.pair}. "
"Please, try again after removing this pair from the configuration file." "Please, try again after removing this pair from the configuration file."
) )
pair_data = self.backtest_live_models_data["pairs_end_dates"][pair] asset_data = self.backtest_live_models_data["assets_end_dates"][asset]
model_end_dates = []
backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"] backtesting_timerange = self.backtest_live_models_data["backtesting_timerange"]
for end_date in pair_data: model_end_dates = [x for x in asset_data]
model_end_dates.append(end_date)
model_end_dates.append(backtesting_timerange.stopts) model_end_dates.append(backtesting_timerange.stopts)
model_end_dates.sort() model_end_dates.sort()
for index, item in enumerate(model_end_dates): for index, item in enumerate(model_end_dates):
@ -1291,11 +1289,11 @@ class FreqaiDataKitchen:
def set_timerange_from_ready_models(self): def set_timerange_from_ready_models(self):
backtesting_timerange, \ backtesting_timerange, \
backtesting_string_timerange, \ assets_end_dates = (
pairs_end_dates = freqai_util.get_timerange_from_ready_models(self.full_path) freqai_util.get_timerange_and_assets_end_dates_from_ready_models(self.full_path))
self.backtest_live_models_data = { self.backtest_live_models_data = {
"backtesting_timerange": backtesting_timerange, "backtesting_timerange": backtesting_timerange,
"backtesting_string_timerange": backtesting_string_timerange, "assets_end_dates": assets_end_dates
"pairs_end_dates": pairs_end_dates
} }
return return

View File

@ -264,12 +264,9 @@ class IFreqaiModel(ABC):
tr_backtest_stopts_str = datetime.fromtimestamp( tr_backtest_stopts_str = datetime.fromtimestamp(
tr_backtest.stopts, tr_backtest.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT) tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
logger.info( logger.info(f"No data found for pair {pair} from {tr_backtest_startts_str} "
f"No data found for pair {pair} " f" from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. "
f" from {tr_backtest_startts_str} " "Probably more than one training within the same candle period.")
f"to {tr_backtest_stopts_str}. "
"Probably more than one training within the same candle period."
)
continue continue
trained_timestamp = tr_train trained_timestamp = tr_train
@ -305,12 +302,6 @@ class IFreqaiModel(ABC):
dk.append_predictions(append_df) dk.append_predictions(append_df)
else: else:
if not self.model_exists(dk): if not self.model_exists(dk):
if dk.backtest_live_models:
raise OperationalException(
"Training models is not allowed "
"in backtest_live_models backtesting "
"mode"
)
dk.find_features(dataframe_train) dk.find_features(dataframe_train)
dk.find_labels(dataframe_train) dk.find_labels(dataframe_train)
self.model = self.train(dataframe_train, pair, dk) self.model = self.train(dataframe_train, pair, dk)
@ -603,7 +594,7 @@ class IFreqaiModel(ABC):
model = self.train(unfiltered_dataframe, pair, dk) model = self.train(unfiltered_dataframe, pair, dk)
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
dk.set_new_model_names(pair, int(new_trained_timerange.stopts)) dk.set_new_model_names(pair, new_trained_timerange.stopts)
self.dd.save_data(model, pair, dk) self.dd.save_data(model, pair, dk)
if self.plot_features: if self.plot_features:

View File

@ -14,7 +14,7 @@ from freqtrade.exceptions import OperationalException
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def get_full_model_path(config: Config) -> Path: def get_full_models_path(config: Config) -> Path:
""" """
Returns default FreqAI model path Returns default FreqAI model path
:param config: Configuration dictionary :param config: Configuration dictionary
@ -25,20 +25,19 @@ def get_full_model_path(config: Config) -> Path:
) )
def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str, Dict[str, Any]]: def get_timerange_and_assets_end_dates_from_ready_models(
models_path: Path) -> Tuple[TimeRange, Dict[str, Any]]:
""" """
Returns timerange information based on a FreqAI model directory Returns timerange information based on a FreqAI model directory
:param models_path: FreqAI model path :param models_path: FreqAI model path
:returns: a Tuple with (backtesting_timerange: Timerange calculated from directory, :return: a Tuple with (Timerange calculated from directory and
backtesting_string_timerange: str timerange calculated from a Dict with pair and model end training dates info)
directory (format example '20020822-20220830'), \
pairs_end_dates: Dict with pair and model end training dates info)
""" """
all_models_end_dates = [] all_models_end_dates = []
pairs_end_dates: Dict[str, Any] = get_pairs_timestamps_training_from_ready_models(models_path) assets_end_dates: Dict[str, Any] = get_assets_timestamps_training_from_ready_models(models_path)
for key in pairs_end_dates: for key in assets_end_dates:
for model_end_date in pairs_end_dates[key]: for model_end_date in assets_end_dates[key]:
if model_end_date not in all_models_end_dates: if model_end_date not in all_models_end_dates:
all_models_end_dates.append(model_end_date) all_models_end_dates.append(model_end_date)
@ -64,34 +63,27 @@ def get_timerange_from_ready_models(models_path: Path) -> Tuple[TimeRange, str,
all_models_end_dates.append(finish_timestamp) all_models_end_dates.append(finish_timestamp)
all_models_end_dates.sort() all_models_end_dates.sort()
start = datetime.fromtimestamp(min(all_models_end_dates), tz=timezone.utc) start_date = (datetime(*datetime.fromtimestamp(min(all_models_end_dates)).timetuple()[:3],
stop = datetime.fromtimestamp(max(all_models_end_dates), tz=timezone.utc) tzinfo=timezone.utc))
end_date_string_timerange = stop end_date = (datetime(*datetime.fromtimestamp(max(all_models_end_dates)).timetuple()[:3],
if ( tzinfo=timezone.utc))
finish_timestamp < int(datetime.now(tz=timezone.utc).timestamp()) and
datetime.now(tz=timezone.utc).strftime('%Y%m%d') != stop.strftime('%Y%m%d')
):
# add 1 day to string timerange to ensure BT module will load all dataframe data
end_date_string_timerange = stop + timedelta(days=1)
backtesting_string_timerange = ( # add 1 day to string timerange to ensure BT module will load all dataframe data
f"{start.strftime('%Y%m%d')}-{end_date_string_timerange.strftime('%Y%m%d')}" end_date = end_date + timedelta(days=1)
)
backtesting_timerange = TimeRange( backtesting_timerange = TimeRange(
'date', 'date', min(all_models_end_dates), max(all_models_end_dates) 'date', 'date', int(start_date.timestamp()), int(end_date.timestamp())
) )
return backtesting_timerange, backtesting_string_timerange, pairs_end_dates return backtesting_timerange, assets_end_dates
def get_pairs_timestamps_training_from_ready_models(models_path: Path) -> Dict[str, Any]: def get_assets_timestamps_training_from_ready_models(models_path: Path) -> Dict[str, Any]:
""" """
Scan the models path and returns all pairs end training dates (timestamp) Scan the models path and returns all assets end training dates (timestamp)
:param models_path: FreqAI model path :param models_path: FreqAI model path
:returns: :return: a Dict with asset and model end training dates info
:pairs_end_dates: Dict with pair and model end training dates info
""" """
pairs_end_dates: Dict[str, Any] = {} assets_end_dates: Dict[str, Any] = {}
if not models_path.is_dir(): if not models_path.is_dir():
raise OperationalException( raise OperationalException(
'Model folders not found. Saved models are required ' 'Model folders not found. Saved models are required '
@ -100,7 +92,7 @@ def get_pairs_timestamps_training_from_ready_models(models_path: Path) -> Dict[s
for model_dir in models_path.iterdir(): for model_dir in models_path.iterdir():
if str(model_dir.name).startswith("sub-train"): if str(model_dir.name).startswith("sub-train"):
model_end_date = int(model_dir.name.split("_")[1]) model_end_date = int(model_dir.name.split("_")[1])
pair = model_dir.name.split("_")[0].replace("sub-train-", "") asset = model_dir.name.split("_")[0].replace("sub-train-", "")
model_file_name = ( model_file_name = (
f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}" f"cb_{str(model_dir.name).replace('sub-train-', '').lower()}"
"_model.joblib" "_model.joblib"
@ -108,8 +100,23 @@ def get_pairs_timestamps_training_from_ready_models(models_path: Path) -> Dict[s
model_path_file = Path(model_dir / model_file_name) model_path_file = Path(model_dir / model_file_name)
if model_path_file.is_file(): if model_path_file.is_file():
if pair not in pairs_end_dates: if asset not in assets_end_dates:
pairs_end_dates[pair] = [] assets_end_dates[asset] = []
assets_end_dates[asset].append(model_end_date)
pairs_end_dates[pair].append(model_end_date) return assets_end_dates
return pairs_end_dates
def get_timerange_backtest_live_models(config: Config):
    """
    Build the timerange string used when backtesting with live/ready models
    :param config: Configuration dictionary
    :return: timerange as a 'YYYYMMDD-YYYYMMDD' string (example: '20220801-20220822')
    """

    def _as_utc_day(timestamp) -> str:
        # Render a POSIX timestamp as its UTC calendar day.
        return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime('%Y%m%d')

    # Only the TimeRange is needed here; the per-asset end dates are discarded.
    ready_models_timerange, _ = get_timerange_and_assets_end_dates_from_ready_models(
        get_full_models_path(config)
    )
    return f"{_as_utc_day(ready_models_timerange.startts)}-{_as_utc_day(ready_models_timerange.stopts)}"

View File

@ -25,7 +25,6 @@ from freqtrade.enums import (BacktestState, CandleType, ExitCheckTuple, ExitType
from freqtrade.exceptions import DependencyException, OperationalException from freqtrade.exceptions import DependencyException, OperationalException
from freqtrade.exchange import (amount_to_contract_precision, price_to_precision, from freqtrade.exchange import (amount_to_contract_precision, price_to_precision,
timeframe_to_minutes, timeframe_to_seconds) timeframe_to_minutes, timeframe_to_seconds)
from freqtrade.freqai import freqai_util
from freqtrade.mixins import LoggingMixin from freqtrade.mixins import LoggingMixin
from freqtrade.optimize.backtest_caching import get_strategy_run_id from freqtrade.optimize.backtest_caching import get_strategy_run_id
from freqtrade.optimize.bt_progress import BTProgress from freqtrade.optimize.bt_progress import BTProgress
@ -136,10 +135,8 @@ class Backtesting:
self.precision_mode = self.exchange.precisionMode self.precision_mode = self.exchange.precisionMode
if self.config.get('freqai_backtest_live_models', False): if self.config.get('freqai_backtest_live_models', False):
freqai_model_path = freqai_util.get_full_model_path(self.config) from freqtrade.freqai import freqai_util
_, live_models_timerange, _ = freqai_util.get_timerange_from_ready_models( self.config['timerange'] = freqai_util.get_timerange_backtest_live_models(self.config)
freqai_model_path)
self.config['timerange'] = live_models_timerange
self.timerange = TimeRange.parse_timerange( self.timerange = TimeRange.parse_timerange(
None if self.config.get('timerange') is None else str(self.config.get('timerange'))) None if self.config.get('timerange') is None else str(self.config.get('timerange')))

View File

@ -7,9 +7,10 @@ from freqtrade.configuration import TimeRange
from freqtrade.data.dataprovider import DataProvider from freqtrade.data.dataprovider import DataProvider
from freqtrade.exceptions import OperationalException from freqtrade.exceptions import OperationalException
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_util import (get_full_model_path, from freqtrade.freqai.freqai_util import (get_assets_timestamps_training_from_ready_models,
get_pairs_timestamps_training_from_ready_models, get_full_models_path,
get_timerange_from_ready_models) get_timerange_and_assets_end_dates_from_ready_models,
get_timerange_backtest_live_models)
from tests.conftest import get_patched_exchange from tests.conftest import get_patched_exchange
from tests.freqai.conftest import get_patched_freqai_strategy from tests.freqai.conftest import get_patched_freqai_strategy
@ -48,17 +49,17 @@ def test_get_full_model_path(mocker, freqai_conf, model):
freqai.extract_data_and_train_model( freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
model_path = get_full_model_path(freqai_conf) model_path = get_full_models_path(freqai_conf)
assert model_path.is_dir() is True assert model_path.is_dir() is True
def test_get_pairs_timestamp_validation(mocker, freqai_conf): def test_get_pairs_timestamp_validation(mocker, freqai_conf):
model_path = get_full_model_path(freqai_conf) model_path = get_full_models_path(freqai_conf)
with pytest.raises( with pytest.raises(
OperationalException, OperationalException,
match=r'.*required to run backtest with the freqai-backtest-live-models.*' match=r'.*required to run backtest with the freqai-backtest-live-models.*'
): ):
get_pairs_timestamps_training_from_ready_models(model_path) get_assets_timestamps_training_from_ready_models(model_path)
@pytest.mark.parametrize('model', [ @pytest.mark.parametrize('model', [
@ -94,12 +95,13 @@ def test_get_timerange_from_ready_models(mocker, freqai_conf, model):
freqai.extract_data_and_train_model( freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
model_path = get_full_model_path(freqai_conf) model_path = get_full_models_path(freqai_conf)
(backtesting_timerange, (backtesting_timerange,
backtesting_string_timerange, pairs_end_dates) = get_timerange_and_assets_end_dates_from_ready_models(models_path=model_path)
pairs_end_dates) = get_timerange_from_ready_models(models_path=model_path)
assert len(pairs_end_dates["ADA"]) == 2 assert len(pairs_end_dates["ADA"]) == 2
assert backtesting_string_timerange == '20180122-20180127' assert backtesting_timerange.startts == 1516492800
assert backtesting_timerange.startts == 1516579200
assert backtesting_timerange.stopts == 1516924800 assert backtesting_timerange.stopts == 1516924800
backtesting_string_timerange = get_timerange_backtest_live_models(freqai_conf)
assert backtesting_string_timerange == '20180121-20180126'