merge develop into feat/freqai-rl-dev

This commit is contained in:
robcaulk
2022-11-12 10:54:34 +01:00
60 changed files with 1314 additions and 264 deletions

View File

@@ -5,8 +5,7 @@ from abc import ABC, abstractmethod
from collections import deque
from datetime import datetime, timezone
from pathlib import Path
from threading import Lock
from typing import Any, Dict, List, Optional, Literal, Tuple
from typing import Any, Dict, List, Literal, Optional, Tuple
import numpy as np
import pandas as pd
@@ -70,22 +69,23 @@ class IFreqaiModel(ABC):
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
# set current candle to arbitrary historical date
self.current_candle: datetime = datetime.fromtimestamp(637887600, tz=timezone.utc)
self.dd.current_candle = self.current_candle
self.scanning = False
self.ft_params = self.freqai_info["feature_parameters"]
self.corr_pairlist: List[str] = self.ft_params.get("include_corr_pairlist", [])
self.keras: bool = self.freqai_info.get("keras", False)
if self.keras and self.ft_params.get("DI_threshold", 0):
self.ft_params["DI_threshold"] = 0
logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
self.CONV_WIDTH = self.freqai_info.get('conv_width', 1)
if self.ft_params.get("inlier_metric_window", 0):
self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
self.pair_it = 0
self.pair_it_train = 0
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
self.train_queue = self._set_train_queue()
self.last_trade_database_summary: DataFrame = {}
self.current_trade_database_summary: DataFrame = {}
self.analysis_lock = Lock()
self.inference_time: float = 0
self.train_time: float = 0
self.begin_time: float = 0
@@ -93,7 +93,10 @@ class IFreqaiModel(ABC):
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
self.continual_learning = self.freqai_info.get('continual_learning', False)
self.plot_features = self.ft_params.get("plot_feature_importances", 0)
self.corr_dataframes: Dict[str, DataFrame] = {}
# get_corr_dataframes is controlling the caching of corr_dataframes
# for improved performance. Careful with this boolean.
self.get_corr_dataframes: bool = True
self._threads: List[threading.Thread] = []
self._stop_event = threading.Event()
self.strategy: Optional[IStrategy] = None
@@ -140,7 +143,11 @@ class IFreqaiModel(ABC):
# the concatenated results for the full backtesting period back to the strategy.
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.live, metadata["pair"])
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
if self.dk.backtest_live_models:
logger.info(
f"Backtesting {len(self.dk.backtesting_timeranges)} timeranges (live models)")
else:
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
@@ -269,25 +276,20 @@ class IFreqaiModel(ABC):
dataframe_train = dk.slice_dataframe(tr_train, dataframe)
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
trained_timestamp = tr_train
tr_train_startts_str = datetime.fromtimestamp(
tr_train.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_train_stopts_str = datetime.fromtimestamp(
tr_train.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
logger.info(
f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
"trains"
)
if not self.ensure_data_exists(dataframe_backtest, tr_backtest, pair):
continue
trained_timestamp_int = int(trained_timestamp.stopts)
dk.set_paths(pair, trained_timestamp_int)
self.log_backtesting_progress(tr_train, pair, train_it, total_trains)
dk.set_new_model_names(pair, trained_timestamp)
timestamp_model_id = int(tr_train.stopts)
if dk.backtest_live_models:
timestamp_model_id = int(tr_backtest.startts)
if dk.check_if_backtest_prediction_exists():
dk.set_paths(pair, timestamp_model_id)
dk.set_new_model_names(pair, timestamp_model_id)
if dk.check_if_backtest_prediction_is_valid(len(dataframe_backtest)):
self.dd.load_metadata(dk)
dk.find_features(dataframe_train)
self.check_if_feature_list_matches_strategy(dk)
@@ -299,7 +301,7 @@ class IFreqaiModel(ABC):
dk.find_labels(dataframe_train)
self.model = self.train(dataframe_train, pair, dk)
self.dd.pair_dict[pair]["trained_timestamp"] = int(
trained_timestamp.stopts)
tr_train.stopts)
if self.plot_features:
plot_feature_importance(self.model, pair, dk, self.plot_features)
if self.save_backtest_models:
@@ -351,6 +353,7 @@ class IFreqaiModel(ABC):
if self.dd.historic_data:
self.dd.update_historic_data(strategy, dk)
logger.debug(f'Updating historic data on pair {metadata["pair"]}')
self.track_current_candle()
if not self.follow_mode:
@@ -377,10 +380,10 @@ class IFreqaiModel(ABC):
# load the model and associated data into the data kitchen
self.model = self.dd.load_data(metadata["pair"], dk)
with self.analysis_lock:
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
dataframe = dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"],
do_corr_pairs=self.get_corr_dataframes
)
if not self.model:
logger.warning(
@@ -389,6 +392,9 @@ class IFreqaiModel(ABC):
self.dd.return_null_values_to_strategy(dataframe, dk)
return dk
if self.corr_pairlist:
dataframe = self.cache_corr_pairlist_dfs(dataframe, dk)
dk.find_labels(dataframe)
self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)
@@ -572,10 +578,9 @@ class IFreqaiModel(ABC):
data_load_timerange, pair, dk
)
with self.analysis_lock:
unfiltered_dataframe = dk.use_strategy_to_populate_indicators(
strategy, corr_dataframes, base_dataframes, pair
)
unfiltered_dataframe = dk.use_strategy_to_populate_indicators(
strategy, corr_dataframes, base_dataframes, pair
)
unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
@@ -586,7 +591,7 @@ class IFreqaiModel(ABC):
model = self.train(unfiltered_dataframe, pair, dk)
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
dk.set_new_model_names(pair, new_trained_timerange)
dk.set_new_model_names(pair, new_trained_timerange.stopts)
self.dd.save_data(model, pair, dk)
if self.plot_features:
@@ -751,6 +756,87 @@ class IFreqaiModel(ABC):
f'Best approximation queue: {best_queue}')
return best_queue
def cache_corr_pairlist_dfs(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame:
"""
Cache the corr_pairlist dfs to speed up performance for subsequent pairs during the
current candle.
:param dataframe: strategy fed dataframe
:param dk: datakitchen object for current asset
:return: dataframe to attach/extract cached corr_pair dfs to/from.
"""
if self.get_corr_dataframes:
self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
if not self.corr_dataframes:
logger.warning("Couldn't cache corr_pair dataframes for improved performance. "
"Consider ensuring that the full coin/stake, e.g. XYZ/USD, "
"is included in the column names when you are creating features "
"in `populate_any_indicators()`.")
self.get_corr_dataframes = not bool(self.corr_dataframes)
elif self.corr_dataframes:
dataframe = dk.attach_corr_pair_columns(
dataframe, self.corr_dataframes, dk.pair)
return dataframe
def track_current_candle(self):
"""
Checks if the latest candle appended by the datadrawer is
equivalent to the latest candle seen by FreqAI. If not, it
asks to refresh the cached corr_dfs, and resets the pair
counter.
"""
if self.dd.current_candle > self.current_candle:
self.get_corr_dataframes = True
self.pair_it = 1
self.current_candle = self.dd.current_candle
def ensure_data_exists(self, dataframe_backtest: DataFrame,
tr_backtest: TimeRange, pair: str) -> bool:
"""
Check if the dataframe is empty, if not, report useful information to user.
:param dataframe_backtest: the backtesting dataframe, maybe empty.
:param tr_backtest: current backtesting timerange.
:param pair: current pair
:return: if the data exists or not
"""
if self.config.get("freqai_backtest_live_models", False) and len(dataframe_backtest) == 0:
tr_backtest_startts_str = datetime.fromtimestamp(
tr_backtest.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_backtest_stopts_str = datetime.fromtimestamp(
tr_backtest.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
logger.info(f"No data found for pair {pair} from {tr_backtest_startts_str} "
f" from {tr_backtest_startts_str} to {tr_backtest_stopts_str}. "
"Probably more than one training within the same candle period.")
return False
return True
def log_backtesting_progress(self, tr_train: TimeRange, pair: str,
train_it: int, total_trains: int):
"""
Log the backtesting progress so user knows how many pairs have been trained and
how many more pairs/trains remain.
:param tr_train: the training timerange
:param train_it: the train iteration for the current pair (the sliding window progress)
:param pair: the current pair
:param total_trains: total trains (total number of slides for the sliding window)
"""
tr_train_startts_str = datetime.fromtimestamp(
tr_train.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_train_stopts_str = datetime.fromtimestamp(
tr_train.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
if not self.config.get("freqai_backtest_live_models", False):
logger.info(
f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
f" from {tr_train_startts_str} "
f"to {tr_train_stopts_str}, {train_it}/{total_trains} "
"trains"
)
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.