Automatically detect the maximum amount of data required by user-supplied indicators (to avoid NaNs in the dataset for rolling indicators). Add a new backtesting config parameter that lets users increase their startup_candles to accommodate high-timeframe indicators, and add docs explaining all of this. Add a new feature for automatic indicator duplication according to user-defined intervals (now exhibited in the example strategy and configs).
This commit is contained in:
@@ -60,11 +60,6 @@ class FreqaiDataKitchen:
|
||||
self.pair = pair
|
||||
self.svm_model: linear_model.SGDOneClassSVM = None
|
||||
if not self.live:
|
||||
# if config.get('freqai', {}).get('backtest_period') < 1:
|
||||
# raise OperationalException('backtest_period < 1,'
|
||||
# 'Can only backtest on full day increments'
|
||||
# 'backtest_period. Only live/dry mode'
|
||||
# 'allows fractions of days')
|
||||
self.full_timerange = self.create_fulltimerange(self.config["timerange"],
|
||||
self.freqai_config.get("train_period")
|
||||
)
|
||||
@@ -291,10 +286,16 @@ class FreqaiDataKitchen:
|
||||
labels = labels[
|
||||
(drop_index == 0) & (drop_index_labels == 0)
|
||||
] # assuming the labels depend entirely on the dataframe here.
|
||||
# logger.info(
|
||||
# "dropped %s training points due to NaNs, ensure all historical data downloaded",
|
||||
# len(unfiltered_dataframe) - len(filtered_dataframe),
|
||||
# )
|
||||
logger.info(
|
||||
f'dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points'
|
||||
f' due to NaNs in populated dataset {len(unfiltered_dataframe)}.'
|
||||
)
|
||||
if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
|
||||
logger.warning(
|
||||
f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100} percent'
|
||||
' of training data dropped due to NaNs, model may perform inconsistent'
|
||||
'with expectations'
|
||||
)
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else:
|
||||
@@ -685,10 +686,31 @@ class FreqaiDataKitchen:
|
||||
|
||||
return full_timerange
|
||||
|
||||
def check_if_new_training_required(self, trained_timestamp: int) -> Tuple[bool, TimeRange]:
|
||||
def check_if_new_training_required(self, trained_timestamp: int) -> Tuple[bool,
|
||||
TimeRange, TimeRange]:
|
||||
|
||||
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||
trained_timerange = TimeRange()
|
||||
data_load_timerange = TimeRange()
|
||||
|
||||
# find the max indicator length required
|
||||
max_timeframe_chars = self.freqai_config.get('timeframes')[-1]
|
||||
max_period = self.freqai_config.get('feature_parameters', {}).get(
|
||||
'indicator_max_period', 20)
|
||||
additional_seconds = 0
|
||||
if max_timeframe_chars[-1] == 'd':
|
||||
additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2])
|
||||
elif max_timeframe_chars[-1] == 'h':
|
||||
additional_seconds = max_period * 3600 * int(max_timeframe_chars[-2])
|
||||
elif max_timeframe_chars[-1] == 'm':
|
||||
if len(max_timeframe_chars) == 2:
|
||||
additional_seconds = max_period * 60 * int(max_timeframe_chars[-2])
|
||||
elif len(max_timeframe_chars) == 3:
|
||||
additional_seconds = max_period * 60 * int(float(max_timeframe_chars[0:2]))
|
||||
else:
|
||||
logger.warning('FreqAI could not detect max timeframe and therefore may not '
|
||||
'download the proper amount of data for training')
|
||||
|
||||
if trained_timestamp != 0:
|
||||
elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
|
||||
retrain = elapsed_time > self.freqai_config.get('backtest_period')
|
||||
@@ -696,10 +718,22 @@ class FreqaiDataKitchen:
|
||||
trained_timerange.startts = int(time - self.freqai_config.get(
|
||||
'train_period', 0) * SECONDS_IN_DAY)
|
||||
trained_timerange.stopts = int(time)
|
||||
# we want to load/populate indicators on more data than we plan to train on
|
||||
# because most of the indicators have a rolling timeperiod, and are thus NaNs
|
||||
# unless they have data further back in time before the start of the train period
|
||||
data_load_timerange.startts = int(time - self.freqai_config.get(
|
||||
'train_period', 0) * SECONDS_IN_DAY
|
||||
- additional_seconds)
|
||||
data_load_timerange.stopts = int(time)
|
||||
else: # user passed no live_trained_timerange in config
|
||||
trained_timerange.startts = int(time - self.freqai_config.get('train_period') *
|
||||
SECONDS_IN_DAY)
|
||||
trained_timerange.stopts = int(time)
|
||||
|
||||
data_load_timerange.startts = int(time - self.freqai_config.get(
|
||||
'train_period', 0) * SECONDS_IN_DAY
|
||||
- additional_seconds)
|
||||
data_load_timerange.stopts = int(time)
|
||||
retrain = True
|
||||
|
||||
# if retrain:
|
||||
@@ -714,7 +748,7 @@ class FreqaiDataKitchen:
|
||||
# # enables persistence, but not fully implemented into save/load data yet
|
||||
# self.data['live_trained_timerange'] = str(int(trained_timerange.stopts))
|
||||
|
||||
return retrain, trained_timerange
|
||||
return retrain, trained_timerange, data_load_timerange
|
||||
|
||||
def set_new_model_names(self, metadata: dict, trained_timerange: TimeRange):
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
# import contextlib
|
||||
import datetime
|
||||
import gc
|
||||
import logging
|
||||
# import sys
|
||||
@@ -149,8 +150,15 @@ class IFreqaiModel(ABC):
|
||||
# self.training_timerange_timerange = tr_train
|
||||
dataframe_train = dh.slice_dataframe(tr_train, dataframe)
|
||||
dataframe_backtest = dh.slice_dataframe(tr_backtest, dataframe)
|
||||
logger.info("training %s for %s", metadata["pair"], tr_train)
|
||||
|
||||
trained_timestamp = tr_train # TimeRange.parse_timerange(tr_train)
|
||||
tr_train_startts_str = datetime.datetime.utcfromtimestamp(
|
||||
tr_train.startts).strftime('%Y-%m-%d %H:%M:%S')
|
||||
tr_train_stopts_str = datetime.datetime.utcfromtimestamp(
|
||||
tr_train.stopts).strftime('%Y-%m-%d %H:%M:%S')
|
||||
logger.info("Training %s", metadata["pair"])
|
||||
logger.info(f'Training {tr_train_startts_str} to {tr_train_stopts_str}')
|
||||
|
||||
dh.data_path = Path(dh.full_path /
|
||||
str("sub-train" + "-" + metadata['pair'].split("/")[0] +
|
||||
str(int(trained_timestamp.stopts))))
|
||||
@@ -218,16 +226,19 @@ class IFreqaiModel(ABC):
|
||||
model_filename=model_filename)
|
||||
|
||||
(self.retrain,
|
||||
new_trained_timerange) = dh.check_if_new_training_required(trained_timestamp)
|
||||
new_trained_timerange,
|
||||
data_load_timerange) = dh.check_if_new_training_required(trained_timestamp)
|
||||
dh.set_paths(metadata, new_trained_timerange.stopts)
|
||||
|
||||
if self.retrain or not file_exists:
|
||||
if coin_first:
|
||||
self.train_model_in_series(new_trained_timerange, metadata, strategy, dh)
|
||||
self.train_model_in_series(new_trained_timerange, metadata,
|
||||
strategy, dh, data_load_timerange)
|
||||
else:
|
||||
self.training_on_separate_thread = True # acts like a lock
|
||||
self.retrain_model_on_separate_thread(new_trained_timerange,
|
||||
metadata, strategy, dh)
|
||||
metadata, strategy,
|
||||
dh, data_load_timerange)
|
||||
|
||||
elif self.training_on_separate_thread and not self.follow_mode:
|
||||
logger.info("FreqAI training a new model on background thread.")
|
||||
@@ -342,11 +353,12 @@ class IFreqaiModel(ABC):
|
||||
|
||||
@threaded
|
||||
def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict,
|
||||
strategy: IStrategy, dh: FreqaiDataKitchen):
|
||||
strategy: IStrategy, dh: FreqaiDataKitchen,
|
||||
data_load_timerange: TimeRange):
|
||||
|
||||
# with nostdout():
|
||||
dh.download_new_data_for_retraining(new_trained_timerange, metadata, strategy)
|
||||
corr_dataframes, base_dataframes = dh.load_pairs_histories(new_trained_timerange,
|
||||
dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
|
||||
corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
|
||||
metadata)
|
||||
|
||||
# protecting from common benign errors associated with grabbing new data from exchange:
|
||||
@@ -355,6 +367,8 @@ class IFreqaiModel(ABC):
|
||||
corr_dataframes,
|
||||
base_dataframes,
|
||||
metadata)
|
||||
unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
|
||||
|
||||
except Exception:
|
||||
logger.warning('Mismatched sizes encountered in strategy')
|
||||
# self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
|
||||
@@ -390,10 +404,11 @@ class IFreqaiModel(ABC):
|
||||
return
|
||||
|
||||
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
|
||||
strategy: IStrategy, dh: FreqaiDataKitchen):
|
||||
strategy: IStrategy, dh: FreqaiDataKitchen,
|
||||
data_load_timerange: TimeRange):
|
||||
|
||||
dh.download_new_data_for_retraining(new_trained_timerange, metadata, strategy)
|
||||
corr_dataframes, base_dataframes = dh.load_pairs_histories(new_trained_timerange,
|
||||
dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
|
||||
corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
|
||||
metadata)
|
||||
|
||||
unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
|
||||
@@ -401,6 +416,8 @@ class IFreqaiModel(ABC):
|
||||
base_dataframes,
|
||||
metadata)
|
||||
|
||||
unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
|
||||
|
||||
model = self.train(unfiltered_dataframe, metadata, dh)
|
||||
|
||||
self.data_drawer.pair_dict[metadata['pair']][
|
||||
|
Reference in New Issue
Block a user