improve model youth by constantly scanning pairs in dry/live and always training new models. Fix bug in DI return values

This commit is contained in:
robcaulk 2022-06-17 16:06:51 +02:00
commit 5e914d5756
7 changed files with 203 additions and 118 deletions

View File

@ -452,6 +452,24 @@ config:
which will automatically purge all models older than the two most recently trained ones. which will automatically purge all models older than the two most recently trained ones.
## Defining model expirations
During dry/live, FreqAI trains each pair sequentially (on separate threads/GPU from the main
Freqtrade bot). This means there is always an age discrepancy between models. If a user is training
on 50 pairs, and each pair requires 5 minutes to train, the oldest model will be over 4 hours old.
This may be undesirable if the characteristic time scale (i.e. the target trade duration) for a strategy
is much less than 4 hours. The user can decide to only make trade entries if the model is less than
a certain number of hours old by setting `expiration_hours` in the config file:
```json
"freqai": {
"expiration_hours": 0.5,
}
```
In the present example, the user will only allow predictions from models that are less than half
an hour old.
<!-- ## Dynamic target expectation <!-- ## Dynamic target expectation
The labels used for model training have a unique statistical distribution for each separate model training. The labels used for model training have a unique statistical distribution for each separate model training.

View File

@ -30,6 +30,7 @@ class FreqaiDataDrawer:
def __init__(self, full_path: Path, config: dict, follow_mode: bool = False): def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):
self.config = config self.config = config
self.freqai_info = config.get('freqai', {})
# dictionary holding all pair metadata necessary to load in from disk # dictionary holding all pair metadata necessary to load in from disk
self.pair_dict: Dict[str, Any] = {} self.pair_dict: Dict[str, Any] = {}
# dictionary holding all actively inferenced models in memory given a model filename # dictionary holding all actively inferenced models in memory given a model filename
@ -107,7 +108,7 @@ class FreqaiDataDrawer:
if isinstance(object, np.generic): if isinstance(object, np.generic):
return object.item() return object.item()
def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]: def get_pair_dict_info(self, pair: str) -> Tuple[str, int, bool, bool]:
""" """
Locate and load existing model metadata from persistent storage. If not located, Locate and load existing model metadata from persistent storage. If not located,
create a new one and append the current pair to it and prepare it for its first create a new one and append the current pair to it and prepare it for its first
@ -120,22 +121,22 @@ class FreqaiDataDrawer:
coin_first: bool = If the coin is fresh without metadata coin_first: bool = If the coin is fresh without metadata
return_null_array: bool = Follower could not find pair metadata return_null_array: bool = Follower could not find pair metadata
""" """
pair_in_dict = self.pair_dict.get(metadata['pair']) pair_in_dict = self.pair_dict.get(pair)
data_path_set = self.pair_dict.get(metadata['pair'], {}).get('data_path', None) data_path_set = self.pair_dict.get(pair, {}).get('data_path', None)
return_null_array = False return_null_array = False
if pair_in_dict: if pair_in_dict:
model_filename = self.pair_dict[metadata['pair']]['model_filename'] model_filename = self.pair_dict[pair]['model_filename']
trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp'] trained_timestamp = self.pair_dict[pair]['trained_timestamp']
coin_first = self.pair_dict[metadata['pair']]['first'] coin_first = self.pair_dict[pair]['first']
elif not self.follow_mode: elif not self.follow_mode:
self.pair_dict[metadata['pair']] = {} self.pair_dict[pair] = {}
model_filename = self.pair_dict[metadata['pair']]['model_filename'] = '' model_filename = self.pair_dict[pair]['model_filename'] = ''
coin_first = self.pair_dict[metadata['pair']]['first'] = True coin_first = self.pair_dict[pair]['first'] = True
trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp'] = 0 trained_timestamp = self.pair_dict[pair]['trained_timestamp'] = 0
if not data_path_set and self.follow_mode: if not data_path_set and self.follow_mode:
logger.warning(f'Follower could not find current pair {metadata["pair"]} in ' logger.warning(f'Follower could not find current pair {pair} in '
f'pair_dictionary at path {self.full_path}, sending null values ' f'pair_dictionary at path {self.full_path}, sending null values '
'back to strategy.') 'back to strategy.')
return_null_array = True return_null_array = True
@ -168,6 +169,7 @@ class FreqaiDataDrawer:
self.model_return_values[pair]['do_preds'] = dh.full_do_predict self.model_return_values[pair]['do_preds'] = dh.full_do_predict
self.model_return_values[pair]['target_mean'] = dh.full_target_mean self.model_return_values[pair]['target_mean'] = dh.full_target_mean
self.model_return_values[pair]['target_std'] = dh.full_target_std self.model_return_values[pair]['target_std'] = dh.full_target_std
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
self.model_return_values[pair]['DI_values'] = dh.full_DI_values self.model_return_values[pair]['DI_values'] = dh.full_DI_values
# if not self.follow_mode: # if not self.follow_mode:
@ -189,6 +191,7 @@ class FreqaiDataDrawer:
self.model_return_values[pair]['predictions'] = np.append( self.model_return_values[pair]['predictions'] = np.append(
self.model_return_values[pair]['predictions'][i:], predictions[-1]) self.model_return_values[pair]['predictions'][i:], predictions[-1])
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
self.model_return_values[pair]['DI_values'] = np.append( self.model_return_values[pair]['DI_values'] = np.append(
self.model_return_values[pair]['DI_values'][i:], dh.DI_values[-1]) self.model_return_values[pair]['DI_values'][i:], dh.DI_values[-1])
self.model_return_values[pair]['do_preds'] = np.append( self.model_return_values[pair]['do_preds'] = np.append(
@ -202,6 +205,7 @@ class FreqaiDataDrawer:
prepend = np.zeros(abs(length_difference) - 1) prepend = np.zeros(abs(length_difference) - 1)
self.model_return_values[pair]['predictions'] = np.insert( self.model_return_values[pair]['predictions'] = np.insert(
self.model_return_values[pair]['predictions'], 0, prepend) self.model_return_values[pair]['predictions'], 0, prepend)
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
self.model_return_values[pair]['DI_values'] = np.insert( self.model_return_values[pair]['DI_values'] = np.insert(
self.model_return_values[pair]['DI_values'], 0, prepend) self.model_return_values[pair]['DI_values'], 0, prepend)
self.model_return_values[pair]['do_preds'] = np.insert( self.model_return_values[pair]['do_preds'] = np.insert(
@ -215,6 +219,7 @@ class FreqaiDataDrawer:
dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds']) dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds'])
dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean']) dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean'])
dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std']) dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std'])
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
dh.full_DI_values = copy.deepcopy(self.model_return_values[pair]['DI_values']) dh.full_DI_values = copy.deepcopy(self.model_return_values[pair]['DI_values'])
# if not self.follow_mode: # if not self.follow_mode:
@ -227,6 +232,7 @@ class FreqaiDataDrawer:
dh.full_do_predict = np.zeros(len_df) dh.full_do_predict = np.zeros(len_df)
dh.full_target_mean = np.zeros(len_df) dh.full_target_mean = np.zeros(len_df)
dh.full_target_std = np.zeros(len_df) dh.full_target_std = np.zeros(len_df)
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
dh.full_DI_values = np.zeros(len_df) dh.full_DI_values = np.zeros(len_df)
def purge_old_models(self) -> None: def purge_old_models(self) -> None:

View File

@ -71,7 +71,7 @@ class FreqaiDataKitchen:
self.data_drawer = data_drawer self.data_drawer = data_drawer
def set_paths(self, metadata: dict, trained_timestamp: int = None,) -> None: def set_paths(self, pair: str, trained_timestamp: int = None,) -> None:
""" """
Set the paths to the data for the present coin/botloop Set the paths to the data for the present coin/botloop
:params: :params:
@ -83,7 +83,7 @@ class FreqaiDataKitchen:
str(self.freqai_config.get('identifier'))) str(self.freqai_config.get('identifier')))
self.data_path = Path(self.full_path / str("sub-train" + "-" + self.data_path = Path(self.full_path / str("sub-train" + "-" +
metadata['pair'].split("/")[0] + pair.split("/")[0] +
str(trained_timestamp))) str(trained_timestamp)))
return return
@ -151,6 +151,9 @@ class FreqaiDataKitchen:
:model: User trained model which can be inferenced for new predictions :model: User trained model which can be inferenced for new predictions
""" """
if not self.data_drawer.pair_dict[coin]['model_filename']:
return None
if self.live: if self.live:
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename'] self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path']) self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
@ -670,7 +673,7 @@ class FreqaiDataKitchen:
self.full_predictions = np.append(self.full_predictions, predictions) self.full_predictions = np.append(self.full_predictions, predictions)
self.full_do_predict = np.append(self.full_do_predict, do_predict) self.full_do_predict = np.append(self.full_do_predict, do_predict)
if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0: if self.freqai_config.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
self.full_DI_values = np.append(self.full_DI_values, self.DI_values) self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
self.full_target_mean = np.append(self.full_target_mean, target_mean) self.full_target_mean = np.append(self.full_target_mean, target_mean)
self.full_target_std = np.append(self.full_target_std, target_std) self.full_target_std = np.append(self.full_target_std, target_std)
@ -686,7 +689,7 @@ class FreqaiDataKitchen:
filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count
self.full_predictions = np.append(filler, self.full_predictions) self.full_predictions = np.append(filler, self.full_predictions)
self.full_do_predict = np.append(filler, self.full_do_predict) self.full_do_predict = np.append(filler, self.full_do_predict)
if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0: if self.freqai_config.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
self.full_DI_values = np.append(filler, self.full_DI_values) self.full_DI_values = np.append(filler, self.full_DI_values)
self.full_target_mean = np.append(filler, self.full_target_mean) self.full_target_mean = np.append(filler, self.full_target_mean)
self.full_target_std = np.append(filler, self.full_target_std) self.full_target_std = np.append(filler, self.full_target_std)
@ -722,6 +725,12 @@ class FreqaiDataKitchen:
return full_timerange return full_timerange
def check_if_model_expired(self, trained_timestamp: int) -> bool:
    """
    Tell whether a model is older than the user-configured `expiration_hours`.

    :param trained_timestamp: training time of the model as a UTC epoch timestamp
        in seconds (it is compared against `datetime.now(utc).timestamp()`).
    :return: True if `expiration_hours` is set to a positive value and the model
        age in hours exceeds it, False otherwise.
    """
    max_hours = self.freqai_config.get('expiration_hours', 0)
    # An unset (default 0) or non-positive limit means expiration is disabled.
    # Comparing the age against 0 would flag every model as expired.
    if max_hours <= 0:
        return False

    now = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
    elapsed_hours = (now - trained_timestamp) / 3600  # seconds -> hours
    return elapsed_hours > max_hours
def check_if_new_training_required(self, trained_timestamp: int) -> Tuple[bool, def check_if_new_training_required(self, trained_timestamp: int) -> Tuple[bool,
TimeRange, TimeRange]: TimeRange, TimeRange]:
@ -747,7 +756,7 @@ class FreqaiDataKitchen:
logger.warning('FreqAI could not detect max timeframe and therefore may not ' logger.warning('FreqAI could not detect max timeframe and therefore may not '
'download the proper amount of data for training') 'download the proper amount of data for training')
logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days') # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')
if trained_timestamp != 0: if trained_timestamp != 0:
elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
@ -796,12 +805,12 @@ class FreqaiDataKitchen:
return retrain, trained_timerange, data_load_timerange return retrain, trained_timerange, data_load_timerange
def set_new_model_names(self, metadata: dict, trained_timerange: TimeRange): def set_new_model_names(self, pair: str, trained_timerange: TimeRange):
coin, _ = metadata['pair'].split("/") coin, _ = pair.split("/")
# set the new data_path # set the new data_path
self.data_path = Path(self.full_path / str("sub-train" + "-" + self.data_path = Path(self.full_path / str("sub-train" + "-" +
metadata['pair'].split("/")[0] + pair.split("/")[0] +
str(int(trained_timerange.stopts)))) str(int(trained_timerange.stopts))))
self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts)) self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts))
@ -870,6 +879,8 @@ class FreqaiDataKitchen:
# check if newest candle is already appended # check if newest candle is already appended
df_dp = strategy.dp.get_pair_dataframe(pair, tf) df_dp = strategy.dp.get_pair_dataframe(pair, tf)
if len(df_dp.index) == 0:
continue
if ( if (
str(history_data[pair][tf].iloc[-1]['date']) == str(history_data[pair][tf].iloc[-1]['date']) ==
str(df_dp.iloc[-1:]['date'].iloc[-1]) str(df_dp.iloc[-1:]['date'].iloc[-1])
@ -918,7 +929,7 @@ class FreqaiDataKitchen:
'trading_mode', 'spot')) 'trading_mode', 'spot'))
def get_base_and_corr_dataframes(self, timerange: TimeRange, def get_base_and_corr_dataframes(self, timerange: TimeRange,
metadata: dict) -> Tuple[Dict[Any, Any], Dict[Any, Any]]: pair: str) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
""" """
Searches through our historic_data in memory and returns the dataframes relevant Searches through our historic_data in memory and returns the dataframes relevant
to the present pair. to the present pair.
@ -927,6 +938,7 @@ class FreqaiDataKitchen:
for training according to user defined train_period for training according to user defined train_period
metadata: dict = strategy furnished pair metadata metadata: dict = strategy furnished pair metadata
""" """
with self.data_drawer.history_lock: with self.data_drawer.history_lock:
corr_dataframes: Dict[Any, Any] = {} corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {}
@ -936,11 +948,11 @@ class FreqaiDataKitchen:
for tf in self.freqai_config.get('timeframes'): for tf in self.freqai_config.get('timeframes'):
base_dataframes[tf] = self.slice_dataframe( base_dataframes[tf] = self.slice_dataframe(
timerange, timerange,
historic_data[metadata['pair']][tf] historic_data[pair][tf]
) )
if pairs: if pairs:
for p in pairs: for p in pairs:
if metadata['pair'] in p: if pair in p:
continue # dont repeat anything from whitelist continue # dont repeat anything from whitelist
if p not in corr_dataframes: if p not in corr_dataframes:
corr_dataframes[p] = {} corr_dataframes[p] = {}
@ -984,7 +996,7 @@ class FreqaiDataKitchen:
def use_strategy_to_populate_indicators(self, strategy: IStrategy, def use_strategy_to_populate_indicators(self, strategy: IStrategy,
corr_dataframes: dict, corr_dataframes: dict,
base_dataframes: dict, base_dataframes: dict,
metadata: dict) -> DataFrame: pair: str) -> DataFrame:
""" """
Use the user defined strategy for populating indicators during Use the user defined strategy for populating indicators during
retrain retrain
@ -1003,19 +1015,19 @@ class FreqaiDataKitchen:
for tf in self.freqai_config.get("timeframes"): for tf in self.freqai_config.get("timeframes"):
dataframe = strategy.populate_any_indicators( dataframe = strategy.populate_any_indicators(
metadata, pair,
metadata['pair'], pair,
dataframe.copy(), dataframe.copy(),
tf, tf,
base_dataframes[tf], base_dataframes[tf],
coin=metadata['pair'].split("/")[0] + "-" coin=pair.split("/")[0] + "-"
) )
if pairs: if pairs:
for i in pairs: for i in pairs:
if metadata['pair'] in i: if pair in i:
continue # dont repeat anything from whitelist continue # dont repeat anything from whitelist
dataframe = strategy.populate_any_indicators( dataframe = strategy.populate_any_indicators(
metadata, pair,
i, i,
dataframe.copy(), dataframe.copy(),
tf, tf,

View File

@ -8,6 +8,7 @@ from abc import ABC, abstractmethod
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Tuple from typing import Any, Dict, Tuple
import numpy as np
import numpy.typing as npt import numpy.typing as npt
import pandas as pd import pandas as pd
from pandas import DataFrame from pandas import DataFrame
@ -63,6 +64,9 @@ class IFreqaiModel(ABC):
self.lock = threading.Lock() self.lock = threading.Lock()
self.follow_mode = self.freqai_info.get('follow_mode', False) self.follow_mode = self.freqai_info.get('follow_mode', False)
self.identifier = self.freqai_info.get('identifier', 'no_id_provided') self.identifier = self.freqai_info.get('identifier', 'no_id_provided')
self.scanning = False
self.ready_to_scan = False
self.first = True
def assert_config(self, config: Dict[str, Any]) -> None: def assert_config(self, config: Dict[str, Any]) -> None:
@ -91,17 +95,9 @@ class IFreqaiModel(ABC):
# and we keep the flag self.training_on_separate_threaad in the current object to help # and we keep the flag self.training_on_separate_threaad in the current object to help
# determine what the current pair will do # determine what the current pair will do
if self.live: if self.live:
if (not self.training_on_separate_thread and
self.data_drawer.pair_dict[metadata['pair']]['priority'] == 1):
self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.dh = FreqaiDataKitchen(self.config, self.data_drawer,
self.live, metadata["pair"]) self.live, metadata["pair"])
dh = self.start_live(dataframe, metadata, strategy, self.dh, trainable=True) dh = self.start_live(dataframe, metadata, strategy, self.dh)
else:
# we will have at max 2 separate instances of the kitchen at once.
self.dh_fg = FreqaiDataKitchen(self.config, self.data_drawer,
self.live, metadata["pair"])
dh = self.start_live(dataframe, metadata, strategy, self.dh_fg, trainable=False)
# For backtesting, each pair enters and then gets trained for each window along the # For backtesting, each pair enters and then gets trained for each window along the
# sliding window defined by "train_period" (training window) and "backtest_period" # sliding window defined by "train_period" (training window) and "backtest_period"
@ -114,8 +110,37 @@ class IFreqaiModel(ABC):
dh = self.start_backtesting(dataframe, metadata, self.dh) dh = self.start_backtesting(dataframe, metadata, self.dh)
return self.return_values(dataframe, dh) return self.return_values(dataframe, dh)
# return (dh.full_predictions, dh.full_do_predict,
# dh.full_target_mean, dh.full_target_std) @threaded
def start_scanning(self, strategy: IStrategy) -> None:
    """
    Endlessly walk the exchange whitelist and train the pair that currently holds
    priority 1 in the training queue whenever it needs a (re)train or has no model
    file on disk.

    This loop never returns; it is meant to run in the background (the definition
    is decorated with `@threaded`).

    :param strategy: user strategy, forwarded to `train_model_in_series`.
    """
    import time  # local import so the block does not rely on a file-level import

    while 1:
        # Pause between passes: when the priority-1 pair is up to date nothing in
        # the loop body blocks, so without this the thread would busy-spin.
        time.sleep(1)

        # `or []` guards against a missing/None whitelist instead of raising TypeError
        whitelist = self.config.get('exchange', {}).get('pair_whitelist') or []
        for pair in whitelist:
            # only the pair at the front of the training queue is considered
            if self.data_drawer.pair_dict[pair]['priority'] != 1:
                continue

            dh = FreqaiDataKitchen(self.config, self.data_drawer,
                                   self.live, pair)

            (model_filename,
             trained_timestamp,
             _, _) = self.data_drawer.get_pair_dict_info(pair)

            # point the kitchen at the currently registered model and look for it on disk
            dh.set_paths(pair, trained_timestamp)
            file_exists = self.model_exists(pair,
                                            dh,
                                            trained_timestamp=trained_timestamp,
                                            model_filename=model_filename,
                                            scanning=True)

            (retrain,
             new_trained_timerange,
             data_load_timerange) = dh.check_if_new_training_required(trained_timestamp)
            # switch paths to where a freshly trained model would be stored
            dh.set_paths(pair, new_trained_timerange.stopts)

            if retrain or not file_exists:
                self.train_model_in_series(new_trained_timerange, pair,
                                           strategy, dh, data_load_timerange)
def start_backtesting(self, dataframe: DataFrame, metadata: dict, def start_backtesting(self, dataframe: DataFrame, metadata: dict,
dh: FreqaiDataKitchen) -> FreqaiDataKitchen: dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
@ -142,7 +167,7 @@ class IFreqaiModel(ABC):
for tr_train, tr_backtest in zip( for tr_train, tr_backtest in zip(
dh.training_timeranges, dh.backtesting_timeranges dh.training_timeranges, dh.backtesting_timeranges
): ):
(_, _, _, _) = self.data_drawer.get_pair_dict_info(metadata) (_, _, _, _) = self.data_drawer.get_pair_dict_info(metadata['pair'])
gc.collect() gc.collect()
dh.data = {} # clean the pair specific data between training window sliding dh.data = {} # clean the pair specific data between training window sliding
self.training_timerange = tr_train self.training_timerange = tr_train
@ -163,7 +188,7 @@ class IFreqaiModel(ABC):
str(int(trained_timestamp.stopts)))) str(int(trained_timestamp.stopts))))
if not self.model_exists(metadata["pair"], dh, if not self.model_exists(metadata["pair"], dh,
trained_timestamp=trained_timestamp.stopts): trained_timestamp=trained_timestamp.stopts):
self.model = self.train(dataframe_train, metadata, dh) self.model = self.train(dataframe_train, metadata['pair'], dh)
self.data_drawer.pair_dict[metadata['pair']][ self.data_drawer.pair_dict[metadata['pair']][
'trained_timestamp'] = trained_timestamp.stopts 'trained_timestamp'] = trained_timestamp.stopts
dh.set_new_model_names(metadata, trained_timestamp) dh.set_new_model_names(metadata, trained_timestamp)
@ -184,8 +209,7 @@ class IFreqaiModel(ABC):
return dh return dh
def start_live(self, dataframe: DataFrame, metadata: dict, def start_live(self, dataframe: DataFrame, metadata: dict,
strategy: IStrategy, dh: FreqaiDataKitchen, strategy: IStrategy, dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
trainable: bool) -> FreqaiDataKitchen:
""" """
The main broad execution for dry/live. This function will check if a retraining should be The main broad execution for dry/live. This function will check if a retraining should be
performed, and if so, retrain and reset the model. performed, and if so, retrain and reset the model.
@ -203,10 +227,10 @@ class IFreqaiModel(ABC):
self.data_drawer.update_follower_metadata() self.data_drawer.update_follower_metadata()
# get the model metadata associated with the current pair # get the model metadata associated with the current pair
(model_filename, (_,
trained_timestamp, trained_timestamp,
coin_first, _,
return_null_array) = self.data_drawer.get_pair_dict_info(metadata) return_null_array) = self.data_drawer.get_pair_dict_info(metadata['pair'])
# if the metadata doesnt exist, the follower returns null arrays to strategy # if the metadata doesnt exist, the follower returns null arrays to strategy
if self.follow_mode and return_null_array: if self.follow_mode and return_null_array:
@ -222,20 +246,18 @@ class IFreqaiModel(ABC):
# if trainable, check if model needs training, if so compute new timerange, # if trainable, check if model needs training, if so compute new timerange,
# then save model and metadata. # then save model and metadata.
# if not trainable, load existing data # if not trainable, load existing data
if (trainable or coin_first) and not self.follow_mode: if not self.follow_mode:
file_exists = False # if trained_timestamp != 0: # historical model available
# dh.set_paths(metadata['pair'], trained_timestamp)
# # file_exists = self.model_exists(metadata['pair'],
# # dh,
# # trained_timestamp=trained_timestamp,
# # model_filename=model_filename)
if trained_timestamp != 0: # historical model available (_,
dh.set_paths(metadata, trained_timestamp)
file_exists = self.model_exists(metadata['pair'],
dh,
trained_timestamp=trained_timestamp,
model_filename=model_filename)
(self.retrain,
new_trained_timerange, new_trained_timerange,
data_load_timerange) = dh.check_if_new_training_required(trained_timestamp) data_load_timerange) = dh.check_if_new_training_required(trained_timestamp)
dh.set_paths(metadata, new_trained_timerange.stopts) dh.set_paths(metadata['pair'], new_trained_timerange.stopts)
# download candle history if it is not already in memory # download candle history if it is not already in memory
if not self.data_drawer.historic_data: if not self.data_drawer.historic_data:
@ -245,21 +267,22 @@ class IFreqaiModel(ABC):
dh.download_all_data_for_training(data_load_timerange) dh.download_all_data_for_training(data_load_timerange)
dh.load_all_pair_histories(data_load_timerange) dh.load_all_pair_histories(data_load_timerange)
# train the model on the trained timerange if not self.scanning:
if self.retrain or not file_exists: self.scanning = True
if coin_first: self.start_scanning(strategy)
self.train_model_in_series(new_trained_timerange, metadata,
strategy, dh, data_load_timerange)
else:
self.training_on_separate_thread = True # acts like a lock
self.retrain_model_on_separate_thread(new_trained_timerange,
metadata, strategy,
dh, data_load_timerange)
elif not trainable and not self.follow_mode: # train the model on the trained timerange
logger.info(f'{metadata["pair"]} holds spot ' # if coin_first and not self.scanning:
f'{self.data_drawer.pair_dict[metadata["pair"]]["priority"]} ' # self.train_model_in_series(new_trained_timerange, metadata['pair'],
'in training queue') # strategy, dh, data_load_timerange)
# elif not coin_first and not self.scanning:
# self.scanning = True
# self.start_scanning(strategy)
# elif not trainable and not self.follow_mode:
# logger.info(f'{metadata["pair"]} holds spot '
# f'{self.data_drawer.pair_dict[metadata["pair"]]["priority"]} '
# 'in training queue')
elif self.follow_mode: elif self.follow_mode:
dh.set_paths(metadata, trained_timestamp) dh.set_paths(metadata, trained_timestamp)
logger.info('FreqAI instance set to follow_mode, finding existing pair' logger.info('FreqAI instance set to follow_mode, finding existing pair'
@ -268,25 +291,46 @@ class IFreqaiModel(ABC):
# load the model and associated data into the data kitchen # load the model and associated data into the data kitchen
self.model = dh.load_data(coin=metadata['pair']) self.model = dh.load_data(coin=metadata['pair'])
if not self.model:
logger.warning('No model ready, returning null values to strategy.')
self.data_drawer.return_null_values_to_strategy(dataframe, dh)
return dh
# ensure user is feeding the correct indicators to the model # ensure user is feeding the correct indicators to the model
self.check_if_feature_list_matches_strategy(dataframe, dh) self.check_if_feature_list_matches_strategy(dataframe, dh)
self.build_strategy_return_arrays(dataframe, dh, metadata['pair'], trained_timestamp)
return dh
def build_strategy_return_arrays(self, dataframe: DataFrame,
dh: FreqaiDataKitchen, pair: str,
trained_timestamp: int) -> None:
# hold the historical predictions in memory so we are sending back # hold the historical predictions in memory so we are sending back
# correct array to strategy FIXME currently broken, but only affecting # correct array to strategy FIXME currently broken, but only affecting
# Frequi reporting. Signals remain unaffeted. # Frequi reporting. Signals remain unaffeted.
if metadata['pair'] not in self.data_drawer.model_return_values:
if pair not in self.data_drawer.model_return_values:
preds, do_preds = self.predict(dataframe, dh) preds, do_preds = self.predict(dataframe, dh)
dh.append_predictions(preds, do_preds, len(dataframe)) dh.append_predictions(preds, do_preds, len(dataframe))
dh.fill_predictions(len(dataframe)) dh.fill_predictions(len(dataframe))
self.data_drawer.set_initial_return_values(metadata['pair'], dh) self.data_drawer.set_initial_return_values(pair, dh)
return
elif self.dh.check_if_model_expired(trained_timestamp):
preds, do_preds, dh.DI_values = np.zeros(2), np.ones(2) * 2, np.zeros(2)
logger.warning('Model expired, returning null values to strategy. Strategy '
'construction should take care to consider this event with '
'prediction == 0 and do_predict == 2')
else: else:
preds, do_preds = self.predict(dataframe.iloc[-2:], dh) preds, do_preds = self.predict(dataframe.iloc[-2:], dh)
self.data_drawer.append_model_predictions(metadata['pair'], preds, do_preds,
dh.data["target_mean"],
dh.data["target_std"], dh,
len(dataframe))
return dh self.data_drawer.append_model_predictions(pair, preds, do_preds,
dh.data["target_mean"],
dh.data["target_std"],
dh,
len(dataframe))
return
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame, def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
dh: FreqaiDataKitchen) -> None: dh: FreqaiDataKitchen) -> None:
@ -357,7 +401,7 @@ class IFreqaiModel(ABC):
# dh.remove_outliers(predict=True) # creates dropped index # dh.remove_outliers(predict=True) # creates dropped index
def model_exists(self, pair: str, dh: FreqaiDataKitchen, trained_timestamp: int = None, def model_exists(self, pair: str, dh: FreqaiDataKitchen, trained_timestamp: int = None,
model_filename: str = '') -> bool: model_filename: str = '', scanning: bool = False) -> bool:
""" """
Given a pair and path, check if a model already exists Given a pair and path, check if a model already exists
:param pair: pair e.g. BTC/USD :param pair: pair e.g. BTC/USD
@ -370,9 +414,9 @@ class IFreqaiModel(ABC):
path_to_modelfile = Path(dh.data_path / str(model_filename + "_model.joblib")) path_to_modelfile = Path(dh.data_path / str(model_filename + "_model.joblib"))
file_exists = path_to_modelfile.is_file() file_exists = path_to_modelfile.is_file()
if file_exists: if file_exists and not scanning:
logger.info("Found model at %s", dh.data_path / dh.model_filename) logger.info("Found model at %s", dh.data_path / dh.model_filename)
else: elif not scanning:
logger.info("Could not find model at %s", dh.data_path / dh.model_filename) logger.info("Could not find model at %s", dh.data_path / dh.model_filename)
return file_exists return file_exists
@ -382,7 +426,7 @@ class IFreqaiModel(ABC):
str(self.freqai_info.get('identifier'))) str(self.freqai_info.get('identifier')))
@threaded @threaded
def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict, def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, pair: str,
strategy: IStrategy, dh: FreqaiDataKitchen, strategy: IStrategy, dh: FreqaiDataKitchen,
data_load_timerange: TimeRange): data_load_timerange: TimeRange):
""" """
@ -403,14 +447,14 @@ class IFreqaiModel(ABC):
# metadata) # metadata)
corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
metadata) pair)
# protecting from common benign errors associated with grabbing new data from exchange: # protecting from common benign errors associated with grabbing new data from exchange:
try: try:
unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy, unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
corr_dataframes, corr_dataframes,
base_dataframes, base_dataframes,
metadata) pair)
unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe) unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
except Exception as err: except Exception as err:
@ -420,25 +464,25 @@ class IFreqaiModel(ABC):
return return
try: try:
model = self.train(unfiltered_dataframe, metadata, dh) model = self.train(unfiltered_dataframe, pair, dh)
except ValueError: except ValueError:
logger.warning('Value error encountered during training') logger.warning('Value error encountered during training')
self.training_on_separate_thread = False self.training_on_separate_thread = False
self.retrain = False self.retrain = False
return return
self.data_drawer.pair_dict[metadata['pair']][ self.data_drawer.pair_dict[pair][
'trained_timestamp'] = new_trained_timerange.stopts 'trained_timestamp'] = new_trained_timerange.stopts
dh.set_new_model_names(metadata, new_trained_timerange) dh.set_new_model_names(pair, new_trained_timerange)
# logger.info('Training queue' # logger.info('Training queue'
# f'{sorted(self.data_drawer.pair_dict.items(), key=lambda item: item[1])}') # f'{sorted(self.data_drawer.pair_dict.items(), key=lambda item: item[1])}')
if self.data_drawer.pair_dict[metadata['pair']]['priority'] == 1: if self.data_drawer.pair_dict[pair]['priority'] == 1:
with self.lock: with self.lock:
self.data_drawer.pair_to_end_of_training_queue(metadata['pair']) self.data_drawer.pair_to_end_of_training_queue(pair)
dh.save_data(model, coin=metadata['pair']) dh.save_data(model, coin=pair)
self.training_on_separate_thread = False # self.training_on_separate_thread = False
self.retrain = False # self.retrain = False
# each time we finish a training, we check the directory to purge old models. # each time we finish a training, we check the directory to purge old models.
if self.freqai_info.get('purge_old_models', False): if self.freqai_info.get('purge_old_models', False):
@ -446,7 +490,7 @@ class IFreqaiModel(ABC):
return return
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict, def train_model_in_series(self, new_trained_timerange: TimeRange, pair: str,
strategy: IStrategy, dh: FreqaiDataKitchen, strategy: IStrategy, dh: FreqaiDataKitchen,
data_load_timerange: TimeRange): data_load_timerange: TimeRange):
""" """
@ -464,29 +508,35 @@ class IFreqaiModel(ABC):
# corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
# metadata) # metadata)
corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
metadata) pair)
unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy, unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
corr_dataframes, corr_dataframes,
base_dataframes, base_dataframes,
metadata) pair)
unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe) unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
model = self.train(unfiltered_dataframe, metadata, dh) model = self.train(unfiltered_dataframe, pair, dh)
self.data_drawer.pair_dict[metadata['pair']][ self.data_drawer.pair_dict[pair][
'trained_timestamp'] = new_trained_timerange.stopts 'trained_timestamp'] = new_trained_timerange.stopts
dh.set_new_model_names(metadata, new_trained_timerange) dh.set_new_model_names(pair, new_trained_timerange)
self.data_drawer.pair_dict[metadata['pair']]['first'] = False self.data_drawer.pair_dict[pair]['first'] = False
dh.save_data(model, coin=metadata['pair']) if self.data_drawer.pair_dict[pair]['priority'] == 1 and self.scanning:
self.retrain = False with self.lock:
self.data_drawer.pair_to_end_of_training_queue(pair)
dh.save_data(model, coin=pair)
if self.freqai_info.get('purge_old_models', False):
self.data_drawer.purge_old_models()
# self.retrain = False
# Following methods which are overridden by user made prediction models. # Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModlel.py for an example. # See freqai/prediction_models/CatboostPredictionModlel.py for an example.
@abstractmethod @abstractmethod
def train(self, unfiltered_dataframe: DataFrame, metadata: dict, dh: FreqaiDataKitchen) -> Any: def train(self, unfiltered_dataframe: DataFrame, pair: str, dh: FreqaiDataKitchen) -> Any:
""" """
Filter the training data and train a model to it. Train makes heavy use of the datahandler Filter the training data and train a model to it. Train makes heavy use of the datahandler
for storing, saving, loading, and analyzing the data. for storing, saving, loading, and analyzing the data.

View File

@ -24,7 +24,7 @@ class CatboostPredictionModel(IFreqaiModel):
dataframe["do_predict"] = dh.full_do_predict dataframe["do_predict"] = dh.full_do_predict
dataframe["target_mean"] = dh.full_target_mean dataframe["target_mean"] = dh.full_target_mean
dataframe["target_std"] = dh.full_target_std dataframe["target_std"] = dh.full_target_std
if self.freqai_info.get('feature_parameters', {}).get('DI-threshold', 0) > 0: if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0:
dataframe["DI"] = dh.full_DI_values dataframe["DI"] = dh.full_DI_values
return dataframe return dataframe
@ -48,7 +48,7 @@ class CatboostPredictionModel(IFreqaiModel):
return dataframe["s"] return dataframe["s"]
def train(self, unfiltered_dataframe: DataFrame, def train(self, unfiltered_dataframe: DataFrame,
metadata: dict, dh: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: pair: str, dh: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]:
""" """
Filter the training data and train a model to it. Train makes heavy use of the datakitchen Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data. for storing, saving, loading, and analyzing the data.
@ -60,7 +60,7 @@ class CatboostPredictionModel(IFreqaiModel):
""" """
logger.info('--------------------Starting training ' logger.info('--------------------Starting training '
f'{metadata["pair"]} --------------------') f'{pair} --------------------')
# create the full feature list based on user config info # create the full feature list based on user config info
dh.training_features_list = dh.find_features(unfiltered_dataframe) dh.training_features_list = dh.find_features(unfiltered_dataframe)
@ -88,7 +88,7 @@ class CatboostPredictionModel(IFreqaiModel):
model = self.fit(data_dictionary) model = self.fit(data_dictionary)
logger.info(f'--------------------done training {metadata["pair"]}--------------------') logger.info(f'--------------------done training {pair}--------------------')
return model return model

View File

@ -532,7 +532,7 @@ class IStrategy(ABC, HyperStrategyMixin):
""" """
return None return None
def populate_any_indicators(self, metadata: dict, pair: str, df: DataFrame, tf: str, def populate_any_indicators(self, basepair: str, pair: str, df: DataFrame, tf: str,
informative: DataFrame = None, coin: str = "") -> DataFrame: informative: DataFrame = None, coin: str = "") -> DataFrame:
""" """
Function designed to automatically generate, name and merge features Function designed to automatically generate, name and merge features

View File

@ -116,7 +116,6 @@ class FreqaiExampleStrategy(IStrategy):
informative[f"{coin}bb_upperband-period_{t}"] informative[f"{coin}bb_upperband-period_{t}"]
- informative[f"{coin}bb_lowerband-period_{t}"] - informative[f"{coin}bb_lowerband-period_{t}"]
) / informative[f"{coin}bb_middleband-period_{t}"] ) / informative[f"{coin}bb_middleband-period_{t}"]
informative[f"%-{coin}close-bb_lower-period_{t}"] = ( informative[f"%-{coin}close-bb_lower-period_{t}"] = (
informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"] informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"]
) )
@ -153,7 +152,7 @@ class FreqaiExampleStrategy(IStrategy):
# Add generalized indicators here (because in live, it will call this # Add generalized indicators here (because in live, it will call this
# function to populate indicators during training). Notice how we ensure not to # function to populate indicators during training). Notice how we ensure not to
# add them multiple times # add them multiple times
if pair == metadata["pair"] and tf == self.timeframe: if pair == self.freqai_info['corr_pairlist'][0] and tf == self.timeframe:
df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25