rehaul of backend data management - increasing performance by holding history in memory, reducing load on the ratelimit by only pinging exchange once per candle. Improve code readability.

This commit is contained in:
robcaulk 2022-06-03 15:19:46 +02:00
parent 4ac6ef2972
commit 16b4a5b71f
5 changed files with 342 additions and 70 deletions

View File

@ -35,6 +35,8 @@ class FreqaiDataDrawer:
self.model_dictionary: Dict[str, Any] = {}
self.model_return_values: Dict[str, Any] = {}
self.pair_data_dict: Dict[str, Any] = {}
self.historic_data: Dict[str, Any] = {}
# self.populated_historic_data: Dict[str, Any] = {} ?
self.follower_dict: Dict[str, Any] = {}
self.full_path = full_path
self.follow_mode = follow_mode
@ -45,6 +47,12 @@ class FreqaiDataDrawer:
# self.create_training_queue(pair_whitelist)
def load_drawer_from_disk(self):
"""
Locate and load a previously saved data drawer full of all pair model metadata in
present model folder.
:returns:
exists: bool = whether or not the drawer was located
"""
exists = Path(self.full_path / str('pair_dictionary.json')).resolve().exists()
if exists:
with open(self.full_path / str('pair_dictionary.json'), "r") as fp:
@ -58,16 +66,25 @@ class FreqaiDataDrawer:
return exists
def save_drawer_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
"""
with open(self.full_path / str('pair_dictionary.json'), "w") as fp:
json.dump(self.pair_dict, fp, default=self.np_encoder)
def save_follower_dict_to_dist(self):
def save_follower_dict_to_disk(self):
"""
Save follower dictionary to disk (used by strategy for persistent prediction targets)
"""
follower_name = self.config.get('bot_name', 'follower1')
with open(self.full_path / str('follower_dictionary-' +
follower_name + '.json'), "w") as fp:
json.dump(self.follower_dict, fp, default=self.np_encoder)
def create_follower_dict(self):
"""
Create or dictionary for each follower to maintain unique persistent prediction targets
"""
follower_name = self.config.get('bot_name', 'follower1')
whitelist_pairs = self.config.get('exchange', {}).get('pair_whitelist')
@ -89,6 +106,18 @@ class FreqaiDataDrawer:
return object.item()
def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]:
"""
Locate and load existing model metadata from persistent storage. If not located,
create a new one and append the current pair to it and prepare it for its first
training
:params:
metadata: dict = strategy furnished pair metadata
:returns:
model_filename: str = unique filename used for loading persistent objects from disk
trained_timestamp: int = the last time the coin was trained
coin_first: bool = If the coin is fresh without metadata
return_null_array: bool = Follower could not find pair metadata
"""
pair_in_dict = self.pair_dict.get(metadata['pair'])
data_path_set = self.pair_dict.get(metadata['pair'], {}).get('data_path', None)
return_null_array = False
@ -137,6 +166,7 @@ class FreqaiDataDrawer:
self.model_return_values[pair]['do_preds'] = dh.full_do_predict
self.model_return_values[pair]['target_mean'] = dh.full_target_mean
self.model_return_values[pair]['target_std'] = dh.full_target_std
self.model_return_values[pair]['DI_values'] = dh.full_DI_values
# if not self.follow_mode:
# self.save_model_return_values_to_disk()
@ -157,6 +187,8 @@ class FreqaiDataDrawer:
self.model_return_values[pair]['predictions'] = np.append(
self.model_return_values[pair]['predictions'][i:], predictions[-1])
self.model_return_values[pair]['DI_values'] = np.append(
self.model_return_values[pair]['DI_values'][i:], dh.DI_values[-1])
self.model_return_values[pair]['do_preds'] = np.append(
self.model_return_values[pair]['do_preds'][i:], do_preds[-1])
self.model_return_values[pair]['target_mean'] = np.append(
@ -168,6 +200,8 @@ class FreqaiDataDrawer:
prepend = np.zeros(abs(length_difference) - 1)
self.model_return_values[pair]['predictions'] = np.insert(
self.model_return_values[pair]['predictions'], 0, prepend)
self.model_return_values[pair]['DI_values'] = np.insert(
self.model_return_values[pair]['DI_values'], 0, prepend)
self.model_return_values[pair]['do_preds'] = np.insert(
self.model_return_values[pair]['do_preds'], 0, prepend)
self.model_return_values[pair]['target_mean'] = np.insert(
@ -179,6 +213,7 @@ class FreqaiDataDrawer:
dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds'])
dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean'])
dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std'])
dh.full_DI_values = copy.deepcopy(self.model_return_values[pair]['DI_values'])
# if not self.follow_mode:
# self.save_model_return_values_to_disk()
@ -190,6 +225,7 @@ class FreqaiDataDrawer:
dh.full_do_predict = np.zeros(len_df)
dh.full_target_mean = np.zeros(len_df)
dh.full_target_std = np.zeros(len_df)
dh.full_DI_values = np.zeros(len_df)
def purge_old_models(self) -> None:
@ -227,6 +263,12 @@ class FreqaiDataDrawer:
shutil.rmtree(v)
deleted += 1
def update_follower_metadata(self):
# follower needs to load from disk to get any changes made by leader to pair_dict
self.load_drawer_from_disk()
if self.config.get('freqai', {})('purge_old_models', False):
self.purge_old_models()
# to be used if we want to send predictions directly to the follower instead of forcing
# follower to load models and inference
# def save_model_return_values_to_disk(self) -> None:

View File

@ -25,9 +25,6 @@ from freqtrade.resolvers import ExchangeResolver
from freqtrade.strategy.interface import IStrategy
# import scipy as spy # used for auto distribution assignment
SECONDS_IN_DAY = 86400
logger = logging.getLogger(__name__)
@ -52,6 +49,7 @@ class FreqaiDataKitchen:
self.target_std: npt.ArrayLike = np.array([])
self.full_predictions: npt.ArrayLike = np.array([])
self.full_do_predict: npt.ArrayLike = np.array([])
self.full_DI_values: npt.ArrayLike = np.array([])
self.full_target_mean: npt.ArrayLike = np.array([])
self.full_target_std: npt.ArrayLike = np.array([])
self.data_path = Path()
@ -59,6 +57,7 @@ class FreqaiDataKitchen:
self.live = live
self.pair = pair
self.svm_model: linear_model.SGDOneClassSVM = None
self.set_all_pairs()
if not self.live:
self.full_timerange = self.create_fulltimerange(self.config["timerange"],
self.freqai_config.get("train_period")
@ -73,6 +72,12 @@ class FreqaiDataKitchen:
self.data_drawer = data_drawer
def set_paths(self, metadata: dict, trained_timestamp: int = None,) -> None:
"""
Set the paths to the data for the present coin/botloop
:params:
metadata: dict = strategy furnished pair metadata
trained_timestamp: int = timestamp of most recent training
"""
self.full_path = Path(self.config['user_data_dir'] /
"models" /
str(self.freqai_config.get('identifier')))
@ -514,6 +519,11 @@ class FreqaiDataKitchen:
return None
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
"""
Use an existing pca transform to transform data into components
:params:
filtered_dataframe: DataFrame = the cleaned dataframe
"""
pca_components = self.pca.transform(filtered_dataframe)
self.data_dictionary["prediction_features"] = pd.DataFrame(
data=pca_components,
@ -522,6 +532,11 @@ class FreqaiDataKitchen:
)
def compute_distances(self) -> float:
"""
Compute distances between each training point and every other training
point. This metric defines the neighborhood of trained data and is used
for prediction confidence in the Dissimilarity Index
"""
logger.info("computing average mean distance for all training points")
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1)
avg_mean_dist = pairwise.mean(axis=1).mean()
@ -530,6 +545,12 @@ class FreqaiDataKitchen:
return avg_mean_dist
def use_SVM_to_remove_outliers(self, predict: bool) -> None:
"""
Build/inference a Support Vector Machine to detect outliers
in training data and prediction
:params:
predict: bool = If true, inference an existing SVM model, else construct one
"""
if predict:
assert self.svm_model, "No svm model available for outlier removal"
@ -580,6 +601,13 @@ class FreqaiDataKitchen:
return
def find_features(self, dataframe: DataFrame) -> list:
"""
Find features in the strategy provided dataframe
:params:
dataframe: DataFrame = strategy provided dataframe
:returns:
features: list = the features to be used for training/prediction
"""
column_names = dataframe.columns
features = [c for c in column_names if '%' in c]
if not features:
@ -600,17 +628,19 @@ class FreqaiDataKitchen:
n_jobs=-1,
)
self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
do_predict = np.where(
distance.min(axis=0) / self.data["avg_mean_dist"]
self.DI_values
< self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
1,
0,
)
# logger.info(
# "Distance checker tossed %s predictions for being too far from training data",
# len(do_predict) - do_predict.sum(),
# )
logger.info(
"DI tossed %s predictions for being too far from training data",
len(do_predict) - do_predict.sum(),
)
self.do_predict += do_predict
self.do_predict -= 1
@ -638,6 +668,7 @@ class FreqaiDataKitchen:
self.full_predictions = np.append(self.full_predictions, predictions)
self.full_do_predict = np.append(self.full_do_predict, do_predict)
self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
self.full_target_mean = np.append(self.full_target_mean, target_mean)
self.full_target_std = np.append(self.full_target_std, target_std)
@ -652,6 +683,7 @@ class FreqaiDataKitchen:
filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count
self.full_predictions = np.append(filler, self.full_predictions)
self.full_do_predict = np.append(filler, self.full_do_predict)
self.full_DI_values = np.append(filler, self.full_DI_values)
self.full_target_mean = np.append(filler, self.full_target_mean)
self.full_target_std = np.append(filler, self.full_target_std)
@ -711,6 +743,8 @@ class FreqaiDataKitchen:
logger.warning('FreqAI could not detect max timeframe and therefore may not '
'download the proper amount of data for training')
# logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days')
if trained_timestamp != 0:
elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
retrain = elapsed_time > self.freqai_config.get('backtest_period')
@ -764,61 +798,176 @@ class FreqaiDataKitchen:
# enables persistence, but not fully implemented into save/load data yer
# self.data['live_trained_timerange'] = str(int(trained_timerange.stopts))
def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict,
strategy: IStrategy) -> None:
# SUPERCEDED
# def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict,
# strategy: IStrategy) -> None:
# exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
# self.config, validate=False, freqai=True)
# # exchange = strategy.dp._exchange # closes ccxt session
# pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
# if str(metadata['pair']) not in pairs:
# pairs.append(str(metadata['pair']))
# refresh_backtest_ohlcv_data(
# exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'),
# datadir=self.config['datadir'], timerange=timerange,
# new_pairs_days=self.config['new_pairs_days'],
# erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'),
# trading_mode=self.config.get('trading_mode', 'spot'),
# prepend=self.config.get('prepend_data', False)
# )
def download_all_data_for_training(self, timerange: TimeRange) -> None:
"""
Called only once upon start of bot to download the necessary data for
populating indicators and training the model.
:params:
timerange: TimeRange = The full data timerange for populating the indicators
and training the model.
"""
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
self.config, validate=False, freqai=True)
# exchange = strategy.dp._exchange # closes ccxt session
pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
if str(metadata['pair']) not in pairs:
pairs.append(str(metadata['pair']))
new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
refresh_backtest_ohlcv_data(
exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'),
exchange, pairs=self.all_pairs,
timeframes=self.freqai_config.get('timeframes'),
datadir=self.config['datadir'], timerange=timerange,
new_pairs_days=self.config['new_pairs_days'],
new_pairs_days=new_pairs_days,
erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'),
trading_mode=self.config.get('trading_mode', 'spot'),
prepend=self.config.get('prepend_data', False)
)
def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any],
DataFrame]:
corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']]
# timerange = TimeRange.parse_timerange(new_timerange)
def update_historic_data(self, strategy: IStrategy) -> None:
"""
Append new candles to our stores historic data (in memory) so that
we do not need to load candle history from disk and we dont need to
pinging exchange multiple times for the same candle.
:params:
dataframe: DataFrame = strategy provided dataframe
"""
history_data = self.data_drawer.historic_data
for pair in self.all_pairs:
for tf in self.freqai_config.get('timeframes'):
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
history_data[pair][tf] = pd.concat(
[history_data[pair][tf],
strategy.dp.get_pair_dataframe(pair, tf).iloc[-1]],
axis=0
)
def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
for pair in self.config.get('exchange', '').get('pair_whitelist'):
if pair not in self.all_pairs:
self.all_pairs.append(pair)
def load_all_pair_histories(self, timerange: TimeRange) -> None:
"""
Load pair histories for all whitelist and corr_pairlist pairs.
Only called once upon startup of bot.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period
"""
history_data = self.data_drawer.historic_data
for pair in self.all_pairs:
if pair not in history_data:
history_data[pair] = {}
for tf in self.freqai_config.get('timeframes'):
history_data[pair][tf] = load_pair_history(datadir=self.config['datadir'],
timeframe=tf,
pair=metadata['pair'], timerange=timerange,
pair=pair, timerange=timerange,
data_format=self.config.get(
'dataformat_ohlcv', 'json'),
candle_type=self.config.get(
'trading_mode', 'spot'))
def get_base_and_corr_dataframes(self, timerange: TimeRange,
metadata: dict) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
"""
Searches through our historic_data in memory and returns the dataframes relevant
to the present pair.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period
metadata: dict = strategy furnished pair metadata
"""
corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
historic_data = self.data_drawer.historic_data
pairs = self.freqai_config.get('corr_pairlist', [])
for tf in self.freqai_config.get('timeframes'):
base_dataframes[tf] = self.slice_dataframe(
timerange,
historic_data[metadata['pair']][tf]
)
if pairs:
for p in pairs:
if metadata['pair'] in p:
continue # dont repeat anything from whitelist
if p not in corr_dataframes:
corr_dataframes[p] = {}
corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
timeframe=tf,
pair=p, timerange=timerange,
data_format=self.config.get(
'dataformat_ohlcv', 'json'),
candle_type=self.config.get(
'trading_mode', 'spot'))
corr_dataframes[p][tf] = self.slice_dataframe(timerange, historic_data[p][tf])
return corr_dataframes, base_dataframes
# SUPERCEDED
# def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any],
# DataFrame]:
# corr_dataframes: Dict[Any, Any] = {}
# base_dataframes: Dict[Any, Any] = {}
# pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']]
# # timerange = TimeRange.parse_timerange(new_timerange)
# for tf in self.freqai_config.get('timeframes'):
# base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
# timeframe=tf,
# pair=metadata['pair'], timerange=timerange,
# data_format=self.config.get(
# 'dataformat_ohlcv', 'json'),
# candle_type=self.config.get(
# 'trading_mode', 'spot'))
# if pairs:
# for p in pairs:
# if metadata['pair'] in p:
# continue # dont repeat anything from whitelist
# if p not in corr_dataframes:
# corr_dataframes[p] = {}
# corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
# timeframe=tf,
# pair=p, timerange=timerange,
# data_format=self.config.get(
# 'dataformat_ohlcv', 'json'),
# candle_type=self.config.get(
# 'trading_mode', 'spot'))
# return corr_dataframes, base_dataframes
def use_strategy_to_populate_indicators(self, strategy: IStrategy,
corr_dataframes: dict,
base_dataframes: dict,
metadata: dict) -> DataFrame:
"""
Use the user defined strategy for populating indicators during
retrain
:params:
strategy: IStrategy = user defined strategy object
corr_dataframes: dict = dict containing the informative pair dataframes
(for user defined timeframes)
base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
metadata: dict = strategy furnished pair metadata
:returns:
dataframe: DataFrame = dataframe containing populated indicators
"""
dataframe = base_dataframes[self.config['timeframe']].copy()
pairs = self.freqai_config.get("corr_pairlist", [])
@ -847,6 +996,9 @@ class FreqaiDataKitchen:
return dataframe
def fit_labels(self) -> None:
"""
Fit the labels with a gaussian distribution
"""
import scipy as spy
f = spy.stats.norm.fit(self.data_dictionary["train_labels"])

View File

@ -44,9 +44,9 @@ class IFreqaiModel(ABC):
self.config = config
self.assert_config(self.config)
self.freqai_info = config["freqai"]
self.data_split_parameters = config["freqai"]["data_split_parameters"]
self.model_training_parameters = config["freqai"]["model_training_parameters"]
self.feature_parameters = config["freqai"]["feature_parameters"]
self.data_split_parameters = config.get('freqai', {}).get("data_split_parameters")
self.model_training_parameters = config.get("freqai", {}).get("model_training_parameters")
self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
self.time_last_trained = None
self.current_time = None
self.model = None
@ -54,6 +54,7 @@ class IFreqaiModel(ABC):
self.training_on_separate_thread = False
self.retrain = False
self.first = True
self.update_historic_data = 0
self.set_full_path()
self.follow_mode = self.freqai_info.get('follow_mode', False)
self.data_drawer = FreqaiDataDrawer(Path(self.full_path),
@ -95,15 +96,12 @@ class IFreqaiModel(ABC):
self.dh = FreqaiDataKitchen(self.config, self.data_drawer,
self.live, metadata["pair"])
dh = self.start_live(dataframe, metadata, strategy, self.dh)
dh = self.start_live(dataframe, metadata, strategy, self.dh, trainable=True)
else:
# we will have at max 2 separate instances of the kitchen at once.
self.dh_fg = FreqaiDataKitchen(self.config, self.data_drawer,
self.live, metadata["pair"])
dh = self.start_live(dataframe, metadata, strategy, self.dh_fg)
# return (dh.full_predictions, dh.full_do_predict,
# dh.full_target_mean, dh.full_target_std)
dh = self.start_live(dataframe, metadata, strategy, self.dh_fg, trainable=False)
# For backtesting, each pair enters and then gets trained for each window along the
# sliding window defined by "train_period" (training window) and "backtest_period"
@ -115,8 +113,9 @@ class IFreqaiModel(ABC):
logger.info(f'Training {len(self.dh.training_timeranges)} timeranges')
dh = self.start_backtesting(dataframe, metadata, self.dh)
return (dh.full_predictions, dh.full_do_predict,
dh.full_target_mean, dh.full_target_std)
return self.return_values(dataframe, dh)
# return (dh.full_predictions, dh.full_do_predict,
# dh.full_target_mean, dh.full_target_std)
def start_backtesting(self, dataframe: DataFrame, metadata: dict,
dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
@ -185,7 +184,8 @@ class IFreqaiModel(ABC):
return dh
def start_live(self, dataframe: DataFrame, metadata: dict,
strategy: IStrategy, dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
strategy: IStrategy, dh: FreqaiDataKitchen,
trainable: bool) -> FreqaiDataKitchen:
"""
The main broad execution for dry/live. This function will check if a retraining should be
performed, and if so, retrain and reset the model.
@ -198,25 +198,35 @@ class IFreqaiModel(ABC):
dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only
"""
# update follower
if self.follow_mode:
# follower needs to load from disk to get any changes made by leader to pair_dict
self.data_drawer.load_drawer_from_disk()
if self.freqai_info.get('purge_old_models', False):
self.data_drawer.purge_old_models()
self.data_drawer.update_follower_metadata()
# get the model metadata associated with the current pair
(model_filename,
trained_timestamp,
coin_first,
return_null_array) = self.data_drawer.get_pair_dict_info(metadata)
# if the files do not yet exist, the follower returns null arrays to strategy
# if the metadata doesnt exist, the follower returns null arrays to strategy
if self.follow_mode and return_null_array:
logger.info('Returning null array from follower to strategy')
self.data_drawer.return_null_values_to_strategy(dataframe, dh)
return dh
if (not self.training_on_separate_thread and not self.follow_mode
and self.data_drawer.pair_dict[metadata['pair']]['priority'] == 1) or coin_first:
# append the historic data once per round
if (self.data_drawer.historic_data and
self.update_historic_data >= len(self.config.get('exchange', '')
.get('pair_whitelist'))):
dh.update_historic_data(strategy)
self.update_historic_data = 1
else:
self.update_historic_data += 1
# if trainable, check if model needs training, if so compute new timerange,
# then save model and metadata.
# if not trainable, load existing data
if (trainable and not self.follow_mode) or coin_first:
file_exists = False
if trained_timestamp != 0: # historical model available
@ -231,6 +241,15 @@ class IFreqaiModel(ABC):
data_load_timerange) = dh.check_if_new_training_required(trained_timestamp)
dh.set_paths(metadata, new_trained_timerange.stopts)
# download candle history if it is not already in memory
if not self.data_drawer.historic_data:
logger.info('Downloading all training data for all pairs in whitelist and '
'corr_pairlist, this may take a while if you do not have the '
'data saved')
dh.download_all_data_for_training(data_load_timerange)
dh.load_all_pair_histories(data_load_timerange)
# train the model on the trained timerange
if self.retrain or not file_exists:
if coin_first:
self.train_model_in_series(new_trained_timerange, metadata,
@ -241,17 +260,24 @@ class IFreqaiModel(ABC):
metadata, strategy,
dh, data_load_timerange)
elif self.training_on_separate_thread and not self.follow_mode:
logger.info("FreqAI training a new model on background thread.")
elif not trainable and not self.follow_mode:
logger.info(f'{metadata["pair"]} holds spot '
f'{self.data_drawer.pair_dict[metadata["pair"]]["priority"]} '
'in training queue')
elif self.follow_mode:
dh.set_paths(metadata, trained_timestamp)
logger.info('FreqAI instance set to follow_mode, finding existing pair'
f'using { self.identifier }')
# load the model and associated data into the data kitchen
self.model = dh.load_data(coin=metadata['pair'])
# ensure user is feeding the correct indicators to the model
self.check_if_feature_list_matches_strategy(dataframe, dh)
# hold the historical predictions in memory so we are sending back
# correct array to strategy FIXME currently broken, but only affecting
# Frequi reporting. Signals remain unaffeted.
if metadata['pair'] not in self.data_drawer.model_return_values:
preds, do_preds = self.predict(dataframe, dh)
dh.append_predictions(preds, do_preds, len(dataframe))
@ -268,6 +294,13 @@ class IFreqaiModel(ABC):
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
dh: FreqaiDataKitchen) -> None:
"""
Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
to a folder holding existing models.
:params:
dataframe: DataFrame = strategy provided dataframe
dh: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop
"""
strategy_provided_features = dh.find_features(dataframe)
if 'training_features_list_raw' in dh.data:
feature_list = dh.data['training_features_list_raw']
@ -356,10 +389,23 @@ class IFreqaiModel(ABC):
def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict,
strategy: IStrategy, dh: FreqaiDataKitchen,
data_load_timerange: TimeRange):
"""
Retreive data and train model on separate thread. Always called if the model folder already
contains a full set of trained models.
:params:
new_trained_timerange: TimeRange = the timerange to train the model on
metadata: dict = strategy provided metadata
strategy: IStrategy = user defined strategy object
dh: FreqaiDataKitchen = non-persistent data container for current coin/loop
data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators
(larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs)
"""
# with nostdout():
dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
# dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
# corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
# metadata)
corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
metadata)
# protecting from common benign errors associated with grabbing new data from exchange:
@ -408,9 +454,21 @@ class IFreqaiModel(ABC):
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
strategy: IStrategy, dh: FreqaiDataKitchen,
data_load_timerange: TimeRange):
dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
"""
Retreive data and train model in single threaded mode (only used if model directory is empty
upon startup for dry/live )
:params:
new_trained_timerange: TimeRange = the timerange to train the model on
metadata: dict = strategy provided metadata
strategy: IStrategy = user defined strategy object
dh: FreqaiDataKitchen = non-persistent data container for current coin/loop
data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators
(larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs)
"""
# dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
# corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
# metadata)
corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
metadata)
unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
@ -481,3 +539,17 @@ class IFreqaiModel(ABC):
"""
return
@abstractmethod
def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
"""
User defines the dataframe to be returned to strategy here.
:params:
dataframe: DataFrame = the full dataframe for the current prediction (live)
or --timerange (backtesting)
dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only
:returns:
dataframe: DataFrame = dataframe filled with user defined data
"""
return

View File

@ -18,6 +18,17 @@ class CatboostPredictionModel(IFreqaiModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
dataframe["prediction"] = dh.full_predictions
dataframe["do_predict"] = dh.full_do_predict
dataframe["target_mean"] = dh.full_target_mean
dataframe["target_std"] = dh.full_target_std
if self.freqai_info('feature_parameters', {}).get('DI-threshold', 0) > 0:
dataframe["DI"] = dh.full_DI_values
return dataframe
def make_labels(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
"""
User defines the labels here (target values).

View File

@ -45,7 +45,7 @@ class FreqaiExampleStrategy(IStrategy):
process_only_new_candles = False
stoploss = -0.05
use_sell_signal = True
use_exit_signal = True
startup_candle_count: int = 300
can_short = False
@ -176,12 +176,7 @@ class FreqaiExampleStrategy(IStrategy):
# the model will return 4 values, its prediction, an indication of whether or not the
# prediction should be accepted, the target mean/std values from the labels used during
# each training period.
(
dataframe["prediction"],
dataframe["do_predict"],
dataframe["target_mean"],
dataframe["target_std"],
) = self.model.bridge.start(dataframe, metadata, self)
dataframe = self.model.bridge.start(dataframe, metadata, self)
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"]
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"]