merge datarehaul into main freqai branch

This commit is contained in:
robcaulk 2022-06-10 20:26:19 +02:00
commit eb47c74096
6 changed files with 397 additions and 99 deletions

View File

@ -66,8 +66,8 @@
"1h" "1h"
], ],
"train_period": 20, "train_period": 20,
"backtest_period": 2, "backtest_period": 0.001,
"identifier": "example2", "identifier": "constant_retrain_live",
"live_trained_timestamp": 0, "live_trained_timestamp": 0,
"corr_pairlist": [ "corr_pairlist": [
"BTC/USDT:USDT", "BTC/USDT:USDT",
@ -76,20 +76,20 @@
"feature_parameters": { "feature_parameters": {
"period": 20, "period": 20,
"shift": 2, "shift": 2,
"DI_threshold": 0, "DI_threshold": 0.9,
"weight_factor": 0.9, "weight_factor": 0.9,
"principal_component_analysis": false, "principal_component_analysis": false,
"use_SVM_to_remove_outliers": true, "use_SVM_to_remove_outliers": true,
"stratify": 0, "stratify": 0,
"indicator_max_period": 20, "indicator_max_period": 20,
"indicator_periods": [10, 20, 30] "indicator_periods": [10, 20]
}, },
"data_split_parameters": { "data_split_parameters": {
"test_size": 0.33, "test_size": 0.33,
"random_state": 1 "random_state": 1
}, },
"model_training_parameters": { "model_training_parameters": {
"n_estimators": 200, "n_estimators": 1000,
"task_type": "CPU" "task_type": "CPU"
} }
}, },

View File

@ -5,6 +5,7 @@ import json
import logging import logging
import re import re
import shutil import shutil
import threading
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Tuple from typing import Any, Dict, Tuple
@ -35,6 +36,8 @@ class FreqaiDataDrawer:
self.model_dictionary: Dict[str, Any] = {} self.model_dictionary: Dict[str, Any] = {}
self.model_return_values: Dict[str, Any] = {} self.model_return_values: Dict[str, Any] = {}
self.pair_data_dict: Dict[str, Any] = {} self.pair_data_dict: Dict[str, Any] = {}
self.historic_data: Dict[str, Any] = {}
# self.populated_historic_data: Dict[str, Any] = {} ?
self.follower_dict: Dict[str, Any] = {} self.follower_dict: Dict[str, Any] = {}
self.full_path = full_path self.full_path = full_path
self.follow_mode = follow_mode self.follow_mode = follow_mode
@ -42,9 +45,16 @@ class FreqaiDataDrawer:
self.create_follower_dict() self.create_follower_dict()
self.load_drawer_from_disk() self.load_drawer_from_disk()
self.training_queue: Dict[str, int] = {} self.training_queue: Dict[str, int] = {}
self.history_lock = threading.Lock()
# self.create_training_queue(pair_whitelist) # self.create_training_queue(pair_whitelist)
def load_drawer_from_disk(self): def load_drawer_from_disk(self):
"""
Locate and load a previously saved data drawer full of all pair model metadata in
present model folder.
:returns:
exists: bool = whether or not the drawer was located
"""
exists = Path(self.full_path / str('pair_dictionary.json')).resolve().exists() exists = Path(self.full_path / str('pair_dictionary.json')).resolve().exists()
if exists: if exists:
with open(self.full_path / str('pair_dictionary.json'), "r") as fp: with open(self.full_path / str('pair_dictionary.json'), "r") as fp:
@ -58,16 +68,25 @@ class FreqaiDataDrawer:
return exists return exists
def save_drawer_to_disk(self): def save_drawer_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
"""
with open(self.full_path / str('pair_dictionary.json'), "w") as fp: with open(self.full_path / str('pair_dictionary.json'), "w") as fp:
json.dump(self.pair_dict, fp, default=self.np_encoder) json.dump(self.pair_dict, fp, default=self.np_encoder)
def save_follower_dict_to_dist(self): def save_follower_dict_to_disk(self):
"""
Save follower dictionary to disk (used by strategy for persistent prediction targets)
"""
follower_name = self.config.get('bot_name', 'follower1') follower_name = self.config.get('bot_name', 'follower1')
with open(self.full_path / str('follower_dictionary-' + with open(self.full_path / str('follower_dictionary-' +
follower_name + '.json'), "w") as fp: follower_name + '.json'), "w") as fp:
json.dump(self.follower_dict, fp, default=self.np_encoder) json.dump(self.follower_dict, fp, default=self.np_encoder)
def create_follower_dict(self): def create_follower_dict(self):
"""
Create or dictionary for each follower to maintain unique persistent prediction targets
"""
follower_name = self.config.get('bot_name', 'follower1') follower_name = self.config.get('bot_name', 'follower1')
whitelist_pairs = self.config.get('exchange', {}).get('pair_whitelist') whitelist_pairs = self.config.get('exchange', {}).get('pair_whitelist')
@ -89,6 +108,18 @@ class FreqaiDataDrawer:
return object.item() return object.item()
def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]: def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]:
"""
Locate and load existing model metadata from persistent storage. If not located,
create a new one and append the current pair to it and prepare it for its first
training
:params:
metadata: dict = strategy furnished pair metadata
:returns:
model_filename: str = unique filename used for loading persistent objects from disk
trained_timestamp: int = the last time the coin was trained
coin_first: bool = If the coin is fresh without metadata
return_null_array: bool = Follower could not find pair metadata
"""
pair_in_dict = self.pair_dict.get(metadata['pair']) pair_in_dict = self.pair_dict.get(metadata['pair'])
data_path_set = self.pair_dict.get(metadata['pair'], {}).get('data_path', None) data_path_set = self.pair_dict.get(metadata['pair'], {}).get('data_path', None)
return_null_array = False return_null_array = False
@ -137,6 +168,7 @@ class FreqaiDataDrawer:
self.model_return_values[pair]['do_preds'] = dh.full_do_predict self.model_return_values[pair]['do_preds'] = dh.full_do_predict
self.model_return_values[pair]['target_mean'] = dh.full_target_mean self.model_return_values[pair]['target_mean'] = dh.full_target_mean
self.model_return_values[pair]['target_std'] = dh.full_target_std self.model_return_values[pair]['target_std'] = dh.full_target_std
self.model_return_values[pair]['DI_values'] = dh.full_DI_values
# if not self.follow_mode: # if not self.follow_mode:
# self.save_model_return_values_to_disk() # self.save_model_return_values_to_disk()
@ -157,6 +189,8 @@ class FreqaiDataDrawer:
self.model_return_values[pair]['predictions'] = np.append( self.model_return_values[pair]['predictions'] = np.append(
self.model_return_values[pair]['predictions'][i:], predictions[-1]) self.model_return_values[pair]['predictions'][i:], predictions[-1])
self.model_return_values[pair]['DI_values'] = np.append(
self.model_return_values[pair]['DI_values'][i:], dh.DI_values[-1])
self.model_return_values[pair]['do_preds'] = np.append( self.model_return_values[pair]['do_preds'] = np.append(
self.model_return_values[pair]['do_preds'][i:], do_preds[-1]) self.model_return_values[pair]['do_preds'][i:], do_preds[-1])
self.model_return_values[pair]['target_mean'] = np.append( self.model_return_values[pair]['target_mean'] = np.append(
@ -168,6 +202,8 @@ class FreqaiDataDrawer:
prepend = np.zeros(abs(length_difference) - 1) prepend = np.zeros(abs(length_difference) - 1)
self.model_return_values[pair]['predictions'] = np.insert( self.model_return_values[pair]['predictions'] = np.insert(
self.model_return_values[pair]['predictions'], 0, prepend) self.model_return_values[pair]['predictions'], 0, prepend)
self.model_return_values[pair]['DI_values'] = np.insert(
self.model_return_values[pair]['DI_values'], 0, prepend)
self.model_return_values[pair]['do_preds'] = np.insert( self.model_return_values[pair]['do_preds'] = np.insert(
self.model_return_values[pair]['do_preds'], 0, prepend) self.model_return_values[pair]['do_preds'], 0, prepend)
self.model_return_values[pair]['target_mean'] = np.insert( self.model_return_values[pair]['target_mean'] = np.insert(
@ -179,6 +215,7 @@ class FreqaiDataDrawer:
dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds']) dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds'])
dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean']) dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean'])
dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std']) dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std'])
dh.full_DI_values = copy.deepcopy(self.model_return_values[pair]['DI_values'])
# if not self.follow_mode: # if not self.follow_mode:
# self.save_model_return_values_to_disk() # self.save_model_return_values_to_disk()
@ -190,6 +227,7 @@ class FreqaiDataDrawer:
dh.full_do_predict = np.zeros(len_df) dh.full_do_predict = np.zeros(len_df)
dh.full_target_mean = np.zeros(len_df) dh.full_target_mean = np.zeros(len_df)
dh.full_target_std = np.zeros(len_df) dh.full_target_std = np.zeros(len_df)
dh.full_DI_values = np.zeros(len_df)
def purge_old_models(self) -> None: def purge_old_models(self) -> None:
@ -227,6 +265,12 @@ class FreqaiDataDrawer:
shutil.rmtree(v) shutil.rmtree(v)
deleted += 1 deleted += 1
def update_follower_metadata(self):
# follower needs to load from disk to get any changes made by leader to pair_dict
self.load_drawer_from_disk()
if self.config.get('freqai', {}).get('purge_old_models', False):
self.purge_old_models()
# to be used if we want to send predictions directly to the follower instead of forcing # to be used if we want to send predictions directly to the follower instead of forcing
# follower to load models and inference # follower to load models and inference
# def save_model_return_values_to_disk(self) -> None: # def save_model_return_values_to_disk(self) -> None:

View File

@ -25,9 +25,6 @@ from freqtrade.resolvers import ExchangeResolver
from freqtrade.strategy.interface import IStrategy from freqtrade.strategy.interface import IStrategy
# import scipy as spy # used for auto distribution assignment
SECONDS_IN_DAY = 86400 SECONDS_IN_DAY = 86400
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -52,6 +49,7 @@ class FreqaiDataKitchen:
self.target_std: npt.ArrayLike = np.array([]) self.target_std: npt.ArrayLike = np.array([])
self.full_predictions: npt.ArrayLike = np.array([]) self.full_predictions: npt.ArrayLike = np.array([])
self.full_do_predict: npt.ArrayLike = np.array([]) self.full_do_predict: npt.ArrayLike = np.array([])
self.full_DI_values: npt.ArrayLike = np.array([])
self.full_target_mean: npt.ArrayLike = np.array([]) self.full_target_mean: npt.ArrayLike = np.array([])
self.full_target_std: npt.ArrayLike = np.array([]) self.full_target_std: npt.ArrayLike = np.array([])
self.data_path = Path() self.data_path = Path()
@ -59,6 +57,7 @@ class FreqaiDataKitchen:
self.live = live self.live = live
self.pair = pair self.pair = pair
self.svm_model: linear_model.SGDOneClassSVM = None self.svm_model: linear_model.SGDOneClassSVM = None
self.set_all_pairs()
if not self.live: if not self.live:
self.full_timerange = self.create_fulltimerange(self.config["timerange"], self.full_timerange = self.create_fulltimerange(self.config["timerange"],
self.freqai_config.get("train_period") self.freqai_config.get("train_period")
@ -73,6 +72,12 @@ class FreqaiDataKitchen:
self.data_drawer = data_drawer self.data_drawer = data_drawer
def set_paths(self, metadata: dict, trained_timestamp: int = None,) -> None: def set_paths(self, metadata: dict, trained_timestamp: int = None,) -> None:
"""
Set the paths to the data for the present coin/botloop
:params:
metadata: dict = strategy furnished pair metadata
trained_timestamp: int = timestamp of most recent training
"""
self.full_path = Path(self.config['user_data_dir'] / self.full_path = Path(self.config['user_data_dir'] /
"models" / "models" /
str(self.freqai_config.get('identifier'))) str(self.freqai_config.get('identifier')))
@ -293,7 +298,7 @@ class FreqaiDataKitchen:
) )
if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live: if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
logger.warning( logger.warning(
f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100} percent' f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent'
' of training data dropped due to NaNs, model may perform inconsistent' ' of training data dropped due to NaNs, model may perform inconsistent'
'with expectations' 'with expectations'
) )
@ -515,6 +520,11 @@ class FreqaiDataKitchen:
return None return None
def pca_transform(self, filtered_dataframe: DataFrame) -> None: def pca_transform(self, filtered_dataframe: DataFrame) -> None:
"""
Use an existing pca transform to transform data into components
:params:
filtered_dataframe: DataFrame = the cleaned dataframe
"""
pca_components = self.pca.transform(filtered_dataframe) pca_components = self.pca.transform(filtered_dataframe)
self.data_dictionary["prediction_features"] = pd.DataFrame( self.data_dictionary["prediction_features"] = pd.DataFrame(
data=pca_components, data=pca_components,
@ -523,14 +533,26 @@ class FreqaiDataKitchen:
) )
def compute_distances(self) -> float: def compute_distances(self) -> float:
"""
Compute distances between each training point and every other training
point. This metric defines the neighborhood of trained data and is used
for prediction confidence in the Dissimilarity Index
"""
logger.info("computing average mean distance for all training points") logger.info("computing average mean distance for all training points")
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1) tc = self.freqai_config.get('model_training_parameters', {}).get('thread_count', -1)
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc)
avg_mean_dist = pairwise.mean(axis=1).mean() avg_mean_dist = pairwise.mean(axis=1).mean()
logger.info("avg_mean_dist %s", avg_mean_dist) logger.info(f'avg_mean_dist {avg_mean_dist:.2f}')
return avg_mean_dist return avg_mean_dist
def use_SVM_to_remove_outliers(self, predict: bool) -> None: def use_SVM_to_remove_outliers(self, predict: bool) -> None:
"""
Build/inference a Support Vector Machine to detect outliers
in training data and prediction
:params:
predict: bool = If true, inference an existing SVM model, else construct one
"""
if predict: if predict:
assert self.svm_model, "No svm model available for outlier removal" assert self.svm_model, "No svm model available for outlier removal"
@ -581,6 +603,13 @@ class FreqaiDataKitchen:
return return
def find_features(self, dataframe: DataFrame) -> list: def find_features(self, dataframe: DataFrame) -> list:
"""
Find features in the strategy provided dataframe
:params:
dataframe: DataFrame = strategy provided dataframe
:returns:
features: list = the features to be used for training/prediction
"""
column_names = dataframe.columns column_names = dataframe.columns
features = [c for c in column_names if '%' in c] features = [c for c in column_names if '%' in c]
if not features: if not features:
@ -601,17 +630,19 @@ class FreqaiDataKitchen:
n_jobs=-1, n_jobs=-1,
) )
self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
do_predict = np.where( do_predict = np.where(
distance.min(axis=0) / self.data["avg_mean_dist"] self.DI_values
< self.freqai_config.get("feature_parameters", {}).get("DI_threshold"), < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
1, 1,
0, 0,
) )
# logger.info( logger.info(
# "Distance checker tossed %s predictions for being too far from training data", f'DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for '
# len(do_predict) - do_predict.sum(), 'being too far from training data'
# ) )
self.do_predict += do_predict self.do_predict += do_predict
self.do_predict -= 1 self.do_predict -= 1
@ -639,6 +670,8 @@ class FreqaiDataKitchen:
self.full_predictions = np.append(self.full_predictions, predictions) self.full_predictions = np.append(self.full_predictions, predictions)
self.full_do_predict = np.append(self.full_do_predict, do_predict) self.full_do_predict = np.append(self.full_do_predict, do_predict)
if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
self.full_target_mean = np.append(self.full_target_mean, target_mean) self.full_target_mean = np.append(self.full_target_mean, target_mean)
self.full_target_std = np.append(self.full_target_std, target_std) self.full_target_std = np.append(self.full_target_std, target_std)
@ -653,6 +686,8 @@ class FreqaiDataKitchen:
filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count
self.full_predictions = np.append(filler, self.full_predictions) self.full_predictions = np.append(filler, self.full_predictions)
self.full_do_predict = np.append(filler, self.full_do_predict) self.full_do_predict = np.append(filler, self.full_do_predict)
if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
self.full_DI_values = np.append(filler, self.full_DI_values)
self.full_target_mean = np.append(filler, self.full_target_mean) self.full_target_mean = np.append(filler, self.full_target_mean)
self.full_target_std = np.append(filler, self.full_target_std) self.full_target_std = np.append(filler, self.full_target_std)
@ -697,7 +732,7 @@ class FreqaiDataKitchen:
# find the max indicator length required # find the max indicator length required
max_timeframe_chars = self.freqai_config.get('timeframes')[-1] max_timeframe_chars = self.freqai_config.get('timeframes')[-1]
max_period = self.freqai_config.get('feature_parameters', {}).get( max_period = self.freqai_config.get('feature_parameters', {}).get(
'indicator_max_period', 20) 'indicator_max_period', 50)
additional_seconds = 0 additional_seconds = 0
if max_timeframe_chars[-1] == 'd': if max_timeframe_chars[-1] == 'd':
additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2]) additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2])
@ -712,6 +747,8 @@ class FreqaiDataKitchen:
logger.warning('FreqAI could not detect max timeframe and therefore may not ' logger.warning('FreqAI could not detect max timeframe and therefore may not '
'download the proper amount of data for training') 'download the proper amount of data for training')
logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')
if trained_timestamp != 0: if trained_timestamp != 0:
elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
retrain = elapsed_time > self.freqai_config.get('backtest_period') retrain = elapsed_time > self.freqai_config.get('backtest_period')
@ -737,6 +774,14 @@ class FreqaiDataKitchen:
data_load_timerange.stopts = int(time) data_load_timerange.stopts = int(time)
retrain = True retrain = True
# logger.info(
# f'Total data download needed '
# f'{(data_load_timerange.stopts - data_load_timerange.startts)/SECONDS_IN_DAY:.2f}'
# ' days')
# logger.info(f'Total training timerange '
# f'{(trained_timerange.stopts - trained_timerange.startts)/SECONDS_IN_DAY} '
# ' days')
# if retrain: # if retrain:
# coin, _ = metadata['pair'].split("/") # coin, _ = metadata['pair'].split("/")
# # set the new data_path # # set the new data_path
@ -765,61 +810,194 @@ class FreqaiDataKitchen:
# enables persistence, but not fully implemented into save/load data yer # enables persistence, but not fully implemented into save/load data yer
# self.data['live_trained_timerange'] = str(int(trained_timerange.stopts)) # self.data['live_trained_timerange'] = str(int(trained_timerange.stopts))
def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict, # SUPERCEDED
strategy: IStrategy) -> None: # def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict,
# strategy: IStrategy) -> None:
# exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
# self.config, validate=False, freqai=True)
# # exchange = strategy.dp._exchange # closes ccxt session
# pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
# if str(metadata['pair']) not in pairs:
# pairs.append(str(metadata['pair']))
# refresh_backtest_ohlcv_data(
# exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'),
# datadir=self.config['datadir'], timerange=timerange,
# new_pairs_days=self.config['new_pairs_days'],
# erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'),
# trading_mode=self.config.get('trading_mode', 'spot'),
# prepend=self.config.get('prepend_data', False)
# )
def download_all_data_for_training(self, timerange: TimeRange) -> None:
"""
Called only once upon start of bot to download the necessary data for
populating indicators and training the model.
:params:
timerange: TimeRange = The full data timerange for populating the indicators
and training the model.
"""
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
self.config, validate=False, freqai=True) self.config, validate=False, freqai=True)
# exchange = strategy.dp._exchange # closes ccxt session
pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', [])) new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
if str(metadata['pair']) not in pairs:
pairs.append(str(metadata['pair']))
refresh_backtest_ohlcv_data( refresh_backtest_ohlcv_data(
exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'), exchange, pairs=self.all_pairs,
timeframes=self.freqai_config.get('timeframes'),
datadir=self.config['datadir'], timerange=timerange, datadir=self.config['datadir'], timerange=timerange,
new_pairs_days=self.config['new_pairs_days'], new_pairs_days=new_pairs_days,
erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'), erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'),
trading_mode=self.config.get('trading_mode', 'spot'), trading_mode=self.config.get('trading_mode', 'spot'),
prepend=self.config.get('prepend_data', False) prepend=self.config.get('prepend_data', False)
) )
def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any], def update_historic_data(self, strategy: IStrategy) -> None:
DataFrame]: """
corr_dataframes: Dict[Any, Any] = {} Append new candles to our stores historic data (in memory) so that
base_dataframes: Dict[Any, Any] = {} we do not need to load candle history from disk and we dont need to
pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']] pinging exchange multiple times for the same candle.
# timerange = TimeRange.parse_timerange(new_timerange) :params:
dataframe: DataFrame = strategy provided dataframe
"""
for tf in self.freqai_config.get('timeframes'): with self.data_drawer.history_lock:
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'], history_data = self.data_drawer.historic_data
timeframe=tf,
pair=metadata['pair'], timerange=timerange, for pair in self.all_pairs:
data_format=self.config.get( for tf in self.freqai_config.get('timeframes'):
'dataformat_ohlcv', 'json'),
candle_type=self.config.get( # check if newest candle is already appended
'trading_mode', 'spot')) df_dp = strategy.dp.get_pair_dataframe(pair, tf)
if pairs: if (
for p in pairs: str(history_data[pair][tf].iloc[-1]['date']) ==
if metadata['pair'] in p: str(df_dp.iloc[-1:]['date'].iloc[-1])
continue # dont repeat anything from whitelist ):
if p not in corr_dataframes: continue
corr_dataframes[p] = {}
corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'], index = df_dp.loc[
timeframe=tf, df_dp['date'] ==
pair=p, timerange=timerange, history_data[pair][tf].iloc[-1]['date']
data_format=self.config.get( ].index[0] + 1
'dataformat_ohlcv', 'json'), history_data[pair][tf] = pd.concat(
candle_type=self.config.get( [history_data[pair][tf],
'trading_mode', 'spot')) strategy.dp.get_pair_dataframe(pair, tf).iloc[index:]],
ignore_index=True, axis=0
)
logger.info(f'Length of history data {len(history_data[pair][tf])}')
def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
for pair in self.config.get('exchange', '').get('pair_whitelist'):
if pair not in self.all_pairs:
self.all_pairs.append(pair)
def load_all_pair_histories(self, timerange: TimeRange) -> None:
"""
Load pair histories for all whitelist and corr_pairlist pairs.
Only called once upon startup of bot.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period
"""
history_data = self.data_drawer.historic_data
for pair in self.all_pairs:
if pair not in history_data:
history_data[pair] = {}
for tf in self.freqai_config.get('timeframes'):
history_data[pair][tf] = load_pair_history(datadir=self.config['datadir'],
timeframe=tf,
pair=pair, timerange=timerange,
data_format=self.config.get(
'dataformat_ohlcv', 'json'),
candle_type=self.config.get(
'trading_mode', 'spot'))
def get_base_and_corr_dataframes(self, timerange: TimeRange,
metadata: dict) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
"""
Searches through our historic_data in memory and returns the dataframes relevant
to the present pair.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period
metadata: dict = strategy furnished pair metadata
"""
with self.data_drawer.history_lock:
corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
historic_data = self.data_drawer.historic_data
pairs = self.freqai_config.get('corr_pairlist', [])
for tf in self.freqai_config.get('timeframes'):
base_dataframes[tf] = self.slice_dataframe(
timerange,
historic_data[metadata['pair']][tf]
)
if pairs:
for p in pairs:
if metadata['pair'] in p:
continue # dont repeat anything from whitelist
if p not in corr_dataframes:
corr_dataframes[p] = {}
corr_dataframes[p][tf] = self.slice_dataframe(timerange,
historic_data[p][tf])
return corr_dataframes, base_dataframes return corr_dataframes, base_dataframes
# SUPERCEDED
# def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any],
# DataFrame]:
# corr_dataframes: Dict[Any, Any] = {}
# base_dataframes: Dict[Any, Any] = {}
# pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']]
# # timerange = TimeRange.parse_timerange(new_timerange)
# for tf in self.freqai_config.get('timeframes'):
# base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
# timeframe=tf,
# pair=metadata['pair'], timerange=timerange,
# data_format=self.config.get(
# 'dataformat_ohlcv', 'json'),
# candle_type=self.config.get(
# 'trading_mode', 'spot'))
# if pairs:
# for p in pairs:
# if metadata['pair'] in p:
# continue # dont repeat anything from whitelist
# if p not in corr_dataframes:
# corr_dataframes[p] = {}
# corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
# timeframe=tf,
# pair=p, timerange=timerange,
# data_format=self.config.get(
# 'dataformat_ohlcv', 'json'),
# candle_type=self.config.get(
# 'trading_mode', 'spot'))
# return corr_dataframes, base_dataframes
def use_strategy_to_populate_indicators(self, strategy: IStrategy, def use_strategy_to_populate_indicators(self, strategy: IStrategy,
corr_dataframes: dict, corr_dataframes: dict,
base_dataframes: dict, base_dataframes: dict,
metadata: dict) -> DataFrame: metadata: dict) -> DataFrame:
"""
Use the user defined strategy for populating indicators during
retrain
:params:
strategy: IStrategy = user defined strategy object
corr_dataframes: dict = dict containing the informative pair dataframes
(for user defined timeframes)
base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
metadata: dict = strategy furnished pair metadata
:returns:
dataframe: DataFrame = dataframe containing populated indicators
"""
dataframe = base_dataframes[self.config['timeframe']].copy() dataframe = base_dataframes[self.config['timeframe']].copy()
pairs = self.freqai_config.get("corr_pairlist", []) pairs = self.freqai_config.get("corr_pairlist", [])
@ -848,6 +1026,9 @@ class FreqaiDataKitchen:
return dataframe return dataframe
def fit_labels(self) -> None: def fit_labels(self) -> None:
"""
Fit the labels with a gaussian distribution
"""
import scipy as spy import scipy as spy
f = spy.stats.norm.fit(self.data_dictionary["train_labels"]) f = spy.stats.norm.fit(self.data_dictionary["train_labels"])

View File

@ -44,9 +44,9 @@ class IFreqaiModel(ABC):
self.config = config self.config = config
self.assert_config(self.config) self.assert_config(self.config)
self.freqai_info = config["freqai"] self.freqai_info = config["freqai"]
self.data_split_parameters = config["freqai"]["data_split_parameters"] self.data_split_parameters = config.get('freqai', {}).get("data_split_parameters")
self.model_training_parameters = config["freqai"]["model_training_parameters"] self.model_training_parameters = config.get("freqai", {}).get("model_training_parameters")
self.feature_parameters = config["freqai"]["feature_parameters"] self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
self.time_last_trained = None self.time_last_trained = None
self.current_time = None self.current_time = None
self.model = None self.model = None
@ -54,6 +54,7 @@ class IFreqaiModel(ABC):
self.training_on_separate_thread = False self.training_on_separate_thread = False
self.retrain = False self.retrain = False
self.first = True self.first = True
self.update_historic_data = 0
self.set_full_path() self.set_full_path()
self.follow_mode = self.freqai_info.get('follow_mode', False) self.follow_mode = self.freqai_info.get('follow_mode', False)
self.data_drawer = FreqaiDataDrawer(Path(self.full_path), self.data_drawer = FreqaiDataDrawer(Path(self.full_path),
@ -95,15 +96,12 @@ class IFreqaiModel(ABC):
self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.dh = FreqaiDataKitchen(self.config, self.data_drawer,
self.live, metadata["pair"]) self.live, metadata["pair"])
dh = self.start_live(dataframe, metadata, strategy, self.dh) dh = self.start_live(dataframe, metadata, strategy, self.dh, trainable=True)
else: else:
# we will have at max 2 separate instances of the kitchen at once. # we will have at max 2 separate instances of the kitchen at once.
self.dh_fg = FreqaiDataKitchen(self.config, self.data_drawer, self.dh_fg = FreqaiDataKitchen(self.config, self.data_drawer,
self.live, metadata["pair"]) self.live, metadata["pair"])
dh = self.start_live(dataframe, metadata, strategy, self.dh_fg) dh = self.start_live(dataframe, metadata, strategy, self.dh_fg, trainable=False)
# return (dh.full_predictions, dh.full_do_predict,
# dh.full_target_mean, dh.full_target_std)
# For backtesting, each pair enters and then gets trained for each window along the # For backtesting, each pair enters and then gets trained for each window along the
# sliding window defined by "train_period" (training window) and "backtest_period" # sliding window defined by "train_period" (training window) and "backtest_period"
@ -115,8 +113,9 @@ class IFreqaiModel(ABC):
logger.info(f'Training {len(self.dh.training_timeranges)} timeranges') logger.info(f'Training {len(self.dh.training_timeranges)} timeranges')
dh = self.start_backtesting(dataframe, metadata, self.dh) dh = self.start_backtesting(dataframe, metadata, self.dh)
return (dh.full_predictions, dh.full_do_predict, return self.return_values(dataframe, dh)
dh.full_target_mean, dh.full_target_std) # return (dh.full_predictions, dh.full_do_predict,
# dh.full_target_mean, dh.full_target_std)
def start_backtesting(self, dataframe: DataFrame, metadata: dict, def start_backtesting(self, dataframe: DataFrame, metadata: dict,
dh: FreqaiDataKitchen) -> FreqaiDataKitchen: dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
@ -185,7 +184,8 @@ class IFreqaiModel(ABC):
return dh return dh
def start_live(self, dataframe: DataFrame, metadata: dict, def start_live(self, dataframe: DataFrame, metadata: dict,
strategy: IStrategy, dh: FreqaiDataKitchen) -> FreqaiDataKitchen: strategy: IStrategy, dh: FreqaiDataKitchen,
trainable: bool) -> FreqaiDataKitchen:
""" """
The main broad execution for dry/live. This function will check if a retraining should be The main broad execution for dry/live. This function will check if a retraining should be
performed, and if so, retrain and reset the model. performed, and if so, retrain and reset the model.
@ -198,25 +198,31 @@ class IFreqaiModel(ABC):
dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only
""" """
# update follower
if self.follow_mode: if self.follow_mode:
# follower needs to load from disk to get any changes made by leader to pair_dict self.data_drawer.update_follower_metadata()
self.data_drawer.load_drawer_from_disk()
if self.freqai_info.get('purge_old_models', False):
self.data_drawer.purge_old_models()
# get the model metadata associated with the current pair
(model_filename, (model_filename,
trained_timestamp, trained_timestamp,
coin_first, coin_first,
return_null_array) = self.data_drawer.get_pair_dict_info(metadata) return_null_array) = self.data_drawer.get_pair_dict_info(metadata)
# if the files do not yet exist, the follower returns null arrays to strategy # if the metadata doesnt exist, the follower returns null arrays to strategy
if self.follow_mode and return_null_array: if self.follow_mode and return_null_array:
logger.info('Returning null array from follower to strategy') logger.info('Returning null array from follower to strategy')
self.data_drawer.return_null_values_to_strategy(dataframe, dh) self.data_drawer.return_null_values_to_strategy(dataframe, dh)
return dh return dh
if (not self.training_on_separate_thread and not self.follow_mode # append the historic data once per round
and self.data_drawer.pair_dict[metadata['pair']]['priority'] == 1) or coin_first: if self.data_drawer.historic_data:
dh.update_historic_data(strategy)
logger.info(f'Updating historic data on pair {metadata["pair"]}')
# if trainable, check if model needs training, if so compute new timerange,
# then save model and metadata.
# if not trainable, load existing data
if (trainable or coin_first) and not self.follow_mode:
file_exists = False file_exists = False
if trained_timestamp != 0: # historical model available if trained_timestamp != 0: # historical model available
@ -231,6 +237,15 @@ class IFreqaiModel(ABC):
data_load_timerange) = dh.check_if_new_training_required(trained_timestamp) data_load_timerange) = dh.check_if_new_training_required(trained_timestamp)
dh.set_paths(metadata, new_trained_timerange.stopts) dh.set_paths(metadata, new_trained_timerange.stopts)
# download candle history if it is not already in memory
if not self.data_drawer.historic_data:
logger.info('Downloading all training data for all pairs in whitelist and '
'corr_pairlist, this may take a while if you do not have the '
'data saved')
dh.download_all_data_for_training(data_load_timerange)
dh.load_all_pair_histories(data_load_timerange)
# train the model on the trained timerange
if self.retrain or not file_exists: if self.retrain or not file_exists:
if coin_first: if coin_first:
self.train_model_in_series(new_trained_timerange, metadata, self.train_model_in_series(new_trained_timerange, metadata,
@ -241,17 +256,24 @@ class IFreqaiModel(ABC):
metadata, strategy, metadata, strategy,
dh, data_load_timerange) dh, data_load_timerange)
elif self.training_on_separate_thread and not self.follow_mode: elif not trainable and not self.follow_mode:
logger.info("FreqAI training a new model on background thread.") logger.info(f'{metadata["pair"]} holds spot '
f'{self.data_drawer.pair_dict[metadata["pair"]]["priority"]} '
'in training queue')
elif self.follow_mode: elif self.follow_mode:
dh.set_paths(metadata, trained_timestamp) dh.set_paths(metadata, trained_timestamp)
logger.info('FreqAI instance set to follow_mode, finding existing pair' logger.info('FreqAI instance set to follow_mode, finding existing pair'
f'using { self.identifier }') f'using { self.identifier }')
# load the model and associated data into the data kitchen
self.model = dh.load_data(coin=metadata['pair']) self.model = dh.load_data(coin=metadata['pair'])
# ensure user is feeding the correct indicators to the model
self.check_if_feature_list_matches_strategy(dataframe, dh) self.check_if_feature_list_matches_strategy(dataframe, dh)
# hold the historical predictions in memory so we are sending back
# correct array to strategy FIXME currently broken, but only affecting
# Frequi reporting. Signals remain unaffeted.
if metadata['pair'] not in self.data_drawer.model_return_values: if metadata['pair'] not in self.data_drawer.model_return_values:
preds, do_preds = self.predict(dataframe, dh) preds, do_preds = self.predict(dataframe, dh)
dh.append_predictions(preds, do_preds, len(dataframe)) dh.append_predictions(preds, do_preds, len(dataframe))
@ -268,6 +290,13 @@ class IFreqaiModel(ABC):
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame, def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
dh: FreqaiDataKitchen) -> None: dh: FreqaiDataKitchen) -> None:
"""
Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
to a folder holding existing models.
:params:
dataframe: DataFrame = strategy provided dataframe
dh: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop
"""
strategy_provided_features = dh.find_features(dataframe) strategy_provided_features = dh.find_features(dataframe)
if 'training_features_list_raw' in dh.data: if 'training_features_list_raw' in dh.data:
feature_list = dh.data['training_features_list_raw'] feature_list = dh.data['training_features_list_raw']
@ -356,11 +385,25 @@ class IFreqaiModel(ABC):
def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict, def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict,
strategy: IStrategy, dh: FreqaiDataKitchen, strategy: IStrategy, dh: FreqaiDataKitchen,
data_load_timerange: TimeRange): data_load_timerange: TimeRange):
"""
Retreive data and train model on separate thread. Always called if the model folder already
contains a full set of trained models.
:params:
new_trained_timerange: TimeRange = the timerange to train the model on
metadata: dict = strategy provided metadata
strategy: IStrategy = user defined strategy object
dh: FreqaiDataKitchen = non-persistent data container for current coin/loop
data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators
(larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs)
"""
# with nostdout(): # with nostdout():
dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy) # dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
metadata) # metadata)
corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
metadata)
# protecting from common benign errors associated with grabbing new data from exchange: # protecting from common benign errors associated with grabbing new data from exchange:
try: try:
@ -370,9 +413,8 @@ class IFreqaiModel(ABC):
metadata) metadata)
unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe) unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
except Exception: except Exception as err:
logger.warning('Mismatched sizes encountered in strategy') logger.exception(err)
# self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
self.training_on_separate_thread = False self.training_on_separate_thread = False
self.retrain = False self.retrain = False
return return
@ -381,7 +423,6 @@ class IFreqaiModel(ABC):
model = self.train(unfiltered_dataframe, metadata, dh) model = self.train(unfiltered_dataframe, metadata, dh)
except ValueError: except ValueError:
logger.warning('Value error encountered during training') logger.warning('Value error encountered during training')
# self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
self.training_on_separate_thread = False self.training_on_separate_thread = False
self.retrain = False self.retrain = False
return return
@ -408,10 +449,22 @@ class IFreqaiModel(ABC):
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict, def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
strategy: IStrategy, dh: FreqaiDataKitchen, strategy: IStrategy, dh: FreqaiDataKitchen,
data_load_timerange: TimeRange): data_load_timerange: TimeRange):
"""
dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy) Retreive data and train model in single threaded mode (only used if model directory is empty
corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, upon startup for dry/live )
metadata) :params:
new_trained_timerange: TimeRange = the timerange to train the model on
metadata: dict = strategy provided metadata
strategy: IStrategy = user defined strategy object
dh: FreqaiDataKitchen = non-persistent data container for current coin/loop
data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators
(larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs)
"""
# dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy)
# corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange,
# metadata)
corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange,
metadata)
unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy, unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy,
corr_dataframes, corr_dataframes,
@ -481,3 +534,17 @@ class IFreqaiModel(ABC):
""" """
return return
@abstractmethod
def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
"""
User defines the dataframe to be returned to strategy here.
:params:
dataframe: DataFrame = the full dataframe for the current prediction (live)
or --timerange (backtesting)
dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only
:returns:
dataframe: DataFrame = dataframe filled with user defined data
"""
return

View File

@ -18,6 +18,17 @@ class CatboostPredictionModel(IFreqaiModel):
has its own DataHandler where data is held, saved, loaded, and managed. has its own DataHandler where data is held, saved, loaded, and managed.
""" """
def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
dataframe["prediction"] = dh.full_predictions
dataframe["do_predict"] = dh.full_do_predict
dataframe["target_mean"] = dh.full_target_mean
dataframe["target_std"] = dh.full_target_std
if self.freqai_info.get('feature_parameters', {}).get('DI-threshold', 0) > 0:
dataframe["DI"] = dh.full_DI_values
return dataframe
def make_labels(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: def make_labels(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame:
""" """
User defines the labels here (target values). User defines the labels here (target values).
@ -48,7 +59,7 @@ class CatboostPredictionModel(IFreqaiModel):
:model: Trained model which can be used to inference (self.predict) :model: Trained model which can be used to inference (self.predict)
""" """
logger.info('--------------------Starting training' logger.info('--------------------Starting training '
f'{metadata["pair"]} --------------------') f'{metadata["pair"]} --------------------')
# create the full feature list based on user config info # create the full feature list based on user config info

View File

@ -28,7 +28,7 @@ class FreqaiExampleStrategy(IStrategy):
canonical freqtrade configuration file under config['freqai']. canonical freqtrade configuration file under config['freqai'].
""" """
minimal_roi = {"0": 0.01, "240": -1} minimal_roi = {"0": 0.1, "240": -1}
plot_config = { plot_config = {
"main_plot": {}, "main_plot": {},
@ -47,7 +47,7 @@ class FreqaiExampleStrategy(IStrategy):
stoploss = -0.05 stoploss = -0.05
use_exit_signal = True use_exit_signal = True
startup_candle_count: int = 300 startup_candle_count: int = 300
can_short = False can_short = True
linear_roi_offset = DecimalParameter( linear_roi_offset = DecimalParameter(
0.00, 0.02, default=0.005, space="sell", optimize=False, load=True 0.00, 0.02, default=0.005, space="sell", optimize=False, load=True
@ -191,15 +191,10 @@ class FreqaiExampleStrategy(IStrategy):
# the model will return 4 values, its prediction, an indication of whether or not the # the model will return 4 values, its prediction, an indication of whether or not the
# prediction should be accepted, the target mean/std values from the labels used during # prediction should be accepted, the target mean/std values from the labels used during
# each training period. # each training period.
( dataframe = self.model.bridge.start(dataframe, metadata, self)
dataframe["prediction"],
dataframe["do_predict"],
dataframe["target_mean"],
dataframe["target_std"],
) = self.model.bridge.start(dataframe, metadata, self)
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.25
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.25
return dataframe return dataframe
def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame: def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
@ -256,13 +251,13 @@ class FreqaiExampleStrategy(IStrategy):
entry_tag = trade.enter_tag entry_tag = trade.enter_tag
if ('prediction' + entry_tag not in pair_dict[pair] or if ('prediction' + entry_tag not in pair_dict[pair] or
pair_dict[pair]['prediction' + entry_tag] == 0): pair_dict[pair]['prediction' + entry_tag] > 0):
with self.model.bridge.lock: with self.model.bridge.lock:
pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction']) pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction'])
if not follow_mode: if not follow_mode:
self.model.bridge.data_drawer.save_drawer_to_disk() self.model.bridge.data_drawer.save_drawer_to_disk()
else: else:
self.model.bridge.data_drawer.save_follower_dict_to_dist() self.model.bridge.data_drawer.save_follower_dict_to_disk()
roi_price = pair_dict[pair]['prediction' + entry_tag] roi_price = pair_dict[pair]['prediction' + entry_tag]
roi_time = self.max_roi_time_long.value roi_time = self.max_roi_time_long.value
@ -305,7 +300,7 @@ class FreqaiExampleStrategy(IStrategy):
if not follow_mode: if not follow_mode:
self.model.bridge.data_drawer.save_drawer_to_disk() self.model.bridge.data_drawer.save_drawer_to_disk()
else: else:
self.model.bridge.data_drawer.save_follower_dict_to_dist() self.model.bridge.data_drawer.save_follower_dict_to_disk()
return True return True