black formatting on freqai files

This commit is contained in:
robcaulk 2022-07-03 10:59:38 +02:00
parent 106131ff0f
commit ffb39a5029
7 changed files with 508 additions and 427 deletions

View File

@ -1,4 +1,3 @@
import collections import collections
import json import json
import logging import logging
@ -27,10 +26,11 @@ class FreqaiDataDrawer:
This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is
reinstantiated for each coin. reinstantiated for each coin.
""" """
def __init__(self, full_path: Path, config: dict, follow_mode: bool = False): def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):
self.config = config self.config = config
self.freqai_info = config.get('freqai', {}) self.freqai_info = config.get("freqai", {})
# dictionary holding all pair metadata necessary to load in from disk # dictionary holding all pair metadata necessary to load in from disk
self.pair_dict: Dict[str, Any] = {} self.pair_dict: Dict[str, Any] = {}
# dictionary holding all actively inferenced models in memory given a model filename # dictionary holding all actively inferenced models in memory given a model filename
@ -38,7 +38,6 @@ class FreqaiDataDrawer:
self.model_return_values: Dict[str, Any] = {} self.model_return_values: Dict[str, Any] = {}
self.pair_data_dict: Dict[str, Any] = {} self.pair_data_dict: Dict[str, Any] = {}
self.historic_data: Dict[str, Any] = {} self.historic_data: Dict[str, Any] = {}
# self.populated_historic_data: Dict[str, Any] = {} ?
self.follower_dict: Dict[str, Any] = {} self.follower_dict: Dict[str, Any] = {}
self.full_path = full_path self.full_path = full_path
self.follow_mode = follow_mode self.follow_mode = follow_mode
@ -47,7 +46,6 @@ class FreqaiDataDrawer:
self.load_drawer_from_disk() self.load_drawer_from_disk()
self.training_queue: Dict[str, int] = {} self.training_queue: Dict[str, int] = {}
self.history_lock = threading.Lock() self.history_lock = threading.Lock()
# self.create_training_queue(pair_whitelist)
def load_drawer_from_disk(self): def load_drawer_from_disk(self):
""" """
@ -56,15 +54,17 @@ class FreqaiDataDrawer:
:returns: :returns:
exists: bool = whether or not the drawer was located exists: bool = whether or not the drawer was located
""" """
exists = Path(self.full_path / str('pair_dictionary.json')).resolve().exists() exists = Path(self.full_path / str("pair_dictionary.json")).resolve().exists()
if exists: if exists:
with open(self.full_path / str('pair_dictionary.json'), "r") as fp: with open(self.full_path / str("pair_dictionary.json"), "r") as fp:
self.pair_dict = json.load(fp) self.pair_dict = json.load(fp)
elif not self.follow_mode: elif not self.follow_mode:
logger.info("Could not find existing datadrawer, starting from scratch") logger.info("Could not find existing datadrawer, starting from scratch")
else: else:
logger.warning(f'Follower could not find pair_dictionary at {self.full_path} ' logger.warning(
'sending null values back to strategy') f"Follower could not find pair_dictionary at {self.full_path} "
"sending null values back to strategy"
)
return exists return exists
@ -72,36 +72,41 @@ class FreqaiDataDrawer:
""" """
Save data drawer full of all pair model metadata in present model folder. Save data drawer full of all pair model metadata in present model folder.
""" """
with open(self.full_path / str('pair_dictionary.json'), "w") as fp: with open(self.full_path / str("pair_dictionary.json"), "w") as fp:
json.dump(self.pair_dict, fp, default=self.np_encoder) json.dump(self.pair_dict, fp, default=self.np_encoder)
def save_follower_dict_to_disk(self): def save_follower_dict_to_disk(self):
""" """
Save follower dictionary to disk (used by strategy for persistent prediction targets) Save follower dictionary to disk (used by strategy for persistent prediction targets)
""" """
follower_name = self.config.get('bot_name', 'follower1') follower_name = self.config.get("bot_name", "follower1")
with open(self.full_path / str('follower_dictionary-' + with open(
follower_name + '.json'), "w") as fp: self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
) as fp:
json.dump(self.follower_dict, fp, default=self.np_encoder) json.dump(self.follower_dict, fp, default=self.np_encoder)
def create_follower_dict(self): def create_follower_dict(self):
""" """
Create or dictionary for each follower to maintain unique persistent prediction targets Create or dictionary for each follower to maintain unique persistent prediction targets
""" """
follower_name = self.config.get('bot_name', 'follower1') follower_name = self.config.get("bot_name", "follower1")
whitelist_pairs = self.config.get('exchange', {}).get('pair_whitelist') whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist")
exists = Path(self.full_path / str('follower_dictionary-' + exists = (
follower_name + '.json')).resolve().exists() Path(self.full_path / str("follower_dictionary-" + follower_name + ".json"))
.resolve()
.exists()
)
if exists: if exists:
logger.info('Found an existing follower dictionary') logger.info("Found an existing follower dictionary")
for pair in whitelist_pairs: for pair in whitelist_pairs:
self.follower_dict[pair] = {} self.follower_dict[pair] = {}
with open(self.full_path / str('follower_dictionary-' + with open(
follower_name + '.json'), "w") as fp: self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
) as fp:
json.dump(self.follower_dict, fp, default=self.np_encoder) json.dump(self.follower_dict, fp, default=self.np_encoder)
def np_encoder(self, object): def np_encoder(self, object):
@ -122,46 +127,48 @@ class FreqaiDataDrawer:
return_null_array: bool = Follower could not find pair metadata return_null_array: bool = Follower could not find pair metadata
""" """
pair_in_dict = self.pair_dict.get(pair) pair_in_dict = self.pair_dict.get(pair)
data_path_set = self.pair_dict.get(pair, {}).get('data_path', None) data_path_set = self.pair_dict.get(pair, {}).get("data_path", None)
return_null_array = False return_null_array = False
if pair_in_dict: if pair_in_dict:
model_filename = self.pair_dict[pair]['model_filename'] model_filename = self.pair_dict[pair]["model_filename"]
trained_timestamp = self.pair_dict[pair]['trained_timestamp'] trained_timestamp = self.pair_dict[pair]["trained_timestamp"]
coin_first = self.pair_dict[pair]['first'] coin_first = self.pair_dict[pair]["first"]
elif not self.follow_mode: elif not self.follow_mode:
self.pair_dict[pair] = {} self.pair_dict[pair] = {}
model_filename = self.pair_dict[pair]['model_filename'] = '' model_filename = self.pair_dict[pair]["model_filename"] = ""
coin_first = self.pair_dict[pair]['first'] = True coin_first = self.pair_dict[pair]["first"] = True
trained_timestamp = self.pair_dict[pair]['trained_timestamp'] = 0 trained_timestamp = self.pair_dict[pair]["trained_timestamp"] = 0
self.pair_dict[pair]['priority'] = len(self.pair_dict) self.pair_dict[pair]["priority"] = len(self.pair_dict)
if not data_path_set and self.follow_mode: if not data_path_set and self.follow_mode:
logger.warning(f'Follower could not find current pair {pair} in ' logger.warning(
f'pair_dictionary at path {self.full_path}, sending null values ' f"Follower could not find current pair {pair} in "
'back to strategy.') f"pair_dictionary at path {self.full_path}, sending null values "
"back to strategy."
)
return_null_array = True return_null_array = True
return model_filename, trained_timestamp, coin_first, return_null_array return model_filename, trained_timestamp, coin_first, return_null_array
def set_pair_dict_info(self, metadata: dict) -> None: def set_pair_dict_info(self, metadata: dict) -> None:
pair_in_dict = self.pair_dict.get(metadata['pair']) pair_in_dict = self.pair_dict.get(metadata["pair"])
if pair_in_dict: if pair_in_dict:
return return
else: else:
self.pair_dict[metadata['pair']] = {} self.pair_dict[metadata["pair"]] = {}
self.pair_dict[metadata['pair']]['model_filename'] = '' self.pair_dict[metadata["pair"]]["model_filename"] = ""
self.pair_dict[metadata['pair']]['first'] = True self.pair_dict[metadata["pair"]]["first"] = True
self.pair_dict[metadata['pair']]['trained_timestamp'] = 0 self.pair_dict[metadata["pair"]]["trained_timestamp"] = 0
self.pair_dict[metadata['pair']]['priority'] = len(self.pair_dict) self.pair_dict[metadata["pair"]]["priority"] = len(self.pair_dict)
return return
def pair_to_end_of_training_queue(self, pair: str) -> None: def pair_to_end_of_training_queue(self, pair: str) -> None:
# march all pairs up in the queue # march all pairs up in the queue
for p in self.pair_dict: for p in self.pair_dict:
self.pair_dict[p]['priority'] -= 1 self.pair_dict[p]["priority"] -= 1
# send pair to end of queue # send pair to end of queue
self.pair_dict[pair]['priority'] = len(self.pair_dict) self.pair_dict[pair]["priority"] = len(self.pair_dict)
def set_initial_return_values(self, pair: str, dk, pred_df, do_preds) -> None: def set_initial_return_values(self, pair: str, dk, pred_df, do_preds) -> None:
""" """
@ -172,16 +179,15 @@ class FreqaiDataDrawer:
self.model_return_values[pair] = pd.DataFrame() self.model_return_values[pair] = pd.DataFrame()
for label in dk.label_list: for label in dk.label_list:
self.model_return_values[pair][label] = pred_df[label] self.model_return_values[pair][label] = pred_df[label]
self.model_return_values[pair][f'{label}_mean'] = dk.data['labels_mean'][label] self.model_return_values[pair][f"{label}_mean"] = dk.data["labels_mean"][label]
self.model_return_values[pair][f'{label}_std'] = dk.data['labels_std'][label] self.model_return_values[pair][f"{label}_std"] = dk.data["labels_std"][label]
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0: if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
self.model_return_values[pair]['DI_values'] = dk.DI_values self.model_return_values[pair]["DI_values"] = dk.DI_values
self.model_return_values[pair]['do_predict'] = do_preds self.model_return_values[pair]["do_predict"] = do_preds
def append_model_predictions(self, pair: str, predictions, do_preds, def append_model_predictions(self, pair: str, predictions, do_preds, dk, len_df) -> None:
dk, len_df) -> None:
# strat seems to feed us variable sized dataframes - and since we are trying to build our # strat seems to feed us variable sized dataframes - and since we are trying to build our
# own return array in the same shape, we need to figure out how the size has changed # own return array in the same shape, we need to figure out how the size has changed
@ -198,17 +204,18 @@ class FreqaiDataDrawer:
for label in dk.label_list: for label in dk.label_list:
df[label].iloc[-1] = predictions[label].iloc[-1] df[label].iloc[-1] = predictions[label].iloc[-1]
df[f"{label}_mean"].iloc[-1] = dk.data['labels_mean'][label] df[f"{label}_mean"].iloc[-1] = dk.data["labels_mean"][label]
df[f"{label}_std"].iloc[-1] = dk.data['labels_std'][label] df[f"{label}_std"].iloc[-1] = dk.data["labels_std"][label]
# df['prediction'].iloc[-1] = predictions[-1] # df['prediction'].iloc[-1] = predictions[-1]
df['do_predict'].iloc[-1] = do_preds[-1] df["do_predict"].iloc[-1] = do_preds[-1]
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0: if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
df['DI_values'].iloc[-1] = dk.DI_values[-1] df["DI_values"].iloc[-1] = dk.DI_values[-1]
if length_difference < 0: if length_difference < 0:
prepend_df = pd.DataFrame(np.zeros((abs(length_difference) - 1, len(df.columns))), prepend_df = pd.DataFrame(
columns=df.columns) np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns
)
df = pd.concat([prepend_df, df], axis=0) df = pd.concat([prepend_df, df], axis=0)
def attach_return_values_to_return_dataframe(self, pair: str, dataframe) -> DataFrame: def attach_return_values_to_return_dataframe(self, pair: str, dataframe) -> DataFrame:
@ -220,7 +227,7 @@ class FreqaiDataDrawer:
dataframe: DataFrame = strat dataframe with return values attached dataframe: DataFrame = strat dataframe with return values attached
""" """
df = self.model_return_values[pair] df = self.model_return_values[pair]
to_keep = [col for col in dataframe.columns if not col.startswith('&')] to_keep = [col for col in dataframe.columns if not col.startswith("&")]
dataframe = pd.concat([dataframe[to_keep], df], axis=1) dataframe = pd.concat([dataframe[to_keep], df], axis=1)
return dataframe return dataframe
@ -237,10 +244,10 @@ class FreqaiDataDrawer:
dataframe[f"{label}_std"] = 0 dataframe[f"{label}_std"] = 0
# dataframe['prediction'] = 0 # dataframe['prediction'] = 0
dataframe['do_predict'] = 0 dataframe["do_predict"] = 0
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0: if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
dataframe['DI_value'] = 0 dataframe["DI_value"] = 0
dk.return_dataframe = dataframe dk.return_dataframe = dataframe
@ -261,29 +268,30 @@ class FreqaiDataDrawer:
if coin not in delete_dict: if coin not in delete_dict:
delete_dict[coin] = {} delete_dict[coin] = {}
delete_dict[coin]['num_folders'] = 1 delete_dict[coin]["num_folders"] = 1
delete_dict[coin]['timestamps'] = {int(timestamp): dir} delete_dict[coin]["timestamps"] = {int(timestamp): dir}
else: else:
delete_dict[coin]['num_folders'] += 1 delete_dict[coin]["num_folders"] += 1
delete_dict[coin]['timestamps'][int(timestamp)] = dir delete_dict[coin]["timestamps"][int(timestamp)] = dir
for coin in delete_dict: for coin in delete_dict:
if delete_dict[coin]['num_folders'] > 2: if delete_dict[coin]["num_folders"] > 2:
sorted_dict = collections.OrderedDict( sorted_dict = collections.OrderedDict(
sorted(delete_dict[coin]['timestamps'].items())) sorted(delete_dict[coin]["timestamps"].items())
)
num_delete = len(sorted_dict) - 2 num_delete = len(sorted_dict) - 2
deleted = 0 deleted = 0
for k, v in sorted_dict.items(): for k, v in sorted_dict.items():
if deleted >= num_delete: if deleted >= num_delete:
break break
logger.info(f'Freqai purging old model file {v}') logger.info(f"Freqai purging old model file {v}")
shutil.rmtree(v) shutil.rmtree(v)
deleted += 1 deleted += 1
def update_follower_metadata(self): def update_follower_metadata(self):
# follower needs to load from disk to get any changes made by leader to pair_dict # follower needs to load from disk to get any changes made by leader to pair_dict
self.load_drawer_from_disk() self.load_drawer_from_disk()
if self.config.get('freqai', {}).get('purge_old_models', False): if self.config.get("freqai", {}).get("purge_old_models", False):
self.purge_old_models() self.purge_old_models()
# to be used if we want to send predictions directly to the follower instead of forcing # to be used if we want to send predictions directly to the follower instead of forcing

View File

@ -37,8 +37,13 @@ class FreqaiDataKitchen:
author: Robert Caulk, rob.caulk@gmail.com author: Robert Caulk, rob.caulk@gmail.com
""" """
def __init__(self, config: Dict[str, Any], data_drawer: FreqaiDataDrawer, live: bool = False, def __init__(
pair: str = ''): self,
config: Dict[str, Any],
data_drawer: FreqaiDataDrawer,
live: bool = False,
pair: str = "",
):
self.data: Dict[Any, Any] = {} self.data: Dict[Any, Any] = {}
self.data_dictionary: Dict[Any, Any] = {} self.data_dictionary: Dict[Any, Any] = {}
self.config = config self.config = config
@ -60,8 +65,8 @@ class FreqaiDataKitchen:
self.svm_model: linear_model.SGDOneClassSVM = None self.svm_model: linear_model.SGDOneClassSVM = None
self.set_all_pairs() self.set_all_pairs()
if not self.live: if not self.live:
self.full_timerange = self.create_fulltimerange(self.config["timerange"], self.full_timerange = self.create_fulltimerange(
self.freqai_config.get("train_period") self.config["timerange"], self.freqai_config.get("train_period")
) )
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange( (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
@ -72,24 +77,28 @@ class FreqaiDataKitchen:
# self.strat_dataframe: DataFrame = strat_dataframe # self.strat_dataframe: DataFrame = strat_dataframe
self.dd = data_drawer self.dd = data_drawer
def set_paths(self, pair: str, trained_timestamp: int = None,) -> None: def set_paths(
self,
pair: str,
trained_timestamp: int = None,
) -> None:
""" """
Set the paths to the data for the present coin/botloop Set the paths to the data for the present coin/botloop
:params: :params:
metadata: dict = strategy furnished pair metadata metadata: dict = strategy furnished pair metadata
trained_timestamp: int = timestamp of most recent training trained_timestamp: int = timestamp of most recent training
""" """
self.full_path = Path(self.config['user_data_dir'] / self.full_path = Path(
"models" / self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
str(self.freqai_config.get('identifier'))) )
self.data_path = Path(self.full_path / str("sub-train" + "-" + self.data_path = Path(
pair.split("/")[0] + self.full_path / str("sub-train" + "-" + pair.split("/")[0] + str(trained_timestamp))
str(trained_timestamp))) )
return return
def save_data(self, model: Any, coin: str = '', keras_model=False, label=None) -> None: def save_data(self, model: Any, coin: str = "", keras_model=False, label=None) -> None:
""" """
Saves all data associated with a model for a single sub-train time range Saves all data associated with a model for a single sub-train time range
:params: :params:
@ -114,7 +123,7 @@ class FreqaiDataKitchen:
self.data["data_path"] = str(self.data_path) self.data["data_path"] = str(self.data_path)
self.data["model_filename"] = str(self.model_filename) self.data["model_filename"] = str(self.model_filename)
self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns) self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns)
self.data['label_list'] = self.label_list self.data["label_list"] = self.label_list
# store the metadata # store the metadata
with open(save_path / str(self.model_filename + "_metadata.json"), "w") as fp: with open(save_path / str(self.model_filename + "_metadata.json"), "w") as fp:
json.dump(self.data, fp, default=self.np_encoder) json.dump(self.data, fp, default=self.np_encoder)
@ -124,14 +133,15 @@ class FreqaiDataKitchen:
save_path / str(self.model_filename + "_trained_df.pkl") save_path / str(self.model_filename + "_trained_df.pkl")
) )
if self.freqai_config.get('feature_parameters', {}).get('principal_component_analysis'): if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"):
pk.dump(self.pca, open(self.data_path / pk.dump(
str(self.model_filename + "_pca_object.pkl"), "wb")) self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb")
)
# if self.live: # if self.live:
self.dd.model_dictionary[self.model_filename] = model self.dd.model_dictionary[self.model_filename] = model
self.dd.pair_dict[coin]['model_filename'] = self.model_filename self.dd.pair_dict[coin]["model_filename"] = self.model_filename
self.dd.pair_dict[coin]['data_path'] = str(self.data_path) self.dd.pair_dict[coin]["data_path"] = str(self.data_path)
self.dd.save_drawer_to_disk() self.dd.save_drawer_to_disk()
# TODO add a helper function to let user save/load any data they are custom adding. We # TODO add a helper function to let user save/load any data they are custom adding. We
@ -149,29 +159,32 @@ class FreqaiDataKitchen:
return return
def load_data(self, coin: str = '', keras_model=False) -> Any: def load_data(self, coin: str = "", keras_model=False) -> Any:
""" """
loads all data required to make a prediction on a sub-train time range loads all data required to make a prediction on a sub-train time range
:returns: :returns:
:model: User trained model which can be inferenced for new predictions :model: User trained model which can be inferenced for new predictions
""" """
if not self.dd.pair_dict[coin]['model_filename']: if not self.dd.pair_dict[coin]["model_filename"]:
return None return None
if self.live: if self.live:
self.model_filename = self.dd.pair_dict[coin]['model_filename'] self.model_filename = self.dd.pair_dict[coin]["model_filename"]
self.data_path = Path(self.dd.pair_dict[coin]['data_path']) self.data_path = Path(self.dd.pair_dict[coin]["data_path"])
if self.freqai_config.get('follow_mode', False): if self.freqai_config.get("follow_mode", False):
# follower can be on a different system which is rsynced to the leader: # follower can be on a different system which is rsynced to the leader:
self.data_path = Path(self.config["user_data_dir"] / self.data_path = Path(
"models" / self.data_path.parts[-2] / self.config["user_data_dir"]
self.data_path.parts[-1]) / "models"
/ self.data_path.parts[-2]
/ self.data_path.parts[-1]
)
with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp: with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp:
self.data = json.load(fp) self.data = json.load(fp)
self.training_features_list = self.data["training_features_list"] self.training_features_list = self.data["training_features_list"]
self.label_list = self.data['label_list'] self.label_list = self.data["label_list"]
self.data_dictionary["train_features"] = pd.read_pickle( self.data_dictionary["train_features"] = pd.read_pickle(
self.data_path / str(self.model_filename + "_trained_df.pkl") self.data_path / str(self.model_filename + "_trained_df.pkl")
@ -200,16 +213,15 @@ class FreqaiDataKitchen:
model = load(self.data_path / str(self.model_filename + "_model.joblib")) model = load(self.data_path / str(self.model_filename + "_model.joblib"))
else: else:
from tensorflow import keras from tensorflow import keras
model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5")) model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5"))
if Path(self.data_path / str(self.model_filename + if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists():
"_svm_model.joblib")).resolve().exists():
self.svm_model = load(self.data_path / str(self.model_filename + "_svm_model.joblib")) self.svm_model = load(self.data_path / str(self.model_filename + "_svm_model.joblib"))
if not model: if not model:
raise OperationalException( raise OperationalException(
f"Unable to load model, ensure model exists at " f"Unable to load model, ensure model exists at " f"{self.data_path} "
f"{self.data_path} "
) )
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]: if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
@ -257,7 +269,7 @@ class FreqaiDataKitchen:
weights, weights,
stratify=stratification, stratify=stratification,
# shuffle=False, # shuffle=False,
**self.config["freqai"]["data_split_parameters"] **self.config["freqai"]["data_split_parameters"],
) )
return self.build_data_dictionary( return self.build_data_dictionary(
@ -309,14 +321,14 @@ class FreqaiDataKitchen:
(drop_index == 0) & (drop_index_labels == 0) (drop_index == 0) & (drop_index_labels == 0)
] # assuming the labels depend entirely on the dataframe here. ] # assuming the labels depend entirely on the dataframe here.
logger.info( logger.info(
f'dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points' f"dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points"
f' due to NaNs in populated dataset {len(unfiltered_dataframe)}.' f" due to NaNs in populated dataset {len(unfiltered_dataframe)}."
) )
if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live: if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
logger.warning( logger.warning(
f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent' f" {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent"
' of training data dropped due to NaNs, model may perform inconsistent' " of training data dropped due to NaNs, model may perform inconsistent"
'with expectations' "with expectations"
) )
self.data["filter_drop_index_training"] = drop_index self.data["filter_drop_index_training"] = drop_index
@ -372,21 +384,27 @@ class FreqaiDataKitchen:
# standardize the data by training stats # standardize the data by training stats
train_max = data_dictionary["train_features"].max() train_max = data_dictionary["train_features"].max()
train_min = data_dictionary["train_features"].min() train_min = data_dictionary["train_features"].min()
data_dictionary["train_features"] = 2 * ( data_dictionary["train_features"] = (
data_dictionary["train_features"] - train_min 2 * (data_dictionary["train_features"] - train_min) / (train_max - train_min) - 1
) / (train_max - train_min) - 1 )
data_dictionary["test_features"] = 2 * ( data_dictionary["test_features"] = (
data_dictionary["test_features"] - train_min 2 * (data_dictionary["test_features"] - train_min) / (train_max - train_min) - 1
) / (train_max - train_min) - 1 )
train_labels_max = data_dictionary["train_labels"].max() train_labels_max = data_dictionary["train_labels"].max()
train_labels_min = data_dictionary["train_labels"].min() train_labels_min = data_dictionary["train_labels"].min()
data_dictionary["train_labels"] = 2 * ( data_dictionary["train_labels"] = (
data_dictionary["train_labels"] - train_labels_min 2
) / (train_labels_max - train_labels_min) - 1 * (data_dictionary["train_labels"] - train_labels_min)
data_dictionary["test_labels"] = 2 * ( / (train_labels_max - train_labels_min)
data_dictionary["test_labels"] - train_labels_min - 1
) / (train_labels_max - train_labels_min) - 1 )
data_dictionary["test_labels"] = (
2
* (data_dictionary["test_labels"] - train_labels_min)
/ (train_labels_max - train_labels_min)
- 1
)
for item in train_max.keys(): for item in train_max.keys():
self.data[item + "_max"] = train_max[item] self.data[item + "_max"] = train_max[item]
@ -406,8 +424,12 @@ class FreqaiDataKitchen:
""" """
for item in df.keys(): for item in df.keys():
df[item] = 2 * (df[item] - self.data[item + "_min"]) / (self.data[item + "_max"] - df[item] = (
self.data[item + '_min']) - 1 2
* (df[item] - self.data[item + "_min"])
/ (self.data[item + "_max"] - self.data[item + "_min"])
- 1
)
return df return df
@ -429,8 +451,9 @@ class FreqaiDataKitchen:
full_timerange = TimeRange.parse_timerange(tr) full_timerange = TimeRange.parse_timerange(tr)
config_timerange = TimeRange.parse_timerange(self.config["timerange"]) config_timerange = TimeRange.parse_timerange(self.config["timerange"])
if config_timerange.stopts == 0: if config_timerange.stopts == 0:
config_timerange.stopts = int(datetime.datetime.now( config_timerange.stopts = int(
tz=datetime.timezone.utc).timestamp()) datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
)
timerange_train = copy.deepcopy(full_timerange) timerange_train = copy.deepcopy(full_timerange)
timerange_backtest = copy.deepcopy(full_timerange) timerange_backtest = copy.deepcopy(full_timerange)
@ -518,7 +541,7 @@ class FreqaiDataKitchen:
# keeping a copy of the non-transformed features so we can check for errors during # keeping a copy of the non-transformed features so we can check for errors during
# model load from disk # model load from disk
self.data['training_features_list_raw'] = copy.deepcopy(self.training_features_list) self.data["training_features_list_raw"] = copy.deepcopy(self.training_features_list)
self.training_features_list = self.data_dictionary["train_features"].columns self.training_features_list = self.data_dictionary["train_features"].columns
self.data_dictionary["test_features"] = pd.DataFrame( self.data_dictionary["test_features"] = pd.DataFrame(
@ -530,7 +553,7 @@ class FreqaiDataKitchen:
self.data["n_kept_components"] = n_keep_components self.data["n_kept_components"] = n_keep_components
self.pca = pca2 self.pca = pca2
logger.info(f'PCA reduced total features from {n_components} to {n_keep_components}') logger.info(f"PCA reduced total features from {n_components} to {n_keep_components}")
if not self.data_path.is_dir(): if not self.data_path.is_dir():
self.data_path.mkdir(parents=True, exist_ok=True) self.data_path.mkdir(parents=True, exist_ok=True)
@ -557,10 +580,10 @@ class FreqaiDataKitchen:
for prediction confidence in the Dissimilarity Index for prediction confidence in the Dissimilarity Index
""" """
logger.info("computing average mean distance for all training points") logger.info("computing average mean distance for all training points")
tc = self.freqai_config.get('model_training_parameters', {}).get('thread_count', -1) tc = self.freqai_config.get("model_training_parameters", {}).get("thread_count", -1)
pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc) pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc)
avg_mean_dist = pairwise.mean(axis=1).mean() avg_mean_dist = pairwise.mean(axis=1).mean()
logger.info(f'avg_mean_dist {avg_mean_dist:.2f}') logger.info(f"avg_mean_dist {avg_mean_dist:.2f}")
return avg_mean_dist return avg_mean_dist
@ -579,45 +602,49 @@ class FreqaiDataKitchen:
if (len(do_predict) - do_predict.sum()) > 0: if (len(do_predict) - do_predict.sum()) > 0:
logger.info( logger.info(
f'svm_remove_outliers() tossed {len(do_predict) - do_predict.sum()} predictions' f"svm_remove_outliers() tossed {len(do_predict) - do_predict.sum()} predictions"
) )
self.do_predict += do_predict self.do_predict += do_predict
self.do_predict -= 1 self.do_predict -= 1
else: else:
# use SGDOneClassSVM to increase speed? # use SGDOneClassSVM to increase speed?
nu = self.freqai_config.get('feature_parameters', {}).get('svm_nu', 0.2) nu = self.freqai_config.get("feature_parameters", {}).get("svm_nu", 0.2)
self.svm_model = linear_model.SGDOneClassSVM(nu=nu).fit( self.svm_model = linear_model.SGDOneClassSVM(nu=nu).fit(
self.data_dictionary["train_features"] self.data_dictionary["train_features"]
) )
y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) y_pred = self.svm_model.predict(self.data_dictionary["train_features"])
dropped_points = np.where(y_pred == -1, 0, y_pred) dropped_points = np.where(y_pred == -1, 0, y_pred)
# keep_index = np.where(y_pred == 1) # keep_index = np.where(y_pred == 1)
self.data_dictionary["train_features"] = self.data_dictionary[ self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
"train_features"][(y_pred == 1)] (y_pred == 1)
self.data_dictionary["train_labels"] = self.data_dictionary[ ]
"train_labels"][(y_pred == 1)] self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][
self.data_dictionary["train_weights"] = self.data_dictionary[ (y_pred == 1)
"train_weights"][(y_pred == 1)] ]
self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][
(y_pred == 1)
]
logger.info( logger.info(
f'svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}' f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}"
f' train points from {len(y_pred)}' f" train points from {len(y_pred)}"
) )
# same for test data # same for test data
y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
dropped_points = np.where(y_pred == -1, 0, y_pred) dropped_points = np.where(y_pred == -1, 0, y_pred)
self.data_dictionary["test_features"] = self.data_dictionary[ self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
"test_features"][(y_pred == 1)] (y_pred == 1)
self.data_dictionary["test_labels"] = self.data_dictionary[ ]
"test_labels"][(y_pred == 1)] self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][(y_pred == 1)]
self.data_dictionary["test_weights"] = self.data_dictionary[ self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
"test_weights"][(y_pred == 1)] (y_pred == 1)
]
logger.info( logger.info(
f'svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}' f"svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}"
f' test points from {len(y_pred)}' f" test points from {len(y_pred)}"
) )
return return
@ -631,8 +658,8 @@ class FreqaiDataKitchen:
features: list = the features to be used for training/prediction features: list = the features to be used for training/prediction
""" """
column_names = dataframe.columns column_names = dataframe.columns
features = [c for c in column_names if '%' in c] features = [c for c in column_names if "%" in c]
labels = [c for c in column_names if '&' in c] labels = [c for c in column_names if "&" in c]
if not features: if not features:
raise OperationalException("Could not find any features!") raise OperationalException("Could not find any features!")
@ -657,16 +684,15 @@ class FreqaiDataKitchen:
self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"] self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
do_predict = np.where( do_predict = np.where(
self.DI_values self.DI_values < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
< self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
1, 1,
0, 0,
) )
if (len(do_predict) - do_predict.sum()) > 0: if (len(do_predict) - do_predict.sum()) > 0:
logger.info( logger.info(
f'DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for ' f"DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for "
'being too far from training data' "being too far from training data"
) )
self.do_predict += do_predict self.do_predict += do_predict
@ -695,7 +721,7 @@ class FreqaiDataKitchen:
self.full_predictions = np.append(self.full_predictions, predictions) self.full_predictions = np.append(self.full_predictions, predictions)
self.full_do_predict = np.append(self.full_do_predict, do_predict) self.full_do_predict = np.append(self.full_do_predict, do_predict)
if self.freqai_config.get('feature_parameters', {}).get('DI_threshold', 0) > 0: if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
self.full_DI_values = np.append(self.full_DI_values, self.DI_values) self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
self.full_target_mean = np.append(self.full_target_mean, target_mean) self.full_target_mean = np.append(self.full_target_mean, target_mean)
self.full_target_std = np.append(self.full_target_std, target_std) self.full_target_std = np.append(self.full_target_std, target_std)
@ -711,7 +737,7 @@ class FreqaiDataKitchen:
filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count
self.full_predictions = np.append(filler, self.full_predictions) self.full_predictions = np.append(filler, self.full_predictions)
self.full_do_predict = np.append(filler, self.full_do_predict) self.full_do_predict = np.append(filler, self.full_do_predict)
if self.freqai_config.get('feature_parameters', {}).get('DI_threshold', 0) > 0: if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
self.full_DI_values = np.append(filler, self.full_DI_values) self.full_DI_values = np.append(filler, self.full_DI_values)
self.full_target_mean = np.append(filler, self.full_target_mean) self.full_target_mean = np.append(filler, self.full_target_mean)
self.full_target_std = np.append(filler, self.full_target_std) self.full_target_std = np.append(filler, self.full_target_std)
@ -722,8 +748,9 @@ class FreqaiDataKitchen:
backtest_timerange = TimeRange.parse_timerange(backtest_tr) backtest_timerange = TimeRange.parse_timerange(backtest_tr)
if backtest_timerange.stopts == 0: if backtest_timerange.stopts == 0:
backtest_timerange.stopts = int(datetime.datetime.now( backtest_timerange.stopts = int(
tz=datetime.timezone.utc).timestamp()) datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
)
backtest_timerange.startts = backtest_timerange.startts - backtest_period * SECONDS_IN_DAY backtest_timerange.startts = backtest_timerange.startts - backtest_period * SECONDS_IN_DAY
start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts) start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
@ -731,9 +758,7 @@ class FreqaiDataKitchen:
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
self.full_path = Path( self.full_path = Path(
self.config["user_data_dir"] self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
/ "models"
/ str(self.freqai_config.get("identifier"))
) )
config_path = Path(self.config["config_files"][0]) config_path = Path(self.config["config_files"][0])
@ -758,61 +783,71 @@ class FreqaiDataKitchen:
""" """
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
elapsed_time = (time - trained_timestamp) / 3600 # hours elapsed_time = (time - trained_timestamp) / 3600 # hours
max_time = self.freqai_config.get('expiration_hours', 0) max_time = self.freqai_config.get("expiration_hours", 0)
if max_time > 0: if max_time > 0:
return elapsed_time > max_time return elapsed_time > max_time
else: else:
return False return False
def check_if_new_training_required(self, trained_timestamp: int) -> Tuple[bool, def check_if_new_training_required(
TimeRange, TimeRange]: self, trained_timestamp: int
) -> Tuple[bool, TimeRange, TimeRange]:
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
trained_timerange = TimeRange() trained_timerange = TimeRange()
data_load_timerange = TimeRange() data_load_timerange = TimeRange()
# find the max indicator length required # find the max indicator length required
max_timeframe_chars = self.freqai_config.get('timeframes')[-1] max_timeframe_chars = self.freqai_config.get("timeframes")[-1]
max_period = self.freqai_config.get('feature_parameters', {}).get( max_period = self.freqai_config.get("feature_parameters", {}).get(
'indicator_max_period', 50) "indicator_max_period", 50
)
additional_seconds = 0 additional_seconds = 0
if max_timeframe_chars[-1] == 'd': if max_timeframe_chars[-1] == "d":
additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2]) additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2])
elif max_timeframe_chars[-1] == 'h': elif max_timeframe_chars[-1] == "h":
additional_seconds = max_period * 3600 * int(max_timeframe_chars[-2]) additional_seconds = max_period * 3600 * int(max_timeframe_chars[-2])
elif max_timeframe_chars[-1] == 'm': elif max_timeframe_chars[-1] == "m":
if len(max_timeframe_chars) == 2: if len(max_timeframe_chars) == 2:
additional_seconds = max_period * 60 * int(max_timeframe_chars[-2]) additional_seconds = max_period * 60 * int(max_timeframe_chars[-2])
elif len(max_timeframe_chars) == 3: elif len(max_timeframe_chars) == 3:
additional_seconds = max_period * 60 * int(float(max_timeframe_chars[0:2])) additional_seconds = max_period * 60 * int(float(max_timeframe_chars[0:2]))
else: else:
logger.warning('FreqAI could not detect max timeframe and therefore may not ' logger.warning(
'download the proper amount of data for training') "FreqAI could not detect max timeframe and therefore may not "
"download the proper amount of data for training"
)
# logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days') # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days')
if trained_timestamp != 0: if trained_timestamp != 0:
elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY
retrain = elapsed_time > self.freqai_config.get('backtest_period') retrain = elapsed_time > self.freqai_config.get("backtest_period")
if retrain: if retrain:
trained_timerange.startts = int(time - self.freqai_config.get( trained_timerange.startts = int(
'train_period', 0) * SECONDS_IN_DAY) time - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
)
trained_timerange.stopts = int(time) trained_timerange.stopts = int(time)
# we want to load/populate indicators on more data than we plan to train on so # we want to load/populate indicators on more data than we plan to train on so
# because most of the indicators have a rolling timeperiod, and are thus NaNs # because most of the indicators have a rolling timeperiod, and are thus NaNs
# unless they have data further back in time before the start of the train period # unless they have data further back in time before the start of the train period
data_load_timerange.startts = int(time - self.freqai_config.get( data_load_timerange.startts = int(
'train_period', 0) * SECONDS_IN_DAY time
- additional_seconds) - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
- additional_seconds
)
data_load_timerange.stopts = int(time) data_load_timerange.stopts = int(time)
else: # user passed no live_trained_timerange in config else: # user passed no live_trained_timerange in config
trained_timerange.startts = int(time - self.freqai_config.get('train_period') * trained_timerange.startts = int(
SECONDS_IN_DAY) time - self.freqai_config.get("train_period") * SECONDS_IN_DAY
)
trained_timerange.stopts = int(time) trained_timerange.stopts = int(time)
data_load_timerange.startts = int(time - self.freqai_config.get( data_load_timerange.startts = int(
'train_period', 0) * SECONDS_IN_DAY time
- additional_seconds) - self.freqai_config.get("train_period", 0) * SECONDS_IN_DAY
- additional_seconds
)
data_load_timerange.stopts = int(time) data_load_timerange.stopts = int(time)
retrain = True retrain = True
@ -822,9 +857,10 @@ class FreqaiDataKitchen:
coin, _ = pair.split("/") coin, _ = pair.split("/")
# set the new data_path # set the new data_path
self.data_path = Path(self.full_path / str("sub-train" + "-" + self.data_path = Path(
pair.split("/")[0] + self.full_path
str(int(trained_timerange.stopts)))) / str("sub-train" + "-" + pair.split("/")[0] + str(int(trained_timerange.stopts)))
)
self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts)) self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts))
@ -860,19 +896,23 @@ class FreqaiDataKitchen:
timerange: TimeRange = The full data timerange for populating the indicators timerange: TimeRange = The full data timerange for populating the indicators
and training the model. and training the model.
""" """
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], exchange = ExchangeResolver.load_exchange(
self.config, validate=False, freqai=True) self.config["exchange"]["name"], self.config, validate=False, freqai=True
)
new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY) new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY)
refresh_backtest_ohlcv_data( refresh_backtest_ohlcv_data(
exchange, pairs=self.all_pairs, exchange,
timeframes=self.freqai_config.get('timeframes'), pairs=self.all_pairs,
datadir=self.config['datadir'], timerange=timerange, timeframes=self.freqai_config.get("timeframes"),
datadir=self.config["datadir"],
timerange=timerange,
new_pairs_days=new_pairs_days, new_pairs_days=new_pairs_days,
erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'), erase=False,
trading_mode=self.config.get('trading_mode', 'spot'), data_format=self.config.get("dataformat_ohlcv", "json"),
prepend=self.config.get('prepend_data', False) trading_mode=self.config.get("trading_mode", "spot"),
prepend=self.config.get("prepend_data", False),
) )
def update_historic_data(self, strategy: IStrategy) -> None: def update_historic_data(self, strategy: IStrategy) -> None:
@ -888,34 +928,36 @@ class FreqaiDataKitchen:
history_data = self.dd.historic_data history_data = self.dd.historic_data
for pair in self.all_pairs: for pair in self.all_pairs:
for tf in self.freqai_config.get('timeframes'): for tf in self.freqai_config.get("timeframes"):
# check if newest candle is already appended # check if newest candle is already appended
df_dp = strategy.dp.get_pair_dataframe(pair, tf) df_dp = strategy.dp.get_pair_dataframe(pair, tf)
if len(df_dp.index) == 0: if len(df_dp.index) == 0:
continue continue
if ( if str(history_data[pair][tf].iloc[-1]["date"]) == str(
str(history_data[pair][tf].iloc[-1]['date']) == df_dp.iloc[-1:]["date"].iloc[-1]
str(df_dp.iloc[-1:]['date'].iloc[-1])
): ):
continue continue
index = df_dp.loc[ index = (
df_dp['date'] == df_dp.loc[df_dp["date"] == history_data[pair][tf].iloc[-1]["date"]].index[0]
history_data[pair][tf].iloc[-1]['date'] + 1
].index[0] + 1 )
history_data[pair][tf] = pd.concat( history_data[pair][tf] = pd.concat(
[history_data[pair][tf], [
strategy.dp.get_pair_dataframe(pair, tf).iloc[index:]], history_data[pair][tf],
ignore_index=True, axis=0 strategy.dp.get_pair_dataframe(pair, tf).iloc[index:],
],
ignore_index=True,
axis=0,
) )
# logger.info(f'Length of history data {len(history_data[pair][tf])}') # logger.info(f'Length of history data {len(history_data[pair][tf])}')
def set_all_pairs(self) -> None: def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', [])) self.all_pairs = copy.deepcopy(self.freqai_config.get("corr_pairlist", []))
for pair in self.config.get('exchange', '').get('pair_whitelist'): for pair in self.config.get("exchange", "").get("pair_whitelist"):
if pair not in self.all_pairs: if pair not in self.all_pairs:
self.all_pairs.append(pair) self.all_pairs.append(pair)
@ -932,17 +974,19 @@ class FreqaiDataKitchen:
for pair in self.all_pairs: for pair in self.all_pairs:
if pair not in history_data: if pair not in history_data:
history_data[pair] = {} history_data[pair] = {}
for tf in self.freqai_config.get('timeframes'): for tf in self.freqai_config.get("timeframes"):
history_data[pair][tf] = load_pair_history(datadir=self.config['datadir'], history_data[pair][tf] = load_pair_history(
datadir=self.config["datadir"],
timeframe=tf, timeframe=tf,
pair=pair, timerange=timerange, pair=pair,
data_format=self.config.get( timerange=timerange,
'dataformat_ohlcv', 'json'), data_format=self.config.get("dataformat_ohlcv", "json"),
candle_type=self.config.get( candle_type=self.config.get("trading_mode", "spot"),
'trading_mode', 'spot')) )
def get_base_and_corr_dataframes(self, timerange: TimeRange, def get_base_and_corr_dataframes(
pair: str) -> Tuple[Dict[Any, Any], Dict[Any, Any]]: self, timerange: TimeRange, pair: str
) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
""" """
Searches through our historic_data in memory and returns the dataframes relevant Searches through our historic_data in memory and returns the dataframes relevant
to the present pair. to the present pair.
@ -956,21 +1000,19 @@ class FreqaiDataKitchen:
corr_dataframes: Dict[Any, Any] = {} corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {}
historic_data = self.dd.historic_data historic_data = self.dd.historic_data
pairs = self.freqai_config.get('corr_pairlist', []) pairs = self.freqai_config.get("corr_pairlist", [])
for tf in self.freqai_config.get('timeframes'): for tf in self.freqai_config.get("timeframes"):
base_dataframes[tf] = self.slice_dataframe( base_dataframes[tf] = self.slice_dataframe(timerange, historic_data[pair][tf])
timerange,
historic_data[pair][tf]
)
if pairs: if pairs:
for p in pairs: for p in pairs:
if pair in p: if pair in p:
continue # dont repeat anything from whitelist continue # dont repeat anything from whitelist
if p not in corr_dataframes: if p not in corr_dataframes:
corr_dataframes[p] = {} corr_dataframes[p] = {}
corr_dataframes[p][tf] = self.slice_dataframe(timerange, corr_dataframes[p][tf] = self.slice_dataframe(
historic_data[p][tf]) timerange, historic_data[p][tf]
)
return corr_dataframes, base_dataframes return corr_dataframes, base_dataframes
@ -1006,10 +1048,9 @@ class FreqaiDataKitchen:
# return corr_dataframes, base_dataframes # return corr_dataframes, base_dataframes
def use_strategy_to_populate_indicators(self, strategy: IStrategy, def use_strategy_to_populate_indicators(
corr_dataframes: dict, self, strategy: IStrategy, corr_dataframes: dict, base_dataframes: dict, pair: str
base_dataframes: dict, ) -> DataFrame:
pair: str) -> DataFrame:
""" """
Use the user defined strategy for populating indicators during Use the user defined strategy for populating indicators during
retrain retrain
@ -1023,17 +1064,12 @@ class FreqaiDataKitchen:
:returns: :returns:
dataframe: DataFrame = dataframe containing populated indicators dataframe: DataFrame = dataframe containing populated indicators
""" """
dataframe = base_dataframes[self.config['timeframe']].copy() dataframe = base_dataframes[self.config["timeframe"]].copy()
pairs = self.freqai_config.get("corr_pairlist", []) pairs = self.freqai_config.get("corr_pairlist", [])
for tf in self.freqai_config.get("timeframes"): for tf in self.freqai_config.get("timeframes"):
dataframe = strategy.populate_any_indicators( dataframe = strategy.populate_any_indicators(
pair, pair, pair, dataframe.copy(), tf, base_dataframes[tf], coin=pair.split("/")[0] + "-"
pair,
dataframe.copy(),
tf,
base_dataframes[tf],
coin=pair.split("/")[0] + "-"
) )
if pairs: if pairs:
for i in pairs: for i in pairs:
@ -1045,7 +1081,7 @@ class FreqaiDataKitchen:
dataframe.copy(), dataframe.copy(),
tf, tf,
corr_dataframes[i][tf], corr_dataframes[i][tf],
coin=i.split("/")[0] + "-" coin=i.split("/")[0] + "-",
) )
return dataframe return dataframe
@ -1056,7 +1092,7 @@ class FreqaiDataKitchen:
""" """
import scipy as spy import scipy as spy
self.data['labels_mean'], self.data['labels_std'] = {}, {} self.data["labels_mean"], self.data["labels_std"] = {}, {}
for label in self.label_list: for label in self.label_list:
f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label]) f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label])
self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1] self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]

View File

@ -29,6 +29,7 @@ logger = logging.getLogger(__name__)
def threaded(fn): def threaded(fn):
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs).start() threading.Thread(target=fn, args=args, kwargs=kwargs).start()
return wrapper return wrapper
@ -46,7 +47,7 @@ class IFreqaiModel(ABC):
self.config = config self.config = config
self.assert_config(self.config) self.assert_config(self.config)
self.freqai_info = config["freqai"] self.freqai_info = config["freqai"]
self.data_split_parameters = config.get('freqai', {}).get("data_split_parameters") self.data_split_parameters = config.get("freqai", {}).get("data_split_parameters")
self.model_training_parameters = config.get("freqai", {}).get("model_training_parameters") self.model_training_parameters = config.get("freqai", {}).get("model_training_parameters")
self.feature_parameters = config.get("freqai", {}).get("feature_parameters") self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
self.time_last_trained = None self.time_last_trained = None
@ -58,23 +59,21 @@ class IFreqaiModel(ABC):
self.first = True self.first = True
self.update_historic_data = 0 self.update_historic_data = 0
self.set_full_path() self.set_full_path()
self.follow_mode = self.freqai_info.get('follow_mode', False) self.follow_mode = self.freqai_info.get("follow_mode", False)
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
self.lock = threading.Lock() self.lock = threading.Lock()
self.follow_mode = self.freqai_info.get('follow_mode', False) self.follow_mode = self.freqai_info.get("follow_mode", False)
self.identifier = self.freqai_info.get('identifier', 'no_id_provided') self.identifier = self.freqai_info.get("identifier", "no_id_provided")
self.scanning = False self.scanning = False
self.ready_to_scan = False self.ready_to_scan = False
self.first = True self.first = True
self.keras = self.freqai_info.get('keras', False) self.keras = self.freqai_info.get("keras", False)
self.CONV_WIDTH = self.freqai_info.get('conv_width', 2) self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
def assert_config(self, config: Dict[str, Any]) -> None: def assert_config(self, config: Dict[str, Any]) -> None:
if not config.get('freqai', {}): if not config.get("freqai", {}):
raise OperationalException( raise OperationalException("No freqai parameters found in configuration file.")
"No freqai parameters found in configuration file."
)
def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame: def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
""" """
@ -92,8 +91,7 @@ class IFreqaiModel(ABC):
self.dd.set_pair_dict_info(metadata) self.dd.set_pair_dict_info(metadata)
if self.live: if self.live:
self.dk = FreqaiDataKitchen(self.config, self.dd, self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
self.live, metadata["pair"])
dk = self.start_live(dataframe, metadata, strategy, self.dk) dk = self.start_live(dataframe, metadata, strategy, self.dk)
# For backtesting, each pair enters and then gets trained for each window along the # For backtesting, each pair enters and then gets trained for each window along the
@ -103,7 +101,7 @@ class IFreqaiModel(ABC):
# the concatenated results for the full backtesting period back to the strategy. # the concatenated results for the full backtesting period back to the strategy.
elif not self.follow_mode: elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"]) self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
logger.info(f'Training {len(self.dk.training_timeranges)} timeranges') logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dk = self.start_backtesting(dataframe, metadata, self.dk) dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = self.remove_features_from_df(dk.return_dataframe) dataframe = self.remove_features_from_df(dk.return_dataframe)
@ -120,14 +118,13 @@ class IFreqaiModel(ABC):
""" """
while 1: while 1:
time.sleep(1) time.sleep(1)
for pair in self.config.get('exchange', {}).get('pair_whitelist'): for pair in self.config.get("exchange", {}).get("pair_whitelist"):
(_, trained_timestamp, _, _) = self.dd.get_pair_dict_info(pair) (_, trained_timestamp, _, _) = self.dd.get_pair_dict_info(pair)
if self.dd.pair_dict[pair]['priority'] != 1: if self.dd.pair_dict[pair]["priority"] != 1:
continue continue
dk = FreqaiDataKitchen(self.config, self.dd, dk = FreqaiDataKitchen(self.config, self.dd, self.live, pair)
self.live, pair)
# file_exists = False # file_exists = False
@ -138,17 +135,21 @@ class IFreqaiModel(ABC):
# model_filename=model_filename, # model_filename=model_filename,
# scanning=True) # scanning=True)
(retrain, (
retrain,
new_trained_timerange, new_trained_timerange,
data_load_timerange) = dk.check_if_new_training_required(trained_timestamp) data_load_timerange,
) = dk.check_if_new_training_required(trained_timestamp)
dk.set_paths(pair, new_trained_timerange.stopts) dk.set_paths(pair, new_trained_timerange.stopts)
if retrain: # or not file_exists: if retrain: # or not file_exists:
self.train_model_in_series(new_trained_timerange, pair, self.train_model_in_series(
strategy, dk, data_load_timerange) new_trained_timerange, pair, strategy, dk, data_load_timerange
)
def start_backtesting(self, dataframe: DataFrame, metadata: dict, def start_backtesting(
dk: FreqaiDataKitchen) -> FreqaiDataKitchen: self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
) -> FreqaiDataKitchen:
""" """
The main broad execution for backtesting. For backtesting, each pair enters and then gets The main broad execution for backtesting. For backtesting, each pair enters and then gets
trained for each window along the sliding window defined by "train_period" (training window) trained for each window along the sliding window defined by "train_period" (training window)
@ -169,10 +170,8 @@ class IFreqaiModel(ABC):
# tr_backtest is the backtesting time range e.g. the week directly # tr_backtest is the backtesting time range e.g. the week directly
# following tr_train. Both of these windows slide through the # following tr_train. Both of these windows slide through the
# entire backtest # entire backtest
for tr_train, tr_backtest in zip( for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
dk.training_timeranges, dk.backtesting_timeranges (_, _, _, _) = self.dd.get_pair_dict_info(metadata["pair"])
):
(_, _, _, _) = self.dd.get_pair_dict_info(metadata['pair'])
gc.collect() gc.collect()
dk.data = {} # clean the pair specific data between training window sliding dk.data = {} # clean the pair specific data between training window sliding
self.training_timerange = tr_train self.training_timerange = tr_train
@ -181,40 +180,48 @@ class IFreqaiModel(ABC):
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
trained_timestamp = tr_train # TimeRange.parse_timerange(tr_train) trained_timestamp = tr_train # TimeRange.parse_timerange(tr_train)
tr_train_startts_str = datetime.datetime.utcfromtimestamp( tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime(
tr_train.startts).strftime('%Y-%m-%d %H:%M:%S') "%Y-%m-%d %H:%M:%S"
tr_train_stopts_str = datetime.datetime.utcfromtimestamp( )
tr_train.stopts).strftime('%Y-%m-%d %H:%M:%S') tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime(
"%Y-%m-%d %H:%M:%S"
)
logger.info("Training %s", metadata["pair"]) logger.info("Training %s", metadata["pair"])
logger.info(f'Training {tr_train_startts_str} to {tr_train_stopts_str}') logger.info(f"Training {tr_train_startts_str} to {tr_train_stopts_str}")
dk.data_path = Path(dk.full_path / dk.data_path = Path(
str("sub-train" + "-" + metadata['pair'].split("/")[0] + dk.full_path
str(int(trained_timestamp.stopts)))) / str(
if not self.model_exists(metadata["pair"], dk, "sub-train"
trained_timestamp=trained_timestamp.stopts): + "-"
self.model = self.train(dataframe_train, metadata['pair'], dk) + metadata["pair"].split("/")[0]
self.dd.pair_dict[metadata['pair']][ + str(int(trained_timestamp.stopts))
'trained_timestamp'] = trained_timestamp.stopts )
dk.set_new_model_names(metadata['pair'], trained_timestamp) )
dk.save_data(self.model, metadata['pair'], keras_model=self.keras) if not self.model_exists(
metadata["pair"], dk, trained_timestamp=trained_timestamp.stopts
):
self.model = self.train(dataframe_train, metadata["pair"], dk)
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = trained_timestamp.stopts
dk.set_new_model_names(metadata["pair"], trained_timestamp)
dk.save_data(self.model, metadata["pair"], keras_model=self.keras)
else: else:
self.model = dk.load_data(metadata['pair'], keras_model=self.keras) self.model = dk.load_data(metadata["pair"], keras_model=self.keras)
self.check_if_feature_list_matches_strategy(dataframe_train, dk) self.check_if_feature_list_matches_strategy(dataframe_train, dk)
preds, do_preds = self.predict(dataframe_backtest, dk) preds, do_preds = self.predict(dataframe_backtest, dk)
dk.append_predictions(preds, do_preds, len(dataframe_backtest)) dk.append_predictions(preds, do_preds, len(dataframe_backtest))
print('predictions', len(dk.full_predictions), print("predictions", len(dk.full_predictions), "do_predict", len(dk.full_do_predict))
'do_predict', len(dk.full_do_predict))
dk.fill_predictions(len(dataframe)) dk.fill_predictions(len(dataframe))
return dk return dk
def start_live(self, dataframe: DataFrame, metadata: dict, def start_live(
strategy: IStrategy, dk: FreqaiDataKitchen) -> FreqaiDataKitchen: self, dataframe: DataFrame, metadata: dict, strategy: IStrategy, dk: FreqaiDataKitchen
) -> FreqaiDataKitchen:
""" """
The main broad execution for dry/live. This function will check if a retraining should be The main broad execution for dry/live. This function will check if a retraining should be
performed, and if so, retrain and reset the model. performed, and if so, retrain and reset the model.
@ -232,14 +239,11 @@ class IFreqaiModel(ABC):
self.dd.update_follower_metadata() self.dd.update_follower_metadata()
# get the model metadata associated with the current pair # get the model metadata associated with the current pair
(_, (_, trained_timestamp, _, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"])
trained_timestamp,
_,
return_null_array) = self.dd.get_pair_dict_info(metadata['pair'])
# if the metadata doesnt exist, the follower returns null arrays to strategy # if the metadata doesnt exist, the follower returns null arrays to strategy
if self.follow_mode and return_null_array: if self.follow_mode and return_null_array:
logger.info('Returning null array from follower to strategy') logger.info("Returning null array from follower to strategy")
self.dd.return_null_values_to_strategy(dataframe, dk) self.dd.return_null_values_to_strategy(dataframe, dk)
return dk return dk
@ -253,16 +257,18 @@ class IFreqaiModel(ABC):
# if not trainable, load existing data # if not trainable, load existing data
if not self.follow_mode: if not self.follow_mode:
(_, (_, new_trained_timerange, data_load_timerange) = dk.check_if_new_training_required(
new_trained_timerange, trained_timestamp
data_load_timerange) = dk.check_if_new_training_required(trained_timestamp) )
dk.set_paths(metadata['pair'], new_trained_timerange.stopts) dk.set_paths(metadata["pair"], new_trained_timerange.stopts)
# download candle history if it is not already in memory # download candle history if it is not already in memory
if not self.dd.historic_data: if not self.dd.historic_data:
logger.info('Downloading all training data for all pairs in whitelist and ' logger.info(
'corr_pairlist, this may take a while if you do not have the ' "Downloading all training data for all pairs in whitelist and "
'data saved') "corr_pairlist, this may take a while if you do not have the "
"data saved"
)
dk.download_all_data_for_training(data_load_timerange) dk.download_all_data_for_training(data_load_timerange)
dk.load_all_pair_histories(data_load_timerange) dk.load_all_pair_histories(data_load_timerange)
@ -271,53 +277,47 @@ class IFreqaiModel(ABC):
self.start_scanning(strategy) self.start_scanning(strategy)
elif self.follow_mode: elif self.follow_mode:
dk.set_paths(metadata['pair'], trained_timestamp) dk.set_paths(metadata["pair"], trained_timestamp)
logger.info('FreqAI instance set to follow_mode, finding existing pair' logger.info(
f'using { self.identifier }') "FreqAI instance set to follow_mode, finding existing pair"
f"using { self.identifier }"
)
# load the model and associated data into the data kitchen # load the model and associated data into the data kitchen
self.model = dk.load_data(coin=metadata['pair'], keras_model=self.keras) self.model = dk.load_data(coin=metadata["pair"], keras_model=self.keras)
if not self.model: if not self.model:
logger.warning('No model ready, returning null values to strategy.') logger.warning("No model ready, returning null values to strategy.")
self.dd.return_null_values_to_strategy(dataframe, dk) self.dd.return_null_values_to_strategy(dataframe, dk)
return dk return dk
# ensure user is feeding the correct indicators to the model # ensure user is feeding the correct indicators to the model
self.check_if_feature_list_matches_strategy(dataframe, dk) self.check_if_feature_list_matches_strategy(dataframe, dk)
self.build_strategy_return_arrays(dataframe, dk, metadata['pair'], trained_timestamp) self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)
return dk return dk
def build_strategy_return_arrays(self, dataframe: DataFrame, def build_strategy_return_arrays(
dk: FreqaiDataKitchen, pair: str, self, dataframe: DataFrame, dk: FreqaiDataKitchen, pair: str, trained_timestamp: int
trained_timestamp: int) -> None: ) -> None:
# hold the historical predictions in memory so we are sending back # hold the historical predictions in memory so we are sending back
# correct array to strategy # correct array to strategy
if pair not in self.dd.model_return_values: if pair not in self.dd.model_return_values:
pred_df, do_preds = self.predict(dataframe, dk) pred_df, do_preds = self.predict(dataframe, dk)
# mypy doesnt like the typing in else statement, so we need to explicitly add to
# dataframe separately
# for label in dk.label_list:
# dataframe[label] = pred_df[label]
# dataframe['do_predict'] = do_preds
# dk.append_predictions(preds, do_preds, len(dataframe))
# dk.fill_predictions(len(dataframe))
self.dd.set_initial_return_values(pair, dk, pred_df, do_preds) self.dd.set_initial_return_values(pair, dk, pred_df, do_preds)
dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe) dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe)
return return
elif self.dk.check_if_model_expired(trained_timestamp): elif self.dk.check_if_model_expired(trained_timestamp):
pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list) pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list)
do_preds, dk.DI_values = np.ones(2) * 2, np.zeros(2) do_preds, dk.DI_values = np.ones(2) * 2, np.zeros(2)
logger.warning('Model expired, returning null values to strategy. Strategy ' logger.warning(
'construction should take care to consider this event with ' "Model expired, returning null values to strategy. Strategy "
'prediction == 0 and do_predict == 2') "construction should take care to consider this event with "
"prediction == 0 and do_predict == 2"
)
else: else:
# Only feed in the most recent candle for prediction in live scenario # Only feed in the most recent candle for prediction in live scenario
pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False) pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False)
@ -327,8 +327,9 @@ class IFreqaiModel(ABC):
return return
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame, def check_if_feature_list_matches_strategy(
dk: FreqaiDataKitchen) -> None: self, dataframe: DataFrame, dk: FreqaiDataKitchen
) -> None:
""" """
Ensure user is passing the proper feature set if they are reusing an `identifier` pointing Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
to a folder holding existing models. to a folder holding existing models.
@ -337,16 +338,18 @@ class IFreqaiModel(ABC):
dk: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop dk: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop
""" """
dk.find_features(dataframe) dk.find_features(dataframe)
if 'training_features_list_raw' in dk.data: if "training_features_list_raw" in dk.data:
feature_list = dk.data['training_features_list_raw'] feature_list = dk.data["training_features_list_raw"]
else: else:
feature_list = dk.training_features_list feature_list = dk.training_features_list
if dk.training_features_list != feature_list: if dk.training_features_list != feature_list:
raise OperationalException("Trying to access pretrained model with `identifier` " raise OperationalException(
"Trying to access pretrained model with `identifier` "
"but found different features furnished by current strategy." "but found different features furnished by current strategy."
"Change `identifer` to train from scratch, or ensure the" "Change `identifer` to train from scratch, or ensure the"
"strategy is furnishing the same features as the pretrained" "strategy is furnishing the same features as the pretrained"
"model") "model"
)
def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None: def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
""" """
@ -356,13 +359,13 @@ class IFreqaiModel(ABC):
of how outlier data points are dropped from the dataframe used for training. of how outlier data points are dropped from the dataframe used for training.
""" """
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"):
dk.principal_component_analysis() dk.principal_component_analysis()
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'): if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers"):
dk.use_SVM_to_remove_outliers(predict=False) dk.use_SVM_to_remove_outliers(predict=False)
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'): if self.freqai_info.get("feature_parameters", {}).get("DI_threshold"):
dk.data["avg_mean_dist"] = dk.compute_distances() dk.data["avg_mean_dist"] = dk.compute_distances()
# if self.feature_parameters["determine_statistical_distributions"]: # if self.feature_parameters["determine_statistical_distributions"]:
@ -381,13 +384,13 @@ class IFreqaiModel(ABC):
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
for buy signals. for buy signals.
""" """
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"):
dk.pca_transform(dataframe) dk.pca_transform(dataframe)
if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'): if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers"):
dk.use_SVM_to_remove_outliers(predict=True) dk.use_SVM_to_remove_outliers(predict=True)
if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'): if self.freqai_info.get("feature_parameters", {}).get("DI_threshold"):
dk.check_if_pred_in_training_spaces() dk.check_if_pred_in_training_spaces()
# if self.feature_parameters["determine_statistical_distributions"]: # if self.feature_parameters["determine_statistical_distributions"]:
@ -395,8 +398,14 @@ class IFreqaiModel(ABC):
# if self.feature_parameters["remove_outliers"]: # if self.feature_parameters["remove_outliers"]:
# dk.remove_outliers(predict=True) # creates dropped index # dk.remove_outliers(predict=True) # creates dropped index
def model_exists(self, pair: str, dk: FreqaiDataKitchen, trained_timestamp: int = None, def model_exists(
model_filename: str = '', scanning: bool = False) -> bool: self,
pair: str,
dk: FreqaiDataKitchen,
trained_timestamp: int = None,
model_filename: str = "",
scanning: bool = False,
) -> bool:
""" """
Given a pair and path, check if a model already exists Given a pair and path, check if a model already exists
:param pair: pair e.g. BTC/USD :param pair: pair e.g. BTC/USD
@ -416,25 +425,33 @@ class IFreqaiModel(ABC):
return file_exists return file_exists
def set_full_path(self) -> None: def set_full_path(self) -> None:
self.full_path = Path(self.config['user_data_dir'] / self.full_path = Path(
"models" / self.config["user_data_dir"] / "models" / str(self.freqai_info.get("identifier"))
str(self.freqai_info.get('identifier'))) )
self.full_path.mkdir(parents=True, exist_ok=True) self.full_path.mkdir(parents=True, exist_ok=True)
shutil.copy(self.config['config_files'][0], Path(self.full_path, shutil.copy(
Path(self.config['config_files'][0]).name)) self.config["config_files"][0],
Path(self.full_path, Path(self.config["config_files"][0]).name),
)
def remove_features_from_df(self, dataframe: DataFrame) -> DataFrame: def remove_features_from_df(self, dataframe: DataFrame) -> DataFrame:
""" """
Remove the features from the dataframe before returning it to strategy. This keeps it Remove the features from the dataframe before returning it to strategy. This keeps it
compact for Frequi purposes. compact for Frequi purposes.
""" """
to_keep = [col for col in dataframe.columns to_keep = [
if not col.startswith('%') or col.startswith('%%')] col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%")
]
return dataframe[to_keep] return dataframe[to_keep]
def train_model_in_series(self, new_trained_timerange: TimeRange, pair: str, def train_model_in_series(
strategy: IStrategy, dk: FreqaiDataKitchen, self,
data_load_timerange: TimeRange): new_trained_timerange: TimeRange,
pair: str,
strategy: IStrategy,
dk: FreqaiDataKitchen,
data_load_timerange: TimeRange,
):
""" """
Retreive data and train model in single threaded mode (only used if model directory is empty Retreive data and train model in single threaded mode (only used if model directory is empty
upon startup for dry/live ) upon startup for dry/live )
@ -447,13 +464,13 @@ class IFreqaiModel(ABC):
(larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs) (larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs)
""" """
corr_dataframes, base_dataframes = dk.get_base_and_corr_dataframes(data_load_timerange, corr_dataframes, base_dataframes = dk.get_base_and_corr_dataframes(
pair) data_load_timerange, pair
)
unfiltered_dataframe = dk.use_strategy_to_populate_indicators(strategy, unfiltered_dataframe = dk.use_strategy_to_populate_indicators(
corr_dataframes, strategy, corr_dataframes, base_dataframes, pair
base_dataframes, )
pair)
unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe) unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe)
@ -462,15 +479,15 @@ class IFreqaiModel(ABC):
model = self.train(unfiltered_dataframe, pair, dk) model = self.train(unfiltered_dataframe, pair, dk)
self.dd.pair_dict[pair]['trained_timestamp'] = new_trained_timerange.stopts self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
dk.set_new_model_names(pair, new_trained_timerange) dk.set_new_model_names(pair, new_trained_timerange)
self.dd.pair_dict[pair]['first'] = False self.dd.pair_dict[pair]["first"] = False
if self.dd.pair_dict[pair]['priority'] == 1 and self.scanning: if self.dd.pair_dict[pair]["priority"] == 1 and self.scanning:
with self.lock: with self.lock:
self.dd.pair_to_end_of_training_queue(pair) self.dd.pair_to_end_of_training_queue(pair)
dk.save_data(model, coin=pair, keras_model=self.keras) dk.save_data(model, coin=pair, keras_model=self.keras)
if self.freqai_info.get('purge_old_models', False): if self.freqai_info.get("purge_old_models", False):
self.dd.purge_old_models() self.dd.purge_old_models()
# self.retrain = False # self.retrain = False
@ -503,8 +520,9 @@ class IFreqaiModel(ABC):
return return
@abstractmethod @abstractmethod
def predict(self, dataframe: DataFrame, def predict(
dk: FreqaiDataKitchen, first: bool = True) -> Tuple[DataFrame, npt.ArrayLike]: self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
) -> Tuple[DataFrame, npt.ArrayLike]:
""" """
Filter the prediction features data and predict with it. Filter the prediction features data and predict with it.
:param: :param:

View File

@ -45,8 +45,9 @@ class CatboostPredictionModel(IFreqaiModel):
return dataframe["s"] return dataframe["s"]
def train(self, unfiltered_dataframe: DataFrame, def train(
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
""" """
Filter the training data and train a model to it. Train makes heavy use of the datahkitchen Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
for storing, saving, loading, and analyzing the data. for storing, saving, loading, and analyzing the data.
@ -57,8 +58,7 @@ class CatboostPredictionModel(IFreqaiModel):
:model: Trained model which can be used to inference (self.predict) :model: Trained model which can be used to inference (self.predict)
""" """
logger.info('--------------------Starting training ' logger.info("--------------------Starting training " f"{pair} --------------------")
f'{pair} --------------------')
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk) # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
# filter the features requested by user in the configuration file and elegantly handle NaNs # filter the features requested by user in the configuration file and elegantly handle NaNs
@ -78,13 +78,14 @@ class CatboostPredictionModel(IFreqaiModel):
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
self.data_cleaning_train(dk) self.data_cleaning_train(dk)
logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}' logger.info(
' features') f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary) model = self.fit(data_dictionary)
logger.info(f'--------------------done training {pair}--------------------') logger.info(f"--------------------done training {pair}--------------------")
return model return model
@ -110,14 +111,17 @@ class CatboostPredictionModel(IFreqaiModel):
model = CatBoostRegressor( model = CatBoostRegressor(
allow_writing_files=False, allow_writing_files=False,
verbose=100, early_stopping_rounds=400, **self.model_training_parameters verbose=100,
early_stopping_rounds=400,
**self.model_training_parameters,
) )
model.fit(X=train_data, eval_set=test_data) model.fit(X=train_data, eval_set=test_data)
return model return model
def predict(self, unfiltered_dataframe: DataFrame, def predict(
dk: FreqaiDataKitchen, first: bool = False) -> Tuple[DataFrame, DataFrame]: self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
) -> Tuple[DataFrame, DataFrame]:
""" """
Filter the prediction features data and predict with it. Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period. :param: unfiltered_dataframe: Full dataframe for the current backtest period.
@ -141,8 +145,10 @@ class CatboostPredictionModel(IFreqaiModel):
pred_df = DataFrame(predictions, columns=dk.label_list) pred_df = DataFrame(predictions, columns=dk.label_list)
for label in dk.label_list: for label in dk.label_list:
pred_df[label] = ((pred_df[label] + 1) * pred_df[label] = (
(dk.data["labels_max"][label] - (pred_df[label] + 1)
dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label] * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
/ 2
) + dk.data["labels_min"][label]
return (pred_df, dk.do_predict) return (pred_df, dk.do_predict)

View File

@ -28,8 +28,9 @@ class CatboostPredictionMultiModel(IFreqaiModel):
return dataframe return dataframe
def train(self, unfiltered_dataframe: DataFrame, def train(
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
""" """
Filter the training data and train a model to it. Train makes heavy use of the datahkitchen Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
for storing, saving, loading, and analyzing the data. for storing, saving, loading, and analyzing the data.
@ -40,8 +41,7 @@ class CatboostPredictionMultiModel(IFreqaiModel):
:model: Trained model which can be used to inference (self.predict) :model: Trained model which can be used to inference (self.predict)
""" """
logger.info('--------------------Starting training ' logger.info("--------------------Starting training " f"{pair} --------------------")
f'{pair} --------------------')
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk) # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
# filter the features requested by user in the configuration file and elegantly handle NaNs # filter the features requested by user in the configuration file and elegantly handle NaNs
@ -61,13 +61,14 @@ class CatboostPredictionMultiModel(IFreqaiModel):
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
self.data_cleaning_train(dk) self.data_cleaning_train(dk)
logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}' logger.info(
' features') f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary) model = self.fit(data_dictionary)
logger.info(f'--------------------done training {pair}--------------------') logger.info(f"--------------------done training {pair}--------------------")
return model return model
@ -80,22 +81,26 @@ class CatboostPredictionMultiModel(IFreqaiModel):
""" """
cbr = CatBoostRegressor( cbr = CatBoostRegressor(
allow_writing_files=False, gpu_ram_part=0.5, allow_writing_files=False,
verbose=100, early_stopping_rounds=400, **self.model_training_parameters gpu_ram_part=0.5,
verbose=100,
early_stopping_rounds=400,
**self.model_training_parameters,
) )
X = data_dictionary["train_features"] X = data_dictionary["train_features"]
y = data_dictionary["train_labels"] y = data_dictionary["train_labels"]
# eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) # eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
sample_weight = data_dictionary['train_weights'] sample_weight = data_dictionary["train_weights"]
model = MultiOutputRegressor(estimator=cbr) model = MultiOutputRegressor(estimator=cbr)
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
return model return model
def predict(self, unfiltered_dataframe: DataFrame, def predict(
dk: FreqaiDataKitchen, first: bool = False) -> Tuple[DataFrame, DataFrame]: self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
) -> Tuple[DataFrame, DataFrame]:
""" """
Filter the prediction features data and predict with it. Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period. :param: unfiltered_dataframe: Full dataframe for the current backtest period.
@ -119,8 +124,10 @@ class CatboostPredictionMultiModel(IFreqaiModel):
pred_df = DataFrame(predictions, columns=dk.label_list) pred_df = DataFrame(predictions, columns=dk.label_list)
for label in dk.label_list: for label in dk.label_list:
pred_df[label] = ((pred_df[label] + 1) * pred_df[label] = (
(dk.data["labels_max"][label] - (pred_df[label] + 1)
dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label] * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
/ 2
) + dk.data["labels_min"][label]
return (pred_df, dk.do_predict) return (pred_df, dk.do_predict)

View File

@ -27,8 +27,9 @@ class LightGBMPredictionModel(IFreqaiModel):
return dataframe return dataframe
def train(self, unfiltered_dataframe: DataFrame, def train(
pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
""" """
Filter the training data and train a model to it. Train makes heavy use of the datahkitchen Filter the training data and train a model to it. Train makes heavy use of the datahkitchen
for storing, saving, loading, and analyzing the data. for storing, saving, loading, and analyzing the data.
@ -39,8 +40,7 @@ class LightGBMPredictionModel(IFreqaiModel):
:model: Trained model which can be used to inference (self.predict) :model: Trained model which can be used to inference (self.predict)
""" """
logger.info('--------------------Starting training ' logger.info("--------------------Starting training " f"{pair} --------------------")
f'{pair} --------------------')
# unfiltered_labels = self.make_labels(unfiltered_dataframe, dk) # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk)
# filter the features requested by user in the configuration file and elegantly handle NaNs # filter the features requested by user in the configuration file and elegantly handle NaNs
@ -60,13 +60,14 @@ class LightGBMPredictionModel(IFreqaiModel):
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
self.data_cleaning_train(dk) self.data_cleaning_train(dk)
logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}' logger.info(
' features') f'Training model on {len(dk.data_dictionary["train_features"].columns)}' " features"
)
logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
model = self.fit(data_dictionary) model = self.fit(data_dictionary)
logger.info(f'--------------------done training {pair}--------------------') logger.info(f"--------------------done training {pair}--------------------")
return model return model
@ -89,8 +90,9 @@ class LightGBMPredictionModel(IFreqaiModel):
return model return model
def predict(self, unfiltered_dataframe: DataFrame, def predict(
dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen
) -> Tuple[DataFrame, DataFrame]:
""" """
Filter the prediction features data and predict with it. Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period. :param: unfiltered_dataframe: Full dataframe for the current backtest period.
@ -116,8 +118,10 @@ class LightGBMPredictionModel(IFreqaiModel):
pred_df = DataFrame(predictions, columns=dk.label_list) pred_df = DataFrame(predictions, columns=dk.label_list)
for label in dk.label_list: for label in dk.label_list:
pred_df[label] = ((pred_df[label] + 1) * pred_df[label] = (
(dk.data["labels_max"][label] - (pred_df[label] + 1)
dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label] * (dk.data["labels_max"][label] - dk.data["labels_min"][label])
/ 2
) + dk.data["labels_min"][label]
return (pred_df, dk.do_predict) return (pred_df, dk.do_predict)

View File

@ -120,9 +120,7 @@ class FreqaiExampleStrategy(IStrategy):
informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"] informative["close"] / informative[f"{coin}bb_lowerband-period_{t}"]
) )
informative[f"%-{coin}roc-period_{t}"] = ta.ROC( informative[f"%-{coin}roc-period_{t}"] = ta.ROC(informative, timeperiod=t)
informative, timeperiod=t
)
macd = ta.MACD(informative, timeperiod=t) macd = ta.MACD(informative, timeperiod=t)
informative[f"%-{coin}macd-period_{t}"] = macd["macd"] informative[f"%-{coin}macd-period_{t}"] = macd["macd"]
@ -152,17 +150,17 @@ class FreqaiExampleStrategy(IStrategy):
# Add generalized indicators here (because in live, it will call this # Add generalized indicators here (because in live, it will call this
# function to populate indicators during training). Notice how we ensure not to # function to populate indicators during training). Notice how we ensure not to
# add them multiple times # add them multiple times
if pair == self.freqai_info['corr_pairlist'][0] and tf == self.timeframe: if pair == self.freqai_info["corr_pairlist"][0] and tf == self.timeframe:
df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25
# user adds targets here by prepending them with &- (see convention below) # user adds targets here by prepending them with &- (see convention below)
# If user wishes to use multiple targets, a multioutput prediction model # If user wishes to use multiple targets, a multioutput prediction model
# needs to be used such as templates/CatboostPredictionMultiModel.py # needs to be used such as templates/CatboostPredictionMultiModel.py
df['&-s_close'] = ( df["&-s_close"] = (
df["close"] df["close"]
.shift(-self.freqai_info['feature_parameters']["period"]) .shift(-self.freqai_info["feature_parameters"]["period"])
.rolling(self.freqai_info['feature_parameters']["period"]) .rolling(self.freqai_info["feature_parameters"]["period"])
.mean() .mean()
/ df["close"] / df["close"]
- 1 - 1
@ -231,19 +229,20 @@ class FreqaiExampleStrategy(IStrategy):
def get_ticker_indicator(self): def get_ticker_indicator(self):
return int(self.config["timeframe"][:-1]) return int(self.config["timeframe"][:-1])
def custom_exit(self, pair: str, trade: Trade, current_time, current_rate, def custom_exit(
current_profit, **kwargs): self, pair: str, trade: Trade, current_time, current_rate, current_profit, **kwargs
):
dataframe, _ = self.dp.get_analyzed_dataframe(pair=pair, timeframe=self.timeframe) dataframe, _ = self.dp.get_analyzed_dataframe(pair=pair, timeframe=self.timeframe)
trade_date = timeframe_to_prev_date(self.config['timeframe'], trade.open_date_utc) trade_date = timeframe_to_prev_date(self.config["timeframe"], trade.open_date_utc)
trade_candle = dataframe.loc[(dataframe['date'] == trade_date)] trade_candle = dataframe.loc[(dataframe["date"] == trade_date)]
if trade_candle.empty: if trade_candle.empty:
return None return None
trade_candle = trade_candle.squeeze() trade_candle = trade_candle.squeeze()
follow_mode = self.config.get('freqai', {}).get('follow_mode', False) follow_mode = self.config.get("freqai", {}).get("follow_mode", False)
if not follow_mode: if not follow_mode:
pair_dict = self.model.bridge.data_drawer.pair_dict pair_dict = self.model.bridge.data_drawer.pair_dict
@ -252,30 +251,33 @@ class FreqaiExampleStrategy(IStrategy):
entry_tag = trade.enter_tag entry_tag = trade.enter_tag
if ('prediction' + entry_tag not in pair_dict[pair] or if (
pair_dict[pair]['prediction' + entry_tag] > 0): "prediction" + entry_tag not in pair_dict[pair]
or pair_dict[pair]["prediction" + entry_tag] > 0
):
with self.model.bridge.lock: with self.model.bridge.lock:
pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['&-s_close']) pair_dict[pair]["prediction" + entry_tag] = abs(trade_candle["&-s_close"])
if not follow_mode: if not follow_mode:
self.model.bridge.data_drawer.save_drawer_to_disk() self.model.bridge.data_drawer.save_drawer_to_disk()
else: else:
self.model.bridge.data_drawer.save_follower_dict_to_disk() self.model.bridge.data_drawer.save_follower_dict_to_disk()
roi_price = pair_dict[pair]['prediction' + entry_tag] roi_price = pair_dict[pair]["prediction" + entry_tag]
roi_time = self.max_roi_time_long.value roi_time = self.max_roi_time_long.value
roi_decay = roi_price * (1 - ((current_time - trade.open_date_utc).seconds) / roi_decay = roi_price * (
(roi_time * 60)) 1 - ((current_time - trade.open_date_utc).seconds) / (roi_time * 60)
)
if roi_decay < 0: if roi_decay < 0:
roi_decay = self.linear_roi_offset.value roi_decay = self.linear_roi_offset.value
else: else:
roi_decay += self.linear_roi_offset.value roi_decay += self.linear_roi_offset.value
if current_profit > roi_decay: if current_profit > roi_decay:
return 'roi_custom_win' return "roi_custom_win"
if current_profit < -roi_decay: if current_profit < -roi_decay:
return 'roi_custom_loss' return "roi_custom_loss"
def confirm_trade_exit( def confirm_trade_exit(
self, self,
@ -287,7 +289,7 @@ class FreqaiExampleStrategy(IStrategy):
time_in_force: str, time_in_force: str,
exit_reason: str, exit_reason: str,
current_time, current_time,
**kwargs **kwargs,
) -> bool: ) -> bool:
entry_tag = trade.enter_tag entry_tag = trade.enter_tag
@ -316,7 +318,7 @@ class FreqaiExampleStrategy(IStrategy):
current_time, current_time,
entry_tag, entry_tag,
side: str, side: str,
**kwargs **kwargs,
) -> bool: ) -> bool:
df, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe) df, _ = self.dp.get_analyzed_dataframe(pair, self.timeframe)