diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 17f3fc2ee..42584eae4 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -163,21 +163,30 @@ class FreqaiDataDrawer: # send pair to end of queue self.pair_dict[pair]['priority'] = len(self.pair_dict) - def set_initial_return_values(self, pair: str, dh, dataframe: DataFrame) -> None: + def set_initial_return_values(self, pair: str, dk, pred_df, do_preds) -> None: + """ + Set the initial return values to a persistent dataframe. This avoids needing to repredict on + historical candles, and also stores historical predictions despite retrainings (so stored + predictions are true predictions, not just inferencing on trained data) + """ + self.model_return_values[pair] = pd.DataFrame() + for label in dk.label_list: + self.model_return_values[pair][label] = pred_df[label] + self.model_return_values[pair][f'{label}_mean'] = dk.data['labels_mean'][label] + self.model_return_values[pair][f'{label}_std'] = dk.data['labels_std'][label] - self.model_return_values[pair] = dataframe - self.model_return_values[pair]['target_mean'] = dh.data['target_mean'] - self.model_return_values[pair]['target_std'] = dh.data['target_std'] if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0: - self.model_return_values[pair]['DI_values'] = dh.DI_values + self.model_return_values[pair]['DI_values'] = dk.DI_values + + self.model_return_values[pair]['do_predict'] = do_preds def append_model_predictions(self, pair: str, predictions, do_preds, - target_mean, target_std, dh, len_df) -> None: + dk, len_df) -> None: # strat seems to feed us variable sized dataframes - and since we are trying to build our # own return array in the same shape, we need to figure out how the size has changed # and adapt our stored/returned info accordingly. - length_difference = len(self.model_return_values[pair]['prediction']) - len_df + length_difference = len(self.model_return_values[pair]) - len_df i = 0 if length_difference == 0: @@ -185,30 +194,56 @@ class FreqaiDataDrawer: elif length_difference > 0: i = length_difference + 1 - df = self.model_return_values[pair].shift(-i) + df = self.model_return_values[pair] = self.model_return_values[pair].shift(-i) - df['prediction'].iloc[-1] = predictions[-1] + for label in dk.label_list: + df[label].iloc[-1] = predictions[label].iloc[-1] + df[f"{label}_mean"].iloc[-1] = dk.data['labels_mean'][label] + df[f"{label}_std"].iloc[-1] = dk.data['labels_std'][label] + # df['prediction'].iloc[-1] = predictions[-1] df['do_predict'].iloc[-1] = do_preds[-1] - df['target_mean'].iloc[-1] = target_mean - df['target_std'].iloc[-1] = target_std + if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0: - df['DI_values'].iloc[-1] = dh.DI_values[-1] + df['DI_values'].iloc[-1] = dk.DI_values[-1] if length_difference < 0: prepend_df = pd.DataFrame(np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns) df = pd.concat([prepend_df, df], axis=0) - def return_null_values_to_strategy(self, dataframe: DataFrame, dh) -> None: + def attach_return_values_to_return_dataframe(self, pair: str, dataframe) -> DataFrame: + """ + Attach the return values to the strat dataframe + :params: + dataframe: DataFrame = strat dataframe + :returns: + dataframe: DataFrame = strat dataframe with return values attached + """ + df = self.model_return_values[pair] + to_keep = [col for col in dataframe.columns if not col.startswith('&')] + dataframe = pd.concat([dataframe[to_keep], df], axis=1) + return dataframe - dataframe['prediction'] = 0 + def return_null_values_to_strategy(self, dataframe: DataFrame, dk) -> None: + """ + Build 0 filled dataframe to return to strategy + """ + + dk.find_features(dataframe) + + for label in dk.label_list: + dataframe[label] = 0 + dataframe[f"{label}_mean"] = 0 + dataframe[f"{label}_std"] = 0 + + # dataframe['prediction'] = 0 dataframe['do_predict'] = 0 - dataframe['target_mean'] = 0 - dataframe['target_std'] = 0 if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0: dataframe['DI_value'] = 0 + dk.return_dataframe = dataframe + def purge_old_models(self) -> None: model_folders = [x for x in self.full_path.iterdir() if x.is_dir()] @@ -257,7 +292,7 @@ class FreqaiDataDrawer: # with open(self.full_path / str('model_return_values.json'), "w") as fp: # json.dump(self.model_return_values, fp, default=self.np_encoder) - # def load_model_return_values_from_disk(self, dh: FreqaiDataKitchen) -> FreqaiDataKitchen: + # def load_model_return_values_from_disk(self, dk: FreqaiDataKitchen) -> FreqaiDataKitchen: # exists = Path(self.full_path / str('model_return_values.json')).resolve().exists() # if exists: # with open(self.full_path / str('model_return_values.json'), "r") as fp: @@ -268,4 +303,4 @@ class FreqaiDataDrawer: # logger.warning(f'Follower could not find pair_dictionary at {self.full_path} ' # 'sending null values back to strategy') - # return exists, dh + # return exists, dk diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 599706636..d11da968f 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -53,6 +53,7 @@ class FreqaiDataKitchen: self.full_target_mean: npt.ArrayLike = np.array([]) self.full_target_std: npt.ArrayLike = np.array([]) self.data_path = Path() + self.label_list: List = [] self.model_filename: str = "" self.live = live self.pair = pair @@ -68,8 +69,8 @@ class FreqaiDataKitchen: config["freqai"]["train_period"], config["freqai"]["backtest_period"], ) - - self.data_drawer = data_drawer + # self.strat_dataframe: DataFrame = strat_dataframe + self.dd = data_drawer def set_paths(self, pair: str, trained_timestamp: int = None,) -> None: """ @@ -88,7 +89,7 @@ class FreqaiDataKitchen: return - def save_data(self, model: Any, coin: str = '', keras_model=False) -> None: + def save_data(self, model: Any, coin: str = '', keras_model=False, label=None) -> None: """ Saves all data associated with a model for a single sub-train time range :params: @@ -103,9 +104,9 @@ class FreqaiDataKitchen: # Save the trained model if not keras_model: - dump(model, save_path / str(self.model_filename + "_model.joblib")) + dump(model, save_path / f"{self.model_filename}_model.joblib") else: - model.save(save_path / str(self.model_filename + "_model.h5")) + model.save(save_path / f"{self.model_filename}_model.h5") if self.svm_model is not None: dump(self.svm_model, save_path / str(self.model_filename + "_svm_model.joblib")) @@ -113,6 +114,7 @@ class FreqaiDataKitchen: self.data["data_path"] = str(self.data_path) self.data["model_filename"] = str(self.model_filename) self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns) + self.data['label_list'] = self.label_list # store the metadata with open(save_path / str(self.model_filename + "_metadata.json"), "w") as fp: json.dump(self.data, fp, default=self.np_encoder) @@ -127,10 +129,10 @@ class FreqaiDataKitchen: str(self.model_filename + "_pca_object.pkl"), "wb")) # if self.live: - self.data_drawer.model_dictionary[self.model_filename] = model - self.data_drawer.pair_dict[coin]['model_filename'] = self.model_filename - self.data_drawer.pair_dict[coin]['data_path'] = str(self.data_path) - self.data_drawer.save_drawer_to_disk() + self.dd.model_dictionary[self.model_filename] = model + self.dd.pair_dict[coin]['model_filename'] = self.model_filename + self.dd.pair_dict[coin]['data_path'] = str(self.data_path) + self.dd.save_drawer_to_disk() # TODO add a helper function to let user save/load any data they are custom adding. We # do not want them having to edit the default save/load methods here. Below is an example @@ -154,12 +156,12 @@ class FreqaiDataKitchen: :model: User trained model which can be inferenced for new predictions """ - if not self.data_drawer.pair_dict[coin]['model_filename']: + if not self.dd.pair_dict[coin]['model_filename']: return None if self.live: - self.model_filename = self.data_drawer.pair_dict[coin]['model_filename'] - self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path']) + self.model_filename = self.dd.pair_dict[coin]['model_filename'] + self.data_path = Path(self.dd.pair_dict[coin]['data_path']) if self.freqai_config.get('follow_mode', False): # follower can be on a different system which is rsynced to the leader: self.data_path = Path(self.config["user_data_dir"] / @@ -169,6 +171,7 @@ class FreqaiDataKitchen: with open(self.data_path / str(self.model_filename + "_metadata.json"), "r") as fp: self.data = json.load(fp) self.training_features_list = self.data["training_features_list"] + self.label_list = self.data['label_list'] self.data_dictionary["train_features"] = pd.read_pickle( self.data_path / str(self.model_filename + "_trained_df.pkl") @@ -191,8 +194,8 @@ class FreqaiDataKitchen: # self.model_filename = self.data["model_filename"] # try to access model in memory instead of loading object from disk to save time - if self.live and self.model_filename in self.data_drawer.model_dictionary: - model = self.data_drawer.model_dictionary[self.model_filename] + if self.live and self.model_filename in self.dd.model_dictionary: + model = self.dd.model_dictionary[self.model_filename] elif not keras_model: model = load(self.data_path / str(self.model_filename + "_model.joblib")) else: @@ -265,11 +268,12 @@ class FreqaiDataKitchen: self, unfiltered_dataframe: DataFrame, training_feature_list: List, - labels: DataFrame = pd.DataFrame(), + label_list: List = list(), + # labels: DataFrame = pd.DataFrame(), training_filter: bool = True, ) -> Tuple[DataFrame, DataFrame]: """ - Filter the unfiltered dataframe to extract the user requested features and properly + Filter the unfiltered dataframe to extract the user requested features/labels and properly remove all NaNs. Any row with a NaN is removed from training dataset or replaced with 0s in the prediction dataset. However, prediction dataset do_predict will reflect any row that had a NaN and will shield user from that prediction. @@ -287,6 +291,7 @@ class FreqaiDataKitchen: """ filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1) filtered_dataframe = filtered_dataframe.replace([np.inf, -np.inf], np.nan) + drop_index = pd.isnull(filtered_dataframe).any(1) # get the rows that have NaNs, drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement. if ( @@ -294,10 +299,8 @@ class FreqaiDataKitchen: ): # we don't care about total row number (total no. datapoints) in training, we only care # about removing any row with NaNs # if labels has multiple columns (user wants to train multiple models), we detect here - if labels.shape[1] == 1: - drop_index_labels = pd.isnull(labels) - else: - drop_index_labels = pd.isnull(labels).any(1) + labels = unfiltered_dataframe.filter(label_list, axis=1) + drop_index_labels = pd.isnull(labels).any(1) drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0) filtered_dataframe = filtered_dataframe[ (drop_index == 0) & (drop_index_labels == 0) @@ -333,6 +336,7 @@ class FreqaiDataKitchen: len(self.do_predict) - self.do_predict.sum(), len(filtered_dataframe), ) + labels = [] return filtered_dataframe, labels @@ -388,8 +392,8 @@ class FreqaiDataKitchen: self.data[item + "_max"] = train_max[item] self.data[item + "_min"] = train_min[item] - self.data["labels_max"] = train_labels_max - self.data["labels_min"] = train_labels_min + self.data["labels_max"] = train_labels_max.to_dict() + self.data["labels_min"] = train_labels_min.to_dict() return data_dictionary @@ -618,7 +622,7 @@ class FreqaiDataKitchen: return - def find_features(self, dataframe: DataFrame) -> list: + def find_features(self, dataframe: DataFrame) -> None: """ Find features in the strategy provided dataframe :params: @@ -628,9 +632,13 @@ class FreqaiDataKitchen: """ column_names = dataframe.columns features = [c for c in column_names if '%' in c] + labels = [c for c in column_names if '&' in c] if not features: raise OperationalException("Could not find any features!") - return features + + self.training_features_list = features + self.label_list = labels + # return features, labels def check_if_pred_in_training_spaces(self) -> None: """ @@ -808,26 +816,6 @@ class FreqaiDataKitchen: data_load_timerange.stopts = int(time) retrain = True - # logger.info( - # f'Total data download needed ' - # f'{(data_load_timerange.stopts - data_load_timerange.startts)/SECONDS_IN_DAY:.2f}' - # ' days') - # logger.info(f'Total training timerange ' - # f'{(trained_timerange.stopts - trained_timerange.startts)/SECONDS_IN_DAY} ' - # ' days') - - # if retrain: - # coin, _ = metadata['pair'].split("/") - # # set the new data_path - # self.data_path = Path(self.full_path / str("sub-train" + "-" + - # str(int(trained_timerange.stopts)))) - - # self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts)) - # # this is not persistent at the moment TODO - # self.freqai_config['live_trained_timerange'] = str(int(trained_timerange.stopts)) - # # enables persistence, but not fully implemented into save/load data yer - # self.data['live_trained_timerange'] = str(int(trained_timerange.stopts)) - return retrain, trained_timerange, data_load_timerange def set_new_model_names(self, pair: str, trained_timerange: TimeRange): @@ -896,8 +884,8 @@ class FreqaiDataKitchen: dataframe: DataFrame = strategy provided dataframe """ - with self.data_drawer.history_lock: - history_data = self.data_drawer.historic_data + with self.dd.history_lock: + history_data = self.dd.historic_data for pair in self.all_pairs: for tf in self.freqai_config.get('timeframes'): @@ -939,7 +927,7 @@ class FreqaiDataKitchen: timerange: TimeRange = full timerange required to populate all indicators for training according to user defined train_period """ - history_data = self.data_drawer.historic_data + history_data = self.dd.historic_data for pair in self.all_pairs: if pair not in history_data: @@ -964,10 +952,10 @@ class FreqaiDataKitchen: metadata: dict = strategy furnished pair metadata """ - with self.data_drawer.history_lock: + with self.dd.history_lock: corr_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {} - historic_data = self.data_drawer.historic_data + historic_data = self.dd.historic_data pairs = self.freqai_config.get('corr_pairlist', []) for tf in self.freqai_config.get('timeframes'): @@ -1068,18 +1056,18 @@ class FreqaiDataKitchen: """ import scipy as spy - f = spy.stats.norm.fit(self.data_dictionary["train_labels"]) + self.data['labels_mean'], self.data['labels_std'] = {}, {} + for label in self.label_list: + f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label]) + self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1] # KEEPME incase we want to let user start to grab quantiles. # upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][ # 'target_quantile'], *f) # lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][ # 'target_quantile'], *f) - - self.data["target_mean"], self.data["target_std"] = f[0], f[1] # self.data["upper_quantile"] = upper_q # self.data["lower_quantile"] = lower_q - return def np_encoder(self, object): diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 64a13c802..2b1714761 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -59,9 +59,7 @@ class IFreqaiModel(ABC): self.update_historic_data = 0 self.set_full_path() self.follow_mode = self.freqai_info.get('follow_mode', False) - self.data_drawer = FreqaiDataDrawer(Path(self.full_path), - self.config, - self.follow_mode) + self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) self.lock = threading.Lock() self.follow_mode = self.freqai_info.get('follow_mode', False) self.identifier = self.freqai_info.get('identifier', 'no_id_provided') @@ -91,12 +89,12 @@ class IFreqaiModel(ABC): """ self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE) - self.data_drawer.set_pair_dict_info(metadata) + self.dd.set_pair_dict_info(metadata) if self.live: - self.dh = FreqaiDataKitchen(self.config, self.data_drawer, + self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"]) - dh = self.start_live(dataframe, metadata, strategy, self.dh) + dk = self.start_live(dataframe, metadata, strategy, self.dk) # For backtesting, each pair enters and then gets trained for each window along the # sliding window defined by "train_period" (training window) and "backtest_period" @@ -104,19 +102,19 @@ class IFreqaiModel(ABC): # FreqAI slides the window and sequentially builds the backtesting results before returning # the concatenated results for the full backtesting period back to the strategy. elif not self.follow_mode: - self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"]) - logger.info(f'Training {len(self.dh.training_timeranges)} timeranges') - dh = self.start_backtesting(dataframe, metadata, self.dh) + self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"]) + logger.info(f'Training {len(self.dk.training_timeranges)} timeranges') + dk = self.start_backtesting(dataframe, metadata, self.dk) - dataframe = self.remove_features_from_df(dataframe) - return self.return_values(dataframe, dh) + dataframe = self.remove_features_from_df(dk.return_dataframe) + return self.return_values(dataframe, dk) @threaded def start_scanning(self, strategy: IStrategy) -> None: """ Function designed to constantly scan pairs for retraining on a separate thread (intracandle) to improve model youth. This function is agnostic to data preparation/collection/storage, - it simply trains on what ever data is available in the self.data_drawer. + it simply trains on what ever data is available in the self.dd. :params: strategy: IStrategy = The user defined strategy class """ @@ -124,33 +122,33 @@ class IFreqaiModel(ABC): time.sleep(1) for pair in self.config.get('exchange', {}).get('pair_whitelist'): - (_, trained_timestamp, _, _) = self.data_drawer.get_pair_dict_info(pair) + (_, trained_timestamp, _, _) = self.dd.get_pair_dict_info(pair) - if self.data_drawer.pair_dict[pair]['priority'] != 1: + if self.dd.pair_dict[pair]['priority'] != 1: continue - dh = FreqaiDataKitchen(self.config, self.data_drawer, + dk = FreqaiDataKitchen(self.config, self.dd, self.live, pair) # file_exists = False - dh.set_paths(pair, trained_timestamp) + dk.set_paths(pair, trained_timestamp) # file_exists = self.model_exists(pair, - # dh, + # dk, # trained_timestamp=trained_timestamp, # model_filename=model_filename, # scanning=True) (retrain, new_trained_timerange, - data_load_timerange) = dh.check_if_new_training_required(trained_timestamp) - dh.set_paths(pair, new_trained_timerange.stopts) + data_load_timerange) = dk.check_if_new_training_required(trained_timestamp) + dk.set_paths(pair, new_trained_timerange.stopts) if retrain: # or not file_exists: self.train_model_in_series(new_trained_timerange, pair, - strategy, dh, data_load_timerange) + strategy, dk, data_load_timerange) def start_backtesting(self, dataframe: DataFrame, metadata: dict, - dh: FreqaiDataKitchen) -> FreqaiDataKitchen: + dk: FreqaiDataKitchen) -> FreqaiDataKitchen: """ The main broad execution for backtesting. For backtesting, each pair enters and then gets trained for each window along the sliding window defined by "train_period" (training window) @@ -161,9 +159,9 @@ class IFreqaiModel(ABC): :params: dataframe: DataFrame = strategy passed dataframe metadata: Dict = pair metadata - dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only :returns: - dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only """ # Loop enforcing the sliding window training/backtesting paradigm @@ -172,15 +170,15 @@ class IFreqaiModel(ABC): # following tr_train. Both of these windows slide through the # entire backtest for tr_train, tr_backtest in zip( - dh.training_timeranges, dh.backtesting_timeranges + dk.training_timeranges, dk.backtesting_timeranges ): - (_, _, _, _) = self.data_drawer.get_pair_dict_info(metadata['pair']) + (_, _, _, _) = self.dd.get_pair_dict_info(metadata['pair']) gc.collect() - dh.data = {} # clean the pair specific data between training window sliding + dk.data = {} # clean the pair specific data between training window sliding self.training_timerange = tr_train # self.training_timerange_timerange = tr_train - dataframe_train = dh.slice_dataframe(tr_train, dataframe) - dataframe_backtest = dh.slice_dataframe(tr_backtest, dataframe) + dataframe_train = dk.slice_dataframe(tr_train, dataframe) + dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) trained_timestamp = tr_train # TimeRange.parse_timerange(tr_train) tr_train_startts_str = datetime.datetime.utcfromtimestamp( @@ -190,33 +188,33 @@ class IFreqaiModel(ABC): logger.info("Training %s", metadata["pair"]) logger.info(f'Training {tr_train_startts_str} to {tr_train_stopts_str}') - dh.data_path = Path(dh.full_path / + dk.data_path = Path(dk.full_path / str("sub-train" + "-" + metadata['pair'].split("/")[0] + str(int(trained_timestamp.stopts)))) - if not self.model_exists(metadata["pair"], dh, + if not self.model_exists(metadata["pair"], dk, trained_timestamp=trained_timestamp.stopts): - self.model = self.train(dataframe_train, metadata['pair'], dh) - self.data_drawer.pair_dict[metadata['pair']][ + self.model = self.train(dataframe_train, metadata['pair'], dk) + self.dd.pair_dict[metadata['pair']][ 'trained_timestamp'] = trained_timestamp.stopts - dh.set_new_model_names(metadata['pair'], trained_timestamp) - dh.save_data(self.model, metadata['pair'], keras=self.keras) + dk.set_new_model_names(metadata['pair'], trained_timestamp) + dk.save_data(self.model, metadata['pair'], keras_model=self.keras) else: - self.model = dh.load_data(metadata['pair'], keras=self.keras) + self.model = dk.load_data(metadata['pair'], keras_model=self.keras) - self.check_if_feature_list_matches_strategy(dataframe_train, dh) + self.check_if_feature_list_matches_strategy(dataframe_train, dk) - preds, do_preds = self.predict(dataframe_backtest, dh) + preds, do_preds = self.predict(dataframe_backtest, dk) - dh.append_predictions(preds, do_preds, len(dataframe_backtest)) - print('predictions', len(dh.full_predictions), - 'do_predict', len(dh.full_do_predict)) + dk.append_predictions(preds, do_preds, len(dataframe_backtest)) + print('predictions', len(dk.full_predictions), + 'do_predict', len(dk.full_do_predict)) - dh.fill_predictions(len(dataframe)) + dk.fill_predictions(len(dataframe)) - return dh + return dk def start_live(self, dataframe: DataFrame, metadata: dict, - strategy: IStrategy, dh: FreqaiDataKitchen) -> FreqaiDataKitchen: + strategy: IStrategy, dk: FreqaiDataKitchen) -> FreqaiDataKitchen: """ The main broad execution for dry/live. This function will check if a retraining should be performed, and if so, retrain and reset the model. @@ -224,30 +222,30 @@ class IFreqaiModel(ABC): dataframe: DataFrame = strategy passed dataframe metadata: Dict = pair metadata strategy: IStrategy = currently employed strategy - dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only :returns: - dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only """ # update follower if self.follow_mode: - self.data_drawer.update_follower_metadata() + self.dd.update_follower_metadata() # get the model metadata associated with the current pair (_, trained_timestamp, _, - return_null_array) = self.data_drawer.get_pair_dict_info(metadata['pair']) + return_null_array) = self.dd.get_pair_dict_info(metadata['pair']) # if the metadata doesnt exist, the follower returns null arrays to strategy if self.follow_mode and return_null_array: logger.info('Returning null array from follower to strategy') - self.data_drawer.return_null_values_to_strategy(dataframe, dh) - return dh + self.dd.return_null_values_to_strategy(dataframe, dk) + return dk # append the historic data once per round - if self.data_drawer.historic_data: - dh.update_historic_data(strategy) + if self.dd.historic_data: + dk.update_historic_data(strategy) logger.debug(f'Updating historic data on pair {metadata["pair"]}') # if trainable, check if model needs training, if so compute new timerange, @@ -257,95 +255,100 @@ class IFreqaiModel(ABC): (_, new_trained_timerange, - data_load_timerange) = dh.check_if_new_training_required(trained_timestamp) - dh.set_paths(metadata['pair'], new_trained_timerange.stopts) + data_load_timerange) = dk.check_if_new_training_required(trained_timestamp) + dk.set_paths(metadata['pair'], new_trained_timerange.stopts) # download candle history if it is not already in memory - if not self.data_drawer.historic_data: + if not self.dd.historic_data: logger.info('Downloading all training data for all pairs in whitelist and ' 'corr_pairlist, this may take a while if you do not have the ' 'data saved') - dh.download_all_data_for_training(data_load_timerange) - dh.load_all_pair_histories(data_load_timerange) + dk.download_all_data_for_training(data_load_timerange) + dk.load_all_pair_histories(data_load_timerange) if not self.scanning: self.scanning = True self.start_scanning(strategy) elif self.follow_mode: - dh.set_paths(metadata['pair'], trained_timestamp) + dk.set_paths(metadata['pair'], trained_timestamp) logger.info('FreqAI instance set to follow_mode, finding existing pair' f'using { self.identifier }') # load the model and associated data into the data kitchen - self.model = dh.load_data(coin=metadata['pair'], keras=self.keras) + self.model = dk.load_data(coin=metadata['pair'], keras_model=self.keras) if not self.model: logger.warning('No model ready, returning null values to strategy.') - self.data_drawer.return_null_values_to_strategy(dataframe, dh) - return dh + self.dd.return_null_values_to_strategy(dataframe, dk) + return dk # ensure user is feeding the correct indicators to the model - self.check_if_feature_list_matches_strategy(dataframe, dh) + self.check_if_feature_list_matches_strategy(dataframe, dk) - self.build_strategy_return_arrays(dataframe, dh, metadata['pair'], trained_timestamp) + self.build_strategy_return_arrays(dataframe, dk, metadata['pair'], trained_timestamp) - return dh + return dk def build_strategy_return_arrays(self, dataframe: DataFrame, - dh: FreqaiDataKitchen, pair: str, + dk: FreqaiDataKitchen, pair: str, trained_timestamp: int) -> None: # hold the historical predictions in memory so we are sending back # correct array to strategy - if pair not in self.data_drawer.model_return_values: - preds, do_preds = self.predict(dataframe, dh) + if pair not in self.dd.model_return_values: + pred_df, do_preds = self.predict(dataframe, dk) # mypy doesnt like the typing in else statement, so we need to explicitly add to # dataframe separately - dataframe['prediction'], dataframe['do_predict'] = preds, do_preds - # dh.append_predictions(preds, do_preds, len(dataframe)) - # dh.fill_predictions(len(dataframe)) - self.data_drawer.set_initial_return_values(pair, dh, dataframe) + + # for label in dk.label_list: + # dataframe[label] = pred_df[label] + + # dataframe['do_predict'] = do_preds + + # dk.append_predictions(preds, do_preds, len(dataframe)) + # dk.fill_predictions(len(dataframe)) + self.dd.set_initial_return_values(pair, dk, pred_df, do_preds) + dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe) return - elif self.dh.check_if_model_expired(trained_timestamp): - preds, do_preds, dh.DI_values = np.zeros(2), np.ones(2) * 2, np.zeros(2) + elif self.dk.check_if_model_expired(trained_timestamp): + pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list) + do_preds, dk.DI_values = np.ones(2) * 2, np.zeros(2) logger.warning('Model expired, returning null values to strategy. Strategy ' 'construction should take care to consider this event with ' 'prediction == 0 and do_predict == 2') else: # Only feed in the most recent candle for prediction in live scenario - preds, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dh, first=False) + pred_df, do_preds = self.predict(dataframe.iloc[-self.CONV_WIDTH:], dk, first=False) + + self.dd.append_model_predictions(pair, pred_df, do_preds, dk, len(dataframe)) + dk.return_dataframe = self.dd.attach_return_values_to_return_dataframe(pair, dataframe) - self.data_drawer.append_model_predictions(pair, preds, do_preds, - dh.data["target_mean"], - dh.data["target_std"], - dh, - len(dataframe)) return def check_if_feature_list_matches_strategy(self, dataframe: DataFrame, - dh: FreqaiDataKitchen) -> None: + dk: FreqaiDataKitchen) -> None: """ Ensure user is passing the proper feature set if they are reusing an `identifier` pointing to a folder holding existing models. :params: dataframe: DataFrame = strategy provided dataframe - dh: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop + dk: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop """ - strategy_provided_features = dh.find_features(dataframe) - if 'training_features_list_raw' in dh.data: - feature_list = dh.data['training_features_list_raw'] + dk.find_features(dataframe) + if 'training_features_list_raw' in dk.data: + feature_list = dk.data['training_features_list_raw'] else: - feature_list = dh.training_features_list - if strategy_provided_features != feature_list: + feature_list = dk.training_features_list + if dk.training_features_list != feature_list: raise OperationalException("Trying to access pretrained model with `identifier` " "but found different features furnished by current strategy." "Change `identifer` to train from scratch, or ensure the" "strategy is furnishing the same features as the pretrained" "model") - def data_cleaning_train(self, dh: FreqaiDataKitchen) -> None: + def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None: """ Base data cleaning method for train Any function inside this method should drop training data points from the filtered_dataframe @@ -354,23 +357,23 @@ class IFreqaiModel(ABC): """ if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): - dh.principal_component_analysis() + dk.principal_component_analysis() if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'): - dh.use_SVM_to_remove_outliers(predict=False) + dk.use_SVM_to_remove_outliers(predict=False) if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'): - dh.data["avg_mean_dist"] = dh.compute_distances() + dk.data["avg_mean_dist"] = dk.compute_distances() # if self.feature_parameters["determine_statistical_distributions"]: - # dh.determine_statistical_distributions() + # dk.determine_statistical_distributions() # if self.feature_parameters["remove_outliers"]: - # dh.remove_outliers(predict=False) + # dk.remove_outliers(predict=False) - def data_cleaning_predict(self, dh: FreqaiDataKitchen, dataframe: DataFrame) -> None: + def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None: """ Base data cleaning method for predict. - These functions each modify dh.do_predict, which is a dataframe with equal length + These functions each modify dk.do_predict, which is a dataframe with equal length to the number of candles coming from and returning to the strategy. Inside do_predict, 1 allows prediction and < 0 signals to the strategy that the model is not confident in the prediction. @@ -379,20 +382,20 @@ class IFreqaiModel(ABC): for buy signals. """ if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): - dh.pca_transform(dataframe) + dk.pca_transform(dataframe) if self.freqai_info.get('feature_parameters', {}).get('use_SVM_to_remove_outliers'): - dh.use_SVM_to_remove_outliers(predict=True) + dk.use_SVM_to_remove_outliers(predict=True) if self.freqai_info.get('feature_parameters', {}).get('DI_threshold'): - dh.check_if_pred_in_training_spaces() + dk.check_if_pred_in_training_spaces() # if self.feature_parameters["determine_statistical_distributions"]: - # dh.determine_statistical_distributions() + # dk.determine_statistical_distributions() # if self.feature_parameters["remove_outliers"]: - # dh.remove_outliers(predict=True) # creates dropped index + # dk.remove_outliers(predict=True) # creates dropped index - def model_exists(self, pair: str, dh: FreqaiDataKitchen, trained_timestamp: int = None, + def model_exists(self, pair: str, dk: FreqaiDataKitchen, trained_timestamp: int = None, model_filename: str = '', scanning: bool = False) -> bool: """ Given a pair and path, check if a model already exists @@ -402,14 +405,14 @@ class IFreqaiModel(ABC): coin, _ = pair.split("/") if not self.live: - dh.model_filename = model_filename = "cb_" + coin.lower() + "_" + str(trained_timestamp) + dk.model_filename = model_filename = "cb_" + coin.lower() + "_" + str(trained_timestamp) - path_to_modelfile = Path(dh.data_path / str(model_filename + "_model.joblib")) + path_to_modelfile = Path(dk.data_path / str(model_filename + "_model.joblib")) file_exists = path_to_modelfile.is_file() if file_exists and not scanning: - logger.info("Found model at %s", dh.data_path / dh.model_filename) + logger.info("Found model at %s", dk.data_path / dk.model_filename) elif not scanning: - logger.info("Could not find model at %s", dh.data_path / dh.model_filename) + logger.info("Could not find model at %s", dk.data_path / dk.model_filename) return file_exists def set_full_path(self) -> None: @@ -430,7 +433,7 @@ class IFreqaiModel(ABC): return dataframe[to_keep] def train_model_in_series(self, new_trained_timerange: TimeRange, pair: str, - strategy: IStrategy, dh: FreqaiDataKitchen, + strategy: IStrategy, dk: FreqaiDataKitchen, data_load_timerange: TimeRange): """ Retreive data and train model in single threaded mode (only used if model directory is empty @@ -439,41 +442,43 @@ class IFreqaiModel(ABC): new_trained_timerange: TimeRange = the timerange to train the model on metadata: dict = strategy provided metadata strategy: IStrategy = user defined strategy object - dh: FreqaiDataKitchen = non-persistent data container for current coin/loop + dk: FreqaiDataKitchen = non-persistent data container for current coin/loop data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators (larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs) """ - corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, + corr_dataframes, base_dataframes = dk.get_base_and_corr_dataframes(data_load_timerange, pair) - unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy, + unfiltered_dataframe = dk.use_strategy_to_populate_indicators(strategy, corr_dataframes, base_dataframes, pair) - unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe) + unfiltered_dataframe = dk.slice_dataframe(new_trained_timerange, unfiltered_dataframe) - model = self.train(unfiltered_dataframe, pair, dh) + # find the features indicated by strategy and store in datakitchen + dk.find_features(unfiltered_dataframe) - self.data_drawer.pair_dict[pair][ - 'trained_timestamp'] = new_trained_timerange.stopts - dh.set_new_model_names(pair, new_trained_timerange) - self.data_drawer.pair_dict[pair]['first'] = False - if self.data_drawer.pair_dict[pair]['priority'] == 1 and self.scanning: + model = self.train(unfiltered_dataframe, pair, dk) + + self.dd.pair_dict[pair]['trained_timestamp'] = new_trained_timerange.stopts + dk.set_new_model_names(pair, new_trained_timerange) + self.dd.pair_dict[pair]['first'] = False + if self.dd.pair_dict[pair]['priority'] == 1 and self.scanning: with self.lock: - self.data_drawer.pair_to_end_of_training_queue(pair) - dh.save_data(model, coin=pair, keras=self.keras) + self.dd.pair_to_end_of_training_queue(pair) + dk.save_data(model, coin=pair, keras_model=self.keras) if self.freqai_info.get('purge_old_models', False): - self.data_drawer.purge_old_models() + self.dd.purge_old_models() # self.retrain = False # Following methods which are overridden by user made prediction models. # See freqai/prediction_models/CatboostPredictionModlel.py for an example. @abstractmethod - def train(self, unfiltered_dataframe: DataFrame, pair: str, dh: FreqaiDataKitchen) -> Any: + def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any: """ Filter the training data and train a model to it. Train makes heavy use of the datahandler for storing, saving, loading, and analyzing the data. @@ -499,37 +504,36 @@ class IFreqaiModel(ABC): @abstractmethod def predict(self, dataframe: DataFrame, - dh: FreqaiDataKitchen, first: bool = True) -> Tuple[npt.ArrayLike, npt.ArrayLike]: + dk: FreqaiDataKitchen, first: bool = True) -> Tuple[DataFrame, npt.ArrayLike]: """ Filter the prediction features data and predict with it. :param: unfiltered_dataframe: Full dataframe for the current backtest period. - dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only :return: :predictions: np.array of predictions :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index) """ - @abstractmethod - def make_labels(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: + def make_labels(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: """ User defines the labels here (target values). :params: dataframe: DataFrame = the full dataframe for the present training period - dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only """ return @abstractmethod - def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: + def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: """ User defines the dataframe to be returned to strategy here. :params: dataframe: DataFrame = the full dataframe for the current prediction (live) or --timerange (backtesting) - dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + dk: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only :returns: dataframe: DataFrame = dataframe filled with user defined data """ diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index ac66dc704..86bda2aac 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -18,18 +18,16 @@ class CatboostPredictionModel(IFreqaiModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: - - dataframe["prediction"] = dh.full_predictions - dataframe["do_predict"] = dh.full_do_predict - dataframe["target_mean"] = dh.full_target_mean - dataframe["target_std"] = dh.full_target_std - if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0: - dataframe["DI"] = dh.full_DI_values + def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: + """ + User uses this function to add any additional return values to the dataframe. + e.g. + dataframe['volatility'] = dk.volatility_values + """ return dataframe - def make_labels(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: + def make_labels(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: """ User defines the labels here (target values). :params: @@ -48,7 +46,7 @@ class CatboostPredictionModel(IFreqaiModel): return dataframe["s"] def train(self, unfiltered_dataframe: DataFrame, - pair: str, dh: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: + pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: """ Filter the training data and train a model to it. Train makes heavy use of the datahkitchen for storing, saving, loading, and analyzing the data. @@ -62,27 +60,25 @@ class CatboostPredictionModel(IFreqaiModel): logger.info('--------------------Starting training ' f'{pair} --------------------') - # create the full feature list based on user config info - dh.training_features_list = dh.find_features(unfiltered_dataframe) - unfiltered_labels = self.make_labels(unfiltered_dataframe, dh) + # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk) # filter the features requested by user in the configuration file and elegantly handle NaNs - features_filtered, labels_filtered = dh.filter_features( + features_filtered, labels_filtered = dk.filter_features( unfiltered_dataframe, - dh.training_features_list, - unfiltered_labels, + dk.training_features_list, + dk.label_list, training_filter=True, ) # split data into train/test data. - data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered) - dh.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy + data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + dk.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy # normalize all data based on train_dataset only - data_dictionary = dh.normalize_data(data_dictionary) + data_dictionary = dk.normalize_data(data_dictionary) # optional additional data cleaning/analysis - self.data_cleaning_train(dh) + self.data_cleaning_train(dk) - logger.info(f'Training model on {len(dh.data_dictionary["train_features"].columns)}' + logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}' ' features') logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') @@ -121,34 +117,32 @@ class CatboostPredictionModel(IFreqaiModel): return model def predict(self, unfiltered_dataframe: DataFrame, - dh: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: + dk: FreqaiDataKitchen, first: bool = False) -> Tuple[DataFrame, DataFrame]: """ Filter the prediction features data and predict with it. :param: unfiltered_dataframe: Full dataframe for the current backtest period. :return: - :predictions: np.array of predictions + :pred_df: dataframe containing the predictions :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove data (NaNs) or felt uncertain about data (PCA and DI index) """ - # logger.info("--------------------Starting prediction--------------------") - - original_feature_list = dh.find_features(unfiltered_dataframe) - filtered_dataframe, _ = dh.filter_features( - unfiltered_dataframe, original_feature_list, training_filter=False + dk.find_features(unfiltered_dataframe) + filtered_dataframe, _ = dk.filter_features( + unfiltered_dataframe, dk.training_features_list, training_filter=False ) - filtered_dataframe = dh.normalize_data_from_metadata(filtered_dataframe) - dh.data_dictionary["prediction_features"] = filtered_dataframe + filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) + dk.data_dictionary["prediction_features"] = filtered_dataframe # optional additional data cleaning/analysis - self.data_cleaning_predict(dh, filtered_dataframe) + self.data_cleaning_predict(dk, filtered_dataframe) - predictions = self.model.predict(dh.data_dictionary["prediction_features"]) + predictions = self.model.predict(dk.data_dictionary["prediction_features"]) + pred_df = DataFrame(predictions, columns=dk.label_list) - # compute the non-normalized predictions - dh.predictions = (predictions + 1) * (dh.data["labels_max"] - - dh.data["labels_min"]) / 2 + dh.data["labels_min"] + for label in dk.label_list: + pred_df[label] = ((pred_df[label] + 1) * + (dk.data["labels_max"][label] - + dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label] - # logger.info("--------------------Finished prediction--------------------") - - return (dh.predictions, dh.do_predict) + return (pred_df, dk.do_predict) diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py new file mode 100644 index 000000000..02d4af1cf --- /dev/null +++ b/freqtrade/freqai/prediction_models/CatboostPredictionMultiModel.py @@ -0,0 +1,126 @@ +import logging +from typing import Any, Dict, Tuple + +from catboost import CatBoostRegressor # , Pool +from pandas import DataFrame +from sklearn.multioutput import MultiOutputRegressor + +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.freqai_interface import IFreqaiModel + + +logger = logging.getLogger(__name__) + + +class CatboostPredictionMultiModel(IFreqaiModel): + """ + User created prediction model. The class needs to override three necessary + functions, predict(), train(), fit(). The class inherits ModelHandler which + has its own DataHandler where data is held, saved, loaded, and managed. + """ + + def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: + """ + User uses this function to add any additional return values to the dataframe. + e.g. + dataframe['volatility'] = dk.volatility_values + """ + + return dataframe + + def train(self, unfiltered_dataframe: DataFrame, + pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: + """ + Filter the training data and train a model to it. Train makes heavy use of the datahkitchen + for storing, saving, loading, and analyzing the data. + :params: + :unfiltered_dataframe: Full dataframe for the current training period + :metadata: pair metadata from strategy. + :returns: + :model: Trained model which can be used to inference (self.predict) + """ + + logger.info('--------------------Starting training ' + f'{pair} --------------------') + + # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk) + # filter the features requested by user in the configuration file and elegantly handle NaNs + features_filtered, labels_filtered = dk.filter_features( + unfiltered_dataframe, + dk.training_features_list, + dk.label_list, + training_filter=True, + ) + + # split data into train/test data. + data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + dk.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy + # normalize all data based on train_dataset only + data_dictionary = dk.normalize_data(data_dictionary) + + # optional additional data cleaning/analysis + self.data_cleaning_train(dk) + + logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}' + ' features') + logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') + + model = self.fit(data_dictionary) + + logger.info(f'--------------------done training {pair}--------------------') + + return model + + def fit(self, data_dictionary: Dict) -> Any: + """ + User sets up the training and test data to fit their desired model here + :params: + :data_dictionary: the dictionary constructed by DataHandler to hold + all the training and test data/labels. + """ + + cbr = CatBoostRegressor( + allow_writing_files=False, gpu_ram_part=0.5, + verbose=100, early_stopping_rounds=400, **self.model_training_parameters + ) + + X = data_dictionary["train_features"] + y = data_dictionary["train_labels"] + # eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"]) + sample_weight = data_dictionary['train_weights'] + + model = MultiOutputRegressor(estimator=cbr) + model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set) + + return model + + def predict(self, unfiltered_dataframe: DataFrame, + dk: FreqaiDataKitchen, first: bool = False) -> Tuple[DataFrame, DataFrame]: + """ + Filter the prediction features data and predict with it. + :param: unfiltered_dataframe: Full dataframe for the current backtest period. + :return: + :pred_df: dataframe containing the predictions + :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove + data (NaNs) or felt uncertain about data (PCA and DI index) + """ + + dk.find_features(unfiltered_dataframe) + filtered_dataframe, _ = dk.filter_features( + unfiltered_dataframe, dk.training_features_list, training_filter=False + ) + filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) + dk.data_dictionary["prediction_features"] = filtered_dataframe + + # optional additional data cleaning/analysis + self.data_cleaning_predict(dk, filtered_dataframe) + + predictions = self.model.predict(dk.data_dictionary["prediction_features"]) + pred_df = DataFrame(predictions, columns=dk.label_list) + + for label in dk.label_list: + pred_df[label] = ((pred_df[label] + 1) * + (dk.data["labels_max"][label] - + dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label] + + return (pred_df, dk.do_predict) diff --git a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py index 04bba2a90..7fe81d7c2 100644 --- a/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py +++ b/freqtrade/freqai/prediction_models/LightGBMPredictionModel.py @@ -18,37 +18,17 @@ class LightGBMPredictionModel(IFreqaiModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: - - dataframe["prediction"] = dh.full_predictions - dataframe["do_predict"] = dh.full_do_predict - dataframe["target_mean"] = dh.full_target_mean - dataframe["target_std"] = dh.full_target_std - if self.freqai_info.get('feature_parameters', {}).get('DI_threshold', 0) > 0: - dataframe["DI"] = dh.full_DI_values + def return_values(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> DataFrame: + """ + User uses this function to add any additional return values to the dataframe. + e.g. + dataframe['volatility'] = dk.volatility_values + """ return dataframe - def make_labels(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: - """ - User defines the labels here (target values). - :params: - :dataframe: the full dataframe for the present training period - """ - - dataframe["s"] = ( - dataframe["close"] - .shift(-self.feature_parameters["period"]) - .rolling(self.feature_parameters["period"]) - .mean() - / dataframe["close"] - - 1 - ) - - return dataframe["s"] - def train(self, unfiltered_dataframe: DataFrame, - pair: str, dh: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: + pair: str, dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: """ Filter the training data and train a model to it. Train makes heavy use of the datahkitchen for storing, saving, loading, and analyzing the data. @@ -62,27 +42,25 @@ class LightGBMPredictionModel(IFreqaiModel): logger.info('--------------------Starting training ' f'{pair} --------------------') - # create the full feature list based on user config info - dh.training_features_list = dh.find_features(unfiltered_dataframe) - unfiltered_labels = self.make_labels(unfiltered_dataframe, dh) + # unfiltered_labels = self.make_labels(unfiltered_dataframe, dk) # filter the features requested by user in the configuration file and elegantly handle NaNs - features_filtered, labels_filtered = dh.filter_features( + features_filtered, labels_filtered = dk.filter_features( unfiltered_dataframe, - dh.training_features_list, - unfiltered_labels, + dk.training_features_list, + dk.label_list, training_filter=True, ) # split data into train/test data. - data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered) - dh.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy + data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered) + dk.fit_labels() # fit labels to a cauchy distribution so we know what to expect in strategy # normalize all data based on train_dataset only - data_dictionary = dh.normalize_data(data_dictionary) + data_dictionary = dk.normalize_data(data_dictionary) # optional additional data cleaning/analysis - self.data_cleaning_train(dh) + self.data_cleaning_train(dk) - logger.info(f'Training model on {len(dh.data_dictionary["train_features"].columns)}' + logger.info(f'Training model on {len(dk.data_dictionary["train_features"].columns)}' ' features') logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') @@ -112,7 +90,7 @@ class LightGBMPredictionModel(IFreqaiModel): return model def predict(self, unfiltered_dataframe: DataFrame, - dh: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: + dk: FreqaiDataKitchen) -> Tuple[DataFrame, DataFrame]: """ Filter the prediction features data and predict with it. :param: unfiltered_dataframe: Full dataframe for the current backtest period. @@ -124,22 +102,22 @@ class LightGBMPredictionModel(IFreqaiModel): # logger.info("--------------------Starting prediction--------------------") - original_feature_list = dh.find_features(unfiltered_dataframe) - filtered_dataframe, _ = dh.filter_features( + original_feature_list = dk.find_features(unfiltered_dataframe) + filtered_dataframe, _ = dk.filter_features( unfiltered_dataframe, original_feature_list, training_filter=False ) - filtered_dataframe = dh.normalize_data_from_metadata(filtered_dataframe) - dh.data_dictionary["prediction_features"] = filtered_dataframe + filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe) + dk.data_dictionary["prediction_features"] = filtered_dataframe # optional additional data cleaning/analysis - self.data_cleaning_predict(dh, filtered_dataframe) + self.data_cleaning_predict(dk, filtered_dataframe) - predictions = self.model.predict(dh.data_dictionary["prediction_features"]) + predictions = self.model.predict(dk.data_dictionary["prediction_features"]) + pred_df = DataFrame(predictions, columns=dk.label_list) - # compute the non-normalized predictions - dh.predictions = (predictions + 1) * (dh.data["labels_max"] - - dh.data["labels_min"]) / 2 + dh.data["labels_min"] + for label in dk.label_list: + pred_df[label] = ((pred_df[label] + 1) * + (dk.data["labels_max"][label] - + dk.data["labels_min"][label]) / 2) + dk.data["labels_min"][label] - # logger.info("--------------------Finished prediction--------------------") - - return (dh.predictions, dh.do_predict) + return (pred_df, dk.do_predict) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 95fdba316..f70277c77 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -156,6 +156,18 @@ class FreqaiExampleStrategy(IStrategy): df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7 df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25 + # user adds targets here by prepending them with &- (see convention below) + # If user wishes to use multiple targets, a multioutput prediction model + # needs to be used such as templates/CatboostPredictionMultiModel.py + df['&-s_close'] = ( + df["close"] + .shift(-self.freqai_info['feature_parameters']["period"]) + .rolling(self.freqai_info['feature_parameters']["period"]) + .mean() + / df["close"] + - 1 + ) + return df def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: @@ -183,20 +195,20 @@ class FreqaiExampleStrategy(IStrategy): # each training period. dataframe = self.model.bridge.start(dataframe, metadata, self) - dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.25 - dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.25 + dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25 + dataframe["sell_roi"] = dataframe["&-s_close_mean"] - dataframe["&-s_close_std"] * 1.25 return dataframe def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame: - enter_long_conditions = [df["do_predict"] == 1, df["prediction"] > df["target_roi"]] + enter_long_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"]] if enter_long_conditions: df.loc[ reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"] ] = (1, "long") - enter_short_conditions = [df["do_predict"] == 1, df["prediction"] < df["sell_roi"]] + enter_short_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"]] if enter_short_conditions: df.loc[ @@ -206,11 +218,11 @@ class FreqaiExampleStrategy(IStrategy): return df def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame: - exit_long_conditions = [df["do_predict"] == 1, df["prediction"] < df["sell_roi"] * 0.25] + exit_long_conditions = [df["do_predict"] == 1, df["&-s_close"] < df["sell_roi"] * 0.25] if exit_long_conditions: df.loc[reduce(lambda x, y: x & y, exit_long_conditions), "exit_long"] = 1 - exit_short_conditions = [df["do_predict"] == 1, df["prediction"] > df["target_roi"] * 0.25] + exit_short_conditions = [df["do_predict"] == 1, df["&-s_close"] > df["target_roi"] * 0.25] if exit_short_conditions: df.loc[reduce(lambda x, y: x & y, exit_short_conditions), "exit_short"] = 1 @@ -243,7 +255,7 @@ class FreqaiExampleStrategy(IStrategy): if ('prediction' + entry_tag not in pair_dict[pair] or pair_dict[pair]['prediction' + entry_tag] > 0): with self.model.bridge.lock: - pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction']) + pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['&-s_close']) if not follow_mode: self.model.bridge.data_drawer.save_drawer_to_disk() else: