From 16b4a5b71ff140f5de31e5d5572f1f193457cf6b Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 3 Jun 2022 15:19:46 +0200 Subject: [PATCH 01/14] rehaul of backend data management - increasing performance by holding history in memory, reducing load on the ratelimit by only pinging exchange once per candle. Improve code readability. --- freqtrade/freqai/data_drawer.py | 44 +++- freqtrade/freqai/data_kitchen.py | 222 +++++++++++++++--- freqtrade/freqai/freqai_interface.py | 126 +++++++--- .../CatboostPredictionModel.py | 11 + freqtrade/templates/FreqaiExampleStrategy.py | 9 +- 5 files changed, 342 insertions(+), 70 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 77b595d56..4e52ac711 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -35,6 +35,8 @@ class FreqaiDataDrawer: self.model_dictionary: Dict[str, Any] = {} self.model_return_values: Dict[str, Any] = {} self.pair_data_dict: Dict[str, Any] = {} + self.historic_data: Dict[str, Any] = {} + # self.populated_historic_data: Dict[str, Any] = {} ? self.follower_dict: Dict[str, Any] = {} self.full_path = full_path self.follow_mode = follow_mode @@ -45,6 +47,12 @@ class FreqaiDataDrawer: # self.create_training_queue(pair_whitelist) def load_drawer_from_disk(self): + """ + Locate and load a previously saved data drawer full of all pair model metadata in + present model folder. + :returns: + exists: bool = whether or not the drawer was located + """ exists = Path(self.full_path / str('pair_dictionary.json')).resolve().exists() if exists: with open(self.full_path / str('pair_dictionary.json'), "r") as fp: @@ -58,16 +66,25 @@ class FreqaiDataDrawer: return exists def save_drawer_to_disk(self): + """ + Save data drawer full of all pair model metadata in present model folder. 
+ """ with open(self.full_path / str('pair_dictionary.json'), "w") as fp: json.dump(self.pair_dict, fp, default=self.np_encoder) - def save_follower_dict_to_dist(self): + def save_follower_dict_to_disk(self): + """ + Save follower dictionary to disk (used by strategy for persistent prediction targets) + """ follower_name = self.config.get('bot_name', 'follower1') with open(self.full_path / str('follower_dictionary-' + follower_name + '.json'), "w") as fp: json.dump(self.follower_dict, fp, default=self.np_encoder) def create_follower_dict(self): + """ + Create or dictionary for each follower to maintain unique persistent prediction targets + """ follower_name = self.config.get('bot_name', 'follower1') whitelist_pairs = self.config.get('exchange', {}).get('pair_whitelist') @@ -89,6 +106,18 @@ class FreqaiDataDrawer: return object.item() def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]: + """ + Locate and load existing model metadata from persistent storage. If not located, + create a new one and append the current pair to it and prepare it for its first + training + :params: + metadata: dict = strategy furnished pair metadata + :returns: + model_filename: str = unique filename used for loading persistent objects from disk + trained_timestamp: int = the last time the coin was trained + coin_first: bool = If the coin is fresh without metadata + return_null_array: bool = Follower could not find pair metadata + """ pair_in_dict = self.pair_dict.get(metadata['pair']) data_path_set = self.pair_dict.get(metadata['pair'], {}).get('data_path', None) return_null_array = False @@ -137,6 +166,7 @@ class FreqaiDataDrawer: self.model_return_values[pair]['do_preds'] = dh.full_do_predict self.model_return_values[pair]['target_mean'] = dh.full_target_mean self.model_return_values[pair]['target_std'] = dh.full_target_std + self.model_return_values[pair]['DI_values'] = dh.full_DI_values # if not self.follow_mode: # self.save_model_return_values_to_disk() @@ 
-157,6 +187,8 @@ class FreqaiDataDrawer: self.model_return_values[pair]['predictions'] = np.append( self.model_return_values[pair]['predictions'][i:], predictions[-1]) + self.model_return_values[pair]['DI_values'] = np.append( + self.model_return_values[pair]['DI_values'][i:], dh.DI_values[-1]) self.model_return_values[pair]['do_preds'] = np.append( self.model_return_values[pair]['do_preds'][i:], do_preds[-1]) self.model_return_values[pair]['target_mean'] = np.append( @@ -168,6 +200,8 @@ class FreqaiDataDrawer: prepend = np.zeros(abs(length_difference) - 1) self.model_return_values[pair]['predictions'] = np.insert( self.model_return_values[pair]['predictions'], 0, prepend) + self.model_return_values[pair]['DI_values'] = np.insert( + self.model_return_values[pair]['DI_values'], 0, prepend) self.model_return_values[pair]['do_preds'] = np.insert( self.model_return_values[pair]['do_preds'], 0, prepend) self.model_return_values[pair]['target_mean'] = np.insert( @@ -179,6 +213,7 @@ class FreqaiDataDrawer: dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds']) dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean']) dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std']) + dh.full_DI_values = copy.deepcopy(self.model_return_values[pair]['DI_values']) # if not self.follow_mode: # self.save_model_return_values_to_disk() @@ -190,6 +225,7 @@ class FreqaiDataDrawer: dh.full_do_predict = np.zeros(len_df) dh.full_target_mean = np.zeros(len_df) dh.full_target_std = np.zeros(len_df) + dh.full_DI_values = np.zeros(len_df) def purge_old_models(self) -> None: @@ -227,6 +263,12 @@ class FreqaiDataDrawer: shutil.rmtree(v) deleted += 1 + def update_follower_metadata(self): + # follower needs to load from disk to get any changes made by leader to pair_dict + self.load_drawer_from_disk() + if self.config.get('freqai', {})('purge_old_models', False): + self.purge_old_models() + # to be used if we want to send 
predictions directly to the follower instead of forcing # follower to load models and inference # def save_model_return_values_to_disk(self) -> None: diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index dceb721c5..4e2fb6cc9 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -25,9 +25,6 @@ from freqtrade.resolvers import ExchangeResolver from freqtrade.strategy.interface import IStrategy -# import scipy as spy # used for auto distribution assignment - - SECONDS_IN_DAY = 86400 logger = logging.getLogger(__name__) @@ -52,6 +49,7 @@ class FreqaiDataKitchen: self.target_std: npt.ArrayLike = np.array([]) self.full_predictions: npt.ArrayLike = np.array([]) self.full_do_predict: npt.ArrayLike = np.array([]) + self.full_DI_values: npt.ArrayLike = np.array([]) self.full_target_mean: npt.ArrayLike = np.array([]) self.full_target_std: npt.ArrayLike = np.array([]) self.data_path = Path() @@ -59,6 +57,7 @@ class FreqaiDataKitchen: self.live = live self.pair = pair self.svm_model: linear_model.SGDOneClassSVM = None + self.set_all_pairs() if not self.live: self.full_timerange = self.create_fulltimerange(self.config["timerange"], self.freqai_config.get("train_period") @@ -73,6 +72,12 @@ class FreqaiDataKitchen: self.data_drawer = data_drawer def set_paths(self, metadata: dict, trained_timestamp: int = None,) -> None: + """ + Set the paths to the data for the present coin/botloop + :params: + metadata: dict = strategy furnished pair metadata + trained_timestamp: int = timestamp of most recent training + """ self.full_path = Path(self.config['user_data_dir'] / "models" / str(self.freqai_config.get('identifier'))) @@ -514,6 +519,11 @@ class FreqaiDataKitchen: return None def pca_transform(self, filtered_dataframe: DataFrame) -> None: + """ + Use an existing pca transform to transform data into components + :params: + filtered_dataframe: DataFrame = the cleaned dataframe + """ pca_components = 
self.pca.transform(filtered_dataframe) self.data_dictionary["prediction_features"] = pd.DataFrame( data=pca_components, @@ -522,6 +532,11 @@ class FreqaiDataKitchen: ) def compute_distances(self) -> float: + """ + Compute distances between each training point and every other training + point. This metric defines the neighborhood of trained data and is used + for prediction confidence in the Dissimilarity Index + """ logger.info("computing average mean distance for all training points") pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1) avg_mean_dist = pairwise.mean(axis=1).mean() @@ -530,6 +545,12 @@ class FreqaiDataKitchen: return avg_mean_dist def use_SVM_to_remove_outliers(self, predict: bool) -> None: + """ + Build/inference a Support Vector Machine to detect outliers + in training data and prediction + :params: + predict: bool = If true, inference an existing SVM model, else construct one + """ if predict: assert self.svm_model, "No svm model available for outlier removal" @@ -580,6 +601,13 @@ class FreqaiDataKitchen: return def find_features(self, dataframe: DataFrame) -> list: + """ + Find features in the strategy provided dataframe + :params: + dataframe: DataFrame = strategy provided dataframe + :returns: + features: list = the features to be used for training/prediction + """ column_names = dataframe.columns features = [c for c in column_names if '%' in c] if not features: @@ -600,17 +628,19 @@ class FreqaiDataKitchen: n_jobs=-1, ) + self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"] + do_predict = np.where( - distance.min(axis=0) / self.data["avg_mean_dist"] + self.DI_values < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"), 1, 0, ) - # logger.info( - # "Distance checker tossed %s predictions for being too far from training data", - # len(do_predict) - do_predict.sum(), - # ) + logger.info( + "DI tossed %s predictions for being too far from training data", + len(do_predict) - 
do_predict.sum(), + ) self.do_predict += do_predict self.do_predict -= 1 @@ -638,6 +668,7 @@ class FreqaiDataKitchen: self.full_predictions = np.append(self.full_predictions, predictions) self.full_do_predict = np.append(self.full_do_predict, do_predict) + self.full_DI_values = np.append(self.full_DI_values, self.DI_values) self.full_target_mean = np.append(self.full_target_mean, target_mean) self.full_target_std = np.append(self.full_target_std, target_std) @@ -652,6 +683,7 @@ class FreqaiDataKitchen: filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count self.full_predictions = np.append(filler, self.full_predictions) self.full_do_predict = np.append(filler, self.full_do_predict) + self.full_DI_values = np.append(filler, self.full_DI_values) self.full_target_mean = np.append(filler, self.full_target_mean) self.full_target_std = np.append(filler, self.full_target_std) @@ -711,6 +743,8 @@ class FreqaiDataKitchen: logger.warning('FreqAI could not detect max timeframe and therefore may not ' 'download the proper amount of data for training') + # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days') + if trained_timestamp != 0: elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY retrain = elapsed_time > self.freqai_config.get('backtest_period') @@ -764,61 +798,176 @@ class FreqaiDataKitchen: # enables persistence, but not fully implemented into save/load data yer # self.data['live_trained_timerange'] = str(int(trained_timerange.stopts)) - def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict, - strategy: IStrategy) -> None: + # SUPERCEDED + # def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict, + # strategy: IStrategy) -> None: + # exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], + # self.config, validate=False, freqai=True) + # # exchange = strategy.dp._exchange # closes ccxt session + # pairs = 
copy.deepcopy(self.freqai_config.get('corr_pairlist', [])) + # if str(metadata['pair']) not in pairs: + # pairs.append(str(metadata['pair'])) + + # refresh_backtest_ohlcv_data( + # exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'), + # datadir=self.config['datadir'], timerange=timerange, + # new_pairs_days=self.config['new_pairs_days'], + # erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'), + # trading_mode=self.config.get('trading_mode', 'spot'), + # prepend=self.config.get('prepend_data', False) + # ) + + def download_all_data_for_training(self, timerange: TimeRange) -> None: + """ + Called only once upon start of bot to download the necessary data for + populating indicators and training the model. + :params: + timerange: TimeRange = The full data timerange for populating the indicators + and training the model. + """ exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], self.config, validate=False, freqai=True) - # exchange = strategy.dp._exchange # closes ccxt session - pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', [])) - if str(metadata['pair']) not in pairs: - pairs.append(str(metadata['pair'])) + + new_pairs_days = int((timerange.stopts - timerange.startts) / SECONDS_IN_DAY) refresh_backtest_ohlcv_data( - exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'), + exchange, pairs=self.all_pairs, + timeframes=self.freqai_config.get('timeframes'), datadir=self.config['datadir'], timerange=timerange, - new_pairs_days=self.config['new_pairs_days'], + new_pairs_days=new_pairs_days, erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'), trading_mode=self.config.get('trading_mode', 'spot'), prepend=self.config.get('prepend_data', False) ) - def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any], - DataFrame]: + def update_historic_data(self, strategy: IStrategy) -> None: + """ + Append new candles to our stores historic 
data (in memory) so that + we do not need to load candle history from disk and we dont need to + pinging exchange multiple times for the same candle. + :params: + dataframe: DataFrame = strategy provided dataframe + """ + + history_data = self.data_drawer.historic_data + + for pair in self.all_pairs: + for tf in self.freqai_config.get('timeframes'): + history_data[pair][tf] = pd.concat( + [history_data[pair][tf], + strategy.dp.get_pair_dataframe(pair, tf).iloc[-1]], + axis=0 + ) + + def set_all_pairs(self) -> None: + + self.all_pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', [])) + for pair in self.config.get('exchange', '').get('pair_whitelist'): + if pair not in self.all_pairs: + self.all_pairs.append(pair) + + def load_all_pair_histories(self, timerange: TimeRange) -> None: + """ + Load pair histories for all whitelist and corr_pairlist pairs. + Only called once upon startup of bot. + :params: + timerange: TimeRange = full timerange required to populate all indicators + for training according to user defined train_period + """ + history_data = self.data_drawer.historic_data + + for pair in self.all_pairs: + if pair not in history_data: + history_data[pair] = {} + for tf in self.freqai_config.get('timeframes'): + history_data[pair][tf] = load_pair_history(datadir=self.config['datadir'], + timeframe=tf, + pair=pair, timerange=timerange, + data_format=self.config.get( + 'dataformat_ohlcv', 'json'), + candle_type=self.config.get( + 'trading_mode', 'spot')) + + def get_base_and_corr_dataframes(self, timerange: TimeRange, + metadata: dict) -> Tuple[Dict[Any, Any], Dict[Any, Any]]: + """ + Searches through our historic_data in memory and returns the dataframes relevant + to the present pair. 
+ :params: + timerange: TimeRange = full timerange required to populate all indicators + for training according to user defined train_period + metadata: dict = strategy furnished pair metadata + """ corr_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {} - pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']] - # timerange = TimeRange.parse_timerange(new_timerange) + historic_data = self.data_drawer.historic_data + pairs = self.freqai_config.get('corr_pairlist', []) for tf in self.freqai_config.get('timeframes'): - base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'], - timeframe=tf, - pair=metadata['pair'], timerange=timerange, - data_format=self.config.get( - 'dataformat_ohlcv', 'json'), - candle_type=self.config.get( - 'trading_mode', 'spot')) + base_dataframes[tf] = self.slice_dataframe( + timerange, + historic_data[metadata['pair']][tf] + ) if pairs: for p in pairs: if metadata['pair'] in p: continue # dont repeat anything from whitelist if p not in corr_dataframes: corr_dataframes[p] = {} - corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'], - timeframe=tf, - pair=p, timerange=timerange, - data_format=self.config.get( - 'dataformat_ohlcv', 'json'), - candle_type=self.config.get( - 'trading_mode', 'spot')) + corr_dataframes[p][tf] = self.slice_dataframe(timerange, historic_data[p][tf]) return corr_dataframes, base_dataframes + # SUPERCEDED + # def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any], + # DataFrame]: + # corr_dataframes: Dict[Any, Any] = {} + # base_dataframes: Dict[Any, Any] = {} + # pairs = self.freqai_config.get('corr_pairlist', []) # + [metadata['pair']] + # # timerange = TimeRange.parse_timerange(new_timerange) + + # for tf in self.freqai_config.get('timeframes'): + # base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'], + # timeframe=tf, + # pair=metadata['pair'], timerange=timerange, + # 
data_format=self.config.get( + # 'dataformat_ohlcv', 'json'), + # candle_type=self.config.get( + # 'trading_mode', 'spot')) + # if pairs: + # for p in pairs: + # if metadata['pair'] in p: + # continue # dont repeat anything from whitelist + # if p not in corr_dataframes: + # corr_dataframes[p] = {} + # corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'], + # timeframe=tf, + # pair=p, timerange=timerange, + # data_format=self.config.get( + # 'dataformat_ohlcv', 'json'), + # candle_type=self.config.get( + # 'trading_mode', 'spot')) + + # return corr_dataframes, base_dataframes + def use_strategy_to_populate_indicators(self, strategy: IStrategy, corr_dataframes: dict, base_dataframes: dict, metadata: dict) -> DataFrame: - + """ + Use the user defined strategy for populating indicators during + retrain + :params: + strategy: IStrategy = user defined strategy object + corr_dataframes: dict = dict containing the informative pair dataframes + (for user defined timeframes) + base_dataframes: dict = dict containing the current pair dataframes + (for user defined timeframes) + metadata: dict = strategy furnished pair metadata + :returns: + dataframe: DataFrame = dataframe containing populated indicators + """ dataframe = base_dataframes[self.config['timeframe']].copy() pairs = self.freqai_config.get("corr_pairlist", []) @@ -847,6 +996,9 @@ class FreqaiDataKitchen: return dataframe def fit_labels(self) -> None: + """ + Fit the labels with a gaussian distribution + """ import scipy as spy f = spy.stats.norm.fit(self.data_dictionary["train_labels"]) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 9682ff818..04e819cc4 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -44,9 +44,9 @@ class IFreqaiModel(ABC): self.config = config self.assert_config(self.config) self.freqai_info = config["freqai"] - self.data_split_parameters = config["freqai"]["data_split_parameters"] 
- self.model_training_parameters = config["freqai"]["model_training_parameters"] - self.feature_parameters = config["freqai"]["feature_parameters"] + self.data_split_parameters = config.get('freqai', {}).get("data_split_parameters") + self.model_training_parameters = config.get("freqai", {}).get("model_training_parameters") + self.feature_parameters = config.get("freqai", {}).get("feature_parameters") self.time_last_trained = None self.current_time = None self.model = None @@ -54,6 +54,7 @@ class IFreqaiModel(ABC): self.training_on_separate_thread = False self.retrain = False self.first = True + self.update_historic_data = 0 self.set_full_path() self.follow_mode = self.freqai_info.get('follow_mode', False) self.data_drawer = FreqaiDataDrawer(Path(self.full_path), @@ -95,15 +96,12 @@ class IFreqaiModel(ABC): self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"]) - dh = self.start_live(dataframe, metadata, strategy, self.dh) + dh = self.start_live(dataframe, metadata, strategy, self.dh, trainable=True) else: # we will have at max 2 separate instances of the kitchen at once. 
self.dh_fg = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"]) - dh = self.start_live(dataframe, metadata, strategy, self.dh_fg) - - # return (dh.full_predictions, dh.full_do_predict, - # dh.full_target_mean, dh.full_target_std) + dh = self.start_live(dataframe, metadata, strategy, self.dh_fg, trainable=False) # For backtesting, each pair enters and then gets trained for each window along the # sliding window defined by "train_period" (training window) and "backtest_period" @@ -115,8 +113,9 @@ class IFreqaiModel(ABC): logger.info(f'Training {len(self.dh.training_timeranges)} timeranges') dh = self.start_backtesting(dataframe, metadata, self.dh) - return (dh.full_predictions, dh.full_do_predict, - dh.full_target_mean, dh.full_target_std) + return self.return_values(dataframe, dh) + # return (dh.full_predictions, dh.full_do_predict, + # dh.full_target_mean, dh.full_target_std) def start_backtesting(self, dataframe: DataFrame, metadata: dict, dh: FreqaiDataKitchen) -> FreqaiDataKitchen: @@ -185,7 +184,8 @@ class IFreqaiModel(ABC): return dh def start_live(self, dataframe: DataFrame, metadata: dict, - strategy: IStrategy, dh: FreqaiDataKitchen) -> FreqaiDataKitchen: + strategy: IStrategy, dh: FreqaiDataKitchen, + trainable: bool) -> FreqaiDataKitchen: """ The main broad execution for dry/live. This function will check if a retraining should be performed, and if so, retrain and reset the model. 
@@ -198,25 +198,35 @@ class IFreqaiModel(ABC): dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only """ + # update follower if self.follow_mode: - # follower needs to load from disk to get any changes made by leader to pair_dict - self.data_drawer.load_drawer_from_disk() - if self.freqai_info.get('purge_old_models', False): - self.data_drawer.purge_old_models() + self.data_drawer.update_follower_metadata() + # get the model metadata associated with the current pair (model_filename, trained_timestamp, coin_first, return_null_array) = self.data_drawer.get_pair_dict_info(metadata) - # if the files do not yet exist, the follower returns null arrays to strategy + # if the metadata doesnt exist, the follower returns null arrays to strategy if self.follow_mode and return_null_array: logger.info('Returning null array from follower to strategy') self.data_drawer.return_null_values_to_strategy(dataframe, dh) return dh - if (not self.training_on_separate_thread and not self.follow_mode - and self.data_drawer.pair_dict[metadata['pair']]['priority'] == 1) or coin_first: + # append the historic data once per round + if (self.data_drawer.historic_data and + self.update_historic_data >= len(self.config.get('exchange', '') + .get('pair_whitelist'))): + dh.update_historic_data(strategy) + self.update_historic_data = 1 + else: + self.update_historic_data += 1 + + # if trainable, check if model needs training, if so compute new timerange, + # then save model and metadata. 
+ # if not trainable, load existing data + if (trainable and not self.follow_mode) or coin_first: file_exists = False if trained_timestamp != 0: # historical model available @@ -231,6 +241,15 @@ class IFreqaiModel(ABC): data_load_timerange) = dh.check_if_new_training_required(trained_timestamp) dh.set_paths(metadata, new_trained_timerange.stopts) + # download candle history if it is not already in memory + if not self.data_drawer.historic_data: + logger.info('Downloading all training data for all pairs in whitelist and ' + 'corr_pairlist, this may take a while if you do not have the ' + 'data saved') + dh.download_all_data_for_training(data_load_timerange) + dh.load_all_pair_histories(data_load_timerange) + + # train the model on the trained timerange if self.retrain or not file_exists: if coin_first: self.train_model_in_series(new_trained_timerange, metadata, @@ -241,17 +260,24 @@ class IFreqaiModel(ABC): metadata, strategy, dh, data_load_timerange) - elif self.training_on_separate_thread and not self.follow_mode: - logger.info("FreqAI training a new model on background thread.") + elif not trainable and not self.follow_mode: + logger.info(f'{metadata["pair"]} holds spot ' + f'{self.data_drawer.pair_dict[metadata["pair"]]["priority"]} ' + 'in training queue') elif self.follow_mode: dh.set_paths(metadata, trained_timestamp) logger.info('FreqAI instance set to follow_mode, finding existing pair' f'using { self.identifier }') + # load the model and associated data into the data kitchen self.model = dh.load_data(coin=metadata['pair']) + # ensure user is feeding the correct indicators to the model self.check_if_feature_list_matches_strategy(dataframe, dh) + # hold the historical predictions in memory so we are sending back + # correct array to strategy FIXME currently broken, but only affecting + # FreqUI reporting. Signals remain unaffected. 
if metadata['pair'] not in self.data_drawer.model_return_values: preds, do_preds = self.predict(dataframe, dh) dh.append_predictions(preds, do_preds, len(dataframe)) @@ -268,6 +294,13 @@ class IFreqaiModel(ABC): def check_if_feature_list_matches_strategy(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> None: + """ + Ensure user is passing the proper feature set if they are reusing an `identifier` pointing + to a folder holding existing models. + :params: + dataframe: DataFrame = strategy provided dataframe + dh: FreqaiDataKitchen = non-persistent data container/analyzer for current coin/bot loop + """ strategy_provided_features = dh.find_features(dataframe) if 'training_features_list_raw' in dh.data: feature_list = dh.data['training_features_list_raw'] @@ -356,11 +389,24 @@ class IFreqaiModel(ABC): def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict, strategy: IStrategy, dh: FreqaiDataKitchen, data_load_timerange: TimeRange): + """ + Retrieve data and train model on separate thread. Always called if the model folder already + contains a full set of trained models. 
+ :params: + new_trained_timerange: TimeRange = the timerange to train the model on + metadata: dict = strategy provided metadata + strategy: IStrategy = user defined strategy object + dh: FreqaiDataKitchen = non-persistent data container for current coin/loop + data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators + (larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs) + """ # with nostdout(): - dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy) - corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, - metadata) + # dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy) + # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, + # metadata) + corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, + metadata) # protecting from common benign errors associated with grabbing new data from exchange: try: @@ -408,10 +454,22 @@ class IFreqaiModel(ABC): def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict, strategy: IStrategy, dh: FreqaiDataKitchen, data_load_timerange: TimeRange): - - dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy) - corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, - metadata) + """ + Retrieve data and train model in single threaded mode (only used if model directory is empty + upon startup for dry/live) + :params: + new_trained_timerange: TimeRange = the timerange to train the model on + metadata: dict = strategy provided metadata + strategy: IStrategy = user defined strategy object + dh: FreqaiDataKitchen = non-persistent data container for current coin/loop + data_load_timerange: TimeRange = the amount of data to be loaded for populate_any_indicators + (larger than new_trained_timerange so that new_trained_timerange does not contain any NaNs) + """ + # 
dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy) + # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, + # metadata) + corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, + metadata) unfiltered_dataframe = dh.use_strategy_to_populate_indicators(strategy, corr_dataframes, @@ -481,3 +539,17 @@ class IFreqaiModel(ABC): """ return + + @abstractmethod + def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: + """ + User defines the dataframe to be returned to strategy here. + :params: + dataframe: DataFrame = the full dataframe for the current prediction (live) + or --timerange (backtesting) + dh: FreqaiDataKitchen = Data management/analysis tool assoicated to present pair only + :returns: + dataframe: DataFrame = dataframe filled with user defined data + """ + + return diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index 5147faf0c..9a5059bcf 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -18,6 +18,17 @@ class CatboostPredictionModel(IFreqaiModel): has its own DataHandler where data is held, saved, loaded, and managed. """ + def return_values(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: + + dataframe["prediction"] = dh.full_predictions + dataframe["do_predict"] = dh.full_do_predict + dataframe["target_mean"] = dh.full_target_mean + dataframe["target_std"] = dh.full_target_std + if self.freqai_info('feature_parameters', {}).get('DI-threshold', 0) > 0: + dataframe["DI"] = dh.full_DI_values + + return dataframe + def make_labels(self, dataframe: DataFrame, dh: FreqaiDataKitchen) -> DataFrame: """ User defines the labels here (target values). 
diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index d9dc38f0d..cf04bfa6e 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -45,7 +45,7 @@ class FreqaiExampleStrategy(IStrategy): process_only_new_candles = False stoploss = -0.05 - use_sell_signal = True + use_exit_signal = True startup_candle_count: int = 300 can_short = False @@ -176,12 +176,7 @@ class FreqaiExampleStrategy(IStrategy): # the model will return 4 values, its prediction, an indication of whether or not the # prediction should be accepted, the target mean/std values from the labels used during # each training period. - ( - dataframe["prediction"], - dataframe["do_predict"], - dataframe["target_mean"], - dataframe["target_std"], - ) = self.model.bridge.start(dataframe, metadata, self) + dataframe = self.model.bridge.start(dataframe, metadata, self) dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] From f2762e3b4bccd03f2e0afe79fa40cf795db5b8c1 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 3 Jun 2022 16:58:51 +0200 Subject: [PATCH 02/14] fix bug in return_values() --- freqtrade/freqai/prediction_models/CatboostPredictionModel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index 9a5059bcf..3cea0eb9b 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -24,7 +24,7 @@ class CatboostPredictionModel(IFreqaiModel): dataframe["do_predict"] = dh.full_do_predict dataframe["target_mean"] = dh.full_target_mean dataframe["target_std"] = dh.full_target_std - if self.freqai_info('feature_parameters', {}).get('DI-threshold', 0) > 0: + if 
self.freqai_info.get('feature_parameters', {}).get('DI-threshold', 0) > 0: dataframe["DI"] = dh.full_DI_values return dataframe From e8c0dcf9f352620fd1cf2f7e7d7265e5eb1b6713 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 3 Jun 2022 17:14:07 +0200 Subject: [PATCH 03/14] add debug message to timerange --- freqtrade/freqai/data_kitchen.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 4e2fb6cc9..5c26b0598 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -743,7 +743,7 @@ class FreqaiDataKitchen: logger.warning('FreqAI could not detect max timeframe and therefore may not ' 'download the proper amount of data for training') - # logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days') + logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days') if trained_timestamp != 0: elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY @@ -770,6 +770,13 @@ class FreqaiDataKitchen: data_load_timerange.stopts = int(time) retrain = True + logger.info(f'Total data download needed ' + f'{(data_load_timerange.stopts - data_load_timerange.startts)/SECONDS_IN_DAY} ' + ' days') + logger.info(f'Total training timerange ' + f'{(trained_timerange.stopts - trained_timerange.startts)/SECONDS_IN_DAY} ' + ' days') + # if retrain: # coin, _ = metadata['pair'].split("/") # # set the new data_path From d6b8801f413be4f1c405314473a9d5efda3b959b Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 5 Jun 2022 04:40:58 +0200 Subject: [PATCH 04/14] fix follower bug --- freqtrade/freqai/data_kitchen.py | 28 ++++++++++++++-------------- freqtrade/freqai/freqai_interface.py | 6 +++--- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 5c26b0598..4f68326fc 100644 --- a/freqtrade/freqai/data_kitchen.py +++ 
b/freqtrade/freqai/data_kitchen.py @@ -638,8 +638,8 @@ class FreqaiDataKitchen: ) logger.info( - "DI tossed %s predictions for being too far from training data", - len(do_predict) - do_predict.sum(), + f'DI tossed {len(do_predict) - do_predict.sum():.2f} predictions for ' + 'being too far from training data' ) self.do_predict += do_predict @@ -743,7 +743,7 @@ class FreqaiDataKitchen: logger.warning('FreqAI could not detect max timeframe and therefore may not ' 'download the proper amount of data for training') - logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY} days') + logger.info(f'Extending data download by {additional_seconds/SECONDS_IN_DAY:.2f} days') if trained_timestamp != 0: elapsed_time = (time - trained_timestamp) / SECONDS_IN_DAY @@ -770,12 +770,13 @@ class FreqaiDataKitchen: data_load_timerange.stopts = int(time) retrain = True - logger.info(f'Total data download needed ' - f'{(data_load_timerange.stopts - data_load_timerange.startts)/SECONDS_IN_DAY} ' - ' days') - logger.info(f'Total training timerange ' - f'{(trained_timerange.stopts - trained_timerange.startts)/SECONDS_IN_DAY} ' - ' days') + # logger.info( + # f'Total data download needed ' + # f'{(data_load_timerange.stopts - data_load_timerange.startts)/SECONDS_IN_DAY:.2f}' + # ' days') + # logger.info(f'Total training timerange ' + # f'{(trained_timerange.stopts - trained_timerange.startts)/SECONDS_IN_DAY} ' + # ' days') # if retrain: # coin, _ = metadata['pair'].split("/") @@ -861,11 +862,10 @@ class FreqaiDataKitchen: for pair in self.all_pairs: for tf in self.freqai_config.get('timeframes'): - history_data[pair][tf] = pd.concat( - [history_data[pair][tf], - strategy.dp.get_pair_dataframe(pair, tf).iloc[-1]], - axis=0 - ) + lh = len(history_data[pair][tf].index) + history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair, tf).iloc[-1] + + logger.info(f'Length of history data {len(history_data[pair][tf])}') def set_all_pairs(self) -> None: diff --git 
a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 04e819cc4..e192f9b0a 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -226,7 +226,7 @@ class IFreqaiModel(ABC): # if trainable, check if model needs training, if so compute new timerange, # then save model and metadata. # if not trainable, load existing data - if (trainable and not self.follow_mode) or coin_first: + if (trainable or coin_first) and not self.follow_mode: file_exists = False if trained_timestamp != 0: # historical model available @@ -416,8 +416,8 @@ class IFreqaiModel(ABC): metadata) unfiltered_dataframe = dh.slice_dataframe(new_trained_timerange, unfiltered_dataframe) - except Exception: - logger.warning('Mismatched sizes encountered in strategy') + except Exception as err: + logger.exception(err) # self.data_drawer.pair_to_end_of_training_queue(metadata['pair']) self.training_on_separate_thread = False self.retrain = False From 2451ed8c88eb079b9293af9b802caabd8167610d Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Mon, 6 Jun 2022 15:11:54 -0600 Subject: [PATCH 05/14] Quick bug fix --- freqtrade/freqai/data_drawer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 4e52ac711..740998caf 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -266,7 +266,7 @@ class FreqaiDataDrawer: def update_follower_metadata(self): # follower needs to load from disk to get any changes made by leader to pair_dict self.load_drawer_from_disk() - if self.config.get('freqai', {})('purge_old_models', False): + if self.config.get('freqai', {}).get('purge_old_models', False): self.purge_old_models() # to be used if we want to send predictions directly to the follower instead of forcing From bf19055e53332d19ac2dbacc0de171daa609b9a5 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Mon, 6 Jun 2022 15:56:12 -0600 Subject: 
[PATCH 06/14] Update function spelling --- freqtrade/templates/FreqaiExampleStrategy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index cf04bfa6e..21dd4a39b 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -251,7 +251,7 @@ class FreqaiExampleStrategy(IStrategy): if not follow_mode: self.model.bridge.data_drawer.save_drawer_to_disk() else: - self.model.bridge.data_drawer.save_follower_dict_to_dist() + self.model.bridge.data_drawer.save_follower_dict_to_disk() else: if pair_dict[pair]['prediction' + entry_tag] > 0: roi_price = abs(trade_candle['prediction']) @@ -261,7 +261,7 @@ class FreqaiExampleStrategy(IStrategy): if not follow_mode: self.model.bridge.data_drawer.save_drawer_to_disk() else: - self.model.bridge.data_drawer.save_follower_dict_to_dist() + self.model.bridge.data_drawer.save_follower_dict_to_disk() roi_price = abs(trade_candle['prediction']) roi_time = self.max_roi_time_long.value @@ -295,7 +295,7 @@ class FreqaiExampleStrategy(IStrategy): if not follow_mode: self.model.bridge.data_drawer.save_drawer_to_disk() else: - self.model.bridge.data_drawer.save_follower_dict_to_dist() + self.model.bridge.data_drawer.save_follower_dict_to_disk() return True From e6c5e737a263ced74c4889a3ed5e23ab90214c0b Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Mon, 6 Jun 2022 16:24:32 -0600 Subject: [PATCH 07/14] Fix other bugs --- freqtrade/templates/FreqaiExampleStrategy.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 21dd4a39b..4775b1554 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -28,7 +28,7 @@ class FreqaiExampleStrategy(IStrategy): canonical freqtrade configuration file under 
config['freqai']. """ - minimal_roi = {"0": 0.01, "240": -1} + minimal_roi = {"0": 0.1, "240": -1} plot_config = { "main_plot": {}, @@ -43,7 +43,7 @@ class FreqaiExampleStrategy(IStrategy): }, } - process_only_new_candles = False + process_only_new_candles = True stoploss = -0.05 use_exit_signal = True startup_candle_count: int = 300 @@ -245,7 +245,7 @@ class FreqaiExampleStrategy(IStrategy): entry_tag = trade.enter_tag - if 'prediction' + entry_tag not in pair_dict[pair]: + if 'prediction' + entry_tag not in pair_dict[pair] or pair_dict[pair]['prediction' + entry_tag] > 0:: with self.model.bridge.lock: pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction']) if not follow_mode: @@ -253,15 +253,8 @@ class FreqaiExampleStrategy(IStrategy): else: self.model.bridge.data_drawer.save_follower_dict_to_disk() else: - if pair_dict[pair]['prediction' + entry_tag] > 0: - roi_price = abs(trade_candle['prediction']) - else: - with self.model.bridge.lock: - pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction']) - if not follow_mode: - self.model.bridge.data_drawer.save_drawer_to_disk() - else: - self.model.bridge.data_drawer.save_follower_dict_to_disk() + roi_price = abs(trade_candle['prediction']) + roi_price = abs(trade_candle['prediction']) roi_time = self.max_roi_time_long.value From 3c2e314ee54a8ed00d80fc69ce3232f5a4b25404 Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Mon, 6 Jun 2022 16:26:07 -0600 Subject: [PATCH 08/14] Fix bugs --- freqtrade/templates/FreqaiExampleStrategy.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 4775b1554..1aaf63dcf 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -252,11 +252,8 @@ class FreqaiExampleStrategy(IStrategy): self.model.bridge.data_drawer.save_drawer_to_disk() else: 
self.model.bridge.data_drawer.save_follower_dict_to_disk() - else: - roi_price = abs(trade_candle['prediction']) - - roi_price = abs(trade_candle['prediction']) + roi_price = pair_dict[pair]['prediction' + entry_tag] roi_time = self.max_roi_time_long.value roi_decay = roi_price * (1 - ((current_time - trade.open_date_utc).seconds) / From 4b26b6aaec9f4b397c42bfbba44186cbe545eaf0 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 7 Jun 2022 00:54:18 +0200 Subject: [PATCH 09/14] add lock to any historic data access --- freqtrade/freqai/data_kitchen.py | 14 ++++++++------ freqtrade/freqai/freqai_interface.py | 5 +++-- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 4f68326fc..32ea0e6ac 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -858,14 +858,16 @@ class FreqaiDataKitchen: dataframe: DataFrame = strategy provided dataframe """ - history_data = self.data_drawer.historic_data + with self.data_drawer.history_lock: + history_data = self.data_drawer.historic_data - for pair in self.all_pairs: - for tf in self.freqai_config.get('timeframes'): - lh = len(history_data[pair][tf].index) - history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair, tf).iloc[-1] + for pair in self.all_pairs: + for tf in self.freqai_config.get('timeframes'): + lh = len(history_data[pair][tf].index) + history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair, + tf).iloc[-1] - logger.info(f'Length of history data {len(history_data[pair][tf])}') + logger.info(f'Length of history data {len(history_data[pair][tf])}') def set_all_pairs(self) -> None: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index e192f9b0a..e74f2b92d 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -405,8 +405,9 @@ class IFreqaiModel(ABC): # 
dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy) # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, # metadata) - corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, - metadata) + with self.data_drawer.history_lock: + corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, + metadata) # protecting from common benign errors associated with grabbing new data from exchange: try: From cab8f517b4137aa8914c18771d44e6b022c47d88 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 7 Jun 2022 01:07:30 +0200 Subject: [PATCH 10/14] add lock to datadrawer --- freqtrade/freqai/data_drawer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 740998caf..0fb399b58 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -5,6 +5,7 @@ import json import logging import re import shutil +import threading from pathlib import Path from typing import Any, Dict, Tuple @@ -44,6 +45,7 @@ class FreqaiDataDrawer: self.create_follower_dict() self.load_drawer_from_disk() self.training_queue: Dict[str, int] = {} + self.history_lock = threading.Lock() # self.create_training_queue(pair_whitelist) def load_drawer_from_disk(self): From 15d049cffe8e1b5462978424bc7501d714cdfb8e Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 7 Jun 2022 19:49:20 +0200 Subject: [PATCH 11/14] detect if upper tf candles are new or not, append if so. 
Correct the epoch for candle update check --- freqtrade/freqai/data_kitchen.py | 63 +++++++++++-------- freqtrade/freqai/freqai_interface.py | 15 ++--- .../CatboostPredictionModel.py | 2 +- 3 files changed, 44 insertions(+), 36 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 32ea0e6ac..fafeda49d 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -297,7 +297,7 @@ class FreqaiDataKitchen: ) if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live: logger.warning( - f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100} percent' + f' {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.2f} percent' ' of training data dropped due to NaNs, model may perform inconsistent' 'with expectations' ) @@ -538,9 +538,10 @@ class FreqaiDataKitchen: for prediction confidence in the Dissimilarity Index """ logger.info("computing average mean distance for all training points") - pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=-1) + tc = self.freqai_config.get('model_training_parameters', {}).get('thread_count', -1) + pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc) avg_mean_dist = pairwise.mean(axis=1).mean() - logger.info("avg_mean_dist %s", avg_mean_dist) + logger.info(f'avg_mean_dist {avg_mean_dist:.2f}') return avg_mean_dist @@ -668,7 +669,8 @@ class FreqaiDataKitchen: self.full_predictions = np.append(self.full_predictions, predictions) self.full_do_predict = np.append(self.full_do_predict, do_predict) - self.full_DI_values = np.append(self.full_DI_values, self.DI_values) + if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0: + self.full_DI_values = np.append(self.full_DI_values, self.DI_values) self.full_target_mean = np.append(self.full_target_mean, target_mean) self.full_target_std = np.append(self.full_target_std, target_std) @@ -683,7 +685,8 @@ class 
FreqaiDataKitchen: filler = np.zeros(len_dataframe - len(self.full_predictions)) # startup_candle_count self.full_predictions = np.append(filler, self.full_predictions) self.full_do_predict = np.append(filler, self.full_do_predict) - self.full_DI_values = np.append(filler, self.full_DI_values) + if self.freqai_config.get('feature_parameters', {}).get('DI-threshold', 0) > 0: + self.full_DI_values = np.append(filler, self.full_DI_values) self.full_target_mean = np.append(filler, self.full_target_mean) self.full_target_std = np.append(filler, self.full_target_std) @@ -728,7 +731,7 @@ class FreqaiDataKitchen: # find the max indicator length required max_timeframe_chars = self.freqai_config.get('timeframes')[-1] max_period = self.freqai_config.get('feature_parameters', {}).get( - 'indicator_max_period', 20) + 'indicator_max_period', 50) additional_seconds = 0 if max_timeframe_chars[-1] == 'd': additional_seconds = max_period * SECONDS_IN_DAY * int(max_timeframe_chars[-2]) @@ -863,9 +866,17 @@ class FreqaiDataKitchen: for pair in self.all_pairs: for tf in self.freqai_config.get('timeframes'): - lh = len(history_data[pair][tf].index) - history_data[pair][tf].loc[lh] = strategy.dp.get_pair_dataframe(pair, - tf).iloc[-1] + # check if newest candle is already appended + if ( + str(history_data[pair][tf].iloc[-1]['date']) == + str(strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]['date'].iloc[-1]) + ): + continue + history_data[pair][tf] = pd.concat( + [history_data[pair][tf], + strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]], + ignore_index=True, axis=0 + ) logger.info(f'Length of history data {len(history_data[pair][tf])}') @@ -908,23 +919,25 @@ class FreqaiDataKitchen: for training according to user defined train_period metadata: dict = strategy furnished pair metadata """ - corr_dataframes: Dict[Any, Any] = {} - base_dataframes: Dict[Any, Any] = {} - historic_data = self.data_drawer.historic_data - pairs = self.freqai_config.get('corr_pairlist', []) + with 
self.data_drawer.history_lock: + corr_dataframes: Dict[Any, Any] = {} + base_dataframes: Dict[Any, Any] = {} + historic_data = self.data_drawer.historic_data + pairs = self.freqai_config.get('corr_pairlist', []) - for tf in self.freqai_config.get('timeframes'): - base_dataframes[tf] = self.slice_dataframe( - timerange, - historic_data[metadata['pair']][tf] - ) - if pairs: - for p in pairs: - if metadata['pair'] in p: - continue # dont repeat anything from whitelist - if p not in corr_dataframes: - corr_dataframes[p] = {} - corr_dataframes[p][tf] = self.slice_dataframe(timerange, historic_data[p][tf]) + for tf in self.freqai_config.get('timeframes'): + base_dataframes[tf] = self.slice_dataframe( + timerange, + historic_data[metadata['pair']][tf] + ) + if pairs: + for p in pairs: + if metadata['pair'] in p: + continue # dont repeat anything from whitelist + if p not in corr_dataframes: + corr_dataframes[p] = {} + corr_dataframes[p][tf] = self.slice_dataframe(timerange, + historic_data[p][tf]) return corr_dataframes, base_dataframes diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index e74f2b92d..86f873f3a 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -216,12 +216,9 @@ class IFreqaiModel(ABC): # append the historic data once per round if (self.data_drawer.historic_data and - self.update_historic_data >= len(self.config.get('exchange', '') - .get('pair_whitelist'))): + self.config.get('exchange', '').get('pair_whitelist').index(metadata['pair']) == 1): dh.update_historic_data(strategy) - self.update_historic_data = 1 - else: - self.update_historic_data += 1 + logger.info(f'Updating historic data on pair {metadata["pair"]}') # if trainable, check if model needs training, if so compute new timerange, # then save model and metadata. 
@@ -405,9 +402,9 @@ class IFreqaiModel(ABC): # dh.download_new_data_for_retraining(data_load_timerange, metadata, strategy) # corr_dataframes, base_dataframes = dh.load_pairs_histories(data_load_timerange, # metadata) - with self.data_drawer.history_lock: - corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, - metadata) + + corr_dataframes, base_dataframes = dh.get_base_and_corr_dataframes(data_load_timerange, + metadata) # protecting from common benign errors associated with grabbing new data from exchange: try: @@ -419,7 +416,6 @@ class IFreqaiModel(ABC): except Exception as err: logger.exception(err) - # self.data_drawer.pair_to_end_of_training_queue(metadata['pair']) self.training_on_separate_thread = False self.retrain = False return @@ -428,7 +424,6 @@ class IFreqaiModel(ABC): model = self.train(unfiltered_dataframe, metadata, dh) except ValueError: logger.warning('Value error encountered during training') - # self.data_drawer.pair_to_end_of_training_queue(metadata['pair']) self.training_on_separate_thread = False self.retrain = False return diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index 3cea0eb9b..519109213 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -59,7 +59,7 @@ class CatboostPredictionModel(IFreqaiModel): :model: Trained model which can be used to inference (self.predict) """ - logger.info('--------------------Starting training' + logger.info('--------------------Starting training ' f'{metadata["pair"]} --------------------') # create the full feature list based on user config info From f8f25e36efd63e42041bceb7ec3ddcb2b1ab1c4f Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 7 Jun 2022 19:54:45 +0200 Subject: [PATCH 12/14] update example config/strat --- config_examples/config_freqai_futures.example.json | 10 +++++----- 
freqtrade/templates/FreqaiExampleStrategy.py | 9 +++++---- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/config_examples/config_freqai_futures.example.json b/config_examples/config_freqai_futures.example.json index 30eb6fc3e..e5207a906 100644 --- a/config_examples/config_freqai_futures.example.json +++ b/config_examples/config_freqai_futures.example.json @@ -66,8 +66,8 @@ "1h" ], "train_period": 20, - "backtest_period": 2, - "identifier": "example2", + "backtest_period": 0.001, + "identifier": "constant_retrain_live", "live_trained_timestamp": 0, "corr_pairlist": [ "BTC/USDT:USDT", @@ -76,20 +76,20 @@ "feature_parameters": { "period": 20, "shift": 2, - "DI_threshold": 0, + "DI_threshold": 0.9, "weight_factor": 0.9, "principal_component_analysis": false, "use_SVM_to_remove_outliers": true, "stratify": 0, "indicator_max_period": 20, - "indicator_periods": [10, 20, 30] + "indicator_periods": [10, 20] }, "data_split_parameters": { "test_size": 0.33, "random_state": 1 }, "model_training_parameters": { - "n_estimators": 200, + "n_estimators": 1000, "task_type": "CPU" } }, diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 1aaf63dcf..608f24239 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -47,7 +47,7 @@ class FreqaiExampleStrategy(IStrategy): stoploss = -0.05 use_exit_signal = True startup_candle_count: int = 300 - can_short = False + can_short = True linear_roi_offset = DecimalParameter(0.00, 0.02, default=0.005, space='sell', optimize=False, load=True) @@ -178,8 +178,8 @@ class FreqaiExampleStrategy(IStrategy): # each training period. 
dataframe = self.model.bridge.start(dataframe, metadata, self) - dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] - dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] + dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.25 + dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.25 return dataframe def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame: @@ -245,7 +245,8 @@ class FreqaiExampleStrategy(IStrategy): entry_tag = trade.enter_tag - if 'prediction' + entry_tag not in pair_dict[pair] or pair_dict[pair]['prediction' + entry_tag] > 0:: + if ('prediction' + entry_tag not in pair_dict[pair] or + pair_dict[pair]['prediction' + entry_tag] > 0): with self.model.bridge.lock: pair_dict[pair]['prediction' + entry_tag] = abs(trade_candle['prediction']) if not follow_mode: From 66800c7a45fb69a70d5e6542a462697cc80b41a5 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 7 Jun 2022 20:24:23 +0200 Subject: [PATCH 13/14] ensure newest candles are always appended --- freqtrade/freqai/freqai_interface.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 86f873f3a..1f194860d 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -215,8 +215,7 @@ class IFreqaiModel(ABC): return dh # append the historic data once per round - if (self.data_drawer.historic_data and - self.config.get('exchange', '').get('pair_whitelist').index(metadata['pair']) == 1): + if self.data_drawer.historic_data: dh.update_historic_data(strategy) logger.info(f'Updating historic data on pair {metadata["pair"]}') From d9b79d94e46fab456e4a0699ad372f2839a29b33 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Tue, 7 Jun 2022 20:57:10 +0200 Subject: [PATCH 14/14] increase candle update flexibility to allow long sequential trainings that may last more 
than one candle --- freqtrade/freqai/data_kitchen.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index fafeda49d..11c9142a6 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -866,15 +866,22 @@ class FreqaiDataKitchen: for pair in self.all_pairs: for tf in self.freqai_config.get('timeframes'): + # check if newest candle is already appended + df_dp = strategy.dp.get_pair_dataframe(pair, tf) if ( str(history_data[pair][tf].iloc[-1]['date']) == - str(strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]['date'].iloc[-1]) + str(df_dp.iloc[-1:]['date'].iloc[-1]) ): continue + + index = df_dp.loc[ + df_dp['date'] == + history_data[pair][tf].iloc[-1]['date'] + ].index[0] + 1 history_data[pair][tf] = pd.concat( [history_data[pair][tf], - strategy.dp.get_pair_dataframe(pair, tf).iloc[-1:]], + strategy.dp.get_pair_dataframe(pair, tf).iloc[index:]], ignore_index=True, axis=0 )