From 22bd5556ed21c6483e2b5ebb542e4ea0efa9df0f Mon Sep 17 00:00:00 2001 From: robcaulk Date: Mon, 9 May 2022 15:25:00 +0200 Subject: [PATCH] add self-retraining functionality for live/dry --- config_examples/config_freqai.example.json | 7 +- freqtrade/constants.py | 2 + freqtrade/freqai/data_kitchen.py | 150 +++++++++++++++--- freqtrade/freqai/freqai_interface.py | 58 ++++++- freqtrade/strategy/interface.py | 16 ++ freqtrade/templates/ExamplePredictionModel.py | 8 +- freqtrade/templates/FreqaiExampleStrategy.py | 21 ++- 7 files changed, 218 insertions(+), 44 deletions(-) diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json index 5bd4de6c4..351585d17 100644 --- a/config_examples/config_freqai.example.json +++ b/config_examples/config_freqai.example.json @@ -13,7 +13,7 @@ "exit": 30 }, "exchange": { - "name": "ftx", + "name": "binance", "key": "", "secret": "", "ccxt_config": { @@ -55,7 +55,9 @@ ], "train_period": 30, "backtest_period": 7, - "identifier": "example", + "identifier": "livetest5", + "live_trained_timerange": "20220330-20220429", + "live_full_backtestrange": "20220302-20220501", "base_features": [ "rsi", "close_over_20sma", @@ -68,6 +70,7 @@ "macd" ], "corr_pairlist": [ + "BTC/USDT", "ETH/USDT", "LINK/USDT", "DOT/USDT" diff --git a/freqtrade/constants.py b/freqtrade/constants.py index c19a71c61..0dc355914 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -440,6 +440,8 @@ CONF_SCHEMA = { "train_period": {"type": "integer", "default": 0}, "backtest_period": {"type": "integer", "default": 7}, "identifier": {"type": "str", "default": "example"}, + "live_trained_timerange": {"type": "str"}, + "live_full_backtestrange": {"type": "str"}, "base_features": {"type": "list"}, "corr_pairlist": {"type": "list"}, "feature_parameters": { diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index b2ea71984..7b6a65a59 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -16,6 +16,10 @@ from sklearn.metrics.pairwise import pairwise_distances from sklearn.model_selection import train_test_split from freqtrade.configuration import TimeRange +from freqtrade.data.history import load_pair_history +from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data +from freqtrade.resolvers import ExchangeResolver +from freqtrade.strategy.interface import IStrategy SECONDS_IN_DAY = 86400 @@ -30,7 +34,7 @@ class FreqaiDataKitchen: author: Robert Caulk, rob.caulk@gmail.com """ - def __init__(self, config: Dict[str, Any], dataframe: DataFrame): + def __init__(self, config: Dict[str, Any], dataframe: DataFrame, live: bool = False): self.full_dataframe = dataframe self.data: Dict[Any, Any] = {} self.data_dictionary: Dict[Any, Any] = {} @@ -45,17 +49,29 @@ class FreqaiDataKitchen: self.full_target_mean: npt.ArrayLike = np.array([]) self.full_target_std: npt.ArrayLike = np.array([]) self.model_path = Path() - self.model_filename = "" + self.model_filename: str = "" - self.full_timerange = self.create_fulltimerange( - self.config["timerange"], self.freqai_config["train_period"] - ) + if not live: + self.full_timerange = self.create_fulltimerange(self.config["timerange"], + self.freqai_config["train_period"] + ) - (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange( - self.full_timerange, - config["freqai"]["train_period"], - config["freqai"]["backtest_period"], - ) + (self.training_timeranges, self.backtesting_timeranges) = self.split_timerange( + self.full_timerange, + config["freqai"]["train_period"], + config["freqai"]["backtest_period"], + ) + + def set_paths(self) -> None: + self.full_path = Path(self.config['user_data_dir'] / + "models" / + str(self.freqai_config['live_full_backtestrange'] + + self.freqai_config['identifier'])) + + self.model_path = Path(self.full_path / str("sub-train" + "-" + + str(self.freqai_config['live_trained_timerange']))) + + return def save_data(self, model: Any) -> None: """ @@ -187,10 +203,10 @@ class FreqaiDataKitchen: labels = labels[ (drop_index == 0) & (drop_index_labels == 0) ] # assuming the labels depend entirely on the dataframe here. - logger.info( - "dropped %s training points due to NaNs, ensure all historical data downloaded", - len(unfiltered_dataframe) - len(filtered_dataframe), - ) + # logger.info( + # "dropped %s training points due to NaNs, ensure all historical data downloaded", + # len(unfiltered_dataframe) - len(filtered_dataframe), + # ) self.data["filter_drop_index_training"] = drop_index else: @@ -485,11 +501,11 @@ class FreqaiDataKitchen: shift = "" if n > 0: shift = "_shift-" + str(n) - features.append(ft + shift + "_" + tf) + # features.append(ft + shift + "_" + tf) for p in config["freqai"]["corr_pairlist"]: features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf) - logger.info("number of features %s", len(features)) + # logger.info("number of features %s", len(features)) return features def check_if_pred_in_training_spaces(self) -> None: @@ -513,10 +529,10 @@ class FreqaiDataKitchen: 0, ) - logger.info( - "Distance checker tossed %s predictions for being too far from training data", - len(do_predict) - do_predict.sum(), - ) + # logger.info( + # "Distance checker tossed %s predictions for being too far from training data", + # len(do_predict) - do_predict.sum(), + # ) self.do_predict += do_predict self.do_predict -= 1 @@ -577,15 +593,105 @@ class FreqaiDataKitchen: / str(full_timerange + self.freqai_config["identifier"]) ) + config_path = Path(self.config["config_files"][0]) + if not self.full_path.is_dir(): self.full_path.mkdir(parents=True, exist_ok=True) shutil.copy( - Path(self.config["config_files"][0]).name, - Path(self.full_path / self.config["config_files"][0]), + config_path.name, + Path(self.full_path / config_path.parts[-1]), ) return full_timerange + def check_if_new_training_required(self, training_timerange: str, + metadata: dict) -> Tuple[bool, str]: + + time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + + trained_timerange = TimeRange.parse_timerange(training_timerange) + + elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY + + trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY + trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY + start = datetime.datetime.utcfromtimestamp(trained_timerange.startts) + stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts) + + new_trained_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") + + retrain = elapsed_time > self.freqai_config['backtest_period'] + + if retrain: + coin, _ = metadata['pair'].split("/") + # set the new model_path + self.model_path = Path(self.full_path / str("sub-train" + "-" + + str(new_trained_timerange))) + + self.model_filename = "cb_" + coin.lower() + "_" + new_trained_timerange + # this is not persistent at the moment TODO + self.freqai_config['live_trained_timerange'] = new_trained_timerange + # enables persistence, but not fully implemented into save/load data yer + self.data['live_trained_timerange'] = new_trained_timerange + + return retrain, new_trained_timerange + + def download_new_data_for_retraining(self, new_timerange: str, metadata: dict) -> None: + + exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], + self.config, validate=False) + pairs = self.freqai_config['corr_pairlist'] + [metadata['pair']] + timerange = TimeRange.parse_timerange(new_timerange) + # data_handler = get_datahandler(datadir, data_format) + + refresh_backtest_ohlcv_data( + exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'], + datadir=self.config['datadir'], timerange=timerange, + new_pairs_days=self.config['new_pairs_days'], + erase=False, data_format=self.config['dataformat_ohlcv'], + trading_mode=self.config.get('trading_mode', 'spot'), + prepend=self.config.get('prepend_data', False) + ) + + def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any], + DataFrame]: + corr_dataframes: Dict[Any, Any] = {} + # pair_dataframes: Dict[Any, Any] = {} + pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']] + timerange = TimeRange.parse_timerange(new_timerange) + + for p in pairs: + corr_dataframes[p] = {} + for tf in self.freqai_config['timeframes']: + corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'], + timeframe=tf, + pair=p, timerange=timerange) + + base_dataframe = [dataframe for key, dataframe in corr_dataframes.items() + if metadata['pair'] in key] + + # [0] indexes the lowest tf for the basepair + return corr_dataframes, base_dataframe[0][self.config['timeframe']] + + def use_strategy_to_populate_indicators(self, strategy: IStrategy, metadata: dict, + corr_dataframes: dict, + dataframe: DataFrame) -> DataFrame: + + # dataframe = pair_dataframes[0] # this is the base tf pair df + + for tf in self.freqai_config["timeframes"]: + # dataframe = strategy.populate_any_indicators(metadata["pair"], dataframe.copy, + # tf, pair_dataframes[tf]) + for i in self.freqai_config["corr_pairlist"]: + dataframe = strategy.populate_any_indicators(i, + dataframe.copy(), + tf, + corr_dataframes[i][tf], + coin=i.split("/")[0] + "-" + ) + + return dataframe + def np_encoder(self, object): if isinstance(object, np.generic): return object.item() diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 16b6fd9f9..222061e2a 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -8,9 +8,9 @@ import numpy.typing as npt import pandas as pd from pandas import DataFrame -from freqtrade.data.dataprovider import DataProvider from freqtrade.enums import RunMode from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.strategy.interface import IStrategy pd.options.mode.chained_assignment = None @@ -33,15 +33,14 @@ class IFreqaiModel(ABC): self.data_split_parameters = config["freqai"]["data_split_parameters"] self.model_training_parameters = config["freqai"]["model_training_parameters"] self.feature_parameters = config["freqai"]["feature_parameters"] - self.backtest_timerange = config["timerange"] + # self.backtest_timerange = config["timerange"] self.time_last_trained = None self.current_time = None self.model = None self.predictions = None - self.live_trained_timerange = None - def start(self, dataframe: DataFrame, metadata: dict, dp: DataProvider) -> DataFrame: + def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame: """ Entry point to the FreqaiModel, it will train a new model if necesssary before making the prediction. @@ -57,11 +56,18 @@ class IFreqaiModel(ABC): the model. :metadata: pair metadataa coming from strategy. """ - self.pair = metadata["pair"] - self.dh = FreqaiDataKitchen(self.config, dataframe) - if dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE): - logger.info('testing live') + live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE) + + self.pair = metadata["pair"] + self.dh = FreqaiDataKitchen(self.config, dataframe, live) + + if live: + # logger.info('testing live') + self.start_live(dataframe, metadata, strategy) + + return (self.dh.full_predictions, self.dh.full_do_predict, + self.dh.full_target_mean, self.dh.full_target_std) logger.info("going to train %s timeranges", len(self.dh.training_timeranges)) @@ -98,6 +104,42 @@ class IFreqaiModel(ABC): return (self.dh.full_predictions, self.dh.full_do_predict, self.dh.full_target_mean, self.dh.full_target_std) + def start_live(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> None: + + self.dh.set_paths() + + file_exists = self.model_exists(metadata['pair'], + training_timerange=self.freqai_info[ + 'live_trained_timerange']) + + (retrain, + new_trained_timerange) = self.dh.check_if_new_training_required(self.freqai_info[ + 'live_trained_timerange'], + metadata) + + if retrain or not file_exists: + self.dh.download_new_data_for_retraining(new_trained_timerange, metadata) + # dataframe = download-data + corr_dataframes, pair_dataframes = self.dh.load_pairs_histories(new_trained_timerange, + metadata) + + unfiltered_dataframe = self.dh.use_strategy_to_populate_indicators(strategy, + metadata, + corr_dataframes, + pair_dataframes) + + self.model = self.train(unfiltered_dataframe, metadata) + self.dh.save_data(self.model) + + self.freqai_info + + self.model = self.dh.load_data() + preds, do_preds = self.predict(dataframe) + self.dh.append_predictions(preds, do_preds, len(dataframe)) + # dataframe should have len 1 here + + return + def make_labels(self, dataframe: DataFrame) -> DataFrame: """ User defines the labels here (target values). diff --git a/freqtrade/strategy/interface.py b/freqtrade/strategy/interface.py index 57afbf32a..e681d70bd 100644 --- a/freqtrade/strategy/interface.py +++ b/freqtrade/strategy/interface.py @@ -532,6 +532,22 @@ class IStrategy(ABC, HyperStrategyMixin): """ return None + def populate_any_indicators(self, pair: str, df: DataFrame, tf: str, + informative: DataFrame = None, coin: str = "") -> DataFrame: + """ + Function designed to automatically generate, name and merge features + from user indicated timeframes in the configuration file. User can add + additional features here, but must follow the naming convention. + Defined in IStrategy because Freqai needs to know it exists. + :params: + :pair: pair to be used as informative + :df: strategy dataframe which will receive merges from informatives + :tf: timeframe of the dataframe which will modify the feature names + :informative: the dataframe associated with the informative pair + :coin: the name of the coin which will modify the feature names. + """ + return df + ### # END - Intended to be overridden by strategy ### diff --git a/freqtrade/templates/ExamplePredictionModel.py b/freqtrade/templates/ExamplePredictionModel.py index 35f25775a..08f9d2ba9 100644 --- a/freqtrade/templates/ExamplePredictionModel.py +++ b/freqtrade/templates/ExamplePredictionModel.py @@ -124,7 +124,7 @@ class ExamplePredictionModel(IFreqaiModel): data (NaNs) or felt uncertain about data (PCA and DI index) """ - logger.info("--------------------Starting prediction--------------------") + # logger.info("--------------------Starting prediction--------------------") original_feature_list = self.dh.build_feature_list(self.config) filtered_dataframe, _ = self.dh.filter_features( @@ -151,8 +151,8 @@ class ExamplePredictionModel(IFreqaiModel): predictions = self.model.predict(self.dh.data_dictionary["prediction_features"]) # compute the non-standardized predictions - predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"] + self.dh.predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"] - logger.info("--------------------Finished prediction--------------------") + # logger.info("--------------------Finished prediction--------------------") - return (predictions, self.dh.do_predict) + return (self.dh.predictions, self.dh.do_predict) diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py index 873b31115..13309d8c3 100644 --- a/freqtrade/templates/FreqaiExampleStrategy.py +++ b/freqtrade/templates/FreqaiExampleStrategy.py @@ -44,13 +44,16 @@ class FreqaiExampleStrategy(IStrategy): stoploss = -0.05 use_sell_signal = True - startup_candle_count: int = 1000 + startup_candle_count: int = 300 def informative_pairs(self): - pairs = self.freqai_info["corr_pairlist"] + pairs = self.config["freqai"]["corr_pairlist"] informative_pairs = [] - for tf in self.timeframes: - informative_pairs.append([(pair, tf) for pair in pairs]) + for tf in self.config["freqai"]["timeframes"]: + # informative_pairs.append((self.pair, tf)) + # informative_pairs.append([(pair, tf) for pair in pairs]) + for pair in pairs: + informative_pairs.append((pair, tf)) return informative_pairs def populate_any_indicators(self, pair, df, tf, informative=None, coin=""): @@ -129,6 +132,7 @@ class FreqaiExampleStrategy(IStrategy): # the configuration file parameters are stored here self.freqai_info = self.config["freqai"] + self.pair = metadata['pair'] # the model is instantiated here self.model = CustomModel(self.config) @@ -138,12 +142,13 @@ class FreqaiExampleStrategy(IStrategy): # the following loops are necessary for building the features # indicated by the user in the configuration file. for tf in self.freqai_info["timeframes"]: - dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf) - for i in self.freqai_info["corr_pairlist"]: + # dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf) + for pair in self.freqai_info["corr_pairlist"]: dataframe = self.populate_any_indicators( - i, dataframe.copy(), tf, coin=i.split("/")[0] + "-" + pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-" ) + print('dataframe_built') # the model will return 4 values, its prediction, an indication of whether or not the # prediction should be accepted, the target mean/std values from the labels used during # each training period. @@ -152,7 +157,7 @@ class FreqaiExampleStrategy(IStrategy): dataframe["do_predict"], dataframe["target_mean"], dataframe["target_std"], - ) = self.model.bridge.start(dataframe, metadata) + ) = self.model.bridge.start(dataframe, metadata, self) dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 0.5 dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.5