diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json index a895a7341..ed3782775 100644 --- a/config_examples/config_freqai.example.json +++ b/config_examples/config_freqai.example.json @@ -71,7 +71,8 @@ "DI_threshold": 1, "weight_factor": 0, "principal_component_analysis": false, - "use_SVM_to_remove_outliers": false + "use_SVM_to_remove_outliers": false, + "stratify": 0 }, "data_split_parameters": { "test_size": 0.25, diff --git a/docs/freqai.md b/docs/freqai.md index 8a37e7d66..606b88912 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -151,7 +151,8 @@ no. `timeframes` * no. `base_features` * no. `corr_pairlist` * no. `shift`_ Users define the backtesting timerange with the typical `--timerange` parameter in the user configuration file. `train_period` is the duration of the sliding training window, while -`backtest_period` is the sliding backtesting window, both in number of days. In the present example, +`backtest_period` is the sliding backtesting window, both in number of days (backtest_period can be +a float to indicate sub-daily retraining in live/dry mode). In the present example, the user is asking Freqai to use a training period of 30 days and backtest the subsequent 7 days. This means that if the user sets `--timerange 20210501-20210701`, Freqai will train 8 separate models (because the full range comprises 8 weeks), @@ -347,6 +348,22 @@ Freqai will train an SVM on the training data (or components if the user activat `principal_component_analysis`) and remove any data point that it deems to be sit beyond the feature space. +## Stratifying the data + +The user can stratify the training/testing data using: + +```json + "freqai": { + "feature_parameters" : { + "stratify": 3 + } + } +``` + +which will split the data chronologically so that every Xth data point is a testing data point. 
In the +present example, the user is asking for every third data point in the dataframe to be used for +testing, the other points are used for training. + ## Additional information ### Feature standardization diff --git a/freqtrade/constants.py b/freqtrade/constants.py index 686991e2c..05581cc3a 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -438,7 +438,7 @@ CONF_SCHEMA = { "properties": { "timeframes": {"type": "list"}, "train_period": {"type": "integer", "default": 0}, - "backtest_period": {"type": "integer", "default": 7}, + "backtest_period": {"type": "float", "default": 7}, "identifier": {"type": "str", "default": "example"}, "live_trained_timerange": {"type": "str"}, "live_full_backtestrange": {"type": "str"}, @@ -451,7 +451,7 @@ CONF_SCHEMA = { "DI_threshold": {"type": "integer", "default": 0}, "weight_factor": {"type": "number", "default": 0}, "principal_component_analysis": {"type": "boolean", "default": False}, - "remove_outliers": {"type": "boolean", "default": False}, + "use_SVM_to_remove_outliers": {"type": "boolean", "default": False}, }, }, "data_split_parameters": { diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index f589a1c89..e09a2d0d5 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -689,50 +689,58 @@ class FreqaiDataKitchen: return full_timerange - def check_if_new_training_required(self, training_timerange: str, - metadata: dict) -> Tuple[bool, str]: + def check_if_new_training_required(self, trained_timerange: TimeRange, + metadata: dict, + timestamp: int = 0) -> Tuple[bool, TimeRange, int]: time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() - if training_timerange: # user passed no live_trained_timerange in config - trained_timerange = TimeRange.parse_timerange(training_timerange) + if trained_timerange.startts != 0: + # trained_timerange = TimeRange.parse_timerange(training_timerange) + # keep hour available incase user wants to 
train multiple times per day + # training_timerange is a str for day range only, so we add the extra hours + # original_stop_seconds = trained_timerange.stopts + # trained_timerange.stopts += int(timestamp - original_stop_seconds) + # trained_timerange.startts += int(timestamp - original_stop_seconds) elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY - trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY - trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY retrain = elapsed_time > self.freqai_config['backtest_period'] - else: - trained_timerange = TimeRange.parse_timerange("20000101-20000201") + if retrain: + trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY + trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY + else: # user passed no live_trained_timerange in config + trained_timerange = TimeRange.parse_timerange("20000101-20000201") # arbitrary date trained_timerange.startts = int(time - self.freqai_config['train_period'] * SECONDS_IN_DAY) trained_timerange.stopts = int(time) retrain = True - start = datetime.datetime.utcfromtimestamp(trained_timerange.startts) - stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts) - new_trained_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") + timestamp = trained_timerange.stopts + # start = datetime.datetime.utcfromtimestamp(trained_timerange.startts) + # stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts) + # new_trained_timerange_str = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") if retrain: coin, _ = metadata['pair'].split("/") # set the new model_path self.model_path = Path(self.full_path / str("sub-train" + "-" + - str(new_trained_timerange))) + str(timestamp))) - self.model_filename = "cb_" + coin.lower() + "_" + new_trained_timerange + self.model_filename = "cb_" + coin.lower() + "_" + str(timestamp) # this is not 
persistent at the moment TODO - self.freqai_config['live_trained_timerange'] = new_trained_timerange + self.freqai_config['live_trained_timerange'] = str(timestamp) # enables persistence, but not fully implemented into save/load data yer - self.data['live_trained_timerange'] = new_trained_timerange + self.data['live_trained_timerange'] = str(timestamp) - return retrain, new_trained_timerange + return retrain, trained_timerange, timestamp - def download_new_data_for_retraining(self, new_timerange: str, metadata: dict) -> None: + def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict) -> None: exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], self.config, validate=False) pairs = self.freqai_config['corr_pairlist'] if metadata['pair'] not in pairs: pairs += metadata['pair'] # dont include pair twice - timerange = TimeRange.parse_timerange(new_timerange) + # timerange = TimeRange.parse_timerange(new_timerange) refresh_backtest_ohlcv_data( exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'], @@ -743,12 +751,12 @@ class FreqaiDataKitchen: prepend=self.config.get('prepend_data', False) ) - def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any], - DataFrame]: + def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any], + DataFrame]: corr_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {} pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']] - timerange = TimeRange.parse_timerange(new_timerange) + # timerange = TimeRange.parse_timerange(new_timerange) for tf in self.freqai_config['timeframes']: base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'], @@ -763,10 +771,6 @@ class FreqaiDataKitchen: timeframe=tf, pair=p, timerange=timerange) - # base_dataframe = [dataframe for key, dataframe in corr_dataframes.items() - # if metadata['pair'] in key] - - # [0] indexes the lowest tf for the basepair 
return corr_dataframes, base_dataframes def use_strategy_to_populate_indicators(self, strategy: IStrategy, diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index f1dd5550a..6e597531b 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -11,6 +11,7 @@ import numpy.typing as npt import pandas as pd from pandas import DataFrame +from freqtrade.configuration import TimeRange from freqtrade.enums import RunMode from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.strategy.interface import IStrategy @@ -63,6 +64,12 @@ class IFreqaiModel(ABC): self.training_on_separate_thread = False self.retrain = False self.first = True + self.timestamp = 0 + if self.freqai_info['live_trained_timerange']: + self.new_trained_timerange = TimeRange.parse_timerange( + self.freqai_info['live_trained_timerange']) + else: + self.new_trained_timerange = TimeRange() def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame: """ @@ -150,9 +157,10 @@ class IFreqaiModel(ABC): if not self.training_on_separate_thread: # this will also prevent other pairs from trying to train simultaneously. (self.retrain, - self.new_trained_timerange) = self.dh.check_if_new_training_required(self.freqai_info[ - 'live_trained_timerange'], - metadata) + self.new_trained_timerange, + self.timestamp) = self.dh.check_if_new_training_required(self.new_trained_timerange, + metadata, + timestamp=self.timestamp) else: logger.info("FreqAI training a new model on background thread.") self.retrain = False @@ -250,7 +258,7 @@ class IFreqaiModel(ABC): :param pair: pair e.g. 
BTC/USD :param path: path to model """ - if self.live and training_timerange is None: + if self.live and training_timerange == "": return False coin, _ = pair.split("/") self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange @@ -263,7 +271,7 @@ class IFreqaiModel(ABC): return file_exists @threaded - def retrain_model_on_separate_thread(self, new_trained_timerange: str, metadata: dict, + def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict, strategy: IStrategy): # with nostdout(): @@ -282,7 +290,7 @@ class IFreqaiModel(ABC): self.training_on_separate_thread = False self.retrain = False - def train_model_in_series(self, new_trained_timerange: str, metadata: dict, + def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict, strategy: IStrategy): self.dh.download_new_data_for_retraining(new_trained_timerange, metadata) diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index 8550f3f15..3dad6add6 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -101,6 +101,7 @@ class CatboostPredictionModel(IFreqaiModel): ) model = CatBoostRegressor( + allow_writing_files=False, verbose=100, early_stopping_rounds=400, **self.model_training_parameters ) model.fit(X=train_data, eval_set=test_data)