Enable hourly/minute retraining in live/dry. Suppress catboost folder output. Update config + constants + docs to reflect updates.

This commit is contained in:
robcaulk 2022-05-23 00:06:26 +02:00
parent 42d95af829
commit af0cc21af9
6 changed files with 66 additions and 35 deletions

View File

@ -71,7 +71,8 @@
"DI_threshold": 1, "DI_threshold": 1,
"weight_factor": 0, "weight_factor": 0,
"principal_component_analysis": false, "principal_component_analysis": false,
"use_SVM_to_remove_outliers": false "use_SVM_to_remove_outliers": false,
"stratify": 0
}, },
"data_split_parameters": { "data_split_parameters": {
"test_size": 0.25, "test_size": 0.25,

View File

@ -151,7 +151,8 @@ no. `timeframes` * no. `base_features` * no. `corr_pairlist` * no. `shift`_
Users define the backtesting timerange with the typical `--timerange` parameter in the user Users define the backtesting timerange with the typical `--timerange` parameter in the user
configuration file. `train_period` is the duration of the sliding training window, while configuration file. `train_period` is the duration of the sliding training window, while
`backtest_period` is the sliding backtesting window, both in number of days. In the present example, `backtest_period` is the sliding backtesting window, both in number of days (backtest_period can be
a float to indicate sub-daily retraining in live/dry mode). In the present example,
the user is asking Freqai to use a training period of 30 days and backtest the subsequent 7 days. the user is asking Freqai to use a training period of 30 days and backtest the subsequent 7 days.
This means that if the user sets `--timerange 20210501-20210701`, This means that if the user sets `--timerange 20210501-20210701`,
Freqai will train 8 separate models (because the full range comprises 8 weeks), Freqai will train 8 separate models (because the full range comprises 8 weeks),
@ -347,6 +348,22 @@ Freqai will train an SVM on the training data (or components if the user activat
`principal_component_analysis`) and remove any data point that it deems to sit beyond the `principal_component_analysis`) and remove any data point that it deems to sit beyond the
feature space. feature space.
## Stratifying the data
The user can stratify the training/testing data using:
```json
"freqai": {
"feature_parameters" : {
"stratify": 3
}
}
```
which will split the data chronologically so that every Xth data point is a testing data point. In the
present example, the user is asking for every third data point in the dataframe to be used for
testing; the other points are used for training.
## Additional information ## Additional information
### Feature standardization ### Feature standardization

View File

@ -438,7 +438,7 @@ CONF_SCHEMA = {
"properties": { "properties": {
"timeframes": {"type": "list"}, "timeframes": {"type": "list"},
"train_period": {"type": "integer", "default": 0}, "train_period": {"type": "integer", "default": 0},
"backtest_period": {"type": "integer", "default": 7}, "backtest_period": {"type": "float", "default": 7},
"identifier": {"type": "str", "default": "example"}, "identifier": {"type": "str", "default": "example"},
"live_trained_timerange": {"type": "str"}, "live_trained_timerange": {"type": "str"},
"live_full_backtestrange": {"type": "str"}, "live_full_backtestrange": {"type": "str"},
@ -451,7 +451,7 @@ CONF_SCHEMA = {
"DI_threshold": {"type": "integer", "default": 0}, "DI_threshold": {"type": "integer", "default": 0},
"weight_factor": {"type": "number", "default": 0}, "weight_factor": {"type": "number", "default": 0},
"principal_component_analysis": {"type": "boolean", "default": False}, "principal_component_analysis": {"type": "boolean", "default": False},
"remove_outliers": {"type": "boolean", "default": False}, "use_SVM_to_remove_outliers": {"type": "boolean", "default": False},
}, },
}, },
"data_split_parameters": { "data_split_parameters": {

View File

@ -689,50 +689,58 @@ class FreqaiDataKitchen:
return full_timerange return full_timerange
def check_if_new_training_required(self, training_timerange: str, def check_if_new_training_required(self, trained_timerange: TimeRange,
metadata: dict) -> Tuple[bool, str]: metadata: dict,
timestamp: int = 0) -> Tuple[bool, TimeRange, int]:
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
if training_timerange: # user passed no live_trained_timerange in config if trained_timerange.startts != 0:
trained_timerange = TimeRange.parse_timerange(training_timerange) # trained_timerange = TimeRange.parse_timerange(training_timerange)
# keep hour available incase user wants to train multiple times per day
# training_timerange is a str for day range only, so we add the extra hours
# original_stop_seconds = trained_timerange.stopts
# trained_timerange.stopts += int(timestamp - original_stop_seconds)
# trained_timerange.startts += int(timestamp - original_stop_seconds)
elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY
trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
retrain = elapsed_time > self.freqai_config['backtest_period'] retrain = elapsed_time > self.freqai_config['backtest_period']
else: if retrain:
trained_timerange = TimeRange.parse_timerange("20000101-20000201") trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
else: # user passed no live_trained_timerange in config
trained_timerange = TimeRange.parse_timerange("20000101-20000201") # arbitrary date
trained_timerange.startts = int(time - self.freqai_config['train_period'] * trained_timerange.startts = int(time - self.freqai_config['train_period'] *
SECONDS_IN_DAY) SECONDS_IN_DAY)
trained_timerange.stopts = int(time) trained_timerange.stopts = int(time)
retrain = True retrain = True
start = datetime.datetime.utcfromtimestamp(trained_timerange.startts) timestamp = trained_timerange.stopts
stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts) # start = datetime.datetime.utcfromtimestamp(trained_timerange.startts)
new_trained_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") # stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts)
# new_trained_timerange_str = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
if retrain: if retrain:
coin, _ = metadata['pair'].split("/") coin, _ = metadata['pair'].split("/")
# set the new model_path # set the new model_path
self.model_path = Path(self.full_path / str("sub-train" + "-" + self.model_path = Path(self.full_path / str("sub-train" + "-" +
str(new_trained_timerange))) str(timestamp)))
self.model_filename = "cb_" + coin.lower() + "_" + new_trained_timerange self.model_filename = "cb_" + coin.lower() + "_" + str(timestamp)
# this is not persistent at the moment TODO # this is not persistent at the moment TODO
self.freqai_config['live_trained_timerange'] = new_trained_timerange self.freqai_config['live_trained_timerange'] = str(timestamp)
# enables persistence, but not fully implemented into save/load data yer # enables persistence, but not fully implemented into save/load data yer
self.data['live_trained_timerange'] = new_trained_timerange self.data['live_trained_timerange'] = str(timestamp)
return retrain, new_trained_timerange return retrain, trained_timerange, timestamp
def download_new_data_for_retraining(self, new_timerange: str, metadata: dict) -> None: def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict) -> None:
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'], exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
self.config, validate=False) self.config, validate=False)
pairs = self.freqai_config['corr_pairlist'] pairs = self.freqai_config['corr_pairlist']
if metadata['pair'] not in pairs: if metadata['pair'] not in pairs:
pairs += metadata['pair'] # dont include pair twice pairs += metadata['pair'] # dont include pair twice
timerange = TimeRange.parse_timerange(new_timerange) # timerange = TimeRange.parse_timerange(new_timerange)
refresh_backtest_ohlcv_data( refresh_backtest_ohlcv_data(
exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'], exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
@ -743,12 +751,12 @@ class FreqaiDataKitchen:
prepend=self.config.get('prepend_data', False) prepend=self.config.get('prepend_data', False)
) )
def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any], def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any],
DataFrame]: DataFrame]:
corr_dataframes: Dict[Any, Any] = {} corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {}
pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']] pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']]
timerange = TimeRange.parse_timerange(new_timerange) # timerange = TimeRange.parse_timerange(new_timerange)
for tf in self.freqai_config['timeframes']: for tf in self.freqai_config['timeframes']:
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'], base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
@ -763,10 +771,6 @@ class FreqaiDataKitchen:
timeframe=tf, timeframe=tf,
pair=p, timerange=timerange) pair=p, timerange=timerange)
# base_dataframe = [dataframe for key, dataframe in corr_dataframes.items()
# if metadata['pair'] in key]
# [0] indexes the lowest tf for the basepair
return corr_dataframes, base_dataframes return corr_dataframes, base_dataframes
def use_strategy_to_populate_indicators(self, strategy: IStrategy, def use_strategy_to_populate_indicators(self, strategy: IStrategy,

View File

@ -11,6 +11,7 @@ import numpy.typing as npt
import pandas as pd import pandas as pd
from pandas import DataFrame from pandas import DataFrame
from freqtrade.configuration import TimeRange
from freqtrade.enums import RunMode from freqtrade.enums import RunMode
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.strategy.interface import IStrategy from freqtrade.strategy.interface import IStrategy
@ -63,6 +64,12 @@ class IFreqaiModel(ABC):
self.training_on_separate_thread = False self.training_on_separate_thread = False
self.retrain = False self.retrain = False
self.first = True self.first = True
self.timestamp = 0
if self.freqai_info['live_trained_timerange']:
self.new_trained_timerange = TimeRange.parse_timerange(
self.freqai_info['live_trained_timerange'])
else:
self.new_trained_timerange = TimeRange()
def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame: def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
""" """
@ -150,9 +157,10 @@ class IFreqaiModel(ABC):
if not self.training_on_separate_thread: if not self.training_on_separate_thread:
# this will also prevent other pairs from trying to train simultaneously. # this will also prevent other pairs from trying to train simultaneously.
(self.retrain, (self.retrain,
self.new_trained_timerange) = self.dh.check_if_new_training_required(self.freqai_info[ self.new_trained_timerange,
'live_trained_timerange'], self.timestamp) = self.dh.check_if_new_training_required(self.new_trained_timerange,
metadata) metadata,
timestamp=self.timestamp)
else: else:
logger.info("FreqAI training a new model on background thread.") logger.info("FreqAI training a new model on background thread.")
self.retrain = False self.retrain = False
@ -250,7 +258,7 @@ class IFreqaiModel(ABC):
:param pair: pair e.g. BTC/USD :param pair: pair e.g. BTC/USD
:param path: path to model :param path: path to model
""" """
if self.live and training_timerange is None: if self.live and training_timerange == "":
return False return False
coin, _ = pair.split("/") coin, _ = pair.split("/")
self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange
@ -263,7 +271,7 @@ class IFreqaiModel(ABC):
return file_exists return file_exists
@threaded @threaded
def retrain_model_on_separate_thread(self, new_trained_timerange: str, metadata: dict, def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict,
strategy: IStrategy): strategy: IStrategy):
# with nostdout(): # with nostdout():
@ -282,7 +290,7 @@ class IFreqaiModel(ABC):
self.training_on_separate_thread = False self.training_on_separate_thread = False
self.retrain = False self.retrain = False
def train_model_in_series(self, new_trained_timerange: str, metadata: dict, def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
strategy: IStrategy): strategy: IStrategy):
self.dh.download_new_data_for_retraining(new_trained_timerange, metadata) self.dh.download_new_data_for_retraining(new_trained_timerange, metadata)

View File

@ -101,6 +101,7 @@ class CatboostPredictionModel(IFreqaiModel):
) )
model = CatBoostRegressor( model = CatBoostRegressor(
allow_writing_files=False,
verbose=100, early_stopping_rounds=400, **self.model_training_parameters verbose=100, early_stopping_rounds=400, **self.model_training_parameters
) )
model.fit(X=train_data, eval_set=test_data) model.fit(X=train_data, eval_set=test_data)