Enable hourly/minute retraining in live/dry. Suppress catboost folder output. Update config + constants + docs to reflect updates.
This commit is contained in:
parent
42d95af829
commit
af0cc21af9
@ -71,7 +71,8 @@
|
|||||||
"DI_threshold": 1,
|
"DI_threshold": 1,
|
||||||
"weight_factor": 0,
|
"weight_factor": 0,
|
||||||
"principal_component_analysis": false,
|
"principal_component_analysis": false,
|
||||||
"use_SVM_to_remove_outliers": false
|
"use_SVM_to_remove_outliers": false,
|
||||||
|
"stratify": 0
|
||||||
},
|
},
|
||||||
"data_split_parameters": {
|
"data_split_parameters": {
|
||||||
"test_size": 0.25,
|
"test_size": 0.25,
|
||||||
|
@ -151,7 +151,8 @@ no. `timeframes` * no. `base_features` * no. `corr_pairlist` * no. `shift`_
|
|||||||
|
|
||||||
Users define the backtesting timerange with the typical `--timerange` parameter in the user
|
Users define the backtesting timerange with the typical `--timerange` parameter in the user
|
||||||
configuration file. `train_period` is the duration of the sliding training window, while
|
configuration file. `train_period` is the duration of the sliding training window, while
|
||||||
`backtest_period` is the sliding backtesting window, both in number of days. In the present example,
|
`backtest_period` is the sliding backtesting window, both in number of days (backtest_period can be
|
||||||
|
a float to indicate sub-daily retraining in live/dry mode). In the present example,
|
||||||
the user is asking Freqai to use a training period of 30 days and backtest the subsequent 7 days.
|
the user is asking Freqai to use a training period of 30 days and backtest the subsequent 7 days.
|
||||||
This means that if the user sets `--timerange 20210501-20210701`,
|
This means that if the user sets `--timerange 20210501-20210701`,
|
||||||
Freqai will train 8 separate models (because the full range comprises 8 weeks),
|
Freqai will train 8 separate models (because the full range comprises 8 weeks),
|
||||||
@ -347,6 +348,22 @@ Freqai will train an SVM on the training data (or components if the user activat
|
|||||||
`principal_component_analysis`) and remove any data point that it deems to sit beyond the
|
`principal_component_analysis`) and remove any data point that it deems to sit beyond the
|
||||||
feature space.
|
feature space.
|
||||||
|
|
||||||
|
## Stratifying the data
|
||||||
|
|
||||||
|
The user can stratify the training/testing data using:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"freqai": {
|
||||||
|
"feature_parameters" : {
|
||||||
|
"stratify": 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
which will split the data chronologically so that every Xth data point is a testing data point. In the
|
||||||
|
present example, the user is asking for every third data point in the dataframe to be used for
|
||||||
|
testing, while the other points are used for training.
|
||||||
|
|
||||||
## Additional information
|
## Additional information
|
||||||
|
|
||||||
### Feature standardization
|
### Feature standardization
|
||||||
|
@ -438,7 +438,7 @@ CONF_SCHEMA = {
|
|||||||
"properties": {
|
"properties": {
|
||||||
"timeframes": {"type": "list"},
|
"timeframes": {"type": "list"},
|
||||||
"train_period": {"type": "integer", "default": 0},
|
"train_period": {"type": "integer", "default": 0},
|
||||||
"backtest_period": {"type": "integer", "default": 7},
|
"backtest_period": {"type": "float", "default": 7},
|
||||||
"identifier": {"type": "str", "default": "example"},
|
"identifier": {"type": "str", "default": "example"},
|
||||||
"live_trained_timerange": {"type": "str"},
|
"live_trained_timerange": {"type": "str"},
|
||||||
"live_full_backtestrange": {"type": "str"},
|
"live_full_backtestrange": {"type": "str"},
|
||||||
@ -451,7 +451,7 @@ CONF_SCHEMA = {
|
|||||||
"DI_threshold": {"type": "integer", "default": 0},
|
"DI_threshold": {"type": "integer", "default": 0},
|
||||||
"weight_factor": {"type": "number", "default": 0},
|
"weight_factor": {"type": "number", "default": 0},
|
||||||
"principal_component_analysis": {"type": "boolean", "default": False},
|
"principal_component_analysis": {"type": "boolean", "default": False},
|
||||||
"remove_outliers": {"type": "boolean", "default": False},
|
"use_SVM_to_remove_outliers": {"type": "boolean", "default": False},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
"data_split_parameters": {
|
"data_split_parameters": {
|
||||||
|
@ -689,50 +689,58 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
return full_timerange
|
return full_timerange
|
||||||
|
|
||||||
def check_if_new_training_required(self, training_timerange: str,
|
def check_if_new_training_required(self, trained_timerange: TimeRange,
|
||||||
metadata: dict) -> Tuple[bool, str]:
|
metadata: dict,
|
||||||
|
timestamp: int = 0) -> Tuple[bool, TimeRange, int]:
|
||||||
|
|
||||||
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
|
||||||
|
|
||||||
if training_timerange: # user passed no live_trained_timerange in config
|
if trained_timerange.startts != 0:
|
||||||
trained_timerange = TimeRange.parse_timerange(training_timerange)
|
# trained_timerange = TimeRange.parse_timerange(training_timerange)
|
||||||
|
# keep hour available incase user wants to train multiple times per day
|
||||||
|
# training_timerange is a str for day range only, so we add the extra hours
|
||||||
|
# original_stop_seconds = trained_timerange.stopts
|
||||||
|
# trained_timerange.stopts += int(timestamp - original_stop_seconds)
|
||||||
|
# trained_timerange.startts += int(timestamp - original_stop_seconds)
|
||||||
elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY
|
elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY
|
||||||
trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
|
|
||||||
trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
|
|
||||||
retrain = elapsed_time > self.freqai_config['backtest_period']
|
retrain = elapsed_time > self.freqai_config['backtest_period']
|
||||||
else:
|
if retrain:
|
||||||
trained_timerange = TimeRange.parse_timerange("20000101-20000201")
|
trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
|
||||||
|
trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY
|
||||||
|
else: # user passed no live_trained_timerange in config
|
||||||
|
trained_timerange = TimeRange.parse_timerange("20000101-20000201") # arbitrary date
|
||||||
trained_timerange.startts = int(time - self.freqai_config['train_period'] *
|
trained_timerange.startts = int(time - self.freqai_config['train_period'] *
|
||||||
SECONDS_IN_DAY)
|
SECONDS_IN_DAY)
|
||||||
trained_timerange.stopts = int(time)
|
trained_timerange.stopts = int(time)
|
||||||
retrain = True
|
retrain = True
|
||||||
|
|
||||||
start = datetime.datetime.utcfromtimestamp(trained_timerange.startts)
|
timestamp = trained_timerange.stopts
|
||||||
stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts)
|
# start = datetime.datetime.utcfromtimestamp(trained_timerange.startts)
|
||||||
new_trained_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
|
# stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts)
|
||||||
|
# new_trained_timerange_str = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
|
||||||
|
|
||||||
if retrain:
|
if retrain:
|
||||||
coin, _ = metadata['pair'].split("/")
|
coin, _ = metadata['pair'].split("/")
|
||||||
# set the new model_path
|
# set the new model_path
|
||||||
self.model_path = Path(self.full_path / str("sub-train" + "-" +
|
self.model_path = Path(self.full_path / str("sub-train" + "-" +
|
||||||
str(new_trained_timerange)))
|
str(timestamp)))
|
||||||
|
|
||||||
self.model_filename = "cb_" + coin.lower() + "_" + new_trained_timerange
|
self.model_filename = "cb_" + coin.lower() + "_" + str(timestamp)
|
||||||
# this is not persistent at the moment TODO
|
# this is not persistent at the moment TODO
|
||||||
self.freqai_config['live_trained_timerange'] = new_trained_timerange
|
self.freqai_config['live_trained_timerange'] = str(timestamp)
|
||||||
# enables persistence, but not fully implemented into save/load data yet
|
# enables persistence, but not fully implemented into save/load data yet
|
||||||
self.data['live_trained_timerange'] = new_trained_timerange
|
self.data['live_trained_timerange'] = str(timestamp)
|
||||||
|
|
||||||
return retrain, new_trained_timerange
|
return retrain, trained_timerange, timestamp
|
||||||
|
|
||||||
def download_new_data_for_retraining(self, new_timerange: str, metadata: dict) -> None:
|
def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict) -> None:
|
||||||
|
|
||||||
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
|
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
|
||||||
self.config, validate=False)
|
self.config, validate=False)
|
||||||
pairs = self.freqai_config['corr_pairlist']
|
pairs = self.freqai_config['corr_pairlist']
|
||||||
if metadata['pair'] not in pairs:
|
if metadata['pair'] not in pairs:
|
||||||
pairs += metadata['pair'] # dont include pair twice
|
pairs += metadata['pair'] # dont include pair twice
|
||||||
timerange = TimeRange.parse_timerange(new_timerange)
|
# timerange = TimeRange.parse_timerange(new_timerange)
|
||||||
|
|
||||||
refresh_backtest_ohlcv_data(
|
refresh_backtest_ohlcv_data(
|
||||||
exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
|
exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
|
||||||
@ -743,12 +751,12 @@ class FreqaiDataKitchen:
|
|||||||
prepend=self.config.get('prepend_data', False)
|
prepend=self.config.get('prepend_data', False)
|
||||||
)
|
)
|
||||||
|
|
||||||
def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any],
|
def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any],
|
||||||
DataFrame]:
|
DataFrame]:
|
||||||
corr_dataframes: Dict[Any, Any] = {}
|
corr_dataframes: Dict[Any, Any] = {}
|
||||||
base_dataframes: Dict[Any, Any] = {}
|
base_dataframes: Dict[Any, Any] = {}
|
||||||
pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']]
|
pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']]
|
||||||
timerange = TimeRange.parse_timerange(new_timerange)
|
# timerange = TimeRange.parse_timerange(new_timerange)
|
||||||
|
|
||||||
for tf in self.freqai_config['timeframes']:
|
for tf in self.freqai_config['timeframes']:
|
||||||
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
|
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
|
||||||
@ -763,10 +771,6 @@ class FreqaiDataKitchen:
|
|||||||
timeframe=tf,
|
timeframe=tf,
|
||||||
pair=p, timerange=timerange)
|
pair=p, timerange=timerange)
|
||||||
|
|
||||||
# base_dataframe = [dataframe for key, dataframe in corr_dataframes.items()
|
|
||||||
# if metadata['pair'] in key]
|
|
||||||
|
|
||||||
# [0] indexes the lowest tf for the basepair
|
|
||||||
return corr_dataframes, base_dataframes
|
return corr_dataframes, base_dataframes
|
||||||
|
|
||||||
def use_strategy_to_populate_indicators(self, strategy: IStrategy,
|
def use_strategy_to_populate_indicators(self, strategy: IStrategy,
|
||||||
|
@ -11,6 +11,7 @@ import numpy.typing as npt
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
|
||||||
|
from freqtrade.configuration import TimeRange
|
||||||
from freqtrade.enums import RunMode
|
from freqtrade.enums import RunMode
|
||||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||||
from freqtrade.strategy.interface import IStrategy
|
from freqtrade.strategy.interface import IStrategy
|
||||||
@ -63,6 +64,12 @@ class IFreqaiModel(ABC):
|
|||||||
self.training_on_separate_thread = False
|
self.training_on_separate_thread = False
|
||||||
self.retrain = False
|
self.retrain = False
|
||||||
self.first = True
|
self.first = True
|
||||||
|
self.timestamp = 0
|
||||||
|
if self.freqai_info['live_trained_timerange']:
|
||||||
|
self.new_trained_timerange = TimeRange.parse_timerange(
|
||||||
|
self.freqai_info['live_trained_timerange'])
|
||||||
|
else:
|
||||||
|
self.new_trained_timerange = TimeRange()
|
||||||
|
|
||||||
def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
|
def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
|
||||||
"""
|
"""
|
||||||
@ -150,9 +157,10 @@ class IFreqaiModel(ABC):
|
|||||||
if not self.training_on_separate_thread:
|
if not self.training_on_separate_thread:
|
||||||
# this will also prevent other pairs from trying to train simultaneously.
|
# this will also prevent other pairs from trying to train simultaneously.
|
||||||
(self.retrain,
|
(self.retrain,
|
||||||
self.new_trained_timerange) = self.dh.check_if_new_training_required(self.freqai_info[
|
self.new_trained_timerange,
|
||||||
'live_trained_timerange'],
|
self.timestamp) = self.dh.check_if_new_training_required(self.new_trained_timerange,
|
||||||
metadata)
|
metadata,
|
||||||
|
timestamp=self.timestamp)
|
||||||
else:
|
else:
|
||||||
logger.info("FreqAI training a new model on background thread.")
|
logger.info("FreqAI training a new model on background thread.")
|
||||||
self.retrain = False
|
self.retrain = False
|
||||||
@ -250,7 +258,7 @@ class IFreqaiModel(ABC):
|
|||||||
:param pair: pair e.g. BTC/USD
|
:param pair: pair e.g. BTC/USD
|
||||||
:param path: path to model
|
:param path: path to model
|
||||||
"""
|
"""
|
||||||
if self.live and training_timerange is None:
|
if self.live and training_timerange == "":
|
||||||
return False
|
return False
|
||||||
coin, _ = pair.split("/")
|
coin, _ = pair.split("/")
|
||||||
self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange
|
self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange
|
||||||
@ -263,7 +271,7 @@ class IFreqaiModel(ABC):
|
|||||||
return file_exists
|
return file_exists
|
||||||
|
|
||||||
@threaded
|
@threaded
|
||||||
def retrain_model_on_separate_thread(self, new_trained_timerange: str, metadata: dict,
|
def retrain_model_on_separate_thread(self, new_trained_timerange: TimeRange, metadata: dict,
|
||||||
strategy: IStrategy):
|
strategy: IStrategy):
|
||||||
|
|
||||||
# with nostdout():
|
# with nostdout():
|
||||||
@ -282,7 +290,7 @@ class IFreqaiModel(ABC):
|
|||||||
self.training_on_separate_thread = False
|
self.training_on_separate_thread = False
|
||||||
self.retrain = False
|
self.retrain = False
|
||||||
|
|
||||||
def train_model_in_series(self, new_trained_timerange: str, metadata: dict,
|
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
|
||||||
strategy: IStrategy):
|
strategy: IStrategy):
|
||||||
|
|
||||||
self.dh.download_new_data_for_retraining(new_trained_timerange, metadata)
|
self.dh.download_new_data_for_retraining(new_trained_timerange, metadata)
|
||||||
|
@ -101,6 +101,7 @@ class CatboostPredictionModel(IFreqaiModel):
|
|||||||
)
|
)
|
||||||
|
|
||||||
model = CatBoostRegressor(
|
model = CatBoostRegressor(
|
||||||
|
allow_writing_files=False,
|
||||||
verbose=100, early_stopping_rounds=400, **self.model_training_parameters
|
verbose=100, early_stopping_rounds=400, **self.model_training_parameters
|
||||||
)
|
)
|
||||||
model.fit(X=train_data, eval_set=test_data)
|
model.fit(X=train_data, eval_set=test_data)
|
||||||
|
Loading…
Reference in New Issue
Block a user