Merge pull request #8210 from freqtrade/clean-data-drawer
Allow user to control number of historical model files
commit ac2a2512ef
@@ -48,7 +48,7 @@
     ],
     "freqai": {
         "enabled": true,
-        "purge_old_models": true,
+        "purge_old_models": 2,
         "train_period_days": 15,
         "backtest_period_days": 7,
         "live_retrain_hours": 0,
|
@@ -9,7 +9,7 @@ FreqAI is configured through the typical [Freqtrade config file](configuration.m
 ```json
     "freqai": {
         "enabled": true,
-        "purge_old_models": true,
+        "purge_old_models": 2,
         "train_period_days": 30,
         "backtest_period_days": 7,
         "identifier" : "unique-id",
|
@@ -15,7 +15,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `identifier` | **Required.** <br> A unique ID for the current model. If models are saved to disk, the `identifier` allows for reloading specific pre-trained models/data. <br> **Datatype:** String.
 | `live_retrain_hours` | Frequency of retraining during dry/live runs. <br> **Datatype:** Float > 0. <br> Default: `0` (models retrain as often as possible).
 | `expiration_hours` | Avoid making predictions if a model is more than `expiration_hours` old. <br> **Datatype:** Positive integer. <br> Default: `0` (models never expire).
-| `purge_old_models` | Delete all unused models during live runs (not relevant to backtesting). If set to false (not default), dry/live runs will accumulate all unused models to disk. If <br> **Datatype:** Boolean. <br> Default: `True`.
+| `purge_old_models` | Number of models to keep on disk (not relevant to backtesting). Default is 2, which means that dry/live runs will keep the latest 2 models on disk. Setting to 0 keeps all models. This parameter also accepts a boolean to maintain backwards compatibility. <br> **Datatype:** Integer. <br> Default: `2`.
 | `save_backtest_models` | Save models to disk when running backtesting. Backtesting operates most efficiently by saving the prediction data and reusing them directly for subsequent runs (when you wish to tune entry/exit parameters). Saving backtesting models to disk also allows to use the same model files for starting a dry/live instance with the same model `identifier`. <br> **Datatype:** Boolean. <br> Default: `False` (no models are saved).
 | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
 | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.
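In practice, the parameter now accepts the values below. This is an illustration only (a Python dict in the style of the test fixture at the bottom of this diff, not a complete FreqAI config):

```python
# Accepted forms of "purge_old_models" after this change.
freqai_config = {
    "freqai": {
        "enabled": True,
        "purge_old_models": 2,       # keep the latest 2 models on disk (the default)
        # "purge_old_models": 0,     # keep all models
        # "purge_old_models": True,  # legacy boolean, still accepted (treated as 2)
    },
}
```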
|
@@ -22,7 +22,7 @@ Features include:
 * **Automatic data download** - Compute timeranges for data downloads and update historic data (in live deployments)
 * **Cleaning of incoming data** - Handle NaNs safely before training and model inferencing
 * **Dimensionality reduction** - Reduce the size of the training data via [Principal Component Analysis](freqai-feature-engineering.md#data-dimensionality-reduction-with-principal-component-analysis)
-* **Deploying bot fleets** - Set one bot to train models while a fleet of [follower bots](freqai-running.md#setting-up-a-follower) inference the models and handle trades
+* **Deploying bot fleets** - Set one bot to train models while a fleet of [consumers](producer-consumer.md) use signals.

 ## Quick start

|
@@ -546,7 +546,7 @@ CONF_SCHEMA = {
                 "enabled": {"type": "boolean", "default": False},
                 "keras": {"type": "boolean", "default": False},
                 "write_metrics_to_disk": {"type": "boolean", "default": False},
-                "purge_old_models": {"type": "boolean", "default": True},
+                "purge_old_models": {"type": ["boolean", "number"], "default": 2},
                 "conv_width": {"type": "integer", "default": 1},
                 "train_period_days": {"type": "integer", "default": 0},
                 "backtest_period_days": {"type": "number", "default": 7},
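The schema change widens the accepted type so the legacy boolean keeps validating. A minimal sketch of what `"type": ["boolean", "number"]` accepts, using the `jsonschema` package directly (Freqtrade's own validation wiring is not shown here):

```python
from jsonschema import ValidationError, validate

# Only the widened property is reproduced; the surrounding CONF_SCHEMA is omitted.
purge_schema = {
    "type": "object",
    "properties": {
        "purge_old_models": {"type": ["boolean", "number"], "default": 2},
    },
}

for candidate in ({"purge_old_models": True},    # legacy boolean still validates
                  {"purge_old_models": 2},       # new integer form
                  {"purge_old_models": "two"}):  # rejected: wrong type
    try:
        validate(candidate, purge_schema)
        print(candidate, "-> valid")
    except ValidationError:
        print(candidate, "-> invalid")
```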
|
@@ -72,12 +72,7 @@ class FreqaiDataDrawer:
         self.model_return_values: Dict[str, DataFrame] = {}
         self.historic_data: Dict[str, Dict[str, DataFrame]] = {}
         self.historic_predictions: Dict[str, DataFrame] = {}
-        self.follower_dict: Dict[str, pair_info] = {}
         self.full_path = full_path
-        self.follower_name: str = self.config.get("bot_name", "follower1")
-        self.follower_dict_path = Path(
-            self.full_path / f"follower_dictionary-{self.follower_name}.json"
-        )
         self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
         self.historic_predictions_bkp_path = Path(
             self.full_path / "historic_predictions.backup.pkl")
||||||
@@ -218,14 +213,6 @@ class FreqaiDataDrawer:
                 rapidjson.dump(self.pair_dict, fp, default=self.np_encoder,
                                number_mode=rapidjson.NM_NATIVE)

-    def save_follower_dict_to_disk(self):
-        """
-        Save follower dictionary to disk (used by strategy for persistent prediction targets)
-        """
-        with open(self.follower_dict_path, "w") as fp:
-            rapidjson.dump(self.follower_dict, fp, default=self.np_encoder,
-                           number_mode=rapidjson.NM_NATIVE)
-
     def save_global_metadata_to_disk(self, metadata: Dict[str, Any]):
         """
         Save global metadata json to disk
@@ -239,7 +226,7 @@ class FreqaiDataDrawer:
         if isinstance(object, np.generic):
             return object.item()

-    def get_pair_dict_info(self, pair: str) -> Tuple[str, int, bool]:
+    def get_pair_dict_info(self, pair: str) -> Tuple[str, int]:
         """
         Locate and load existing model metadata from persistent storage. If not located,
         create a new one and append the current pair to it and prepare it for its first
@@ -248,12 +235,9 @@ class FreqaiDataDrawer:
         :return:
         model_filename: str = unique filename used for loading persistent objects from disk
         trained_timestamp: int = the last time the coin was trained
-        return_null_array: bool = Follower could not find pair metadata
         """

         pair_dict = self.pair_dict.get(pair)
-        # data_path_set = self.pair_dict.get(pair, self.empty_pair_dict).get("data_path", "")
-        return_null_array = False

         if pair_dict:
             model_filename = pair_dict["model_filename"]
@@ -263,7 +247,7 @@ class FreqaiDataDrawer:
             model_filename = ""
             trained_timestamp = 0

-        return model_filename, trained_timestamp, return_null_array
+        return model_filename, trained_timestamp

     def set_pair_dict_info(self, metadata: dict) -> None:
         pair_in_dict = self.pair_dict.get(metadata["pair"])
@@ -382,6 +366,12 @@ class FreqaiDataDrawer:

     def purge_old_models(self) -> None:

+        num_keep = self.freqai_info["purge_old_models"]
+        if not num_keep:
+            return
+        elif type(num_keep) == bool:
+            num_keep = 2
+
         model_folders = [x for x in self.full_path.iterdir() if x.is_dir()]

         pattern = re.compile(r"sub-train-(\w+)_(\d{10})")
@@ -404,11 +394,11 @@ class FreqaiDataDrawer:
                 delete_dict[coin]["timestamps"][int(timestamp)] = dir

         for coin in delete_dict:
-            if delete_dict[coin]["num_folders"] > 2:
+            if delete_dict[coin]["num_folders"] > num_keep:
                 sorted_dict = collections.OrderedDict(
                     sorted(delete_dict[coin]["timestamps"].items())
                 )
-                num_delete = len(sorted_dict) - 2
+                num_delete = len(sorted_dict) - num_keep
                 deleted = 0
                 for k, v in sorted_dict.items():
                     if deleted >= num_delete:
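Taken together, the two hunks above replace the hard-coded limit of 2 with the configured `num_keep`. A minimal standalone sketch of the same idea follows; the `prune_model_folders` helper and `models_dir` argument are hypothetical and not part of the `FreqaiDataDrawer` API:

```python
import re
import shutil
from pathlib import Path
from typing import Dict, Union


def prune_model_folders(models_dir: Path, num_keep: Union[bool, int]) -> None:
    """Keep only the newest `num_keep` sub-train folders per coin."""
    if not num_keep:              # 0 or False: keep everything
        return
    if isinstance(num_keep, bool):
        num_keep = 2              # legacy `true` falls back to the default of 2

    pattern = re.compile(r"sub-train-(\w+)_(\d{10})")
    folders_by_coin: Dict[str, Dict[int, Path]] = {}

    # Group timestamped "sub-train-<COIN>_<timestamp>" folders by coin.
    for folder in (p for p in models_dir.iterdir() if p.is_dir()):
        match = pattern.match(folder.name)
        if match:
            coin, timestamp = match.group(1), int(match.group(2))
            folders_by_coin.setdefault(coin, {})[timestamp] = folder

    for by_timestamp in folders_by_coin.values():
        # Timestamps sorted oldest-first; drop everything except the newest num_keep.
        for timestamp in sorted(by_timestamp)[:-num_keep]:
            shutil.rmtree(by_timestamp[timestamp])
```

With `num_keep=2` this leaves at most two timestamped folders per coin; `0` or `False` skips pruning entirely, matching the documented behaviour.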
@@ -417,12 +407,6 @@ class FreqaiDataDrawer:
                     shutil.rmtree(v)
                     deleted += 1

-    def update_follower_metadata(self):
-        # follower needs to load from disk to get any changes made by leader to pair_dict
-        self.load_drawer_from_disk()
-        if self.config.get("freqai", {}).get("purge_old_models", False):
-            self.purge_old_models()
-
     def save_metadata(self, dk: FreqaiDataKitchen) -> None:
         """
         Saves only metadata for backtesting studies if user prefers
|
@@ -227,7 +227,7 @@ class IFreqaiModel(ABC):
                 logger.warning(f'{pair} not in current whitelist, removing from train queue.')
                 continue

-            (_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
+            (_, trained_timestamp) = self.dd.get_pair_dict_info(pair)

             dk = FreqaiDataKitchen(self.config, self.live, pair)
             (
@@ -285,7 +285,7 @@ class IFreqaiModel(ABC):
            # following tr_train. Both of these windows slide through the
            # entire backtest
            for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
-                (_, _, _) = self.dd.get_pair_dict_info(pair)
+                (_, _) = self.dd.get_pair_dict_info(pair)
                train_it += 1
                total_trains = len(dk.backtesting_timeranges)
                self.training_timerange = tr_train
@@ -382,7 +382,7 @@ class IFreqaiModel(ABC):
         """

         # get the model metadata associated with the current pair
-        (_, trained_timestamp, return_null_array) = self.dd.get_pair_dict_info(metadata["pair"])
+        (_, trained_timestamp) = self.dd.get_pair_dict_info(metadata["pair"])

         # append the historic data once per round
         if self.dd.historic_data:
@@ -629,7 +629,6 @@ class IFreqaiModel(ABC):
         if self.plot_features:
             plot_feature_importance(model, pair, dk, self.plot_features)

-        if self.freqai_info.get("purge_old_models", False):
-            self.dd.purge_old_models()
+        self.dd.purge_old_models()

     def set_initial_historic_predictions(
@@ -27,7 +27,7 @@ class FreqaiExampleHybridStrategy(IStrategy):

        "freqai": {
            "enabled": true,
-            "purge_old_models": true,
+            "purge_old_models": 2,
            "train_period_days": 15,
            "identifier": "uniqe-id",
            "feature_parameters": {
@@ -27,7 +27,7 @@ def freqai_conf(default_conf, tmpdir):
            "timerange": "20180110-20180115",
            "freqai": {
                "enabled": True,
-                "purge_old_models": True,
+                "purge_old_models": 2,
                "train_period_days": 2,
                "backtest_period_days": 10,
                "live_retrain_hours": 0,
|