Add autopurge feature so that FreqAI cleans up after itself when it no longer needs old models on disk

2022-05-31 11:58:21 +02:00
parent 45f4f0f603
commit 0306f5ca13
4 changed files with 64 additions and 1 deletions
--- a/docs/freqai.md
+++ b/docs/freqai.md
@@ -423,6 +423,21 @@ by a leader with the same `identifier`. In this example, the user has a leader b
 `identifier: "example"` already running or launching simultaneously as the present follower. 
 The follower will load models created by the leader and inference them to obtain predictions.
 ### Purging old model data
 FreqAI stores new model files each time it retrains. These files become obsolete as new models
 are trained and FreqAI adapts to new market conditions. Users who plan to leave FreqAI running
 for extended periods of time with high-frequency retraining should set `purge_old_models` in
 their config:
 ```json
    "freqai": {
        "purge_old_models": true
    }
 ```
 which will automatically purge, for each coin, all models older than the two most recently trained ones.
 <!-- ## Dynamic target expectation
 The labels used for model training have a unique statistical distribution for each separate model training. 
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -1,7 +1,10 @@
 import collections
 import copy
 import json
 import logging
 import re
 import shutil
 from pathlib import Path
 from typing import Any, Dict, Tuple
@@ -165,6 +168,42 @@ class FreqaiDataDrawer:
        dh.full_target_mean = np.zeros(len_df)
        dh.full_target_std = np.zeros(len_df)
    def purge_old_models(self) -> None:
        """
        Delete obsolete model folders from disk, keeping the two newest ones per coin.

        Every sub-directory of `self.full_path` whose name matches
        `sub-train-<coin><10 digit timestamp>` is grouped by coin. Within each group
        all folders except the two with the largest timestamps are removed
        recursively. Directories that do not match the naming pattern are left
        untouched and do not interrupt the scan.
        """
        # number of most recent model folders that survive the purge for each coin
        num_keep = 2

        # `\w+` is greedy and also matches digits, so the second group ends up holding
        # the last 10 digits of the leading run of word characters.
        pattern = re.compile(r"sub-train-(\w+)(\d{10})")

        # coin -> {training timestamp: model folder}
        folders_by_coin: Dict[str, Any] = {}

        model_folders = [x for x in self.full_path.iterdir() if x.is_dir()]
        for folder in model_folders:
            result = pattern.match(folder.name)
            if result is None:
                # Unrelated directory: skip it but keep scanning. Aborting the loop
                # here would hide every model folder listed after it.
                continue
            coin = result.group(1)
            timestamp = int(result.group(2))
            folders_by_coin.setdefault(coin, {})[timestamp] = folder

        for timestamps in folders_by_coin.values():
            # Ascending sort puts the oldest first; the slice is empty whenever the
            # coin has `num_keep` or fewer folders, so nothing is deleted in that case.
            for timestamp in sorted(timestamps)[:-num_keep]:
                v = timestamps[timestamp]
                logger.info(f'Freqai purging old model file {v}')
                shutil.rmtree(v)
    # to be used if we want to send predictions directly to the follower instead of forcing
    # follower to load models and inference
    # def save_model_return_values_to_disk(self) -> None:
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -596,7 +596,7 @@ class FreqaiDataKitchen:
        do_predict = np.where(
            distance.min(axis=0) / self.data["avg_mean_dist"]
-            < self.config["freqai"]["feature_parameters"]["DI_threshold"],
+            < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
            1,
            0,
        )
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -190,6 +190,10 @@ class IFreqaiModel(ABC):
        dh: FreqaiDataKitchen = Data management/analysis tool associated with the present pair only
        """
        if self.follow_mode:
            # follower needs to load off disk to get any changes made by leader to pair_dict
            self.data_drawer.load_drawer_from_disk()
        (model_filename,
         trained_timestamp,
         coin_first,
@@ -376,6 +380,11 @@ class IFreqaiModel(ABC):
            self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
        self.training_on_separate_thread = False
        self.retrain = False
        # each time we finish a training, we check the directory to purge old models.
        if self.freqai_info.get('purge_old_models', False):
            self.data_drawer.purge_old_models()
        return
    def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,