Add autopurge feature so that FreqAI cleans up after itself when it no longer needs old models on disk

robcaulk 2022-05-31 11:58:21 +02:00
parent 45f4f0f603
commit 0306f5ca13
4 changed files with 64 additions and 1 deletion

View File

@ -423,6 +423,21 @@ by a leader with the same `identifier`. In this example, the user has a leader b
`identifier: "example"` already running, or launching simultaneously with the present follower.
The follower will load models created by the leader and run inference on them to obtain predictions.
### Purging old model data
FreqAI stores new model files each time it retrains. These files become obsolete as new models
are trained and FreqAI adapts to new market conditions. Users planning to leave FreqAI running
for extended periods with high-frequency retraining should set `purge_old_models` in their
config:
```json
"freqai": {
    "purge_old_models": true
}
```
which will automatically purge all models older than the two most recently trained ones.
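For illustration, assume a pair has been retrained three times; model folders follow the `sub-train-<COIN><timestamp>` naming used by FreqAI (the concrete names below are hypothetical):
```python
# hypothetical model directory after three retrainings of one pair
model_dirs = [
    "sub-train-BTC1650000000",  # oldest        -> purged
    "sub-train-BTC1651000000",  # second newest -> kept
    "sub-train-BTC1652000000",  # newest        -> kept
]
```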
<!-- ## Dynamic target expectation
The labels used for model training have a unique statistical distribution for each separate model training.

View File

@ -1,7 +1,10 @@
import collections
import copy
import json
import logging
import re
import shutil
from pathlib import Path
from typing import Any, Dict, Tuple
@ -165,6 +168,42 @@ class FreqaiDataDrawer:
        dh.full_target_mean = np.zeros(len_df)
        dh.full_target_std = np.zeros(len_df)

    def purge_old_models(self) -> None:
        # keep only the two most recent "sub-train-<COIN><timestamp>" folders per coin
        model_folders = [x for x in self.full_path.iterdir() if x.is_dir()]

        pattern = re.compile(r"sub-train-(\w+)(\d{10})")

        delete_dict: Dict[str, Any] = {}

        for model_dir in model_folders:
            result = pattern.match(str(model_dir.name))
            if result is None:
                # not a model folder, skip it rather than aborting the scan
                continue
            coin = result.group(1)
            timestamp = result.group(2)

            if coin not in delete_dict:
                delete_dict[coin] = {}
                delete_dict[coin]['num_folders'] = 1
                delete_dict[coin]['timestamps'] = {int(timestamp): model_dir}
            else:
                delete_dict[coin]['num_folders'] += 1
                delete_dict[coin]['timestamps'][int(timestamp)] = model_dir

        for coin in delete_dict:
            if delete_dict[coin]['num_folders'] > 2:
                # sort ascending by timestamp and delete all but the two newest models
                sorted_dict = collections.OrderedDict(
                    sorted(delete_dict[coin]['timestamps'].items()))
                num_delete = len(sorted_dict) - 2
                deleted = 0
                for k, v in sorted_dict.items():
                    if deleted >= num_delete:
                        break
                    logger.info(f'Freqai purging old model file {v}')
                    shutil.rmtree(v)
                    deleted += 1
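A minimal sketch (not part of the commit) of the expected behaviour, assuming `FreqaiDataDrawer` is importable from `freqtrade.freqai.data_drawer` and that `purge_old_models()` only relies on `self.full_path`; the folder names and timestamps are made up:
```python
import tempfile
from pathlib import Path

from freqtrade.freqai.data_drawer import FreqaiDataDrawer

with tempfile.TemporaryDirectory() as tmp:
    root = Path(tmp)
    # three model folders for one coin; only the two newest should survive
    for ts in (1650000000, 1651000000, 1652000000):
        (root / f"sub-train-BTC{ts}").mkdir()

    # bypass __init__ for the sketch; purge_old_models() only reads full_path
    drawer = FreqaiDataDrawer.__new__(FreqaiDataDrawer)
    drawer.full_path = root
    drawer.purge_old_models()

    assert sorted(p.name for p in root.iterdir()) == [
        "sub-train-BTC1651000000",
        "sub-train-BTC1652000000",
    ]
```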
    # to be used if we want to send predictions directly to the follower instead of forcing
    # follower to load models and inference
    # def save_model_return_values_to_disk(self) -> None:

View File

@ -596,7 +596,7 @@ class FreqaiDataKitchen:
        do_predict = np.where(
            distance.min(axis=0) / self.data["avg_mean_dist"]
-            < self.config["freqai"]["feature_parameters"]["DI_threshold"],
+            < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
            1,
            0,
        )
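The one-line change above replaces hard `[...]` indexing with a chained `.get()` lookup; a minimal illustration (not from the commit) of the difference when the config section is missing:
```python
freqai_config = {"feature_parameters": {"DI_threshold": 0.5}}

# direct indexing raises KeyError as soon as either key is missing:
#     freqai_config["feature_parameters"]["DI_threshold"]

# chained .get() falls back to an empty dict and then to None instead of raising
threshold = freqai_config.get("feature_parameters", {}).get("DI_threshold")
print(threshold)  # 0.5 here; None when either key is absent
```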

View File

@ -190,6 +190,10 @@ class IFreqaiModel(ABC):
        dh: FreqaiDataKitchen = Data management/analysis tool associated with the present pair only
        """
        if self.follow_mode:
            # follower needs to load off disk to get any changes made by leader to pair_dict
            self.data_drawer.load_drawer_from_disk()

        (model_filename,
         trained_timestamp,
         coin_first,
@ -376,6 +380,11 @@ class IFreqaiModel(ABC):
        self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
        self.training_on_separate_thread = False
        self.retrain = False

        # each time a training finishes, check the directory and purge old models if requested
        if self.freqai_info.get('purge_old_models', False):
            self.data_drawer.purge_old_models()

        return
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,