Add autopurge feature so that FreqAI cleans up after itself when it no longer needs old models on disk
This commit is contained in:
parent
45f4f0f603
commit
0306f5ca13
@ -423,6 +423,21 @@ by a leader with the same `identifier`. In this example, the user has a leader b
|
||||
`identifier: "example"` already running or launching simultaneously as the present follower.
|
||||
The follower will load models created by the leader and run inference on them to obtain predictions.
|
||||
|
||||
### Purging old model data
|
||||
|
||||
FreqAI stores new model files each time it retrains. These files become obsolete as new models
|
||||
are trained and FreqAI adapts to the new market conditions. Users planning to leave FreqAI running
|
||||
for extended periods of time with high frequency retraining should set `purge_old_models` in their
|
||||
config:
|
||||
|
||||
```json
|
||||
"freqai": {
|
||||
"purge_old_models": true,
|
||||
}
|
||||
```
|
||||
|
||||
which will automatically purge all models older than the two most recently trained ones.
|
||||
|
||||
<!-- ## Dynamic target expectation
|
||||
|
||||
The labels used for model training have a unique statistical distribution for each separate model training.
|
||||
|
@ -1,7 +1,10 @@
|
||||
|
||||
import collections
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
@ -165,6 +168,42 @@ class FreqaiDataDrawer:
|
||||
dh.full_target_mean = np.zeros(len_df)
|
||||
dh.full_target_std = np.zeros(len_df)
|
||||
|
||||
def purge_old_models(self) -> None:
    """
    Remove obsolete model folders found directly under ``self.full_path``.

    Every sub-directory whose name matches ``sub-train-<coin><10 digit timestamp>``
    is grouped by coin. For each coin, the two folders carrying the highest
    timestamps are kept and every older folder is deleted from disk with
    ``shutil.rmtree``. Entries that are not directories, or whose name does not
    match the pattern, are left untouched.
    """
    pattern = re.compile(r"sub-train-(\w+)(\d{10})")

    # coin -> {timestamp: folder path}
    folders_by_coin: Dict[str, Dict[int, Path]] = {}

    for folder in self.full_path.iterdir():
        if not folder.is_dir():
            continue
        result = pattern.match(str(folder.name))
        if result is None:
            # Unrelated directory: skip it but keep scanning. iterdir() yields
            # entries in arbitrary order, so stopping here could hide model
            # folders that happen to be listed later.
            continue
        coin = result.group(1)
        timestamp = int(result.group(2))
        folders_by_coin.setdefault(coin, {})[timestamp] = folder

    for timestamps in folders_by_coin.values():
        num_delete = len(timestamps) - 2
        if num_delete <= 0:
            # two or fewer models for this coin: nothing to purge
            continue
        # ascending order puts the oldest timestamps first
        for timestamp in sorted(timestamps)[:num_delete]:
            old_folder = timestamps[timestamp]
            logger.info(f'Freqai purging old model file {old_folder}')
            shutil.rmtree(old_folder)
|
||||
|
||||
# to be used if we want to send predictions directly to the follower instead of forcing
|
||||
# the follower to load models and run inference
|
||||
# def save_model_return_values_to_disk(self) -> None:
|
||||
|
@ -596,7 +596,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
do_predict = np.where(
|
||||
distance.min(axis=0) / self.data["avg_mean_dist"]
|
||||
< self.config["freqai"]["feature_parameters"]["DI_threshold"],
|
||||
< self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
|
||||
1,
|
||||
0,
|
||||
)
|
||||
|
@ -190,6 +190,10 @@ class IFreqaiModel(ABC):
|
||||
dh: FreqaiDataKitchen = Data management/analysis tool associated with the present pair only
|
||||
"""
|
||||
|
||||
if self.follow_mode:
|
||||
# follower needs to load off disk to get any changes made by leader to pair_dict
|
||||
self.data_drawer.load_drawer_from_disk()
|
||||
|
||||
(model_filename,
|
||||
trained_timestamp,
|
||||
coin_first,
|
||||
@ -376,6 +380,11 @@ class IFreqaiModel(ABC):
|
||||
self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
|
||||
self.training_on_separate_thread = False
|
||||
self.retrain = False
|
||||
|
||||
# each time we finish a training, we check the directory to purge old models.
|
||||
if self.freqai_info.get('purge_old_models', False):
|
||||
self.data_drawer.purge_old_models()
|
||||
|
||||
return
|
||||
|
||||
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
|
||||
|
Loading…
Reference in New Issue
Block a user