Add autopurge feature so that FreqAI cleans up after itself when it no longer needs old models on disk
This commit is contained in:
parent
45f4f0f603
commit
0306f5ca13
@ -423,6 +423,21 @@ by a leader with the same `identifier`. In this example, the user has a leader b
|
|||||||
`identifier: "example"` already running or launching simultaneously as the present follower.
|
`identifier: "example"` already running or launching simultaneously as the present follower.
|
||||||
The follower will load models created by the leader and inference them to obtain predictions.
|
The follower will load models created by the leader and inference them to obtain predictions.
|
||||||
|
|
||||||
|
### Purging old model data
|
||||||
|
|
||||||
|
FreqAI stores new model files each time it retrains. These files become obsolete as new models
|
||||||
|
are trained and FreqAI adapts to the new market conditions. Users planning to leave FreqAI running
|
||||||
|
for extended periods of time with high-frequency retraining should set `purge_old_models` in their
|
||||||
|
config:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"freqai": {
|
||||||
|
"purge_old_models": true,
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
which will automatically purge, for each coin, all models older than the two most recently trained ones.
|
||||||
|
|
||||||
<!-- ## Dynamic target expectation
|
<!-- ## Dynamic target expectation
|
||||||
|
|
||||||
The labels used for model training have a unique statistical distribution for each separate model training.
|
The labels used for model training have a unique statistical distribution for each separate model training.
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
|
|
||||||
|
import collections
|
||||||
import copy
|
import copy
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, Tuple
|
from typing import Any, Dict, Tuple
|
||||||
|
|
||||||
@ -165,6 +168,42 @@ class FreqaiDataDrawer:
|
|||||||
dh.full_target_mean = np.zeros(len_df)
|
dh.full_target_mean = np.zeros(len_df)
|
||||||
dh.full_target_std = np.zeros(len_df)
|
dh.full_target_std = np.zeros(len_df)
|
||||||
|
|
||||||
|
def purge_old_models(self) -> None:
    """
    Remove stale model folders from disk, keeping the two newest per coin.

    Scans the sub-directories of ``self.full_path`` for names matching
    ``sub-train-<coin><10-digit timestamp>``, groups them by coin and deletes
    every folder except the two with the highest timestamps. Files, and
    directories whose names do not match the pattern, are left untouched.
    """
    num_keep = 2
    pattern = re.compile(r"sub-train-(\w+)(\d{10})")

    # coin -> {training timestamp -> model folder}
    folders_by_coin: Dict[str, Dict[int, Path]] = collections.defaultdict(dict)

    for folder in self.full_path.iterdir():
        if not folder.is_dir():
            continue
        result = pattern.match(folder.name)
        if result is None:
            # Unrelated directory: skip it but keep scanning. Stopping here
            # would hide the remaining model folders, and since iterdir()
            # yields entries in arbitrary order the outcome would vary.
            continue
        coin, timestamp = result.groups()
        folders_by_coin[coin][int(timestamp)] = folder

    for timestamps in folders_by_coin.values():
        # Ascending sort puts the oldest first; the slice drops the newest
        # `num_keep` entries and is empty when there is nothing to purge.
        for _, folder in sorted(timestamps.items())[:-num_keep]:
            logger.info(f'Freqai purging old model file {folder}')
            shutil.rmtree(folder)
|
||||||
|
|
||||||
# to be used if we want to send predictions directly to the follower instead of forcing
|
# to be used if we want to send predictions directly to the follower instead of forcing
|
||||||
# follower to load models and inference
|
# follower to load models and inference
|
||||||
# def save_model_return_values_to_disk(self) -> None:
|
# def save_model_return_values_to_disk(self) -> None:
|
||||||
|
@ -596,7 +596,7 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
do_predict = np.where(
|
do_predict = np.where(
|
||||||
distance.min(axis=0) / self.data["avg_mean_dist"]
|
distance.min(axis=0) / self.data["avg_mean_dist"]
|
||||||
< self.config["freqai"]["feature_parameters"]["DI_threshold"],
|
< self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
|
||||||
1,
|
1,
|
||||||
0,
|
0,
|
||||||
)
|
)
|
||||||
|
@ -190,6 +190,10 @@ class IFreqaiModel(ABC):
|
|||||||
dh: FreqaiDataKitchen = Data management/analysis tool associated with present pair only
|
dh: FreqaiDataKitchen = Data management/analysis tool associated with present pair only
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if self.follow_mode:
|
||||||
|
# follower needs to load off disk to get any changes made by leader to pair_dict
|
||||||
|
self.data_drawer.load_drawer_from_disk()
|
||||||
|
|
||||||
(model_filename,
|
(model_filename,
|
||||||
trained_timestamp,
|
trained_timestamp,
|
||||||
coin_first,
|
coin_first,
|
||||||
@ -376,6 +380,11 @@ class IFreqaiModel(ABC):
|
|||||||
self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
|
self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
|
||||||
self.training_on_separate_thread = False
|
self.training_on_separate_thread = False
|
||||||
self.retrain = False
|
self.retrain = False
|
||||||
|
|
||||||
|
# each time we finish a training, we check the directory to purge old models.
|
||||||
|
if self.freqai_info.get('purge_old_models', False):
|
||||||
|
self.data_drawer.purge_old_models()
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
|
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,
|
||||||
|
Loading…
Reference in New Issue
Block a user