Add autopurge feature so that FreqAI cleans up after itself when it no longer needs old models on disk

This commit is contained in:
robcaulk
2022-05-31 11:58:21 +02:00
parent 45f4f0f603
commit 0306f5ca13
4 changed files with 64 additions and 1 deletions

View File

@@ -1,7 +1,10 @@
import collections
import copy
import json
import logging
import re
import shutil
from pathlib import Path
from typing import Any, Dict, Tuple
@@ -165,6 +168,42 @@ class FreqaiDataDrawer:
dh.full_target_mean = np.zeros(len_df)
dh.full_target_std = np.zeros(len_df)
def purge_old_models(self) -> None:
    """
    Delete stale model folders from disk, keeping the two newest per coin.

    Every sub-directory of ``self.full_path`` whose name starts with
    ``sub-train-<coin><10-digit timestamp>`` is grouped by coin. For each coin
    that has more than two such folders, all but the two with the largest
    timestamps are removed recursively with ``shutil.rmtree``.
    Entries that are not directories, or whose name does not match the
    pattern, are left untouched.
    """
    pattern = re.compile(r"sub-train-(\w+)(\d{10})")

    # coin -> {timestamp: folder path}
    folders_by_coin: Dict[str, Dict[int, Any]] = {}
    for folder in self.full_path.iterdir():
        if not folder.is_dir():
            continue
        result = pattern.match(folder.name)
        if result is None:
            # Unrelated folder: skip it, but keep scanning the remaining ones.
            # (Stopping here would make purging depend on directory listing order.)
            continue
        coin = result.group(1)
        timestamp = int(result.group(2))
        folders_by_coin.setdefault(coin, {})[timestamp] = folder

    for timestamps in folders_by_coin.values():
        # Oldest first; everything except the last two entries is stale.
        for stale_timestamp in sorted(timestamps)[:-2]:
            stale_folder = timestamps[stale_timestamp]
            logger.info(f'Freqai purging old model file {stale_folder}')
            shutil.rmtree(stale_folder)
# to be used if we want to send predictions directly to the follower instead of forcing
# the follower to load models and run inference
# def save_model_return_values_to_disk(self) -> None:

View File

@@ -596,7 +596,7 @@ class FreqaiDataKitchen:
do_predict = np.where(
distance.min(axis=0) / self.data["avg_mean_dist"]
< self.config["freqai"]["feature_parameters"]["DI_threshold"],
< self.freqai_config.get("feature_parameters", {}).get("DI_threshold"),
1,
0,
)

View File

@@ -190,6 +190,10 @@ class IFreqaiModel(ABC):
dh: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
"""
if self.follow_mode:
# follower needs to load off disk to get any changes made by leader to pair_dict
self.data_drawer.load_drawer_from_disk()
(model_filename,
trained_timestamp,
coin_first,
@@ -376,6 +380,11 @@ class IFreqaiModel(ABC):
self.data_drawer.pair_to_end_of_training_queue(metadata['pair'])
self.training_on_separate_thread = False
self.retrain = False
# each time we finish a training, we check the directory to purge old models.
if self.freqai_info.get('purge_old_models', False):
self.data_drawer.purge_old_models()
return
def train_model_in_series(self, new_trained_timerange: TimeRange, metadata: dict,