add an optional metric tracker to collect train timings, inference timings, and cpu load data

This commit is contained in:
robcaulk
2022-10-09 20:22:42 +02:00
parent a10b2d003f
commit 76b33359a9
4 changed files with 70 additions and 9 deletions

View File

@@ -9,6 +9,7 @@ from typing import Any, Dict, Tuple, TypedDict
import numpy as np
import pandas as pd
import psutil
import rapidjson
from joblib import dump, load
from joblib.externals import cloudpickle
@@ -78,25 +79,53 @@ class FreqaiDataDrawer:
self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
self.follow_mode = follow_mode
if follow_mode:
self.create_follower_dict()
self.load_drawer_from_disk()
self.load_historic_predictions_from_disk()
self.load_metric_tracker_from_disk()
self.training_queue: Dict[str, int] = {}
self.history_lock = threading.Lock()
self.save_lock = threading.Lock()
self.pair_dict_lock = threading.Lock()
self.metric_tracker_lock = threading.Lock()
self.old_DBSCAN_eps: Dict[str, float] = {}
self.empty_pair_dict: pair_info = {
"model_filename": "", "trained_timestamp": 0,
"data_path": "", "extras": {}}
self.metric_tracker: Dict[str, Dict[str, list]] = {}
def update_metric_tracker(self, metric: str, value: float, pair: str) -> None:
"""
General utility for adding and updating custom metrics. Typically used
for adding training performance, train timings, inferenc timings, cpu loads etc.
"""
with self.metric_tracker_lock:
if pair not in self.metric_tracker:
self.metric_tracker[pair] = {}
if metric not in self.metric_tracker[pair]:
self.metric_tracker[pair][metric] = []
self.metric_tracker[pair][metric].append(value)
def collect_metrics(self, time_spent: float, pair: str):
"""
Add metrics to the metric tracker dictionary
"""
load1, load5, load15 = psutil.getloadavg()
cpus = psutil.cpu_count()
self.update_metric_tracker('train_time', time_spent, pair)
self.update_metric_tracker('cpu_load1min', load1 / cpus, pair)
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
def load_drawer_from_disk(self):
"""
Locate and load a previously saved data drawer full of all pair model metadata in
present model folder.
:return: bool - whether or not the drawer was located
Load any existing metric tracker that may be present.
"""
exists = self.pair_dictionary_path.is_file()
if exists:
@@ -110,7 +139,18 @@ class FreqaiDataDrawer:
"sending null values back to strategy"
)
return exists
def load_metric_tracker_from_disk(self):
"""
Tries to load an existing metrics dictionary if the user
wants to collect metrics.
"""
if self.freqai_info.get('write_metrics_to_disk', False):
exists = self.metric_tracker_path.is_file()
if exists:
with open(self.metric_tracker_path, "r") as fp:
self.metric_tracker = json.load(fp)
else:
logger.info("Could not find existing metric tracker, starting from scratch")
def load_historic_predictions_from_disk(self):
"""
@@ -146,7 +186,7 @@ class FreqaiDataDrawer:
def save_historic_predictions_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
Save historic predictions pickle to disk
"""
with open(self.historic_predictions_path, "wb") as fp:
cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
@@ -154,6 +194,15 @@ class FreqaiDataDrawer:
# create a backup
shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path)
def save_metric_tracker_to_disk(self):
"""
Save metric tracker of all pair metrics collected.
"""
with self.save_lock:
with open(self.metric_tracker_path, 'w') as fp:
rapidjson.dump(self.metric_tracker, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
def save_drawer_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.

View File

@@ -144,7 +144,7 @@ class IFreqaiModel(ABC):
dataframe = dk.remove_features_from_df(dk.return_dataframe)
self.clean_up()
if self.live:
self.inference_timer('stop')
self.inference_timer('stop', metadata["pair"])
return dataframe
def clean_up(self):
@@ -214,12 +214,14 @@ class IFreqaiModel(ABC):
logger.warning(f"Training {pair} raised exception {msg.__class__.__name__}. "
f"Message: {msg}, skipping.")
self.train_timer('stop')
self.train_timer('stop', pair)
# only rotate the queue after the first has been trained.
self.train_queue.rotate(-1)
self.dd.save_historic_predictions_to_disk()
if self.freqai_info.get('write_metrics_to_disk', False):
self.dd.save_metric_tracker_to_disk()
def start_backtesting(
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
@@ -658,7 +660,7 @@ class IFreqaiModel(ABC):
return
def inference_timer(self, do='start'):
def inference_timer(self, do: str = 'start', pair: str = ''):
"""
Timer designed to track the cumulative time spent in FreqAI for one pass through
the whitelist. This will check if the time spent is more than 1/4 the time
@@ -669,7 +671,10 @@ class IFreqaiModel(ABC):
self.begin_time = time.time()
elif do == 'stop':
end = time.time()
self.inference_time += (end - self.begin_time)
time_spent = (end - self.begin_time)
if self.freqai_info.get('write_metrics_to_disk', False):
self.dd.update_metric_tracker('inference_time', time_spent, pair)
self.inference_time += time_spent
if self.pair_it == self.total_pairs:
logger.info(
f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds')
@@ -680,7 +685,7 @@ class IFreqaiModel(ABC):
self.inference_time = 0
return
def train_timer(self, do='start'):
def train_timer(self, do: str = 'start', pair: str = ''):
"""
Timer designed to track the cumulative time spent training the full pairlist in
FreqAI.
@@ -690,7 +695,11 @@ class IFreqaiModel(ABC):
self.begin_time_train = time.time()
elif do == 'stop':
end = time.time()
self.train_time += (end - self.begin_time_train)
time_spent = (end - self.begin_time_train)
if self.freqai_info.get('write_metrics_to_disk', False):
self.dd.collect_metrics(time_spent, pair)
self.train_time += time_spent
if self.pair_it_train == self.total_pairs:
logger.info(
f'Total time spent training pairlist {self.train_time:.2f} seconds')