stable/freqtrade/freqai/data_drawer.py

372 lines
15 KiB
Python

import collections
import json
import logging
import re
import shutil
import threading
from pathlib import Path
from typing import Any, Dict, Tuple
import numpy as np
import pandas as pd
from joblib.externals import cloudpickle
from pandas import DataFrame
logger = logging.getLogger(__name__)
class FreqaiDataDrawer:
"""
Class aimed at holding all pair models/info in memory for better inferencing/retrainig/saving
/loading to/from disk.
This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is
reinstantiated for each coin.
Record of contribution:
FreqAI was developed by a group of individuals who all contributed specific skillsets to the
project.
Conception and software development:
Robert Caulk @robcaulk
Theoretical brainstorming:
Elin Törnquist @thorntwig
Code review, software architecture brainstorming:
@xmatthias
Beta testing and bug reporting:
@bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm
Juha Nykänen @suikula, Wagner Costa @wagnercosta
"""
def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):
self.config = config
self.freqai_info = config.get("freqai", {})
# dictionary holding all pair metadata necessary to load in from disk
self.pair_dict: Dict[str, Any] = {}
# dictionary holding all actively inferenced models in memory given a model filename
self.model_dictionary: Dict[str, Any] = {}
self.model_return_values: Dict[str, Any] = {}
self.pair_data_dict: Dict[str, Any] = {}
self.historic_data: Dict[str, Any] = {}
self.historic_predictions: Dict[str, Any] = {}
self.follower_dict: Dict[str, Any] = {}
self.full_path = full_path
self.follower_name = self.config.get("bot_name", "follower1")
self.follower_dict_path = Path(
self.full_path / f"follower_dictionary-{self.follower_name}.json"
)
self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.follow_mode = follow_mode
if follow_mode:
self.create_follower_dict()
self.load_drawer_from_disk()
self.load_historic_predictions_from_disk()
self.training_queue: Dict[str, int] = {}
self.history_lock = threading.Lock()
def load_drawer_from_disk(self):
"""
Locate and load a previously saved data drawer full of all pair model metadata in
present model folder.
:returns:
exists: bool = whether or not the drawer was located
"""
exists = self.pair_dictionary_path.is_file()
if exists:
with open(self.pair_dictionary_path, "r") as fp:
self.pair_dict = json.load(fp)
elif not self.follow_mode:
logger.info("Could not find existing datadrawer, starting from scratch")
else:
logger.warning(
f"Follower could not find pair_dictionary at {self.full_path} "
"sending null values back to strategy"
)
return exists
def load_historic_predictions_from_disk(self):
"""
Locate and load a previously saved historic predictions.
:returns:
exists: bool = whether or not the drawer was located
"""
exists = self.historic_predictions_path.is_file()
if exists:
with open(self.historic_predictions_path, "rb") as fp:
self.historic_predictions = cloudpickle.load(fp)
logger.info(
f"Found existing historic predictions at {self.full_path}, but beware "
"that statistics may be inaccurate if the bot has been offline for "
"an extended period of time."
)
elif not self.follow_mode:
logger.info("Could not find existing historic_predictions, starting from scratch")
else:
logger.warning(
f"Follower could not find historic predictions at {self.full_path} "
"sending null values back to strategy"
)
return exists
def save_historic_predictions_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
"""
with open(self.historic_predictions_path, "wb") as fp:
cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
def save_drawer_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
"""
with open(self.pair_dictionary_path, "w") as fp:
json.dump(self.pair_dict, fp, default=self.np_encoder)
def save_follower_dict_to_disk(self):
"""
Save follower dictionary to disk (used by strategy for persistent prediction targets)
"""
with open(self.follower_dict_path, "w") as fp:
json.dump(self.follower_dict, fp, default=self.np_encoder)
def create_follower_dict(self):
"""
Create or dictionary for each follower to maintain unique persistent prediction targets
"""
whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist")
exists = self.follower_dict_path.is_file()
if exists:
logger.info("Found an existing follower dictionary")
for pair in whitelist_pairs:
self.follower_dict[pair] = {}
with open(self.follow_path, "w") as fp:
json.dump(self.follower_dict, fp, default=self.np_encoder)
def np_encoder(self, object):
if isinstance(object, np.generic):
return object.item()
def get_pair_dict_info(self, pair: str) -> Tuple[str, int, bool, bool]:
"""
Locate and load existing model metadata from persistent storage. If not located,
create a new one and append the current pair to it and prepare it for its first
training
:param pair: str: pair to lookup
:return:
model_filename: str = unique filename used for loading persistent objects from disk
trained_timestamp: int = the last time the coin was trained
coin_first: bool = If the coin is fresh without metadata
return_null_array: bool = Follower could not find pair metadata
"""
pair_in_dict = self.pair_dict.get(pair)
data_path_set = self.pair_dict.get(pair, {}).get("data_path", None)
return_null_array = False
if pair_in_dict:
model_filename = self.pair_dict[pair]["model_filename"]
trained_timestamp = self.pair_dict[pair]["trained_timestamp"]
coin_first = self.pair_dict[pair]["first"]
elif not self.follow_mode:
self.pair_dict[pair] = {}
model_filename = self.pair_dict[pair]["model_filename"] = ""
coin_first = self.pair_dict[pair]["first"] = True
trained_timestamp = self.pair_dict[pair]["trained_timestamp"] = 0
self.pair_dict[pair]["priority"] = len(self.pair_dict)
if not data_path_set and self.follow_mode:
logger.warning(
f"Follower could not find current pair {pair} in "
f"pair_dictionary at path {self.full_path}, sending null values "
"back to strategy."
)
return_null_array = True
return model_filename, trained_timestamp, coin_first, return_null_array
def set_pair_dict_info(self, metadata: dict) -> None:
pair_in_dict = self.pair_dict.get(metadata["pair"])
if pair_in_dict:
return
else:
self.pair_dict[metadata["pair"]] = {}
self.pair_dict[metadata["pair"]]["model_filename"] = ""
self.pair_dict[metadata["pair"]]["first"] = True
self.pair_dict[metadata["pair"]]["trained_timestamp"] = 0
self.pair_dict[metadata["pair"]]["priority"] = len(self.pair_dict)
return
def pair_to_end_of_training_queue(self, pair: str) -> None:
# march all pairs up in the queue
for p in self.pair_dict:
self.pair_dict[p]["priority"] -= 1
# send pair to end of queue
self.pair_dict[pair]["priority"] = len(self.pair_dict)
def set_initial_return_values(self, pair: str, dk, pred_df, do_preds) -> None:
"""
Set the initial return values to a persistent dataframe. This avoids needing to repredict on
historical candles, and also stores historical predictions despite retrainings (so stored
predictions are true predictions, not just inferencing on trained data)
"""
# dynamic df returned to strategy and plotted in frequi
mrv_df = self.model_return_values[pair] = pd.DataFrame()
for label in dk.label_list:
mrv_df[label] = pred_df[label]
mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label]
mrv_df[f"{label}_std"] = dk.data["labels_std"][label]
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
mrv_df["DI_values"] = dk.DI_values
mrv_df["do_predict"] = do_preds
def append_model_predictions(self, pair: str, predictions, do_preds, dk, len_df) -> None:
# strat seems to feed us variable sized dataframes - and since we are trying to build our
# own return array in the same shape, we need to figure out how the size has changed
# and adapt our stored/returned info accordingly.
length_difference = len(self.model_return_values[pair]) - len_df
i = 0
if length_difference == 0:
i = 1
elif length_difference > 0:
i = length_difference + 1
df = self.model_return_values[pair] = self.model_return_values[pair].shift(-i)
if pair in self.historic_predictions:
hp_df = self.historic_predictions[pair]
# here are some pandas hula hoops to accommodate the possibility of a series
# or dataframe depending number of labels requested by user
nan_df = pd.DataFrame(np.nan, index=hp_df.index[-2:] + 2, columns=hp_df.columns)
hp_df = pd.concat([hp_df, nan_df], ignore_index=True, axis=0)
self.historic_predictions[pair] = hp_df[:-1]
for label in dk.label_list:
df[label].iloc[-1] = predictions[label].iloc[-1]
df[f"{label}_mean"].iloc[-1] = dk.data["labels_mean"][label]
df[f"{label}_std"].iloc[-1] = dk.data["labels_std"][label]
# df['prediction'].iloc[-1] = predictions[-1]
df["do_predict"].iloc[-1] = do_preds[-1]
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
df["DI_values"].iloc[-1] = dk.DI_values[-1]
# append the new predictions to persistent storage
if pair in self.historic_predictions:
self.historic_predictions[pair].iloc[-1] = df[label].iloc[-1]
if length_difference < 0:
prepend_df = pd.DataFrame(
np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns
)
df = pd.concat([prepend_df, df], axis=0)
def attach_return_values_to_return_dataframe(
self, pair: str, dataframe: DataFrame) -> DataFrame:
"""
Attach the return values to the strat dataframe
:param dataframe: DataFrame = strategy dataframe
:return: DataFrame = strat dataframe with return values attached
"""
df = self.model_return_values[pair]
to_keep = [col for col in dataframe.columns if not col.startswith("&")]
dataframe = pd.concat([dataframe[to_keep], df], axis=1)
return dataframe
def return_null_values_to_strategy(self, dataframe: DataFrame, dk) -> None:
"""
Build 0 filled dataframe to return to strategy
"""
dk.find_features(dataframe)
for label in dk.label_list:
dataframe[label] = 0
dataframe[f"{label}_mean"] = 0
dataframe[f"{label}_std"] = 0
# dataframe['prediction'] = 0
dataframe["do_predict"] = 0
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
dataframe["DI_value"] = 0
dk.return_dataframe = dataframe
def purge_old_models(self) -> None:
model_folders = [x for x in self.full_path.iterdir() if x.is_dir()]
pattern = re.compile(r"sub-train-(\w+)_(\d{10})")
delete_dict: Dict[str, Any] = {}
for dir in model_folders:
result = pattern.match(str(dir.name))
if result is None:
break
coin = result.group(1)
timestamp = result.group(2)
if coin not in delete_dict:
delete_dict[coin] = {}
delete_dict[coin]["num_folders"] = 1
delete_dict[coin]["timestamps"] = {int(timestamp): dir}
else:
delete_dict[coin]["num_folders"] += 1
delete_dict[coin]["timestamps"][int(timestamp)] = dir
for coin in delete_dict:
if delete_dict[coin]["num_folders"] > 2:
sorted_dict = collections.OrderedDict(
sorted(delete_dict[coin]["timestamps"].items())
)
num_delete = len(sorted_dict) - 2
deleted = 0
for k, v in sorted_dict.items():
if deleted >= num_delete:
break
logger.info(f"Freqai purging old model file {v}")
shutil.rmtree(v)
deleted += 1
def update_follower_metadata(self):
# follower needs to load from disk to get any changes made by leader to pair_dict
self.load_drawer_from_disk()
if self.config.get("freqai", {}).get("purge_old_models", False):
self.purge_old_models()
# to be used if we want to send predictions directly to the follower instead of forcing
# follower to load models and inference
# def save_model_return_values_to_disk(self) -> None:
# with open(self.full_path / str('model_return_values.json'), "w") as fp:
# json.dump(self.model_return_values, fp, default=self.np_encoder)
# def load_model_return_values_from_disk(self, dk: FreqaiDataKitchen) -> FreqaiDataKitchen:
# exists = Path(self.full_path / str('model_return_values.json')).resolve().exists()
# if exists:
# with open(self.full_path / str('model_return_values.json'), "r") as fp:
# self.model_return_values = json.load(fp)
# elif not self.follow_mode:
# logger.info("Could not find existing datadrawer, starting from scratch")
# else:
# logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
# 'sending null values back to strategy')
# return exists, dk