2022-05-31 09:58:21 +00:00
|
|
|
import collections
|
2022-05-23 19:05:05 +00:00
|
|
|
import json
|
|
|
|
import logging
|
2022-05-31 09:58:21 +00:00
|
|
|
import re
|
|
|
|
import shutil
|
2022-06-06 23:07:30 +00:00
|
|
|
import threading
|
2022-05-23 19:05:05 +00:00
|
|
|
from pathlib import Path
|
2022-08-09 13:30:25 +00:00
|
|
|
from typing import Any, Dict, Tuple, TypedDict
|
2022-05-23 19:05:05 +00:00
|
|
|
|
|
|
|
import numpy as np
|
2022-07-01 12:00:30 +00:00
|
|
|
import pandas as pd
|
2022-08-09 13:30:25 +00:00
|
|
|
import rapidjson
|
2022-07-26 08:51:39 +00:00
|
|
|
from joblib import dump, load
|
2022-07-22 15:37:51 +00:00
|
|
|
from joblib.externals import cloudpickle
|
2022-08-13 18:07:31 +00:00
|
|
|
from numpy.typing import NDArray
|
2022-05-30 19:35:48 +00:00
|
|
|
from pandas import DataFrame
|
2022-07-26 08:51:39 +00:00
|
|
|
|
|
|
|
from freqtrade.configuration import TimeRange
|
|
|
|
from freqtrade.data.history import load_pair_history
|
|
|
|
from freqtrade.exceptions import OperationalException
|
2022-07-26 08:24:14 +00:00
|
|
|
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
|
|
|
from freqtrade.strategy.interface import IStrategy
|
2022-07-26 08:51:39 +00:00
|
|
|
|
2022-05-30 19:35:48 +00:00
|
|
|
|
2022-05-23 19:05:05 +00:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2022-08-09 13:30:25 +00:00
|
|
|
class pair_info(TypedDict):
    """
    Metadata record stored per pair in the data drawer's pair dictionary.
    """

    # filename stem of the persisted model; empty until a model has been saved
    model_filename: str
    # initialised to True in the drawer's empty template
    first: bool
    # the last time the coin was trained; 0 when never trained
    trained_timestamp: int
    # position in the training queue
    priority: int
    # folder holding the persisted model files, stored as a string
    data_path: str
    # free-form additional data; starts out as an empty dict
    extras: dict
|
2022-08-09 13:30:25 +00:00
|
|
|
|
|
|
|
|
2022-05-23 19:05:05 +00:00
|
|
|
class FreqaiDataDrawer:
|
|
|
|
"""
|
|
|
|
Class aimed at holding all pair models/info in memory for better inferencing/retraining/saving
|
|
|
|
/loading to/from disk.
|
2022-08-14 18:24:29 +00:00
|
|
|
This object remains persistent throughout live/dry.
|
2022-07-23 11:04:06 +00:00
|
|
|
|
|
|
|
Record of contribution:
|
|
|
|
FreqAI was developed by a group of individuals who all contributed specific skillsets to the
|
|
|
|
project.
|
|
|
|
|
|
|
|
Conception and software development:
|
|
|
|
Robert Caulk @robcaulk
|
|
|
|
|
|
|
|
Theoretical brainstorming:
|
2022-08-02 18:14:02 +00:00
|
|
|
Elin Törnquist @th0rntwig
|
2022-07-23 11:04:06 +00:00
|
|
|
|
|
|
|
Code review, software architecture brainstorming:
|
|
|
|
@xmatthias
|
|
|
|
|
|
|
|
Beta testing and bug reporting:
|
|
|
|
@bloodhunter4rc, Salah Lamkadem @ikonx, @ken11o2, @longyu, @paranoidandy, @smidelis, @smarm
|
2022-08-14 18:24:29 +00:00
|
|
|
Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert
|
2022-05-23 19:05:05 +00:00
|
|
|
"""
|
2022-07-03 08:59:38 +00:00
|
|
|
|
2022-05-31 12:35:04 +00:00
|
|
|
def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):
    """
    Prepare the in-memory stores, resolve the on-disk file locations and restore
    whatever a previous run persisted.
    :param full_path: Path = folder in which the drawer files live
    :param config: dict = bot configuration ("freqai" and "bot_name" are read here)
    :param follow_mode: bool = if True, the follower dictionary is created and saved
    before the persisted state is loaded
    """
    self.config = config
    self.freqai_info = config.get("freqai", {})
    self.full_path = full_path
    self.follow_mode = follow_mode

    # dictionary holding all pair metadata necessary to load in from disk
    self.pair_dict: Dict[str, pair_info] = {}
    # dictionary holding all actively inferenced models in memory given a model filename
    self.model_dictionary: Dict[str, Any] = {}
    self.model_return_values: Dict[str, DataFrame] = {}
    self.historic_data: Dict[str, Dict[str, DataFrame]] = {}
    self.historic_predictions: Dict[str, DataFrame] = {}
    self.follower_dict: Dict[str, pair_info] = {}

    # locations of the persisted files inside full_path
    self.follower_name: str = self.config.get("bot_name", "follower1")
    self.follower_dict_path = (
        self.full_path / f"follower_dictionary-{self.follower_name}.json"
    )
    self.historic_predictions_path = self.full_path / "historic_predictions.pkl"
    self.pair_dictionary_path = self.full_path / "pair_dictionary.json"

    if follow_mode:
        self.create_follower_dict()
    self.load_drawer_from_disk()
    self.load_historic_predictions_from_disk()

    self.training_queue: Dict[str, int] = {}
    self.history_lock = threading.Lock()
    self.save_lock = threading.Lock()
    self.pair_dict_lock = threading.Lock()
    self.old_DBSCAN_eps: Dict[str, float] = {}
    # template that new pair_dict entries are copied from
    self.empty_pair_dict: pair_info = {
        "model_filename": "",
        "trained_timestamp": 0,
        "priority": 1,
        "first": True,
        "data_path": "",
        "extras": {},
    }
|
2022-05-23 19:05:05 +00:00
|
|
|
|
|
|
|
def load_drawer_from_disk(self):
    """
    Restore the pair dictionary (all pair model metadata) from the JSON file in
    the present model folder, when that file is there.
    :return: bool - whether or not the drawer was located
    """
    drawer_file = self.pair_dictionary_path
    found = drawer_file.is_file()

    if found:
        with open(drawer_file, "r") as handle:
            self.pair_dict = json.load(handle)
        return found

    # nothing on disk: a follower depends on the leader's file, so that case is louder
    if self.follow_mode:
        logger.warning(
            f"Follower could not find pair_dictionary at {self.full_path} "
            "sending null values back to strategy"
        )
    else:
        logger.info("Could not find existing datadrawer, starting from scratch")

    return found
|
|
|
|
|
2022-07-11 20:01:48 +00:00
|
|
|
def load_historic_predictions_from_disk(self):
    """
    Restore previously saved historic predictions from the pickle file in the
    present model folder, when that file is there.
    :return: bool - whether or not the historic predictions file was located
    """
    predictions_file = self.historic_predictions_path
    found = predictions_file.is_file()

    if found:
        # NOTE: unpickling can execute arbitrary code, so this file must be trusted
        with open(predictions_file, "rb") as handle:
            self.historic_predictions = cloudpickle.load(handle)
        logger.info(
            f"Found existing historic predictions at {self.full_path}, but beware "
            "that statistics may be inaccurate if the bot has been offline for "
            "an extended period of time."
        )
        return found

    if self.follow_mode:
        logger.warning(
            f"Follower could not find historic predictions at {self.full_path} "
            "sending null values back to strategy"
        )
    else:
        logger.info("Could not find existing historic_predictions, starting from scratch")

    return found
|
|
|
|
|
2022-07-16 19:16:59 +00:00
|
|
|
def save_historic_predictions_to_disk(self):
    """
    Pickle the in-memory historic predictions to the historic predictions file
    in the present model folder.
    """
    target = self.historic_predictions_path
    with open(target, "wb") as handle:
        cloudpickle.dump(
            self.historic_predictions, handle, protocol=cloudpickle.DEFAULT_PROTOCOL
        )
|
2022-05-23 19:05:05 +00:00
|
|
|
|
2022-07-16 19:16:59 +00:00
|
|
|
def save_drawer_to_disk(self):
    """
    Write the pair dictionary (all pair model metadata) as JSON into the present
    model folder. The write happens while holding the save lock.
    """
    with self.save_lock, open(self.pair_dictionary_path, 'w') as handle:
        # np_encoder converts numpy scalars to plain Python values
        rapidjson.dump(
            self.pair_dict,
            handle,
            default=self.np_encoder,
            number_mode=rapidjson.NM_NATIVE,
        )
|
2022-07-11 20:01:48 +00:00
|
|
|
|
2022-06-03 13:19:46 +00:00
|
|
|
def save_follower_dict_to_disk(self):
    """
    Write the follower dictionary as JSON to the follower dictionary file
    (used by strategy for persistent prediction targets).
    """
    target = self.follower_dict_path
    with open(target, "w") as handle:
        # np_encoder converts numpy scalars to plain Python values
        rapidjson.dump(
            self.follower_dict,
            handle,
            default=self.np_encoder,
            number_mode=rapidjson.NM_NATIVE,
        )
|
2022-05-31 12:35:04 +00:00
|
|
|
|
|
|
|
def create_follower_dict(self):
    """
    Create a dictionary for each follower to maintain unique persistent prediction targets.

    Every pair of the configured exchange whitelist gets an empty entry and the
    result is saved to disk. A missing or empty whitelist results in no entries
    instead of an error.
    """
    # "pair_whitelist" may be absent from the config; treat that as "no pairs"
    whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist") or []

    if self.follower_dict_path.is_file():
        logger.info("Found an existing follower dictionary")

    # entries are reset even when a file already exists on disk
    for pair in whitelist_pairs:
        self.follower_dict[pair] = {}

    self.save_follower_dict_to_disk()
|
2022-05-31 12:35:04 +00:00
|
|
|
|
2022-05-23 19:05:05 +00:00
|
|
|
def np_encoder(self, object):
    """
    JSON `default` hook: turn a numpy scalar into its plain Python equivalent.
    :param object: value the JSON encoder could not serialize by itself
    :return: the native Python value for numpy scalars, None for anything else
    """
    if not isinstance(object, np.generic):
        return None
    return object.item()
|
|
|
|
|
2022-07-26 13:58:40 +00:00
|
|
|
def get_pair_dict_info(self, pair: str) -> Tuple[str, int, bool]:
    """
    Locate existing model metadata for a pair in the pair dictionary. If the pair
    is unknown and the drawer is not in follow mode, a fresh entry is created from
    the empty template and queued for its first training.
    :param pair: str: pair to lookup
    :return:
        model_filename: str = unique filename used for loading persistent objects from disk
        trained_timestamp: int = the last time the coin was trained
        return_null_array: bool = Follower could not find pair metadata
    """
    from copy import deepcopy

    pair_dict = self.pair_dict.get(pair)
    data_path_set = self.pair_dict.get(pair, self.empty_pair_dict).get("data_path", "")
    model_filename = ""
    trained_timestamp = 0
    return_null_array = False

    if pair_dict:
        model_filename = pair_dict["model_filename"]
        trained_timestamp = pair_dict["trained_timestamp"]
    elif not self.follow_mode:
        # deep copy: a shallow copy would share the nested "extras" dict between
        # every new pair and the template itself
        self.pair_dict[pair] = deepcopy(self.empty_pair_dict)
        # a new pair goes to the end of the training queue
        self.pair_dict[pair]["priority"] = len(self.pair_dict)

    if not data_path_set and self.follow_mode:
        logger.warning(
            f"Follower could not find current pair {pair} in "
            f"pair_dictionary at path {self.full_path}, sending null values "
            "back to strategy."
        )
        trained_timestamp = 0
        model_filename = ''
        return_null_array = True

    return model_filename, trained_timestamp, return_null_array
|
2022-05-24 10:58:53 +00:00
|
|
|
|
|
|
|
def set_pair_dict_info(self, metadata: dict) -> None:
    """
    Make sure the pair named in the metadata has an entry in the pair dictionary.
    An existing (non-empty) entry is left untouched; otherwise a fresh entry is
    created from the empty template and placed at the end of the training queue.
    :param metadata: dict = metadata whose "pair" key names the pair
    """
    from copy import deepcopy

    pair = metadata["pair"]
    if self.pair_dict.get(pair):
        return

    # deep copy: a shallow copy would share the nested "extras" dict between
    # every new pair and the template itself
    self.pair_dict[pair] = deepcopy(self.empty_pair_dict)
    self.pair_dict[pair]["priority"] = len(self.pair_dict)
|
2022-05-24 13:28:38 +00:00
|
|
|
|
|
|
|
def pair_to_end_of_training_queue(self, pair: str) -> None:
    """
    Move a pair to the back of the training queue.
    :param pair: str = pair that is sent to the end of the queue
    """
    with self.pair_dict_lock:
        # march all pairs up in the queue
        for entry in self.pair_dict.values():
            entry["priority"] -= 1
        # send pair to end of queue
        self.pair_dict[pair]["priority"] = len(self.pair_dict)
|
2022-05-30 09:37:05 +00:00
|
|
|
|
2022-08-12 14:12:28 +00:00
|
|
|
def set_initial_return_values(self, pair: str, pred_df: DataFrame) -> None:
    """
    Set the initial return values to the historical predictions dataframe. This avoids needing
    to repredict on historical candles, and also stores historical predictions despite
    retrainings (so stored predictions are true predictions, not just inferencing on trained
    data)

    The stored frame always has as many rows as `pred_df`: when the history is
    shorter, the leading rows of `pred_df` are prepended; when it is longer, only
    its tail is used. Remaining NaN values are replaced by 0.
    :param pair: str = pair whose return values are initialised
    :param pred_df: DataFrame = predictions that set the target length
    """
    history = self.historic_predictions[pair]
    wanted = len(pred_df.index)
    missing = wanted - len(history.index)

    if missing > 0:
        # No `keys=` here: it has no effect together with ignore_index=True, and
        # concat pairs keys with objects, so a history with fewer than two
        # columns used to be dropped from the result.
        seeded = pd.concat([pred_df.iloc[:missing], history], ignore_index=True)
    else:
        seeded = history.tail(wanted).reset_index(drop=True)

    self.model_return_values[pair] = seeded.fillna(0)
|
2022-07-31 15:51:19 +00:00
|
|
|
|
2022-07-29 06:12:50 +00:00
|
|
|
def append_model_predictions(self, pair: str, predictions: DataFrame,
                             do_preds: NDArray[np.int_],
                             dk: FreqaiDataKitchen, len_df: int) -> None:
    """
    Append model predictions to historic predictions dataframe, then set the
    strategy return dataframe to the tail of the historic predictions. The length of
    the tail is equivalent to the length of the dataframe that entered FreqAI from
    the strategy originally. Doing this allows FreqUI to always display the correct
    historic predictions.
    :param pair: str = pair the predictions belong to
    :param predictions: DataFrame = model outputs; only the last row is appended
    :param do_preds: NDArray = outlier indicators; only the last value is appended
    :param dk: FreqaiDataKitchen = source of label statistics, DI values and extra returns
    :param len_df: int = number of trailing rows handed back to the strategy
    """
    history = self.historic_predictions[pair]

    # grow the history by one all-NaN row which is filled in below
    nan_df = pd.DataFrame(np.nan, index=history.index[-1:], columns=history.columns)
    df = pd.concat([history, nan_df], ignore_index=True, axis=0)
    self.historic_predictions[pair] = df

    def set_last(column, value):
        # One positional assignment instead of chained `df[column].iloc[-1] = ...`,
        # which writes to a temporary and is a no-op under pandas Copy-on-Write.
        # get_loc keeps the KeyError for columns missing from the history.
        df.iloc[-1, df.columns.get_loc(column)] = value

    # model outputs and associated statistics
    for label in predictions.columns:
        set_last(label, predictions[label].iloc[-1])
        if df[label].dtype == object:
            # no mean/std is stored for object-typed label columns
            continue
        set_last(f"{label}_mean", dk.data["labels_mean"][label])
        set_last(f"{label}_std", dk.data["labels_std"][label])

    # outlier indicators
    set_last("do_predict", do_preds[-1])
    if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
        set_last("DI_values", dk.DI_values[-1])

    # extra values the user added within custom prediction model
    rets = dk.data['extra_returns_per_train']
    if rets:
        for return_str in rets:
            set_last(return_str, rets[return_str])

    self.model_return_values[pair] = df.tail(len_df).reset_index(drop=True)
|
2022-07-11 20:01:48 +00:00
|
|
|
|
2022-07-24 14:51:48 +00:00
|
|
|
def attach_return_values_to_return_dataframe(
        self, pair: str, dataframe: DataFrame) -> DataFrame:
    """
    Attach the stored return values of a pair to the strategy dataframe. Columns
    of the strategy dataframe whose name starts with "&" are dropped first.
    :param pair: str = pair whose stored return values are attached
    :param dataframe: DataFrame = strategy dataframe
    :return: DataFrame = strat dataframe with return values attached
    """
    return_values = self.model_return_values[pair]
    kept_columns = list(filter(lambda name: not name.startswith("&"), dataframe.columns))
    return pd.concat([dataframe[kept_columns], return_values], axis=1)
|
|
|
|
|
2022-07-28 05:07:40 +00:00
|
|
|
def return_null_values_to_strategy(self, dataframe: DataFrame, dk: FreqaiDataKitchen) -> None:
    """
    Build 0 filled dataframe to return to strategy. The zero columns are added to
    the passed dataframe in place, which is then stored on dk.return_dataframe.
    :param dataframe: DataFrame = strategy dataframe that receives the zero columns
    :param dk: FreqaiDataKitchen = supplies labels, classes and extra return names
    """
    dk.find_features(dataframe)

    # every label/class gets a prediction column plus its mean and std
    for label in dk.label_list + dk.unique_class_list:
        for column in (label, f"{label}_mean", f"{label}_std"):
            dataframe[column] = 0

    dataframe["do_predict"] = 0

    di_threshold = self.freqai_info["feature_parameters"].get("DI_threshold", 0)
    if di_threshold > 0:
        dataframe["DI_values"] = 0

    # extra values the user added within custom prediction model
    for return_str in dk.data['extra_returns_per_train'] or ():
        dataframe[return_str] = 0

    dk.return_dataframe = dataframe
|
|
|
|
|
2022-05-31 09:58:21 +00:00
|
|
|
def purge_old_models(self) -> None:
    """
    Delete outdated model folders from the drawer's folder, keeping only the two
    most recent ones per coin.

    Only sub-folders named like `sub-train-<coin>_<10 digit timestamp>` are
    considered; any other folder is ignored and never deleted.
    """
    pattern = re.compile(r"sub-train-(\w+)_(\d{10})")

    # coin -> {training timestamp -> model folder}
    folders_by_coin: Dict[str, Dict[int, Path]] = {}
    for folder in self.full_path.iterdir():
        if not folder.is_dir():
            continue
        result = pattern.match(folder.name)
        if result is None:
            # Unrelated folder: skip just this one. Stopping the scan here would
            # make the purge depend on the arbitrary listing order.
            continue
        coin, timestamp = result.group(1), int(result.group(2))
        folders_by_coin.setdefault(coin, {})[timestamp] = folder

    for timestamps in folders_by_coin.values():
        # sorted oldest first; everything but the two newest folders is removed
        for _, stale_folder in sorted(timestamps.items())[:-2]:
            logger.info(f"Freqai purging old model file {stale_folder}")
            shutil.rmtree(stale_folder)
|
|
|
|
|
2022-06-03 13:19:46 +00:00
|
|
|
def update_follower_metadata(self):
    """
    Refresh the follower's view of the pair dictionary from disk and, if the
    "purge_old_models" option of the freqai config is enabled, purge old models.
    """
    # follower needs to load from disk to get any changes made by leader to pair_dict
    self.load_drawer_from_disk()

    freqai_config = self.config.get("freqai", {})
    if not freqai_config.get("purge_old_models", False):
        return
    self.purge_old_models()
|
|
|
|
|
2022-07-26 08:24:14 +00:00
|
|
|
# Functions pulled back from FreqaiDataKitchen because they relied on DataDrawer
|
|
|
|
|
|
|
|
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
    """
    Saves all data associated with a model for a single sub-train time range.

    Written into dk.data_path (created if missing), all prefixed with
    dk.model_filename: the model, the optional SVM model, the metadata JSON, the
    pickled training features and training dates and, if principal component
    analysis is enabled, the pickled PCA object. Afterwards the model is kept in
    memory, the pair dictionary is updated for the coin and saved to disk.
    :param model: User trained model which can be reused for inferencing to generate
    predictions
    :param coin: str = key under which the model and its metadata are registered
    :param dk: FreqaiDataKitchen = holds the data, paths and helper models to persist
    """
    if not dk.data_path.is_dir():
        dk.data_path.mkdir(parents=True, exist_ok=True)

    save_path = Path(dk.data_path)

    # Save the trained model
    if not dk.keras:
        dump(model, save_path / f"{dk.model_filename}_model.joblib")
    else:
        model.save(save_path / f"{dk.model_filename}_model.h5")

    if dk.svm_model is not None:
        dump(dk.svm_model, save_path / f"{dk.model_filename}_svm_model.joblib")

    dk.data["data_path"] = str(dk.data_path)
    dk.data["model_filename"] = str(dk.model_filename)
    dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
    dk.data["label_list"] = dk.label_list

    # store the metadata
    with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp:
        rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)

    # save the train data to file so we can check preds for area of applicability later
    dk.data_dictionary["train_features"].to_pickle(
        save_path / f"{dk.model_filename}_trained_df.pkl"
    )
    dk.data_dictionary["train_dates"].to_pickle(
        save_path / f"{dk.model_filename}_trained_dates_df.pkl"
    )

    if self.freqai_info["feature_parameters"].get("principal_component_analysis"):
        # context manager so the file is flushed and closed right away
        with open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "wb") as fp:
            cloudpickle.dump(dk.pca, fp)

    # keep the model in memory and register where it was persisted
    self.model_dictionary[coin] = model
    self.pair_dict[coin]["model_filename"] = dk.model_filename
    self.pair_dict[coin]["data_path"] = str(dk.data_path)
    self.save_drawer_to_disk()
|
|
|
|
|
|
|
|
def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
    """
    Loads all data required to make a prediction on a sub-train time range.

    Besides returning the model, this fills dk with the stored metadata, training
    feature list, label list, training features and, when present on disk or
    enabled in the config, the SVM model and PCA object.
    :param coin: str = key of the pair dictionary entry and in-memory model
    :param dk: FreqaiDataKitchen = receives the loaded data
    :return: User trained model which can be inferenced for new predictions, or
    None if no model filename is registered for the coin
    :raises OperationalException: if the loaded model evaluates as false
    """
    if not self.pair_dict[coin]["model_filename"]:
        return None

    if dk.live:
        dk.model_filename = self.pair_dict[coin]["model_filename"]
        dk.data_path = Path(self.pair_dict[coin]["data_path"])
        if self.freqai_info.get("follow_mode", False):
            # follower can be on a different system which is rsynced from the leader:
            dk.data_path = Path(
                self.config["user_data_dir"]
                / "models"
                / dk.data_path.parts[-2]
                / dk.data_path.parts[-1]
            )

    with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
        dk.data = json.load(fp)
    dk.training_features_list = dk.data["training_features_list"]
    dk.label_list = dk.data["label_list"]

    dk.data_dictionary["train_features"] = pd.read_pickle(
        dk.data_path / f"{dk.model_filename}_trained_df.pkl"
    )

    # try to access model in memory instead of loading object from disk to save time
    if dk.live and coin in self.model_dictionary:
        model = self.model_dictionary[coin]
    elif not dk.keras:
        model = load(dk.data_path / f"{dk.model_filename}_model.joblib")
    else:
        # imported lazily so tensorflow is only needed for keras models
        from tensorflow import keras

        model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5")

    svm_model_file = dk.data_path / f"{dk.model_filename}_svm_model.joblib"
    if svm_model_file.is_file():
        dk.svm_model = load(svm_model_file)

    if not model:
        raise OperationalException(
            f"Unable to load model, ensure model exists at {dk.data_path} "
        )

    # .get keeps this in line with save_data, which treats a missing option as
    # "PCA disabled" instead of failing with a KeyError
    if self.config["freqai"]["feature_parameters"].get("principal_component_analysis"):
        # NOTE: unpickling can execute arbitrary code, so this file must be trusted
        with open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "rb") as fp:
            dk.pca = cloudpickle.load(fp)

    return model
|
|
|
|
|
|
|
|
def update_historic_data(self, strategy: IStrategy, dk: FreqaiDataKitchen) -> None:
    """
    Append new candles to the historic data held in memory, so that candle history
    does not need to be loaded from disk again and the same candle is not fetched
    more than once. The update runs while holding the history lock.
    :param strategy: IStrategy = strategy whose data provider supplies the candles
    :param dk: FreqaiDataKitchen = supplies the list of all pairs to update
    """
    feat_params = self.freqai_info["feature_parameters"]
    with self.history_lock:
        history_data = self.historic_data

        for pair in dk.all_pairs:
            for tf in feat_params.get("include_timeframes"):
                df_dp = strategy.dp.get_pair_dataframe(pair, tf)
                if not len(df_dp.index):
                    continue

                # check if newest candle is already appended
                last_stored_date = history_data[pair][tf].iloc[-1]["date"]
                if str(last_stored_date) == str(df_dp.iloc[-1:]["date"].iloc[-1]):
                    continue

                try:
                    # label of the row following the last stored candle; it is
                    # used positionally below - presumably df_dp carries a
                    # default RangeIndex, TODO confirm
                    index = df_dp.loc[df_dp["date"] == last_stored_date].index[0] + 1
                except IndexError:
                    # the last stored candle is not part of the fresh dataframe
                    logger.warning(
                        f"Unable to update pair history for {pair}. "
                        "If this does not resolve itself after 1 additional candle, "
                        "please report the error to #freqai discord channel"
                    )
                    return

                history_data[pair][tf] = pd.concat(
                    [history_data[pair][tf], df_dp.iloc[index:]],
                    ignore_index=True,
                    axis=0,
                )
|
|
|
|
|
|
|
|
def load_all_pair_histories(self, timerange: TimeRange, dk: FreqaiDataKitchen) -> None:
    """
    Load the candle history of every pair in dk.all_pairs, for each configured
    timeframe, into the in-memory historic data.
    Only called once upon startup of bot.
    :param timerange: TimeRange = full timerange required to populate all indicators
    for training according to user defined train_period_days
    :param dk: FreqaiDataKitchen = supplies the list of all pairs to load
    """
    history_data = self.historic_data

    for pair in dk.all_pairs:
        pair_history = history_data.setdefault(pair, {})
        for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
            pair_history[tf] = load_pair_history(
                datadir=self.config["datadir"],
                timeframe=tf,
                pair=pair,
                timerange=timerange,
                data_format=self.config.get("dataformat_ohlcv", "json"),
                candle_type=self.config.get("trading_mode", "spot"),
            )
|
|
|
|
|
|
|
|
def get_base_and_corr_dataframes(
    self, timerange: TimeRange, pair: str, dk: FreqaiDataKitchen
) -> Tuple[Dict[Any, Any], Dict[Any, Any]]:
    """
    Searches through our historic_data in memory and returns the dataframes relevant
    to the present pair, sliced to the timerange. Runs while holding the history lock.
    :param timerange: TimeRange = full timerange required to populate all indicators
    for training according to user defined train_period_days
    :param pair: str = pair the base dataframes are built for
    :param dk: FreqaiDataKitchen = provides slice_dataframe
    :return: (corr_dataframes, base_dataframes); corr_dataframes maps
    pair -> timeframe -> dataframe, base_dataframes maps timeframe -> dataframe
    """
    with self.history_lock:
        correlated: Dict[Any, Any] = {}
        base: Dict[Any, Any] = {}
        history = self.historic_data
        feat_params = self.freqai_info["feature_parameters"]
        corr_pairs = feat_params.get("include_corr_pairlist", []) or ()

        for tf in feat_params.get("include_timeframes"):
            base[tf] = dk.slice_dataframe(timerange, history[pair][tf])
            for corr_pair in corr_pairs:
                # dont repeat anything from whitelist (substring match on the pair name)
                if pair in corr_pair:
                    continue
                correlated.setdefault(corr_pair, {})[tf] = dk.slice_dataframe(
                    timerange, history[corr_pair][tf]
                )

        return correlated, base
|
|
|
|
|
2022-05-30 19:35:48 +00:00
|
|
|
# to be used if we want to send predictions directly to the follower instead of forcing
|
|
|
|
# follower to load models and inference
|
|
|
|
# def save_model_return_values_to_disk(self) -> None:
|
|
|
|
# with open(self.full_path / str('model_return_values.json'), "w") as fp:
|
|
|
|
# json.dump(self.model_return_values, fp, default=self.np_encoder)
|
|
|
|
|
2022-07-02 16:09:38 +00:00
|
|
|
# def load_model_return_values_from_disk(self, dk: FreqaiDataKitchen) -> FreqaiDataKitchen:
|
2022-05-30 19:35:48 +00:00
|
|
|
# exists = Path(self.full_path / str('model_return_values.json')).resolve().exists()
|
|
|
|
# if exists:
|
|
|
|
# with open(self.full_path / str('model_return_values.json'), "r") as fp:
|
|
|
|
# self.model_return_values = json.load(fp)
|
|
|
|
# elif not self.follow_mode:
|
|
|
|
# logger.info("Could not find existing datadrawer, starting from scratch")
|
|
|
|
# else:
|
|
|
|
# logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
|
|
|
|
# 'sending null values back to strategy')
|
|
|
|
|
2022-07-02 16:09:38 +00:00
|
|
|
# return exists, dk
|