From 40f00196ebe4abc91b9987bf4365ea43f48c0eee Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Fri, 22 Jul 2022 17:37:51 +0200
Subject: [PATCH] use cloudpickle in place of pickle. define Paths once in data_drawer.

---
 freqtrade/freqai/data_drawer.py  | 49 +++++++++++++++++---------------
 freqtrade/freqai/data_kitchen.py | 36 ++++-------------------
 2 files changed, 31 insertions(+), 54 deletions(-)

diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index c89394c09..b0493e766 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -1,7 +1,6 @@
 import collections
 import json
 import logging
-import pickle
 import re
 import shutil
 import threading
@@ -10,6 +9,7 @@ from typing import Any, Dict, Tuple
 
 import numpy as np
 import pandas as pd
+from joblib.externals import cloudpickle
 from pandas import DataFrame
 
 
@@ -41,6 +41,12 @@ class FreqaiDataDrawer:
         self.historic_predictions: Dict[str, Any] = {}
         self.follower_dict: Dict[str, Any] = {}
         self.full_path = full_path
+        self.follower_name = self.config.get("bot_name", "follower1")
+        self.follower_dict_path = Path(
+            self.full_path / f"follower_dictionary-{self.follower_name}.json"
+        )
+        self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
+        self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
         self.follow_mode = follow_mode
         if follow_mode:
             self.create_follower_dict()
@@ -56,9 +62,9 @@ class FreqaiDataDrawer:
         :returns:
         exists: bool = whether or not the drawer was located
         """
-        exists = Path(self.full_path / str("pair_dictionary.json")).resolve().exists()
+        exists = self.pair_dictionary_path.is_file()  # resolve().exists()
         if exists:
-            with open(self.full_path / str("pair_dictionary.json"), "r") as fp:
+            with open(self.pair_dictionary_path, "r") as fp:
                 self.pair_dict = json.load(fp)
         elif not self.follow_mode:
             logger.info("Could not find existing datadrawer, starting from scratch")
@@ -76,13 +82,15 @@ class FreqaiDataDrawer:
         :returns:
         exists: bool = whether or not the drawer was located
        """
-        exists = Path(self.full_path / str("historic_predictions.pkl")).resolve().exists()
+        exists = self.historic_predictions_path.is_file()  # resolve().exists()
         if exists:
-            with open(self.full_path / str("historic_predictions.pkl"), "rb") as fp:
-                self.historic_predictions = pickle.load(fp)
-                logger.info(f"Found existing historic predictions at {self.full_path}, but beware "
-                            "that statistics may be inaccurate if the bot has been offline for "
-                            "an extended period of time.")
+            with open(self.historic_predictions_path, "rb") as fp:
+                self.historic_predictions = cloudpickle.load(fp)
+            logger.info(
+                f"Found existing historic predictions at {self.full_path}, but beware "
+                "that statistics may be inaccurate if the bot has been offline for "
+                "an extended period of time."
+            )
         elif not self.follow_mode:
             logger.info("Could not find existing historic_predictions, starting from scratch")
         else:
@@ -97,37 +105,34 @@ class FreqaiDataDrawer:
         """
         Save data drawer full of all pair model metadata in present model folder.
         """
-        with open(self.full_path / str("historic_predictions.pkl"), "wb") as fp:
-            pickle.dump(self.historic_predictions, fp, protocol=pickle.HIGHEST_PROTOCOL)
+        with open(self.historic_predictions_path, "wb") as fp:
+            cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
 
     def save_drawer_to_disk(self):
         """
         Save data drawer full of all pair model metadata in present model folder.
""" - with open(self.full_path / str("pair_dictionary.json"), "w") as fp: + with open(self.pair_dictionary_path, "w") as fp: json.dump(self.pair_dict, fp, default=self.np_encoder) def save_follower_dict_to_disk(self): """ Save follower dictionary to disk (used by strategy for persistent prediction targets) """ - follower_name = self.config.get("bot_name", "follower1") - with open( - self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w" - ) as fp: + with open(self.follower_dict_path, "w") as fp: json.dump(self.follower_dict, fp, default=self.np_encoder) def create_follower_dict(self): """ Create or dictionary for each follower to maintain unique persistent prediction targets """ - follower_name = self.config.get("bot_name", "follower1") + whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist") exists = ( - Path(self.full_path / str("follower_dictionary-" + follower_name + ".json")) - .resolve() - .exists() + self.follower_dict_path.is_file() + # .resolve() + # .exists() ) if exists: @@ -136,9 +141,7 @@ class FreqaiDataDrawer: for pair in whitelist_pairs: self.follower_dict[pair] = {} - with open( - self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w" - ) as fp: + with open(self.follow_path, "w") as fp: json.dump(self.follower_dict, fp, default=self.np_encoder) def np_encoder(self, object): diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index cfa0d3818..4ba6badf9 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -2,7 +2,6 @@ import copy import datetime import json import logging -import pickle as pk import shutil from pathlib import Path from typing import Any, Dict, List, Tuple @@ -11,6 +10,7 @@ import numpy as np import numpy.typing as npt import pandas as pd from joblib import dump, load # , Parallel, delayed # used for auto distribution assignment +from joblib.externals import cloudpickle from pandas import DataFrame from sklearn import linear_model from sklearn.metrics.pairwise import pairwise_distances @@ -130,7 +130,7 @@ class FreqaiDataKitchen: ) if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"): - pk.dump( + cloudpickle.dump( self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb") ) @@ -192,7 +192,7 @@ class FreqaiDataKitchen: ) if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]: - self.pca = pk.load( + self.pca = cloudpickle.load( open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "rb") ) @@ -433,7 +433,7 @@ class FreqaiDataKitchen: tr_training_list_timerange = [] tr_backtesting_list_timerange = [] first = True - # within_config_timerange = True + while True: if not first: timerange_train.startts = timerange_train.startts + bt_period @@ -475,7 +475,7 @@ class FreqaiDataKitchen: :df: Dataframe containing all candles to run the entire backtest. Here it is sliced down to just the present training period. """ - # timerange = TimeRange.parse_timerange(tr) + start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc) stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc) df = df.loc[df["date"] >= start, :] @@ -1132,32 +1132,6 @@ class FreqaiDataKitchen: # Functions containing useful data manpulation examples. but not actively in use. 
 
-    # def build_feature_list(self, config: dict, metadata: dict) -> list:
-    #     """
-    #     SUPERCEDED BY self.find_features()
-    #     Build the list of features that will be used to filter
-    #     the full dataframe. Feature list is construced from the
-    #     user configuration file.
-    #     :params:
-    #     :config: Canonical freqtrade config file containing all
-    #     user defined input in config['freqai] dictionary.
-    #     """
-    #     features = []
-    #     for tf in config["freqai"]["timeframes"]:
-    #         for ft in config["freqai"]["base_features"]:
-    #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
-    #                 shift = ""
-    #                 if n > 0:
-    #                     shift = "_shift-" + str(n)
-    #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
-    #                 for p in config["freqai"]["corr_pairlist"]:
-    #                     if metadata['pair'] in p:
-    #                         continue  # avoid duplicate features
-    #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
-
-    #     # logger.info("number of features %s", len(features))
-    #     return features
-
     # Possibly phasing these outlier removal methods below out in favor of
     # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
     # But these have good data manipulation examples, so keep them commented here for now.
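
Note on the pattern above: the patch swaps the standard-library pickle module for cloudpickle (vendored inside joblib as joblib.externals.cloudpickle) when persisting historic predictions and the fitted PCA object, and it builds each storage Path once in __init__ so the same object is reused for the existence check, the read, and the write. The sketch below is not part of the patch; it only illustrates that round trip with a hypothetical file name, assuming joblib is installed:

    from pathlib import Path

    from joblib.externals import cloudpickle

    # Hypothetical location; data_drawer.py derives its paths from self.full_path instead.
    storage_path = Path("example_object.pkl")

    # cloudpickle serializes by value, so objects that hold lambdas or locally
    # defined functions round-trip where the stdlib pickle module cannot pickle them.
    obj = {"predictions": [0.1, 0.2], "score": lambda x: 2 * x}

    with open(storage_path, "wb") as fp:
        cloudpickle.dump(obj, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)

    if storage_path.is_file():
        with open(storage_path, "rb") as fp:
            restored = cloudpickle.load(fp)

Defining the Path objects once also removes the repeated str(...) concatenation that the old code rebuilt at every call site.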