use cloudpickle in place of pickle. define Paths once in data_drawer.

2022-07-22 17:37:51 +02:00 · 2022-07-22 17:37:51 +02:00 · 40f00196eb
commit 40f00196eb
parent accc629e32
2 changed files with 31 additions and 54 deletions
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@ -1,7 +1,6 @@
 import collections
 import json
 import logging
-import pickle
 import re
 import shutil
 import threading
@ -10,6 +9,7 @@ from typing import Any, Dict, Tuple

 import numpy as np
 import pandas as pd
+from joblib.externals import cloudpickle
 from pandas import DataFrame


@ -41,6 +41,12 @@ class FreqaiDataDrawer:
        self.historic_predictions: Dict[str, Any] = {}
        self.follower_dict: Dict[str, Any] = {}
        self.full_path = full_path
+        self.follower_name = self.config.get("bot_name", "follower1")
+        self.follower_dict_path = Path(
+            self.full_path / f"follower_dictionary-{self.follower_name}.json"
+        )
+        self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
+        self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
        self.follow_mode = follow_mode
        if follow_mode:
            self.create_follower_dict()
@ -56,9 +62,9 @@ class FreqaiDataDrawer:
        :returns:
        exists: bool = whether or not the drawer was located
        """
-        exists = Path(self.full_path / str("pair_dictionary.json")).resolve().exists()
+        exists = self.pair_dictionary_path.is_file()  # resolve().exists()
        if exists:
-            with open(self.full_path / str("pair_dictionary.json"), "r") as fp:
+            with open(self.pair_dictionary_path, "r") as fp:
                self.pair_dict = json.load(fp)
        elif not self.follow_mode:
            logger.info("Could not find existing datadrawer, starting from scratch")
@ -76,13 +82,15 @@ class FreqaiDataDrawer:
        :returns:
        exists: bool = whether or not the drawer was located
        """
-        exists = Path(self.full_path / str("historic_predictions.pkl")).resolve().exists()
+        exists = self.historic_predictions_path.is_file()  # resolve().exists()
        if exists:
-            with open(self.full_path / str("historic_predictions.pkl"), "rb") as fp:
-                self.historic_predictions = pickle.load(fp)
-            logger.info(f"Found existing historic predictions at {self.full_path}, but beware "
-                        "that statistics may be inaccurate if the bot has been offline for "
-                        "an extended period of time.")
+            with open(self.historic_predictions_path, "rb") as fp:
+                self.historic_predictions = cloudpickle.load(fp)
+            logger.info(
+                f"Found existing historic predictions at {self.full_path}, but beware "
+                "that statistics may be inaccurate if the bot has been offline for "
+                "an extended period of time."
+            )
        elif not self.follow_mode:
            logger.info("Could not find existing historic_predictions, starting from scratch")
        else:
@ -97,37 +105,34 @@ class FreqaiDataDrawer:
        """
        Save data drawer full of all pair model metadata in present model folder.
        """
-        with open(self.full_path / str("historic_predictions.pkl"), "wb") as fp:
-            pickle.dump(self.historic_predictions, fp, protocol=pickle.HIGHEST_PROTOCOL)
+        with open(self.historic_predictions_path, "wb") as fp:
+            cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)

    def save_drawer_to_disk(self):
        """
        Save data drawer full of all pair model metadata in present model folder.
        """
-        with open(self.full_path / str("pair_dictionary.json"), "w") as fp:
+        with open(self.pair_dictionary_path, "w") as fp:
            json.dump(self.pair_dict, fp, default=self.np_encoder)

    def save_follower_dict_to_disk(self):
        """
        Save follower dictionary to disk (used by strategy for persistent prediction targets)
        """
-        follower_name = self.config.get("bot_name", "follower1")
-        with open(
-            self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
-        ) as fp:
+        with open(self.follower_dict_path, "w") as fp:
            json.dump(self.follower_dict, fp, default=self.np_encoder)

    def create_follower_dict(self):
        """
        Create or dictionary for each follower to maintain unique persistent prediction targets
        """
-        follower_name = self.config.get("bot_name", "follower1")
+
        whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist")

        exists = (
-            Path(self.full_path / str("follower_dictionary-" + follower_name + ".json"))
-            .resolve()
-            .exists()
+            self.follower_dict_path.is_file()
+            # .resolve()
+            # .exists()
        )

        if exists:
@ -136,9 +141,7 @@ class FreqaiDataDrawer:
        for pair in whitelist_pairs:
            self.follower_dict[pair] = {}

-        with open(
-            self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
-        ) as fp:
+        with open(self.follower_dict_path, "w") as fp:
            json.dump(self.follower_dict, fp, default=self.np_encoder)

    def np_encoder(self, object):
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@ -2,7 +2,6 @@ import copy
 import datetime
 import json
 import logging
-import pickle as pk
 import shutil
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@ -11,6 +10,7 @@ import numpy as np
 import numpy.typing as npt
 import pandas as pd
 from joblib import dump, load  # , Parallel, delayed # used for auto distribution assignment
+from joblib.externals import cloudpickle
 from pandas import DataFrame
 from sklearn import linear_model
 from sklearn.metrics.pairwise import pairwise_distances
@ -130,7 +130,7 @@ class FreqaiDataKitchen:
        )

        if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"):
-            pk.dump(
+            cloudpickle.dump(
                self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb")
            )

@ -192,7 +192,7 @@ class FreqaiDataKitchen:
            )

        if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
-            self.pca = pk.load(
+            self.pca = cloudpickle.load(
                open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "rb")
            )

@ -433,7 +433,7 @@ class FreqaiDataKitchen:
        tr_training_list_timerange = []
        tr_backtesting_list_timerange = []
        first = True
-        # within_config_timerange = True
+
        while True:
            if not first:
                timerange_train.startts = timerange_train.startts + bt_period
@ -475,7 +475,7 @@ class FreqaiDataKitchen:
        :df: Dataframe containing all candles to run the entire backtest. Here
        it is sliced down to just the present training period.
        """
-        # timerange = TimeRange.parse_timerange(tr)
+
        start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
        stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
        df = df.loc[df["date"] >= start, :]
@ -1132,32 +1132,6 @@ class FreqaiDataKitchen:

    # Functions containing useful data manpulation examples. but not actively in use.

-    # def build_feature_list(self, config: dict, metadata: dict) -> list:
-    #     """
-    #     SUPERCEDED BY self.find_features()
-    #     Build the list of features that will be used to filter
-    #     the full dataframe. Feature list is construced from the
-    #     user configuration file.
-    #     :params:
-    #     :config: Canonical freqtrade config file containing all
-    #     user defined input in config['freqai] dictionary.
-    #     """
-    #     features = []
-    #     for tf in config["freqai"]["timeframes"]:
-    #         for ft in config["freqai"]["base_features"]:
-    #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
-    #                 shift = ""
-    #                 if n > 0:
-    #                     shift = "_shift-" + str(n)
-    #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
-    #                 for p in config["freqai"]["corr_pairlist"]:
-    #                     if metadata['pair'] in p:
-    #                         continue  # avoid duplicate features
-    #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
-
-    #     # logger.info("number of features %s", len(features))
-    #     return features
-
    # Possibly phasing these outlier removal methods below out in favor of
    # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
    # But these have good data manipulation examples, so keep them commented here for now.