Use cloudpickle in place of pickle. Define Paths once in data_drawer.

2022-07-22 17:37:51 +02:00
parent accc629e32
commit 40f00196eb
2 changed files with 31 additions and 54 deletions
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -2,7 +2,6 @@ import copy
 import datetime
 import json
 import logging
-import pickle as pk
 import shutil
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@@ -11,6 +10,7 @@ import numpy as np
 import numpy.typing as npt
 import pandas as pd
 from joblib import dump, load  # , Parallel, delayed # used for auto distribution assignment
+from joblib.externals import cloudpickle
 from pandas import DataFrame
 from sklearn import linear_model
 from sklearn.metrics.pairwise import pairwise_distances
@@ -130,7 +130,7 @@ class FreqaiDataKitchen:
        )

        if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"):
-            pk.dump(
+            cloudpickle.dump(
                self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb")
            )

@@ -192,7 +192,7 @@ class FreqaiDataKitchen:
            )

        if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
-            self.pca = pk.load(
+            self.pca = cloudpickle.load(
                open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "rb")
            )

@@ -433,7 +433,7 @@ class FreqaiDataKitchen:
        tr_training_list_timerange = []
        tr_backtesting_list_timerange = []
        first = True
-        # within_config_timerange = True
+
        while True:
            if not first:
                timerange_train.startts = timerange_train.startts + bt_period
@@ -475,7 +475,7 @@ class FreqaiDataKitchen:
        :df: Dataframe containing all candles to run the entire backtest. Here
        it is sliced down to just the present training period.
        """
-        # timerange = TimeRange.parse_timerange(tr)
+
        start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
        stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
        df = df.loc[df["date"] >= start, :]
@@ -1132,32 +1132,6 @@ class FreqaiDataKitchen:

    # Functions containing useful data manipulation examples, but not actively in use.

-    # def build_feature_list(self, config: dict, metadata: dict) -> list:
-    #     """
-    #     SUPERCEDED BY self.find_features()
-    #     Build the list of features that will be used to filter
-    #     the full dataframe. Feature list is construced from the
-    #     user configuration file.
-    #     :params:
-    #     :config: Canonical freqtrade config file containing all
-    #     user defined input in config['freqai] dictionary.
-    #     """
-    #     features = []
-    #     for tf in config["freqai"]["timeframes"]:
-    #         for ft in config["freqai"]["base_features"]:
-    #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
-    #                 shift = ""
-    #                 if n > 0:
-    #                     shift = "_shift-" + str(n)
-    #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
-    #                 for p in config["freqai"]["corr_pairlist"]:
-    #                     if metadata['pair'] in p:
-    #                         continue  # avoid duplicate features
-    #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
-
-    #     # logger.info("number of features %s", len(features))
-    #     return features
-
    # Possibly phasing these outlier removal methods below out in favor of
    # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
    # But these have good data manipulation examples, so keep them commented here for now.