use cloudpickle in place of pickle. define Paths once in data_drawer.

robcaulk 2022-07-22 17:37:51 +02:00
parent accc629e32
commit 40f00196eb
2 changed files with 31 additions and 54 deletions
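
The core of the change: stock pickle serializes functions and classes by reference, so it refuses lambdas, closures, and locally defined classes, while cloudpickle serializes them by value and is otherwise API-compatible. A minimal sketch of that difference, using the same joblib-vendored import the diff adds; the `scale` closure is purely illustrative.

    import pickle
    from joblib.externals import cloudpickle  # vendored copy; a standalone cloudpickle install works too

    scale = lambda x: (x - 0.5) / 2.0  # runtime-defined function, not importable by name

    try:
        pickle.dumps(scale)  # stock pickle: by-reference lookup of '<lambda>' fails
    except pickle.PicklingError as err:
        print(f"pickle refused: {err}")

    blob = cloudpickle.dumps(scale)  # cloudpickle: serialized by value
    assert cloudpickle.loads(blob)(4.5) == 2.0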

freqtrade/freqai/data_drawer.py

@@ -1,7 +1,6 @@
 import collections
 import json
 import logging
-import pickle
 import re
 import shutil
 import threading
@@ -10,6 +9,7 @@ from typing import Any, Dict, Tuple
 import numpy as np
 import pandas as pd
+from joblib.externals import cloudpickle
 from pandas import DataFrame
@@ -41,6 +41,12 @@ class FreqaiDataDrawer:
         self.historic_predictions: Dict[str, Any] = {}
         self.follower_dict: Dict[str, Any] = {}
         self.full_path = full_path
+        self.follower_name = self.config.get("bot_name", "follower1")
+        self.follower_dict_path = Path(
+            self.full_path / f"follower_dictionary-{self.follower_name}.json"
+        )
+        self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
+        self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
         self.follow_mode = follow_mode
         if follow_mode:
             self.create_follower_dict()
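
This hunk is the "define Paths once" half of the commit message: each on-disk location is computed a single time in __init__ and reused by every method below. A small sketch of the pattern with a made-up folder; note that `full_path / "name"` already returns a Path, so the extra Path(...) wrapper in the diff is redundant, though harmless.

    from pathlib import Path

    full_path = Path("user_data/models/example-run")  # hypothetical model folder
    pair_dictionary_path = full_path / "pair_dictionary.json"  # '/' joins and returns a new Path
    historic_predictions_path = full_path / "historic_predictions.pkl"

    print(pair_dictionary_path)             # user_data/models/example-run/pair_dictionary.json
    print(pair_dictionary_path.is_file())   # False until save_drawer_to_disk() writes it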
@@ -56,9 +62,9 @@ class FreqaiDataDrawer:
         :returns:
         exists: bool = whether or not the drawer was located
         """
-        exists = Path(self.full_path / str("pair_dictionary.json")).resolve().exists()
+        exists = self.pair_dictionary_path.is_file()  # resolve().exists()
         if exists:
-            with open(self.full_path / str("pair_dictionary.json"), "r") as fp:
+            with open(self.pair_dictionary_path, "r") as fp:
                 self.pair_dict = json.load(fp)
         elif not self.follow_mode:
             logger.info("Could not find existing datadrawer, starting from scratch")
@@ -76,13 +82,15 @@ class FreqaiDataDrawer:
         :returns:
         exists: bool = whether or not the drawer was located
         """
-        exists = Path(self.full_path / str("historic_predictions.pkl")).resolve().exists()
+        exists = self.historic_predictions_path.is_file()  # resolve().exists()
         if exists:
-            with open(self.full_path / str("historic_predictions.pkl"), "rb") as fp:
-                self.historic_predictions = pickle.load(fp)
-            logger.info(f"Found existing historic predictions at {self.full_path}, but beware "
-                        "that statistics may be inaccurate if the bot has been offline for "
-                        "an extended period of time.")
+            with open(self.historic_predictions_path, "rb") as fp:
+                self.historic_predictions = cloudpickle.load(fp)
+            logger.info(
+                f"Found existing historic predictions at {self.full_path}, but beware "
+                "that statistics may be inaccurate if the bot has been offline for "
+                "an extended period of time."
+            )
         elif not self.follow_mode:
             logger.info("Could not find existing historic_predictions, starting from scratch")
         else:
@@ -97,37 +105,34 @@ class FreqaiDataDrawer:
         """
         Save data drawer full of all pair model metadata in present model folder.
         """
-        with open(self.full_path / str("historic_predictions.pkl"), "wb") as fp:
-            pickle.dump(self.historic_predictions, fp, protocol=pickle.HIGHEST_PROTOCOL)
+        with open(self.historic_predictions_path, "wb") as fp:
+            cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
 
     def save_drawer_to_disk(self):
         """
         Save data drawer full of all pair model metadata in present model folder.
         """
-        with open(self.full_path / str("pair_dictionary.json"), "w") as fp:
+        with open(self.pair_dictionary_path, "w") as fp:
             json.dump(self.pair_dict, fp, default=self.np_encoder)
 
     def save_follower_dict_to_disk(self):
         """
         Save follower dictionary to disk (used by strategy for persistent prediction targets)
         """
-        follower_name = self.config.get("bot_name", "follower1")
-        with open(
-            self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
-        ) as fp:
+        with open(self.follower_dict_path, "w") as fp:
             json.dump(self.follower_dict, fp, default=self.np_encoder)
 
     def create_follower_dict(self):
         """
         Create a dictionary for each follower to maintain unique persistent prediction targets
         """
-        follower_name = self.config.get("bot_name", "follower1")
         whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist")
 
         exists = (
-            Path(self.full_path / str("follower_dictionary-" + follower_name + ".json"))
-            .resolve()
-            .exists()
+            self.follower_dict_path.is_file()
+            # .resolve()
+            # .exists()
         )
 
         if exists:
@@ -136,9 +141,7 @@ class FreqaiDataDrawer:
             for pair in whitelist_pairs:
                 self.follower_dict[pair] = {}
 
-        with open(
-            self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
-        ) as fp:
+        with open(self.follower_dict_path, "w") as fp:
             json.dump(self.follower_dict, fp, default=self.np_encoder)
 
     def np_encoder(self, object):
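
Every json.dump above passes `default=self.np_encoder`, the hook json invokes for objects it cannot serialize natively. The body below is an assumption (np_encoder itself sits outside this diff), written to show how such a hook typically unwraps numpy scalars:

    import json

    import numpy as np

    def np_encoder(obj):
        # assumed behavior: convert numpy scalars to plain Python values
        if isinstance(obj, np.generic):
            return obj.item()
        raise TypeError(f"{type(obj)} is not JSON serializable")

    print(json.dumps({"trained_timestamp": np.int64(1658500000)}, default=np_encoder))
    # -> {"trained_timestamp": 1658500000}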

freqtrade/freqai/data_kitchen.py

@@ -2,7 +2,6 @@ import copy
 import datetime
 import json
 import logging
-import pickle as pk
 import shutil
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
@@ -11,6 +10,7 @@ import numpy as np
 import numpy.typing as npt
 import pandas as pd
 from joblib import dump, load  # , Parallel, delayed  # used for auto distribution assignment
+from joblib.externals import cloudpickle
 from pandas import DataFrame
 from sklearn import linear_model
 from sklearn.metrics.pairwise import pairwise_distances
@@ -130,7 +130,7 @@ class FreqaiDataKitchen:
             )
 
         if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"):
-            pk.dump(
+            cloudpickle.dump(
                 self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb")
             )
@@ -192,7 +192,7 @@ class FreqaiDataKitchen:
             )
 
         if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
-            self.pca = pk.load(
+            self.pca = cloudpickle.load(
                 open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "rb")
             )
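
Together, the two kitchen hunks round-trip the fitted PCA object through cloudpickle. A self-contained sketch of the same round trip with placeholder data and a hypothetical filename; unlike the bare `open(...)` calls in the diff, it uses `with` so the file handles are actually closed.

    import numpy as np
    from joblib.externals import cloudpickle
    from sklearn.decomposition import PCA

    pca = PCA(n_components=2).fit(np.random.rand(50, 5))  # stand-in for self.pca

    with open("example_pca_object.pkl", "wb") as fp:      # hypothetical filename
        cloudpickle.dump(pca, fp)
    with open("example_pca_object.pkl", "rb") as fp:
        pca = cloudpickle.load(fp)

    print(pca.transform(np.random.rand(3, 5)).shape)      # (3, 2)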
@@ -433,7 +433,6 @@ class FreqaiDataKitchen:
         tr_training_list_timerange = []
         tr_backtesting_list_timerange = []
         first = True
-        # within_config_timerange = True
         while True:
             if not first:
                 timerange_train.startts = timerange_train.startts + bt_period
@@ -475,7 +475,6 @@ class FreqaiDataKitchen:
         :df: Dataframe containing all candles to run the entire backtest. Here
              it is sliced down to just the present training period.
         """
-        # timerange = TimeRange.parse_timerange(tr)
         start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
         stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
         df = df.loc[df["date"] >= start, :]
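
This hunk only removes a stale comment, but the surviving lines show the slicing technique: epoch seconds become tz-aware datetimes, and the candle dataframe is filtered on its `date` column. A sketch with fabricated candles; only the `date` column name and the `.loc` pattern come from the diff.

    import datetime

    import pandas as pd

    df = pd.DataFrame({"date": pd.date_range("2022-07-01", periods=7, freq="D", tz="UTC"),
                       "close": range(7)})

    start = datetime.datetime.fromtimestamp(1656806400, tz=datetime.timezone.utc)  # 2022-07-03 UTC
    stop = datetime.datetime.fromtimestamp(1656979200, tz=datetime.timezone.utc)   # 2022-07-05 UTC

    df = df.loc[df["date"] >= start, :]
    df = df.loc[df["date"] <= stop, :]
    print(len(df))  # 3 candles: Jul 3, 4, 5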
@@ -1132,32 +1132,6 @@ class FreqaiDataKitchen:
 
     # Functions containing useful data manipulation examples, but not actively in use.
 
-    # def build_feature_list(self, config: dict, metadata: dict) -> list:
-    #     """
-    #     SUPERCEDED BY self.find_features()
-    #     Build the list of features that will be used to filter
-    #     the full dataframe. Feature list is construced from the
-    #     user configuration file.
-    #     :params:
-    #     :config: Canonical freqtrade config file containing all
-    #     user defined input in config['freqai] dictionary.
-    #     """
-    #     features = []
-    #     for tf in config["freqai"]["timeframes"]:
-    #         for ft in config["freqai"]["base_features"]:
-    #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
-    #                 shift = ""
-    #                 if n > 0:
-    #                     shift = "_shift-" + str(n)
-    #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
-    #                 for p in config["freqai"]["corr_pairlist"]:
-    #                     if metadata['pair'] in p:
-    #                         continue  # avoid duplicate features
-    #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
-    #     # logger.info("number of features %s", len(features))
-    #     return features
 
     # Possibly phasing these outlier removal methods below out in favor of
     # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
     # But these have good data manipulation examples, so keep them commented here for now.