use cloudpickle in place of pickle. define Paths once in data_drawer.

This commit is contained in:
robcaulk 2022-07-22 17:37:51 +02:00
parent accc629e32
commit 40f00196eb
2 changed files with 31 additions and 54 deletions

View File

@ -1,7 +1,6 @@
import collections
import json
import logging
import pickle
import re
import shutil
import threading
@ -10,6 +9,7 @@ from typing import Any, Dict, Tuple
import numpy as np
import pandas as pd
from joblib.externals import cloudpickle
from pandas import DataFrame
@ -41,6 +41,12 @@ class FreqaiDataDrawer:
self.historic_predictions: Dict[str, Any] = {}
self.follower_dict: Dict[str, Any] = {}
self.full_path = full_path
self.follower_name = self.config.get("bot_name", "follower1")
self.follower_dict_path = Path(
self.full_path / f"follower_dictionary-{self.follower_name}.json"
)
self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.follow_mode = follow_mode
if follow_mode:
self.create_follower_dict()
@ -56,9 +62,9 @@ class FreqaiDataDrawer:
:returns:
exists: bool = whether or not the drawer was located
"""
exists = Path(self.full_path / str("pair_dictionary.json")).resolve().exists()
exists = self.pair_dictionary_path.is_file() # resolve().exists()
if exists:
with open(self.full_path / str("pair_dictionary.json"), "r") as fp:
with open(self.pair_dictionary_path, "r") as fp:
self.pair_dict = json.load(fp)
elif not self.follow_mode:
logger.info("Could not find existing datadrawer, starting from scratch")
@ -76,13 +82,15 @@ class FreqaiDataDrawer:
:returns:
exists: bool = whether or not the drawer was located
"""
exists = Path(self.full_path / str("historic_predictions.pkl")).resolve().exists()
exists = self.historic_predictions_path.is_file() # resolve().exists()
if exists:
with open(self.full_path / str("historic_predictions.pkl"), "rb") as fp:
self.historic_predictions = pickle.load(fp)
logger.info(f"Found existing historic predictions at {self.full_path}, but beware "
with open(self.historic_predictions_path, "rb") as fp:
self.historic_predictions = cloudpickle.load(fp)
logger.info(
f"Found existing historic predictions at {self.full_path}, but beware "
"that statistics may be inaccurate if the bot has been offline for "
"an extended period of time.")
"an extended period of time."
)
elif not self.follow_mode:
logger.info("Could not find existing historic_predictions, starting from scratch")
else:
@ -97,37 +105,34 @@ class FreqaiDataDrawer:
"""
Save data drawer full of all pair model metadata in present model folder.
"""
with open(self.full_path / str("historic_predictions.pkl"), "wb") as fp:
pickle.dump(self.historic_predictions, fp, protocol=pickle.HIGHEST_PROTOCOL)
with open(self.historic_predictions_path, "wb") as fp:
cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
def save_drawer_to_disk(self):
    """
    Save data drawer full of all pair model metadata in present model folder.

    Serializes ``self.pair_dict`` as JSON into ``self.pair_dictionary_path``.
    ``self.np_encoder`` is passed as the ``default`` hook, so it is consulted
    for any value the json module cannot encode on its own.
    """
    # Open the target exactly once, through the path attribute set up in
    # __init__, rather than rebuilding the path from self.full_path and
    # opening the same file a second time for writing.
    with open(self.pair_dictionary_path, "w") as fp:
        json.dump(self.pair_dict, fp, default=self.np_encoder)
def save_follower_dict_to_disk(self):
    """
    Save follower dictionary to disk (used by strategy for persistent prediction targets)

    Serializes ``self.follower_dict`` as JSON into ``self.follower_dict_path``.
    ``self.np_encoder`` is passed as the ``default`` hook, so it is consulted
    for any value the json module cannot encode on its own.
    """
    # The follower file name (including the bot name) is already baked into
    # self.follower_dict_path in __init__, so there is no need to look up
    # "bot_name" again or to rebuild the path by string concatenation here.
    with open(self.follower_dict_path, "w") as fp:
        json.dump(self.follower_dict, fp, default=self.np_encoder)
def create_follower_dict(self):
"""
Create a dictionary for each follower to maintain unique persistent prediction targets.

Reads the pair whitelist from config["exchange"]["pair_whitelist"], checks whether
the follower dictionary file already exists, resets self.follower_dict[pair] to an
empty dict for every whitelisted pair, and writes the result to disk as JSON.
"""
follower_name = self.config.get("bot_name", "follower1")
whitelist_pairs = self.config.get("exchange", {}).get("pair_whitelist")
exists = (
Path(self.full_path / str("follower_dictionary-" + follower_name + ".json"))
.resolve()
.exists()
self.follower_dict_path.is_file()
# .resolve()
# .exists()
)
if exists:
@ -136,9 +141,7 @@ class FreqaiDataDrawer:
for pair in whitelist_pairs:
self.follower_dict[pair] = {}
with open(
self.full_path / str("follower_dictionary-" + follower_name + ".json"), "w"
) as fp:
# NOTE(review): `self.follow_path` is not assigned in the visible part of __init__,
# which defines `self.follower_dict_path` instead; this looks like it would raise
# AttributeError and was presumably meant to be `self.follower_dict_path` - confirm.
with open(self.follow_path, "w") as fp:
json.dump(self.follower_dict, fp, default=self.np_encoder)
def np_encoder(self, object):

View File

@ -2,7 +2,6 @@ import copy
import datetime
import json
import logging
import pickle as pk
import shutil
from pathlib import Path
from typing import Any, Dict, List, Tuple
@ -11,6 +10,7 @@ import numpy as np
import numpy.typing as npt
import pandas as pd
from joblib import dump, load # , Parallel, delayed # used for auto distribution assignment
from joblib.externals import cloudpickle
from pandas import DataFrame
from sklearn import linear_model
from sklearn.metrics.pairwise import pairwise_distances
@ -130,7 +130,7 @@ class FreqaiDataKitchen:
)
if self.freqai_config.get("feature_parameters", {}).get("principal_component_analysis"):
pk.dump(
cloudpickle.dump(
self.pca, open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "wb")
)
@ -192,7 +192,7 @@ class FreqaiDataKitchen:
)
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
self.pca = pk.load(
self.pca = cloudpickle.load(
open(self.data_path / str(self.model_filename + "_pca_object.pkl"), "rb")
)
@ -433,7 +433,7 @@ class FreqaiDataKitchen:
tr_training_list_timerange = []
tr_backtesting_list_timerange = []
first = True
# within_config_timerange = True
while True:
if not first:
timerange_train.startts = timerange_train.startts + bt_period
@ -475,7 +475,7 @@ class FreqaiDataKitchen:
:df: Dataframe containing all candles to run the entire backtest. Here
it is sliced down to just the present training period.
"""
# timerange = TimeRange.parse_timerange(tr)
start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
df = df.loc[df["date"] >= start, :]
@ -1132,32 +1132,6 @@ class FreqaiDataKitchen:
# Functions containing useful data manipulation examples, but not actively in use.
# def build_feature_list(self, config: dict, metadata: dict) -> list:
# """
# SUPERSEDED BY self.find_features()
# Build the list of features that will be used to filter
# the full dataframe. Feature list is constructed from the
# user configuration file.
# :params:
# :config: Canonical freqtrade config file containing all
# user defined input in config['freqai'] dictionary.
# """
# features = []
# for tf in config["freqai"]["timeframes"]:
# for ft in config["freqai"]["base_features"]:
# for n in range(config["freqai"]["feature_parameters"]["shift"] + 1):
# shift = ""
# if n > 0:
# shift = "_shift-" + str(n)
# features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
# for p in config["freqai"]["corr_pairlist"]:
# if metadata['pair'] in p:
# continue # avoid duplicate features
# features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
# # logger.info("number of features %s", len(features))
# return features
# Possibly phasing these outlier removal methods below out in favor of
# use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
# But these have good data manipulation examples, so keep them commented here for now.