Fix typing issue, avoid using .get() when unnecessary, convert to f-strings

This commit is contained in:
robcaulk 2022-07-29 08:12:50 +02:00
parent efbd83c56d
commit c84d54b35e
5 changed files with 57 additions and 57 deletions

View File

@ -11,7 +11,7 @@ import numpy as np
import pandas as pd import pandas as pd
from joblib import dump, load from joblib import dump, load
from joblib.externals import cloudpickle from joblib.externals import cloudpickle
from numpy.typing import ArrayLike from numpy.typing import ArrayLike, NDArray
from pandas import DataFrame from pandas import DataFrame
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
@ -233,12 +233,13 @@ class FreqaiDataDrawer:
mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label] mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label]
mrv_df[f"{label}_std"] = dk.data["labels_std"][label] mrv_df[f"{label}_std"] = dk.data["labels_std"][label]
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0: if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
mrv_df["DI_values"] = dk.DI_values mrv_df["DI_values"] = dk.DI_values
mrv_df["do_predict"] = do_preds mrv_df["do_predict"] = do_preds
def append_model_predictions(self, pair: str, predictions: DataFrame, do_preds: ArrayLike, def append_model_predictions(self, pair: str, predictions: DataFrame,
do_preds: NDArray[np.int_],
dk: FreqaiDataKitchen, len_df: int) -> None: dk: FreqaiDataKitchen, len_df: int) -> None:
# strat seems to feed us variable sized dataframes - and since we are trying to build our # strat seems to feed us variable sized dataframes - and since we are trying to build our
@ -266,10 +267,10 @@ class FreqaiDataDrawer:
df[label].iloc[-1] = predictions[label].iloc[-1] df[label].iloc[-1] = predictions[label].iloc[-1]
df[f"{label}_mean"].iloc[-1] = dk.data["labels_mean"][label] df[f"{label}_mean"].iloc[-1] = dk.data["labels_mean"][label]
df[f"{label}_std"].iloc[-1] = dk.data["labels_std"][label] df[f"{label}_std"].iloc[-1] = dk.data["labels_std"][label]
# df['prediction'].iloc[-1] = predictions[-1]
df["do_predict"].iloc[-1] = do_preds[-1] df["do_predict"].iloc[-1] = do_preds[-1]
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0: if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
df["DI_values"].iloc[-1] = dk.DI_values[-1] df["DI_values"].iloc[-1] = dk.DI_values[-1]
# append the new predictions to persistent storage # append the new predictions to persistent storage
@ -309,7 +310,7 @@ class FreqaiDataDrawer:
# dataframe['prediction'] = 0 # dataframe['prediction'] = 0
dataframe["do_predict"] = 0 dataframe["do_predict"] = 0
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0: if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
dataframe["DI_value"] = 0 dataframe["DI_value"] = 0
dk.return_dataframe = dataframe dk.return_dataframe = dataframe
@ -379,24 +380,24 @@ class FreqaiDataDrawer:
model.save(save_path / f"{dk.model_filename}_model.h5") model.save(save_path / f"{dk.model_filename}_model.h5")
if dk.svm_model is not None: if dk.svm_model is not None:
dump(dk.svm_model, save_path / str(dk.model_filename + "_svm_model.joblib")) dump(dk.svm_model, save_path / f"{dk.model_filename}_svm_model.joblib")
dk.data["data_path"] = str(dk.data_path) dk.data["data_path"] = str(dk.data_path)
dk.data["model_filename"] = str(dk.model_filename) dk.data["model_filename"] = str(dk.model_filename)
dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns) dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
dk.data["label_list"] = dk.label_list dk.data["label_list"] = dk.label_list
# store the metadata # store the metadata
with open(save_path / str(dk.model_filename + "_metadata.json"), "w") as fp: with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp:
json.dump(dk.data, fp, default=dk.np_encoder) json.dump(dk.data, fp, default=dk.np_encoder)
# save the train data to file so we can check preds for area of applicability later # save the train data to file so we can check preds for area of applicability later
dk.data_dictionary["train_features"].to_pickle( dk.data_dictionary["train_features"].to_pickle(
save_path / str(dk.model_filename + "_trained_df.pkl") save_path / f"{dk.model_filename}_trained_df.pkl"
) )
if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"): if self.freqai_info["feature_parameters"].get("principal_component_analysis"):
cloudpickle.dump( cloudpickle.dump(
dk.pca, open(dk.data_path / str(dk.model_filename + "_pca_object.pkl"), "wb") dk.pca, open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "wb")
) )
# if self.live: # if self.live:
@ -429,27 +430,27 @@ class FreqaiDataDrawer:
/ dk.data_path.parts[-1] / dk.data_path.parts[-1]
) )
with open(dk.data_path / str(dk.model_filename + "_metadata.json"), "r") as fp: with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
dk.data = json.load(fp) dk.data = json.load(fp)
dk.training_features_list = dk.data["training_features_list"] dk.training_features_list = dk.data["training_features_list"]
dk.label_list = dk.data["label_list"] dk.label_list = dk.data["label_list"]
dk.data_dictionary["train_features"] = pd.read_pickle( dk.data_dictionary["train_features"] = pd.read_pickle(
dk.data_path / str(dk.model_filename + "_trained_df.pkl") dk.data_path / f"{dk.model_filename}_trained_df.pkl"
) )
# try to access model in memory instead of loading object from disk to save time # try to access model in memory instead of loading object from disk to save time
if dk.live and dk.model_filename in self.model_dictionary: if dk.live and dk.model_filename in self.model_dictionary:
model = self.model_dictionary[dk.model_filename] model = self.model_dictionary[dk.model_filename]
elif not dk.keras: elif not dk.keras:
model = load(dk.data_path / str(dk.model_filename + "_model.joblib")) model = load(dk.data_path / f"{dk.model_filename}_model.joblib")
else: else:
from tensorflow import keras from tensorflow import keras
model = keras.models.load_model(dk.data_path / str(dk.model_filename + "_model.h5")) model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5")
if Path(dk.data_path / str(dk.model_filename + "_svm_model.joblib")).resolve().exists(): if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
dk.svm_model = load(dk.data_path / str(dk.model_filename + "_svm_model.joblib")) dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")
if not model: if not model:
raise OperationalException( raise OperationalException(
@ -458,7 +459,7 @@ class FreqaiDataDrawer:
if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]: if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]:
dk.pca = cloudpickle.load( dk.pca = cloudpickle.load(
open(dk.data_path / str(dk.model_filename + "_pca_object.pkl"), "rb") open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "rb")
) )
return model return model
@ -471,7 +472,7 @@ class FreqaiDataDrawer:
:params: :params:
dataframe: DataFrame = strategy provided dataframe dataframe: DataFrame = strategy provided dataframe
""" """
feat_params = self.freqai_info.get("feature_parameters", {}) feat_params = self.freqai_info["feature_parameters"]
with self.history_lock: with self.history_lock:
history_data = self.historic_data history_data = self.historic_data
@ -524,7 +525,7 @@ class FreqaiDataDrawer:
for pair in dk.all_pairs: for pair in dk.all_pairs:
if pair not in history_data: if pair not in history_data:
history_data[pair] = {} history_data[pair] = {}
for tf in self.freqai_info.get("feature_parameters", {}).get("include_timeframes"): for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
history_data[pair][tf] = load_pair_history( history_data[pair][tf] = load_pair_history(
datadir=self.config["datadir"], datadir=self.config["datadir"],
timeframe=tf, timeframe=tf,
@ -550,11 +551,11 @@ class FreqaiDataDrawer:
corr_dataframes: Dict[Any, Any] = {} corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {}
historic_data = self.historic_data historic_data = self.historic_data
pairs = self.freqai_info.get("feature_parameters", {}).get( pairs = self.freqai_info["feature_parameters"].get(
"include_corr_pairlist", [] "include_corr_pairlist", []
) )
for tf in self.freqai_info.get("feature_parameters", {}).get("include_timeframes"): for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf]) base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf])
if pairs: if pairs:
for p in pairs: for p in pairs:

View File

@ -116,7 +116,7 @@ class FreqaiDataKitchen:
:filtered_dataframe: cleaned dataframe ready to be split. :filtered_dataframe: cleaned dataframe ready to be split.
:labels: cleaned labels ready to be split. :labels: cleaned labels ready to be split.
""" """
feat_dict = self.freqai_config.get("feature_parameters", {}) feat_dict = self.freqai_config["feature_parameters"]
weights: npt.ArrayLike weights: npt.ArrayLike
if feat_dict.get("weight_factor", 0) > 0: if feat_dict.get("weight_factor", 0) > 0:
@ -515,7 +515,9 @@ class FreqaiDataKitchen:
return return
if predict: if predict:
assert self.svm_model, "No svm model available for outlier removal" if not self.svm_model:
logger.warning("No svm model available for outlier removal")
return
y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"])
do_predict = np.where(y_pred == -1, 0, y_pred) do_predict = np.where(y_pred == -1, 0, y_pred)
@ -528,7 +530,7 @@ class FreqaiDataKitchen:
else: else:
# use SGDOneClassSVM to increase speed? # use SGDOneClassSVM to increase speed?
nu = self.freqai_config.get("feature_parameters", {}).get("svm_nu", 0.2) nu = self.freqai_config["feature_parameters"].get("svm_nu", 0.2)
self.svm_model = linear_model.SGDOneClassSVM(nu=nu).fit( self.svm_model = linear_model.SGDOneClassSVM(nu=nu).fit(
self.data_dictionary["train_features"] self.data_dictionary["train_features"]
) )
@ -551,7 +553,7 @@ class FreqaiDataKitchen:
) )
# same for test data # same for test data
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: if self.freqai_config['data_split_parameters'].get('test_size', 0.1) != 0:
y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) y_pred = self.svm_model.predict(self.data_dictionary["test_features"])
dropped_points = np.where(y_pred == -1, 0, y_pred) dropped_points = np.where(y_pred == -1, 0, y_pred)
self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
@ -605,7 +607,7 @@ class FreqaiDataKitchen:
self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"] self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]
do_predict = np.where( do_predict = np.where(
self.DI_values < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"), self.DI_values < self.freqai_config["feature_parameters"]["DI_threshold"],
1, 1,
0, 0,
) )
@ -640,7 +642,7 @@ class FreqaiDataKitchen:
self.append_df[f"{label}_std"] = self.data["labels_std"][label] self.append_df[f"{label}_std"] = self.data["labels_std"][label]
self.append_df["do_predict"] = do_predict self.append_df["do_predict"] = do_predict
if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0: if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
self.append_df["DI_values"] = self.DI_values self.append_df["DI_values"] = self.DI_values
if self.full_df.empty: if self.full_df.empty:
@ -701,7 +703,7 @@ class FreqaiDataKitchen:
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
self.full_path = Path( self.full_path = Path(
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier")) self.config["user_data_dir"] / "models" / f"{self.freqai_config['identifier']}"
) )
config_path = Path(self.config["config_files"][0]) config_path = Path(self.config["config_files"][0])
@ -741,10 +743,10 @@ class FreqaiDataKitchen:
data_load_timerange = TimeRange() data_load_timerange = TimeRange()
# find the max indicator length required # find the max indicator length required
max_timeframe_chars = self.freqai_config.get("feature_parameters", {}).get( max_timeframe_chars = self.freqai_config["feature_parameters"].get(
"include_timeframes" "include_timeframes"
)[-1] )[-1]
max_period = self.freqai_config.get("feature_parameters", {}).get( max_period = self.freqai_config["feature_parameters"].get(
"indicator_max_period_candles", 50 "indicator_max_period_candles", 50
) )
additional_seconds = 0 additional_seconds = 0
@ -832,7 +834,7 @@ class FreqaiDataKitchen:
refresh_backtest_ohlcv_data( refresh_backtest_ohlcv_data(
exchange, exchange,
pairs=self.all_pairs, pairs=self.all_pairs,
timeframes=self.freqai_config.get("feature_parameters", {}).get("include_timeframes"), timeframes=self.freqai_config["feature_parameters"].get("include_timeframes"),
datadir=self.config["datadir"], datadir=self.config["datadir"],
timerange=timerange, timerange=timerange,
new_pairs_days=new_pairs_days, new_pairs_days=new_pairs_days,
@ -845,7 +847,7 @@ class FreqaiDataKitchen:
def set_all_pairs(self) -> None: def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy( self.all_pairs = copy.deepcopy(
self.freqai_config.get("feature_parameters", {}).get("include_corr_pairlist", []) self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
) )
for pair in self.config.get("exchange", "").get("pair_whitelist"): for pair in self.config.get("exchange", "").get("pair_whitelist"):
if pair not in self.all_pairs: if pair not in self.all_pairs:
@ -876,8 +878,8 @@ class FreqaiDataKitchen:
# for prediction dataframe creation, we let dataprovider handle everything in the strategy # for prediction dataframe creation, we let dataprovider handle everything in the strategy
# so we create empty dictionaries, which allows us to pass None to # so we create empty dictionaries, which allows us to pass None to
# `populate_any_indicators()`. Signaling we want the dp to give us the live dataframe. # `populate_any_indicators()`. Signaling we want the dp to give us the live dataframe.
tfs = self.freqai_config.get("feature_parameters", {}).get("include_timeframes") tfs = self.freqai_config["feature_parameters"].get("include_timeframes")
pairs = self.freqai_config.get("feature_parameters", {}).get("include_corr_pairlist", []) pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
if not prediction_dataframe.empty: if not prediction_dataframe.empty:
dataframe = prediction_dataframe.copy() dataframe = prediction_dataframe.copy()
for tf in tfs: for tf in tfs:

View File

@ -12,7 +12,7 @@ from typing import Any, Dict, Tuple
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from numpy.typing import ArrayLike from numpy.typing import NDArray
from pandas import DataFrame from pandas import DataFrame
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
@ -204,14 +204,9 @@ class IFreqaiModel(ABC):
dk.data_path = Path( dk.data_path = Path(
dk.full_path dk.full_path
/ str( /
"sub-train" f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}"
+ "-"
+ metadata["pair"].split("/")[0]
+ "_"
+ str(int(trained_timestamp.stopts))
) )
)
if not self.model_exists( if not self.model_exists(
metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts) metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts)
): ):
@ -331,7 +326,8 @@ class IFreqaiModel(ABC):
return return
elif self.dk.check_if_model_expired(trained_timestamp): elif self.dk.check_if_model_expired(trained_timestamp):
pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list) pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list)
do_preds, dk.DI_values = np.ones(2) * 2, np.zeros(2) do_preds = np.ones(2, dtype=np.int_) * 2
dk.DI_values = np.zeros(2)
logger.warning( logger.warning(
f"Model expired for {pair}, returning null values to strategy. Strategy " f"Model expired for {pair}, returning null values to strategy. Strategy "
"construction should take care to consider this event with " "construction should take care to consider this event with "
@ -379,15 +375,15 @@ class IFreqaiModel(ABC):
example of how outlier data points are dropped from the dataframe used for training. example of how outlier data points are dropped from the dataframe used for training.
""" """
if self.freqai_info.get("feature_parameters", {}).get( if self.freqai_info["feature_parameters"].get(
"principal_component_analysis", False "principal_component_analysis", False
): ):
dk.principal_component_analysis() dk.principal_component_analysis()
if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers", False): if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
dk.use_SVM_to_remove_outliers(predict=False) dk.use_SVM_to_remove_outliers(predict=False)
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0): if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
dk.data["avg_mean_dist"] = dk.compute_distances() dk.data["avg_mean_dist"] = dk.compute_distances()
def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None: def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
@ -401,15 +397,15 @@ class IFreqaiModel(ABC):
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
for buy signals. for buy signals.
""" """
if self.freqai_info.get("feature_parameters", {}).get( if self.freqai_info["feature_parameters"].get(
"principal_component_analysis", False "principal_component_analysis", False
): ):
dk.pca_transform(dataframe) dk.pca_transform(dataframe)
if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers", False): if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
dk.use_SVM_to_remove_outliers(predict=True) dk.use_SVM_to_remove_outliers(predict=True)
if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0): if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
dk.check_if_pred_in_training_spaces() dk.check_if_pred_in_training_spaces()
def model_exists( def model_exists(
@ -430,9 +426,9 @@ class IFreqaiModel(ABC):
coin, _ = pair.split("/") coin, _ = pair.split("/")
if not self.live: if not self.live:
dk.model_filename = model_filename = "cb_" + coin.lower() + "_" + str(trained_timestamp) dk.model_filename = model_filename = f"cb_{coin.lower()}_{trained_timestamp}"
path_to_modelfile = Path(dk.data_path / str(model_filename + "_model.joblib")) path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
file_exists = path_to_modelfile.is_file() file_exists = path_to_modelfile.is_file()
if file_exists and not scanning: if file_exists and not scanning:
logger.info("Found model at %s", dk.data_path / dk.model_filename) logger.info("Found model at %s", dk.data_path / dk.model_filename)
@ -442,7 +438,7 @@ class IFreqaiModel(ABC):
def set_full_path(self) -> None: def set_full_path(self) -> None:
self.full_path = Path( self.full_path = Path(
self.config["user_data_dir"] / "models" / str(self.freqai_info.get("identifier")) self.config["user_data_dir"] / "models" / f"{self.freqai_info['identifier']}"
) )
self.full_path.mkdir(parents=True, exist_ok=True) self.full_path.mkdir(parents=True, exist_ok=True)
shutil.copy( shutil.copy(
@ -550,7 +546,7 @@ class IFreqaiModel(ABC):
@abstractmethod @abstractmethod
def predict( def predict(
self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
) -> Tuple[DataFrame, ArrayLike]: ) -> Tuple[DataFrame, NDArray[np.int_]]:
""" """
Filter the prediction features data and predict with it. Filter the prediction features data and predict with it.
:param unfiltered_dataframe: Full dataframe for the current backtest period. :param unfiltered_dataframe: Full dataframe for the current backtest period.

View File

@ -3,7 +3,7 @@ from typing import Any, Tuple
import numpy.typing as npt import numpy.typing as npt
from pandas import DataFrame from pandas import DataFrame
import numpy as np
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.freqai_interface import IFreqaiModel from freqtrade.freqai.freqai_interface import IFreqaiModel
@ -85,7 +85,7 @@ class BaseRegressionModel(IFreqaiModel):
def predict( def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
) -> Tuple[DataFrame, npt.ArrayLike]: ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
""" """
Filter the prediction features data and predict with it. Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period. :param: unfiltered_dataframe: Full dataframe for the current backtest period.

View File

@ -1,6 +1,7 @@
import gc
import logging import logging
from typing import Any, Dict from typing import Any, Dict
import gc
from catboost import CatBoostRegressor, Pool from catboost import CatBoostRegressor, Pool
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel