From c84d54b35ec455e02818bca592ce80c98a89d756 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 29 Jul 2022 08:12:50 +0200 Subject: [PATCH] Fix typing issue, avoid using .get() when unnecessary, convert to fstrings --- freqtrade/freqai/data_drawer.py | 45 ++++++++++--------- freqtrade/freqai/data_kitchen.py | 28 ++++++------ freqtrade/freqai/freqai_interface.py | 34 +++++++------- .../prediction_models/BaseRegressionModel.py | 4 +- .../CatboostPredictionModel.py | 3 +- 5 files changed, 57 insertions(+), 57 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 4d37ef8c1..97cf7607a 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd from joblib import dump, load from joblib.externals import cloudpickle -from numpy.typing import ArrayLike +from numpy.typing import ArrayLike, NDArray from pandas import DataFrame from freqtrade.configuration import TimeRange @@ -233,12 +233,13 @@ class FreqaiDataDrawer: mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label] mrv_df[f"{label}_std"] = dk.data["labels_std"][label] - if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0: + if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0: mrv_df["DI_values"] = dk.DI_values mrv_df["do_predict"] = do_preds - def append_model_predictions(self, pair: str, predictions: DataFrame, do_preds: ArrayLike, + def append_model_predictions(self, pair: str, predictions: DataFrame, + do_preds: NDArray[np.int_], dk: FreqaiDataKitchen, len_df: int) -> None: # strat seems to feed us variable sized dataframes - and since we are trying to build our @@ -266,10 +267,10 @@ class FreqaiDataDrawer: df[label].iloc[-1] = predictions[label].iloc[-1] df[f"{label}_mean"].iloc[-1] = dk.data["labels_mean"][label] df[f"{label}_std"].iloc[-1] = dk.data["labels_std"][label] - # df['prediction'].iloc[-1] = predictions[-1] + df["do_predict"].iloc[-1] = do_preds[-1] - if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0: + if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0: df["DI_values"].iloc[-1] = dk.DI_values[-1] # append the new predictions to persistent storage @@ -309,7 +310,7 @@ class FreqaiDataDrawer: # dataframe['prediction'] = 0 dataframe["do_predict"] = 0 - if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0: + if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0: dataframe["DI_value"] = 0 dk.return_dataframe = dataframe @@ -379,24 +380,24 @@ class FreqaiDataDrawer: model.save(save_path / f"{dk.model_filename}_model.h5") if dk.svm_model is not None: - dump(dk.svm_model, save_path / str(dk.model_filename + "_svm_model.joblib")) + dump(dk.svm_model, save_path / f"{dk.model_filename}_svm_model.joblib") dk.data["data_path"] = str(dk.data_path) dk.data["model_filename"] = str(dk.model_filename) dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns) dk.data["label_list"] = dk.label_list # store the metadata - with open(save_path / str(dk.model_filename + "_metadata.json"), "w") as fp: + with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp: json.dump(dk.data, fp, default=dk.np_encoder) # save the train data to file so we can check preds for area of applicability later dk.data_dictionary["train_features"].to_pickle( - save_path / str(dk.model_filename + "_trained_df.pkl") + save_path / f"{dk.model_filename}_trained_df.pkl" ) - if self.freqai_info.get("feature_parameters", {}).get("principal_component_analysis"): + if self.freqai_info["feature_parameters"].get("principal_component_analysis"): cloudpickle.dump( - dk.pca, open(dk.data_path / str(dk.model_filename + "_pca_object.pkl"), "wb") + dk.pca, open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "wb") ) # if self.live: @@ -429,27 +430,27 @@ class FreqaiDataDrawer: / dk.data_path.parts[-1] ) - with open(dk.data_path / str(dk.model_filename + "_metadata.json"), "r") as fp: + with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp: dk.data = json.load(fp) dk.training_features_list = dk.data["training_features_list"] dk.label_list = dk.data["label_list"] dk.data_dictionary["train_features"] = pd.read_pickle( - dk.data_path / str(dk.model_filename + "_trained_df.pkl") + dk.data_path / f"{dk.model_filename}_trained_df.pkl" ) # try to access model in memory instead of loading object from disk to save time if dk.live and dk.model_filename in self.model_dictionary: model = self.model_dictionary[dk.model_filename] elif not dk.keras: - model = load(dk.data_path / str(dk.model_filename + "_model.joblib")) + model = load(dk.data_path / f"{dk.model_filename}_model.joblib") else: from tensorflow import keras - model = keras.models.load_model(dk.data_path / str(dk.model_filename + "_model.h5")) + model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5") - if Path(dk.data_path / str(dk.model_filename + "_svm_model.joblib")).resolve().exists(): - dk.svm_model = load(dk.data_path / str(dk.model_filename + "_svm_model.joblib")) + if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file(): + dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib") if not model: raise OperationalException( @@ -458,7 +459,7 @@ class FreqaiDataDrawer: if self.config["freqai"]["feature_parameters"]["principal_component_analysis"]: dk.pca = cloudpickle.load( - open(dk.data_path / str(dk.model_filename + "_pca_object.pkl"), "rb") + open(dk.data_path / f"{dk.model_filename}_pca_object.pkl", "rb") ) return model @@ -471,7 +472,7 @@ class FreqaiDataDrawer: :params: dataframe: DataFrame = strategy provided dataframe """ - feat_params = self.freqai_info.get("feature_parameters", {}) + feat_params = self.freqai_info["feature_parameters"] with self.history_lock: history_data = self.historic_data @@ -524,7 +525,7 @@ class FreqaiDataDrawer: for pair in dk.all_pairs: if pair not in history_data: history_data[pair] = {} - for tf in self.freqai_info.get("feature_parameters", {}).get("include_timeframes"): + for tf in self.freqai_info["feature_parameters"].get("include_timeframes"): history_data[pair][tf] = load_pair_history( datadir=self.config["datadir"], timeframe=tf, @@ -550,11 +551,11 @@ class FreqaiDataDrawer: corr_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {} historic_data = self.historic_data - pairs = self.freqai_info.get("feature_parameters", {}).get( + pairs = self.freqai_info["feature_parameters"].get( "include_corr_pairlist", [] ) - for tf in self.freqai_info.get("feature_parameters", {}).get("include_timeframes"): + for tf in self.freqai_info["feature_parameters"].get("include_timeframes"): base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf]) if pairs: for p in pairs: diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index f16e169b9..b5a3295b5 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -116,7 +116,7 @@ class FreqaiDataKitchen: :filtered_dataframe: cleaned dataframe ready to be split. :labels: cleaned labels ready to be split. """ - feat_dict = self.freqai_config.get("feature_parameters", {}) + feat_dict = self.freqai_config["feature_parameters"] weights: npt.ArrayLike if feat_dict.get("weight_factor", 0) > 0: @@ -515,7 +515,9 @@ class FreqaiDataKitchen: return if predict: - assert self.svm_model, "No svm model available for outlier removal" + if not self.svm_model: + logger.warning("No svm model available for outlier removal") + return y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) do_predict = np.where(y_pred == -1, 0, y_pred) @@ -528,7 +530,7 @@ class FreqaiDataKitchen: else: # use SGDOneClassSVM to increase speed? - nu = self.freqai_config.get("feature_parameters", {}).get("svm_nu", 0.2) + nu = self.freqai_config["feature_parameters"].get("svm_nu", 0.2) self.svm_model = linear_model.SGDOneClassSVM(nu=nu).fit( self.data_dictionary["train_features"] ) @@ -551,7 +553,7 @@ class FreqaiDataKitchen: ) # same for test data - if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0: + if self.freqai_config['data_split_parameters'].get('test_size', 0.1) != 0: y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) dropped_points = np.where(y_pred == -1, 0, y_pred) self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ @@ -605,7 +607,7 @@ class FreqaiDataKitchen: self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"] do_predict = np.where( - self.DI_values < self.freqai_config.get("feature_parameters", {}).get("DI_threshold"), + self.DI_values < self.freqai_config["feature_parameters"]["DI_threshold"], 1, 0, ) @@ -640,7 +642,7 @@ class FreqaiDataKitchen: self.append_df[f"{label}_std"] = self.data["labels_std"][label] self.append_df["do_predict"] = do_predict - if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0: + if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0: self.append_df["DI_values"] = self.DI_values if self.full_df.empty: @@ -701,7 +703,7 @@ class FreqaiDataKitchen: full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") self.full_path = Path( - self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier")) + self.config["user_data_dir"] / "models" / f"{self.freqai_config['identifier']}" ) config_path = Path(self.config["config_files"][0]) @@ -741,10 +743,10 @@ class FreqaiDataKitchen: data_load_timerange = TimeRange() # find the max indicator length required - max_timeframe_chars = self.freqai_config.get("feature_parameters", {}).get( + max_timeframe_chars = self.freqai_config["feature_parameters"].get( "include_timeframes" )[-1] - max_period = self.freqai_config.get("feature_parameters", {}).get( + max_period = self.freqai_config["feature_parameters"].get( "indicator_max_period_candles", 50 ) additional_seconds = 0 @@ -832,7 +834,7 @@ class FreqaiDataKitchen: refresh_backtest_ohlcv_data( exchange, pairs=self.all_pairs, - timeframes=self.freqai_config.get("feature_parameters", {}).get("include_timeframes"), + timeframes=self.freqai_config["feature_parameters"].get("include_timeframes"), datadir=self.config["datadir"], timerange=timerange, new_pairs_days=new_pairs_days, @@ -845,7 +847,7 @@ class FreqaiDataKitchen: def set_all_pairs(self) -> None: self.all_pairs = copy.deepcopy( - self.freqai_config.get("feature_parameters", {}).get("include_corr_pairlist", []) + self.freqai_config["feature_parameters"].get("include_corr_pairlist", []) ) for pair in self.config.get("exchange", "").get("pair_whitelist"): if pair not in self.all_pairs: @@ -876,8 +878,8 @@ class FreqaiDataKitchen: # for prediction dataframe creation, we let dataprovider handle everything in the strategy # so we create empty dictionaries, which allows us to pass None to # `populate_any_indicators()`. Signaling we want the dp to give us the live dataframe. - tfs = self.freqai_config.get("feature_parameters", {}).get("include_timeframes") - pairs = self.freqai_config.get("feature_parameters", {}).get("include_corr_pairlist", []) + tfs = self.freqai_config["feature_parameters"].get("include_timeframes") + pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", []) if not prediction_dataframe.empty: dataframe = prediction_dataframe.copy() for tf in tfs: diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index ec69a78c4..47aeb32e4 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -12,7 +12,7 @@ from typing import Any, Dict, Tuple import numpy as np import pandas as pd -from numpy.typing import ArrayLike +from numpy.typing import NDArray from pandas import DataFrame from freqtrade.configuration import TimeRange @@ -204,14 +204,9 @@ class IFreqaiModel(ABC): dk.data_path = Path( dk.full_path - / str( - "sub-train" - + "-" - + metadata["pair"].split("/")[0] - + "_" - + str(int(trained_timestamp.stopts)) + / + f"sub-train-{metadata['pair'].split('/')[0]}_{int(trained_timestamp.stopts)}" ) - ) if not self.model_exists( metadata["pair"], dk, trained_timestamp=int(trained_timestamp.stopts) ): @@ -331,7 +326,8 @@ class IFreqaiModel(ABC): return elif self.dk.check_if_model_expired(trained_timestamp): pred_df = DataFrame(np.zeros((2, len(dk.label_list))), columns=dk.label_list) - do_preds, dk.DI_values = np.ones(2) * 2, np.zeros(2) + do_preds = np.ones(2, dtype=np.int_) * 2 + dk.DI_values = np.zeros(2) logger.warning( f"Model expired for {pair}, returning null values to strategy. Strategy " "construction should take care to consider this event with " @@ -379,15 +375,15 @@ class IFreqaiModel(ABC): example of how outlier data points are dropped from the dataframe used for training. """ - if self.freqai_info.get("feature_parameters", {}).get( + if self.freqai_info["feature_parameters"].get( "principal_component_analysis", False ): dk.principal_component_analysis() - if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers", False): + if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False): dk.use_SVM_to_remove_outliers(predict=False) - if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0): + if self.freqai_info["feature_parameters"].get("DI_threshold", 0): dk.data["avg_mean_dist"] = dk.compute_distances() def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None: @@ -401,15 +397,15 @@ class IFreqaiModel(ABC): of how the do_predict vector is modified. do_predict is ultimately passed back to strategy for buy signals. """ - if self.freqai_info.get("feature_parameters", {}).get( + if self.freqai_info["feature_parameters"].get( "principal_component_analysis", False ): dk.pca_transform(dataframe) - if self.freqai_info.get("feature_parameters", {}).get("use_SVM_to_remove_outliers", False): + if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False): dk.use_SVM_to_remove_outliers(predict=True) - if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0): + if self.freqai_info["feature_parameters"].get("DI_threshold", 0): dk.check_if_pred_in_training_spaces() def model_exists( @@ -430,9 +426,9 @@ class IFreqaiModel(ABC): coin, _ = pair.split("/") if not self.live: - dk.model_filename = model_filename = "cb_" + coin.lower() + "_" + str(trained_timestamp) + dk.model_filename = model_filename = f"cb_{coin.lower()}_{trained_timestamp}" - path_to_modelfile = Path(dk.data_path / str(model_filename + "_model.joblib")) + path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib") file_exists = path_to_modelfile.is_file() if file_exists and not scanning: logger.info("Found model at %s", dk.data_path / dk.model_filename) @@ -442,7 +438,7 @@ class IFreqaiModel(ABC): def set_full_path(self) -> None: self.full_path = Path( - self.config["user_data_dir"] / "models" / str(self.freqai_info.get("identifier")) + self.config["user_data_dir"] / "models" / f"{self.freqai_info['identifier']}" ) self.full_path.mkdir(parents=True, exist_ok=True) shutil.copy( @@ -550,7 +546,7 @@ class IFreqaiModel(ABC): @abstractmethod def predict( self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True - ) -> Tuple[DataFrame, ArrayLike]: + ) -> Tuple[DataFrame, NDArray[np.int_]]: """ Filter the prediction features data and predict with it. :param unfiltered_dataframe: Full dataframe for the current backtest period. diff --git a/freqtrade/freqai/prediction_models/BaseRegressionModel.py b/freqtrade/freqai/prediction_models/BaseRegressionModel.py index 2baec9fc3..85d7ae1ee 100644 --- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py +++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py @@ -3,7 +3,7 @@ from typing import Any, Tuple import numpy.typing as npt from pandas import DataFrame - +import numpy as np from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.freqai_interface import IFreqaiModel @@ -85,7 +85,7 @@ class BaseRegressionModel(IFreqaiModel): def predict( self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False - ) -> Tuple[DataFrame, npt.ArrayLike]: + ) -> Tuple[DataFrame, npt.NDArray[np.int_]]: """ Filter the prediction features data and predict with it. :param: unfiltered_dataframe: Full dataframe for the current backtest period. diff --git a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py index 884933803..9731e0c01 100644 --- a/freqtrade/freqai/prediction_models/CatboostPredictionModel.py +++ b/freqtrade/freqai/prediction_models/CatboostPredictionModel.py @@ -1,6 +1,7 @@ +import gc import logging from typing import Any, Dict -import gc + from catboost import CatBoostRegressor, Pool from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel