Aggregated commit. Adding support vector machine for outlier detection, improve user interface to dry/live, better standardization, fix various other bugs
This commit is contained in:
		| @@ -57,8 +57,8 @@ | ||||
|         "train_period": 30, | ||||
|         "backtest_period": 7, | ||||
|         "identifier": "example", | ||||
|         "live_trained_timerange": "20220330-20220429", | ||||
|         "live_full_backtestrange": "20220302-20220501", | ||||
|         "live_trained_timerange": "", | ||||
|         "live_full_backtestrange": "", | ||||
|         "corr_pairlist": [ | ||||
|             "BTC/USDT", | ||||
|             "ETH/USDT", | ||||
| @@ -68,20 +68,19 @@ | ||||
|         "feature_parameters": { | ||||
|             "period": 12, | ||||
|             "shift": 1, | ||||
|             "drop_features": false, | ||||
|             "DI_threshold": 1, | ||||
|             "weight_factor": 0, | ||||
|             "principal_component_analysis": false, | ||||
|             "remove_outliers": false | ||||
|             "use_SVM_to_remove_outliers": false | ||||
|         }, | ||||
|         "data_split_parameters": { | ||||
|             "test_size": 0.25, | ||||
|             "random_state": 1 | ||||
|         }, | ||||
|         "model_training_parameters": { | ||||
|             "n_estimators": 2000, | ||||
|             "n_estimators": 1000, | ||||
|             "random_state": 1, | ||||
|             "learning_rate": 0.02, | ||||
|             "learning_rate": 0.1, | ||||
|             "task_type": "CPU" | ||||
|         } | ||||
|     }, | ||||
|   | ||||
| @@ -331,21 +331,21 @@ Users can reduce the dimensionality of their features by activating the `princip | ||||
| Which will perform PCA on the features and reduce the dimensionality of the data so that the explained | ||||
| variance of the data set is >= 0.999. | ||||
|  | ||||
| ### Removing outliers based on feature statistical distributions | ||||
| ### Removing outliers using a Support Vector Machine (SVM) | ||||
|  | ||||
| The user can tell Freqai to remove outlier data points from the training/test data sets by setting: | ||||
|  | ||||
| ```json | ||||
|     "freqai": { | ||||
|         "feature_parameters" : { | ||||
|                 "remove_outliers": true | ||||
|             "use_SVM_to_remove_outliers": true | ||||
|         } | ||||
|     } | ||||
| ``` | ||||
|  | ||||
| Freqai will check the statistical distributions of each feature (or component if the user activated | ||||
| `principal_component_analysis`) and remove any data point that sits more than 3 standard deviations away  | ||||
| from the mean. | ||||
| Freqai will train an SVM on the training data (or components if the user activated | ||||
| `principal_component_analysis`) and remove any data point that it deems to sit beyond the | ||||
| feature space. | ||||
|  | ||||
| ## Additional information | ||||
|  | ||||
|   | ||||
| @@ -10,8 +10,9 @@ from typing import Any, Dict, List, Tuple | ||||
| import numpy as np | ||||
| import numpy.typing as npt | ||||
| import pandas as pd | ||||
| from joblib import dump, load | ||||
| from joblib import dump, load  # , Parallel, delayed # used for auto distribution assignment | ||||
| from pandas import DataFrame | ||||
| from sklearn import linear_model | ||||
| from sklearn.metrics.pairwise import pairwise_distances | ||||
| from sklearn.model_selection import train_test_split | ||||
|  | ||||
| @@ -22,6 +23,9 @@ from freqtrade.resolvers import ExchangeResolver | ||||
| from freqtrade.strategy.interface import IStrategy | ||||
|  | ||||
|  | ||||
| # import scipy as spy  # used for auto distribution assignment | ||||
|  | ||||
|  | ||||
| SECONDS_IN_DAY = 86400 | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
| @@ -52,6 +56,7 @@ class FreqaiDataKitchen: | ||||
|         self.model_filename: str = "" | ||||
|         self.model_dictionary: Dict[Any, Any] = {} | ||||
|         self.live = live | ||||
|         self.svm_model: linear_model.SGDOneClassSVM = None | ||||
|         if not self.live: | ||||
|             self.full_timerange = self.create_fulltimerange(self.config["timerange"], | ||||
|                                                             self.freqai_config["train_period"] | ||||
| @@ -89,6 +94,10 @@ class FreqaiDataKitchen: | ||||
|  | ||||
|         # Save the trained model | ||||
|         dump(model, save_path / str(self.model_filename + "_model.joblib")) | ||||
|  | ||||
|         if self.svm_model is not None: | ||||
|             dump(self.svm_model, save_path / str(self.model_filename + "_svm_model.joblib")) | ||||
|  | ||||
|         self.data["model_path"] = str(self.model_path) | ||||
|         self.data["model_filename"] = str(self.model_filename) | ||||
|         self.data["training_features_list"] = list(self.data_dictionary["train_features"].columns) | ||||
| @@ -104,6 +113,19 @@ class FreqaiDataKitchen: | ||||
|         if self.live: | ||||
|             self.model_dictionary[self.model_filename] = model | ||||
|  | ||||
|         # TODO add a helper function to let user save/load any data they are custom adding. We | ||||
|         # do not want them having to edit the default save/load methods here. Below is an example | ||||
|         # of what we do NOT want. | ||||
|  | ||||
|         # if self.freqai_config['feature_parameters']['determine_statistical_distributions']: | ||||
|         #     self.data_dictionary["upper_quantiles"].to_pickle( | ||||
|         #         save_path / str(self.model_filename + "_upper_quantiles.pkl") | ||||
|         #     ) | ||||
|  | ||||
|         #     self.data_dictionary["lower_quantiles"].to_pickle( | ||||
|         #         save_path / str(self.model_filename + "_lower_quantiles.pkl") | ||||
|         #     ) | ||||
|  | ||||
|         return | ||||
|  | ||||
|     def load_data(self) -> Any: | ||||
| @@ -121,6 +143,19 @@ class FreqaiDataKitchen: | ||||
|             self.model_path / str(self.model_filename + "_trained_df.pkl") | ||||
|         ) | ||||
|  | ||||
|         # TODO add a helper function to let user save/load any data they are custom adding. We | ||||
|         # do not want them having to edit the default save/load methods here. Below is an example | ||||
|         # of what we do NOT want. | ||||
|  | ||||
|         # if self.freqai_config['feature_parameters']['determine_statistical_distributions']: | ||||
|         #     self.data_dictionary["upper_quantiles"] = pd.read_pickle( | ||||
|         #         self.model_path / str(self.model_filename + "_upper_quantiles.pkl") | ||||
|         #     ) | ||||
|  | ||||
|         #     self.data_dictionary["lower_quantiles"] = pd.read_pickle( | ||||
|         #         self.model_path / str(self.model_filename + "_lower_quantiles.pkl") | ||||
|         #     ) | ||||
|  | ||||
|         self.model_path = Path(self.data["model_path"]) | ||||
|         self.model_filename = self.data["model_filename"] | ||||
|  | ||||
| @@ -130,6 +165,10 @@ class FreqaiDataKitchen: | ||||
|         else: | ||||
|             model = load(self.model_path / str(self.model_filename + "_model.joblib")) | ||||
|  | ||||
|         if Path(self.model_path / str(self.model_filename + | ||||
|                 "_svm_model.joblib")).resolve().exists(): | ||||
|             self.svm_model = load(self.model_path / str(self.model_filename + "_svm_model.joblib")) | ||||
|  | ||||
|         assert model, ( | ||||
|                        f"Unable to load model, ensure model exists at " | ||||
|                        f"{self.model_path} " | ||||
| @@ -159,6 +198,12 @@ class FreqaiDataKitchen: | ||||
|         else: | ||||
|             weights = np.ones(len(filtered_dataframe)) | ||||
|  | ||||
|         if self.config["freqai"]["feature_parameters"]["stratify"] > 0: | ||||
|             stratification = np.zeros(len(filtered_dataframe)) | ||||
|             for i in range(1, len(stratification)): | ||||
|                 if i % self.config["freqai"]["feature_parameters"]["stratify"] == 0: | ||||
|                     stratification[i] = 1 | ||||
|  | ||||
|         ( | ||||
|             train_features, | ||||
|             test_features, | ||||
| @@ -170,6 +215,8 @@ class FreqaiDataKitchen: | ||||
|             filtered_dataframe[: filtered_dataframe.shape[0]], | ||||
|             labels, | ||||
|             weights, | ||||
|             stratify=stratification, | ||||
|             # shuffle=False, | ||||
|             **self.config["freqai"]["data_split_parameters"] | ||||
|         ) | ||||
|  | ||||
| @@ -261,9 +308,9 @@ class FreqaiDataKitchen: | ||||
|  | ||||
|         return self.data_dictionary | ||||
|  | ||||
|     def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: | ||||
|     def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: | ||||
|         """ | ||||
|         Standardize all data in the data_dictionary according to the training dataset | ||||
|         Normalize all data in the data_dictionary according to the training dataset | ||||
|         :params: | ||||
|         :data_dictionary: dictionary containing the cleaned and split training/test data/labels | ||||
|         :returns: | ||||
| @@ -297,6 +344,42 @@ class FreqaiDataKitchen: | ||||
|  | ||||
|         return data_dictionary | ||||
|  | ||||
|     def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]: | ||||
|         """ | ||||
|         Standardize all data in the data_dictionary according to the training dataset | ||||
|         :params: | ||||
|         :data_dictionary: dictionary containing the cleaned and split training/test data/labels | ||||
|         :returns: | ||||
|         :data_dictionary: updated dictionary with standardized values. | ||||
|         """ | ||||
|         # standardize the data by training stats | ||||
|         train_max = data_dictionary["train_features"].max() | ||||
|         train_min = data_dictionary["train_features"].min() | ||||
|         data_dictionary["train_features"] = 2 * ( | ||||
|             data_dictionary["train_features"] - train_min | ||||
|         ) / (train_max - train_min) - 1 | ||||
|         data_dictionary["test_features"] = 2 * ( | ||||
|             data_dictionary["test_features"] - train_min | ||||
|         ) / (train_max - train_min) - 1 | ||||
|  | ||||
|         train_labels_max = data_dictionary["train_labels"].max() | ||||
|         train_labels_min = data_dictionary["train_labels"].min() | ||||
|         data_dictionary["train_labels"] = 2 * ( | ||||
|             data_dictionary["train_labels"] - train_labels_min | ||||
|         ) / (train_labels_max - train_labels_min) - 1 | ||||
|         data_dictionary["test_labels"] = 2 * ( | ||||
|             data_dictionary["test_labels"] - train_labels_min | ||||
|         ) / (train_labels_max - train_labels_min) - 1 | ||||
|  | ||||
|         for item in train_max.keys(): | ||||
|             self.data[item + "_max"] = train_max[item] | ||||
|             self.data[item + "_min"] = train_min[item] | ||||
|  | ||||
|         self.data["labels_max"] = train_labels_max | ||||
|         self.data["labels_min"] = train_labels_min | ||||
|  | ||||
|         return data_dictionary | ||||
|  | ||||
|     def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame: | ||||
|         """ | ||||
|         Standardizes a set of data to the range [-1, 1] using the min and max from | ||||
| @@ -305,6 +388,20 @@ class FreqaiDataKitchen: | ||||
|         :df: Dataframe to be standardized | ||||
|         """ | ||||
|  | ||||
|         for item in df.keys(): | ||||
|             df[item] = 2 * (df[item] - self.data[item + "_min"]) / (self.data[item + "_max"] - | ||||
|                                                                     self.data[item + '_min']) - 1 | ||||
|  | ||||
|         return df | ||||
|  | ||||
|     def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame: | ||||
|         """ | ||||
|         Normalizes a set of data using the mean and standard deviation from | ||||
|         the associated training data. | ||||
|         :params: | ||||
|         :df: Dataframe to be standardized | ||||
|         """ | ||||
|  | ||||
|         for item in df.keys(): | ||||
|             df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"] | ||||
|  | ||||
| @@ -420,6 +517,8 @@ class FreqaiDataKitchen: | ||||
|         self.data["n_kept_components"] = n_keep_components | ||||
|         self.pca = pca2 | ||||
|  | ||||
|         logger.info(f'PCA reduced total features from  {n_components} to {n_keep_components}') | ||||
|  | ||||
|         if not self.model_path.is_dir(): | ||||
|             self.model_path.mkdir(parents=True, exist_ok=True) | ||||
|         pk.dump(pca2, open(self.model_path / str(self.model_filename + "_pca_object.pkl"), "wb")) | ||||
| @@ -434,70 +533,53 @@ class FreqaiDataKitchen: | ||||
|  | ||||
|         return avg_mean_dist | ||||
|  | ||||
|     def remove_outliers(self, predict: bool) -> None: | ||||
|         """ | ||||
|         Remove data that looks like an outlier based on the distribution of each | ||||
|         variable. | ||||
|         :params: | ||||
|         :predict: boolean which tells the function if this is prediction data or | ||||
|         training data coming in. | ||||
|         """ | ||||
|  | ||||
|         lower_quantile = self.data_dictionary["train_features"].quantile(0.001) | ||||
|         upper_quantile = self.data_dictionary["train_features"].quantile(0.999) | ||||
|     def use_SVM_to_remove_outliers(self, predict: bool) -> None: | ||||
|  | ||||
|         if predict: | ||||
|  | ||||
|             df = self.data_dictionary["prediction_features"][ | ||||
|                 (self.data_dictionary["prediction_features"] < upper_quantile) | ||||
|                 & (self.data_dictionary["prediction_features"] > lower_quantile) | ||||
|             ] | ||||
|             drop_index = pd.isnull(df).any(1) | ||||
|             self.data_dictionary["prediction_features"].fillna(0, inplace=True) | ||||
|             drop_index = ~drop_index | ||||
|             do_predict = np.array(drop_index.replace(True, 1).replace(False, 0)) | ||||
|             assert self.svm_model, "No svm model available for outlier removal" | ||||
|             y_pred = self.svm_model.predict(self.data_dictionary["prediction_features"]) | ||||
|             do_predict = np.where(y_pred == -1, 0, y_pred) | ||||
|  | ||||
|             logger.info( | ||||
|                 "remove_outliers() tossed %s predictions", | ||||
|                 len(do_predict) - do_predict.sum(), | ||||
|                 f'svm_remove_outliers() tossed {len(do_predict) - do_predict.sum()} predictions' | ||||
|             ) | ||||
|             self.do_predict += do_predict | ||||
|             self.do_predict -= 1 | ||||
|  | ||||
|         else: | ||||
|             # use SGDOneClassSVM to increase speed? | ||||
|             self.svm_model = linear_model.SGDOneClassSVM(nu=0.1).fit( | ||||
|                                                             self.data_dictionary["train_features"] | ||||
|                                                             ) | ||||
|             y_pred = self.svm_model.predict(self.data_dictionary["train_features"]) | ||||
|             dropped_points = np.where(y_pred == -1, 0, y_pred) | ||||
|             # keep_index = np.where(y_pred == 1) | ||||
|             self.data_dictionary["train_features"] = self.data_dictionary[ | ||||
|                                                                 "train_features"][(y_pred == 1)] | ||||
|             self.data_dictionary["train_labels"] = self.data_dictionary[ | ||||
|                                                                 "train_labels"][(y_pred == 1)] | ||||
|             self.data_dictionary["train_weights"] = self.data_dictionary[ | ||||
|                                                                 "train_weights"][(y_pred == 1)] | ||||
|  | ||||
|             filter_train_df = self.data_dictionary["train_features"][ | ||||
|                 (self.data_dictionary["train_features"] < upper_quantile) | ||||
|                 & (self.data_dictionary["train_features"] > lower_quantile) | ||||
|             ] | ||||
|             drop_index = pd.isnull(filter_train_df).any(1) | ||||
|             drop_index = drop_index.replace(True, 1).replace(False, 0) | ||||
|             self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ | ||||
|                 (drop_index == 0) | ||||
|             ] | ||||
|             self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ | ||||
|                 (drop_index == 0) | ||||
|             ] | ||||
|             self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ | ||||
|                 (drop_index == 0) | ||||
|             ] | ||||
|             logger.info( | ||||
|                 f'svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}' | ||||
|                 f' train points from {len(y_pred)}' | ||||
|             ) | ||||
|  | ||||
|             # do the same for the test data | ||||
|             filter_test_df = self.data_dictionary["test_features"][ | ||||
|                 (self.data_dictionary["test_features"] < upper_quantile) | ||||
|                 & (self.data_dictionary["test_features"] > lower_quantile) | ||||
|             ] | ||||
|             drop_index = pd.isnull(filter_test_df).any(1) | ||||
|             drop_index = drop_index.replace(True, 1).replace(False, 0) | ||||
|             self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][ | ||||
|                 (drop_index == 0) | ||||
|             ] | ||||
|             self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ | ||||
|                 (drop_index == 0) | ||||
|             ] | ||||
|             self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ | ||||
|                 (drop_index == 0) | ||||
|             ] | ||||
|             # same for test data | ||||
|             y_pred = self.svm_model.predict(self.data_dictionary["test_features"]) | ||||
|             dropped_points = np.where(y_pred == -1, 0, y_pred) | ||||
|             self.data_dictionary["test_features"] = self.data_dictionary[ | ||||
|                                                                 "test_features"][(y_pred == 1)] | ||||
|             self.data_dictionary["test_labels"] = self.data_dictionary[ | ||||
|                                                                 "test_labels"][(y_pred == 1)] | ||||
|             self.data_dictionary["test_weights"] = self.data_dictionary[ | ||||
|                                                                 "test_weights"][(y_pred == 1)] | ||||
|  | ||||
|             logger.info( | ||||
|                 f'svm_remove_outliers() tossed {len(y_pred) - dropped_points.sum()}' | ||||
|                 f' test points from {len(y_pred)}' | ||||
|             ) | ||||
|  | ||||
|         return | ||||
|  | ||||
| @@ -507,32 +589,6 @@ class FreqaiDataKitchen: | ||||
|         assert features, ("Could not find any features!") | ||||
|         return features | ||||
|  | ||||
|     # def build_feature_list(self, config: dict, metadata: dict) -> list: | ||||
|     #     """ | ||||
|     #     SUPERCEDED BY self.find_features() | ||||
|     #     Build the list of features that will be used to filter | ||||
|     #     the full dataframe. Feature list is construced from the | ||||
|     #     user configuration file. | ||||
|     #     :params: | ||||
|     #     :config: Canonical freqtrade config file containing all | ||||
|     #     user defined input in config['freqai] dictionary. | ||||
|     #     """ | ||||
|     #     features = [] | ||||
|     #     for tf in config["freqai"]["timeframes"]: | ||||
|     #         for ft in config["freqai"]["base_features"]: | ||||
|     #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1): | ||||
|     #                 shift = "" | ||||
|     #                 if n > 0: | ||||
|     #                     shift = "_shift-" + str(n) | ||||
|     #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf) | ||||
|     #                 for p in config["freqai"]["corr_pairlist"]: | ||||
|     #                     if metadata['pair'] in p: | ||||
|     #                         continue  # avoid duplicate features | ||||
|     #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf) | ||||
|  | ||||
|     #     # logger.info("number of features %s", len(features)) | ||||
|     #     return features | ||||
|  | ||||
|     def check_if_pred_in_training_spaces(self) -> None: | ||||
|         """ | ||||
|         Compares the distance from each prediction point to each training data | ||||
| @@ -568,7 +624,7 @@ class FreqaiDataKitchen: | ||||
|         training than older data. | ||||
|         """ | ||||
|  | ||||
|         weights = np.zeros_like(num_weights) | ||||
|         weights = np.zeros(num_weights) | ||||
|         for i in range(1, len(weights)): | ||||
|             weights[len(weights) - i] = np.exp( | ||||
|                 -i / (self.config["freqai"]["feature_parameters"]["weight_factor"] * num_weights) | ||||
| @@ -638,19 +694,23 @@ class FreqaiDataKitchen: | ||||
|  | ||||
|         time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() | ||||
|  | ||||
|         trained_timerange = TimeRange.parse_timerange(training_timerange) | ||||
|         if training_timerange:  # user passed no live_trained_timerange in config | ||||
|             trained_timerange = TimeRange.parse_timerange(training_timerange) | ||||
|             elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY | ||||
|             trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY | ||||
|             trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY | ||||
|             retrain = elapsed_time > self.freqai_config['backtest_period'] | ||||
|         else: | ||||
|             trained_timerange = TimeRange.parse_timerange("20000101-20000201") | ||||
|             trained_timerange.startts = int(time - self.freqai_config['train_period'] * | ||||
|                                             SECONDS_IN_DAY) | ||||
|             trained_timerange.stopts = int(time) | ||||
|             retrain = True | ||||
|  | ||||
|         elapsed_time = (time - trained_timerange.stopts) / SECONDS_IN_DAY | ||||
|  | ||||
|         trained_timerange.startts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY | ||||
|         trained_timerange.stopts += self.freqai_config['backtest_period'] * SECONDS_IN_DAY | ||||
|         start = datetime.datetime.utcfromtimestamp(trained_timerange.startts) | ||||
|         stop = datetime.datetime.utcfromtimestamp(trained_timerange.stopts) | ||||
|  | ||||
|         new_trained_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") | ||||
|  | ||||
|         retrain = elapsed_time > self.freqai_config['backtest_period'] | ||||
|  | ||||
|         if retrain: | ||||
|             coin, _ = metadata['pair'].split("/") | ||||
|             # set the new model_path | ||||
| @@ -738,3 +798,141 @@ class FreqaiDataKitchen: | ||||
|     def np_encoder(self, object): | ||||
|         if isinstance(object, np.generic): | ||||
|             return object.item() | ||||
|  | ||||
|     # Functions containing useful data manipulation examples, but not actively in use. | ||||
|  | ||||
|     # def build_feature_list(self, config: dict, metadata: dict) -> list: | ||||
|     #     """ | ||||
|     #     SUPERCEDED BY self.find_features() | ||||
|     #     Build the list of features that will be used to filter | ||||
|     #     the full dataframe. Feature list is construced from the | ||||
|     #     user configuration file. | ||||
|     #     :params: | ||||
|     #     :config: Canonical freqtrade config file containing all | ||||
|     #     user defined input in config['freqai] dictionary. | ||||
|     #     """ | ||||
|     #     features = [] | ||||
|     #     for tf in config["freqai"]["timeframes"]: | ||||
|     #         for ft in config["freqai"]["base_features"]: | ||||
|     #             for n in range(config["freqai"]["feature_parameters"]["shift"] + 1): | ||||
|     #                 shift = "" | ||||
|     #                 if n > 0: | ||||
|     #                     shift = "_shift-" + str(n) | ||||
|     #                 features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf) | ||||
|     #                 for p in config["freqai"]["corr_pairlist"]: | ||||
|     #                     if metadata['pair'] in p: | ||||
|     #                         continue  # avoid duplicate features | ||||
|     #                     features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf) | ||||
|  | ||||
|     #     # logger.info("number of features %s", len(features)) | ||||
|     #     return features | ||||
|  | ||||
|     # Possibly phasing these outlier removal methods below out in favor of | ||||
|     # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance). | ||||
|     # But these have good data manipulation examples, so keep them commented here for now. | ||||
|  | ||||
|     # def determine_statistical_distributions(self) -> None: | ||||
|     #     from fitter import Fitter | ||||
|  | ||||
|     #     logger.info('Determining best model for all features, may take some time') | ||||
|  | ||||
|     #     def compute_quantiles(ft): | ||||
|     #         f = Fitter(self.data_dictionary["train_features"][ft], | ||||
|     #                    distributions=['gamma', 'cauchy', 'laplace', | ||||
|     #                                   'beta', 'uniform', 'lognorm']) | ||||
|     #         f.fit() | ||||
|     #         # f.summary() | ||||
|     #         dist = list(f.get_best().items())[0][0] | ||||
|     #         params = f.get_best()[dist] | ||||
|     #         upper_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.999, **params) | ||||
|     #         lower_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.001, **params) | ||||
|  | ||||
|     #         return ft, upper_q, lower_q, dist | ||||
|  | ||||
|     #     quantiles_tuple = Parallel(n_jobs=-1)( | ||||
|     #         delayed(compute_quantiles)(ft) for ft in self.data_dictionary[ | ||||
|     #                                                       'train_features'].columns) | ||||
|  | ||||
|     #     df = pd.DataFrame(quantiles_tuple, columns=['features', 'upper_quantiles', | ||||
|     #                                                 'lower_quantiles', 'dist']) | ||||
|     #     self.data_dictionary['upper_quantiles'] = df['upper_quantiles'] | ||||
|     #     self.data_dictionary['lower_quantiles'] = df['lower_quantiles'] | ||||
|  | ||||
|     #     return | ||||
|  | ||||
|     # def remove_outliers(self, predict: bool) -> None: | ||||
|     #     """ | ||||
|     #     Remove data that looks like an outlier based on the distribution of each | ||||
|     #     variable. | ||||
|     #     :params: | ||||
|     #     :predict: boolean which tells the function if this is prediction data or | ||||
|     #     training data coming in. | ||||
|     #     """ | ||||
|  | ||||
|     #     lower_quantile = self.data_dictionary["lower_quantiles"].to_numpy() | ||||
|     #     upper_quantile = self.data_dictionary["upper_quantiles"].to_numpy() | ||||
|  | ||||
|     #     if predict: | ||||
|  | ||||
|     #         df = self.data_dictionary["prediction_features"][ | ||||
|     #             (self.data_dictionary["prediction_features"] < upper_quantile) | ||||
|     #             & (self.data_dictionary["prediction_features"] > lower_quantile) | ||||
|     #         ] | ||||
|     #         drop_index = pd.isnull(df).any(1) | ||||
|     #         self.data_dictionary["prediction_features"].fillna(0, inplace=True) | ||||
|     #         drop_index = ~drop_index | ||||
|     #         do_predict = np.array(drop_index.replace(True, 1).replace(False, 0)) | ||||
|  | ||||
|     #         logger.info( | ||||
|     #             "remove_outliers() tossed %s predictions", | ||||
|     #             len(do_predict) - do_predict.sum(), | ||||
|     #         ) | ||||
|     #         self.do_predict += do_predict | ||||
|     #         self.do_predict -= 1 | ||||
|  | ||||
|     #     else: | ||||
|  | ||||
|     #         filter_train_df = self.data_dictionary["train_features"][ | ||||
|     #             (self.data_dictionary["train_features"] < upper_quantile) | ||||
|     #             & (self.data_dictionary["train_features"] > lower_quantile) | ||||
|     #         ] | ||||
|     #         drop_index = pd.isnull(filter_train_df).any(1) | ||||
|     #         drop_index = drop_index.replace(True, 1).replace(False, 0) | ||||
|     #         self.data_dictionary["train_features"] = self.data_dictionary["train_features"][ | ||||
|     #             (drop_index == 0) | ||||
|     #         ] | ||||
|     #         self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][ | ||||
|     #             (drop_index == 0) | ||||
|     #         ] | ||||
|     #         self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][ | ||||
|     #             (drop_index == 0) | ||||
|     #         ] | ||||
|  | ||||
|     #         logger.info( | ||||
|     #             f'remove_outliers() tossed {drop_index.sum()}' | ||||
|     #             f' training points from {len(filter_train_df)}' | ||||
|     #         ) | ||||
|  | ||||
|     #         # do the same for the test data | ||||
|     #         filter_test_df = self.data_dictionary["test_features"][ | ||||
|     #             (self.data_dictionary["test_features"] < upper_quantile) | ||||
|     #             & (self.data_dictionary["test_features"] > lower_quantile) | ||||
|     #         ] | ||||
|     #         drop_index = pd.isnull(filter_test_df).any(1) | ||||
|     #         drop_index = drop_index.replace(True, 1).replace(False, 0) | ||||
|     #         self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][ | ||||
|     #             (drop_index == 0) | ||||
|     #         ] | ||||
|     #         self.data_dictionary["test_features"] = self.data_dictionary["test_features"][ | ||||
|     #             (drop_index == 0) | ||||
|     #         ] | ||||
|     #         self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][ | ||||
|     #             (drop_index == 0) | ||||
|     #         ] | ||||
|  | ||||
|     #         logger.info( | ||||
|     #             f'remove_outliers() tossed {drop_index.sum()}' | ||||
|     #             f' test points from {len(filter_test_df)}' | ||||
|     #         ) | ||||
|  | ||||
|     #     return | ||||
|   | ||||
| @@ -62,6 +62,7 @@ class IFreqaiModel(ABC): | ||||
|         self.predictions = None | ||||
|         self.training_on_separate_thread = False | ||||
|         self.retrain = False | ||||
|         self.first = True | ||||
|  | ||||
|     def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame: | ||||
|         """ | ||||
| @@ -80,12 +81,12 @@ class IFreqaiModel(ABC): | ||||
|         :metadata: pair metadata coming from strategy. | ||||
|         """ | ||||
|  | ||||
|         live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE) | ||||
|         self.live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE) | ||||
|  | ||||
|         self.pair = metadata["pair"] | ||||
|         self.dh = FreqaiDataKitchen(self.config, dataframe, live) | ||||
|         self.dh = FreqaiDataKitchen(self.config, dataframe, self.live) | ||||
|  | ||||
|         if live: | ||||
|         if self.live: | ||||
|             # logger.info('testing live') | ||||
|             self.start_live(dataframe, metadata, strategy) | ||||
|  | ||||
| @@ -115,11 +116,12 @@ class IFreqaiModel(ABC): | ||||
|                 self.dh.save_data(self.model) | ||||
|             else: | ||||
|                 self.model = self.dh.load_data() | ||||
|                 strategy_provided_features = self.dh.find_features(dataframe_train) | ||||
|                 if strategy_provided_features != self.dh.training_features_list: | ||||
|                     logger.info("User changed input features, retraining model.") | ||||
|                     self.model = self.train(dataframe_train, metadata) | ||||
|                     self.dh.save_data(self.model) | ||||
|                 # strategy_provided_features = self.dh.find_features(dataframe_train) | ||||
|                 # # TOFIX doesn't work with PCA | ||||
|                 # if strategy_provided_features != self.dh.training_features_list: | ||||
|                 #     logger.info("User changed input features, retraining model.") | ||||
|                 #     self.model = self.train(dataframe_train, metadata) | ||||
|                 #     self.dh.save_data(self.model) | ||||
|  | ||||
|             preds, do_preds = self.predict(dataframe_backtest, metadata) | ||||
|  | ||||
| @@ -148,7 +150,7 @@ class IFreqaiModel(ABC): | ||||
|         if not self.training_on_separate_thread: | ||||
|             # this will also prevent other pairs from trying to train simultaneously. | ||||
|             (self.retrain, | ||||
|              new_trained_timerange) = self.dh.check_if_new_training_required(self.freqai_info[ | ||||
|              self.new_trained_timerange) = self.dh.check_if_new_training_required(self.freqai_info[ | ||||
|                                                                         'live_trained_timerange'], | ||||
|                                                                         metadata) | ||||
|         else: | ||||
| @@ -156,14 +158,19 @@ class IFreqaiModel(ABC): | ||||
|             self.retrain = False | ||||
|  | ||||
|         if self.retrain or not file_exists: | ||||
|             self.training_on_separate_thread = True  # acts like a lock | ||||
|             self.retrain_model_on_separate_thread(new_trained_timerange, metadata, strategy) | ||||
|             if self.first: | ||||
|                 self.train_model_in_series(self.new_trained_timerange, metadata, strategy) | ||||
|                 self.first = False | ||||
|             else: | ||||
|                 self.training_on_separate_thread = True  # acts like a lock | ||||
|                 self.retrain_model_on_separate_thread(self.new_trained_timerange, | ||||
|                                                       metadata, strategy) | ||||
|  | ||||
|         self.model = self.dh.load_data() | ||||
|  | ||||
|         strategy_provided_features = self.dh.find_features(dataframe) | ||||
|         if strategy_provided_features != self.dh.training_features_list: | ||||
|             self.train_model_in_series(new_trained_timerange, metadata, strategy) | ||||
|             self.train_model_in_series(self.new_trained_timerange, metadata, strategy) | ||||
|  | ||||
|         preds, do_preds = self.predict(dataframe, metadata) | ||||
|         self.dh.append_predictions(preds, do_preds, len(dataframe)) | ||||
| @@ -215,12 +222,36 @@ class IFreqaiModel(ABC): | ||||
|         data (NaNs) or felt uncertain about data (PCA and DI index) | ||||
|         """ | ||||
|  | ||||
|     @abstractmethod | ||||
|     def data_cleaning_train(self) -> None: | ||||
|         """ | ||||
|         User can add data analysis and cleaning here. | ||||
|         Any function inside this method should drop training data points from the filtered_dataframe | ||||
|         based on user-decided logic. See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an | ||||
|         example of how outlier data points are dropped from the dataframe used for training. | ||||
|         """ | ||||
|  | ||||
|     @abstractmethod | ||||
|     def data_cleaning_predict(self) -> None: | ||||
|         """ | ||||
|         User can add data analysis and cleaning here. | ||||
|         These functions each modify self.dh.do_predict, which is a dataframe with equal length | ||||
|         to the number of candles coming from and returning to the strategy. Inside do_predict, | ||||
|         1 allows prediction and < 0 signals to the strategy that the model is not confident in | ||||
|         the prediction. | ||||
|         See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an example | ||||
|         of how the do_predict vector is modified. do_predict is ultimately passed back to the | ||||
|         strategy for buy signals. | ||||
|         """ | ||||
|  | ||||
|     def model_exists(self, pair: str, training_timerange: str) -> bool: | ||||
|         """ | ||||
|         Given a pair and training timerange, check if a model already exists | ||||
|         :param pair: pair e.g. BTC/USD | ||||
|         :param training_timerange: timerange string used to build the model filename | ||||
|         """ | ||||
|         if self.live and training_timerange is None: | ||||
|             return False | ||||
|         coin, _ = pair.split("/") | ||||
|         self.dh.model_filename = "cb_" + coin.lower() + "_" + training_timerange | ||||
|         path_to_modelfile = Path(self.dh.model_path / str(self.dh.model_filename + "_model.joblib")) | ||||
| @@ -265,3 +296,4 @@ class IFreqaiModel(ABC): | ||||
|  | ||||
|         self.model = self.train(unfiltered_dataframe, metadata) | ||||
|         self.dh.save_data(self.model) | ||||
|         self.retrain = False | ||||
|   | ||||
| @@ -29,7 +29,7 @@ class CatboostPredictionModel(IFreqaiModel): | ||||
|             dataframe["close"] | ||||
|             .shift(-self.feature_parameters["period"]) | ||||
|             .rolling(self.feature_parameters["period"]) | ||||
|             .max() | ||||
|             .mean() | ||||
|             / dataframe["close"] | ||||
|             - 1 | ||||
|         ) | ||||
| @@ -68,15 +68,11 @@ class CatboostPredictionModel(IFreqaiModel): | ||||
|         # standardize all data based on train_dataset only | ||||
|         data_dictionary = self.dh.standardize_data(data_dictionary) | ||||
|  | ||||
|         # optional additional data cleaning | ||||
|         if self.feature_parameters["principal_component_analysis"]: | ||||
|             self.dh.principal_component_analysis() | ||||
|         if self.feature_parameters["remove_outliers"]: | ||||
|             self.dh.remove_outliers(predict=False) | ||||
|         if self.feature_parameters["DI_threshold"]: | ||||
|             self.dh.data["avg_mean_dist"] = self.dh.compute_distances() | ||||
|         # optional additional data cleaning/analysis | ||||
|         self.data_cleaning_train() | ||||
|  | ||||
|         logger.info("length of train data %s", len(data_dictionary["train_features"])) | ||||
|         logger.info(f'Training model on {len(self.dh.training_features_list)} features') | ||||
|         logger.info(f'Training model on {len(data_dictionary["train_features"])} data points') | ||||
|  | ||||
|         model = self.fit(data_dictionary) | ||||
|  | ||||
| @@ -86,9 +82,7 @@ class CatboostPredictionModel(IFreqaiModel): | ||||
|  | ||||
|     def fit(self, data_dictionary: Dict) -> Any: | ||||
|         """ | ||||
|         Most regressors use the same function names and arguments e.g. user | ||||
|         can drop in LGBMRegressor in place of CatBoostRegressor and all data | ||||
|         management will be properly handled by Freqai. | ||||
|         User sets up the training and test data to fit their desired model here | ||||
|         :params: | ||||
|         :data_dictionary: the dictionary constructed by DataHandler to hold | ||||
|         all the training and test data/labels. | ||||
| @@ -133,7 +127,51 @@ class CatboostPredictionModel(IFreqaiModel): | ||||
|         filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe) | ||||
|         self.dh.data_dictionary["prediction_features"] = filtered_dataframe | ||||
|  | ||||
|         # optional additional data cleaning | ||||
|         # optional additional data cleaning/analysis | ||||
|         self.data_cleaning_predict(filtered_dataframe) | ||||
|  | ||||
|         predictions = self.model.predict(self.dh.data_dictionary["prediction_features"]) | ||||
|  | ||||
|         # compute the non-standardized predictions | ||||
|         self.dh.predictions = (predictions + 1) * (self.dh.data["labels_max"] - | ||||
|                                                    self.dh.data["labels_min"]) / 2 + self.dh.data[ | ||||
|                                                                                      "labels_min"] | ||||
|  | ||||
|         # logger.info("--------------------Finished prediction--------------------") | ||||
|  | ||||
|         return (self.dh.predictions, self.dh.do_predict) | ||||
|  | ||||
|     def data_cleaning_train(self) -> None: | ||||
|         """ | ||||
|         User can add data analysis and cleaning here. | ||||
|         Any function inside this method should drop training data points from the filtered_dataframe | ||||
|         based on user-decided logic. See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an | ||||
|         example of how outlier data points are dropped from the dataframe used for training. | ||||
|         """ | ||||
|         if self.feature_parameters["principal_component_analysis"]: | ||||
|             self.dh.principal_component_analysis() | ||||
|  | ||||
|         # if self.feature_parameters["determine_statistical_distributions"]: | ||||
|         #     self.dh.determine_statistical_distributions() | ||||
|         # if self.feature_parameters["remove_outliers"]: | ||||
|         #     self.dh.remove_outliers(predict=False) | ||||
|  | ||||
|         if self.feature_parameters["use_SVM_to_remove_outliers"]: | ||||
|             self.dh.use_SVM_to_remove_outliers(predict=False) | ||||
|         if self.feature_parameters["DI_threshold"]: | ||||
|             self.dh.data["avg_mean_dist"] = self.dh.compute_distances() | ||||
|  | ||||
|     def data_cleaning_predict(self, filtered_dataframe: DataFrame) -> None: | ||||
|         """ | ||||
|         User can add data analysis and cleaning here. | ||||
|         These functions each modify self.dh.do_predict, which is a dataframe with equal length | ||||
|         to the number of candles coming from and returning to the strategy. Inside do_predict, | ||||
|         1 allows prediction and < 0 signals to the strategy that the model is not confident in | ||||
|         the prediction. | ||||
|         See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an example | ||||
|         of how the do_predict vector is modified. do_predict is ultimately passed back to the | ||||
|         strategy for buy signals. | ||||
|         """ | ||||
|         if self.feature_parameters["principal_component_analysis"]: | ||||
|             pca_components = self.dh.pca.transform(filtered_dataframe) | ||||
|             self.dh.data_dictionary["prediction_features"] = pd.DataFrame( | ||||
| @@ -142,17 +180,13 @@ class CatboostPredictionModel(IFreqaiModel): | ||||
|                 index=filtered_dataframe.index, | ||||
|             ) | ||||
|  | ||||
|         if self.feature_parameters["remove_outliers"]: | ||||
|             self.dh.remove_outliers(predict=True)  # creates dropped index | ||||
|         # if self.feature_parameters["determine_statistical_distributions"]: | ||||
|         #     self.dh.determine_statistical_distributions() | ||||
|         # if self.feature_parameters["remove_outliers"]: | ||||
|         #     self.dh.remove_outliers(predict=True)  # creates dropped index | ||||
|  | ||||
|         if self.feature_parameters["use_SVM_to_remove_outliers"]: | ||||
|             self.dh.use_SVM_to_remove_outliers(predict=True) | ||||
|  | ||||
|         if self.feature_parameters["DI_threshold"]: | ||||
|             self.dh.check_if_pred_in_training_spaces()  # sets do_predict | ||||
|  | ||||
|         predictions = self.model.predict(self.dh.data_dictionary["prediction_features"]) | ||||
|  | ||||
|         # compute the non-standardized predictions | ||||
|         self.dh.predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"] | ||||
|  | ||||
|         # logger.info("--------------------Finished prediction--------------------") | ||||
|  | ||||
|         return (self.dh.predictions, self.dh.do_predict) | ||||
|   | ||||
| @@ -1,159 +0,0 @@ | ||||
| import logging | ||||
| from typing import Any, Dict, Tuple | ||||
|  | ||||
| import pandas as pd | ||||
| from catboost import CatBoostRegressor, Pool | ||||
| from pandas import DataFrame | ||||
|  | ||||
| from freqtrade.freqai.freqai_interface import IFreqaiModel | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| class ExamplePredictionModel(IFreqaiModel): | ||||
|     """ | ||||
|     User created prediction model. The class needs to override three necessary | ||||
|     functions, predict(), train(), fit(). The class inherits IFreqaiModel which | ||||
|     has its own FreqaiDataKitchen (self.dh) where data is held, saved, loaded, and managed. | ||||
|     """ | ||||
|  | ||||
|     def make_labels(self, dataframe: DataFrame) -> DataFrame: | ||||
|         """ | ||||
|         User defines the labels here (target values). | ||||
|         :params: | ||||
|         :dataframe: the full dataframe for the present training period | ||||
|         """ | ||||
|  | ||||
|         dataframe["s"] = ( | ||||
|             dataframe["close"] | ||||
|             .shift(-self.feature_parameters["period"]) | ||||
|             .rolling(self.feature_parameters["period"]) | ||||
|             .max() | ||||
|             / dataframe["close"] | ||||
|             - 1 | ||||
|         ) | ||||
|         self.dh.data["s_mean"] = dataframe["s"].mean() | ||||
|         self.dh.data["s_std"] = dataframe["s"].std() | ||||
|  | ||||
|         # logger.info("label mean", self.dh.data["s_mean"], "label std", self.dh.data["s_std"]) | ||||
|  | ||||
|         return dataframe["s"] | ||||
|  | ||||
|     def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame, DataFrame]: | ||||
|         """ | ||||
|         Filter the training data and train a model to it. Train makes heavy use of the datakitchen | ||||
|         for storing, saving, loading, and analyzing the data. | ||||
|         :params: | ||||
|         :unfiltered_dataframe: Full dataframe for the current training period | ||||
|         :metadata: pair metadata from strategy. | ||||
|         :returns: | ||||
|         :model: Trained model which can be used for inference (self.predict) | ||||
|         """ | ||||
|         logger.info("--------------------Starting training--------------------") | ||||
|  | ||||
|         # create the full feature list based on user config info | ||||
|         self.dh.training_features_list = self.dh.build_feature_list(self.config, metadata) | ||||
|         unfiltered_labels = self.make_labels(unfiltered_dataframe) | ||||
|  | ||||
|         # filter the features requested by user in the configuration file and elegantly handle NaNs | ||||
|         features_filtered, labels_filtered = self.dh.filter_features( | ||||
|             unfiltered_dataframe, | ||||
|             self.dh.training_features_list, | ||||
|             unfiltered_labels, | ||||
|             training_filter=True, | ||||
|         ) | ||||
|  | ||||
|         # split data into train/test data. | ||||
|         data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered) | ||||
|         # standardize all data based on train_dataset only | ||||
|         data_dictionary = self.dh.standardize_data(data_dictionary) | ||||
|  | ||||
|         # optional additional data cleaning | ||||
|         if self.feature_parameters["principal_component_analysis"]: | ||||
|             self.dh.principal_component_analysis() | ||||
|         if self.feature_parameters["remove_outliers"]: | ||||
|             self.dh.remove_outliers(predict=False) | ||||
|         if self.feature_parameters["DI_threshold"]: | ||||
|             self.dh.data["avg_mean_dist"] = self.dh.compute_distances() | ||||
|  | ||||
|         logger.info("length of train data %s", len(data_dictionary["train_features"])) | ||||
|  | ||||
|         model = self.fit(data_dictionary) | ||||
|  | ||||
|         logger.info(f'--------------------done training {metadata["pair"]}--------------------') | ||||
|  | ||||
|         return model | ||||
|  | ||||
|     def fit(self, data_dictionary: Dict) -> Any: | ||||
|         """ | ||||
|         Most regressors use the same function names and arguments e.g. user | ||||
|         can drop in LGBMRegressor in place of CatBoostRegressor and all data | ||||
|         management will be properly handled by Freqai. | ||||
|         :params: | ||||
|         :data_dictionary: the dictionary constructed by DataHandler to hold | ||||
|         all the training and test data/labels. | ||||
|         """ | ||||
|  | ||||
|         train_data = Pool( | ||||
|             data=data_dictionary["train_features"], | ||||
|             label=data_dictionary["train_labels"], | ||||
|             weight=data_dictionary["train_weights"], | ||||
|         ) | ||||
|  | ||||
|         test_data = Pool( | ||||
|             data=data_dictionary["test_features"], | ||||
|             label=data_dictionary["test_labels"], | ||||
|             weight=data_dictionary["test_weights"], | ||||
|         ) | ||||
|  | ||||
|         model = CatBoostRegressor( | ||||
|             verbose=100, early_stopping_rounds=400, **self.model_training_parameters | ||||
|         ) | ||||
|         model.fit(X=train_data, eval_set=test_data) | ||||
|  | ||||
|         return model | ||||
|  | ||||
|     def predict(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame, | ||||
|                                                                                 DataFrame]: | ||||
|         """ | ||||
|         Filter the prediction features data and predict with it. | ||||
|         :param: unfiltered_dataframe: Full dataframe for the current backtest period. | ||||
|         :return: | ||||
|         :predictions: np.array of predictions | ||||
|         :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove | ||||
|         data (NaNs) or felt uncertain about data (PCA and DI index) | ||||
|         """ | ||||
|  | ||||
|         # logger.info("--------------------Starting prediction--------------------") | ||||
|  | ||||
|         original_feature_list = self.dh.build_feature_list(self.config, metadata) | ||||
|         filtered_dataframe, _ = self.dh.filter_features( | ||||
|             unfiltered_dataframe, original_feature_list, training_filter=False | ||||
|         ) | ||||
|         filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe) | ||||
|         self.dh.data_dictionary["prediction_features"] = filtered_dataframe | ||||
|  | ||||
|         # optional additional data cleaning | ||||
|         if self.feature_parameters["principal_component_analysis"]: | ||||
|             pca_components = self.dh.pca.transform(filtered_dataframe) | ||||
|             self.dh.data_dictionary["prediction_features"] = pd.DataFrame( | ||||
|                 data=pca_components, | ||||
|                 columns=["PC" + str(i) for i in range(0, self.dh.data["n_kept_components"])], | ||||
|                 index=filtered_dataframe.index, | ||||
|             ) | ||||
|  | ||||
|         if self.feature_parameters["remove_outliers"]: | ||||
|             self.dh.remove_outliers(predict=True)  # creates dropped index | ||||
|  | ||||
|         if self.feature_parameters["DI_threshold"]: | ||||
|             self.dh.check_if_pred_in_training_spaces()  # sets do_predict | ||||
|  | ||||
|         predictions = self.model.predict(self.dh.data_dictionary["prediction_features"]) | ||||
|  | ||||
|         # compute the non-standardized predictions | ||||
|         self.dh.predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"] | ||||
|  | ||||
|         # logger.info("--------------------Finished prediction--------------------") | ||||
|  | ||||
|         return (self.dh.predictions, self.dh.do_predict) | ||||
| @@ -166,8 +166,8 @@ class FreqaiExampleStrategy(IStrategy): | ||||
|             dataframe["target_std"], | ||||
|         ) = self.model.bridge.start(dataframe, metadata, self) | ||||
|  | ||||
|         dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 0.5 | ||||
|         dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.5 | ||||
|         dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.5 | ||||
|         dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1 | ||||
|         return dataframe | ||||
|  | ||||
|     def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: | ||||
| @@ -183,7 +183,7 @@ class FreqaiExampleStrategy(IStrategy): | ||||
|  | ||||
|     def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: | ||||
|         sell_conditions = [ | ||||
|             (dataframe["prediction"] < dataframe["sell_roi"]) & (dataframe["do_predict"] == 1) | ||||
|             (dataframe["do_predict"] <= 0) | ||||
|         ] | ||||
|         if sell_conditions: | ||||
|             dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), "sell"] = 1 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user