Merge branch 'feature/training_data_slice_normalization' into develop

2022-10-08 09:23:40 +02:00
parent e337d4b78a ffebd812d1
commit 32c6097889
4 changed files with 690 additions and 8 deletions
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -440,8 +440,8 @@ class FreqaiDataDrawer:
            rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)

        # save the train data to file so we can check preds for area of applicability later
-        dk.data_dictionary["train_features"].to_pickle(
-            save_path / f"{dk.model_filename}_trained_df.pkl"
+        dk.data_dictionary["train_features"].astype("float32").to_pickle(
+            save_path / f"{dk.model_filename}_trained_df.pkl.bz2"
        )

        dk.data_dictionary["train_dates"].to_pickle(
@@ -499,7 +499,7 @@ class FreqaiDataDrawer:
            dk.label_list = dk.data["label_list"]

        dk.data_dictionary["train_features"] = pd.read_pickle(
-            dk.data_path / f"{dk.model_filename}_trained_df.pkl"
+            dk.data_path / f"{dk.model_filename}_trained_df.pkl.bz2"
        )

        # try to access model in memory instead of loading object from disk to save time
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -859,11 +859,25 @@ class FreqaiDataKitchen:
        """
        Add noise to train features to reduce the risk of overfitting.
        """
-        mu = 0  # no shift
-        sigma = self.freqai_config["feature_parameters"]["noise_standard_deviation"]
-        compute_df = self.data_dictionary['train_features']
-        noise = np.random.normal(mu, sigma, [compute_df.shape[0], compute_df.shape[1]])
-        self.data_dictionary['train_features'] += noise
+        da = self.freqai_config["feature_parameters"]["data_augment"]
+        X = self.data_dictionary['train_features']
+        y = self.data_dictionary['train_labels']
+        da_type = da.get("type", "std")
+        if da_type == "std":
+            # draw alpha from a standard normal distribution (mean 0, std 1)
+            alpha = np.random.randn(*X.shape)
+            scale = da.get("vaue", 0.01)
+            Xaugmented = X +  alpha * scale * X.std(0)[None, :]
+            X = np.vstack((X, Xaugmented))
+            y = y.append(y)
+            self.data_dictionary['train_features'] = X
+            self.data_dictionary['train_labels'] = y
+        elif da_type == "constant":
+            mu = 0  # no shift
+            sigma = self.freqai_config["feature_parameters"]["data_augment"]["value"]
+            compute_df = self.data_dictionary['train_features']
+            noise = np.random.normal(mu, sigma, [compute_df.shape[0], compute_df.shape[1]])
+            self.data_dictionary['train_features'] += noise
        return

    def find_features(self, dataframe: DataFrame) -> None:
@@ -1209,6 +1223,7 @@ class FreqaiDataKitchen:

        for key in self.label_list:
            if dataframe[key].dtype == object:
+                # TODO: make sure the values of `dataframe[key].dropna().unique()` are object type too!
                self.unique_classes[key] = dataframe[key].dropna().unique()

        if self.unique_classes: