start collecting indefinite history of predictions. Allow user to generate statistics on these predictions. Direct FreqAI to save these to disk and reload them if available.

2022-07-11 22:01:48 +02:00 · 2022-07-11 22:01:48 +02:00 · 8ce6b18318
commit 8ce6b18318
parent 3fc92b1b21
5 changed files with 109 additions and 39 deletions
--- a/docs/freqai.md
+++ b/docs/freqai.md
@ -562,6 +562,28 @@ a certain number of hours in age by setting the `expiration_hours` in the config
 In the present example, the user will only allow predictions on models that are less than 1/2 hours
 old. 

+## Choosing the calculation of the `target_roi`
+
+As shown in `templates/FreqaiExampleStrategy.py`, the `target_roi` is based on two metrics computed
+by FreqAI: `label_mean` and `label_std`. These are the statistics associated with the labels used 
+*during the most recent training*. This allows the model to know what magnitude of target to 
+expect, since it stems directly from the training data. By default, FreqAI computes these statistics 
+from the training data and assumes the labels are Gaussian distributed. These are big assumptions 
+that the user should consider when creating their labels. If the user wants to consider the population
+of *historical predictions* for creating the dynamic target instead of the trained labels, the user 
+can do so by setting `fit_live_predictions_candles` to the number of historical prediction candles
+the user wishes to use to generate target statistics. 
+
+```json
+    "freqai": {
+        "fit_live_predictions_candles": 300,
+    }
+```
+
+If the user sets this value, FreqAI will initially use the predictions from the training data set
+and then subsequently begin introducing real prediction data as it is generated. FreqAI will save 
+this historical data to be reloaded if the user stops and restarts with the same `identifier`.
+
 <!-- ## Dynamic target expectation

 The labels used for model training have a unique statistical distribution for each separate model training. 
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@ -38,12 +38,14 @@ class FreqaiDataDrawer:
        self.model_return_values: Dict[str, Any] = {}
        self.pair_data_dict: Dict[str, Any] = {}
        self.historic_data: Dict[str, Any] = {}
+        self.historic_predictions: Dict[str, Any] = {}
        self.follower_dict: Dict[str, Any] = {}
        self.full_path = full_path
        self.follow_mode = follow_mode
        if follow_mode:
            self.create_follower_dict()
        self.load_drawer_from_disk()
+        self.load_historic_predictions_from_disk()
        self.training_queue: Dict[str, int] = {}
        self.history_lock = threading.Lock()

@ -68,6 +70,29 @@ class FreqaiDataDrawer:

        return exists

+    def load_historic_predictions_from_disk(self):
+        """
+        Locate and load previously saved historic predictions.
+
+        Looks for `historic_predictions.json` inside `self.full_path`. When the file
+        exists, its parsed content replaces `self.historic_predictions`. When it does
+        not, the attribute is left untouched and the miss is logged (info when not in
+        follow mode, warning in follow mode).
+        :returns:
+        exists: bool = whether or not the historic predictions file was located
+        """
+        hp_path = self.full_path / "historic_predictions.json"
+        exists = Path(hp_path).resolve().exists()
+        if exists:
+            with open(hp_path, "r") as fp:
+                # Must land in historic_predictions: assigning to pair_dict here would
+                # overwrite the pair metadata and never restore the predictions.
+                # NOTE(review): json.load yields plain dicts/lists, while
+                # append_model_predictions uses each entry as a DataFrame
+                # (.index/.columns) - confirm the on-disk format round-trips.
+                self.historic_predictions = json.load(fp)
+            logger.info(f"Found existing historic predictions at {self.full_path}, but beware of "
+                        "that statistics may be inaccurate if the bot has been offline for "
+                        "an extended period of time.")
+        elif not self.follow_mode:
+            logger.info("Could not find existing historic_predictions, starting from scratch")
+        else:
+            logger.warning(
+                f"Follower could not find historic predictions at {self.full_path} "
+                "sending null values back to strategy"
+            )
+
+        return exists
+
+
    def save_drawer_to_disk(self):
        """
        Save data drawer full of all pair model metadata in present model folder.
@ -75,6 +100,13 @@ class FreqaiDataDrawer:
        with open(self.full_path / str("pair_dictionary.json"), "w") as fp:
            json.dump(self.pair_dict, fp, default=self.np_encoder)

+    def save_historic_predictions_to_disk(self):
+        """
+        Dump `self.historic_predictions` as JSON to `historic_predictions.json`
+        inside `self.full_path`, overwriting any existing file. Objects the json
+        module cannot serialize natively are handed to `self.np_encoder`.
+        """
+        target = self.full_path / "historic_predictions.json"
+        with open(target, "w") as handle:
+            json.dump(self.historic_predictions, handle, default=self.np_encoder)
+
    def save_follower_dict_to_disk(self):
        """
        Save follower dictionary to disk (used by strategy for persistent prediction targets)
@ -176,16 +208,18 @@ class FreqaiDataDrawer:
        historical candles, and also stores historical predictions despite retrainings (so stored
        predictions are true predictions, not just inferencing on trained data)
        """
-        self.model_return_values[pair] = pd.DataFrame()
+        # dynamic df returned to strategy and plotted in frequi
+        mrv_df = self.model_return_values[pair] = pd.DataFrame()
+
        for label in dk.label_list:
-            self.model_return_values[pair][label] = pred_df[label]
-            self.model_return_values[pair][f"{label}_mean"] = dk.data["labels_mean"][label]
-            self.model_return_values[pair][f"{label}_std"] = dk.data["labels_std"][label]
+            mrv_df[label] = pred_df[label]
+            mrv_df[f"{label}_mean"] = dk.data["labels_mean"][label]
+            mrv_df[f"{label}_std"] = dk.data["labels_std"][label]

        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
-            self.model_return_values[pair]["DI_values"] = dk.DI_values
+            mrv_df["DI_values"] = dk.DI_values

-        self.model_return_values[pair]["do_predict"] = do_preds
+        mrv_df["do_predict"] = do_preds

    def append_model_predictions(self, pair: str, predictions, do_preds, dk, len_df) -> None:

@ -201,6 +235,13 @@ class FreqaiDataDrawer:
            i = length_difference + 1

        df = self.model_return_values[pair] = self.model_return_values[pair].shift(-i)
+        hp_df = self.historic_predictions[pair]
+
+        # here are some pandas hula hoops to accommodate the possibility of a series
+        # or dataframe depending number of labels requested by user
+        nan_df = pd.DataFrame(np.nan, index=hp_df.index[-2:] + 2, columns=hp_df.columns)
+        hp_df = pd.concat([hp_df, nan_df], ignore_index=True, axis=0)
+        hp_df = pd.concat([hp_df, nan_df[-2:-1]], axis=0)

        for label in dk.label_list:
            df[label].iloc[-1] = predictions[label].iloc[-1]
@ -212,6 +253,9 @@ class FreqaiDataDrawer:
        if self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
            df["DI_values"].iloc[-1] = dk.DI_values[-1]

+        # append the new predictions to persistent storage
+        hp_df.iloc[-1] = df[label].iloc[-1]
+
        if length_difference < 0:
            prepend_df = pd.DataFrame(
                np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@ -138,19 +138,6 @@ class FreqaiDataKitchen:
        self.dd.pair_dict[coin]["data_path"] = str(self.data_path)
        self.dd.save_drawer_to_disk()

-        # TODO add a helper function to let user save/load any data they are custom adding. We
-        # do not want them having to edit the default save/load methods here. Below is an example
-        # of what we do NOT want.
-
-        # if self.freqai_config.get('feature_parameters','determine_statistical_distributions'):
-        #     self.data_dictionary["upper_quantiles"].to_pickle(
-        #         save_path / str(self.model_filename + "_upper_quantiles.pkl")
-        #     )
-
-        #     self.data_dictionary["lower_quantiles"].to_pickle(
-        #         save_path / str(self.model_filename + "_lower_quantiles.pkl")
-        #     )
-
        return

    def load_data(self, coin: str = "", keras_model=False) -> Any:
@ -184,22 +171,6 @@ class FreqaiDataKitchen:
            self.data_path / str(self.model_filename + "_trained_df.pkl")
        )

-        # TODO add a helper function to let user save/load any data they are custom adding. We
-        # do not want them having to edit the default save/load methods here. Below is an example
-        # of what we do NOT want.
-
-        # if self.freqai_config.get('feature_parameters','determine_statistical_distributions'):
-        #     self.data_dictionary["upper_quantiles"] = pd.read_pickle(
-        #         self.data_path / str(self.model_filename + "_upper_quantiles.pkl")
-        #     )
-
-        #     self.data_dictionary["lower_quantiles"] = pd.read_pickle(
-        #         self.data_path / str(self.model_filename + "_lower_quantiles.pkl")
-        #     )
-
-        # self.data_path = Path(self.data["data_path"])
-        # self.model_filename = self.data["model_filename"]
-
        # try to access model in memory instead of loading object from disk to save time
        if self.live and self.model_filename in self.dd.model_dictionary:
            model = self.dd.model_dictionary[self.model_filename]
@ -207,7 +178,6 @@ class FreqaiDataKitchen:
            model = load(self.data_path / str(self.model_filename + "_model.joblib"))
        else:
            from tensorflow import keras
-
            model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5"))

        if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists():
@ -263,7 +233,6 @@ class FreqaiDataKitchen:
            labels,
            weights,
            stratify=stratification,
-            # shuffle=False,
            **self.config["freqai"]["data_split_parameters"],
        )

@ -276,7 +245,6 @@ class FreqaiDataKitchen:
        unfiltered_dataframe: DataFrame,
        training_feature_list: List,
        label_list: List = list(),
-        # labels: DataFrame = pd.DataFrame(),
        training_filter: bool = True,
    ) -> Tuple[DataFrame, DataFrame]:
        """
@ -1135,6 +1103,19 @@ class FreqaiDataKitchen:

        return dataframe

+    def fit_live_predictions(self) -> None:
+        """
+        Fit a gaussian distribution to the most recent historic predictions of the
+        current pair and store the result as the label statistics.
+
+        For every label in `self.label_list`, the last `fit_live_predictions_candles`
+        rows (config value, default 100) of
+        `self.dd.historic_predictions[self.pair][label]` are fitted. The fitted mean
+        and standard deviation replace `self.data["labels_mean"]` and
+        `self.data["labels_std"]`, which are reset to empty dicts first.
+        """
+        # Import the submodule explicitly: a bare `import scipy` does not guarantee
+        # that `scipy.stats` has been loaded.
+        from scipy import stats
+
+        num_candles = self.freqai_config.get('fit_live_predictions_candles', 100)
+        self.data["labels_mean"], self.data["labels_std"] = {}, {}
+        for label in self.label_list:
+            recent = self.dd.historic_predictions[self.pair][label].tail(num_candles)
+            # norm.fit returns (loc, scale), i.e. the fitted mean and std
+            mean, std = stats.norm.fit(recent)
+            self.data["labels_mean"][label] = mean
+            self.data["labels_std"][label] = std
+
    def fit_labels(self) -> None:
        """
        Fit the labels with a gaussian distribution
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@ -1,4 +1,5 @@
 # import contextlib
+import copy
 import datetime
 import gc
 import logging
@ -484,6 +485,20 @@ class IFreqaiModel(ABC):
            self.dd.purge_old_models()
        # self.retrain = False

+    def set_initial_historic_predictions(self, df: DataFrame, model: Any,
+                                         dk: FreqaiDataKitchen, pair: str) -> None:
+        """
+        Seed the historic predictions of `pair` with the model's predictions on `df`.
+
+        The raw output of `model.predict(df)` is rescaled per label with
+        `dk.data["labels_min"]` / `dk.data["labels_max"]` and stored as a DataFrame
+        under `self.dd.historic_predictions[pair]`, replacing any previous entry.
+        :param df: features handed to `model.predict`
+        :param model: fitted model exposing `predict`
+        :param dk: data kitchen providing `label_list` and the label min/max
+        :param pair: key under which the predictions are stored
+        """
+        trained_predictions = model.predict(df)
+        pred_df = DataFrame(trained_predictions, columns=dk.label_list)
+        for label in dk.label_list:
+            label_min = dk.data["labels_min"][label]
+            label_max = dk.data["labels_max"][label]
+            # maps -1 -> label_min and +1 -> label_max
+            # (presumably undoing a [-1, 1] label normalization - confirm)
+            pred_df[label] = ((pred_df[label] + 1) * (label_max - label_min) / 2) + label_min
+
+        # store an independent copy; no need to pre-assign an empty frame first
+        self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)
+
+
    # The following methods are overridden by user-made prediction models.
    # See freqai/prediction_models/CatboostPredictionModel.py for an example.

--- a/freqtrade/freqai/prediction_models/BaseRegressionModel.py
+++ b/freqtrade/freqai/prediction_models/BaseRegressionModel.py
@ -51,7 +51,8 @@ class BaseRegressionModel(IFreqaiModel):

        # split data into train/test data.
        data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
-        dk.fit_labels()  # fit labels to a cauchy distribution so we know what to expect in strategy
+        if not self.freqai_info.get('fit_live_predictions', 0):
+            dk.fit_labels()
        # normalize all data based on train_dataset only
        data_dictionary = dk.normalize_data(data_dictionary)

@ -65,6 +66,13 @@ class BaseRegressionModel(IFreqaiModel):

        model = self.fit(data_dictionary)

+        if pair not in self.dd.historic_predictions:
+            self.set_initial_historic_predictions(
+                data_dictionary['train_features'], model, dk, pair)
+        elif self.freqai_info.get('fit_live_predictions_candles', 0):
+            dk.fit_live_predictions()
+            self.dd.save_historic_predictions_to_disk()
+
        logger.info(f"--------------------done training {pair}--------------------")

        return model