Give user the ability to analyze the live trade dataframe inside the custom prediction model. Add documentation to explain the new functionality
This commit is contained in: parent 895ebbfd18, commit 95d3009a95
@@ -619,6 +619,46 @@ If the user sets this value, FreqAI will initially use the predictions from the
and then subsequently begin introducing real prediction data as it is generated. FreqAI will save
this historical data to be reloaded if the user stops and restarts with the same `identifier`.

## Extra returns per train

Users may find that there are some important metrics that they'd like to return to the strategy at the end of each retrain.
Users can include these metrics by assigning them to `dk.data['extra_returns_per_train']['my_new_value'] = XYZ` inside their custom prediction
model class. FreqAI takes the `my_new_value` assigned in this dictionary and expands it to fit the return dataframe to the strategy.
The user can then use the value in the strategy with `dataframe['my_new_value']`. An example of how this is already used in FreqAI is
the `&*_mean` and `&*_std` values, which indicate the mean and standard deviation of that particular label during the most recent training.
Another example is shown below for the case where the user wants to use live metrics from the trade database.

The user needs to set the standard dictionary in the config so that FreqAI can return proper dataframe shapes:

```json
    "freqai": {
        "extra_returns_per_train": {"total_profit": 4}
    }
```

These values will likely be overridden by the user prediction model, but in the case where the user model has yet to set them, or needs
a default initial value, this is the value that will be returned.
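
Below is a minimal, hypothetical sketch of how the `total_profit` column from the config example above might be consumed inside the user's strategy class. The `populate_entry_trend()` callback and the `do_predict`/`enter_long` columns are standard freqtrade/FreqAI names, while the entry condition itself is illustrative only and not part of FreqAI:

```python
from pandas import DataFrame


def populate_entry_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
    # 'total_profit' is the extra return declared in "extra_returns_per_train"
    # above; 'do_predict' is the standard FreqAI column marking rows whose
    # predictions are considered trustworthy. The condition below is purely
    # illustrative: only allow new long entries while overall profit is positive.
    dataframe.loc[
        (dataframe['do_predict'] == 1) & (dataframe['total_profit'] > 0),
        'enter_long',
    ] = 1
    return dataframe
```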

## Analyzing the live trade database

Users can analyze the live trade database by calling `analyze_trade_database()` in their custom prediction model. FreqAI already has the
database set up in a pandas dataframe and ready to be analyzed. Here is an example use case:

```python
def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
    """
    User analyzes the trade database here and returns summary stats which will be passed back
    to the strategy for reinforcement learning or for additional adaptive metrics for use
    in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
    they will format themselves into the dataframe as an additional column in the user
    strategy. User has access to the current trade database in dk.trade_database_df.
    """
    total_profit = dk.trade_database_df['close_profit_abs'].sum()
    dk.data['extra_returns_per_train']['total_profit'] = total_profit

    return
```
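
As a further sketch that is not part of this commit, the same hook could be narrowed to the pair currently being retrained. This assumes the trades table exposes the standard freqtrade `pair` column and that a hypothetical `pair_profit` key has been declared in `extra_returns_per_train` in the config:

```python
def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
    """
    Hypothetical variant: summarize realized profit for the current pair only.
    """
    trades = dk.trade_database_df
    if trades.empty:
        # Nothing to analyze yet (e.g. a fresh dry-run database).
        return

    # 'pair' is the standard freqtrade trades column; 'pair_profit' is an
    # illustrative key that would need to be declared in
    # "extra_returns_per_train" in the config.
    pair_trades = trades[trades['pair'] == pair]
    dk.data['extra_returns_per_train']['pair_profit'] = (
        pair_trades['close_profit_abs'].sum()
    )
```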

<!-- ## Dynamic target expectation

The labels used for model training have a unique statistical distribution for each separate model training.

@@ -39,7 +39,7 @@ class FreqaiDataDrawer:
    Robert Caulk @robcaulk

    Theoretical brainstorming:
    Elin Törnquist @thorntwig
    Elin Törnquist @th0rntwig

    Code review, software architecture brainstorming:
    @xmatthias

@@ -238,6 +238,11 @@ class FreqaiDataDrawer:

        mrv_df["do_predict"] = do_preds

        if dk.data['extra_returns_per_train']:
            rets = dk.data['extra_returns_per_train']
            for return_str in rets:
                mrv_df[return_str] = rets[return_str]

        # for keras type models, the conv_window needs to be prepended so
        # viewing is correct in frequi
        if self.freqai_info.get('keras', False):

@@ -282,9 +287,15 @@ class FreqaiDataDrawer:
        if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
            df["DI_values"].iloc[-1] = dk.DI_values[-1]

        if dk.data['extra_returns_per_train']:
            rets = dk.data['extra_returns_per_train']
            for return_str in rets:
                df[return_str].iloc[-1] = rets[return_str]

        # append the new predictions to persistent storage
        if pair in self.historic_predictions:
            self.historic_predictions[pair].iloc[-1] = df[label].iloc[-1]
            for key in df.keys():
                self.historic_predictions[pair][key].iloc[-1] = df[key].iloc[-1]

        if length_difference < 0:
            prepend_df = pd.DataFrame(

@@ -320,7 +331,12 @@ class FreqaiDataDrawer:
        dataframe["do_predict"] = 0

        if self.freqai_info["feature_parameters"].get("DI_threshold", 0) > 0:
            dataframe["DI_value"] = 0
            dataframe["DI_values"] = 0

        if dk.data['extra_returns_per_train']:
            rets = dk.data['extra_returns_per_train']
            for return_str in rets:
                dataframe[return_str] = 0

        dk.return_dataframe = dataframe

@@ -2,6 +2,7 @@ import copy
import datetime
import logging
import shutil
import sqlite3
from pathlib import Path
from typing import Any, Dict, List, Tuple

@@ -39,7 +40,7 @@ class FreqaiDataKitchen:
    Robert Caulk @robcaulk

    Theoretical brainstorming:
    Elin Törnquist @thorntwig
    Elin Törnquist @th0rntwig

    Code review, software architecture brainstorming:
    @xmatthias

@@ -84,6 +85,12 @@ class FreqaiDataKitchen:
            config["freqai"]["backtest_period_days"],
        )

        db_url = self.config.get('db_url', None)
        self.database_path = '' if db_url == 'sqlite://' else str(db_url).split('///')[1]
        self.trade_database_df: DataFrame = pd.DataFrame()

        self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})

    def set_paths(
        self,
        pair: str,

@@ -101,7 +108,7 @@ class FreqaiDataKitchen:

        self.data_path = Path(
            self.full_path
            / str("sub-train" + "-" + pair.split("/")[0] + "_" + str(trained_timestamp))
            / f"sub-train-{pair.split('/')[0]}_{trained_timestamp}"
        )

        return

@@ -328,7 +335,7 @@ class FreqaiDataKitchen:
        """

        for label in self.label_list:
            if df[label].dtype == str:
            if df[label].dtype == object:
                continue
            df[label] = (
                (df[label] + 1)

@@ -493,7 +500,6 @@ class FreqaiDataKitchen:
        tc = self.freqai_config.get("model_training_parameters", {}).get("thread_count", -1)
        pairwise = pairwise_distances(self.data_dictionary["train_features"], n_jobs=tc)
        avg_mean_dist = pairwise.mean(axis=1).mean()
        logger.info(f"avg_mean_dist {avg_mean_dist:.2f}")

        return avg_mean_dist

@@ -599,10 +605,11 @@ class FreqaiDataKitchen:
        from the training data set.
        """

        tc = self.freqai_config.get("model_training_parameters", {}).get("thread_count", -1)
        distance = pairwise_distances(
            self.data_dictionary["train_features"],
            self.data_dictionary["prediction_features"],
            n_jobs=-1,
            n_jobs=tc,
        )

        self.DI_values = distance.min(axis=0) / self.data["avg_mean_dist"]

@@ -946,6 +953,19 @@ class FreqaiDataKitchen:
        ]
        return dataframe[to_keep]

    def get_current_trade_database(self) -> None:

        if self.database_path == '':
            logger.warning('No trade database found. Skipping analysis.')
            return

        data = sqlite3.connect(self.database_path)
        query = data.execute("SELECT * From trades")
        cols = [column[0] for column in query.description]
        df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)
        self.trade_database_df = df.dropna(subset='close_date')
        data.close()

    def np_encoder(self, object):
        if isinstance(object, np.generic):
            return object.item()

@@ -1,5 +1,4 @@
# import contextlib
import copy
import datetime
import logging
import shutil

@@ -46,7 +45,7 @@ class IFreqaiModel(ABC):
    Robert Caulk @robcaulk

    Theoretical brainstorming:
    Elin Törnquist @thorntwig
    Elin Törnquist @th0rntwig

    Code review, software architecture brainstorming:
    @xmatthias

@@ -81,6 +80,8 @@ class IFreqaiModel(ABC):
        self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
        self.pair_it = 0
        self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
        self.last_trade_database_summary: DataFrame = {}
        self.current_trade_database_summary: DataFrame = {}

    def assert_config(self, config: Dict[str, Any]) -> None:

@@ -479,6 +480,9 @@ class IFreqaiModel(ABC):

        model = self.train(unfiltered_dataframe, pair, dk)

        dk.get_current_trade_database()
        self.analyze_trade_database(dk, pair)

        self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
        dk.set_new_model_names(pair, new_trained_timerange)
        self.dd.pair_dict[pair]["first"] = False

@@ -493,13 +497,50 @@ class IFreqaiModel(ABC):
    def set_initial_historic_predictions(
        self, df: DataFrame, model: Any, dk: FreqaiDataKitchen, pair: str
    ) -> None:
        trained_predictions = model.predict(df)
        """
        This function is called only if the datadrawer failed to load an
        existing set of historic predictions. In this case, it builds
        the structure and sets fake predictions off the first training
        data. After that, FreqAI will append new real predictions to the
        set of historic predictions.

        These values are used to generate live statistics which can be used
        in the strategy for adaptive values. E.g. &*_mean/std are quantities
        that can be computed based on live predictions from the set of historical
        predictions. Those values can be used in the user strategy to better
        assess prediction rarity, and thus wait for probabilistically favorable
        entries relative to the live historical predictions.

        If the user reuses an identifier on a subsequent instance,
        this function will not be called. In that case, "real" predictions
        will be appended to the loaded set of historic predictions.
        :param: df: DataFrame = the dataframe containing the training feature data
        :param: model: Any = A model which was `fit` using a common library such as
                catboost or lightgbm
        :param: dk: FreqaiDataKitchen = object containing methods for data analysis
        :param: pair: str = current pair
        """
        num_candles = self.freqai_info.get('fit_live_predictions_candles', 600)
        df_tail = df.tail(num_candles)
        trained_predictions = model.predict(df_tail)
        pred_df = DataFrame(trained_predictions, columns=dk.label_list)

        pred_df = dk.denormalize_labels_from_metadata(pred_df)

        self.dd.historic_predictions[pair] = pd.DataFrame()
        self.dd.historic_predictions[pair] = copy.deepcopy(pred_df)
        self.dd.historic_predictions[pair] = pred_df
        hist_preds_df = self.dd.historic_predictions[pair]

        hist_preds_df['do_predict'] = 0

        if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0:
            hist_preds_df['DI_values'] = 0

        for label in dk.data['labels_mean']:
            hist_preds_df[f'{label}_mean'] = 0
            hist_preds_df[f'{label}_std'] = 0

        for return_str in dk.data['extra_returns_per_train']:
            hist_preds_df[return_str] = 0

    def fit_live_predictions(self, dk: FreqaiDataKitchen) -> None:
        """

@@ -565,3 +606,20 @@ class IFreqaiModel(ABC):
        """

        return

    def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
        """
        User analyzes the trade database here and returns summary stats which will be passed back
        to the strategy for reinforcement learning or for additional adaptive metrics for use
        in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
        they will format themselves into the dataframe as an additional column in the user
        strategy. User has access to the current trade database in dk.trade_database_df.
        """
        if dk.trade_database_df.empty:
            logger.warning(f'No trades found for {pair} to analyze DB')
            return

        total_profit = dk.trade_database_df['close_profit_abs'].sum()
        dk.data['extra_returns_per_train']['total_profit'] = total_profit

        return