remove trade database analyzer, clean up a bit

2022-08-10 17:43:06 +02:00 · 2022-08-10 17:43:06 +02:00 · 2cae3c42e6
commit 2cae3c42e6
parent 91d0c91287
3 changed files with 1 additions and 287 deletions
--- a/docs/freqai.md
+++ b/docs/freqai.md
@@ -663,79 +663,13 @@ The user needs to set the standard dictionary in the config so FreqAI can return
 These values will likely be overridden by the user prediction model, but in the case where the user model has yet to set them, or needs
 a default initial value - this is the value that will be returned.

-## Analyzing the trade live database

-Users can analyze the live trade database by calling `analyze_trade_database()` in their custom prediction model. FreqAI already has the
-database setup in a pandas dataframe and ready to be analyzed. Here is an example usecase:
-
-```python
-    def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
-        """
-        User analyzes the trade database here and returns summary stats which will be passed back
-        to the strategy for reinforcement learning or for additional adaptive metrics for use
-        in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
-        they will format themselves into the dataframe as an additional column in the user
-        strategy. User has access to the current trade database in dk.trade_database_df.
-        """
-        total_profit = dk.trade_database_df['close_profit_abs'].sum()
-        dk.data['extra_returns_per_train']['total_profit'] = total_profit
-
-        return
-```
 ## Building an IFreqaiModel

 FreqAI has multiple example prediction model based libraries such as `Catboost` regression (`freqai/prediction_models/CatboostRegressor.py`) and `LightGBM` regression. 
 However, users can customize and create their own prediction models using the `IFreqaiModel` class.
 Users are encouraged to inherit `train()` and `predict()` to let them customize various aspects of their training procedures.

-<!-- ## Dynamic target expectation
-
-The labels used for model training have a unique statistical distribution for each separate model training. 
-We can use this information to know if our current prediction is in the realm of what the model was trained on, 
-and if so, what is the statistical probability of the current prediction. With this information, we can
-make more informed prediction.
-FreqAI builds this label distribution and provides a quantile to the strategy, which can be optionally used as a
-dynamic threshold. The `target_quantile: X` means that X% of the labels are below this value. So setting:
-
-```json
-    "freqai": {
-        "feature_parameters" : {
-            "target_quantile": 0.9
-        }
-    }
-```
-
-Means the user will get back in the strategy the label threshold at which 90% of the labels were 
-below this value. An example usage in the strategy may look something like:
-
-```python
-
-    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
-
-        # ... #
-
-        (
-            dataframe["prediction"],
-            dataframe["do_predict"],
-            dataframe["target_upper_quantile"],
-            dataframe["target_lower_quantile"],
-        ) = self.freqai.start(dataframe, metadata, self)
-
-        return dataframe
-
-    def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
-
-        buy_conditions = [
-            (dataframe["prediction"] > dataframe["target_upper_quantile"]) & (dataframe["do_predict"] == 1)
-        ]
-
-        if buy_conditions:
-            dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), "buy"] = 1
-
-        return dataframe
-
-``` -->
-
 ## Additional information

 ### Common pitfalls
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -2,9 +2,8 @@ import copy
 import datetime
 import logging
 import shutil
-import sqlite3
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Tuple

 import numpy as np
 import numpy.typing as npt
@@ -88,20 +87,6 @@ class FreqaiDataKitchen:
                config["freqai"]["backtest_period_days"],
            )

-        self.database_path: Optional[Path] = None
-
-        if self.live:
-            db_url = self.config.get('db_url', None)
-            self.database_path = Path(db_url)
-            if 'sqlite' not in self.database_path.parts[0]:
-                self.database_path = None
-                logger.warning('FreqAI database analyzer only available for sqlite dbs. '
-                               ' FreqAI will still run, but user cannot use database analyzer.')
-            else:
-                self.database_name = Path(*self.database_path.parts[1:])
-
-        self.trade_database_df: DataFrame = pd.DataFrame()
-
        self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
        self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
        self.train_dates: DataFrame = pd.DataFrame()
@@ -1007,13 +992,6 @@ class FreqaiDataKitchen:
            f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label])
            self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]

-        # KEEPME incase we want to let user start to grab quantiles.
-        # upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][
-        #                                                   'target_quantile'], *f)
-        # lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][
-        #                                                       'target_quantile'], *f)
-        # self.data["upper_quantile"] = upper_q
-        # self.data["lower_quantile"] = lower_q
        return

    def remove_features_from_df(self, dataframe: DataFrame) -> DataFrame:
@@ -1025,181 +1003,3 @@ class FreqaiDataKitchen:
            col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%")
        ]
        return dataframe[to_keep]
-
-    def get_current_trade_database(self) -> None:
-
-        if self.database_path is None:
-            logger.warning('No trade database found. Skipping analysis.')
-            return
-
-        data = sqlite3.connect(self.database_name)
-        query = data.execute("SELECT * From trades")
-        cols = [column[0] for column in query.description]
-        df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)
-        self.trade_database_df = df.dropna(subset='close_date')
-        data.close()
-
-    def np_encoder(self, object):
-        if isinstance(object, np.generic):
-            return object.item()
-
-    # Functions containing useful data manipulation examples. but not actively in use.
-
-    # Possibly phasing these outlier removal methods below out in favor of
-    # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
-    # But these have good data manipulation examples, so keep them commented here for now.
-
-    # def determine_statistical_distributions(self) -> None:
-    #     from fitter import Fitter
-
-    #     logger.info('Determining best model for all features, may take some time')
-
-    #     def compute_quantiles(ft):
-    #         f = Fitter(self.data_dictionary["train_features"][ft],
-    #                    distributions=['gamma', 'cauchy', 'laplace',
-    #                                   'beta', 'uniform', 'lognorm'])
-    #         f.fit()
-    #         # f.summary()
-    #         dist = list(f.get_best().items())[0][0]
-    #         params = f.get_best()[dist]
-    #         upper_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.999, **params)
-    #         lower_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.001, **params)
-
-    #         return ft, upper_q, lower_q, dist
-
-    #     quantiles_tuple = Parallel(n_jobs=-1)(
-    #         delayed(compute_quantiles)(ft) for ft in self.data_dictionary[
-    #                                                       'train_features'].columns)
-
-    #     df = pd.DataFrame(quantiles_tuple, columns=['features', 'upper_quantiles',
-    #                                                 'lower_quantiles', 'dist'])
-    #     self.data_dictionary['upper_quantiles'] = df['upper_quantiles']
-    #     self.data_dictionary['lower_quantiles'] = df['lower_quantiles']
-
-    #     return
-
-    # def remove_outliers(self, predict: bool) -> None:
-    #     """
-    #     Remove data that looks like an outlier based on the distribution of each
-    #     variable.
-    #     :params:
-    #     :predict: boolean which tells the function if this is prediction data or
-    #     training data coming in.
-    #     """
-
-    #     lower_quantile = self.data_dictionary["lower_quantiles"].to_numpy()
-    #     upper_quantile = self.data_dictionary["upper_quantiles"].to_numpy()
-
-    #     if predict:
-
-    #         df = self.data_dictionary["prediction_features"][
-    #             (self.data_dictionary["prediction_features"] < upper_quantile)
-    #             & (self.data_dictionary["prediction_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(df).any(1)
-    #         self.data_dictionary["prediction_features"].fillna(0, inplace=True)
-    #         drop_index = ~drop_index
-    #         do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
-
-    #         logger.info(
-    #             "remove_outliers() tossed %s predictions",
-    #             len(do_predict) - do_predict.sum(),
-    #         )
-    #         self.do_predict += do_predict
-    #         self.do_predict -= 1
-
-    #     else:
-
-    #         filter_train_df = self.data_dictionary["train_features"][
-    #             (self.data_dictionary["train_features"] < upper_quantile)
-    #             & (self.data_dictionary["train_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(filter_train_df).any(1)
-    #         drop_index = drop_index.replace(True, 1).replace(False, 0)
-    #         self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][
-    #             (drop_index == 0)
-    #         ]
-
-    #         logger.info(
-    #             f'remove_outliers() tossed {drop_index.sum()}'
-    #             f' training points from {len(filter_train_df)}'
-    #         )
-
-    #         # do the same for the test data
-    #         filter_test_df = self.data_dictionary["test_features"][
-    #             (self.data_dictionary["test_features"] < upper_quantile)
-    #             & (self.data_dictionary["test_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(filter_test_df).any(1)
-    #         drop_index = drop_index.replace(True, 1).replace(False, 0)
-    #         self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
-    #             (drop_index == 0)
-    #         ]
-
-    #         logger.info(
-    #             f'remove_outliers() tossed {drop_index.sum()}'
-    #             f' test points from {len(filter_test_df)}'
-    #         )
-
-    #     return
-
-    # def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
-    #     """
-    #     standardize all data in the data_dictionary according to the training dataset
-    #     :params:
-    #     :data_dictionary: dictionary containing the cleaned and split training/test data/labels
-    #     :returns:
-    #     :data_dictionary: updated dictionary with standardized values.
-    #     """
-    #     # standardize the data by training stats
-    #     train_mean = data_dictionary["train_features"].mean()
-    #     train_std = data_dictionary["train_features"].std()
-    #     data_dictionary["train_features"] = (
-    #         data_dictionary["train_features"] - train_mean
-    #     ) / train_std
-    #     data_dictionary["test_features"] = (
-    #         data_dictionary["test_features"] - train_mean
-    #     ) / train_std
-
-    #     train_labels_std = data_dictionary["train_labels"].std()
-    #     train_labels_mean = data_dictionary["train_labels"].mean()
-    #     data_dictionary["train_labels"] = (
-    #         data_dictionary["train_labels"] - train_labels_mean
-    #     ) / train_labels_std
-    #     data_dictionary["test_labels"] = (
-    #         data_dictionary["test_labels"] - train_labels_mean
-    #     ) / train_labels_std
-
-    #     for item in train_std.keys():
-    #         self.data[item + "_std"] = train_std[item]
-    #         self.data[item + "_mean"] = train_mean[item]
-
-    #     self.data["labels_std"] = train_labels_std
-    #     self.data["labels_mean"] = train_labels_mean
-
-    #     return data_dictionary
-
-    # def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
-    # """
-    # Normalizes a set of data using the mean and standard deviation from
-    # the associated training data.
-    # :params:
-    # :df: Dataframe to be standardized
-    # """
-
-    # for item in df.keys():
-    #     df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
-
-    # return df
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -491,9 +491,6 @@ class IFreqaiModel(ABC):

        model = self.train(unfiltered_dataframe, pair, dk)

-        dk.get_current_trade_database()
-        self.analyze_trade_database(dk, pair)
-
        self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
        dk.set_new_model_names(pair, new_trained_timerange)
        self.dd.pair_dict[pair]["first"] = False
@@ -612,20 +609,3 @@ class IFreqaiModel(ABC):
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index)
        """
-
-    def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
-        """
-        User analyzes the trade database here and returns summary stats which will be passed back
-        to the strategy for reinforcement learning or for additional adaptive metrics for use
-        in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
-        they will format themselves into the dataframe as an additional column in the user
-        strategy. User has access to the current trade database in dk.trade_database_df.
-        """
-        # if dk.trade_database_df.empty:
-        #     logger.warning(f'No trades found for {pair} to analyze DB')
-        #     return
-
-        # total_profit = dk.trade_database_df['close_profit_abs'].sum()
-        # dk.data['extra_returns_per_train']['total_profit'] = total_profit
-
-        return