remove trade database analyzer, clean up a bit

2022-08-10 17:43:06 +02:00
parent 91d0c91287
commit 2cae3c42e6
3 changed files with 1 additions and 287 deletions
@@ -2,9 +2,8 @@ import copy
 import datetime
 import logging
 import shutil
-import sqlite3
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Tuple

 import numpy as np
 import numpy.typing as npt
@@ -88,20 +87,6 @@ class FreqaiDataKitchen:
                config["freqai"]["backtest_period_days"],
            )

-        self.database_path: Optional[Path] = None
-
-        if self.live:
-            db_url = self.config.get('db_url', None)
-            self.database_path = Path(db_url)
-            if 'sqlite' not in self.database_path.parts[0]:
-                self.database_path = None
-                logger.warning('FreqAI database analyzer only available for sqlite dbs. '
-                               ' FreqAI will still run, but user cannot use database analyzer.')
-            else:
-                self.database_name = Path(*self.database_path.parts[1:])
-
-        self.trade_database_df: DataFrame = pd.DataFrame()
-
        self.data['extra_returns_per_train'] = self.freqai_config.get('extra_returns_per_train', {})
        self.thread_count = self.freqai_config.get("data_kitchen_thread_count", -1)
        self.train_dates: DataFrame = pd.DataFrame()
@@ -1007,13 +992,6 @@ class FreqaiDataKitchen:
            f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label])
            self.data["labels_mean"][label], self.data["labels_std"][label] = f[0], f[1]

-        # KEEPME incase we want to let user start to grab quantiles.
-        # upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][
-        #                                                   'target_quantile'], *f)
-        # lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][
-        #                                                       'target_quantile'], *f)
-        # self.data["upper_quantile"] = upper_q
-        # self.data["lower_quantile"] = lower_q
        return

    def remove_features_from_df(self, dataframe: DataFrame) -> DataFrame:
@@ -1025,181 +1003,3 @@ class FreqaiDataKitchen:
            col for col in dataframe.columns if not col.startswith("%") or col.startswith("%%")
        ]
        return dataframe[to_keep]
-
-    def get_current_trade_database(self) -> None:
-
-        if self.database_path is None:
-            logger.warning('No trade database found. Skipping analysis.')
-            return
-
-        data = sqlite3.connect(self.database_name)
-        query = data.execute("SELECT * From trades")
-        cols = [column[0] for column in query.description]
-        df = pd.DataFrame.from_records(data=query.fetchall(), columns=cols)
-        self.trade_database_df = df.dropna(subset='close_date')
-        data.close()
-
-    def np_encoder(self, object):
-        if isinstance(object, np.generic):
-            return object.item()
-
-    # Functions containing useful data manipulation examples. but not actively in use.
-
-    # Possibly phasing these outlier removal methods below out in favor of
-    # use_SVM_to_remove_outliers (computationally more efficient and apparently higher performance).
-    # But these have good data manipulation examples, so keep them commented here for now.
-
-    # def determine_statistical_distributions(self) -> None:
-    #     from fitter import Fitter
-
-    #     logger.info('Determining best model for all features, may take some time')
-
-    #     def compute_quantiles(ft):
-    #         f = Fitter(self.data_dictionary["train_features"][ft],
-    #                    distributions=['gamma', 'cauchy', 'laplace',
-    #                                   'beta', 'uniform', 'lognorm'])
-    #         f.fit()
-    #         # f.summary()
-    #         dist = list(f.get_best().items())[0][0]
-    #         params = f.get_best()[dist]
-    #         upper_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.999, **params)
-    #         lower_q = getattr(spy.stats, list(f.get_best().items())[0][0]).ppf(0.001, **params)
-
-    #         return ft, upper_q, lower_q, dist
-
-    #     quantiles_tuple = Parallel(n_jobs=-1)(
-    #         delayed(compute_quantiles)(ft) for ft in self.data_dictionary[
-    #                                                       'train_features'].columns)
-
-    #     df = pd.DataFrame(quantiles_tuple, columns=['features', 'upper_quantiles',
-    #                                                 'lower_quantiles', 'dist'])
-    #     self.data_dictionary['upper_quantiles'] = df['upper_quantiles']
-    #     self.data_dictionary['lower_quantiles'] = df['lower_quantiles']
-
-    #     return
-
-    # def remove_outliers(self, predict: bool) -> None:
-    #     """
-    #     Remove data that looks like an outlier based on the distribution of each
-    #     variable.
-    #     :params:
-    #     :predict: boolean which tells the function if this is prediction data or
-    #     training data coming in.
-    #     """
-
-    #     lower_quantile = self.data_dictionary["lower_quantiles"].to_numpy()
-    #     upper_quantile = self.data_dictionary["upper_quantiles"].to_numpy()
-
-    #     if predict:
-
-    #         df = self.data_dictionary["prediction_features"][
-    #             (self.data_dictionary["prediction_features"] < upper_quantile)
-    #             & (self.data_dictionary["prediction_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(df).any(1)
-    #         self.data_dictionary["prediction_features"].fillna(0, inplace=True)
-    #         drop_index = ~drop_index
-    #         do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
-
-    #         logger.info(
-    #             "remove_outliers() tossed %s predictions",
-    #             len(do_predict) - do_predict.sum(),
-    #         )
-    #         self.do_predict += do_predict
-    #         self.do_predict -= 1
-
-    #     else:
-
-    #         filter_train_df = self.data_dictionary["train_features"][
-    #             (self.data_dictionary["train_features"] < upper_quantile)
-    #             & (self.data_dictionary["train_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(filter_train_df).any(1)
-    #         drop_index = drop_index.replace(True, 1).replace(False, 0)
-    #         self.data_dictionary["train_features"] = self.data_dictionary["train_features"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["train_labels"] = self.data_dictionary["train_labels"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["train_weights"] = self.data_dictionary["train_weights"][
-    #             (drop_index == 0)
-    #         ]
-
-    #         logger.info(
-    #             f'remove_outliers() tossed {drop_index.sum()}'
-    #             f' training points from {len(filter_train_df)}'
-    #         )
-
-    #         # do the same for the test data
-    #         filter_test_df = self.data_dictionary["test_features"][
-    #             (self.data_dictionary["test_features"] < upper_quantile)
-    #             & (self.data_dictionary["test_features"] > lower_quantile)
-    #         ]
-    #         drop_index = pd.isnull(filter_test_df).any(1)
-    #         drop_index = drop_index.replace(True, 1).replace(False, 0)
-    #         self.data_dictionary["test_labels"] = self.data_dictionary["test_labels"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["test_features"] = self.data_dictionary["test_features"][
-    #             (drop_index == 0)
-    #         ]
-    #         self.data_dictionary["test_weights"] = self.data_dictionary["test_weights"][
-    #             (drop_index == 0)
-    #         ]
-
-    #         logger.info(
-    #             f'remove_outliers() tossed {drop_index.sum()}'
-    #             f' test points from {len(filter_test_df)}'
-    #         )
-
-    #     return
-
-    # def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
-    #     """
-    #     standardize all data in the data_dictionary according to the training dataset
-    #     :params:
-    #     :data_dictionary: dictionary containing the cleaned and split training/test data/labels
-    #     :returns:
-    #     :data_dictionary: updated dictionary with standardized values.
-    #     """
-    #     # standardize the data by training stats
-    #     train_mean = data_dictionary["train_features"].mean()
-    #     train_std = data_dictionary["train_features"].std()
-    #     data_dictionary["train_features"] = (
-    #         data_dictionary["train_features"] - train_mean
-    #     ) / train_std
-    #     data_dictionary["test_features"] = (
-    #         data_dictionary["test_features"] - train_mean
-    #     ) / train_std
-
-    #     train_labels_std = data_dictionary["train_labels"].std()
-    #     train_labels_mean = data_dictionary["train_labels"].mean()
-    #     data_dictionary["train_labels"] = (
-    #         data_dictionary["train_labels"] - train_labels_mean
-    #     ) / train_labels_std
-    #     data_dictionary["test_labels"] = (
-    #         data_dictionary["test_labels"] - train_labels_mean
-    #     ) / train_labels_std
-
-    #     for item in train_std.keys():
-    #         self.data[item + "_std"] = train_std[item]
-    #         self.data[item + "_mean"] = train_mean[item]
-
-    #     self.data["labels_std"] = train_labels_std
-    #     self.data["labels_mean"] = train_labels_mean
-
-    #     return data_dictionary
-
-    # def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
-    # """
-    # Normalizes a set of data using the mean and standard deviation from
-    # the associated training data.
-    # :params:
-    # :df: Dataframe to be standardized
-    # """
-
-    # for item in df.keys():
-    #     df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
-
-    # return df
@@ -491,9 +491,6 @@ class IFreqaiModel(ABC):

        model = self.train(unfiltered_dataframe, pair, dk)

-        dk.get_current_trade_database()
-        self.analyze_trade_database(dk, pair)
-
        self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
        dk.set_new_model_names(pair, new_trained_timerange)
        self.dd.pair_dict[pair]["first"] = False
@@ -612,20 +609,3 @@ class IFreqaiModel(ABC):
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (i.e. SVM and/or DI index)
        """
-
-    def analyze_trade_database(self, dk: FreqaiDataKitchen, pair: str) -> None:
-        """
-        User analyzes the trade database here and returns summary stats which will be passed back
-        to the strategy for reinforcement learning or for additional adaptive metrics for use
-        in entry/exit signals. Store these metrics in dk.data['extra_returns_per_train'] and
-        they will format themselves into the dataframe as an additional column in the user
-        strategy. User has access to the current trade database in dk.trade_database_df.
-        """
-        # if dk.trade_database_df.empty:
-        #     logger.warning(f'No trades found for {pair} to analyze DB')
-        #     return
-
-        # total_profit = dk.trade_database_df['close_profit_abs'].sum()
-        # dk.data['extra_returns_per_train']['total_profit'] = total_profit
-
-        return