Merge branch 'freqtrade:feat/freqai' into feat/freqai

2022-08-20 16:19:53 +02:00
parent 995defbb18 755041c134
commit 660c61554e
20 changed files with 287 additions and 138 deletions
--- a/freqtrade/data/history/hdf5datahandler.py
+++ b/freqtrade/data/history/hdf5datahandler.py
@@ -7,9 +7,8 @@ import numpy as np
 import pandas as pd

 from freqtrade.configuration import TimeRange
-from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS,
-                                 ListPairsWithTimeframes, TradeList)
-from freqtrade.enums import CandleType, TradingMode
+from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS, TradeList
+from freqtrade.enums import CandleType

 from .idatahandler import IDataHandler

@@ -21,29 +20,6 @@ class HDF5DataHandler(IDataHandler):

    _columns = DEFAULT_DATAFRAME_COLUMNS

-    @classmethod
-    def ohlcv_get_available_data(
-            cls, datadir: Path, trading_mode: TradingMode) -> ListPairsWithTimeframes:
-        """
-        Returns a list of all pairs with ohlcv data available in this datadir
-        :param datadir: Directory to search for ohlcv files
-        :param trading_mode: trading-mode to be used
-        :return: List of Tuples of (pair, timeframe)
-        """
-        if trading_mode == TradingMode.FUTURES:
-            datadir = datadir.joinpath('futures')
-        _tmp = [
-            re.search(
-                cls._OHLCV_REGEX, p.name
-            ) for p in datadir.glob("*.h5")
-        ]
-        return [
-            (
-                cls.rebuild_pair_from_filename(match[1]),
-                cls.rebuild_timeframe_from_filename(match[2]),
-                CandleType.from_string(match[3])
-            ) for match in _tmp if match and len(match.groups()) > 1]
-
    @classmethod
    def ohlcv_get_pairs(cls, datadir: Path, timeframe: str, candle_type: CandleType) -> List[str]:
        """
--- a/freqtrade/data/history/history_utils.py
+++ b/freqtrade/data/history/history_utils.py
@@ -56,7 +56,7 @@ def load_pair_history(pair: str,
                                   fill_missing=fill_up_missing,
                                   drop_incomplete=drop_incomplete,
                                   startup_candles=startup_candles,
-                                   candle_type=candle_type
+                                   candle_type=candle_type,
                                   )


@@ -97,14 +97,15 @@ def load_data(datadir: Path,
                                 fill_up_missing=fill_up_missing,
                                 startup_candles=startup_candles,
                                 data_handler=data_handler,
-                                 candle_type=candle_type
+                                 candle_type=candle_type,
                                 )
        if not hist.empty:
            result[pair] = hist
        else:
            if candle_type is CandleType.FUNDING_RATE and user_futures_funding_rate is not None:
                logger.warn(f"{pair} using user specified [{user_futures_funding_rate}]")
-                result[pair] = DataFrame(columns=["open", "close", "high", "low", "volume"])
+            elif candle_type not in (CandleType.SPOT, CandleType.FUTURES):
+                result[pair] = DataFrame(columns=["date", "open", "close", "high", "low", "volume"])

    if fail_without_data and not result:
        raise OperationalException("No data found. Terminating.")
--- a/freqtrade/data/history/idatahandler.py
+++ b/freqtrade/data/history/idatahandler.py
@@ -39,15 +39,26 @@ class IDataHandler(ABC):
        raise NotImplementedError()

    @classmethod
-    @abstractmethod
    def ohlcv_get_available_data(
            cls, datadir: Path, trading_mode: TradingMode) -> ListPairsWithTimeframes:
        """
        Returns a list of all pairs with ohlcv data available in this datadir
        :param datadir: Directory to search for ohlcv files
        :param trading_mode: trading-mode to be used
-        :return: List of Tuples of (pair, timeframe)
+        :return: List of Tuples of (pair, timeframe, CandleType)
        """
+        if trading_mode == TradingMode.FUTURES:
+            datadir = datadir.joinpath('futures')
+        _tmp = [
+            re.search(
+                cls._OHLCV_REGEX, p.name
+            ) for p in datadir.glob(f"*.{cls._get_file_extension()}")]
+        return [
+            (
+                cls.rebuild_pair_from_filename(match[1]),
+                cls.rebuild_timeframe_from_filename(match[2]),
+                CandleType.from_string(match[3])
+            ) for match in _tmp if match and len(match.groups()) > 1]

    @classmethod
    @abstractmethod
--- a/freqtrade/data/history/jsondatahandler.py
+++ b/freqtrade/data/history/jsondatahandler.py
@@ -8,9 +8,9 @@ from pandas import DataFrame, read_json, to_datetime

 from freqtrade import misc
 from freqtrade.configuration import TimeRange
-from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, ListPairsWithTimeframes, TradeList
+from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, TradeList
 from freqtrade.data.converter import trades_dict_to_list
-from freqtrade.enums import CandleType, TradingMode
+from freqtrade.enums import CandleType

 from .idatahandler import IDataHandler

@@ -23,28 +23,6 @@ class JsonDataHandler(IDataHandler):
    _use_zip = False
    _columns = DEFAULT_DATAFRAME_COLUMNS

-    @classmethod
-    def ohlcv_get_available_data(
-            cls, datadir: Path, trading_mode: TradingMode) -> ListPairsWithTimeframes:
-        """
-        Returns a list of all pairs with ohlcv data available in this datadir
-        :param datadir: Directory to search for ohlcv files
-        :param trading_mode: trading-mode to be used
-        :return: List of Tuples of (pair, timeframe)
-        """
-        if trading_mode == 'futures':
-            datadir = datadir.joinpath('futures')
-        _tmp = [
-            re.search(
-                cls._OHLCV_REGEX, p.name
-            ) for p in datadir.glob(f"*.{cls._get_file_extension()}")]
-        return [
-            (
-                cls.rebuild_pair_from_filename(match[1]),
-                cls.rebuild_timeframe_from_filename(match[2]),
-                CandleType.from_string(match[3])
-            ) for match in _tmp if match and len(match.groups()) > 1]
-
    @classmethod
    def ohlcv_get_pairs(cls, datadir: Path, timeframe: str, candle_type: CandleType) -> List[str]:
        """
--- a/freqtrade/exchange/exchange.py
+++ b/freqtrade/exchange/exchange.py
@@ -2377,7 +2377,8 @@ class Exchange:
            return

        try:
-            self._api.set_leverage(symbol=pair, leverage=leverage)
+            res = self._api.set_leverage(symbol=pair, leverage=leverage)
+            self._log_exchange_response('set_leverage', res)
        except ccxt.DDoSProtection as e:
            raise DDosProtection(e) from e
        except (ccxt.NetworkError, ccxt.ExchangeError) as e:
@@ -2405,7 +2406,6 @@ class Exchange:
        if self.trading_mode in TradingMode.SPOT:
            return None
        elif (
-            self.margin_mode == MarginMode.ISOLATED and
            self.trading_mode == TradingMode.FUTURES
        ):
            wallet_balance = (amount * open_rate) / leverage
@@ -2421,7 +2421,7 @@ class Exchange:
            return isolated_liq
        else:
            raise OperationalException(
-                "Freqtrade only supports isolated futures for leverage trading")
+                "Freqtrade currently only supports futures for leverage trading.")

    def funding_fee_cutoff(self, open_date: datetime):
        """
@@ -2441,7 +2441,8 @@ class Exchange:
            return

        try:
-            self._api.set_margin_mode(margin_mode.value, pair, params)
+            res = self._api.set_margin_mode(margin_mode.value, pair, params)
+            self._log_exchange_response('set_margin_mode', res)
        except ccxt.DDoSProtection as e:
            raise DDosProtection(e) from e
        except (ccxt.NetworkError, ccxt.ExchangeError) as e:
@@ -2599,7 +2600,7 @@ class Exchange:
        """
        if self.trading_mode == TradingMode.SPOT:
            return None
-        elif (self.trading_mode != TradingMode.FUTURES and self.margin_mode != MarginMode.ISOLATED):
+        elif (self.trading_mode != TradingMode.FUTURES):
            raise OperationalException(
                f"{self.name} does not support {self.margin_mode.value} {self.trading_mode.value}")

--- a/freqtrade/exchange/gateio.py
+++ b/freqtrade/exchange/gateio.py
@@ -34,6 +34,7 @@ class Gateio(Exchange):

    _ft_has_futures: Dict = {
        "needs_trading_fees": True,
+        "ohlcv_volume_currency": "base",
        "fee_cost_in_contracts": False,  # Set explicitly to false for clarity
        "order_props_in_contracts": ['amount', 'filled', 'remaining'],
    }
--- a/freqtrade/freqai/data_kitchen.py
+++ b/freqtrade/freqai/data_kitchen.py
@@ -659,6 +659,114 @@ class FreqaiDataKitchen:

        return

+    def compute_inlier_metric(self, set_='train') -> None:
+        """
+        Compute inlier metric from backwards distance distributions and append it to the
+        features of the chosen set. The metric (rescaled to [-1, 1]) is the share of the
+        previous `inlier_metric_window` points whose distance is below a Weibull cutoff.
+        :param set_: 'train' or 'test'; any other value selects the prediction features.
+        """
+
+        import scipy.stats as ss
+
+        no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
+        weib_pct = self.freqai_config["feature_parameters"]["inlier_metric_weibull_cutoff"]
+
+        if set_ == 'train':
+            compute_df = copy.deepcopy(self.data_dictionary['train_features'])
+        elif set_ == 'test':
+            compute_df = copy.deepcopy(self.data_dictionary['test_features'])
+        else:
+            compute_df = copy.deepcopy(self.data_dictionary['prediction_features'])
+
+        compute_df_reindexed = compute_df.reindex(
+            index=np.flip(compute_df.index)
+        )  # reversed row order, so the upper triangle below holds backwards distances
+
+        pairwise = pd.DataFrame(
+            np.triu(
+                pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count)
+            ),
+            columns=compute_df_reindexed.index,
+            index=compute_df_reindexed.index
+        )
+        pairwise = pairwise.round(5)
+
+        column_labels = [
+            f'd{i}' for i in range(1, no_prev_pts + 1)
+        ]
+        distances = pd.DataFrame(
+            columns=column_labels, index=compute_df.index
+        )
+
+        for index in compute_df.index[no_prev_pts:]:  # first rows lack a full window
+            current_row = pairwise.loc[[index]]
+            current_row_no_zeros = current_row.loc[
+                :, (current_row != 0).any(axis=0)
+            ]
+            distances.loc[[index]] = current_row_no_zeros.iloc[
+                :, :no_prev_pts
+            ]
+        distances = distances.replace([np.inf, -np.inf], np.nan)
+        drop_index = pd.isnull(distances).any(axis=1)  # keyword axis: required by pandas >= 2.0
+        distances = distances[drop_index == 0]
+
+        inliers = pd.DataFrame(index=distances.index)
+        for key in distances.keys():
+            current_distances = distances[key].dropna()
+            fit_params = ss.weibull_min.fit(current_distances)
+            cutoff = ss.weibull_min.ppf(weib_pct, *fit_params)
+            is_inlier = np.where(
+                current_distances <= cutoff, 1, 0
+            )
+            df_inlier = pd.DataFrame(
+                {key + '_IsInlier': is_inlier}, index=distances.index
+            )
+            inliers = pd.concat(
+                [inliers, df_inlier], axis=1
+            )
+
+        inlier_metric = pd.DataFrame(
+            data=inliers.sum(axis=1) / no_prev_pts,
+            columns=['inlier_metric'],
+            index=compute_df.index
+        )
+
+        inlier_metric = 2 * (inlier_metric - inlier_metric.min()) / \
+            (inlier_metric.max() - inlier_metric.min()) - 1
+
+        if set_ in ('train', 'test'):
+            inlier_metric = inlier_metric.iloc[no_prev_pts:]
+            compute_df = compute_df.iloc[no_prev_pts:]
+            self.remove_beginning_points_from_data_dict(set_, no_prev_pts)
+            self.data_dictionary[f'{set_}_features'] = pd.concat(
+                [compute_df, inlier_metric], axis=1)
+        else:
+            self.data_dictionary['prediction_features'] = pd.concat(
+                [compute_df, inlier_metric], axis=1)
+            self.data_dictionary['prediction_features'].fillna(0, inplace=True)
+
+        return None
+
+    def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10):
+        """Drop the first `no_prev_pts` rows from the features, weights and labels of `set_`."""
+        keys = [f'{set_}_features', f'{set_}_weights', f'{set_}_labels']
+        features, weights, labels = (self.data_dictionary[key] for key in keys)
+        self.data_dictionary[keys[1]] = weights[no_prev_pts:]  # sliced without .iloc
+        self.data_dictionary[keys[0]] = features.iloc[no_prev_pts:]
+        self.data_dictionary[keys[2]] = labels.iloc[no_prev_pts:]
+
+    def add_noise_to_training_features(self) -> None:
+        """
+        Perturb the train features with zero-mean Gaussian noise to reduce the risk of
+        overfitting; the spread comes from feature_parameters["noise_standard_deviation"].
+        """
+        sigma = self.freqai_config["feature_parameters"]["noise_standard_deviation"]
+        train_features = self.data_dictionary['train_features']
+        noise = np.random.normal(0, sigma, train_features.shape)  # one sample per cell
+        self.data_dictionary['train_features'] += noise
+        return
+
    def find_features(self, dataframe: DataFrame) -> None:
        """
        Find features in the strategy provided dataframe
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -66,7 +66,6 @@ class IFreqaiModel(ABC):
            "data_split_parameters", {})
        self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
            "model_training_parameters", {})
-        self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
        self.retrain = False
        self.first = True
        self.set_full_path()
@@ -74,11 +73,14 @@ class IFreqaiModel(ABC):
        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.scanning = False
+        self.ft_params = self.freqai_info["feature_parameters"]
        self.keras: bool = self.freqai_info.get("keras", False)
-        if self.keras and self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0):
-            self.freqai_info["feature_parameters"]["DI_threshold"] = 0
+        if self.keras and self.ft_params.get("DI_threshold", 0):
+            self.ft_params["DI_threshold"] = 0
            logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
        self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
+        if self.ft_params.get("inlier_metric_window", 0):
+            self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
        self.pair_it = 0
        self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
        self.last_trade_database_summary: DataFrame = {}
@@ -383,24 +385,25 @@ class IFreqaiModel(ABC):

    def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
        """
-        Base data cleaning method for train
-        Any function inside this method should drop training data points from the filtered_dataframe
-        based on user decided logic. See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an
-        example of how outlier data points are dropped from the dataframe used for training.
+        Base data cleaning method for train.
+        Functions here improve/modify the input data by identifying outliers,
+        computing additional metrics, adding noise, reducing dimensionality etc.
        """

-        if self.freqai_info["feature_parameters"].get(
+        ft_params = self.freqai_info["feature_parameters"]
+
+        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.principal_component_analysis()

-        if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
+        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=False)

-        if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
+        if ft_params.get("DI_threshold", 0):
            dk.data["avg_mean_dist"] = dk.compute_distances()

-        if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
+        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            if dk.pair in self.dd.old_DBSCAN_eps:
                eps = self.dd.old_DBSCAN_eps[dk.pair]
            else:
@@ -408,29 +411,36 @@ class IFreqaiModel(ABC):
            dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps)
            self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps']

+        if ft_params.get('inlier_metric_window', 0):
+            dk.compute_inlier_metric(set_='train')
+            if self.freqai_info["data_split_parameters"]["test_size"] > 0:
+                dk.compute_inlier_metric(set_='test')
+
+        if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
+            dk.add_noise_to_training_features()
+
    def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
        """
        Base data cleaning method for predict.
-        These functions each modify dk.do_predict, which is a dataframe with equal length
-        to the number of candles coming from and returning to the strategy. Inside do_predict,
-         1 allows prediction and < 0 signals to the strategy that the model is not confident in
-         the prediction.
-         See FreqaiDataKitchen::remove_outliers() for an example
-        of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
-        for buy signals.
+        Functions here are complementary to the functions of data_cleaning_train.
        """
-        if self.freqai_info["feature_parameters"].get(
+        ft_params = self.freqai_info["feature_parameters"]
+
+        if ft_params.get('inlier_metric_window', 0):
+            dk.compute_inlier_metric(set_='predict')
+
+        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.pca_transform(dataframe)

-        if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
+        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=True)

-        if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
+        if ft_params.get("DI_threshold", 0):
            dk.check_if_pred_in_training_spaces()

-        if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
+        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            dk.use_DBSCAN_to_remove_outliers(predict=True)

    def model_exists(
--- a/freqtrade/freqtradebot.py
+++ b/freqtrade/freqtradebot.py
@@ -418,7 +418,7 @@ class FreqtradeBot(LoggingMixin):

        whitelist = copy.deepcopy(self.active_pair_whitelist)
        if not whitelist:
-            logger.info("Active pair whitelist is empty.")
+            self.log_once("Active pair whitelist is empty.", logger.info)
            return trades_created
        # Remove pairs for currently opened trades from the whitelist
        for trade in Trade.get_open_trades():
@@ -427,8 +427,8 @@ class FreqtradeBot(LoggingMixin):
                logger.debug('Ignoring %s in pair whitelist', trade.pair)

        if not whitelist:
-            logger.info("No currency pair in active pair whitelist, "
-                        "but checking to exit open trades.")
+            self.log_once("No currency pair in active pair whitelist, "
+                          "but checking to exit open trades.", logger.info)
            return trades_created
        if PairLocks.is_global_lock(side='*'):
            # This only checks for total locks (both sides).
--- a/freqtrade/persistence/migrations.py
+++ b/freqtrade/persistence/migrations.py
@@ -307,7 +307,9 @@ def check_migrate(engine, decl_base, previous_tables) -> None:
    # Migrates both trades and orders table!
    # if ('orders' not in previous_tables
    # or not has_column(cols_orders, 'stop_price')):
+    migrating = False
    if not has_column(cols_trades, 'precision_mode'):
+        migrating = True
        logger.info(f"Running database migration for trades - "
                    f"backup: {table_back_name}, {order_table_bak_name}")
        migrate_trades_and_orders_table(
@@ -315,6 +317,7 @@ def check_migrate(engine, decl_base, previous_tables) -> None:
            order_table_bak_name, cols_orders)

    if not has_column(cols_pairlocks, 'side'):
+        migrating = True
        logger.info(f"Running database migration for pairlocks - "
                    f"backup: {pairlock_table_bak_name}")

@@ -329,3 +332,6 @@ def check_migrate(engine, decl_base, previous_tables) -> None:

    set_sqlite_to_wal(engine)
    fix_old_dry_orders(engine)
+
+    if migrating:
+        logger.info("Database migration finished.")
--- a/freqtrade/persistence/models.py
+++ b/freqtrade/persistence/models.py
@@ -53,7 +53,7 @@ def init_db(db_url: str) -> None:
    # https://docs.sqlalchemy.org/en/13/orm/contextual.html#thread-local-scope
    # Scoped sessions proxy requests to the appropriate thread-local session.
    # We should use the scoped_session object - not a seperately initialized version
-    Trade._session = scoped_session(sessionmaker(bind=engine, autoflush=True))
+    Trade._session = scoped_session(sessionmaker(bind=engine, autoflush=False))
    Trade.query = Trade._session.query_property()
    Order.query = Trade._session.query_property()
    PairLock.query = Trade._session.query_property()
--- a/freqtrade/plugins/pairlist/PrecisionFilter.py
+++ b/freqtrade/plugins/pairlist/PrecisionFilter.py
@@ -51,6 +51,11 @@ class PrecisionFilter(IPairList):
        :param ticker: ticker dict as returned from ccxt.fetch_tickers()
        :return: True if the pair can stay, false if it should be removed
        """
+        if ticker.get('last', None) is None:
+            self.log_once(f"Removed {ticker['symbol']} from whitelist, because "
+                          "ticker['last'] is empty (Usually no trade in the last 24h).",
+                          logger.info)
+            return False
        stop_price = ticker['last'] * self._stoploss

        # Adjust stop-prices to precision
--- a/freqtrade/templates/strategy_analysis_example.ipynb
+++ b/freqtrade/templates/strategy_analysis_example.ipynb
@@ -30,7 +30,7 @@
    "\n",
    "# Initialize empty configuration object\n",
    "config = Configuration.from_files([])\n",
-    "# Optionally, use existing configuration file\n",
+    "# Optionally (recommended), use existing configuration file\n",
    "# config = Configuration.from_files([\"config.json\"])\n",
    "\n",
    "# Define some constants\n",
@@ -38,7 +38,7 @@
    "# Name of the strategy class\n",
    "config[\"strategy\"] = \"SampleStrategy\"\n",
    "# Location of the data\n",
-    "data_location = Path(config['user_data_dir'], 'data', 'binance')\n",
+    "data_location = config['datadir']\n",
    "# Pair to analyze - Only use one pair here\n",
    "pair = \"BTC/USDT\""
   ]
@@ -365,7 +365,7 @@
 "metadata": {
  "file_extension": ".py",
  "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3.9.7 64-bit ('trade_397')",
   "language": "python",
   "name": "python3"
  },
@@ -379,7 +379,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.9.7"
  },
  "mimetype": "text/x-python",
  "name": "python",
@@ -427,7 +427,12 @@
   ],
   "window_display": false
  },
-  "version": 3
+  "version": 3,
+  "vscode": {
+   "interpreter": {
+    "hash": "675f32a300d6d26767470181ad0b11dd4676bcce7ed1dd2ffe2fbc370c95fc7c"
+   }
+  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
--- a/freqtrade/wallets.py
+++ b/freqtrade/wallets.py
@@ -148,7 +148,7 @@ class Wallets:
                # Position is not open ...
                continue
            size = self._exchange._contracts_to_amount(symbol, position['contracts'])
-            collateral = position['collateral']
+            collateral = position['collateral'] or 0.0
            leverage = position['leverage']
            self._positions[symbol] = PositionWallet(
                symbol, position=size,