Merge branch 'freqtrade:develop' into fixHyperoptFreqai

2022-09-07 11:07:51 -03:00
parent 5aba5de20f c08c82bc40
commit 6b7644029c
11 changed files with 370 additions and 73 deletions
@@ -113,6 +113,8 @@ Mandatory parameters are marked as **Required**, which means that they are requi
 | `use_SVM_to_remove_outliers` | Train a support vector machine to detect and remove outliers from the training data set, as well as from incoming data points. See details about how it works [here](#removing-outliers-using-a-support-vector-machine-svm). <br> **Datatype:** Boolean.
 | `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. See details about some select parameters [here](#removing-outliers-using-a-support-vector-machine-svm). <br> **Datatype:** Dictionary.
 | `use_DBSCAN_to_remove_outliers` | Cluster data using DBSCAN to identify and remove outliers from training and prediction data. See details about how it works [here](#removing-outliers-with-dbscan). <br> **Datatype:** Boolean. 
+| `inlier_metric_window` | If set, FreqAI will add the `inlier_metric` to the training feature set and set the lookback to be the `inlier_metric_window`. Details of how the `inlier_metric` is computed can be found [here](#using-the-inliermetric). <br> **Datatype:** Integer. Default: `0`
+| `noise_standard_deviation` | If > 0, FreqAI adds noise to the training features. FreqAI generates random deviates from a Gaussian distribution with a standard deviation of `noise_standard_deviation` and adds them to all data points. The value should be chosen relative to the normalized space between -1 and 1. In other words, since data is always normalized between -1 and 1 in FreqAI, the user can expect a `noise_standard_deviation: 0.05` to see 32% of data randomly increased/decreased by more than 2.5% (i.e. the percent of data falling outside the first standard deviation). Good for preventing overfitting. <br> **Datatype:** Float. Default: `0`
 | `outlier_protection_percentage` | If more than `outlier_protection_percentage` % of points are detected as outliers by the SVM or DBSCAN, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact. If the outlier protection is triggered, no predictions will be made based on the training data. <br> **Datatype:** Float. Default: `30`
 | `reverse_train_test_order` | If true, FreqAI will train on the latest data split and test on historical split of the data. This allows the model to be trained up to the most recent data point, while avoiding overfitting. However, users should be careful to understand the unorthodox nature of this parameter before employing it. <br> **Datatype:** Boolean. Default: False
 |  |  **Data split parameters**
@@ -654,6 +656,18 @@ testing; the other points are used for training.

 The test data is used to evaluate the performance of the model after training. If the test score is high, the model is able to capture the behavior of the data well. If the test score is low, either the model does not capture the complexity of the data, the test data is significantly different from the train data, or a different model should be used.

+### Using the `inlier_metric`
+
+The `inlier_metric` is a metric aimed at quantifying how different a prediction data point is from the most recent historic data points. 
+
+The user can define the lookback window with `inlier_metric_window`. FreqAI will compute the distance between the present prediction point and each of the previous data points (total of `inlier_metric_window` points). 
+
+This function goes one step further - during training, it computes the `inlier_metric` for all training data points and builds Weibull distributions for each lookback point. The cumulative distribution function for the Weibull distribution is used to produce a quantile for each of the data points. The quantiles for each lookback point are averaged to create the `inlier_metric`. 
+
+FreqAI adds this `inlier_metric` score to the training features! In other words, your model is trained to recognize how this temporal inlier metric is related to the user set labels. 
+
+This function does **not** remove outliers from the data set.
+
 ### Controlling the model learning process

 Model training parameters are unique to the machine learning library selected by the user. FreqAI allows the user to set any parameter for any library using the `model_training_parameters` dictionary in the user configuration file. The example configuration file (found in `config_examples/config_freqai.example.json`) show some of the example parameters associated with `Catboost` and `LightGBM`, but the user can add any parameters available in those libraries.
@@ -446,6 +446,15 @@ class Exchange:
        contract_size = self.get_contract_size(pair)
        return contracts_to_amount(num_contracts, contract_size)

+    def amount_to_contract_precision(self, pair: str, amount: float) -> float:
+        """
+        Exchange-level convenience around the module-level
+        amount_to_contract_precision() helper.
+        Looks up the contract size and amount precision for the pair and passes them,
+        together with this exchange's precision mode, to that helper.
+        :param pair: Pair used to look up contract size and amount precision
+        :param amount: Amount to adjust
+        :return: Value returned by the module-level helper
+        """
+        size_of_contract = self.get_contract_size(pair)
+        precision = self.get_precision_amount(pair)
+
+        return amount_to_contract_precision(
+            amount, precision, self.precisionMode, size_of_contract)
+
+
    def set_sandbox(self, api: ccxt.Exchange, exchange_config: dict, name: str) -> None:
        if exchange_config.get('sandbox'):
            if api.urls.get('test'):
@@ -1,7 +1,8 @@
 import copy
-import datetime
 import logging
 import shutil
+from datetime import datetime, timezone
+from math import cos, sin
 from pathlib import Path
 from typing import Any, Dict, List, Tuple

@@ -9,6 +10,7 @@ import numpy as np
 import numpy.typing as npt
 import pandas as pd
 from pandas import DataFrame
+from scipy import stats
 from sklearn import linear_model
 from sklearn.cluster import DBSCAN
 from sklearn.metrics.pairwise import pairwise_distances
@@ -360,7 +362,7 @@ class FreqaiDataKitchen:

    def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
        """
-        Normalize a set of data using the mean and standard deviation from
+        Denormalize a set of data using the mean and standard deviation from
        the associated training data.
        :param df: Dataframe of predictions to be denormalized
        """
@@ -399,7 +401,7 @@ class FreqaiDataKitchen:
        config_timerange = TimeRange.parse_timerange(self.config["timerange"])
        if config_timerange.stopts == 0:
            config_timerange.stopts = int(
-                datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+                datetime.now(tz=timezone.utc).timestamp()
            )
        timerange_train = copy.deepcopy(full_timerange)
        timerange_backtest = copy.deepcopy(full_timerange)
@@ -416,8 +418,8 @@ class FreqaiDataKitchen:
            timerange_train.stopts = timerange_train.startts + train_period_days

            first = False
-            start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
-            stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts)
+            start = datetime.fromtimestamp(timerange_train.startts, tz=timezone.utc)
+            stop = datetime.fromtimestamp(timerange_train.stopts, tz=timezone.utc)
            tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
            tr_training_list_timerange.append(copy.deepcopy(timerange_train))

@@ -430,8 +432,8 @@ class FreqaiDataKitchen:
            if timerange_backtest.stopts > config_timerange.stopts:
                timerange_backtest.stopts = config_timerange.stopts

-            start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts)
-            stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts)
+            start = datetime.fromtimestamp(timerange_backtest.startts, tz=timezone.utc)
+            stop = datetime.fromtimestamp(timerange_backtest.stopts, tz=timezone.utc)
            tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
            tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest))

@@ -451,8 +453,8 @@ class FreqaiDataKitchen:
                   it is sliced down to just the present training period.
        """

-        start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
-        stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
+        start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
+        stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
        df = df.loc[df["date"] >= start, :]
        if not self.live:
            df = df.loc[df["date"] < stop, :]
@@ -653,8 +655,6 @@ class FreqaiDataKitchen:
        is an outlier.
        """

-        from math import cos, sin
-
        if predict:
            if not self.data['DBSCAN_eps']:
                return
@@ -747,6 +747,111 @@ class FreqaiDataKitchen:

        return

+    def compute_inlier_metric(self, set_='train') -> None:
+        """
+        Compute inlier metric from backwards distance distributions.
+        This metric defines how well features from a timepoint fit
+        into previous timepoints.
+
+        The result is appended as an `inlier_metric` column to the selected feature
+        set inside `self.data_dictionary`. For the 'train' and 'test' sets the first
+        `inlier_metric_window` rows are cut from features, labels and weights. For any
+        other value the prediction features are used and missing metric values are
+        filled with 0.
+        :param set_: 'train', 'test', or any other value to use 'prediction_features'
+        """
+
+        no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
+
+        if set_ == 'train':
+            compute_df = copy.deepcopy(self.data_dictionary['train_features'])
+        elif set_ == 'test':
+            compute_df = copy.deepcopy(self.data_dictionary['test_features'])
+        else:
+            compute_df = copy.deepcopy(self.data_dictionary['prediction_features'])
+
+        # Reverse the row order so that, for every row, the columns that follow it
+        # belong to rows that precede it in the original ordering.
+        compute_df_reindexed = compute_df.reindex(
+            index=np.flip(compute_df.index)
+        )
+
+        # Upper triangle only: each row keeps the distances to the rows that come
+        # after it in the reversed frame; everything else is zeroed.
+        pairwise = pd.DataFrame(
+            np.triu(
+                pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count)
+            ),
+            columns=compute_df_reindexed.index,
+            index=compute_df_reindexed.index
+        )
+        pairwise = pairwise.round(5)
+
+        column_labels = [f'd{i}' for i in range(1, no_prev_pts + 1)]
+        distances = pd.DataFrame(
+            columns=column_labels, index=compute_df.index
+        )
+
+        # The first no_prev_pts rows are skipped and stay NaN in `distances`.
+        for index in compute_df.index[no_prev_pts:]:
+            current_row = pairwise.loc[[index]]
+            # Drop the zeroed columns, then keep the first no_prev_pts distances.
+            current_row_no_zeros = current_row.loc[
+                :, (current_row != 0).any(axis=0)
+            ]
+            distances.loc[[index]] = current_row_no_zeros.iloc[
+                :, :no_prev_pts
+            ]
+        distances = distances.replace([np.inf, -np.inf], np.nan)
+        # `axis` must be passed by keyword: the positional form was deprecated in
+        # pandas 1.x and removed in pandas 2.0.
+        drop_index = pd.isnull(distances).any(axis=1)
+        distances = distances[~drop_index]
+
+        # Fit one Weibull distribution per lookback column and convert each distance
+        # into its quantile under that fit.
+        inliers = pd.DataFrame(index=distances.index)
+        for key in distances.keys():
+            current_distances = distances[key].dropna()
+            fit_params = stats.weibull_min.fit(current_distances)
+            quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
+
+            df_inlier = pd.DataFrame(
+                {key: quantiles}, index=distances.index
+            )
+            inliers = pd.concat(
+                [inliers, df_inlier], axis=1
+            )
+
+        # Average of the per-lookback quantiles, reindexed onto the full feature index.
+        inlier_metric = pd.DataFrame(
+            data=inliers.sum(axis=1) / no_prev_pts,
+            columns=['inlier_metric'],
+            index=compute_df.index
+        )
+
+        # Min-max rescale to the range [-1, 1].
+        inlier_metric = (2 * (inlier_metric - inlier_metric.min()) /
+                         (inlier_metric.max() - inlier_metric.min()) - 1)
+
+        if set_ in ('train', 'test'):
+            # Cut the leading rows that have no metric from features, labels and weights.
+            inlier_metric = inlier_metric.iloc[no_prev_pts:]
+            compute_df = compute_df.iloc[no_prev_pts:]
+            self.remove_beginning_points_from_data_dict(set_, no_prev_pts)
+            self.data_dictionary[f'{set_}_features'] = pd.concat(
+                [compute_df, inlier_metric], axis=1)
+        else:
+            # All prediction rows are kept; rows without a metric are filled with 0.
+            self.data_dictionary['prediction_features'] = pd.concat(
+                [compute_df, inlier_metric], axis=1)
+            self.data_dictionary['prediction_features'].fillna(0, inplace=True)
+
+        logger.info('Inlier metric computed and added to features.')
+
+
+    def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10):
+        """
+        Drop the first `no_prev_pts` entries from the features, weights and labels
+        stored in `self.data_dictionary` for the given set.
+        :param set_: Prefix of the data dictionary keys to trim (e.g. 'train' or 'test')
+        :param no_prev_pts: Number of leading points to remove
+        """
+        store = self.data_dictionary
+        feature_key, weight_key, label_key = (
+            f'{set_}_{part}' for part in ('features', 'weights', 'labels')
+        )
+        # Read all three entries first so a missing key fails before anything is modified.
+        old_features = store[feature_key]
+        old_weights = store[weight_key]
+        old_labels = store[label_key]
+        # Weights are sliced directly; features and labels are sliced positionally.
+        store[weight_key] = old_weights[no_prev_pts:]
+        store[feature_key] = old_features.iloc[no_prev_pts:]
+        store[label_key] = old_labels.iloc[no_prev_pts:]
+
+    def add_noise_to_training_features(self) -> None:
+        """
+        Perturb the training features with zero-mean Gaussian noise to reduce the
+        risk of overfitting. The standard deviation is read from the
+        `noise_standard_deviation` feature parameter.
+        """
+        std_dev = self.freqai_config["feature_parameters"]["noise_standard_deviation"]
+        n_rows, n_cols = self.data_dictionary['train_features'].shape
+        # Mean of 0: the noise does not shift the features.
+        noise = np.random.normal(0, std_dev, [n_rows, n_cols])
+        self.data_dictionary['train_features'] += noise
+
    def find_features(self, dataframe: DataFrame) -> None:
        """
        Find features in the strategy provided dataframe
@@ -872,14 +977,14 @@ class FreqaiDataKitchen:
                                       "Please indicate the end date of your desired backtesting. "
                                       "timerange.")
            # backtest_timerange.stopts = int(
-            #     datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+            #     datetime.now(tz=timezone.utc).timestamp()
            # )

        backtest_timerange.startts = (
            backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY
        )
-        start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
-        stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts)
+        start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc)
+        stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc)
        full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")

        self.full_path = Path(
@@ -905,7 +1010,7 @@ class FreqaiDataKitchen:
        :return:
            bool = If the model is expired or not.
        """
-        time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+        time = datetime.now(tz=timezone.utc).timestamp()
        elapsed_time = (time - trained_timestamp) / 3600  # hours
        max_time = self.freqai_config.get("expiration_hours", 0)
        if max_time > 0:
@@ -917,7 +1022,7 @@ class FreqaiDataKitchen:
        self, trained_timestamp: int
    ) -> Tuple[bool, TimeRange, TimeRange]:

-        time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
+        time = datetime.now(tz=timezone.utc).timestamp()
        trained_timerange = TimeRange()
        data_load_timerange = TimeRange()

@@ -1,10 +1,9 @@
-# import contextlib
-import datetime
 import logging
 import shutil
 import threading
 import time
 from abc import ABC, abstractmethod
+from datetime import datetime, timezone
 from pathlib import Path
 from threading import Lock
 from typing import Any, Dict, List, Tuple
@@ -59,7 +58,6 @@ class IFreqaiModel(ABC):
            "data_split_parameters", {})
        self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
            "model_training_parameters", {})
-        self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
        self.retrain = False
        self.first = True
        self.set_full_path()
@@ -70,11 +68,14 @@ class IFreqaiModel(ABC):
        self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
        self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
        self.scanning = False
+        self.ft_params = self.freqai_info["feature_parameters"]
        self.keras: bool = self.freqai_info.get("keras", False)
-        if self.keras and self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0):
-            self.freqai_info["feature_parameters"]["DI_threshold"] = 0
+        if self.keras and self.ft_params.get("DI_threshold", 0):
+            self.ft_params["DI_threshold"] = 0
            logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
        self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
+        if self.ft_params.get("inlier_metric_window", 0):
+            self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
        self.pair_it = 0
        self.pair_it_train = 0
        self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
@@ -195,7 +196,7 @@ class IFreqaiModel(ABC):

                if retrain:
                    self.train_timer('start')
-                    self.train_model_in_series(
+                    self.extract_data_and_train_model(
                        new_trained_timerange, pair, strategy, dk, data_load_timerange
                    )
                    self.train_timer('stop')
@@ -235,12 +236,12 @@ class IFreqaiModel(ABC):
            dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)

            trained_timestamp = tr_train
-            tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime(
-                "%Y-%m-%d %H:%M:%S"
-            )
-            tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime(
-                "%Y-%m-%d %H:%M:%S"
-            )
+            tr_train_startts_str = datetime.fromtimestamp(
+                                                tr_train.startts,
+                                                tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+            tr_train_stopts_str = datetime.fromtimestamp(
+                                                tr_train.stopts,
+                                                tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
            logger.info(
                f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
                f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
@@ -425,24 +426,25 @@ class IFreqaiModel(ABC):

    def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
        """
-        Base data cleaning method for train
-        Any function inside this method should drop training data points from the filtered_dataframe
-        based on user decided logic. See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an
-        example of how outlier data points are dropped from the dataframe used for training.
+        Base data cleaning method for train.
+        Functions here improve/modify the input data by identifying outliers,
+        computing additional metrics, adding noise, reducing dimensionality etc.
        """

-        if self.freqai_info["feature_parameters"].get(
+        ft_params = self.freqai_info["feature_parameters"]
+
+        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.principal_component_analysis()

-        if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
+        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=False)

-        if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
+        if ft_params.get("DI_threshold", 0):
            dk.data["avg_mean_dist"] = dk.compute_distances()

-        if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
+        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            if dk.pair in self.dd.old_DBSCAN_eps:
                eps = self.dd.old_DBSCAN_eps[dk.pair]
            else:
@@ -450,29 +452,36 @@ class IFreqaiModel(ABC):
            dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps)
            self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps']

+        if ft_params.get('inlier_metric_window', 0):
+            dk.compute_inlier_metric(set_='train')
+            if self.freqai_info["data_split_parameters"]["test_size"] > 0:
+                dk.compute_inlier_metric(set_='test')
+
+        if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
+            dk.add_noise_to_training_features()
+
    def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
        """
        Base data cleaning method for predict.
-        These functions each modify dk.do_predict, which is a dataframe with equal length
-        to the number of candles coming from and returning to the strategy. Inside do_predict,
-         1 allows prediction and < 0 signals to the strategy that the model is not confident in
-         the prediction.
-         See FreqaiDataKitchen::remove_outliers() for an example
-        of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
-        for buy signals.
+        Functions here are complementary to the functions of data_cleaning_train.
        """
-        if self.freqai_info["feature_parameters"].get(
+        ft_params = self.freqai_info["feature_parameters"]
+
+        if ft_params.get('inlier_metric_window', 0):
+            dk.compute_inlier_metric(set_='predict')
+
+        if ft_params.get(
            "principal_component_analysis", False
        ):
            dk.pca_transform(dataframe)

-        if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
+        if ft_params.get("use_SVM_to_remove_outliers", False):
            dk.use_SVM_to_remove_outliers(predict=True)

-        if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
+        if ft_params.get("DI_threshold", 0):
            dk.check_if_pred_in_training_spaces()

-        if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
+        if ft_params.get("use_DBSCAN_to_remove_outliers", False):
            dk.use_DBSCAN_to_remove_outliers(predict=True)

    def model_exists(
@@ -508,7 +517,7 @@ class IFreqaiModel(ABC):
            Path(self.full_path, Path(self.config["config_files"][0]).name),
        )

-    def train_model_in_series(
+    def extract_data_and_train_model(
        self,
        new_trained_timerange: TimeRange,
        pair: str,
@@ -600,7 +609,7 @@ class IFreqaiModel(ABC):

        # # for keras type models, the conv_window needs to be prepended so
        # # viewing is correct in frequi
-        if self.freqai_info.get('keras', False):
+        if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0):
            n_lost_points = self.freqai_info.get('conv_width', 2)
            zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))),
                                 columns=hist_preds_df.columns)
@@ -583,7 +583,9 @@ class FreqtradeBot(LoggingMixin):

        if stake_amount is not None and stake_amount < 0.0:
            # We should decrease our position
-            amount = abs(float(FtPrecise(stake_amount) / FtPrecise(current_exit_rate)))
+            amount = self.exchange.amount_to_contract_precision(
+                trade.pair,
+                abs(float(FtPrecise(stake_amount) / FtPrecise(current_exit_rate))))
            if amount > trade.amount:
                # This is currently ineffective as remaining would become < min tradable
                # Fixing this would require checking for 0.0 there -
@@ -592,9 +594,14 @@ class FreqtradeBot(LoggingMixin):
                    f"Adjusting amount to trade.amount as it is higher. {amount} > {trade.amount}")
                amount = trade.amount

+            if amount == 0.0:
+                logger.info("Amount to sell is 0.0 due to exchange limits - not selling.")
+                return
+
            remaining = (trade.amount - amount) * current_exit_rate
            if remaining < min_exit_stake:
-                logger.info(f'Remaining amount of {remaining} would be too small.')
+                logger.info(f"Remaining amount of {remaining} would be smaller "
+                            f"than the minimum of {min_exit_stake}.")
                return

            self.execute_trade_exit(trade, current_exit_rate, exit_check=ExitCheckTuple(
@@ -537,7 +537,11 @@ class Backtesting:
                    return pos_trade

        if stake_amount is not None and stake_amount < 0.0:
-            amount = abs(stake_amount) / current_rate
+            amount = amount_to_contract_precision(
+                abs(stake_amount) / current_rate, trade.amount_precision,
+                self.precision_mode, trade.contract_size)
+            if amount == 0.0:
+                return trade
            if amount > trade.amount:
                # This is currently ineffective as remaining would become < min tradable
                amount = trade.amount
@@ -4456,6 +4456,39 @@ def test__amount_to_contracts(
    assert result_amount == param_amount


+@pytest.mark.parametrize('pair,amount,expected_spot,expected_fut', [
+    # Contract size of 0.01
+    ('ADA/USDT:USDT', 40, 40, 40),
+    ('ADA/USDT:USDT', 10.4445555, 10.4, 10.444),
+    ('LTC/ETH', 30, 30, 30),
+    ('LTC/USD', 30, 30, 30),
+    # contract size of 10
+    ('ETH/USDT:USDT', 10.111, 10.1, 10),
+    ('ETH/USDT:USDT', 10.188, 10.1, 10),
+    ('ETH/USDT:USDT', 10.988, 10.9, 10),
+])
+def test_amount_to_contract_precision(
+    mocker, default_conf, pair, amount, expected_spot, expected_fut,
+):
+    """
+    Exchange.amount_to_contract_precision must return the expected amount for the
+    same pair in spot mode and then in futures mode.
+    """
+    api_mock = MagicMock()
+    default_conf['margin_mode'] = 'isolated'
+
+    # Spot first, then futures; a fresh patched exchange is built for each mode.
+    for trading_mode, expected in (('spot', expected_spot), ('futures', expected_fut)):
+        default_conf['trading_mode'] = trading_mode
+        exchange = get_patched_exchange(mocker, default_conf, api_mock)
+        result_size = exchange.amount_to_contract_precision(pair, amount)
+        assert result_size == expected
+
+
+
+
@pytest.mark.parametrize('exchange_name,open_rate,is_short,trading_mode,margin_mode', [
    # Bittrex
    ('bittrex', 2.0, False, 'spot', None),
@@ -81,6 +81,37 @@ def get_patched_freqaimodel(mocker, freqaiconf):
    return freqaimodel


+def make_unfiltered_dataframe(mocker, freqai_conf):
+    """
+    Build a patched FreqAI model for ADA/BTC, load pair histories for
+    20180110-20180130, populate indicators through the strategy and slice the
+    result to 20180120-20180130.
+    :return: tuple of (freqai model, unfiltered dataframe)
+    """
+    full_range = "20180110-20180130"
+    freqai_conf.update({"timerange": full_range})
+
+    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
+    patched_exchange = get_patched_exchange(mocker, freqai_conf)
+    strategy.dp = DataProvider(freqai_conf, patched_exchange)
+    strategy.freqai_info = freqai_conf.get("freqai", {})
+
+    model = strategy.freqai
+    model.live = True
+    model.dk = FreqaiDataKitchen(freqai_conf)
+    model.dk.pair = "ADA/BTC"
+    load_range = TimeRange.parse_timerange(full_range)
+    model.dd.load_all_pair_histories(load_range, model.dk)
+
+    model.dd.pair_dict = MagicMock()
+
+    slice_range = TimeRange.parse_timerange("20180120-20180130")
+
+    corr_dfs, base_dfs = model.dd.get_base_and_corr_dataframes(
+        load_range, model.dk.pair, model.dk
+    )
+
+    populated = model.dk.use_strategy_to_populate_indicators(
+        strategy, corr_dfs, base_dfs, model.dk.pair
+    )
+
+    return model, model.dk.slice_dataframe(slice_range, populated)
+
+
+
 def make_data_dictionary(mocker, freqai_conf):
    freqai_conf.update({"timerange": "20180110-20180130"})

@@ -92,12 +123,11 @@ def make_data_dictionary(mocker, freqai_conf):
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)
    freqai.dk.pair = "ADA/BTC"
-    timerange = TimeRange.parse_timerange("20180110-20180130")
-    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
+    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
+    freqai.dd.load_all_pair_histories(data_load_timerange, freqai.dk)

    freqai.dd.pair_dict = MagicMock()

-    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

    corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes(
@@ -6,7 +6,8 @@ import pytest

 from freqtrade.exceptions import OperationalException
 from tests.conftest import log_has_re
-from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary
+from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary,
+                                   make_unfiltered_dataframe)


@pytest.mark.parametrize(
@@ -91,3 +92,72 @@ def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf,
        "SVM detected 8.09%",
        caplog,
    )
+
+
+def test_compute_inlier_metric(mocker, freqai_conf, caplog):
+    """Computing the inlier metric on the train set must log its completion message."""
+    model = make_data_dictionary(mocker, freqai_conf)
+    feature_params = freqai_conf['freqai']['feature_parameters']
+    feature_params.update({"inlier_metric_window": 10})
+    model.dk.compute_inlier_metric(set_='train')
+    expected_message = "Inlier metric computed and added to features."
+    assert log_has_re(expected_message, caplog)
+
+
+def test_add_noise_to_training_features(mocker, freqai_conf):
+    """Smoke test: adding noise with a standard deviation of 0.1 must not raise."""
+    model = make_data_dictionary(mocker, freqai_conf)
+    feature_params = freqai_conf['freqai']['feature_parameters']
+    feature_params.update({"noise_standard_deviation": 0.1})
+    model.dk.add_noise_to_training_features()
+
+
+def test_remove_beginning_points_from_data_dict(mocker, freqai_conf):
+    """
+    Removing the beginning points with the default no_prev_pts (10) must shorten
+    the train features, weights and labels by that many entries.
+    """
+    freqai = make_data_dictionary(mocker, freqai_conf)
+    keys = ('train_features', 'train_weights', 'train_labels')
+    lengths_before = {key: len(freqai.dk.data_dictionary[key]) for key in keys}
+
+    freqai.dk.remove_beginning_points_from_data_dict(set_='train')
+
+    for key in keys:
+        # Slicing never yields a negative length, hence the lower bound of 0.
+        assert len(freqai.dk.data_dictionary[key]) == max(lengths_before[key] - 10, 0)
+
+
+def test_principal_component_analysis(mocker, freqai_conf, caplog):
+    """Running PCA on the data kitchen must log the dimensionality reduction message."""
+    model = make_data_dictionary(mocker, freqai_conf)
+    model.dk.principal_component_analysis()
+    expected_message = "reduced feature dimension by"
+    assert log_has_re(expected_message, caplog)
+
+
+def test_normalize_data(mocker, freqai_conf):
+    """After normalizing the data dictionary, dk.data must hold 56 entries."""
+    model = make_data_dictionary(mocker, freqai_conf)
+    kitchen = model.dk
+    kitchen.normalize_data(kitchen.data_dictionary)
+    assert len(model.dk.data) == 56
+
+
+def test_filter_features(mocker, freqai_conf):
+    """Filtering the unfiltered dataframe for training must leave 26 feature columns."""
+    model, raw_df = make_unfiltered_dataframe(mocker, freqai_conf)
+    kitchen = model.dk
+    kitchen.find_features(raw_df)
+
+    filter_result = kitchen.filter_features(
+        raw_df,
+        kitchen.training_features_list,
+        kitchen.label_list,
+        training_filter=True,
+    )
+    # filter_features returns (features, labels); only the features are checked here.
+    filtered_df, _labels = filter_result
+
+    assert len(filtered_df.columns) == 26
+
+
+def test_make_train_test_datasets(mocker, freqai_conf):
+    """
+    Splitting the filtered features and labels must yield a data dictionary with
+    7 entries and 1916 training rows.
+    """
+    model, raw_df = make_unfiltered_dataframe(mocker, freqai_conf)
+    kitchen = model.dk
+    kitchen.find_features(raw_df)
+
+    features_filtered, labels_filtered = kitchen.filter_features(
+        raw_df,
+        kitchen.training_features_list,
+        kitchen.label_list,
+        training_filter=True,
+    )
+
+    split = kitchen.make_train_test_datasets(features_filtered, labels_filtered)
+
+    assert split
+    assert len(split) == 7
+    assert len(split['train_features'].index) == 1916
@@ -17,7 +17,7 @@ def is_arm() -> bool:
    return "arm" in machine or "aarch64" in machine


-def test_train_model_in_series_LightGBM(mocker, freqai_conf):
+def test_extract_data_and_train_model_LightGBM(mocker, freqai_conf):
    freqai_conf.update({"timerange": "20180110-20180130"})

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
@@ -35,7 +35,8 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf):
    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

-    freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
+    freqai.extract_data_and_train_model(
+        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)

    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
@@ -45,7 +46,7 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf):
    shutil.rmtree(Path(freqai.dk.full_path))


-def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
+def test_extract_data_and_train_model_LightGBMMultiModel(mocker, freqai_conf):
    freqai_conf.update({"timerange": "20180110-20180130"})
    freqai_conf.update({"strategy": "freqai_test_multimodel_strat"})
    freqai_conf.update({"freqaimodel": "LightGBMRegressorMultiTarget"})
@@ -64,7 +65,8 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

-    freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
+    freqai.extract_data_and_train_model(
+        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)

    assert len(freqai.dk.label_list) == 2
    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
@@ -77,7 +79,7 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):


@pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...")
-def test_train_model_in_series_Catboost(mocker, freqai_conf):
+def test_extract_data_and_train_model_Catboost(mocker, freqai_conf):
    freqai_conf.update({"timerange": "20180110-20180130"})
    freqai_conf.update({"freqaimodel": "CatboostRegressor"})
    # freqai_conf.get('freqai', {}).update(
@@ -98,8 +100,8 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

-    freqai.train_model_in_series(new_timerange, "ADA/BTC",
-                                 strategy, freqai.dk, data_load_timerange)
+    freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
+                                        strategy, freqai.dk, data_load_timerange)

    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
@@ -110,7 +112,7 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):


@pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...")
-def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
+def test_extract_data_and_train_model_CatboostClassifier(mocker, freqai_conf):
    freqai_conf.update({"timerange": "20180110-20180130"})
    freqai_conf.update({"freqaimodel": "CatboostClassifier"})
    freqai_conf.update({"strategy": "freqai_test_classifier"})
@@ -130,8 +132,8 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

-    freqai.train_model_in_series(new_timerange, "ADA/BTC",
-                                 strategy, freqai.dk, data_load_timerange)
+    freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
+                                        strategy, freqai.dk, data_load_timerange)

    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
@@ -141,7 +143,7 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
    shutil.rmtree(Path(freqai.dk.full_path))


-def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
+def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf):
    freqai_conf.update({"timerange": "20180110-20180130"})
    freqai_conf.update({"freqaimodel": "LightGBMClassifier"})
    freqai_conf.update({"strategy": "freqai_test_classifier"})
@@ -161,8 +163,8 @@ def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

-    freqai.train_model_in_series(new_timerange, "ADA/BTC",
-                                 strategy, freqai.dk, data_load_timerange)
+    freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
+                                        strategy, freqai.dk, data_load_timerange)

    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
@@ -296,7 +298,8 @@ def test_follow_mode(mocker, freqai_conf):
    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

-    freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
+    freqai.extract_data_and_train_model(
+        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)

    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
@@ -345,7 +348,8 @@ def test_principal_component_analysis(mocker, freqai_conf):
    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
    new_timerange = TimeRange.parse_timerange("20180120-20180130")

-    freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
+    freqai.extract_data_and_train_model(
+        new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)

    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl")

@@ -485,7 +485,7 @@ def test_dca_exiting(default_conf_usdt, ticker_usdt, fee, mocker, caplog) -> Non
    assert len(trade.orders) == 1
    assert pytest.approx(trade.stake_amount) == 60
    assert pytest.approx(trade.amount) == 30.0
-    assert log_has_re("Remaining amount of 1.6.* would be too small.", caplog)
+    assert log_has_re("Remaining amount of 1.6.* would be smaller than the minimum of 10.", caplog)

    freqtrade.strategy.adjust_trade_position = MagicMock(return_value=-20)

@@ -504,9 +504,21 @@ def test_dca_exiting(default_conf_usdt, ticker_usdt, fee, mocker, caplog) -> Non
    freqtrade.strategy.adjust_trade_position = MagicMock(return_value=-50)
    freqtrade.process()
    assert log_has_re("Adjusting amount to trade.amount as it is higher.*", caplog)
-    assert log_has_re("Remaining amount of 0.0 would be too small.", caplog)
+    assert log_has_re("Remaining amount of 0.0 would be smaller than the minimum of 10.", caplog)
    trade = Trade.get_trades().first()
    assert len(trade.orders) == 2
    assert trade.orders[-1].ft_order_side == 'sell'
    assert pytest.approx(trade.stake_amount) == 40.198
    assert trade.is_open
+
+    # use amount that would trunc to 0.0 once selling
+    mocker.patch("freqtrade.exchange.Exchange.amount_to_contract_precision",
+                 lambda s, p, v: round(v, 1))
+    freqtrade.strategy.adjust_trade_position = MagicMock(return_value=-0.01)
+    freqtrade.process()
+    trade = Trade.get_trades().first()
+    assert len(trade.orders) == 2
+    assert trade.orders[-1].ft_order_side == 'sell'
+    assert pytest.approx(trade.stake_amount) == 40.198
+    assert trade.is_open
+    assert log_has_re('Amount to sell is 0.0 due to exchange limits - not selling.', caplog)