diff --git a/docs/freqai.md b/docs/freqai.md index 7aabb0e56..006fe1393 100644 --- a/docs/freqai.md +++ b/docs/freqai.md @@ -113,6 +113,8 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `use_SVM_to_remove_outliers` | Train a support vector machine to detect and remove outliers from the training data set, as well as from incoming data points. See details about how it works [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Boolean. | `svm_params` | All parameters available in Sklearn's `SGDOneClassSVM()`. See details about some select parameters [here](#removing-outliers-using-a-support-vector-machine-svm).
**Datatype:** Dictionary. | `use_DBSCAN_to_remove_outliers` | Cluster data using DBSCAN to identify and remove outliers from training and prediction data. See details about how it works [here](#removing-outliers-with-dbscan).
**Datatype:** Boolean. +| `inlier_metric_window` | If set, FreqAI will add the `inlier_metric` to the training feature set and set the lookback to be the `inlier_metric_window`. Details of how the `inlier_metric` is computed can be found [here](#using-the-inliermetric)
**Datatype:** int. Default: 0 +| `noise_standard_deviation` | If > 0, FreqAI adds noise to the training features. FreqAI generates random deviates from a gaussian distribution with a standard deviation of `noise_standard_deviation` and adds them to all data points. Value should be kept relative to the normalized space between -1 and 1). In other words, since data is always normalized between -1 and 1 in FreqAI, the user can expect a `noise_standard_deviation: 0.05` to see 32% of data randomly increased/decreased by more than 2.5% (i.e. the percent of data falling within the first standard deviation). Good for preventing overfitting.
**Datatype:** int. Default: 0 | `outlier_protection_percentage` | If more than `outlier_protection_percentage` % of points are detected as outliers by the SVM or DBSCAN, FreqAI will log a warning message and ignore outlier detection while keeping the original dataset intact. If the outlier protection is triggered, no predictions will be made based on the training data.
**Datatype:** Float. Default: `30` | `reverse_train_test_order` | If true, FreqAI will train on the latest data split and test on historical split of the data. This allows the model to be trained up to the most recent data point, while avoiding overfitting. However, users should be careful to understand unorthodox nature of this parameter before employing it.
**Datatype:** Boolean. Default: False | | **Data split parameters** @@ -654,6 +656,18 @@ testing; the other points are used for training. The test data is used to evaluate the performance of the model after training. If the test score is high, the model is able to capture the behavior of the data well. If the test score is low, either the model either does not capture the complexity of the data, the test data is significantly different from the train data, or a different model should be used. +### Using the `inlier_metric` + +The `inlier_metric` is a metric aimed at quantifying how different a prediction data point is from the most recent historic data points. + +User can set `inlier_metric_window` to set the look back window. FreqAI will compute the distance between the present prediction point and each of the previous data points (total of `inlier_metric_window` points). + +This function goes one step further - during training, it computes the `inlier_metric` for all training data points and builds weibull distributions for each each lookback point. The cumulative distribution function for the weibull distribution is used to produce a quantile for each of the data points. The quantiles for each lookback point are averaged to create the `inlier_metric`. + +FreqAI adds this `inlier_metric` score to the training features! In other words, your model is trained to recognize how this temporal inlier metric is related to the user set labels. + +This function does **not** remove outliers from the data set. + ### Controlling the model learning process Model training parameters are unique to the machine learning library selected by the user. FreqAI allows the user to set any parameter for any library using the `model_training_parameters` dictionary in the user configuration file. The example configuration file (found in `config_examples/config_freqai.example.json`) show some of the example parameters associated with `Catboost` and `LightGBM`, but the user can add any parameters available in those libraries. diff --git a/freqtrade/exchange/exchange.py b/freqtrade/exchange/exchange.py index c3dca43a8..af072d555 100644 --- a/freqtrade/exchange/exchange.py +++ b/freqtrade/exchange/exchange.py @@ -446,6 +446,15 @@ class Exchange: contract_size = self.get_contract_size(pair) return contracts_to_amount(num_contracts, contract_size) + def amount_to_contract_precision(self, pair: str, amount: float) -> float: + """ + Helper wrapper around amount_to_contract_precision + """ + contract_size = self.get_contract_size(pair) + + return amount_to_contract_precision(amount, self.get_precision_amount(pair), + self.precisionMode, contract_size) + def set_sandbox(self, api: ccxt.Exchange, exchange_config: dict, name: str) -> None: if exchange_config.get('sandbox'): if api.urls.get('test'): diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 828734520..7670cfd45 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1,7 +1,8 @@ import copy -import datetime import logging import shutil +from datetime import datetime, timezone +from math import cos, sin from pathlib import Path from typing import Any, Dict, List, Tuple @@ -9,6 +10,7 @@ import numpy as np import numpy.typing as npt import pandas as pd from pandas import DataFrame +from scipy import stats from sklearn import linear_model from sklearn.cluster import DBSCAN from sklearn.metrics.pairwise import pairwise_distances @@ -360,7 +362,7 @@ class FreqaiDataKitchen: def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: """ - Normalize a set of data using the mean and standard deviation from + Denormalize a set of data using the mean and standard deviation from the associated training data. :param df: Dataframe of predictions to be denormalized """ @@ -399,7 +401,7 @@ class FreqaiDataKitchen: config_timerange = TimeRange.parse_timerange(self.config["timerange"]) if config_timerange.stopts == 0: config_timerange.stopts = int( - datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + datetime.now(tz=timezone.utc).timestamp() ) timerange_train = copy.deepcopy(full_timerange) timerange_backtest = copy.deepcopy(full_timerange) @@ -416,8 +418,8 @@ class FreqaiDataKitchen: timerange_train.stopts = timerange_train.startts + train_period_days first = False - start = datetime.datetime.utcfromtimestamp(timerange_train.startts) - stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts) + start = datetime.fromtimestamp(timerange_train.startts, tz=timezone.utc) + stop = datetime.fromtimestamp(timerange_train.stopts, tz=timezone.utc) tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_training_list_timerange.append(copy.deepcopy(timerange_train)) @@ -430,8 +432,8 @@ class FreqaiDataKitchen: if timerange_backtest.stopts > config_timerange.stopts: timerange_backtest.stopts = config_timerange.stopts - start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts) - stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts) + start = datetime.fromtimestamp(timerange_backtest.startts, tz=timezone.utc) + stop = datetime.fromtimestamp(timerange_backtest.stopts, tz=timezone.utc) tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest)) @@ -451,8 +453,8 @@ class FreqaiDataKitchen: it is sliced down to just the present training period. """ - start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc) - stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc) + start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) + stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) df = df.loc[df["date"] >= start, :] if not self.live: df = df.loc[df["date"] < stop, :] @@ -653,8 +655,6 @@ class FreqaiDataKitchen: is an outlier. """ - from math import cos, sin - if predict: if not self.data['DBSCAN_eps']: return @@ -747,6 +747,111 @@ class FreqaiDataKitchen: return + def compute_inlier_metric(self, set_='train') -> None: + """ + + Compute inlier metric from backwards distance distributions. + This metric defines how well features from a timepoint fit + into previous timepoints. + """ + + no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"] + + if set_ == 'train': + compute_df = copy.deepcopy(self.data_dictionary['train_features']) + elif set_ == 'test': + compute_df = copy.deepcopy(self.data_dictionary['test_features']) + else: + compute_df = copy.deepcopy(self.data_dictionary['prediction_features']) + + compute_df_reindexed = compute_df.reindex( + index=np.flip(compute_df.index) + ) + + pairwise = pd.DataFrame( + np.triu( + pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count) + ), + columns=compute_df_reindexed.index, + index=compute_df_reindexed.index + ) + pairwise = pairwise.round(5) + + column_labels = [ + '{}{}'.format('d', i) for i in range(1, no_prev_pts + 1) + ] + distances = pd.DataFrame( + columns=column_labels, index=compute_df.index + ) + + for index in compute_df.index[no_prev_pts:]: + current_row = pairwise.loc[[index]] + current_row_no_zeros = current_row.loc[ + :, (current_row != 0).any(axis=0) + ] + distances.loc[[index]] = current_row_no_zeros.iloc[ + :, :no_prev_pts + ] + distances = distances.replace([np.inf, -np.inf], np.nan) + drop_index = pd.isnull(distances).any(1) + distances = distances[drop_index == 0] + + inliers = pd.DataFrame(index=distances.index) + for key in distances.keys(): + current_distances = distances[key].dropna() + fit_params = stats.weibull_min.fit(current_distances) + quantiles = stats.weibull_min.cdf(current_distances, *fit_params) + + df_inlier = pd.DataFrame( + {key: quantiles}, index=distances.index + ) + inliers = pd.concat( + [inliers, df_inlier], axis=1 + ) + + inlier_metric = pd.DataFrame( + data=inliers.sum(axis=1) / no_prev_pts, + columns=['inlier_metric'], + index=compute_df.index + ) + + inlier_metric = (2 * (inlier_metric - inlier_metric.min()) / + (inlier_metric.max() - inlier_metric.min()) - 1) + + if set_ in ('train', 'test'): + inlier_metric = inlier_metric.iloc[no_prev_pts:] + compute_df = compute_df.iloc[no_prev_pts:] + self.remove_beginning_points_from_data_dict(set_, no_prev_pts) + self.data_dictionary[f'{set_}_features'] = pd.concat( + [compute_df, inlier_metric], axis=1) + else: + self.data_dictionary['prediction_features'] = pd.concat( + [compute_df, inlier_metric], axis=1) + self.data_dictionary['prediction_features'].fillna(0, inplace=True) + + logger.info('Inlier metric computed and added to features.') + + return None + + def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10): + features = self.data_dictionary[f'{set_}_features'] + weights = self.data_dictionary[f'{set_}_weights'] + labels = self.data_dictionary[f'{set_}_labels'] + self.data_dictionary[f'{set_}_weights'] = weights[no_prev_pts:] + self.data_dictionary[f'{set_}_features'] = features.iloc[no_prev_pts:] + self.data_dictionary[f'{set_}_labels'] = labels.iloc[no_prev_pts:] + + def add_noise_to_training_features(self) -> None: + """ + Add noise to train features to reduce the risk of overfitting. + """ + mu = 0 # no shift + sigma = self.freqai_config["feature_parameters"]["noise_standard_deviation"] + compute_df = self.data_dictionary['train_features'] + noise = np.random.normal(mu, sigma, [compute_df.shape[0], compute_df.shape[1]]) + self.data_dictionary['train_features'] += noise + return + def find_features(self, dataframe: DataFrame) -> None: """ Find features in the strategy provided dataframe @@ -872,14 +977,14 @@ class FreqaiDataKitchen: "Please indicate the end date of your desired backtesting. " "timerange.") # backtest_timerange.stopts = int( - # datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + # datetime.now(tz=timezone.utc).timestamp() # ) backtest_timerange.startts = ( backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY ) - start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts) - stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts) + start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc) + stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc) full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") self.full_path = Path( @@ -905,7 +1010,7 @@ class FreqaiDataKitchen: :return: bool = If the model is expired or not. """ - time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + time = datetime.now(tz=timezone.utc).timestamp() elapsed_time = (time - trained_timestamp) / 3600 # hours max_time = self.freqai_config.get("expiration_hours", 0) if max_time > 0: @@ -917,7 +1022,7 @@ class FreqaiDataKitchen: self, trained_timestamp: int ) -> Tuple[bool, TimeRange, TimeRange]: - time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + time = datetime.now(tz=timezone.utc).timestamp() trained_timerange = TimeRange() data_load_timerange = TimeRange() diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index f631c9126..8dd7a2889 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -1,10 +1,9 @@ -# import contextlib -import datetime import logging import shutil import threading import time from abc import ABC, abstractmethod +from datetime import datetime, timezone from pathlib import Path from threading import Lock from typing import Any, Dict, List, Tuple @@ -59,7 +58,6 @@ class IFreqaiModel(ABC): "data_split_parameters", {}) self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get( "model_training_parameters", {}) - self.feature_parameters = config.get("freqai", {}).get("feature_parameters") self.retrain = False self.first = True self.set_full_path() @@ -70,11 +68,14 @@ class IFreqaiModel(ABC): self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode) self.identifier: str = self.freqai_info.get("identifier", "no_id_provided") self.scanning = False + self.ft_params = self.freqai_info["feature_parameters"] self.keras: bool = self.freqai_info.get("keras", False) - if self.keras and self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0): - self.freqai_info["feature_parameters"]["DI_threshold"] = 0 + if self.keras and self.ft_params.get("DI_threshold", 0): + self.ft_params["DI_threshold"] = 0 logger.warning("DI threshold is not configured for Keras models yet. Deactivating.") self.CONV_WIDTH = self.freqai_info.get("conv_width", 2) + if self.ft_params.get("inlier_metric_window", 0): + self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2 self.pair_it = 0 self.pair_it_train = 0 self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist")) @@ -195,7 +196,7 @@ class IFreqaiModel(ABC): if retrain: self.train_timer('start') - self.train_model_in_series( + self.extract_data_and_train_model( new_trained_timerange, pair, strategy, dk, data_load_timerange ) self.train_timer('stop') @@ -235,12 +236,12 @@ class IFreqaiModel(ABC): dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) trained_timestamp = tr_train - tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime( - "%Y-%m-%d %H:%M:%S" - ) - tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime( - "%Y-%m-%d %H:%M:%S" - ) + tr_train_startts_str = datetime.fromtimestamp( + tr_train.startts, + tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + tr_train_stopts_str = datetime.fromtimestamp( + tr_train.stopts, + tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S") logger.info( f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs" f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} " @@ -425,24 +426,25 @@ class IFreqaiModel(ABC): def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None: """ - Base data cleaning method for train - Any function inside this method should drop training data points from the filtered_dataframe - based on user decided logic. See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an - example of how outlier data points are dropped from the dataframe used for training. + Base data cleaning method for train. + Functions here improve/modify the input data by identifying outliers, + computing additional metrics, adding noise, reducing dimensionality etc. """ - if self.freqai_info["feature_parameters"].get( + ft_params = self.freqai_info["feature_parameters"] + + if ft_params.get( "principal_component_analysis", False ): dk.principal_component_analysis() - if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False): + if ft_params.get("use_SVM_to_remove_outliers", False): dk.use_SVM_to_remove_outliers(predict=False) - if self.freqai_info["feature_parameters"].get("DI_threshold", 0): + if ft_params.get("DI_threshold", 0): dk.data["avg_mean_dist"] = dk.compute_distances() - if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False): + if ft_params.get("use_DBSCAN_to_remove_outliers", False): if dk.pair in self.dd.old_DBSCAN_eps: eps = self.dd.old_DBSCAN_eps[dk.pair] else: @@ -450,29 +452,36 @@ class IFreqaiModel(ABC): dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps) self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps'] + if ft_params.get('inlier_metric_window', 0): + dk.compute_inlier_metric(set_='train') + if self.freqai_info["data_split_parameters"]["test_size"] > 0: + dk.compute_inlier_metric(set_='test') + + if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0): + dk.add_noise_to_training_features() + def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None: """ Base data cleaning method for predict. - These functions each modify dk.do_predict, which is a dataframe with equal length - to the number of candles coming from and returning to the strategy. Inside do_predict, - 1 allows prediction and < 0 signals to the strategy that the model is not confident in - the prediction. - See FreqaiDataKitchen::remove_outliers() for an example - of how the do_predict vector is modified. do_predict is ultimately passed back to strategy - for buy signals. + Functions here are complementary to the functions of data_cleaning_train. """ - if self.freqai_info["feature_parameters"].get( + ft_params = self.freqai_info["feature_parameters"] + + if ft_params.get('inlier_metric_window', 0): + dk.compute_inlier_metric(set_='predict') + + if ft_params.get( "principal_component_analysis", False ): dk.pca_transform(dataframe) - if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False): + if ft_params.get("use_SVM_to_remove_outliers", False): dk.use_SVM_to_remove_outliers(predict=True) - if self.freqai_info["feature_parameters"].get("DI_threshold", 0): + if ft_params.get("DI_threshold", 0): dk.check_if_pred_in_training_spaces() - if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False): + if ft_params.get("use_DBSCAN_to_remove_outliers", False): dk.use_DBSCAN_to_remove_outliers(predict=True) def model_exists( @@ -508,7 +517,7 @@ class IFreqaiModel(ABC): Path(self.full_path, Path(self.config["config_files"][0]).name), ) - def train_model_in_series( + def extract_data_and_train_model( self, new_trained_timerange: TimeRange, pair: str, @@ -600,7 +609,7 @@ class IFreqaiModel(ABC): # # for keras type models, the conv_window needs to be prepended so # # viewing is correct in frequi - if self.freqai_info.get('keras', False): + if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0): n_lost_points = self.freqai_info.get('conv_width', 2) zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))), columns=hist_preds_df.columns) diff --git a/freqtrade/freqtradebot.py b/freqtrade/freqtradebot.py index 1d171ae89..a2f39afd6 100644 --- a/freqtrade/freqtradebot.py +++ b/freqtrade/freqtradebot.py @@ -583,7 +583,9 @@ class FreqtradeBot(LoggingMixin): if stake_amount is not None and stake_amount < 0.0: # We should decrease our position - amount = abs(float(FtPrecise(stake_amount) / FtPrecise(current_exit_rate))) + amount = self.exchange.amount_to_contract_precision( + trade.pair, + abs(float(FtPrecise(stake_amount) / FtPrecise(current_exit_rate)))) if amount > trade.amount: # This is currently ineffective as remaining would become < min tradable # Fixing this would require checking for 0.0 there - @@ -592,9 +594,14 @@ class FreqtradeBot(LoggingMixin): f"Adjusting amount to trade.amount as it is higher. {amount} > {trade.amount}") amount = trade.amount + if amount == 0.0: + logger.info("Amount to sell is 0.0 due to exchange limits - not selling.") + return + remaining = (trade.amount - amount) * current_exit_rate if remaining < min_exit_stake: - logger.info(f'Remaining amount of {remaining} would be too small.') + logger.info(f"Remaining amount of {remaining} would be smaller " + f"than the minimum of {min_exit_stake}.") return self.execute_trade_exit(trade, current_exit_rate, exit_check=ExitCheckTuple( diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index 8f6b6b332..97418b72c 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -537,7 +537,11 @@ class Backtesting: return pos_trade if stake_amount is not None and stake_amount < 0.0: - amount = abs(stake_amount) / current_rate + amount = amount_to_contract_precision( + abs(stake_amount) / current_rate, trade.amount_precision, + self.precision_mode, trade.contract_size) + if amount == 0.0: + return trade if amount > trade.amount: # This is currently ineffective as remaining would become < min tradable amount = trade.amount diff --git a/tests/exchange/test_exchange.py b/tests/exchange/test_exchange.py index 3352019a9..3b903f8ee 100644 --- a/tests/exchange/test_exchange.py +++ b/tests/exchange/test_exchange.py @@ -4456,6 +4456,39 @@ def test__amount_to_contracts( assert result_amount == param_amount +@pytest.mark.parametrize('pair,amount,expected_spot,expected_fut', [ + # Contract size of 0.01 + ('ADA/USDT:USDT', 40, 40, 40), + ('ADA/USDT:USDT', 10.4445555, 10.4, 10.444), + ('LTC/ETH', 30, 30, 30), + ('LTC/USD', 30, 30, 30), + # contract size of 10 + ('ETH/USDT:USDT', 10.111, 10.1, 10), + ('ETH/USDT:USDT', 10.188, 10.1, 10), + ('ETH/USDT:USDT', 10.988, 10.9, 10), +]) +def test_amount_to_contract_precision( + mocker, + default_conf, + pair, + amount, + expected_spot, + expected_fut, +): + api_mock = MagicMock() + default_conf['trading_mode'] = 'spot' + default_conf['margin_mode'] = 'isolated' + exchange = get_patched_exchange(mocker, default_conf, api_mock) + + result_size = exchange.amount_to_contract_precision(pair, amount) + assert result_size == expected_spot + + default_conf['trading_mode'] = 'futures' + exchange = get_patched_exchange(mocker, default_conf, api_mock) + result_size = exchange.amount_to_contract_precision(pair, amount) + assert result_size == expected_fut + + @pytest.mark.parametrize('exchange_name,open_rate,is_short,trading_mode,margin_mode', [ # Bittrex ('bittrex', 2.0, False, 'spot', None), diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index db499631b..2c6210a0e 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -81,6 +81,37 @@ def get_patched_freqaimodel(mocker, freqaiconf): return freqaimodel +def make_unfiltered_dataframe(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + freqai.dk.pair = "ADA/BTC" + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(data_load_timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( + data_load_timerange, freqai.dk.pair, freqai.dk + ) + + unfiltered_dataframe = freqai.dk.use_strategy_to_populate_indicators( + strategy, corr_dataframes, base_dataframes, freqai.dk.pair + ) + + unfiltered_dataframe = freqai.dk.slice_dataframe(new_timerange, unfiltered_dataframe) + + return freqai, unfiltered_dataframe + + def make_data_dictionary(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) @@ -92,12 +123,11 @@ def make_data_dictionary(mocker, freqai_conf): freqai.live = True freqai.dk = FreqaiDataKitchen(freqai_conf) freqai.dk.pair = "ADA/BTC" - timerange = TimeRange.parse_timerange("20180110-20180130") - freqai.dd.load_all_pair_histories(timerange, freqai.dk) + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(data_load_timerange, freqai.dk) freqai.dd.pair_dict = MagicMock() - data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 908223c20..a9e7eac51 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -6,7 +6,8 @@ import pytest from freqtrade.exceptions import OperationalException from tests.conftest import log_has_re -from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary +from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary, + make_unfiltered_dataframe) @pytest.mark.parametrize( @@ -91,3 +92,72 @@ def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, "SVM detected 8.09%", caplog, ) + + +def test_compute_inlier_metric(mocker, freqai_conf, caplog): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai_conf['freqai']['feature_parameters'].update({"inlier_metric_window": 10}) + freqai.dk.compute_inlier_metric(set_='train') + assert log_has_re( + "Inlier metric computed and added to features.", + caplog, + ) + + +def test_add_noise_to_training_features(mocker, freqai_conf): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai_conf['freqai']['feature_parameters'].update({"noise_standard_deviation": 0.1}) + freqai.dk.add_noise_to_training_features() + + +def test_remove_beginning_points_from_data_dict(mocker, freqai_conf): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai.dk.remove_beginning_points_from_data_dict(set_='train') + + +def test_principal_component_analysis(mocker, freqai_conf, caplog): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai.dk.principal_component_analysis() + assert log_has_re( + "reduced feature dimension by", + caplog, + ) + + +def test_normalize_data(mocker, freqai_conf): + freqai = make_data_dictionary(mocker, freqai_conf) + data_dict = freqai.dk.data_dictionary + freqai.dk.normalize_data(data_dict) + assert len(freqai.dk.data) == 56 + + +def test_filter_features(mocker, freqai_conf): + freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf) + freqai.dk.find_features(unfiltered_dataframe) + + filtered_df, labels = freqai.dk.filter_features( + unfiltered_dataframe, + freqai.dk.training_features_list, + freqai.dk.label_list, + training_filter=True, + ) + + assert len(filtered_df.columns) == 26 + + +def test_make_train_test_datasets(mocker, freqai_conf): + freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf) + freqai.dk.find_features(unfiltered_dataframe) + + features_filtered, labels_filtered = freqai.dk.filter_features( + unfiltered_dataframe, + freqai.dk.training_features_list, + freqai.dk.label_list, + training_filter=True, + ) + + data_dictionary = freqai.dk.make_train_test_datasets(features_filtered, labels_filtered) + + assert data_dictionary + assert len(data_dictionary) == 7 + assert len(data_dictionary['train_features'].index) == 1916 diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 5441b3c24..6364cb04f 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -17,7 +17,7 @@ def is_arm() -> bool: return "arm" in machine or "aarch64" in machine -def test_train_model_in_series_LightGBM(mocker, freqai_conf): +def test_extract_data_and_train_model_LightGBM(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) @@ -35,7 +35,8 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() @@ -45,7 +46,7 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) -def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): +def test_extract_data_and_train_model_LightGBMMultiModel(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"strategy": "freqai_test_multimodel_strat"}) freqai_conf.update({"freqaimodel": "LightGBMRegressorMultiTarget"}) @@ -64,7 +65,8 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert len(freqai.dk.label_list) == 2 assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() @@ -77,7 +79,7 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): @pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...") -def test_train_model_in_series_Catboost(mocker, freqai_conf): +def test_extract_data_and_train_model_Catboost(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "CatboostRegressor"}) # freqai_conf.get('freqai', {}).update( @@ -98,8 +100,8 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", - strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() @@ -110,7 +112,7 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf): @pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...") -def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): +def test_extract_data_and_train_model_CatboostClassifier(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "CatboostClassifier"}) freqai_conf.update({"strategy": "freqai_test_classifier"}) @@ -130,8 +132,8 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", - strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() @@ -141,7 +143,7 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) -def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf): +def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "LightGBMClassifier"}) freqai_conf.update({"strategy": "freqai_test_classifier"}) @@ -161,8 +163,8 @@ def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", - strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() @@ -296,7 +298,8 @@ def test_follow_mode(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() @@ -345,7 +348,8 @@ def test_principal_component_analysis(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl") diff --git a/tests/test_integration.py b/tests/test_integration.py index dd3488f81..77ed822d1 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -485,7 +485,7 @@ def test_dca_exiting(default_conf_usdt, ticker_usdt, fee, mocker, caplog) -> Non assert len(trade.orders) == 1 assert pytest.approx(trade.stake_amount) == 60 assert pytest.approx(trade.amount) == 30.0 - assert log_has_re("Remaining amount of 1.6.* would be too small.", caplog) + assert log_has_re("Remaining amount of 1.6.* would be smaller than the minimum of 10.", caplog) freqtrade.strategy.adjust_trade_position = MagicMock(return_value=-20) @@ -504,9 +504,21 @@ def test_dca_exiting(default_conf_usdt, ticker_usdt, fee, mocker, caplog) -> Non freqtrade.strategy.adjust_trade_position = MagicMock(return_value=-50) freqtrade.process() assert log_has_re("Adjusting amount to trade.amount as it is higher.*", caplog) - assert log_has_re("Remaining amount of 0.0 would be too small.", caplog) + assert log_has_re("Remaining amount of 0.0 would be smaller than the minimum of 10.", caplog) trade = Trade.get_trades().first() assert len(trade.orders) == 2 assert trade.orders[-1].ft_order_side == 'sell' assert pytest.approx(trade.stake_amount) == 40.198 assert trade.is_open + + # use amount that would trunc to 0.0 once selling + mocker.patch("freqtrade.exchange.Exchange.amount_to_contract_precision", + lambda s, p, v: round(v, 1)) + freqtrade.strategy.adjust_trade_position = MagicMock(return_value=-0.01) + freqtrade.process() + trade = Trade.get_trades().first() + assert len(trade.orders) == 2 + assert trade.orders[-1].ft_order_side == 'sell' + assert pytest.approx(trade.stake_amount) == 40.198 + assert trade.is_open + assert log_has_re('Amount to sell is 0.0 due to exchange limits - not selling.', caplog)