From dd628eb525acc407fd0643aba2548d5eac6f3800 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 28 Aug 2022 12:56:39 +0200 Subject: [PATCH] add tests for outlier detection and removal functions --- freqtrade/freqai/data_drawer.py | 3 +- freqtrade/freqai/data_kitchen.py | 2 +- tests/freqai/conftest.py | 47 ++++++++++++++++++++++++- tests/freqai/test_freqai_datakitchen.py | 31 ++++++++++++++-- 4 files changed, 78 insertions(+), 5 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index b3060deff..477b9e098 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -566,7 +566,7 @@ class FreqaiDataDrawer: for training according to user defined train_period_days metadata: dict = strategy furnished pair metadata """ - + import pytest with self.history_lock: corr_dataframes: Dict[Any, Any] = {} base_dataframes: Dict[Any, Any] = {} @@ -576,6 +576,7 @@ class FreqaiDataDrawer: ) for tf in self.freqai_info["feature_parameters"].get("include_timeframes"): + # pytest.set_trace() base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf]) if pairs: for p in pairs: diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index ed3990de0..8e68c9a38 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -657,7 +657,7 @@ class FreqaiDataKitchen: return (x, y) MinPts = int(len(self.data_dictionary['train_features'].index) * 0.25) - # measure pairwise distances to train_features.shape[1]*2 nearest neighbours + # measure pairwise distances to nearest neighbours neighbors = NearestNeighbors( n_neighbors=MinPts, n_jobs=self.thread_count) neighbors_fit = neighbors.fit(self.data_dictionary['train_features']) diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index 6ace13677..98f086ec9 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -2,7 +2,7 @@ from copy import deepcopy from pathlib import Path import pytest - +from unittest.mock import MagicMock from freqtrade.configuration import TimeRange from freqtrade.data.dataprovider import DataProvider from freqtrade.freqai.data_drawer import FreqaiDataDrawer @@ -81,6 +81,51 @@ def get_patched_freqaimodel(mocker, freqaiconf): return freqaimodel +def make_data_dictionary(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + freqai.dk.pair = "ADA/BTC" + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( + data_load_timerange, freqai.dk.pair, freqai.dk + ) + + unfiltered_dataframe = freqai.dk.use_strategy_to_populate_indicators( + strategy, corr_dataframes, base_dataframes, freqai.dk.pair + ) + + unfiltered_dataframe = freqai.dk.slice_dataframe(new_timerange, unfiltered_dataframe) + + freqai.dk.find_features(unfiltered_dataframe) + + features_filtered, labels_filtered = freqai.dk.filter_features( + unfiltered_dataframe, + freqai.dk.training_features_list, + freqai.dk.label_list, + training_filter=True, + ) + + data_dictionary = freqai.dk.make_train_test_datasets(features_filtered, labels_filtered) + + data_dictionary = freqai.dk.normalize_data(data_dictionary) + + return freqai + + def get_freqai_live_analyzed_dataframe(mocker, freqaiconf): strategy = get_patched_freqai_strategy(mocker, freqaiconf) exchange = get_patched_exchange(mocker, freqaiconf) diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 9f2a2f71e..581286715 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -5,8 +5,8 @@ from pathlib import Path import pytest from freqtrade.exceptions import OperationalException -from tests.freqai.conftest import get_patched_data_kitchen - +from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary +from tests.conftest import log_has_re @pytest.mark.parametrize( "timerange, train_period_days, expected_result", @@ -66,3 +66,30 @@ def test_check_if_model_expired(mocker, freqai_conf, timestamp, expected): dk = get_patched_data_kitchen(mocker, freqai_conf) assert dk.check_if_model_expired(timestamp) == expected shutil.rmtree(Path(dk.full_path)) + + +def test_use_DBSCAN_to_remove_outliers(mocker, freqai_conf, caplog): + freqai = make_data_dictionary(mocker, freqai_conf) + # freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 1}) + freqai.dk.use_DBSCAN_to_remove_outliers(predict=False) + assert log_has_re( + "DBSCAN found eps of 2.42.", + caplog, + ) + + +def test_compute_distances(mocker, freqai_conf): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai_conf['freqai']['feature_parameters'].update({"DI_threshold": 1}) + avg_mean_dist = freqai.dk.compute_distances() + assert round(avg_mean_dist, 2) == 2.56 + + +def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, caplog): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai_conf['freqai']['feature_parameters'].update({"outlier_protection_percentage": 0.1}) + freqai.dk.use_SVM_to_remove_outliers(predict=False) + assert log_has_re( + "SVM detected 8.46%", + caplog, + )