From c9be66b5b639f151abb1d9a5e76267d752eddb3b Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 3 Sep 2022 15:52:29 +0200 Subject: [PATCH] increase test coverage for dk, improve function naming, extra cleaning --- freqtrade/freqai/data_kitchen.py | 30 ++++++----- freqtrade/freqai/freqai_interface.py | 10 ++-- tests/freqai/conftest.py | 32 +++++++++++ tests/freqai/test_freqai_datakitchen.py | 72 ++++++++++++++++++++++++- tests/freqai/test_freqai_interface.py | 34 ++++++------ 5 files changed, 143 insertions(+), 35 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 9d4a69287..fce9e8480 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1,7 +1,7 @@ import copy -import datetime import logging import shutil +from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Tuple @@ -345,7 +345,7 @@ class FreqaiDataKitchen: def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: """ - Normalize a set of data using the mean and standard deviation from + Denormalize a set of data using the mean and standard deviation from the associated training data. :param df: Dataframe of predictions to be denormalized """ @@ -384,7 +384,7 @@ class FreqaiDataKitchen: config_timerange = TimeRange.parse_timerange(self.config["timerange"]) if config_timerange.stopts == 0: config_timerange.stopts = int( - datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + datetime.now(tz=timezone.utc).timestamp() ) timerange_train = copy.deepcopy(full_timerange) timerange_backtest = copy.deepcopy(full_timerange) @@ -401,8 +401,8 @@ class FreqaiDataKitchen: timerange_train.stopts = timerange_train.startts + train_period_days first = False - start = datetime.datetime.utcfromtimestamp(timerange_train.startts) - stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts) + start = datetime.utcfromtimestamp(timerange_train.startts) + stop = datetime.utcfromtimestamp(timerange_train.stopts) tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_training_list_timerange.append(copy.deepcopy(timerange_train)) @@ -415,8 +415,8 @@ class FreqaiDataKitchen: if timerange_backtest.stopts > config_timerange.stopts: timerange_backtest.stopts = config_timerange.stopts - start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts) - stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts) + start = datetime.utcfromtimestamp(timerange_backtest.startts) + stop = datetime.utcfromtimestamp(timerange_backtest.stopts) tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest)) @@ -436,8 +436,8 @@ class FreqaiDataKitchen: it is sliced down to just the present training period. """ - start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc) - stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc) + start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) + stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) df = df.loc[df["date"] >= start, :] df = df.loc[df["date"] <= stop, :] @@ -808,6 +808,8 @@ class FreqaiDataKitchen: [compute_df, inlier_metric], axis=1) self.data_dictionary['prediction_features'].fillna(0, inplace=True) + logger.info('Inlier metric computed and added to features.') + return None def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10): @@ -948,14 +950,14 @@ class FreqaiDataKitchen: "Please indicate the end date of your desired backtesting. " "timerange.") # backtest_timerange.stopts = int( - # datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + # datetime.now(tz=timezone.utc).timestamp() # ) backtest_timerange.startts = ( backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY ) - start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts) - stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts) + start = datetime.utcfromtimestamp(backtest_timerange.startts) + stop = datetime.utcfromtimestamp(backtest_timerange.stopts) full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") self.full_path = Path( @@ -981,7 +983,7 @@ class FreqaiDataKitchen: :return: bool = If the model is expired or not. """ - time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + time = datetime.now(tz=timezone.utc).timestamp() elapsed_time = (time - trained_timestamp) / 3600 # hours max_time = self.freqai_config.get("expiration_hours", 0) if max_time > 0: @@ -993,7 +995,7 @@ class FreqaiDataKitchen: self, trained_timestamp: int ) -> Tuple[bool, TimeRange, TimeRange]: - time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() + time = datetime.now(tz=timezone.utc).timestamp() trained_timerange = TimeRange() data_load_timerange = TimeRange() diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 893f960ea..fd0554248 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -1,10 +1,10 @@ # import contextlib -import datetime import logging import shutil import threading import time from abc import ABC, abstractmethod +from datetime import datetime from pathlib import Path from threading import Lock from typing import Any, Dict, Tuple @@ -174,7 +174,7 @@ class IFreqaiModel(ABC): if retrain: self.train_timer('start') - self.train_model_in_series( + self.extract_data_and_train_model( new_trained_timerange, pair, strategy, dk, data_load_timerange ) self.train_timer('stop') @@ -214,10 +214,10 @@ class IFreqaiModel(ABC): dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) trained_timestamp = tr_train - tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime( + tr_train_startts_str = datetime.utcfromtimestamp(tr_train.startts).strftime( "%Y-%m-%d %H:%M:%S" ) - tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime( + tr_train_stopts_str = datetime.utcfromtimestamp(tr_train.stopts).strftime( "%Y-%m-%d %H:%M:%S" ) logger.info( @@ -495,7 +495,7 @@ class IFreqaiModel(ABC): Path(self.full_path, Path(self.config["config_files"][0]).name), ) - def train_model_in_series( + def extract_data_and_train_model( self, new_trained_timerange: TimeRange, pair: str, diff --git a/tests/freqai/conftest.py b/tests/freqai/conftest.py index dd148da77..6528347e8 100644 --- a/tests/freqai/conftest.py +++ b/tests/freqai/conftest.py @@ -82,6 +82,38 @@ def get_patched_freqaimodel(mocker, freqaiconf): return freqaimodel +def make_unfiltered_dataframe(mocker, freqai_conf): + freqai_conf.update({"timerange": "20180110-20180130"}) + + strategy = get_patched_freqai_strategy(mocker, freqai_conf) + exchange = get_patched_exchange(mocker, freqai_conf) + strategy.dp = DataProvider(freqai_conf, exchange) + strategy.freqai_info = freqai_conf.get("freqai", {}) + freqai = strategy.freqai + freqai.live = True + freqai.dk = FreqaiDataKitchen(freqai_conf) + freqai.dk.pair = "ADA/BTC" + timerange = TimeRange.parse_timerange("20180110-20180130") + freqai.dd.load_all_pair_histories(timerange, freqai.dk) + + freqai.dd.pair_dict = MagicMock() + + data_load_timerange = TimeRange.parse_timerange("20180110-20180130") + new_timerange = TimeRange.parse_timerange("20180120-20180130") + + corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( + data_load_timerange, freqai.dk.pair, freqai.dk + ) + + unfiltered_dataframe = freqai.dk.use_strategy_to_populate_indicators( + strategy, corr_dataframes, base_dataframes, freqai.dk.pair + ) + + unfiltered_dataframe = freqai.dk.slice_dataframe(new_timerange, unfiltered_dataframe) + + return freqai, unfiltered_dataframe + + def make_data_dictionary(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) diff --git a/tests/freqai/test_freqai_datakitchen.py b/tests/freqai/test_freqai_datakitchen.py index 9ef955695..2204e94c6 100644 --- a/tests/freqai/test_freqai_datakitchen.py +++ b/tests/freqai/test_freqai_datakitchen.py @@ -6,7 +6,8 @@ import pytest from freqtrade.exceptions import OperationalException from tests.conftest import log_has_re -from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary +from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary, + make_unfiltered_dataframe) @pytest.mark.parametrize( @@ -94,3 +95,72 @@ def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, "SVM detected 8.46%", caplog, ) + + +def test_compute_inlier_metric(mocker, freqai_conf, caplog): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai_conf['freqai']['feature_parameters'].update({"inlier_metric_window": 10}) + freqai.dk.compute_inlier_metric(set_='train') + assert log_has_re( + "Inlier metric computed and added to features.", + caplog, + ) + + +def test_add_noise_to_training_features(mocker, freqai_conf): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai_conf['freqai']['feature_parameters'].update({"noise_standard_deviation": 0.1}) + freqai.dk.add_noise_to_training_features() + + +def test_remove_beginning_points_from_data_dict(mocker, freqai_conf): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai.dk.remove_beginning_points_from_data_dict(set_='train') + + +def test_principal_component_analysis(mocker, freqai_conf, caplog): + freqai = make_data_dictionary(mocker, freqai_conf) + freqai.dk.principal_component_analysis() + assert log_has_re( + "reduced feature dimension by", + caplog, + ) + + +def test_normalize_data(mocker, freqai_conf): + freqai = make_data_dictionary(mocker, freqai_conf) + data_dict = freqai.dk.data_dictionary + freqai.dk.normalize_data(data_dict) + assert len(freqai.dk.data) == 56 + + +def test_filter_features(mocker, freqai_conf): + freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf) + freqai.dk.find_features(unfiltered_dataframe) + + filtered_df, labels = freqai.dk.filter_features( + unfiltered_dataframe, + freqai.dk.training_features_list, + freqai.dk.label_list, + training_filter=True, + ) + + assert len(filtered_df.columns) == 26 + + +def test_make_train_test_datasets(mocker, freqai_conf): + freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf) + freqai.dk.find_features(unfiltered_dataframe) + + features_filtered, labels_filtered = freqai.dk.filter_features( + unfiltered_dataframe, + freqai.dk.training_features_list, + freqai.dk.label_list, + training_filter=True, + ) + + data_dictionary = freqai.dk.make_train_test_datasets(features_filtered, labels_filtered) + + assert data_dictionary + assert len(data_dictionary) == 7 + assert len(data_dictionary['train_features'].index) == 1916 diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 792ffc467..927af2a02 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -17,7 +17,7 @@ def is_arm() -> bool: return "arm" in machine or "aarch64" in machine -def test_train_model_in_series_LightGBM(mocker, freqai_conf): +def test_extract_data_and_train_model_LightGBM(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) @@ -35,7 +35,8 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() @@ -45,7 +46,7 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) -def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): +def test_extract_data_and_train_model_LightGBMMultiModel(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"strategy": "freqai_test_multimodel_strat"}) freqai_conf.update({"freqaimodel": "LightGBMRegressorMultiTarget"}) @@ -64,7 +65,8 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert len(freqai.dk.label_list) == 2 assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() @@ -77,7 +79,7 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): @pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...") -def test_train_model_in_series_Catboost(mocker, freqai_conf): +def test_extract_data_and_train_model_Catboost(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "CatboostRegressor"}) # freqai_conf.get('freqai', {}).update( @@ -98,8 +100,8 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", - strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() @@ -110,7 +112,7 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf): @pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...") -def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): +def test_extract_data_and_train_model_CatboostClassifier(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "CatboostClassifier"}) freqai_conf.update({"strategy": "freqai_test_classifier"}) @@ -130,8 +132,8 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", - strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() @@ -141,7 +143,7 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): shutil.rmtree(Path(freqai.dk.full_path)) -def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf): +def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf): freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"freqaimodel": "LightGBMClassifier"}) freqai_conf.update({"strategy": "freqai_test_classifier"}) @@ -161,8 +163,8 @@ def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", - strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", + strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() @@ -289,7 +291,8 @@ def test_follow_mode(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() @@ -338,7 +341,8 @@ def test_principal_component_analysis(mocker, freqai_conf): data_load_timerange = TimeRange.parse_timerange("20180110-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130") - freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) + freqai.extract_data_and_train_model( + new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl")