Increase test coverage for dk (FreqaiDataKitchen), improve function naming, extra cleanup
This commit is contained in:
		| @@ -1,7 +1,7 @@ | ||||
| import copy | ||||
| import datetime | ||||
| import logging | ||||
| import shutil | ||||
| from datetime import datetime, timezone | ||||
| from pathlib import Path | ||||
| from typing import Any, Dict, List, Tuple | ||||
|  | ||||
| @@ -345,7 +345,7 @@ class FreqaiDataKitchen: | ||||
|  | ||||
|     def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: | ||||
|         """ | ||||
|         Normalize a set of data using the mean and standard deviation from | ||||
|         Denormalize a set of data using the mean and standard deviation from | ||||
|         the associated training data. | ||||
|         :param df: Dataframe of predictions to be denormalized | ||||
|         """ | ||||
| @@ -384,7 +384,7 @@ class FreqaiDataKitchen: | ||||
|         config_timerange = TimeRange.parse_timerange(self.config["timerange"]) | ||||
|         if config_timerange.stopts == 0: | ||||
|             config_timerange.stopts = int( | ||||
|                 datetime.datetime.now(tz=datetime.timezone.utc).timestamp() | ||||
|                 datetime.now(tz=timezone.utc).timestamp() | ||||
|             ) | ||||
|         timerange_train = copy.deepcopy(full_timerange) | ||||
|         timerange_backtest = copy.deepcopy(full_timerange) | ||||
| @@ -401,8 +401,8 @@ class FreqaiDataKitchen: | ||||
|             timerange_train.stopts = timerange_train.startts + train_period_days | ||||
|  | ||||
|             first = False | ||||
|             start = datetime.datetime.utcfromtimestamp(timerange_train.startts) | ||||
|             stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts) | ||||
|             start = datetime.utcfromtimestamp(timerange_train.startts) | ||||
|             stop = datetime.utcfromtimestamp(timerange_train.stopts) | ||||
|             tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) | ||||
|             tr_training_list_timerange.append(copy.deepcopy(timerange_train)) | ||||
|  | ||||
| @@ -415,8 +415,8 @@ class FreqaiDataKitchen: | ||||
|             if timerange_backtest.stopts > config_timerange.stopts: | ||||
|                 timerange_backtest.stopts = config_timerange.stopts | ||||
|  | ||||
|             start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts) | ||||
|             stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts) | ||||
|             start = datetime.utcfromtimestamp(timerange_backtest.startts) | ||||
|             stop = datetime.utcfromtimestamp(timerange_backtest.stopts) | ||||
|             tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) | ||||
|             tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest)) | ||||
|  | ||||
| @@ -436,8 +436,8 @@ class FreqaiDataKitchen: | ||||
|                    it is sliced down to just the present training period. | ||||
|         """ | ||||
|  | ||||
|         start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc) | ||||
|         stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc) | ||||
|         start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) | ||||
|         stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) | ||||
|         df = df.loc[df["date"] >= start, :] | ||||
|         df = df.loc[df["date"] <= stop, :] | ||||
|  | ||||
| @@ -808,6 +808,8 @@ class FreqaiDataKitchen: | ||||
|                 [compute_df, inlier_metric], axis=1) | ||||
|             self.data_dictionary['prediction_features'].fillna(0, inplace=True) | ||||
|  | ||||
|         logger.info('Inlier metric computed and added to features.') | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10): | ||||
| @@ -948,14 +950,14 @@ class FreqaiDataKitchen: | ||||
|                                        "Please indicate the end date of your desired backtesting. " | ||||
|                                        "timerange.") | ||||
|             # backtest_timerange.stopts = int( | ||||
|             #     datetime.datetime.now(tz=datetime.timezone.utc).timestamp() | ||||
|             #     datetime.now(tz=timezone.utc).timestamp() | ||||
|             # ) | ||||
|  | ||||
|         backtest_timerange.startts = ( | ||||
|             backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY | ||||
|         ) | ||||
|         start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts) | ||||
|         stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts) | ||||
|         start = datetime.utcfromtimestamp(backtest_timerange.startts) | ||||
|         stop = datetime.utcfromtimestamp(backtest_timerange.stopts) | ||||
|         full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") | ||||
|  | ||||
|         self.full_path = Path( | ||||
| @@ -981,7 +983,7 @@ class FreqaiDataKitchen: | ||||
|         :return: | ||||
|             bool = If the model is expired or not. | ||||
|         """ | ||||
|         time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() | ||||
|         time = datetime.now(tz=timezone.utc).timestamp() | ||||
|         elapsed_time = (time - trained_timestamp) / 3600  # hours | ||||
|         max_time = self.freqai_config.get("expiration_hours", 0) | ||||
|         if max_time > 0: | ||||
| @@ -993,7 +995,7 @@ class FreqaiDataKitchen: | ||||
|         self, trained_timestamp: int | ||||
|     ) -> Tuple[bool, TimeRange, TimeRange]: | ||||
|  | ||||
|         time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() | ||||
|         time = datetime.now(tz=timezone.utc).timestamp() | ||||
|         trained_timerange = TimeRange() | ||||
|         data_load_timerange = TimeRange() | ||||
|  | ||||
|   | ||||
| @@ -1,10 +1,10 @@ | ||||
| # import contextlib | ||||
| import datetime | ||||
| import logging | ||||
| import shutil | ||||
| import threading | ||||
| import time | ||||
| from abc import ABC, abstractmethod | ||||
| from datetime import datetime | ||||
| from pathlib import Path | ||||
| from threading import Lock | ||||
| from typing import Any, Dict, Tuple | ||||
| @@ -174,7 +174,7 @@ class IFreqaiModel(ABC): | ||||
|  | ||||
|                 if retrain: | ||||
|                     self.train_timer('start') | ||||
|                     self.train_model_in_series( | ||||
|                     self.extract_data_and_train_model( | ||||
|                         new_trained_timerange, pair, strategy, dk, data_load_timerange | ||||
|                     ) | ||||
|                     self.train_timer('stop') | ||||
| @@ -214,10 +214,10 @@ class IFreqaiModel(ABC): | ||||
|             dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) | ||||
|  | ||||
|             trained_timestamp = tr_train | ||||
|             tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime( | ||||
|             tr_train_startts_str = datetime.utcfromtimestamp(tr_train.startts).strftime( | ||||
|                 "%Y-%m-%d %H:%M:%S" | ||||
|             ) | ||||
|             tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime( | ||||
|             tr_train_stopts_str = datetime.utcfromtimestamp(tr_train.stopts).strftime( | ||||
|                 "%Y-%m-%d %H:%M:%S" | ||||
|             ) | ||||
|             logger.info( | ||||
| @@ -495,7 +495,7 @@ class IFreqaiModel(ABC): | ||||
|             Path(self.full_path, Path(self.config["config_files"][0]).name), | ||||
|         ) | ||||
|  | ||||
|     def train_model_in_series( | ||||
|     def extract_data_and_train_model( | ||||
|         self, | ||||
|         new_trained_timerange: TimeRange, | ||||
|         pair: str, | ||||
|   | ||||
| @@ -82,6 +82,38 @@ def get_patched_freqaimodel(mocker, freqaiconf): | ||||
|     return freqaimodel | ||||
|  | ||||
|  | ||||
| def make_unfiltered_dataframe(mocker, freqai_conf): | ||||
|     freqai_conf.update({"timerange": "20180110-20180130"}) | ||||
|  | ||||
|     strategy = get_patched_freqai_strategy(mocker, freqai_conf) | ||||
|     exchange = get_patched_exchange(mocker, freqai_conf) | ||||
|     strategy.dp = DataProvider(freqai_conf, exchange) | ||||
|     strategy.freqai_info = freqai_conf.get("freqai", {}) | ||||
|     freqai = strategy.freqai | ||||
|     freqai.live = True | ||||
|     freqai.dk = FreqaiDataKitchen(freqai_conf) | ||||
|     freqai.dk.pair = "ADA/BTC" | ||||
|     timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     freqai.dd.load_all_pair_histories(timerange, freqai.dk) | ||||
|  | ||||
|     freqai.dd.pair_dict = MagicMock() | ||||
|  | ||||
|     data_load_timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     new_timerange = TimeRange.parse_timerange("20180120-20180130") | ||||
|  | ||||
|     corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes( | ||||
|             data_load_timerange, freqai.dk.pair, freqai.dk | ||||
|         ) | ||||
|  | ||||
|     unfiltered_dataframe = freqai.dk.use_strategy_to_populate_indicators( | ||||
|                 strategy, corr_dataframes, base_dataframes, freqai.dk.pair | ||||
|             ) | ||||
|  | ||||
|     unfiltered_dataframe = freqai.dk.slice_dataframe(new_timerange, unfiltered_dataframe) | ||||
|  | ||||
|     return freqai, unfiltered_dataframe | ||||
|  | ||||
|  | ||||
| def make_data_dictionary(mocker, freqai_conf): | ||||
|     freqai_conf.update({"timerange": "20180110-20180130"}) | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,8 @@ import pytest | ||||
|  | ||||
| from freqtrade.exceptions import OperationalException | ||||
| from tests.conftest import log_has_re | ||||
| from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary | ||||
| from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary, | ||||
|                                    make_unfiltered_dataframe) | ||||
|  | ||||
|  | ||||
| @pytest.mark.parametrize( | ||||
| @@ -94,3 +95,72 @@ def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf, | ||||
|         "SVM detected 8.46%", | ||||
|         caplog, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_compute_inlier_metric(mocker, freqai_conf, caplog): | ||||
|     freqai = make_data_dictionary(mocker, freqai_conf) | ||||
|     freqai_conf['freqai']['feature_parameters'].update({"inlier_metric_window": 10}) | ||||
|     freqai.dk.compute_inlier_metric(set_='train') | ||||
|     assert log_has_re( | ||||
|         "Inlier metric computed and added to features.", | ||||
|         caplog, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_add_noise_to_training_features(mocker, freqai_conf): | ||||
|     freqai = make_data_dictionary(mocker, freqai_conf) | ||||
|     freqai_conf['freqai']['feature_parameters'].update({"noise_standard_deviation": 0.1}) | ||||
|     freqai.dk.add_noise_to_training_features() | ||||
|  | ||||
|  | ||||
| def test_remove_beginning_points_from_data_dict(mocker, freqai_conf): | ||||
|     freqai = make_data_dictionary(mocker, freqai_conf) | ||||
|     freqai.dk.remove_beginning_points_from_data_dict(set_='train') | ||||
|  | ||||
|  | ||||
| def test_principal_component_analysis(mocker, freqai_conf, caplog): | ||||
|     freqai = make_data_dictionary(mocker, freqai_conf) | ||||
|     freqai.dk.principal_component_analysis() | ||||
|     assert log_has_re( | ||||
|         "reduced feature dimension by", | ||||
|         caplog, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_normalize_data(mocker, freqai_conf): | ||||
|     freqai = make_data_dictionary(mocker, freqai_conf) | ||||
|     data_dict = freqai.dk.data_dictionary | ||||
|     freqai.dk.normalize_data(data_dict) | ||||
|     assert len(freqai.dk.data) == 56 | ||||
|  | ||||
|  | ||||
| def test_filter_features(mocker, freqai_conf): | ||||
|     freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf) | ||||
|     freqai.dk.find_features(unfiltered_dataframe) | ||||
|  | ||||
|     filtered_df, labels = freqai.dk.filter_features( | ||||
|             unfiltered_dataframe, | ||||
|             freqai.dk.training_features_list, | ||||
|             freqai.dk.label_list, | ||||
|             training_filter=True, | ||||
|     ) | ||||
|  | ||||
|     assert len(filtered_df.columns) == 26 | ||||
|  | ||||
|  | ||||
| def test_make_train_test_datasets(mocker, freqai_conf): | ||||
|     freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf) | ||||
|     freqai.dk.find_features(unfiltered_dataframe) | ||||
|  | ||||
|     features_filtered, labels_filtered = freqai.dk.filter_features( | ||||
|             unfiltered_dataframe, | ||||
|             freqai.dk.training_features_list, | ||||
|             freqai.dk.label_list, | ||||
|             training_filter=True, | ||||
|         ) | ||||
|  | ||||
|     data_dictionary = freqai.dk.make_train_test_datasets(features_filtered, labels_filtered) | ||||
|  | ||||
|     assert data_dictionary | ||||
|     assert len(data_dictionary) == 7 | ||||
|     assert len(data_dictionary['train_features'].index) == 1916 | ||||
|   | ||||
| @@ -17,7 +17,7 @@ def is_arm() -> bool: | ||||
|     return "arm" in machine or "aarch64" in machine | ||||
|  | ||||
|  | ||||
| def test_train_model_in_series_LightGBM(mocker, freqai_conf): | ||||
| def test_extract_data_and_train_model_LightGBM(mocker, freqai_conf): | ||||
|     freqai_conf.update({"timerange": "20180110-20180130"}) | ||||
|  | ||||
|     strategy = get_patched_freqai_strategy(mocker, freqai_conf) | ||||
| @@ -35,7 +35,8 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf): | ||||
|     data_load_timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     new_timerange = TimeRange.parse_timerange("20180120-20180130") | ||||
|  | ||||
|     freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) | ||||
|     freqai.extract_data_and_train_model( | ||||
|         new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) | ||||
|  | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() | ||||
| @@ -45,7 +46,7 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf): | ||||
|     shutil.rmtree(Path(freqai.dk.full_path)) | ||||
|  | ||||
|  | ||||
| def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): | ||||
| def test_extract_data_and_train_model_LightGBMMultiModel(mocker, freqai_conf): | ||||
|     freqai_conf.update({"timerange": "20180110-20180130"}) | ||||
|     freqai_conf.update({"strategy": "freqai_test_multimodel_strat"}) | ||||
|     freqai_conf.update({"freqaimodel": "LightGBMRegressorMultiTarget"}) | ||||
| @@ -64,7 +65,8 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): | ||||
|     data_load_timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     new_timerange = TimeRange.parse_timerange("20180120-20180130") | ||||
|  | ||||
|     freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) | ||||
|     freqai.extract_data_and_train_model( | ||||
|         new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) | ||||
|  | ||||
|     assert len(freqai.dk.label_list) == 2 | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() | ||||
| @@ -77,7 +79,7 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): | ||||
|  | ||||
|  | ||||
| @pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...") | ||||
| def test_train_model_in_series_Catboost(mocker, freqai_conf): | ||||
| def test_extract_data_and_train_model_Catboost(mocker, freqai_conf): | ||||
|     freqai_conf.update({"timerange": "20180110-20180130"}) | ||||
|     freqai_conf.update({"freqaimodel": "CatboostRegressor"}) | ||||
|     # freqai_conf.get('freqai', {}).update( | ||||
| @@ -98,8 +100,8 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf): | ||||
|     data_load_timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     new_timerange = TimeRange.parse_timerange("20180120-20180130") | ||||
|  | ||||
|     freqai.train_model_in_series(new_timerange, "ADA/BTC", | ||||
|                                  strategy, freqai.dk, data_load_timerange) | ||||
|     freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", | ||||
|                                         strategy, freqai.dk, data_load_timerange) | ||||
|  | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() | ||||
| @@ -110,7 +112,7 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf): | ||||
|  | ||||
|  | ||||
| @pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...") | ||||
| def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): | ||||
| def test_extract_data_and_train_model_CatboostClassifier(mocker, freqai_conf): | ||||
|     freqai_conf.update({"timerange": "20180110-20180130"}) | ||||
|     freqai_conf.update({"freqaimodel": "CatboostClassifier"}) | ||||
|     freqai_conf.update({"strategy": "freqai_test_classifier"}) | ||||
| @@ -130,8 +132,8 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): | ||||
|     data_load_timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     new_timerange = TimeRange.parse_timerange("20180120-20180130") | ||||
|  | ||||
|     freqai.train_model_in_series(new_timerange, "ADA/BTC", | ||||
|                                  strategy, freqai.dk, data_load_timerange) | ||||
|     freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", | ||||
|                                         strategy, freqai.dk, data_load_timerange) | ||||
|  | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() | ||||
| @@ -141,7 +143,7 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): | ||||
|     shutil.rmtree(Path(freqai.dk.full_path)) | ||||
|  | ||||
|  | ||||
| def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf): | ||||
| def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf): | ||||
|     freqai_conf.update({"timerange": "20180110-20180130"}) | ||||
|     freqai_conf.update({"freqaimodel": "LightGBMClassifier"}) | ||||
|     freqai_conf.update({"strategy": "freqai_test_classifier"}) | ||||
| @@ -161,8 +163,8 @@ def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf): | ||||
|     data_load_timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     new_timerange = TimeRange.parse_timerange("20180120-20180130") | ||||
|  | ||||
|     freqai.train_model_in_series(new_timerange, "ADA/BTC", | ||||
|                                  strategy, freqai.dk, data_load_timerange) | ||||
|     freqai.extract_data_and_train_model(new_timerange, "ADA/BTC", | ||||
|                                         strategy, freqai.dk, data_load_timerange) | ||||
|  | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists() | ||||
| @@ -289,7 +291,8 @@ def test_follow_mode(mocker, freqai_conf): | ||||
|     data_load_timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     new_timerange = TimeRange.parse_timerange("20180120-20180130") | ||||
|  | ||||
|     freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) | ||||
|     freqai.extract_data_and_train_model( | ||||
|         new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) | ||||
|  | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() | ||||
| @@ -338,7 +341,8 @@ def test_principal_component_analysis(mocker, freqai_conf): | ||||
|     data_load_timerange = TimeRange.parse_timerange("20180110-20180130") | ||||
|     new_timerange = TimeRange.parse_timerange("20180120-20180130") | ||||
|  | ||||
|     freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) | ||||
|     freqai.extract_data_and_train_model( | ||||
|         new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) | ||||
|  | ||||
|     assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl") | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user