increase test coverage for dk, improve function naming, extra cleaning

This commit is contained in:
robcaulk 2022-09-03 15:52:29 +02:00
parent 7e8e29e42d
commit c9be66b5b6
5 changed files with 143 additions and 35 deletions

View File

@ -1,7 +1,7 @@
import copy import copy
import datetime
import logging import logging
import shutil import shutil
from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Tuple from typing import Any, Dict, List, Tuple
@ -345,7 +345,7 @@ class FreqaiDataKitchen:
def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame: def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
""" """
Normalize a set of data using the mean and standard deviation from Denormalize a set of data using the mean and standard deviation from
the associated training data. the associated training data.
:param df: Dataframe of predictions to be denormalized :param df: Dataframe of predictions to be denormalized
""" """
@ -384,7 +384,7 @@ class FreqaiDataKitchen:
config_timerange = TimeRange.parse_timerange(self.config["timerange"]) config_timerange = TimeRange.parse_timerange(self.config["timerange"])
if config_timerange.stopts == 0: if config_timerange.stopts == 0:
config_timerange.stopts = int( config_timerange.stopts = int(
datetime.datetime.now(tz=datetime.timezone.utc).timestamp() datetime.now(tz=timezone.utc).timestamp()
) )
timerange_train = copy.deepcopy(full_timerange) timerange_train = copy.deepcopy(full_timerange)
timerange_backtest = copy.deepcopy(full_timerange) timerange_backtest = copy.deepcopy(full_timerange)
@ -401,8 +401,8 @@ class FreqaiDataKitchen:
timerange_train.stopts = timerange_train.startts + train_period_days timerange_train.stopts = timerange_train.startts + train_period_days
first = False first = False
start = datetime.datetime.utcfromtimestamp(timerange_train.startts) start = datetime.utcfromtimestamp(timerange_train.startts)
stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts) stop = datetime.utcfromtimestamp(timerange_train.stopts)
tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
tr_training_list_timerange.append(copy.deepcopy(timerange_train)) tr_training_list_timerange.append(copy.deepcopy(timerange_train))
@ -415,8 +415,8 @@ class FreqaiDataKitchen:
if timerange_backtest.stopts > config_timerange.stopts: if timerange_backtest.stopts > config_timerange.stopts:
timerange_backtest.stopts = config_timerange.stopts timerange_backtest.stopts = config_timerange.stopts
start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts) start = datetime.utcfromtimestamp(timerange_backtest.startts)
stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts) stop = datetime.utcfromtimestamp(timerange_backtest.stopts)
tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")) tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest)) tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest))
@ -436,8 +436,8 @@ class FreqaiDataKitchen:
it is sliced down to just the present training period. it is sliced down to just the present training period.
""" """
start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc) start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc) stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
df = df.loc[df["date"] >= start, :] df = df.loc[df["date"] >= start, :]
df = df.loc[df["date"] <= stop, :] df = df.loc[df["date"] <= stop, :]
@ -808,6 +808,8 @@ class FreqaiDataKitchen:
[compute_df, inlier_metric], axis=1) [compute_df, inlier_metric], axis=1)
self.data_dictionary['prediction_features'].fillna(0, inplace=True) self.data_dictionary['prediction_features'].fillna(0, inplace=True)
logger.info('Inlier metric computed and added to features.')
return None return None
def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10): def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10):
@ -948,14 +950,14 @@ class FreqaiDataKitchen:
"Please indicate the end date of your desired backtesting. " "Please indicate the end date of your desired backtesting. "
"timerange.") "timerange.")
# backtest_timerange.stopts = int( # backtest_timerange.stopts = int(
# datetime.datetime.now(tz=datetime.timezone.utc).timestamp() # datetime.now(tz=timezone.utc).timestamp()
# ) # )
backtest_timerange.startts = ( backtest_timerange.startts = (
backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY
) )
start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts) start = datetime.utcfromtimestamp(backtest_timerange.startts)
stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts) stop = datetime.utcfromtimestamp(backtest_timerange.stopts)
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d") full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
self.full_path = Path( self.full_path = Path(
@ -981,7 +983,7 @@ class FreqaiDataKitchen:
:return: :return:
bool = If the model is expired or not. bool = If the model is expired or not.
""" """
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() time = datetime.now(tz=timezone.utc).timestamp()
elapsed_time = (time - trained_timestamp) / 3600 # hours elapsed_time = (time - trained_timestamp) / 3600 # hours
max_time = self.freqai_config.get("expiration_hours", 0) max_time = self.freqai_config.get("expiration_hours", 0)
if max_time > 0: if max_time > 0:
@ -993,7 +995,7 @@ class FreqaiDataKitchen:
self, trained_timestamp: int self, trained_timestamp: int
) -> Tuple[bool, TimeRange, TimeRange]: ) -> Tuple[bool, TimeRange, TimeRange]:
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp() time = datetime.now(tz=timezone.utc).timestamp()
trained_timerange = TimeRange() trained_timerange = TimeRange()
data_load_timerange = TimeRange() data_load_timerange = TimeRange()

View File

@ -1,10 +1,10 @@
# import contextlib # import contextlib
import datetime
import logging import logging
import shutil import shutil
import threading import threading
import time import time
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from datetime import datetime
from pathlib import Path from pathlib import Path
from threading import Lock from threading import Lock
from typing import Any, Dict, Tuple from typing import Any, Dict, Tuple
@ -174,7 +174,7 @@ class IFreqaiModel(ABC):
if retrain: if retrain:
self.train_timer('start') self.train_timer('start')
self.train_model_in_series( self.extract_data_and_train_model(
new_trained_timerange, pair, strategy, dk, data_load_timerange new_trained_timerange, pair, strategy, dk, data_load_timerange
) )
self.train_timer('stop') self.train_timer('stop')
@ -214,10 +214,10 @@ class IFreqaiModel(ABC):
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe) dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
trained_timestamp = tr_train trained_timestamp = tr_train
tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime( tr_train_startts_str = datetime.utcfromtimestamp(tr_train.startts).strftime(
"%Y-%m-%d %H:%M:%S" "%Y-%m-%d %H:%M:%S"
) )
tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime( tr_train_stopts_str = datetime.utcfromtimestamp(tr_train.stopts).strftime(
"%Y-%m-%d %H:%M:%S" "%Y-%m-%d %H:%M:%S"
) )
logger.info( logger.info(
@ -495,7 +495,7 @@ class IFreqaiModel(ABC):
Path(self.full_path, Path(self.config["config_files"][0]).name), Path(self.full_path, Path(self.config["config_files"][0]).name),
) )
def train_model_in_series( def extract_data_and_train_model(
self, self,
new_trained_timerange: TimeRange, new_trained_timerange: TimeRange,
pair: str, pair: str,

View File

@ -82,6 +82,38 @@ def get_patched_freqaimodel(mocker, freqaiconf):
return freqaimodel return freqaimodel
def make_unfiltered_dataframe(mocker, freqai_conf):
    """
    Build a freqai instance plus an indicator-populated dataframe for tests.

    The config timerange is overwritten with "20180110-20180130", pair
    histories are loaded for that range, the strategy populates indicators
    for "ADA/BTC", and the result is sliced to "20180120-20180130".

    :param mocker: mocker fixture forwarded to the patching helpers
    :param freqai_conf: freqai test configuration (updated in place)
    :return: tuple of (freqai, unfiltered_dataframe)
    """
    freqai_conf.update({"timerange": "20180110-20180130"})

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, get_patched_exchange(mocker, freqai_conf))
    strategy.freqai_info = freqai_conf.get("freqai", {})

    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)
    dk = freqai.dk
    dk.pair = "ADA/BTC"

    # Load the raw history first; pair_dict is only mocked afterwards.
    history_range = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(history_range, dk)
    freqai.dd.pair_dict = MagicMock()

    load_range = TimeRange.parse_timerange("20180110-20180130")
    train_range = TimeRange.parse_timerange("20180120-20180130")

    corr_dataframes, base_dataframes = freqai.dd.get_base_and_corr_dataframes(
        load_range, dk.pair, dk
    )
    populated = dk.use_strategy_to_populate_indicators(
        strategy, corr_dataframes, base_dataframes, dk.pair
    )

    return freqai, dk.slice_dataframe(train_range, populated)
def make_data_dictionary(mocker, freqai_conf): def make_data_dictionary(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"timerange": "20180110-20180130"})

View File

@ -6,7 +6,8 @@ import pytest
from freqtrade.exceptions import OperationalException from freqtrade.exceptions import OperationalException
from tests.conftest import log_has_re from tests.conftest import log_has_re
from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary,
make_unfiltered_dataframe)
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -94,3 +95,72 @@ def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf,
"SVM detected 8.46%", "SVM detected 8.46%",
caplog, caplog,
) )
def test_compute_inlier_metric(mocker, freqai_conf, caplog):
    """Computing the inlier metric on the train set must log its completion message."""
    freqai = make_data_dictionary(mocker, freqai_conf)

    feature_parameters = freqai_conf['freqai']['feature_parameters']
    feature_parameters.update({"inlier_metric_window": 10})

    freqai.dk.compute_inlier_metric(set_='train')

    expected_message = "Inlier metric computed and added to features."
    assert log_has_re(expected_message, caplog)
def test_add_noise_to_training_features(mocker, freqai_conf):
    """Smoke test: adding noise to the training features must run without raising."""
    freqai = make_data_dictionary(mocker, freqai_conf)

    feature_parameters = freqai_conf['freqai']['feature_parameters']
    feature_parameters.update({"noise_standard_deviation": 0.1})

    freqai.dk.add_noise_to_training_features()
def test_remove_beginning_points_from_data_dict(mocker, freqai_conf):
    """Smoke test: trimming the first points of the train set must run without raising."""
    data_kitchen = make_data_dictionary(mocker, freqai_conf).dk
    data_kitchen.remove_beginning_points_from_data_dict(set_='train')
def test_principal_component_analysis(mocker, freqai_conf, caplog):
    """Running PCA on the data dictionary must log the feature-dimension reduction."""
    freqai = make_data_dictionary(mocker, freqai_conf)

    freqai.dk.principal_component_analysis()

    expected_pattern = "reduced feature dimension by"
    assert log_has_re(expected_pattern, caplog)
def test_normalize_data(mocker, freqai_conf):
    """After normalizing the data dictionary, dk.data must hold exactly 56 entries."""
    data_kitchen = make_data_dictionary(mocker, freqai_conf).dk

    data_kitchen.normalize_data(data_kitchen.data_dictionary)

    entry_count = len(data_kitchen.data)
    assert entry_count == 56
def test_filter_features(mocker, freqai_conf):
    """Filtering the unfiltered dataframe in training mode must yield 26 feature columns."""
    freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
    data_kitchen = freqai.dk
    data_kitchen.find_features(unfiltered_dataframe)

    # Only the filtered features are checked here; the labels are discarded.
    filtered_df, _labels = data_kitchen.filter_features(
        unfiltered_dataframe,
        data_kitchen.training_features_list,
        data_kitchen.label_list,
        training_filter=True,
    )

    column_count = len(filtered_df.columns)
    assert column_count == 26
def test_make_train_test_datasets(mocker, freqai_conf):
    """
    Splitting filtered features and labels must give a non-empty dictionary
    with 7 entries whose 'train_features' frame has 1916 rows.
    """
    freqai, unfiltered_dataframe = make_unfiltered_dataframe(mocker, freqai_conf)
    data_kitchen = freqai.dk
    data_kitchen.find_features(unfiltered_dataframe)

    features_filtered, labels_filtered = data_kitchen.filter_features(
        unfiltered_dataframe,
        data_kitchen.training_features_list,
        data_kitchen.label_list,
        training_filter=True,
    )

    data_dictionary = data_kitchen.make_train_test_datasets(features_filtered, labels_filtered)

    assert data_dictionary
    assert len(data_dictionary) == 7

    train_rows = len(data_dictionary['train_features'].index)
    assert train_rows == 1916

View File

@ -17,7 +17,7 @@ def is_arm() -> bool:
return "arm" in machine or "aarch64" in machine return "arm" in machine or "aarch64" in machine
def test_train_model_in_series_LightGBM(mocker, freqai_conf): def test_extract_data_and_train_model_LightGBM(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"timerange": "20180110-20180130"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf) strategy = get_patched_freqai_strategy(mocker, freqai_conf)
@ -35,7 +35,8 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
@ -45,7 +46,7 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path)) shutil.rmtree(Path(freqai.dk.full_path))
def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf): def test_extract_data_and_train_model_LightGBMMultiModel(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"strategy": "freqai_test_multimodel_strat"}) freqai_conf.update({"strategy": "freqai_test_multimodel_strat"})
freqai_conf.update({"freqaimodel": "LightGBMRegressorMultiTarget"}) freqai_conf.update({"freqaimodel": "LightGBMRegressorMultiTarget"})
@ -64,7 +65,8 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert len(freqai.dk.label_list) == 2 assert len(freqai.dk.label_list) == 2
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
@ -77,7 +79,7 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
@pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...") @pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...")
def test_train_model_in_series_Catboost(mocker, freqai_conf): def test_extract_data_and_train_model_Catboost(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "CatboostRegressor"}) freqai_conf.update({"freqaimodel": "CatboostRegressor"})
# freqai_conf.get('freqai', {}).update( # freqai_conf.get('freqai', {}).update(
@ -98,7 +100,7 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange) strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
@ -110,7 +112,7 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
@pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...") @pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...")
def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf): def test_extract_data_and_train_model_CatboostClassifier(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "CatboostClassifier"}) freqai_conf.update({"freqaimodel": "CatboostClassifier"})
freqai_conf.update({"strategy": "freqai_test_classifier"}) freqai_conf.update({"strategy": "freqai_test_classifier"})
@ -130,7 +132,7 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange) strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
@ -141,7 +143,7 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path)) shutil.rmtree(Path(freqai.dk.full_path))
def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf): def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "LightGBMClassifier"}) freqai_conf.update({"freqaimodel": "LightGBMClassifier"})
freqai_conf.update({"strategy": "freqai_test_classifier"}) freqai_conf.update({"strategy": "freqai_test_classifier"})
@ -161,7 +163,7 @@ def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange) strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
@ -289,7 +291,8 @@ def test_follow_mode(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file() assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
@ -338,7 +341,8 @@ def test_principal_component_analysis(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130") data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130") new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange) freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl") assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl")