increase test coverage for dk, improve function naming, extra cleaning

This commit is contained in:
robcaulk 2022-09-03 15:52:29 +02:00
parent 7e8e29e42d
commit c9be66b5b6
5 changed files with 143 additions and 35 deletions

View File

@ -1,7 +1,7 @@
import copy
import datetime
import logging
import shutil
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Tuple
@ -345,7 +345,7 @@ class FreqaiDataKitchen:
def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
"""
Normalize a set of data using the mean and standard deviation from
Denormalize a set of data using the mean and standard deviation from
the associated training data.
:param df: Dataframe of predictions to be denormalized
"""
@ -384,7 +384,7 @@ class FreqaiDataKitchen:
config_timerange = TimeRange.parse_timerange(self.config["timerange"])
if config_timerange.stopts == 0:
config_timerange.stopts = int(
datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
datetime.now(tz=timezone.utc).timestamp()
)
timerange_train = copy.deepcopy(full_timerange)
timerange_backtest = copy.deepcopy(full_timerange)
@ -401,8 +401,8 @@ class FreqaiDataKitchen:
timerange_train.stopts = timerange_train.startts + train_period_days
first = False
start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts)
start = datetime.utcfromtimestamp(timerange_train.startts)
stop = datetime.utcfromtimestamp(timerange_train.stopts)
tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
tr_training_list_timerange.append(copy.deepcopy(timerange_train))
@ -415,8 +415,8 @@ class FreqaiDataKitchen:
if timerange_backtest.stopts > config_timerange.stopts:
timerange_backtest.stopts = config_timerange.stopts
start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts)
stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts)
start = datetime.utcfromtimestamp(timerange_backtest.startts)
stop = datetime.utcfromtimestamp(timerange_backtest.stopts)
tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest))
@ -436,8 +436,8 @@ class FreqaiDataKitchen:
it is sliced down to just the present training period.
"""
start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
df = df.loc[df["date"] >= start, :]
df = df.loc[df["date"] <= stop, :]
@ -808,6 +808,8 @@ class FreqaiDataKitchen:
[compute_df, inlier_metric], axis=1)
self.data_dictionary['prediction_features'].fillna(0, inplace=True)
logger.info('Inlier metric computed and added to features.')
return None
def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10):
@ -948,14 +950,14 @@ class FreqaiDataKitchen:
"Please indicate the end date of your desired backtesting. "
"timerange.")
# backtest_timerange.stopts = int(
# datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
# datetime.now(tz=timezone.utc).timestamp()
# )
backtest_timerange.startts = (
backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY
)
start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts)
start = datetime.utcfromtimestamp(backtest_timerange.startts)
stop = datetime.utcfromtimestamp(backtest_timerange.stopts)
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
self.full_path = Path(
@ -981,7 +983,7 @@ class FreqaiDataKitchen:
:return:
bool = If the model is expired or not.
"""
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
time = datetime.now(tz=timezone.utc).timestamp()
elapsed_time = (time - trained_timestamp) / 3600 # hours
max_time = self.freqai_config.get("expiration_hours", 0)
if max_time > 0:
@ -993,7 +995,7 @@ class FreqaiDataKitchen:
self, trained_timestamp: int
) -> Tuple[bool, TimeRange, TimeRange]:
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
time = datetime.now(tz=timezone.utc).timestamp()
trained_timerange = TimeRange()
data_load_timerange = TimeRange()

View File

@ -1,10 +1,10 @@
# import contextlib
import datetime
import logging
import shutil
import threading
import time
from abc import ABC, abstractmethod
from datetime import datetime
from pathlib import Path
from threading import Lock
from typing import Any, Dict, Tuple
@ -174,7 +174,7 @@ class IFreqaiModel(ABC):
if retrain:
self.train_timer('start')
self.train_model_in_series(
self.extract_data_and_train_model(
new_trained_timerange, pair, strategy, dk, data_load_timerange
)
self.train_timer('stop')
@ -214,10 +214,10 @@ class IFreqaiModel(ABC):
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
trained_timestamp = tr_train
tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime(
tr_train_startts_str = datetime.utcfromtimestamp(tr_train.startts).strftime(
"%Y-%m-%d %H:%M:%S"
)
tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime(
tr_train_stopts_str = datetime.utcfromtimestamp(tr_train.stopts).strftime(
"%Y-%m-%d %H:%M:%S"
)
logger.info(
@ -495,7 +495,7 @@ class IFreqaiModel(ABC):
Path(self.full_path, Path(self.config["config_files"][0]).name),
)
def train_model_in_series(
def extract_data_and_train_model(
self,
new_trained_timerange: TimeRange,
pair: str,

View File

@ -82,6 +82,38 @@ def get_patched_freqaimodel(mocker, freqaiconf):
return freqaimodel
def make_unfiltered_dataframe(mocker, freqai_conf):
    """
    Build a live-mode FreqAI instance together with an indicator-populated
    dataframe for "ADA/BTC" that has been sliced to the training window but
    has not been passed through feature filtering.
    :param mocker: pytest-mock fixture forwarded to the patched strategy/exchange helpers
    :param freqai_conf: FreqAI test configuration; its "timerange" key is overwritten here
    :return: tuple of (freqai instance, sliced unfiltered dataframe)
    """
    full_range = "20180110-20180130"
    train_range = "20180120-20180130"
    freqai_conf.update({"timerange": full_range})

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, get_patched_exchange(mocker, freqai_conf))
    strategy.freqai_info = freqai_conf.get("freqai", {})

    freqai = strategy.freqai
    freqai.live = True
    dk = FreqaiDataKitchen(freqai_conf)
    freqai.dk = dk
    dk.pair = "ADA/BTC"

    # A dedicated TimeRange object is parsed for each consumer, as in the
    # original, so no two calls share the same instance.
    freqai.dd.load_all_pair_histories(TimeRange.parse_timerange(full_range), dk)

    # Replaced only after the pair histories have been loaded.
    freqai.dd.pair_dict = MagicMock()

    load_range = TimeRange.parse_timerange(full_range)
    slice_range = TimeRange.parse_timerange(train_range)

    corr_dfs, base_dfs = freqai.dd.get_base_and_corr_dataframes(load_range, dk.pair, dk)
    populated = dk.use_strategy_to_populate_indicators(strategy, corr_dfs, base_dfs, dk.pair)

    return freqai, dk.slice_dataframe(slice_range, populated)
def make_data_dictionary(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})

View File

@ -6,7 +6,8 @@ import pytest
from freqtrade.exceptions import OperationalException
from tests.conftest import log_has_re
from tests.freqai.conftest import get_patched_data_kitchen, make_data_dictionary
from tests.freqai.conftest import (get_patched_data_kitchen, make_data_dictionary,
make_unfiltered_dataframe)
@pytest.mark.parametrize(
@ -94,3 +95,72 @@ def test_use_SVM_to_remove_outliers_and_outlier_protection(mocker, freqai_conf,
"SVM detected 8.46%",
caplog,
)
def test_compute_inlier_metric(mocker, freqai_conf, caplog):
    """
    Run compute_inlier_metric on the 'train' set with an inlier window of 10
    and check that the completion message is logged.
    """
    freqai = make_data_dictionary(mocker, freqai_conf)
    feature_params = freqai_conf['freqai']['feature_parameters']
    feature_params["inlier_metric_window"] = 10

    freqai.dk.compute_inlier_metric(set_='train')

    expected_log = "Inlier metric computed and added to features."
    assert log_has_re(expected_log, caplog)
def test_add_noise_to_training_features(mocker, freqai_conf):
    """
    Smoke test: add_noise_to_training_features must run without raising when
    noise_standard_deviation is set to 0.1. The result is not inspected.
    """
    freqai = make_data_dictionary(mocker, freqai_conf)
    feature_params = freqai_conf['freqai']['feature_parameters']
    feature_params["noise_standard_deviation"] = 0.1

    freqai.dk.add_noise_to_training_features()
def test_remove_beginning_points_from_data_dict(mocker, freqai_conf):
    """
    Smoke test: remove_beginning_points_from_data_dict must run without
    raising on the 'train' set. The result is not inspected.
    """
    kitchen = make_data_dictionary(mocker, freqai_conf).dk
    kitchen.remove_beginning_points_from_data_dict(set_='train')
def test_principal_component_analysis(mocker, freqai_conf, caplog):
    """
    Run principal_component_analysis on a prepared data dictionary and check
    that a "reduced feature dimension by" message is logged.
    """
    freqai = make_data_dictionary(mocker, freqai_conf)

    freqai.dk.principal_component_analysis()

    expected_log = "reduced feature dimension by"
    assert log_has_re(expected_log, caplog)
def test_normalize_data(mocker, freqai_conf):
    """
    Normalize the prepared data dictionary and check that dk.data ends up
    holding 56 entries.
    """
    freqai = make_data_dictionary(mocker, freqai_conf)
    kitchen = freqai.dk

    kitchen.normalize_data(kitchen.data_dictionary)

    assert len(freqai.dk.data) == 56
def test_filter_features(mocker, freqai_conf):
    """
    Filter the unfiltered dataframe with training_filter=True and check that
    the returned feature frame has 26 columns.
    """
    freqai, raw_df = make_unfiltered_dataframe(mocker, freqai_conf)
    kitchen = freqai.dk
    # find_features runs first; the lists passed to filter_features below are
    # read from the kitchen only after this call.
    kitchen.find_features(raw_df)

    filtered_df, _labels = kitchen.filter_features(
        raw_df,
        kitchen.training_features_list,
        kitchen.label_list,
        training_filter=True,
    )

    n_columns = len(filtered_df.columns)
    assert n_columns == 26
def test_make_train_test_datasets(mocker, freqai_conf):
    """
    Filter features/labels from the unfiltered dataframe, split them with
    make_train_test_datasets, and check the resulting dictionary: non-empty,
    7 entries, and 1916 rows of training features.
    """
    freqai, raw_df = make_unfiltered_dataframe(mocker, freqai_conf)
    kitchen = freqai.dk
    # find_features runs first; the lists passed to filter_features below are
    # read from the kitchen only after this call.
    kitchen.find_features(raw_df)

    features, labels = kitchen.filter_features(
        raw_df,
        kitchen.training_features_list,
        kitchen.label_list,
        training_filter=True,
    )

    data_dictionary = kitchen.make_train_test_datasets(features, labels)

    assert data_dictionary
    assert len(data_dictionary) == 7
    train_rows = data_dictionary['train_features'].index
    assert len(train_rows) == 1916

View File

@ -17,7 +17,7 @@ def is_arm() -> bool:
return "arm" in machine or "aarch64" in machine
def test_train_model_in_series_LightGBM(mocker, freqai_conf):
def test_extract_data_and_train_model_LightGBM(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
@ -35,7 +35,8 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
@ -45,7 +46,7 @@ def test_train_model_in_series_LightGBM(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path))
def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
def test_extract_data_and_train_model_LightGBMMultiModel(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"strategy": "freqai_test_multimodel_strat"})
freqai_conf.update({"freqaimodel": "LightGBMRegressorMultiTarget"})
@ -64,7 +65,8 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert len(freqai.dk.label_list) == 2
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
@ -77,7 +79,7 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
@pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...")
def test_train_model_in_series_Catboost(mocker, freqai_conf):
def test_extract_data_and_train_model_Catboost(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "CatboostRegressor"})
# freqai_conf.get('freqai', {}).update(
@ -98,8 +100,8 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange)
freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
@ -110,7 +112,7 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
@pytest.mark.skipif(is_arm(), reason="no ARM for Catboost ...")
def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
def test_extract_data_and_train_model_CatboostClassifier(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "CatboostClassifier"})
freqai_conf.update({"strategy": "freqai_test_classifier"})
@ -130,8 +132,8 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange)
freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
@ -141,7 +143,7 @@ def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path))
def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
def test_extract_data_and_train_model_LightGBMClassifier(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "LightGBMClassifier"})
freqai_conf.update({"strategy": "freqai_test_classifier"})
@ -161,8 +163,8 @@ def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange)
freqai.extract_data_and_train_model(new_timerange, "ADA/BTC",
strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
@ -289,7 +291,8 @@ def test_follow_mode(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").is_file()
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").is_file()
@ -338,7 +341,8 @@ def test_principal_component_analysis(mocker, freqai_conf):
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.train_model_in_series(new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_pca_object.pkl")