From c9bc91c75b8414d70bbe6291497d068f5b9d355e Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 16 Dec 2022 11:20:37 +0100 Subject: [PATCH] add shuffle_after_split option --- freqtrade/freqai/data_kitchen.py | 14 ++++++++++++++ tests/freqai/test_freqai_interface.py | 20 +++++++++++--------- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 9c8158c8a..de6b74b21 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1,5 +1,6 @@ import copy import logging +import random import shutil from datetime import datetime, timezone from math import cos, sin @@ -168,6 +169,19 @@ class FreqaiDataKitchen: train_labels = labels train_weights = weights + if feat_dict.get("shuffle_after_split", False): + rint1 = random.randint(0, 100) + rint2 = random.randint(0, 100) + train_features = train_features.sample( + frac=1, random_state=rint1).reset_index(drop=True) + train_labels = train_labels.sample(frac=1, random_state=rint1).reset_index(drop=True) + train_weights = pd.DataFrame(train_weights).sample( + frac=1, random_state=rint1).reset_index(drop=True).to_numpy()[:, 0] + test_features = test_features.sample(frac=1, random_state=rint2).reset_index(drop=True) + test_labels = test_labels.sample(frac=1, random_state=rint2).reset_index(drop=True) + test_weights = pd.DataFrame(test_weights).sample( + frac=1, random_state=rint2).reset_index(drop=True).to_numpy()[:, 0] + # Simplest way to reverse the order of training and test data: if self.freqai_config['feature_parameters'].get('reverse_train_test_order', False): return self.build_data_dictionary( diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index f19acb018..fde167823 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -27,16 +27,17 @@ def is_mac() -> bool: return "Darwin" in machine -@pytest.mark.parametrize('model, pca, dbscan, float32', [ - ('LightGBMRegressor', True, False, True), - ('XGBoostRegressor', False, True, False), - ('XGBoostRFRegressor', False, False, False), - ('CatboostRegressor', False, False, False), - ('ReinforcementLearner', False, True, False), - ('ReinforcementLearner_multiproc', False, False, False), - ('ReinforcementLearner_test_4ac', False, False, False) +@pytest.mark.parametrize('model, pca, dbscan, float32, shuffle', [ + ('LightGBMRegressor', True, False, True, False), + ('XGBoostRegressor', False, True, False, False), + ('XGBoostRFRegressor', False, False, False, False), + ('CatboostRegressor', False, False, False, True), + ('ReinforcementLearner', False, True, False, False), + ('ReinforcementLearner_multiproc', False, False, False, False), + ('ReinforcementLearner_test_4ac', False, False, False, False) ]) -def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, dbscan, float32): +def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, + dbscan, float32, shuffle): if is_arm() and model == 'CatboostRegressor': pytest.skip("CatBoost is not supported on ARM") @@ -50,6 +51,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca}) freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan}) freqai_conf.update({"reduce_df_footprint": float32}) + freqai_conf['freqai']['feature_parameters'].update({"shuffle_after_split": shuffle}) if 'ReinforcementLearner' in model: model_save_ext = 'zip'