add shuffle_after_split option

This commit is contained in:
robcaulk 2022-12-16 11:20:37 +01:00
parent 581a5296cc
commit c9bc91c75b
2 changed files with 25 additions and 9 deletions

View File

@@ -1,5 +1,6 @@
import copy import copy
import logging import logging
import random
import shutil import shutil
from datetime import datetime, timezone from datetime import datetime, timezone
from math import cos, sin from math import cos, sin
@@ -168,6 +169,19 @@ class FreqaiDataKitchen:
train_labels = labels train_labels = labels
train_weights = weights train_weights = weights
if feat_dict.get("shuffle_after_split", False):
rint1 = random.randint(0, 100)
rint2 = random.randint(0, 100)
train_features = train_features.sample(
frac=1, random_state=rint1).reset_index(drop=True)
train_labels = train_labels.sample(frac=1, random_state=rint1).reset_index(drop=True)
train_weights = pd.DataFrame(train_weights).sample(
frac=1, random_state=rint1).reset_index(drop=True).to_numpy()[:, 0]
test_features = test_features.sample(frac=1, random_state=rint2).reset_index(drop=True)
test_labels = test_labels.sample(frac=1, random_state=rint2).reset_index(drop=True)
test_weights = pd.DataFrame(test_weights).sample(
frac=1, random_state=rint2).reset_index(drop=True).to_numpy()[:, 0]
# Simplest way to reverse the order of training and test data: # Simplest way to reverse the order of training and test data:
if self.freqai_config['feature_parameters'].get('reverse_train_test_order', False): if self.freqai_config['feature_parameters'].get('reverse_train_test_order', False):
return self.build_data_dictionary( return self.build_data_dictionary(

View File

@@ -27,16 +27,17 @@ def is_mac() -> bool:
return "Darwin" in machine return "Darwin" in machine
@pytest.mark.parametrize('model, pca, dbscan, float32', [ @pytest.mark.parametrize('model, pca, dbscan, float32, shuffle', [
('LightGBMRegressor', True, False, True), ('LightGBMRegressor', True, False, True, False),
('XGBoostRegressor', False, True, False), ('XGBoostRegressor', False, True, False, False),
('XGBoostRFRegressor', False, False, False), ('XGBoostRFRegressor', False, False, False, False),
('CatboostRegressor', False, False, False), ('CatboostRegressor', False, False, False, True),
('ReinforcementLearner', False, True, False), ('ReinforcementLearner', False, True, False, False),
('ReinforcementLearner_multiproc', False, False, False), ('ReinforcementLearner_multiproc', False, False, False, False),
('ReinforcementLearner_test_4ac', False, False, False) ('ReinforcementLearner_test_4ac', False, False, False, False)
]) ])
def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, dbscan, float32): def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
dbscan, float32, shuffle):
if is_arm() and model == 'CatboostRegressor': if is_arm() and model == 'CatboostRegressor':
pytest.skip("CatBoost is not supported on ARM") pytest.skip("CatBoost is not supported on ARM")
@@ -50,6 +51,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca}) freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca})
freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan}) freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan})
freqai_conf.update({"reduce_df_footprint": float32}) freqai_conf.update({"reduce_df_footprint": float32})
freqai_conf['freqai']['feature_parameters'].update({"shuffle_after_split": shuffle})
if 'ReinforcementLearner' in model: if 'ReinforcementLearner' in model:
model_save_ext = 'zip' model_save_ext = 'zip'