Merge pull request #7904 from freqtrade/feat/shuffle_after_split
add shuffle_after_split option
commit b0ec35d526
@@ -45,6 +45,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
 | `noise_standard_deviation` | If set, FreqAI adds noise to the training features with the aim of preventing overfitting. FreqAI generates random deviates from a gaussian distribution with a standard deviation of `noise_standard_deviation` and adds them to all data points. `noise_standard_deviation` should be kept relative to the normalized space, i.e., between -1 and 1. In other words, since data in FreqAI is always normalized to be between -1 and 1, `noise_standard_deviation: 0.05` would result in 32% of the data being randomly increased/decreased by more than 2.5% (i.e., the percent of data falling within the first standard deviation). <br> **Datatype:** Integer. <br> Default: `0`.
 | `outlier_protection_percentage` | Enable to prevent outlier detection methods from discarding too much data. If more than `outlier_protection_percentage` % of points are detected as outliers by the SVM or DBSCAN, FreqAI will log a warning message and ignore outlier detection, i.e., the original dataset will be kept intact. If the outlier protection is triggered, no predictions will be made based on the training dataset. <br> **Datatype:** Float. <br> Default: `30`.
 | `reverse_train_test_order` | Split the feature dataset (see below) and use the latest data split for training and test on historical split of the data. This allows the model to be trained up to the most recent data point, while avoiding overfitting. However, you should be careful to understand the unorthodox nature of this parameter before employing it. <br> **Datatype:** Boolean. <br> Default: `False` (no reversal).
+| `shuffle_after_split` | Split the data into train and test sets, and then shuffle both sets individually. <br> **Datatype:** Boolean. <br> Default: `False`.

 ### Data split parameters
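To make the documentation change above concrete, here is a minimal, illustrative sketch of these keys as they would appear inside FreqAI's `feature_parameters` block, written as a Python dict rather than a full config file. The values are the documented defaults, except for `shuffle_after_split`, which is switched on purely for illustration.

```python
# Illustrative fragment only: keys and defaults are taken from the parameter
# table above; this is not a complete FreqAI configuration.
feature_parameters = {
    "noise_standard_deviation": 0,        # default: no gaussian noise added to features
    "outlier_protection_percentage": 30,  # default outlier-protection threshold (percent)
    "reverse_train_test_order": False,    # default: train on older data, test on newer
    "shuffle_after_split": True,          # new in this PR; default is False
}
```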
@@ -568,7 +568,8 @@ CONF_SCHEMA = {
 "shuffle": {"type": "boolean", "default": False},
 "nu": {"type": "number", "default": 0.1}
 },
-}
+},
+"shuffle_after_split": {"type": "boolean", "default": False}
 },
 "required": ["include_timeframes", "include_corr_pairlist", ]
 },
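The schema entry above only declares the new key's type and default. As a rough sketch of how such an entry constrains user input, the following uses the standalone `jsonschema` package against a hand-copied slice of the schema; this is not freqtrade's actual validation code path.

```python
from jsonschema import ValidationError, validate

# Hand-copied slice of the schema above, reduced to the new property.
feature_parameters_schema = {
    "type": "object",
    "properties": {
        "shuffle_after_split": {"type": "boolean", "default": False},
    },
}

# A boolean value passes validation unchanged.
validate({"shuffle_after_split": True}, feature_parameters_schema)

# A non-boolean value is rejected with a ValidationError.
try:
    validate({"shuffle_after_split": "yes"}, feature_parameters_schema)
except ValidationError as err:
    print(f"invalid feature_parameters: {err.message}")
```

Note that `validate` alone only type-checks; filling in the `default` value requires an extended validator, which is outside this sketch.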
@@ -1,6 +1,7 @@
 import copy
 import inspect
 import logging
+import random
 import shutil
 from datetime import datetime, timezone
 from math import cos, sin
@@ -170,6 +171,19 @@ class FreqaiDataKitchen:
     train_labels = labels
     train_weights = weights

+    if feat_dict["shuffle_after_split"]:
+        rint1 = random.randint(0, 100)
+        rint2 = random.randint(0, 100)
+        train_features = train_features.sample(
+            frac=1, random_state=rint1).reset_index(drop=True)
+        train_labels = train_labels.sample(frac=1, random_state=rint1).reset_index(drop=True)
+        train_weights = pd.DataFrame(train_weights).sample(
+            frac=1, random_state=rint1).reset_index(drop=True).to_numpy()[:, 0]
+        test_features = test_features.sample(frac=1, random_state=rint2).reset_index(drop=True)
+        test_labels = test_labels.sample(frac=1, random_state=rint2).reset_index(drop=True)
+        test_weights = pd.DataFrame(test_weights).sample(
+            frac=1, random_state=rint2).reset_index(drop=True).to_numpy()[:, 0]
+
     # Simplest way to reverse the order of training and test data:
     if self.freqai_config['feature_parameters'].get('reverse_train_test_order', False):
         return self.build_data_dictionary(
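The key detail in the block above is that every train-set frame is shuffled with the same seed (`rint1`) and every test-set frame with the same seed (`rint2`), so rows stay aligned across features, labels, and weights. A small standalone sketch of that pattern with hypothetical toy data (not freqtrade code):

```python
import numpy as np
import pandas as pd

# Toy data standing in for train_features / train_labels / train_weights.
features = pd.DataFrame({"f0": range(5), "f1": range(5, 10)})
labels = pd.DataFrame({"target": [0, 1, 0, 1, 1]})
weights = np.linspace(1.0, 0.5, 5)

seed = 7  # the PR draws this from random.randint(0, 100)

# sample(frac=1) returns every row in a random order; reusing the same
# random_state yields the same permutation for frames of equal length,
# so row i of `features` still matches row i of `labels` and `weights`.
features = features.sample(frac=1, random_state=seed).reset_index(drop=True)
labels = labels.sample(frac=1, random_state=seed).reset_index(drop=True)
weights = pd.DataFrame(weights).sample(
    frac=1, random_state=seed).reset_index(drop=True).to_numpy()[:, 0]
```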
@@ -46,6 +46,7 @@ def freqai_conf(default_conf, tmpdir):
 "use_SVM_to_remove_outliers": True,
 "stratify_training_data": 0,
 "indicator_periods_candles": [10],
+"shuffle_after_split": False
 },
 "data_split_parameters": {"test_size": 0.33, "shuffle": False},
 "model_training_parameters": {"n_estimators": 100},
@@ -27,19 +27,20 @@ def is_mac() -> bool:
     return "Darwin" in machine


-@pytest.mark.parametrize('model, pca, dbscan, float32, can_short', [
-    ('LightGBMRegressor', True, False, True, True),
-    ('XGBoostRegressor', False, True, False, True),
-    ('XGBoostRFRegressor', False, False, False, True),
-    ('CatboostRegressor', False, False, False, True),
-    ('ReinforcementLearner', False, True, False, True),
-    ('ReinforcementLearner_multiproc', False, False, False, True),
-    ('ReinforcementLearner_test_3ac', False, False, False, False),
-    ('ReinforcementLearner_test_3ac', False, False, False, True),
-    ('ReinforcementLearner_test_4ac', False, False, False, True)
+@pytest.mark.parametrize('model, pca, dbscan, float32, can_short, shuffle', [
+    ('LightGBMRegressor', True, False, True, True, False),
+    ('XGBoostRegressor', False, True, False, True, False),
+    ('XGBoostRFRegressor', False, False, False, True, False),
+    ('CatboostRegressor', False, False, False, True, True),
+    ('ReinforcementLearner', False, True, False, True, False),
+    ('ReinforcementLearner_multiproc', False, False, False, True, False),
+    ('ReinforcementLearner_test_3ac', False, False, False, False, False),
+    ('ReinforcementLearner_test_3ac', False, False, False, True, False),
+    ('ReinforcementLearner_test_4ac', False, False, False, True, False)
 ])
 def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
-                                               dbscan, float32, can_short):
+                                               dbscan, float32, can_short, shuffle):

     if is_arm() and model == 'CatboostRegressor':
         pytest.skip("CatBoost is not supported on ARM")

@@ -53,6 +54,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
     freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca})
     freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan})
     freqai_conf.update({"reduce_df_footprint": float32})
+    freqai_conf['freqai']['feature_parameters'].update({"shuffle_after_split": shuffle})

     if 'ReinforcementLearner' in model:
         model_save_ext = 'zip'