Merge branch 'develop' into backtest_live_models
This commit is contained in:
@@ -114,9 +114,8 @@ class FreqaiDataKitchen:
|
||||
) -> None:
|
||||
"""
|
||||
Set the paths to the data for the present coin/botloop
|
||||
:params:
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
trained_timestamp: int = timestamp of most recent training
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:param trained_timestamp: int = timestamp of most recent training
|
||||
"""
|
||||
self.full_path = freqai_util.get_full_models_path(self.config)
|
||||
self.data_path = Path(
|
||||
@@ -133,25 +132,20 @@ class FreqaiDataKitchen:
|
||||
Given the dataframe for the full history for training, split the data into
|
||||
training and test data according to user specified parameters in configuration
|
||||
file.
|
||||
:filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:labels: cleaned labels ready to be split.
|
||||
:param filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:param labels: cleaned labels ready to be split.
|
||||
"""
|
||||
feat_dict = self.freqai_config["feature_parameters"]
|
||||
|
||||
if 'shuffle' not in self.freqai_config['data_split_parameters']:
|
||||
self.freqai_config["data_split_parameters"].update({'shuffle': False})
|
||||
|
||||
weights: npt.ArrayLike
|
||||
if feat_dict.get("weight_factor", 0) > 0:
|
||||
weights = self.set_weights_higher_recent(len(filtered_dataframe))
|
||||
else:
|
||||
weights = np.ones(len(filtered_dataframe))
|
||||
|
||||
if feat_dict.get("stratify_training_data", 0) > 0:
|
||||
stratification = np.zeros(len(filtered_dataframe))
|
||||
for i in range(1, len(stratification)):
|
||||
if i % feat_dict.get("stratify_training_data", 0) == 0:
|
||||
stratification[i] = 1
|
||||
else:
|
||||
stratification = None
|
||||
|
||||
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
(
|
||||
train_features,
|
||||
@@ -164,7 +158,6 @@ class FreqaiDataKitchen:
|
||||
filtered_dataframe[: filtered_dataframe.shape[0]],
|
||||
labels,
|
||||
weights,
|
||||
stratify=stratification,
|
||||
**self.config["freqai"]["data_split_parameters"],
|
||||
)
|
||||
else:
|
||||
@@ -199,13 +192,14 @@ class FreqaiDataKitchen:
|
||||
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
|
||||
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
|
||||
row that had a NaN and will shield user from that prediction.
|
||||
:params:
|
||||
:unfiltered_df: the full dataframe for the present training period
|
||||
:training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified parameters in the configuration file.
|
||||
:labels: the labels for the dataset
|
||||
:training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
|
||||
:param unfiltered_df: the full dataframe for the present training period
|
||||
:param training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified
|
||||
parameters in the configuration file.
|
||||
:param labels: the labels for the dataset
|
||||
:param training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
:returns:
|
||||
:filtered_df: dataframe cleaned of NaNs and only containing the user
|
||||
requested feature set.
|
||||
@@ -214,7 +208,7 @@ class FreqaiDataKitchen:
|
||||
filtered_df = unfiltered_df.filter(training_feature_list, axis=1)
|
||||
filtered_df = filtered_df.replace([np.inf, -np.inf], np.nan)
|
||||
|
||||
drop_index = pd.isnull(filtered_df).any(1) # get the rows that have NaNs,
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs,
|
||||
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
|
||||
if (training_filter):
|
||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||
@@ -225,7 +219,7 @@ class FreqaiDataKitchen:
|
||||
# about removing any row with NaNs
|
||||
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
||||
labels = unfiltered_df.filter(label_list, axis=1)
|
||||
drop_index_labels = pd.isnull(labels).any(1)
|
||||
drop_index_labels = pd.isnull(labels).any(axis=1)
|
||||
drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0)
|
||||
dates = unfiltered_df['date']
|
||||
filtered_df = filtered_df[
|
||||
@@ -253,7 +247,7 @@ class FreqaiDataKitchen:
|
||||
else:
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_df).any(1)
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1)
|
||||
self.data["filter_drop_index_prediction"] = drop_index
|
||||
filtered_df.fillna(0, inplace=True)
|
||||
# replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction
|
||||
@@ -295,8 +289,8 @@ class FreqaiDataKitchen:
|
||||
def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||
"""
|
||||
Normalize all data in the data_dictionary according to the training dataset
|
||||
:params:
|
||||
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
|
||||
:param data_dictionary: dictionary containing the cleaned and
|
||||
split training/test data/labels
|
||||
:returns:
|
||||
:data_dictionary: updated dictionary with standardized values.
|
||||
"""
|
||||
@@ -549,8 +543,7 @@ class FreqaiDataKitchen:
|
||||
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
|
||||
"""
|
||||
Use an existing pca transform to transform data into components
|
||||
:params:
|
||||
filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
:param filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
"""
|
||||
pca_components = self.pca.transform(filtered_dataframe)
|
||||
self.data_dictionary["prediction_features"] = pd.DataFrame(
|
||||
@@ -594,8 +587,7 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
Build/inference a Support Vector Machine to detect outliers
|
||||
in training data and prediction
|
||||
:params:
|
||||
predict: bool = If true, inference an existing SVM model, else construct one
|
||||
:param predict: bool = If true, inference an existing SVM model, else construct one
|
||||
"""
|
||||
|
||||
if self.keras:
|
||||
@@ -680,11 +672,11 @@ class FreqaiDataKitchen:
|
||||
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
|
||||
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
|
||||
pct of training data that they want to be considered outliers.
|
||||
:params:
|
||||
predict: bool = If False (training), iterate to find the best hyper parameters to match
|
||||
user requested outlier percent target. If True (prediction), use the parameters
|
||||
determined from the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
:param predict: bool = If False (training), iterate to find the best hyper parameters
|
||||
to match user requested outlier percent target.
|
||||
If True (prediction), use the parameters determined from
|
||||
the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
"""
|
||||
|
||||
if predict:
|
||||
@@ -835,7 +827,7 @@ class FreqaiDataKitchen:
|
||||
:, :no_prev_pts
|
||||
]
|
||||
distances = distances.replace([np.inf, -np.inf], np.nan)
|
||||
drop_index = pd.isnull(distances).any(1)
|
||||
drop_index = pd.isnull(distances).any(axis=1)
|
||||
distances = distances[drop_index == 0]
|
||||
|
||||
inliers = pd.DataFrame(index=distances.index)
|
||||
@@ -908,6 +900,7 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
column_names = dataframe.columns
|
||||
features = [c for c in column_names if "%" in c]
|
||||
|
||||
if not features:
|
||||
raise OperationalException("Could not find any features!")
|
||||
|
||||
@@ -1145,15 +1138,13 @@ class FreqaiDataKitchen:
|
||||
prediction_dataframe: DataFrame = pd.DataFrame(),
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Use the user defined strategy for populating indicators during
|
||||
retrain
|
||||
:params:
|
||||
strategy: IStrategy = user defined strategy object
|
||||
corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
Use the user defined strategy for populating indicators during retrain
|
||||
:param strategy: IStrategy = user defined strategy object
|
||||
:param corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:returns:
|
||||
dataframe: DataFrame = dataframe containing populated indicators
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user