merge develop into feat/freqai-rl-dev

This commit is contained in:
robcaulk
2022-10-30 10:13:03 +01:00
129 changed files with 2648 additions and 1004 deletions

View File

@@ -111,9 +111,8 @@ class FreqaiDataKitchen:
) -> None:
"""
Set the paths to the data for the present coin/botloop
:params:
metadata: dict = strategy furnished pair metadata
trained_timestamp: int = timestamp of most recent training
:param metadata: dict = strategy furnished pair metadata
:param trained_timestamp: int = timestamp of most recent training
"""
self.full_path = Path(
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
@@ -133,8 +132,8 @@ class FreqaiDataKitchen:
Given the dataframe for the full history for training, split the data into
training and test data according to user specified parameters in configuration
file.
:filtered_dataframe: cleaned dataframe ready to be split.
:labels: cleaned labels ready to be split.
:param filtered_dataframe: cleaned dataframe ready to be split.
:param labels: cleaned labels ready to be split.
"""
feat_dict = self.freqai_config["feature_parameters"]
@@ -193,13 +192,14 @@ class FreqaiDataKitchen:
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
row that had a NaN and will shield user from that prediction.
:params:
:unfiltered_df: the full dataframe for the present training period
:training_feature_list: list, the training feature list constructed by
self.build_feature_list() according to user specified parameters in the configuration file.
:labels: the labels for the dataset
:training_filter: boolean which lets the function know if it is training data or
prediction data to be filtered.
:param unfiltered_df: the full dataframe for the present training period
:param training_feature_list: list, the training feature list constructed by
self.build_feature_list() according to user specified
parameters in the configuration file.
:param labels: the labels for the dataset
:param training_filter: boolean which lets the function know if it is training data or
prediction data to be filtered.
:returns:
:filtered_df: dataframe cleaned of NaNs and only containing the user
requested feature set.
@@ -214,7 +214,10 @@ class FreqaiDataKitchen:
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
if const_cols:
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
self.data['constant_features_list'] = const_cols
logger.warning(f"Removed features {const_cols} with constant values.")
else:
self.data['constant_features_list'] = []
# we don't care about total row number (total no. datapoints) in training, we only care
# about removing any row with NaNs
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
@@ -245,6 +248,8 @@ class FreqaiDataKitchen:
self.data["filter_drop_index_training"] = drop_index
else:
if len(self.data['constant_features_list']):
filtered_df = self.check_pred_labels(filtered_df)
# we are backtesting so we need to preserve row number to send back to strategy,
# so now we use do_predict to avoid any prediction based on a NaN
drop_index = pd.isnull(filtered_df).any(axis=1)
@@ -289,8 +294,8 @@ class FreqaiDataKitchen:
def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
"""
Normalize all data in the data_dictionary according to the training dataset
:params:
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
:param data_dictionary: dictionary containing the cleaned and
split training/test data/labels
:returns:
:data_dictionary: updated dictionary with standardized values.
"""
@@ -464,6 +469,22 @@ class FreqaiDataKitchen:
return df
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
"""
Check that prediction feature labels match training feature labels.
:param df_predictions: incoming predictions
"""
constant_labels = self.data['constant_features_list']
df_predictions = df_predictions.filter(
df_predictions.columns.difference(constant_labels)
)
logger.warning(
f"Removed {len(constant_labels)} features from prediction features, "
f"these were considered constant values during most recent training."
)
return df_predictions
def principal_component_analysis(self) -> None:
"""
Performs Principal Component Analysis on the data for dimensionality reduction
@@ -520,8 +541,7 @@ class FreqaiDataKitchen:
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
"""
Use an existing pca transform to transform data into components
:params:
filtered_dataframe: DataFrame = the cleaned dataframe
:param filtered_dataframe: DataFrame = the cleaned dataframe
"""
pca_components = self.pca.transform(filtered_dataframe)
self.data_dictionary["prediction_features"] = pd.DataFrame(
@@ -565,8 +585,7 @@ class FreqaiDataKitchen:
"""
Build/inference a Support Vector Machine to detect outliers
in training data and prediction
:params:
predict: bool = If true, inference an existing SVM model, else construct one
:param predict: bool = If true, inference an existing SVM model, else construct one
"""
if self.keras:
@@ -651,11 +670,11 @@ class FreqaiDataKitchen:
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
pct of training data that they want to be considered outliers.
:params:
predict: bool = If False (training), iterate to find the best hyper parameters to match
user requested outlier percent target. If True (prediction), use the parameters
determined from the previous training to estimate if the current prediction point
is an outlier.
:param predict: bool = If False (training), iterate to find the best hyper parameters
to match user requested outlier percent target.
If True (prediction), use the parameters determined from
the previous training to estimate if the current prediction point
is an outlier.
"""
if predict:
@@ -944,6 +963,9 @@ class FreqaiDataKitchen:
append_df[f"{label}_mean"] = self.data["labels_mean"][label]
append_df[f"{label}_std"] = self.data["labels_std"][label]
for extra_col in self.data["extra_returns_per_train"]:
append_df["{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
append_df["do_predict"] = do_predict
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
append_df["DI_values"] = self.DI_values
@@ -1122,15 +1144,13 @@ class FreqaiDataKitchen:
prediction_dataframe: DataFrame = pd.DataFrame(),
) -> DataFrame:
"""
Use the user defined strategy for populating indicators during
retrain
:params:
strategy: IStrategy = user defined strategy object
corr_dataframes: dict = dict containing the informative pair dataframes
(for user defined timeframes)
base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
metadata: dict = strategy furnished pair metadata
Use the user defined strategy for populating indicators during retrain
:param strategy: IStrategy = user defined strategy object
:param corr_dataframes: dict = dict containing the informative pair dataframes
(for user defined timeframes)
:param base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
:param metadata: dict = strategy furnished pair metadata
:returns:
dataframe: DataFrame = dataframe containing populated indicators
"""