merge develop into feat/freqai-rl-dev
This commit is contained in:
@@ -111,9 +111,8 @@ class FreqaiDataKitchen:
|
||||
) -> None:
|
||||
"""
|
||||
Set the paths to the data for the present coin/botloop
|
||||
:params:
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
trained_timestamp: int = timestamp of most recent training
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:param trained_timestamp: int = timestamp of most recent training
|
||||
"""
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
|
||||
@@ -133,8 +132,8 @@ class FreqaiDataKitchen:
|
||||
Given the dataframe for the full history for training, split the data into
|
||||
training and test data according to user specified parameters in configuration
|
||||
file.
|
||||
:filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:labels: cleaned labels ready to be split.
|
||||
:param filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:param labels: cleaned labels ready to be split.
|
||||
"""
|
||||
feat_dict = self.freqai_config["feature_parameters"]
|
||||
|
||||
@@ -193,13 +192,14 @@ class FreqaiDataKitchen:
|
||||
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
|
||||
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
|
||||
row that had a NaN and will shield user from that prediction.
|
||||
:params:
|
||||
:unfiltered_df: the full dataframe for the present training period
|
||||
:training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified parameters in the configuration file.
|
||||
:labels: the labels for the dataset
|
||||
:training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
|
||||
:param unfiltered_df: the full dataframe for the present training period
|
||||
:param training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified
|
||||
parameters in the configuration file.
|
||||
:param labels: the labels for the dataset
|
||||
:param training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
:returns:
|
||||
:filtered_df: dataframe cleaned of NaNs and only containing the user
|
||||
requested feature set.
|
||||
@@ -214,7 +214,10 @@ class FreqaiDataKitchen:
|
||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||
if const_cols:
|
||||
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
|
||||
self.data['constant_features_list'] = const_cols
|
||||
logger.warning(f"Removed features {const_cols} with constant values.")
|
||||
else:
|
||||
self.data['constant_features_list'] = []
|
||||
# we don't care about total row number (total no. datapoints) in training, we only care
|
||||
# about removing any row with NaNs
|
||||
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
||||
@@ -245,6 +248,8 @@ class FreqaiDataKitchen:
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else:
|
||||
if len(self.data['constant_features_list']):
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1)
|
||||
@@ -289,8 +294,8 @@ class FreqaiDataKitchen:
|
||||
def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||
"""
|
||||
Normalize all data in the data_dictionary according to the training dataset
|
||||
:params:
|
||||
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
|
||||
:param data_dictionary: dictionary containing the cleaned and
|
||||
split training/test data/labels
|
||||
:returns:
|
||||
:data_dictionary: updated dictionary with standardized values.
|
||||
"""
|
||||
@@ -464,6 +469,22 @@ class FreqaiDataKitchen:
|
||||
|
||||
return df
|
||||
|
||||
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Check that prediction feature labels match training feature labels.
|
||||
:param df_predictions: incoming predictions
|
||||
"""
|
||||
constant_labels = self.data['constant_features_list']
|
||||
df_predictions = df_predictions.filter(
|
||||
df_predictions.columns.difference(constant_labels)
|
||||
)
|
||||
logger.warning(
|
||||
f"Removed {len(constant_labels)} features from prediction features, "
|
||||
f"these were considered constant values during most recent training."
|
||||
)
|
||||
|
||||
return df_predictions
|
||||
|
||||
def principal_component_analysis(self) -> None:
|
||||
"""
|
||||
Performs Principal Component Analysis on the data for dimensionality reduction
|
||||
@@ -520,8 +541,7 @@ class FreqaiDataKitchen:
|
||||
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
|
||||
"""
|
||||
Use an existing pca transform to transform data into components
|
||||
:params:
|
||||
filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
:param filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
"""
|
||||
pca_components = self.pca.transform(filtered_dataframe)
|
||||
self.data_dictionary["prediction_features"] = pd.DataFrame(
|
||||
@@ -565,8 +585,7 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
Build/inference a Support Vector Machine to detect outliers
|
||||
in training data and prediction
|
||||
:params:
|
||||
predict: bool = If true, inference an existing SVM model, else construct one
|
||||
:param predict: bool = If true, inference an existing SVM model, else construct one
|
||||
"""
|
||||
|
||||
if self.keras:
|
||||
@@ -651,11 +670,11 @@ class FreqaiDataKitchen:
|
||||
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
|
||||
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
|
||||
pct of training data that they want to be considered outliers.
|
||||
:params:
|
||||
predict: bool = If False (training), iterate to find the best hyper parameters to match
|
||||
user requested outlier percent target. If True (prediction), use the parameters
|
||||
determined from the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
:param predict: bool = If False (training), iterate to find the best hyper parameters
|
||||
to match user requested outlier percent target.
|
||||
If True (prediction), use the parameters determined from
|
||||
the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
"""
|
||||
|
||||
if predict:
|
||||
@@ -944,6 +963,9 @@ class FreqaiDataKitchen:
|
||||
append_df[f"{label}_mean"] = self.data["labels_mean"][label]
|
||||
append_df[f"{label}_std"] = self.data["labels_std"][label]
|
||||
|
||||
for extra_col in self.data["extra_returns_per_train"]:
|
||||
append_df["{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
|
||||
|
||||
append_df["do_predict"] = do_predict
|
||||
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
|
||||
append_df["DI_values"] = self.DI_values
|
||||
@@ -1122,15 +1144,13 @@ class FreqaiDataKitchen:
|
||||
prediction_dataframe: DataFrame = pd.DataFrame(),
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Use the user defined strategy for populating indicators during
|
||||
retrain
|
||||
:params:
|
||||
strategy: IStrategy = user defined strategy object
|
||||
corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
Use the user defined strategy for populating indicators during retrain
|
||||
:param strategy: IStrategy = user defined strategy object
|
||||
:param corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:returns:
|
||||
dataframe: DataFrame = dataframe containing populated indicators
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user