Merge branch 'develop' into backtest_live_models
This commit is contained in:
@@ -214,7 +214,10 @@ class FreqaiDataKitchen:
|
||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||
if const_cols:
|
||||
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
|
||||
self.data['constant_features_list'] = const_cols
|
||||
logger.warning(f"Removed features {const_cols} with constant values.")
|
||||
else:
|
||||
self.data['constant_features_list'] = []
|
||||
# we don't care about total row number (total no. datapoints) in training, we only care
|
||||
# about removing any row with NaNs
|
||||
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
||||
@@ -245,7 +248,8 @@ class FreqaiDataKitchen:
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else:
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
if len(self.data['constant_features_list']):
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1)
|
||||
@@ -354,13 +358,19 @@ class FreqaiDataKitchen:
|
||||
:param df: Dataframe to be standardized
|
||||
"""
|
||||
|
||||
for item in df.keys():
|
||||
df[item] = (
|
||||
2
|
||||
* (df[item] - self.data[f"{item}_min"])
|
||||
/ (self.data[f"{item}_max"] - self.data[f"{item}_min"])
|
||||
- 1
|
||||
)
|
||||
train_max = [None] * len(df.keys())
|
||||
train_min = [None] * len(df.keys())
|
||||
|
||||
for i, item in enumerate(df.keys()):
|
||||
train_max[i] = self.data[f"{item}_max"]
|
||||
train_min[i] = self.data[f"{item}_min"]
|
||||
|
||||
train_max_series = pd.Series(train_max, index=df.keys())
|
||||
train_min_series = pd.Series(train_min, index=df.keys())
|
||||
|
||||
df = (
|
||||
2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
@@ -491,18 +501,16 @@ class FreqaiDataKitchen:
|
||||
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Check that prediction feature labels match training feature labels.
|
||||
:params:
|
||||
:df_predictions: incoming predictions
|
||||
:param df_predictions: incoming predictions
|
||||
"""
|
||||
train_labels = self.data_dictionary["train_features"].columns
|
||||
pred_labels = df_predictions.columns
|
||||
num_diffs = len(pred_labels.difference(train_labels))
|
||||
if num_diffs != 0:
|
||||
df_predictions = df_predictions[train_labels]
|
||||
logger.warning(
|
||||
f"Removed {num_diffs} features from prediction features, "
|
||||
f"these were likely considered constant values during most recent training."
|
||||
)
|
||||
constant_labels = self.data['constant_features_list']
|
||||
df_predictions = df_predictions.filter(
|
||||
df_predictions.columns.difference(constant_labels)
|
||||
)
|
||||
logger.warning(
|
||||
f"Removed {len(constant_labels)} features from prediction features, "
|
||||
f"these were considered constant values during most recent training."
|
||||
)
|
||||
|
||||
return df_predictions
|
||||
|
||||
@@ -986,6 +994,9 @@ class FreqaiDataKitchen:
|
||||
if "labels_std" in self.data:
|
||||
append_df[f"{label}_std"] = self.data["labels_std"][label]
|
||||
|
||||
for extra_col in self.data["extra_returns_per_train"]:
|
||||
append_df[f"{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
|
||||
|
||||
append_df["do_predict"] = do_predict
|
||||
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
|
||||
append_df["DI_values"] = self.DI_values
|
||||
@@ -1150,6 +1161,51 @@ class FreqaiDataKitchen:
|
||||
if pair not in self.all_pairs:
|
||||
self.all_pairs.append(pair)
|
||||
|
||||
def extract_corr_pair_columns_from_populated_indicators(
|
||||
self,
|
||||
dataframe: DataFrame
|
||||
) -> Dict[str, DataFrame]:
|
||||
"""
|
||||
Find the columns of the dataframe corresponding to the corr_pairlist, save them
|
||||
in a dictionary to be reused and attached to other pairs.
|
||||
|
||||
:param dataframe: fully populated dataframe (current pair + corr_pairs)
|
||||
:return: corr_dataframes, dictionary of dataframes to be attached
|
||||
to other pairs in same candle.
|
||||
"""
|
||||
corr_dataframes: Dict[str, DataFrame] = {}
|
||||
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
|
||||
|
||||
for pair in pairs:
|
||||
valid_strs = [f"%-{pair}", f"%{pair}", f"%_{pair}"]
|
||||
pair_cols = [col for col in dataframe.columns if
|
||||
any(substr in col for substr in valid_strs)]
|
||||
pair_cols.insert(0, 'date')
|
||||
corr_dataframes[pair] = dataframe.filter(pair_cols, axis=1)
|
||||
|
||||
return corr_dataframes
|
||||
|
||||
def attach_corr_pair_columns(self, dataframe: DataFrame,
|
||||
corr_dataframes: Dict[str, DataFrame],
|
||||
current_pair: str) -> DataFrame:
|
||||
"""
|
||||
Attach the existing corr_pair dataframes to the current pair dataframe before training
|
||||
|
||||
:param dataframe: current pair strategy dataframe, indicators populated already
|
||||
:param corr_dataframes: dictionary of saved dataframes from earlier in the same candle
|
||||
:param current_pair: current pair to which we will attach corr pair dataframe
|
||||
:return:
|
||||
:dataframe: current pair dataframe of populated indicators, concatenated with corr_pairs
|
||||
ready for training
|
||||
"""
|
||||
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
|
||||
|
||||
for pair in pairs:
|
||||
if current_pair != pair:
|
||||
dataframe = dataframe.merge(corr_dataframes[pair], how='left', on='date')
|
||||
|
||||
return dataframe
|
||||
|
||||
def use_strategy_to_populate_indicators(
|
||||
self,
|
||||
strategy: IStrategy,
|
||||
@@ -1157,6 +1213,7 @@ class FreqaiDataKitchen:
|
||||
base_dataframes: dict = {},
|
||||
pair: str = "",
|
||||
prediction_dataframe: DataFrame = pd.DataFrame(),
|
||||
do_corr_pairs: bool = True,
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Use the user defined strategy for populating indicators during retrain
|
||||
@@ -1166,15 +1223,15 @@ class FreqaiDataKitchen:
|
||||
:param base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:returns:
|
||||
:return:
|
||||
dataframe: DataFrame = dataframe containing populated indicators
|
||||
"""
|
||||
|
||||
# for prediction dataframe creation, we let dataprovider handle everything in the strategy
|
||||
# so we create empty dictionaries, which allows us to pass None to
|
||||
# `populate_any_indicators()`. Signaling we want the dp to give us the live dataframe.
|
||||
tfs = self.freqai_config["feature_parameters"].get("include_timeframes")
|
||||
pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
|
||||
tfs: List[str] = self.freqai_config["feature_parameters"].get("include_timeframes")
|
||||
pairs: List[str] = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])
|
||||
if not prediction_dataframe.empty:
|
||||
dataframe = prediction_dataframe.copy()
|
||||
for tf in tfs:
|
||||
@@ -1197,15 +1254,18 @@ class FreqaiDataKitchen:
|
||||
informative=base_dataframes[tf],
|
||||
set_generalized_indicators=sgi
|
||||
)
|
||||
if pairs:
|
||||
for i in pairs:
|
||||
if pair in i:
|
||||
continue # dont repeat anything from whitelist
|
||||
|
||||
# ensure corr pairs are always last
|
||||
for corr_pair in pairs:
|
||||
if pair == corr_pair:
|
||||
continue # dont repeat anything from whitelist
|
||||
for tf in tfs:
|
||||
if pairs and do_corr_pairs:
|
||||
dataframe = strategy.populate_any_indicators(
|
||||
i,
|
||||
corr_pair,
|
||||
dataframe.copy(),
|
||||
tf,
|
||||
informative=corr_dataframes[i][tf]
|
||||
informative=corr_dataframes[corr_pair][tf]
|
||||
)
|
||||
|
||||
self.get_unique_classes_from_labels(dataframe)
|
||||
|
||||
Reference in New Issue
Block a user