Merge remote-tracking branch 'origin/develop' into reduce-indicator-population

This commit is contained in:
robcaulk
2022-10-31 09:42:01 +01:00
58 changed files with 971 additions and 472 deletions

View File

@@ -51,7 +51,7 @@ class BaseClassifierModel(IFreqaiModel):
f"{end_date} --------------------")
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)

View File

@@ -50,7 +50,7 @@ class BaseRegressionModel(IFreqaiModel):
f"{end_date} --------------------")
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)

View File

@@ -47,7 +47,7 @@ class BaseTensorFlowModel(IFreqaiModel):
f"{end_date} --------------------")
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)

View File

@@ -210,7 +210,10 @@ class FreqaiDataKitchen:
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
if const_cols:
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
self.data['constant_features_list'] = const_cols
logger.warning(f"Removed features {const_cols} with constant values.")
else:
self.data['constant_features_list'] = []
# we don't care about total row number (total no. datapoints) in training, we only care
# about removing any row with NaNs
# if labels has multiple columns (user wants to train multiple models), we detect here
@@ -241,7 +244,8 @@ class FreqaiDataKitchen:
self.data["filter_drop_index_training"] = drop_index
else:
filtered_df = self.check_pred_labels(filtered_df)
if len(self.data['constant_features_list']):
filtered_df = self.check_pred_labels(filtered_df)
# we are backtesting so we need to preserve row number to send back to strategy,
# so now we use do_predict to avoid any prediction based on a NaN
drop_index = pd.isnull(filtered_df).any(axis=1)
@@ -350,13 +354,19 @@ class FreqaiDataKitchen:
:param df: Dataframe to be standardized
"""
for item in df.keys():
df[item] = (
2
* (df[item] - self.data[f"{item}_min"])
/ (self.data[f"{item}_max"] - self.data[f"{item}_min"])
- 1
)
train_max = [None] * len(df.keys())
train_min = [None] * len(df.keys())
for i, item in enumerate(df.keys()):
train_max[i] = self.data[f"{item}_max"]
train_min[i] = self.data[f"{item}_min"]
train_max_series = pd.Series(train_max, index=df.keys())
train_min_series = pd.Series(train_min, index=df.keys())
df = (
2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
)
return df
@@ -464,18 +474,16 @@ class FreqaiDataKitchen:
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
"""
Check that prediction feature labels match training feature labels.
:params:
:df_predictions: incoming predictions
:param df_predictions: incoming predictions
"""
train_labels = self.data_dictionary["train_features"].columns
pred_labels = df_predictions.columns
num_diffs = len(pred_labels.difference(train_labels))
if num_diffs != 0:
df_predictions = df_predictions[train_labels]
logger.warning(
f"Removed {num_diffs} features from prediction features, "
f"these were likely considered constant values during most recent training."
)
constant_labels = self.data['constant_features_list']
df_predictions = df_predictions.filter(
df_predictions.columns.difference(constant_labels)
)
logger.warning(
f"Removed {len(constant_labels)} features from prediction features, "
f"these were considered constant values during most recent training."
)
return df_predictions
@@ -957,6 +965,9 @@ class FreqaiDataKitchen:
append_df[f"{label}_mean"] = self.data["labels_mean"][label]
append_df[f"{label}_std"] = self.data["labels_std"][label]
for extra_col in self.data["extra_returns_per_train"]:
append_df[f"{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
append_df["do_predict"] = do_predict
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
append_df["DI_values"] = self.DI_values

View File

@@ -1,5 +1,4 @@
import logging
import shutil
import threading
import time
from abc import ABC, abstractmethod
@@ -20,7 +19,7 @@ from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.utils import plot_feature_importance
from freqtrade.freqai.utils import plot_feature_importance, record_params
from freqtrade.strategy.interface import IStrategy
@@ -60,6 +59,7 @@ class IFreqaiModel(ABC):
"data_split_parameters", {})
self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
"model_training_parameters", {})
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.retrain = False
self.first = True
self.set_full_path()
@@ -68,7 +68,6 @@ class IFreqaiModel(ABC):
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.scanning = False
self.ft_params = self.freqai_info["feature_parameters"]
self.corr_pairlist: List[str] = self.ft_params.get("include_corr_pairlist", [])
@@ -96,6 +95,8 @@ class IFreqaiModel(ABC):
self._threads: List[threading.Thread] = []
self._stop_event = threading.Event()
record_params(config, self.full_path)
def __getstate__(self):
"""
Return an empty state to be pickled in hyperopt
@@ -528,14 +529,13 @@ class IFreqaiModel(ABC):
return file_exists
def set_full_path(self) -> None:
"""
Creates and sets the full path for the identifier
"""
self.full_path = Path(
self.config["user_data_dir"] / "models" / f"{self.freqai_info['identifier']}"
self.config["user_data_dir"] / "models" / f"{self.identifier}"
)
self.full_path.mkdir(parents=True, exist_ok=True)
shutil.copy(
self.config["config_files"][0],
Path(self.full_path, Path(self.config["config_files"][0]).name),
)
def extract_data_and_train_model(
self,

View File

@@ -26,9 +26,8 @@ class XGBoostRFClassifier(BaseClassifierModel):
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:params:
:data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
X = data_dictionary["train_features"].to_numpy()
@@ -65,7 +64,7 @@ class XGBoostRFClassifier(BaseClassifierModel):
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_df: Full dataframe for the current backtest period.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove

View File

@@ -29,6 +29,7 @@ class XGBoostRFRegressor(BaseRegressionModel):
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
eval_weights = None
else:
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
eval_weights = [data_dictionary['test_weights']]

View File

@@ -29,6 +29,7 @@ class XGBoostRegressor(BaseRegressionModel):
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
eval_weights = None
else:
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
eval_weights = [data_dictionary['test_weights']]

View File

@@ -1,9 +1,11 @@
import logging
from datetime import datetime, timezone
from typing import Any
from pathlib import Path
from typing import Any, Dict
import numpy as np
import pandas as pd
import rapidjson
from freqtrade.configuration import TimeRange
from freqtrade.constants import Config
@@ -191,3 +193,28 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
fig.update_layout(title_text=f"Best and worst features by importance {pair}")
label = label.replace('&', '').replace('%', '') # escape two FreqAI specific characters
store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path)
def record_params(config: Dict[str, Any], full_path: Path) -> None:
    """
    Records run params in the full path for reproducibility

    A subset of the configuration is written as JSON to the file
    "run_params.json" located inside `full_path`.
    :param config: configuration dictionary the recorded values are read from
    :param full_path: directory in which "run_params.json" is written
    """
    exchange_section = config.get('exchange', {})

    # Insertion order is kept identical to the order of the written keys.
    recorded: Dict[str, Any] = {"freqai": config.get('freqai', {})}
    for top_level_key in ("timeframe", "stake_amount", "stake_currency", "max_open_trades"):
        recorded[top_level_key] = config.get(top_level_key)
    recorded["pairs"] = exchange_section.get('pair_whitelist')

    with open(full_path / "run_params.json", "w") as out_file:
        # `default=str` is handed to rapidjson as the fallback converter;
        # presumably it is applied to values the encoder cannot serialize
        # natively - see the python-rapidjson documentation.
        rapidjson.dump(
            recorded,
            out_file,
            indent=4,
            default=str,
            number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN,
        )