Merge remote-tracking branch 'origin/develop' into reduce-indicator-population
This commit is contained in:
@@ -51,7 +51,7 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
|
@@ -50,7 +50,7 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
|
@@ -47,7 +47,7 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
|
@@ -210,7 +210,10 @@ class FreqaiDataKitchen:
|
||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||
if const_cols:
|
||||
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
|
||||
self.data['constant_features_list'] = const_cols
|
||||
logger.warning(f"Removed features {const_cols} with constant values.")
|
||||
else:
|
||||
self.data['constant_features_list'] = []
|
||||
# we don't care about total row number (total no. datapoints) in training, we only care
|
||||
# about removing any row with NaNs
|
||||
# if labels has multiple columns (user wants to train multiple models), we detect here
|
||||
@@ -241,7 +244,8 @@ class FreqaiDataKitchen:
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else:
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
if len(self.data['constant_features_list']):
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1)
|
||||
@@ -350,13 +354,19 @@ class FreqaiDataKitchen:
|
||||
:param df: Dataframe to be standardized
|
||||
"""
|
||||
|
||||
for item in df.keys():
|
||||
df[item] = (
|
||||
2
|
||||
* (df[item] - self.data[f"{item}_min"])
|
||||
/ (self.data[f"{item}_max"] - self.data[f"{item}_min"])
|
||||
- 1
|
||||
)
|
||||
train_max = [None] * len(df.keys())
|
||||
train_min = [None] * len(df.keys())
|
||||
|
||||
for i, item in enumerate(df.keys()):
|
||||
train_max[i] = self.data[f"{item}_max"]
|
||||
train_min[i] = self.data[f"{item}_min"]
|
||||
|
||||
train_max_series = pd.Series(train_max, index=df.keys())
|
||||
train_min_series = pd.Series(train_min, index=df.keys())
|
||||
|
||||
df = (
|
||||
2 * (df - train_min_series) / (train_max_series - train_min_series) - 1
|
||||
)
|
||||
|
||||
return df
|
||||
|
||||
@@ -464,18 +474,16 @@ class FreqaiDataKitchen:
|
||||
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Check that prediction feature labels match training feature labels.
|
||||
:params:
|
||||
:df_predictions: incoming predictions
|
||||
:param df_predictions: incoming predictions
|
||||
"""
|
||||
train_labels = self.data_dictionary["train_features"].columns
|
||||
pred_labels = df_predictions.columns
|
||||
num_diffs = len(pred_labels.difference(train_labels))
|
||||
if num_diffs != 0:
|
||||
df_predictions = df_predictions[train_labels]
|
||||
logger.warning(
|
||||
f"Removed {num_diffs} features from prediction features, "
|
||||
f"these were likely considered constant values during most recent training."
|
||||
)
|
||||
constant_labels = self.data['constant_features_list']
|
||||
df_predictions = df_predictions.filter(
|
||||
df_predictions.columns.difference(constant_labels)
|
||||
)
|
||||
logger.warning(
|
||||
f"Removed {len(constant_labels)} features from prediction features, "
|
||||
f"these were considered constant values during most recent training."
|
||||
)
|
||||
|
||||
return df_predictions
|
||||
|
||||
@@ -957,6 +965,9 @@ class FreqaiDataKitchen:
|
||||
append_df[f"{label}_mean"] = self.data["labels_mean"][label]
|
||||
append_df[f"{label}_std"] = self.data["labels_std"][label]
|
||||
|
||||
for extra_col in self.data["extra_returns_per_train"]:
|
||||
append_df[f"{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
|
||||
|
||||
append_df["do_predict"] = do_predict
|
||||
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
|
||||
append_df["DI_values"] = self.DI_values
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import logging
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
@@ -20,7 +19,7 @@ from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.utils import plot_feature_importance
|
||||
from freqtrade.freqai.utils import plot_feature_importance, record_params
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
|
||||
|
||||
@@ -60,6 +59,7 @@ class IFreqaiModel(ABC):
|
||||
"data_split_parameters", {})
|
||||
self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
|
||||
"model_training_parameters", {})
|
||||
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
|
||||
self.retrain = False
|
||||
self.first = True
|
||||
self.set_full_path()
|
||||
@@ -68,7 +68,6 @@ class IFreqaiModel(ABC):
|
||||
if self.save_backtest_models:
|
||||
logger.info('Backtesting module configured to save all models.')
|
||||
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
|
||||
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
|
||||
self.scanning = False
|
||||
self.ft_params = self.freqai_info["feature_parameters"]
|
||||
self.corr_pairlist: List[str] = self.ft_params.get("include_corr_pairlist", [])
|
||||
@@ -96,6 +95,8 @@ class IFreqaiModel(ABC):
|
||||
self._threads: List[threading.Thread] = []
|
||||
self._stop_event = threading.Event()
|
||||
|
||||
record_params(config, self.full_path)
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Return an empty state to be pickled in hyperopt
|
||||
@@ -528,14 +529,13 @@ class IFreqaiModel(ABC):
|
||||
return file_exists
|
||||
|
||||
def set_full_path(self) -> None:
|
||||
"""
|
||||
Creates and sets the full path for the identifier
|
||||
"""
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / f"{self.freqai_info['identifier']}"
|
||||
self.config["user_data_dir"] / "models" / f"{self.identifier}"
|
||||
)
|
||||
self.full_path.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(
|
||||
self.config["config_files"][0],
|
||||
Path(self.full_path, Path(self.config["config_files"][0]).name),
|
||||
)
|
||||
|
||||
def extract_data_and_train_model(
|
||||
self,
|
||||
|
@@ -26,9 +26,8 @@ class XGBoostRFClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"].to_numpy()
|
||||
@@ -65,7 +64,7 @@ class XGBoostRFClassifier(BaseClassifierModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
@@ -29,6 +29,7 @@ class XGBoostRFRegressor(BaseRegressionModel):
|
||||
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
eval_set = None
|
||||
eval_weights = None
|
||||
else:
|
||||
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
|
||||
eval_weights = [data_dictionary['test_weights']]
|
||||
|
@@ -29,6 +29,7 @@ class XGBoostRegressor(BaseRegressionModel):
|
||||
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
eval_set = None
|
||||
eval_weights = None
|
||||
else:
|
||||
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
|
||||
eval_weights = [data_dictionary['test_weights']]
|
||||
|
@@ -1,9 +1,11 @@
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import rapidjson
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
@@ -191,3 +193,28 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
|
||||
fig.update_layout(title_text=f"Best and worst features by importance {pair}")
|
||||
label = label.replace('&', '').replace('%', '') # escape two FreqAI specific characters
|
||||
store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path)
|
||||
|
||||
|
||||
def record_params(config: Dict[str, Any], full_path: Path) -> None:
|
||||
"""
|
||||
Records run params in the full path for reproducibility
|
||||
"""
|
||||
params_record_path = full_path / "run_params.json"
|
||||
|
||||
run_params = {
|
||||
"freqai": config.get('freqai', {}),
|
||||
"timeframe": config.get('timeframe'),
|
||||
"stake_amount": config.get('stake_amount'),
|
||||
"stake_currency": config.get('stake_currency'),
|
||||
"max_open_trades": config.get('max_open_trades'),
|
||||
"pairs": config.get('exchange', {}).get('pair_whitelist')
|
||||
}
|
||||
|
||||
with open(params_record_path, "w") as handle:
|
||||
rapidjson.dump(
|
||||
run_params,
|
||||
handle,
|
||||
indent=4,
|
||||
default=str,
|
||||
number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN
|
||||
)
|
||||
|
Reference in New Issue
Block a user