Merge branch 'develop' into add-xgboostclassifier
This commit is contained in:
@@ -36,9 +36,6 @@ class FreqaiMultiOutputRegressor(MultiOutputRegressor):
|
||||
|
||||
y = self._validate_data(X="no_validation", y=y, multi_output=True)
|
||||
|
||||
# if is_classifier(self):
|
||||
# check_classification_targets(y)
|
||||
|
||||
if y.ndim == 1:
|
||||
raise ValueError(
|
||||
"y must have at least two dimensions for "
|
||||
@@ -50,19 +47,12 @@ class FreqaiMultiOutputRegressor(MultiOutputRegressor):
|
||||
):
|
||||
raise ValueError("Underlying estimator does not support sample weights.")
|
||||
|
||||
# fit_params_validated = _check_fit_params(X, fit_params)
|
||||
|
||||
if not fit_params:
|
||||
fit_params = [None] * y.shape[1]
|
||||
|
||||
# if not init_models:
|
||||
# init_models = [None] * y.shape[1]
|
||||
|
||||
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
|
||||
delayed(_fit_estimator)(
|
||||
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
|
||||
# init_model=init_models[i], eval_set=eval_sets[i],
|
||||
# **fit_params_validated
|
||||
)
|
||||
for i in range(y.shape[1])
|
||||
)
|
||||
|
@@ -184,7 +184,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
def filter_features(
|
||||
self,
|
||||
unfiltered_dataframe: DataFrame,
|
||||
unfiltered_df: DataFrame,
|
||||
training_feature_list: List,
|
||||
label_list: List = list(),
|
||||
training_filter: bool = True,
|
||||
@@ -195,31 +195,35 @@ class FreqaiDataKitchen:
|
||||
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
|
||||
row that had a NaN and will shield user from that prediction.
|
||||
:params:
|
||||
:unfiltered_dataframe: the full dataframe for the present training period
|
||||
:unfiltered_df: the full dataframe for the present training period
|
||||
:training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified parameters in the configuration file.
|
||||
:labels: the labels for the dataset
|
||||
:training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
:returns:
|
||||
:filtered_dataframe: dataframe cleaned of NaNs and only containing the user
|
||||
:filtered_df: dataframe cleaned of NaNs and only containing the user
|
||||
requested feature set.
|
||||
:labels: labels cleaned of NaNs.
|
||||
"""
|
||||
filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
|
||||
filtered_dataframe = filtered_dataframe.replace([np.inf, -np.inf], np.nan)
|
||||
filtered_df = unfiltered_df.filter(training_feature_list, axis=1)
|
||||
filtered_df = filtered_df.replace([np.inf, -np.inf], np.nan)
|
||||
|
||||
drop_index = pd.isnull(filtered_dataframe).any(1) # get the rows that have NaNs,
|
||||
drop_index = pd.isnull(filtered_df).any(1) # get the rows that have NaNs,
|
||||
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
|
||||
if (training_filter):
|
||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||
if const_cols:
|
||||
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
|
||||
logger.warning(f"Removed features {const_cols} with constant values.")
|
||||
# we don't care about total row number (total no. datapoints) in training, we only care
|
||||
# about removing any row with NaNs
|
||||
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
||||
labels = unfiltered_dataframe.filter(label_list, axis=1)
|
||||
labels = unfiltered_df.filter(label_list, axis=1)
|
||||
drop_index_labels = pd.isnull(labels).any(1)
|
||||
drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0)
|
||||
dates = unfiltered_dataframe['date']
|
||||
filtered_dataframe = filtered_dataframe[
|
||||
dates = unfiltered_df['date']
|
||||
filtered_df = filtered_df[
|
||||
(drop_index == 0) & (drop_index_labels == 0)
|
||||
] # dropping values
|
||||
labels = labels[
|
||||
@@ -229,13 +233,13 @@ class FreqaiDataKitchen:
|
||||
(drop_index == 0) & (drop_index_labels == 0)
|
||||
]
|
||||
logger.info(
|
||||
f"dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points"
|
||||
f" due to NaNs in populated dataset {len(unfiltered_dataframe)}."
|
||||
f"dropped {len(unfiltered_df) - len(filtered_df)} training points"
|
||||
f" due to NaNs in populated dataset {len(unfiltered_df)}."
|
||||
)
|
||||
if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
|
||||
worst_indicator = str(unfiltered_dataframe.count().idxmin())
|
||||
if (1 - len(filtered_df) / len(unfiltered_df)) > 0.1 and self.live:
|
||||
worst_indicator = str(unfiltered_df.count().idxmin())
|
||||
logger.warning(
|
||||
f" {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.0f} percent "
|
||||
f" {(1 - len(filtered_df)/len(unfiltered_df)) * 100:.0f} percent "
|
||||
" of training data dropped due to NaNs, model may perform inconsistent "
|
||||
f"with expectations. Verify {worst_indicator}"
|
||||
)
|
||||
@@ -244,9 +248,9 @@ class FreqaiDataKitchen:
|
||||
else:
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_dataframe).any(1)
|
||||
drop_index = pd.isnull(filtered_df).any(1)
|
||||
self.data["filter_drop_index_prediction"] = drop_index
|
||||
filtered_dataframe.fillna(0, inplace=True)
|
||||
filtered_df.fillna(0, inplace=True)
|
||||
# replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction
|
||||
# that was based on a single NaN is ultimately protected from buys with do_predict
|
||||
drop_index = ~drop_index
|
||||
@@ -255,11 +259,11 @@ class FreqaiDataKitchen:
|
||||
logger.info(
|
||||
"dropped %s of %s prediction data points due to NaNs.",
|
||||
len(self.do_predict) - self.do_predict.sum(),
|
||||
len(filtered_dataframe),
|
||||
len(filtered_df),
|
||||
)
|
||||
labels = []
|
||||
|
||||
return filtered_dataframe, labels
|
||||
return filtered_df, labels
|
||||
|
||||
def build_data_dictionary(
|
||||
self,
|
||||
@@ -466,10 +470,17 @@ class FreqaiDataKitchen:
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Function which takes the backtesting time range and
|
||||
remove training data from dataframe
|
||||
remove training data from dataframe, keeping only the
|
||||
startup_candle_count candles
|
||||
"""
|
||||
startup_candle_count = self.config.get('startup_candle_count', 0)
|
||||
tf = self.config['timeframe']
|
||||
tr = self.config["timerange"]
|
||||
|
||||
backtesting_timerange = TimeRange.parse_timerange(tr)
|
||||
if startup_candle_count > 0 and backtesting_timerange:
|
||||
backtesting_timerange.subtract_start(timeframe_to_seconds(tf) * startup_candle_count)
|
||||
|
||||
start = datetime.fromtimestamp(backtesting_timerange.startts, tz=timezone.utc)
|
||||
df = self.return_dataframe
|
||||
df = df.loc[df["date"] >= start, :]
|
||||
@@ -1215,7 +1226,6 @@ class FreqaiDataKitchen:
|
||||
def save_backtesting_prediction(
|
||||
self, append_df: DataFrame
|
||||
) -> None:
|
||||
|
||||
"""
|
||||
Save prediction dataframe from backtesting to h5 file format
|
||||
:param append_df: dataframe for backtesting period
|
||||
@@ -1229,7 +1239,6 @@ class FreqaiDataKitchen:
|
||||
def get_backtesting_prediction(
|
||||
self
|
||||
) -> DataFrame:
|
||||
|
||||
"""
|
||||
Get prediction dataframe from h5 file format
|
||||
"""
|
||||
|
@@ -14,6 +14,7 @@ from numpy.typing import NDArray
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import DATETIME_PRINT_FORMAT
|
||||
from freqtrade.enums import RunMode
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
@@ -92,6 +93,12 @@ class IFreqaiModel(ABC):
|
||||
self._threads: List[threading.Thread] = []
|
||||
self._stop_event = threading.Event()
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Return an empty state to be pickled in hyperopt
|
||||
"""
|
||||
return ({})
|
||||
|
||||
def assert_config(self, config: Dict[str, Any]) -> None:
|
||||
|
||||
if not config.get("freqai", {}):
|
||||
@@ -233,10 +240,10 @@ class IFreqaiModel(ABC):
|
||||
trained_timestamp = tr_train
|
||||
tr_train_startts_str = datetime.fromtimestamp(
|
||||
tr_train.startts,
|
||||
tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
||||
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
|
||||
tr_train_stopts_str = datetime.fromtimestamp(
|
||||
tr_train.stopts,
|
||||
tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
|
||||
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
|
||||
logger.info(
|
||||
f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
|
||||
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
|
||||
|
@@ -60,6 +60,9 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
{'eval_set': eval_sets[i], 'init_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=cbr)
|
||||
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
|
||||
if thread_training:
|
||||
model.n_jobs = y.shape[1]
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
return model
|
||||
|
@@ -56,9 +56,9 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
|
||||
'init_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=lgb)
|
||||
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
|
||||
if thread_training:
|
||||
model.n_jobs = y.shape[1]
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
# model = FreqaiMultiOutputRegressor(estimator=lgb)
|
||||
# model.fit(X=X, y=y, sample_weight=sample_weight, init_models=init_models,
|
||||
# eval_sets=eval_sets, eval_sample_weight=eval_weights)
|
||||
return model
|
||||
|
@@ -55,6 +55,9 @@ class XGBoostRegressorMultiTarget(BaseRegressionModel):
|
||||
'xgb_model': init_models[i]})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=xgb)
|
||||
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
|
||||
if thread_training:
|
||||
model.n_jobs = y.shape[1]
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
|
||||
|
||||
return model
|
||||
|
Reference in New Issue
Block a user