Merge branch 'develop' into feat/externalsignals
This commit is contained in:
@@ -313,7 +313,9 @@ class DataProvider:
|
||||
Clear pair dataframe cache.
|
||||
"""
|
||||
self.__cached_pairs = {}
|
||||
self.__cached_pairs_backtesting = {}
|
||||
# Don't reset backtesting pairs -
|
||||
# otherwise they're reloaded each time during hyperopt due to analyze_per_epoch
|
||||
# self.__cached_pairs_backtesting = {}
|
||||
self.__slice_index = 0
|
||||
|
||||
# Exchange functions
|
||||
|
@@ -355,7 +355,7 @@ class FreqaiDataDrawer:
|
||||
for dir in model_folders:
|
||||
result = pattern.match(str(dir.name))
|
||||
if result is None:
|
||||
break
|
||||
continue
|
||||
coin = result.group(1)
|
||||
timestamp = result.group(2)
|
||||
|
||||
|
85
freqtrade/freqai/prediction_models/XGBoostClassifier.py
Normal file
85
freqtrade/freqai/prediction_models/XGBoostClassifier.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import logging
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from pandas.api.types import is_integer_dtype
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from xgboost import XGBClassifier
|
||||
|
||||
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostClassifier(BaseClassifierModel):
    """
    Classification prediction model backed by ``xgboost.XGBClassifier``.

    The class extends BaseClassifierModel and overrides fit() and predict().
    Non-integer class labels are encoded to integers before training and the
    integer predictions are decoded back to the original labels afterwards.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Train an XGBClassifier on the training split held in data_dictionary.

        :param data_dictionary: dictionary holding the training and test
            features/labels as well as the training weights
            ("train_features", "train_labels", "train_weights" and, when a
            test split is used, "test_features", "test_labels").
        :param dk: data kitchen of the current pair; only dk.pair is read here,
            to look up an initial model via self.get_init_model().
        :return: the fitted XGBClassifier instance.
        """

        features = data_dictionary["train_features"].to_numpy()
        # Only the first label column is used as the classification target.
        targets = data_dictionary["train_labels"].to_numpy()[:, 0]

        encoder = LabelEncoder()
        if not is_integer_dtype(targets):
            # Labels that are not integers yet are mapped to integer codes.
            targets = pd.Series(encoder.fit_transform(targets), dtype="int64")

        # A test_size of 0 means no test split exists, so no eval set is passed.
        eval_set = None
        test_size = self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1)
        if test_size != 0:
            eval_features = data_dictionary["test_features"].to_numpy()
            eval_targets = data_dictionary["test_labels"].to_numpy()[:, 0]

            if not is_integer_dtype(eval_targets):
                # Re-use the encoder from the training labels so that both
                # splits share the same label -> integer mapping.
                eval_targets = pd.Series(encoder.transform(eval_targets), dtype="int64")

            eval_set = [(eval_features, eval_targets)]

        sample_weights = data_dictionary["train_weights"]

        # NOTE(review): presumably a previously trained model used to continue
        # training (passed on as ``xgb_model``) - confirm against get_init_model().
        starting_model = self.get_init_model(dk.pair)

        classifier = XGBClassifier(**self.model_training_parameters)
        classifier.fit(
            X=features,
            y=targets,
            eval_set=eval_set,
            sample_weight=sample_weights,
            xgb_model=starting_model,
        )

        return classifier

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Filter the prediction features data and predict with it.

        The prediction itself is delegated to the parent class; this override
        only converts the integer encoded classes back to the original labels.

        :param unfiltered_df: Full dataframe for the current backtest period.
        :param dk: data kitchen of the current pair; dk.do_predict is
            overwritten with the value returned by the parent class.
        :return:
            :pred_df: dataframe containing the predictions
            :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
                data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        pred_df, dk.do_predict = super().predict(unfiltered_df, dk, **kwargs)

        encoder = LabelEncoder()
        target_column = dk.label_list[0]
        original_labels = list(dk.data['labels_std'].keys())
        # Fitting on the known labels rebuilds the label <-> integer mapping.
        encoded_labels = encoder.fit_transform(original_labels).tolist()

        # Decode the predicted class, then rename the columns that are keyed
        # by an integer code back to the label they stand for.
        pred_df[target_column] = encoder.inverse_transform(pred_df[target_column])
        pred_df = pred_df.rename(columns=dict(zip(encoded_labels, original_labels)))

        return (pred_df, dk.do_predict)
|
@@ -580,11 +580,23 @@ class Hyperopt:
|
||||
max_value=self.total_epochs, redirect_stdout=False, redirect_stderr=False,
|
||||
widgets=widgets
|
||||
) as pbar:
|
||||
EVALS = ceil(self.total_epochs / jobs)
|
||||
for i in range(EVALS):
|
||||
start = 0
|
||||
|
||||
if self.analyze_per_epoch:
|
||||
# First analysis not in parallel mode when using --analyze-per-epoch.
|
||||
# This allows dataprovider to load its informative cache.
|
||||
asked, is_random = self.get_asked_points(n_points=1)
|
||||
f_val0 = self.generate_optimizer(asked[0])
|
||||
self.opt.tell(asked, [f_val0['loss']])
|
||||
self.evaluate_result(f_val0, 1, is_random[0])
|
||||
pbar.update(1)
|
||||
start += 1
|
||||
|
||||
evals = ceil((self.total_epochs - start) / jobs)
|
||||
for i in range(evals):
|
||||
# Correct the number of epochs to be processed for the last
|
||||
# iteration (should not exceed self.total_epochs in total)
|
||||
n_rest = (i + 1) * jobs - self.total_epochs
|
||||
n_rest = (i + 1) * jobs - (self.total_epochs - start)
|
||||
current_jobs = jobs - n_rest if n_rest > 0 else jobs
|
||||
|
||||
asked, is_random = self.get_asked_points(n_points=current_jobs)
|
||||
@@ -594,7 +606,7 @@ class Hyperopt:
|
||||
# Calculate progressbar outputs
|
||||
for j, val in enumerate(f_val):
|
||||
# Use human-friendly indexes here (starting from 1)
|
||||
current = i * jobs + j + 1
|
||||
current = i * jobs + j + 1 + start
|
||||
|
||||
self.evaluate_result(val, current, is_random[j])
|
||||
|
||||
|
Reference in New Issue
Block a user