Merge branch 'develop' into feat/externalsignals

This commit is contained in:
Timothy Pogue
2022-09-15 18:09:25 -06:00
9 changed files with 151 additions and 11 deletions

View File

@@ -313,7 +313,9 @@ class DataProvider:
Clear pair dataframe cache.
"""
self.__cached_pairs = {}
self.__cached_pairs_backtesting = {}
# Don't reset the backtesting pairs cache -
# otherwise the pairs are reloaded on every epoch during hyperopt when analyze_per_epoch is used
# self.__cached_pairs_backtesting = {}
self.__slice_index = 0
# Exchange functions

View File

@@ -355,7 +355,7 @@ class FreqaiDataDrawer:
for dir in model_folders:
result = pattern.match(str(dir.name))
if result is None:
break
continue
coin = result.group(1)
timestamp = result.group(2)

View File

@@ -0,0 +1,85 @@
import logging
from typing import Any, Dict, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from pandas.api.types import is_integer_dtype
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class XGBoostClassifier(BaseClassifierModel):
    """
    Classification model backed by xgboost's ``XGBClassifier``.

    ``fit()`` trains the classifier on the supplied training split, and
    ``predict()`` post-processes the parent class' predictions so that they
    carry the original class labels instead of integer codes.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Train an ``XGBClassifier`` on the training split.

        Labels that are not of an integer dtype are converted to integer codes
        with a ``LabelEncoder`` before they are handed to xgboost.
        :param data_dictionary: dictionary providing "train_features",
            "train_labels" and "train_weights" and - unless the configured
            ``test_size`` is 0 - "test_features" and "test_labels".
        :param dk: data kitchen of the current pair; only ``dk.pair`` is read here.
        :return: the fitted ``XGBClassifier`` instance.
        """
        encoder = LabelEncoder()

        features = data_dictionary["train_features"].to_numpy()
        # Only the first label column is used as the classification target.
        targets = data_dictionary["train_labels"].to_numpy()[:, 0]
        if not is_integer_dtype(targets):
            targets = pd.Series(encoder.fit_transform(targets), dtype="int64")

        # No evaluation set by default; one is built only when a test split is configured.
        eval_set = None
        split_params = self.freqai_info.get('data_split_parameters', {})
        if split_params.get('test_size', 0.1) != 0:
            eval_features = data_dictionary["test_features"].to_numpy()
            eval_targets = data_dictionary["test_labels"].to_numpy()[:, 0]
            if not is_integer_dtype(eval_targets):
                # Re-use the encoder from the training labels so both splits share the same codes.
                eval_targets = pd.Series(encoder.transform(eval_targets), dtype="int64")
            eval_set = [(eval_features, eval_targets)]

        sample_weights = data_dictionary["train_weights"]

        # NOTE(review): presumably an earlier model for this pair to continue
        # training from (or None) - confirm against get_init_model().
        previous_model = self.get_init_model(dk.pair)

        classifier = XGBClassifier(**self.model_training_parameters)
        classifier.fit(
            X=features,
            y=targets,
            eval_set=eval_set,
            sample_weight=sample_weights,
            xgb_model=previous_model,
        )
        return classifier

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Predict via the parent class, then translate integer codes back to class labels.

        The class labels are taken from the keys of ``dk.data['labels_std']``. A
        ``LabelEncoder`` fitted on them is used to decode the values of the
        first label column and to rename code-named columns to label names.
        :param unfiltered_df: dataframe passed through to the parent ``predict()``.
        :param dk: data kitchen of the current pair; ``dk.do_predict`` is
            overwritten with the value returned by the parent ``predict()``.
        :return: tuple of (prediction dataframe, ``dk.do_predict``).
        """
        pred_df, dk.do_predict = super().predict(unfiltered_df, dk, **kwargs)

        target_column = dk.label_list[0]
        class_names = list(dk.data['labels_std'].keys())

        # NOTE(review): assumes these keys are the same label set the model was
        # trained on, so this fresh encoder yields the codes used in fit() - confirm.
        encoder = LabelEncoder()
        class_codes = encoder.fit_transform(class_names).tolist()

        pred_df[target_column] = encoder.inverse_transform(pred_df[target_column])

        # Columns named after an integer code get the matching class label as name.
        code_to_name = dict(zip(class_codes, class_names))
        pred_df = pred_df.rename(columns=code_to_name)

        return (pred_df, dk.do_predict)

View File

@@ -580,11 +580,23 @@ class Hyperopt:
max_value=self.total_epochs, redirect_stdout=False, redirect_stderr=False,
widgets=widgets
) as pbar:
EVALS = ceil(self.total_epochs / jobs)
for i in range(EVALS):
start = 0
if self.analyze_per_epoch:
# The first analysis is not run in parallel mode when using --analyze-per-epoch.
# This allows the dataprovider to load its informative cache.
asked, is_random = self.get_asked_points(n_points=1)
f_val0 = self.generate_optimizer(asked[0])
self.opt.tell(asked, [f_val0['loss']])
self.evaluate_result(f_val0, 1, is_random[0])
pbar.update(1)
start += 1
evals = ceil((self.total_epochs - start) / jobs)
for i in range(evals):
# Correct the number of epochs to be processed for the last
# iteration (should not exceed self.total_epochs in total)
n_rest = (i + 1) * jobs - self.total_epochs
n_rest = (i + 1) * jobs - (self.total_epochs - start)
current_jobs = jobs - n_rest if n_rest > 0 else jobs
asked, is_random = self.get_asked_points(n_points=current_jobs)
@@ -594,7 +606,7 @@ class Hyperopt:
# Calculate progressbar outputs
for j, val in enumerate(f_val):
# Use human-friendly indexes here (starting from 1)
current = i * jobs + j + 1
current = i * jobs + j + 1 + start
self.evaluate_result(val, current, is_random[j])