Merge branch 'develop' into dev-merge-rl

This commit is contained in:
robcaulk
2022-09-14 22:49:11 +02:00
59 changed files with 1259 additions and 679 deletions

View File

@@ -21,12 +21,12 @@ class BaseClassifierModel(IFreqaiModel):
"""
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param unfiltered_df: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:return:
:model: Trained model which can be used to inference (self.predict)
@@ -36,14 +36,14 @@ class BaseClassifierModel(IFreqaiModel):
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
unfiltered_df,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
logger.info(f"-------------------- Training on data from {start_date} to "
f"{end_date}--------------------")
# split data into train/test data.
@@ -61,32 +61,32 @@ class BaseClassifierModel(IFreqaiModel):
f' features and {len(data_dictionary["train_features"])} data points'
)
model = self.fit(data_dictionary)
model = self.fit(data_dictionary, dk)
logger.info(f"--------------------done training {pair}--------------------")
return model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:param: unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
dk.find_features(unfiltered_df)
filtered_df, _ = dk.filter_features(
unfiltered_df, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df
self.data_cleaning_predict(dk, filtered_dataframe)
self.data_cleaning_predict(dk, filtered_df)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list)

View File

@@ -20,12 +20,12 @@ class BaseRegressionModel(IFreqaiModel):
"""
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param unfiltered_df: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:return:
:model: Trained model which can be used to inference (self.predict)
@@ -35,14 +35,14 @@ class BaseRegressionModel(IFreqaiModel):
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
unfiltered_df,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
logger.info(f"-------------------- Training on data from {start_date} to "
f"{end_date}--------------------")
# split data into train/test data.
@@ -60,33 +60,33 @@ class BaseRegressionModel(IFreqaiModel):
f' features and {len(data_dictionary["train_features"])} data points'
)
model = self.fit(data_dictionary)
model = self.fit(data_dictionary, dk)
logger.info(f"--------------------done training {pair}--------------------")
return model
def predict(
self, unfiltered_dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = False
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
:param: unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
dk.find_features(unfiltered_dataframe)
filtered_dataframe, _ = dk.filter_features(
unfiltered_dataframe, dk.training_features_list, training_filter=False
dk.find_features(unfiltered_df)
filtered_df, _ = dk.filter_features(
unfiltered_df, dk.training_features_list, training_filter=False
)
filtered_dataframe = dk.normalize_data_from_metadata(filtered_dataframe)
dk.data_dictionary["prediction_features"] = filtered_dataframe
filtered_df = dk.normalize_data_from_metadata(filtered_df)
dk.data_dictionary["prediction_features"] = filtered_df
# optional additional data cleaning/analysis
self.data_cleaning_predict(dk, filtered_dataframe)
self.data_cleaning_predict(dk, filtered_df)
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
pred_df = DataFrame(predictions, columns=dk.label_list)

View File

@@ -17,12 +17,12 @@ class BaseTensorFlowModel(IFreqaiModel):
"""
def train(
self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen
self, unfiltered_df: DataFrame, pair: str, dk: FreqaiDataKitchen, **kwargs
) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datakitchen
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param unfiltered_df: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:return:
:model: Trained model which can be used to inference (self.predict)
@@ -32,14 +32,14 @@ class BaseTensorFlowModel(IFreqaiModel):
# filter the features requested by user in the configuration file and elegantly handle NaNs
features_filtered, labels_filtered = dk.filter_features(
unfiltered_dataframe,
unfiltered_df,
dk.training_features_list,
dk.label_list,
training_filter=True,
)
start_date = unfiltered_dataframe["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_dataframe["date"].iloc[-1].strftime("%Y-%m-%d")
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
logger.info(f"-------------------- Training on data from {start_date} to "
f"{end_date}--------------------")
# split data into train/test data.
@@ -57,7 +57,7 @@ class BaseTensorFlowModel(IFreqaiModel):
f' features and {len(data_dictionary["train_features"])} data points'
)
model = self.fit(data_dictionary)
model = self.fit(data_dictionary, dk)
logger.info(f"--------------------done training {pair}--------------------")

View File

@@ -0,0 +1,65 @@
from joblib import Parallel
from sklearn.multioutput import MultiOutputRegressor, _fit_estimator
from sklearn.utils.fixes import delayed
from sklearn.utils.validation import has_fit_parameter
class FreqaiMultiOutputRegressor(MultiOutputRegressor):
def fit(self, X, y, sample_weight=None, fit_params=None):
"""Fit the model to data, separately for each output variable.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
The input data.
y : {array-like, sparse matrix} of shape (n_samples, n_outputs)
Multi-output targets. An indicator matrix turns on multilabel
estimation.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights. If `None`, then samples are equally weighted.
Only supported if the underlying regressor supports sample
weights.
fit_params : A list of dicts for the fit_params
Parameters passed to the ``estimator.fit`` method of each step.
Each dict may contain same or different values (e.g. different
eval_sets or init_models)
.. versionadded:: 0.23
Returns
-------
self : object
Returns a fitted instance.
"""
if not hasattr(self.estimator, "fit"):
raise ValueError("The base estimator should implement a fit method")
y = self._validate_data(X="no_validation", y=y, multi_output=True)
if y.ndim == 1:
raise ValueError(
"y must have at least two dimensions for "
"multi-output regression but has only one."
)
if sample_weight is not None and not has_fit_parameter(
self.estimator, "sample_weight"
):
raise ValueError("Underlying estimator does not support sample weights.")
if not fit_params:
fit_params = [None] * y.shape[1]
self.estimators_ = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_estimator)(
self.estimator, X, y[:, i], sample_weight, **fit_params[i]
)
for i in range(y.shape[1])
)
if hasattr(self.estimators_[0], "n_features_in_"):
self.n_features_in_ = self.estimators_[0].n_features_in_
if hasattr(self.estimators_[0], "feature_names_in_"):
self.feature_names_in_ = self.estimators_[0].feature_names_in_
return

View File

@@ -76,6 +76,8 @@ class FreqaiDataDrawer:
self.full_path / f"follower_dictionary-{self.follower_name}.json"
)
self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl")
self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.follow_mode = follow_mode
if follow_mode:
@@ -119,13 +121,21 @@ class FreqaiDataDrawer:
"""
exists = self.historic_predictions_path.is_file()
if exists:
with open(self.historic_predictions_path, "rb") as fp:
self.historic_predictions = cloudpickle.load(fp)
logger.info(
f"Found existing historic predictions at {self.full_path}, but beware "
"that statistics may be inaccurate if the bot has been offline for "
"an extended period of time."
)
try:
with open(self.historic_predictions_path, "rb") as fp:
self.historic_predictions = cloudpickle.load(fp)
logger.info(
f"Found existing historic predictions at {self.full_path}, but beware "
"that statistics may be inaccurate if the bot has been offline for "
"an extended period of time."
)
except EOFError:
logger.warning(
'Historical prediction file was corrupted. Trying to load backup file.')
with open(self.historic_predictions_bkp_path, "rb") as fp:
self.historic_predictions = cloudpickle.load(fp)
logger.warning('FreqAI successfully loaded the backup historical predictions file.')
elif not self.follow_mode:
logger.info("Could not find existing historic_predictions, starting from scratch")
else:
@@ -143,6 +153,9 @@ class FreqaiDataDrawer:
with open(self.historic_predictions_path, "wb") as fp:
cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
# create a backup
shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path)
def save_drawer_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.

View File

@@ -1,7 +1,8 @@
import copy
import datetime
import logging
import shutil
from datetime import datetime, timezone
from math import cos, sin
from pathlib import Path
from typing import Any, Dict, List, Tuple
@@ -9,6 +10,7 @@ import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from scipy import stats
from sklearn import linear_model
from sklearn.cluster import DBSCAN
from sklearn.metrics.pairwise import pairwise_distances
@@ -182,7 +184,7 @@ class FreqaiDataKitchen:
def filter_features(
self,
unfiltered_dataframe: DataFrame,
unfiltered_df: DataFrame,
training_feature_list: List,
label_list: List = list(),
training_filter: bool = True,
@@ -193,31 +195,35 @@ class FreqaiDataKitchen:
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
row that had a NaN and will shield user from that prediction.
:params:
:unfiltered_dataframe: the full dataframe for the present training period
:unfiltered_df: the full dataframe for the present training period
:training_feature_list: list, the training feature list constructed by
self.build_feature_list() according to user specified parameters in the configuration file.
:labels: the labels for the dataset
:training_filter: boolean which lets the function know if it is training data or
prediction data to be filtered.
:returns:
:filtered_dataframe: dataframe cleaned of NaNs and only containing the user
:filtered_df: dataframe cleaned of NaNs and only containing the user
requested feature set.
:labels: labels cleaned of NaNs.
"""
filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
filtered_dataframe = filtered_dataframe.replace([np.inf, -np.inf], np.nan)
filtered_df = unfiltered_df.filter(training_feature_list, axis=1)
filtered_df = filtered_df.replace([np.inf, -np.inf], np.nan)
drop_index = pd.isnull(filtered_dataframe).any(1) # get the rows that have NaNs,
drop_index = pd.isnull(filtered_df).any(1) # get the rows that have NaNs,
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
if (training_filter):
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
if const_cols:
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
logger.warning(f"Removed features {const_cols} with constant values.")
# we don't care about total row number (total no. datapoints) in training, we only care
# about removing any row with NaNs
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
labels = unfiltered_dataframe.filter(label_list, axis=1)
labels = unfiltered_df.filter(label_list, axis=1)
drop_index_labels = pd.isnull(labels).any(1)
drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0)
dates = unfiltered_dataframe['date']
filtered_dataframe = filtered_dataframe[
dates = unfiltered_df['date']
filtered_df = filtered_df[
(drop_index == 0) & (drop_index_labels == 0)
] # dropping values
labels = labels[
@@ -227,13 +233,13 @@ class FreqaiDataKitchen:
(drop_index == 0) & (drop_index_labels == 0)
]
logger.info(
f"dropped {len(unfiltered_dataframe) - len(filtered_dataframe)} training points"
f" due to NaNs in populated dataset {len(unfiltered_dataframe)}."
f"dropped {len(unfiltered_df) - len(filtered_df)} training points"
f" due to NaNs in populated dataset {len(unfiltered_df)}."
)
if (1 - len(filtered_dataframe) / len(unfiltered_dataframe)) > 0.1 and self.live:
worst_indicator = str(unfiltered_dataframe.count().idxmin())
if (1 - len(filtered_df) / len(unfiltered_df)) > 0.1 and self.live:
worst_indicator = str(unfiltered_df.count().idxmin())
logger.warning(
f" {(1 - len(filtered_dataframe)/len(unfiltered_dataframe)) * 100:.0f} percent "
f" {(1 - len(filtered_df)/len(unfiltered_df)) * 100:.0f} percent "
" of training data dropped due to NaNs, model may perform inconsistent "
f"with expectations. Verify {worst_indicator}"
)
@@ -242,9 +248,9 @@ class FreqaiDataKitchen:
else:
# we are backtesting so we need to preserve row number to send back to strategy,
# so now we use do_predict to avoid any prediction based on a NaN
drop_index = pd.isnull(filtered_dataframe).any(1)
drop_index = pd.isnull(filtered_df).any(1)
self.data["filter_drop_index_prediction"] = drop_index
filtered_dataframe.fillna(0, inplace=True)
filtered_df.fillna(0, inplace=True)
# replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction
# that was based on a single NaN is ultimately protected from buys with do_predict
drop_index = ~drop_index
@@ -253,11 +259,11 @@ class FreqaiDataKitchen:
logger.info(
"dropped %s of %s prediction data points due to NaNs.",
len(self.do_predict) - self.do_predict.sum(),
len(filtered_dataframe),
len(filtered_df),
)
labels = []
return filtered_dataframe, labels
return filtered_df, labels
def build_data_dictionary(
self,
@@ -360,7 +366,7 @@ class FreqaiDataKitchen:
def denormalize_labels_from_metadata(self, df: DataFrame) -> DataFrame:
"""
Normalize a set of data using the mean and standard deviation from
Denormalize a set of data using the mean and standard deviation from
the associated training data.
:param df: Dataframe of predictions to be denormalized
"""
@@ -399,7 +405,7 @@ class FreqaiDataKitchen:
config_timerange = TimeRange.parse_timerange(self.config["timerange"])
if config_timerange.stopts == 0:
config_timerange.stopts = int(
datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
datetime.now(tz=timezone.utc).timestamp()
)
timerange_train = copy.deepcopy(full_timerange)
timerange_backtest = copy.deepcopy(full_timerange)
@@ -416,8 +422,8 @@ class FreqaiDataKitchen:
timerange_train.stopts = timerange_train.startts + train_period_days
first = False
start = datetime.datetime.utcfromtimestamp(timerange_train.startts)
stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts)
start = datetime.fromtimestamp(timerange_train.startts, tz=timezone.utc)
stop = datetime.fromtimestamp(timerange_train.stopts, tz=timezone.utc)
tr_training_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
tr_training_list_timerange.append(copy.deepcopy(timerange_train))
@@ -430,8 +436,8 @@ class FreqaiDataKitchen:
if timerange_backtest.stopts > config_timerange.stopts:
timerange_backtest.stopts = config_timerange.stopts
start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts)
stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts)
start = datetime.fromtimestamp(timerange_backtest.startts, tz=timezone.utc)
stop = datetime.fromtimestamp(timerange_backtest.stopts, tz=timezone.utc)
tr_backtesting_list.append(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))
tr_backtesting_list_timerange.append(copy.deepcopy(timerange_backtest))
@@ -451,13 +457,35 @@ class FreqaiDataKitchen:
it is sliced down to just the present training period.
"""
start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc)
stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc)
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
df = df.loc[df["date"] >= start, :]
df = df.loc[df["date"] <= stop, :]
if not self.live:
df = df.loc[df["date"] < stop, :]
return df
def remove_training_from_backtesting(
self
) -> DataFrame:
"""
Function which takes the backtesting time range and
remove training data from dataframe, keeping only the
startup_candle_count candles
"""
startup_candle_count = self.config.get('startup_candle_count', 0)
tf = self.config['timeframe']
tr = self.config["timerange"]
backtesting_timerange = TimeRange.parse_timerange(tr)
if startup_candle_count > 0 and backtesting_timerange:
backtesting_timerange.subtract_start(timeframe_to_seconds(tf) * startup_candle_count)
start = datetime.fromtimestamp(backtesting_timerange.startts, tz=timezone.utc)
df = self.return_dataframe
df = df.loc[df["date"] >= start, :]
return df
def principal_component_analysis(self) -> None:
"""
Performs Principal Component Analysis on the data for dimensionality reduction
@@ -652,8 +680,6 @@ class FreqaiDataKitchen:
is an outlier.
"""
from math import cos, sin
if predict:
if not self.data['DBSCAN_eps']:
return
@@ -746,6 +772,111 @@ class FreqaiDataKitchen:
return
def compute_inlier_metric(self, set_='train') -> None:
"""
Compute inlier metric from backwards distance distributions.
This metric defines how well features from a timepoint fit
into previous timepoints.
"""
no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
if set_ == 'train':
compute_df = copy.deepcopy(self.data_dictionary['train_features'])
elif set_ == 'test':
compute_df = copy.deepcopy(self.data_dictionary['test_features'])
else:
compute_df = copy.deepcopy(self.data_dictionary['prediction_features'])
compute_df_reindexed = compute_df.reindex(
index=np.flip(compute_df.index)
)
pairwise = pd.DataFrame(
np.triu(
pairwise_distances(compute_df_reindexed, n_jobs=self.thread_count)
),
columns=compute_df_reindexed.index,
index=compute_df_reindexed.index
)
pairwise = pairwise.round(5)
column_labels = [
'{}{}'.format('d', i) for i in range(1, no_prev_pts + 1)
]
distances = pd.DataFrame(
columns=column_labels, index=compute_df.index
)
for index in compute_df.index[no_prev_pts:]:
current_row = pairwise.loc[[index]]
current_row_no_zeros = current_row.loc[
:, (current_row != 0).any(axis=0)
]
distances.loc[[index]] = current_row_no_zeros.iloc[
:, :no_prev_pts
]
distances = distances.replace([np.inf, -np.inf], np.nan)
drop_index = pd.isnull(distances).any(1)
distances = distances[drop_index == 0]
inliers = pd.DataFrame(index=distances.index)
for key in distances.keys():
current_distances = distances[key].dropna()
fit_params = stats.weibull_min.fit(current_distances)
quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
df_inlier = pd.DataFrame(
{key: quantiles}, index=distances.index
)
inliers = pd.concat(
[inliers, df_inlier], axis=1
)
inlier_metric = pd.DataFrame(
data=inliers.sum(axis=1) / no_prev_pts,
columns=['inlier_metric'],
index=compute_df.index
)
inlier_metric = (2 * (inlier_metric - inlier_metric.min()) /
(inlier_metric.max() - inlier_metric.min()) - 1)
if set_ in ('train', 'test'):
inlier_metric = inlier_metric.iloc[no_prev_pts:]
compute_df = compute_df.iloc[no_prev_pts:]
self.remove_beginning_points_from_data_dict(set_, no_prev_pts)
self.data_dictionary[f'{set_}_features'] = pd.concat(
[compute_df, inlier_metric], axis=1)
else:
self.data_dictionary['prediction_features'] = pd.concat(
[compute_df, inlier_metric], axis=1)
self.data_dictionary['prediction_features'].fillna(0, inplace=True)
logger.info('Inlier metric computed and added to features.')
return None
def remove_beginning_points_from_data_dict(self, set_='train', no_prev_pts: int = 10):
features = self.data_dictionary[f'{set_}_features']
weights = self.data_dictionary[f'{set_}_weights']
labels = self.data_dictionary[f'{set_}_labels']
self.data_dictionary[f'{set_}_weights'] = weights[no_prev_pts:]
self.data_dictionary[f'{set_}_features'] = features.iloc[no_prev_pts:]
self.data_dictionary[f'{set_}_labels'] = labels.iloc[no_prev_pts:]
def add_noise_to_training_features(self) -> None:
"""
Add noise to train features to reduce the risk of overfitting.
"""
mu = 0 # no shift
sigma = self.freqai_config["feature_parameters"]["noise_standard_deviation"]
compute_df = self.data_dictionary['train_features']
noise = np.random.normal(mu, sigma, [compute_df.shape[0], compute_df.shape[1]])
self.data_dictionary['train_features'] += noise
return
def find_features(self, dataframe: DataFrame) -> None:
"""
Find features in the strategy provided dataframe
@@ -848,6 +979,7 @@ class FreqaiDataKitchen:
to_keep = [col for col in dataframe.columns if not col.startswith("&")]
self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1)
self.return_dataframe = self.remove_training_from_backtesting()
self.full_df = DataFrame()
return
@@ -871,14 +1003,14 @@ class FreqaiDataKitchen:
"Please indicate the end date of your desired backtesting. "
"timerange.")
# backtest_timerange.stopts = int(
# datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
# datetime.now(tz=timezone.utc).timestamp()
# )
backtest_timerange.startts = (
backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY
)
start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts)
start = datetime.fromtimestamp(backtest_timerange.startts, tz=timezone.utc)
stop = datetime.fromtimestamp(backtest_timerange.stopts, tz=timezone.utc)
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
self.full_path = Path(
@@ -904,7 +1036,7 @@ class FreqaiDataKitchen:
:return:
bool = If the model is expired or not.
"""
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
time = datetime.now(tz=timezone.utc).timestamp()
elapsed_time = (time - trained_timestamp) / 3600 # hours
max_time = self.freqai_config.get("expiration_hours", 0)
if max_time > 0:
@@ -916,7 +1048,7 @@ class FreqaiDataKitchen:
self, trained_timestamp: int
) -> Tuple[bool, TimeRange, TimeRange]:
time = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
time = datetime.now(tz=timezone.utc).timestamp()
trained_timerange = TimeRange()
data_load_timerange = TimeRange()
@@ -1094,7 +1226,6 @@ class FreqaiDataKitchen:
def save_backtesting_prediction(
self, append_df: DataFrame
) -> None:
"""
Save prediction dataframe from backtesting to h5 file format
:param append_df: dataframe for backtesting period
@@ -1108,7 +1239,6 @@ class FreqaiDataKitchen:
def get_backtesting_prediction(
self
) -> DataFrame:
"""
Get prediction dataframe from h5 file format
"""

View File

@@ -1,13 +1,12 @@
# import contextlib
import datetime
import logging
import shutil
import threading
import time
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from pathlib import Path
from threading import Lock
from typing import Any, Dict, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple
import numpy as np
import pandas as pd
@@ -15,6 +14,7 @@ from numpy.typing import NDArray
from pandas import DataFrame
from freqtrade.configuration import TimeRange
from freqtrade.constants import DATETIME_PRINT_FORMAT
from freqtrade.enums import RunMode
from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
@@ -27,13 +27,6 @@ pd.options.mode.chained_assignment = None
logger = logging.getLogger(__name__)
def threaded(fn):
def wrapper(*args, **kwargs):
threading.Thread(target=fn, args=args, kwargs=kwargs).start()
return wrapper
class IFreqaiModel(ABC):
"""
Class containing all tools for training and prediction in the strategy.
@@ -66,7 +59,6 @@ class IFreqaiModel(ABC):
"data_split_parameters", {})
self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
"model_training_parameters", {})
self.feature_parameters = config.get("freqai", {}).get("feature_parameters")
self.retrain = False
self.first = True
self.set_full_path()
@@ -77,11 +69,14 @@ class IFreqaiModel(ABC):
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.scanning = False
self.ft_params = self.freqai_info["feature_parameters"]
self.keras: bool = self.freqai_info.get("keras", False)
if self.keras and self.freqai_info.get("feature_parameters", {}).get("DI_threshold", 0):
self.freqai_info["feature_parameters"]["DI_threshold"] = 0
if self.keras and self.ft_params.get("DI_threshold", 0):
self.ft_params["DI_threshold"] = 0
logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
if self.ft_params.get("inlier_metric_window", 0):
self.CONV_WIDTH = self.ft_params.get("inlier_metric_window", 0) * 2
self.pair_it = 0
self.pair_it_train = 0
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
@@ -93,6 +88,16 @@ class IFreqaiModel(ABC):
self.begin_time: float = 0
self.begin_time_train: float = 0
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
self.continual_learning = self.freqai_info.get('continual_learning', False)
self._threads: List[threading.Thread] = []
self._stop_event = threading.Event()
def __getstate__(self):
"""
Return an empty state to be pickled in hyperopt
"""
return ({})
self.strategy: Optional[IStrategy] = None
def assert_config(self, config: Dict[str, Any]) -> None:
@@ -148,15 +153,34 @@ class IFreqaiModel(ABC):
self.model = None
self.dk = None
@threaded
def start_scanning(self, strategy: IStrategy) -> None:
def shutdown(self):
"""
Cleans up threads on Shutdown, set stop event. Join threads to wait
for current training iteration.
"""
logger.info("Stopping FreqAI")
self._stop_event.set()
logger.info("Waiting on Training iteration")
for _thread in self._threads:
_thread.join()
def start_scanning(self, *args, **kwargs) -> None:
"""
Start `self._start_scanning` in a separate thread
"""
_thread = threading.Thread(target=self._start_scanning, args=args, kwargs=kwargs)
self._threads.append(_thread)
_thread.start()
def _start_scanning(self, strategy: IStrategy) -> None:
"""
Function designed to constantly scan pairs for retraining on a separate thread (intracandle)
to improve model youth. This function is agnostic to data preparation/collection/storage,
it simply trains on what ever data is available in the self.dd.
:param strategy: IStrategy = The user defined strategy class
"""
while 1:
while not self._stop_event.is_set():
time.sleep(1)
for pair in self.config.get("exchange", {}).get("pair_whitelist"):
@@ -175,7 +199,7 @@ class IFreqaiModel(ABC):
if retrain:
self.train_timer('start')
self.train_model_in_series(
self.extract_data_and_train_model(
new_trained_timerange, pair, strategy, dk, data_load_timerange
)
self.train_timer('stop')
@@ -215,12 +239,12 @@ class IFreqaiModel(ABC):
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
trained_timestamp = tr_train
tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime(
"%Y-%m-%d %H:%M:%S"
)
tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime(
"%Y-%m-%d %H:%M:%S"
)
tr_train_startts_str = datetime.fromtimestamp(
tr_train.startts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
tr_train_stopts_str = datetime.fromtimestamp(
tr_train.stopts,
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
logger.info(
f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
@@ -405,24 +429,30 @@ class IFreqaiModel(ABC):
def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
"""
Base data cleaning method for train
Any function inside this method should drop training data points from the filtered_dataframe
based on user decided logic. See FreqaiDataKitchen::use_SVM_to_remove_outliers() for an
example of how outlier data points are dropped from the dataframe used for training.
Base data cleaning method for train.
Functions here improve/modify the input data by identifying outliers,
computing additional metrics, adding noise, reducing dimensionality etc.
"""
if self.freqai_info["feature_parameters"].get(
ft_params = self.freqai_info["feature_parameters"]
if ft_params.get('inlier_metric_window', 0):
dk.compute_inlier_metric(set_='train')
if self.freqai_info["data_split_parameters"]["test_size"] > 0:
dk.compute_inlier_metric(set_='test')
if ft_params.get(
"principal_component_analysis", False
):
dk.principal_component_analysis()
if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
if ft_params.get("use_SVM_to_remove_outliers", False):
dk.use_SVM_to_remove_outliers(predict=False)
if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
if ft_params.get("DI_threshold", 0):
dk.data["avg_mean_dist"] = dk.compute_distances()
if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
if dk.pair in self.dd.old_DBSCAN_eps:
eps = self.dd.old_DBSCAN_eps[dk.pair]
else:
@@ -430,29 +460,31 @@ class IFreqaiModel(ABC):
dk.use_DBSCAN_to_remove_outliers(predict=False, eps=eps)
self.dd.old_DBSCAN_eps[dk.pair] = dk.data['DBSCAN_eps']
if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
dk.add_noise_to_training_features()
def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
"""
Base data cleaning method for predict.
These functions each modify dk.do_predict, which is a dataframe with equal length
to the number of candles coming from and returning to the strategy. Inside do_predict,
1 allows prediction and < 0 signals to the strategy that the model is not confident in
the prediction.
See FreqaiDataKitchen::remove_outliers() for an example
of how the do_predict vector is modified. do_predict is ultimately passed back to strategy
for buy signals.
Functions here are complementary to the functions of data_cleaning_train.
"""
if self.freqai_info["feature_parameters"].get(
ft_params = self.freqai_info["feature_parameters"]
if ft_params.get('inlier_metric_window', 0):
dk.compute_inlier_metric(set_='predict')
if ft_params.get(
"principal_component_analysis", False
):
dk.pca_transform(dataframe)
dk.pca_transform(self.dk.data_dictionary['prediction_features'])
if self.freqai_info["feature_parameters"].get("use_SVM_to_remove_outliers", False):
if ft_params.get("use_SVM_to_remove_outliers", False):
dk.use_SVM_to_remove_outliers(predict=True)
if self.freqai_info["feature_parameters"].get("DI_threshold", 0):
if ft_params.get("DI_threshold", 0):
dk.check_if_pred_in_training_spaces()
if self.freqai_info["feature_parameters"].get("use_DBSCAN_to_remove_outliers", False):
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
dk.use_DBSCAN_to_remove_outliers(predict=True)
def model_exists(
@@ -488,7 +520,7 @@ class IFreqaiModel(ABC):
Path(self.full_path, Path(self.config["config_files"][0]).name),
)
def train_model_in_series(
def extract_data_and_train_model(
self,
new_trained_timerange: TimeRange,
pair: str,
@@ -580,7 +612,7 @@ class IFreqaiModel(ABC):
# # for keras type models, the conv_window needs to be prepended so
# # viewing is correct in frequi
if self.freqai_info.get('keras', False):
if self.freqai_info.get('keras', False) or self.ft_params.get('inlier_metric_window', 0):
n_lost_points = self.freqai_info.get('conv_width', 2)
zeros_df = DataFrame(np.zeros((n_lost_points, len(hist_preds_df.columns))),
columns=hist_preds_df.columns)
@@ -646,21 +678,30 @@ class IFreqaiModel(ABC):
self.train_time = 0
return
def get_init_model(self, pair: str) -> Any:
if pair not in self.dd.model_dictionary or not self.continual_learning:
init_model = None
else:
init_model = self.dd.model_dictionary[pair]
return init_model
# Following methods which are overridden by user made prediction models.
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
@abstractmethod
def train(self, unfiltered_dataframe: DataFrame, pair: str, dk: FreqaiDataKitchen) -> Any:
def train(self, unfiltered_df: DataFrame, pair: str,
dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Filter the training data and train a model to it. Train makes heavy use of the datahandler
for storing, saving, loading, and analyzing the data.
:param unfiltered_dataframe: Full dataframe for the current training period
:param unfiltered_df: Full dataframe for the current training period
:param metadata: pair metadata from strategy.
:return: Trained model which can be used to inference (self.predict)
"""
@abstractmethod
def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
def fit(self, data_dictionary: Dict[str, Any], dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Most regressors use the same function names and arguments e.g. user
can drop in LGBMRegressor in place of CatBoostRegressor and all data
@@ -673,11 +714,11 @@ class IFreqaiModel(ABC):
@abstractmethod
def predict(
self, dataframe: DataFrame, dk: FreqaiDataKitchen, first: bool = True
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param unfiltered_dataframe: Full dataframe for the current backtest period.
:param unfiltered_df: Full dataframe for the current backtest period.
:param dk: FreqaiDataKitchen = Data management/analysis tool associated to present pair only
:param first: boolean = whether this is the first prediction or not.
:return:

View File

@@ -3,7 +3,8 @@ from typing import Any, Dict
from catboost import CatBoostClassifier, Pool
from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@@ -16,7 +17,7 @@ class CatboostClassifier(BaseClassifierModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:params:
@@ -36,6 +37,8 @@ class CatboostClassifier(BaseClassifierModel):
**self.model_training_parameters,
)
cbr.fit(train_data)
init_model = self.get_init_model(dk.pair)
cbr.fit(train_data, init_model=init_model)
return cbr

View File

@@ -1,10 +1,10 @@
import gc
import logging
from typing import Any, Dict
from catboost import CatBoostRegressor, Pool
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@@ -17,7 +17,7 @@ class CatboostRegressor(BaseRegressionModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
@@ -38,16 +38,13 @@ class CatboostRegressor(BaseRegressionModel):
weight=data_dictionary["test_weights"],
)
init_model = self.get_init_model(dk.pair)
model = CatBoostRegressor(
allow_writing_files=False,
**self.model_training_parameters,
)
model.fit(X=train_data, eval_set=test_data)
# some evidence that catboost pools have memory leaks:
# https://github.com/catboost/catboost/issues/1835
del train_data, test_data
gc.collect()
model.fit(X=train_data, eval_set=test_data, init_model=init_model)
return model

View File

@@ -1,10 +1,11 @@
import logging
from typing import Any, Dict
from catboost import CatBoostRegressor # , Pool
from sklearn.multioutput import MultiOutputRegressor
from catboost import CatBoostRegressor, Pool
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@@ -17,7 +18,7 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
@@ -31,14 +32,37 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
sample_weight = data_dictionary["train_weights"]
model = MultiOutputRegressor(estimator=cbr)
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
train_score = model.score(X, y)
test_score = model.score(*eval_set)
logger.info(f"Train score {train_score}, Test score {test_score}")
eval_sets = [None] * data_dictionary['test_labels'].shape[1]
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"].iloc[:, i],
weight=data_dictionary["test_weights"],
)
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'init_model': init_models[i]})
model = FreqaiMultiOutputRegressor(estimator=cbr)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model

View File

@@ -3,7 +3,8 @@ from typing import Any, Dict
from lightgbm import LGBMClassifier
from freqtrade.freqai.prediction_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@@ -16,7 +17,7 @@ class LightGBMClassifier(BaseClassifierModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:params:
@@ -35,9 +36,11 @@ class LightGBMClassifier(BaseClassifierModel):
y = data_dictionary["train_labels"].to_numpy()[:, 0]
train_weights = data_dictionary["train_weights"]
init_model = self.get_init_model(dk.pair)
model = LGBMClassifier(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
eval_sample_weight=[test_weights])
eval_sample_weight=[test_weights], init_model=init_model)
return model

View File

@@ -3,7 +3,8 @@ from typing import Any, Dict
from lightgbm import LGBMRegressor
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@@ -16,7 +17,7 @@ class LightGBMRegressor(BaseRegressionModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
Most regressors use the same function names and arguments e.g. user
can drop in LGBMRegressor in place of CatBoostRegressor and all data
@@ -35,9 +36,11 @@ class LightGBMRegressor(BaseRegressionModel):
y = data_dictionary["train_labels"]
train_weights = data_dictionary["train_weights"]
init_model = self.get_init_model(dk.pair)
model = LGBMRegressor(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
eval_sample_weight=[eval_weights])
eval_sample_weight=[eval_weights], init_model=init_model)
return model

View File

@@ -2,9 +2,10 @@ import logging
from typing import Any, Dict
from lightgbm import LGBMRegressor
from sklearn.multioutput import MultiOutputRegressor
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@@ -17,7 +18,7 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict) -> Any:
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
@@ -28,12 +29,36 @@ class LightGBMRegressorMultiTarget(BaseRegressionModel):
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
sample_weight = data_dictionary["train_weights"]
model = MultiOutputRegressor(estimator=lgb)
model.fit(X=X, y=y, sample_weight=sample_weight) # , eval_set=eval_set)
train_score = model.score(X, y)
test_score = model.score(*eval_set)
logger.info(f"Train score {train_score}, Test score {test_score}")
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
eval_sets = [(None, None)] * data_dictionary['test_labels'].shape[1] # type: ignore
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = ( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
)
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'eval_sample_weight': eval_weights,
'init_model': init_models[i]})
model = FreqaiMultiOutputRegressor(estimator=lgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model

View File

@@ -0,0 +1,45 @@
import logging
from typing import Any, Dict
from xgboost import XGBRegressor
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class XGBoostRegressor(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
else:
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
eval_weights = [data_dictionary['test_weights']]
sample_weight = data_dictionary["train_weights"]
xgb_model = self.get_init_model(dk.pair)
model = XGBRegressor(**self.model_training_parameters)
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
return model

View File

@@ -0,0 +1,63 @@
import logging
from typing import Any, Dict
from xgboost import XGBRegressor
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.base_models.FreqaiMultiOutputRegressor import FreqaiMultiOutputRegressor
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class XGBoostRegressorMultiTarget(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
xgb = XGBRegressor(**self.model_training_parameters)
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
sample_weight = data_dictionary["train_weights"]
eval_weights = None
eval_sets = [None] * y.shape[1]
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
eval_weights = [data_dictionary["test_weights"]]
for i in range(data_dictionary['test_labels'].shape[1]):
eval_sets[i] = [( # type: ignore
data_dictionary["test_features"],
data_dictionary["test_labels"].iloc[:, i]
)]
init_model = self.get_init_model(dk.pair)
if init_model:
init_models = init_model.estimators_
else:
init_models = [None] * y.shape[1]
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'sample_weight_eval_set': eval_weights,
'xgb_model': init_models[i]})
model = FreqaiMultiOutputRegressor(estimator=xgb)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
if thread_training:
model.n_jobs = y.shape[1]
model.fit(X=X, y=y, sample_weight=sample_weight, fit_params=fit_params)
return model