Merge remote-tracking branch 'origin/develop' into add-metric-tracker
This commit is contained in:
@@ -78,7 +78,7 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
@@ -77,7 +77,7 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
@@ -461,9 +461,8 @@ class FreqaiDataDrawer:
|
||||
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Saves all data associated with a model for a single sub-train time range
|
||||
:params:
|
||||
:model: User trained model which can be reused for inferencing to generate
|
||||
predictions
|
||||
:param model: User trained model which can be reused for inferencing to generate
|
||||
predictions
|
||||
"""
|
||||
|
||||
if not dk.data_path.is_dir():
|
||||
@@ -581,8 +580,7 @@ class FreqaiDataDrawer:
|
||||
Append new candles to our stores historic data (in memory) so that
|
||||
we do not need to load candle history from disk and we dont need to
|
||||
pinging exchange multiple times for the same candle.
|
||||
:params:
|
||||
dataframe: DataFrame = strategy provided dataframe
|
||||
:param dataframe: DataFrame = strategy provided dataframe
|
||||
"""
|
||||
feat_params = self.freqai_info["feature_parameters"]
|
||||
with self.history_lock:
|
||||
@@ -628,9 +626,8 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
Load pair histories for all whitelist and corr_pairlist pairs.
|
||||
Only called once upon startup of bot.
|
||||
:params:
|
||||
timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
:param timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
"""
|
||||
history_data = self.historic_data
|
||||
|
||||
@@ -653,10 +650,9 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
Searches through our historic_data in memory and returns the dataframes relevant
|
||||
to the present pair.
|
||||
:params:
|
||||
timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
:param timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
"""
|
||||
with self.history_lock:
|
||||
corr_dataframes: Dict[Any, Any] = {}
|
||||
|
@@ -107,9 +107,8 @@ class FreqaiDataKitchen:
|
||||
) -> None:
|
||||
"""
|
||||
Set the paths to the data for the present coin/botloop
|
||||
:params:
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
trained_timestamp: int = timestamp of most recent training
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:param trained_timestamp: int = timestamp of most recent training
|
||||
"""
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
|
||||
@@ -129,8 +128,8 @@ class FreqaiDataKitchen:
|
||||
Given the dataframe for the full history for training, split the data into
|
||||
training and test data according to user specified parameters in configuration
|
||||
file.
|
||||
:filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:labels: cleaned labels ready to be split.
|
||||
:param filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:param labels: cleaned labels ready to be split.
|
||||
"""
|
||||
feat_dict = self.freqai_config["feature_parameters"]
|
||||
|
||||
@@ -189,13 +188,14 @@ class FreqaiDataKitchen:
|
||||
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
|
||||
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
|
||||
row that had a NaN and will shield user from that prediction.
|
||||
:params:
|
||||
:unfiltered_df: the full dataframe for the present training period
|
||||
:training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified parameters in the configuration file.
|
||||
:labels: the labels for the dataset
|
||||
:training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
|
||||
:param unfiltered_df: the full dataframe for the present training period
|
||||
:param training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified
|
||||
parameters in the configuration file.
|
||||
:param labels: the labels for the dataset
|
||||
:param training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
:returns:
|
||||
:filtered_df: dataframe cleaned of NaNs and only containing the user
|
||||
requested feature set.
|
||||
@@ -241,6 +241,7 @@ class FreqaiDataKitchen:
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else:
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1)
|
||||
@@ -285,8 +286,8 @@ class FreqaiDataKitchen:
|
||||
def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||
"""
|
||||
Normalize all data in the data_dictionary according to the training dataset
|
||||
:params:
|
||||
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
|
||||
:param data_dictionary: dictionary containing the cleaned and
|
||||
split training/test data/labels
|
||||
:returns:
|
||||
:data_dictionary: updated dictionary with standardized values.
|
||||
"""
|
||||
@@ -460,6 +461,24 @@ class FreqaiDataKitchen:
|
||||
|
||||
return df
|
||||
|
||||
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Check that prediction feature labels match training feature labels.
|
||||
:params:
|
||||
:df_predictions: incoming predictions
|
||||
"""
|
||||
train_labels = self.data_dictionary["train_features"].columns
|
||||
pred_labels = df_predictions.columns
|
||||
num_diffs = len(pred_labels.difference(train_labels))
|
||||
if num_diffs != 0:
|
||||
df_predictions = df_predictions[train_labels]
|
||||
logger.warning(
|
||||
f"Removed {num_diffs} features from prediction features, "
|
||||
f"these were likely considered constant values during most recent training."
|
||||
)
|
||||
|
||||
return df_predictions
|
||||
|
||||
def principal_component_analysis(self) -> None:
|
||||
"""
|
||||
Performs Principal Component Analysis on the data for dimensionality reduction
|
||||
@@ -516,8 +535,7 @@ class FreqaiDataKitchen:
|
||||
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
|
||||
"""
|
||||
Use an existing pca transform to transform data into components
|
||||
:params:
|
||||
filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
:param filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
"""
|
||||
pca_components = self.pca.transform(filtered_dataframe)
|
||||
self.data_dictionary["prediction_features"] = pd.DataFrame(
|
||||
@@ -561,8 +579,7 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
Build/inference a Support Vector Machine to detect outliers
|
||||
in training data and prediction
|
||||
:params:
|
||||
predict: bool = If true, inference an existing SVM model, else construct one
|
||||
:param predict: bool = If true, inference an existing SVM model, else construct one
|
||||
"""
|
||||
|
||||
if self.keras:
|
||||
@@ -647,11 +664,11 @@ class FreqaiDataKitchen:
|
||||
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
|
||||
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
|
||||
pct of training data that they want to be considered outliers.
|
||||
:params:
|
||||
predict: bool = If False (training), iterate to find the best hyper parameters to match
|
||||
user requested outlier percent target. If True (prediction), use the parameters
|
||||
determined from the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
:param predict: bool = If False (training), iterate to find the best hyper parameters
|
||||
to match user requested outlier percent target.
|
||||
If True (prediction), use the parameters determined from
|
||||
the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
"""
|
||||
|
||||
if predict:
|
||||
@@ -1118,15 +1135,13 @@ class FreqaiDataKitchen:
|
||||
prediction_dataframe: DataFrame = pd.DataFrame(),
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Use the user defined strategy for populating indicators during
|
||||
retrain
|
||||
:params:
|
||||
strategy: IStrategy = user defined strategy object
|
||||
corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
Use the user defined strategy for populating indicators during retrain
|
||||
:param strategy: IStrategy = user defined strategy object
|
||||
:param corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:returns:
|
||||
dataframe: DataFrame = dataframe containing populated indicators
|
||||
"""
|
||||
|
@@ -196,16 +196,15 @@ class IFreqaiModel(ABC):
|
||||
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
|
||||
|
||||
dk = FreqaiDataKitchen(self.config, self.live, pair)
|
||||
dk.set_paths(pair, trained_timestamp)
|
||||
(
|
||||
retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange,
|
||||
) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
|
||||
if retrain:
|
||||
self.train_timer('start')
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
try:
|
||||
self.extract_data_and_train_model(
|
||||
new_trained_timerange, pair, strategy, dk, data_load_timerange
|
||||
@@ -270,9 +269,7 @@ class IFreqaiModel(ABC):
|
||||
)
|
||||
|
||||
trained_timestamp_int = int(trained_timestamp.stopts)
|
||||
dk.data_path = Path(
|
||||
dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
|
||||
)
|
||||
dk.set_paths(pair, trained_timestamp_int)
|
||||
|
||||
dk.set_new_model_names(pair, trained_timestamp)
|
||||
|
||||
@@ -605,11 +602,11 @@ class IFreqaiModel(ABC):
|
||||
If the user reuses an identifier on a subsequent instance,
|
||||
this function will not be called. In that case, "real" predictions
|
||||
will be appended to the loaded set of historic predictions.
|
||||
:param: df: DataFrame = the dataframe containing the training feature data
|
||||
:param: model: Any = A model which was `fit` using a common library such as
|
||||
catboost or lightgbm
|
||||
:param: dk: FreqaiDataKitchen = object containing methods for data analysis
|
||||
:param: pair: str = current pair
|
||||
:param df: DataFrame = the dataframe containing the training feature data
|
||||
:param model: Any = A model which was `fit` using a common library such as
|
||||
catboost or lightgbm
|
||||
:param dk: FreqaiDataKitchen = object containing methods for data analysis
|
||||
:param pair: str = current pair
|
||||
"""
|
||||
|
||||
self.dd.historic_predictions[pair] = pred_df
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostClassifier, Pool
|
||||
@@ -20,9 +21,8 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
train_data = Pool(
|
||||
@@ -32,8 +32,9 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
)
|
||||
|
||||
cbr = CatBoostClassifier(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
loss_function='MultiClass',
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
@@ -41,7 +42,8 @@ class CatboostRegressor(BaseRegressionModel):
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
@@ -26,7 +27,8 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
"""
|
||||
|
||||
cbr = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
|
@@ -20,9 +20,8 @@ class LightGBMClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
|
||||
|
@@ -26,9 +26,8 @@ class XGBoostClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"].to_numpy()
|
||||
@@ -65,7 +64,7 @@ class XGBoostClassifier(BaseClassifierModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
Reference in New Issue
Block a user