Merge remote-tracking branch 'origin/develop' into dev-merge-rl
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from time import time
|
||||
from typing import Any, Tuple
|
||||
|
||||
import numpy as np
|
||||
@@ -32,7 +33,9 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info("-------------------- Starting training " f"{pair} --------------------")
|
||||
logger.info(f"-------------------- Starting training {pair} --------------------")
|
||||
|
||||
start_time = time()
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
@@ -45,10 +48,10 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get('fit_live_predictions', 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
@@ -57,13 +60,16 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
|
||||
f' features and {len(data_dictionary["train_features"])} data points'
|
||||
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
|
||||
)
|
||||
logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")
|
||||
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
end_time = time()
|
||||
|
||||
logger.info(f"-------------------- Done training {pair} "
|
||||
f"({end_time - start_time:.2f} secs) --------------------")
|
||||
|
||||
return model
|
||||
|
||||
@@ -86,7 +92,7 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
filtered_df = dk.normalize_data_from_metadata(filtered_df)
|
||||
dk.data_dictionary["prediction_features"] = filtered_df
|
||||
|
||||
self.data_cleaning_predict(dk, filtered_df)
|
||||
self.data_cleaning_predict(dk)
|
||||
|
||||
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
|
||||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from time import time
|
||||
from typing import Any, Tuple
|
||||
|
||||
import numpy as np
|
||||
@@ -31,7 +32,9 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info("-------------------- Starting training " f"{pair} --------------------")
|
||||
logger.info(f"-------------------- Starting training {pair} --------------------")
|
||||
|
||||
start_time = time()
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
@@ -44,10 +47,10 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get('fit_live_predictions', 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
@@ -56,13 +59,16 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
|
||||
f' features and {len(data_dictionary["train_features"])} data points'
|
||||
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
|
||||
)
|
||||
logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")
|
||||
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
end_time = time()
|
||||
|
||||
logger.info(f"-------------------- Done training {pair} "
|
||||
f"({end_time - start_time:.2f} secs) --------------------")
|
||||
|
||||
return model
|
||||
|
||||
@@ -86,7 +92,7 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
dk.data_dictionary["prediction_features"] = filtered_df
|
||||
|
||||
# optional additional data cleaning/analysis
|
||||
self.data_cleaning_predict(dk, filtered_df)
|
||||
self.data_cleaning_predict(dk)
|
||||
|
||||
predictions = self.model.predict(dk.data_dictionary["prediction_features"])
|
||||
pred_df = DataFrame(predictions, columns=dk.label_list)
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from time import time
|
||||
from typing import Any
|
||||
|
||||
from pandas import DataFrame
|
||||
@@ -28,7 +29,9 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
:model: Trained model which can be used to inference (self.predict)
|
||||
"""
|
||||
|
||||
logger.info("-------------------- Starting training " f"{pair} --------------------")
|
||||
logger.info(f"-------------------- Starting training {pair} --------------------")
|
||||
|
||||
start_time = time()
|
||||
|
||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||
features_filtered, labels_filtered = dk.filter_features(
|
||||
@@ -41,10 +44,10 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
start_date = unfiltered_df["date"].iloc[0].strftime("%Y-%m-%d")
|
||||
end_date = unfiltered_df["date"].iloc[-1].strftime("%Y-%m-%d")
|
||||
logger.info(f"-------------------- Training on data from {start_date} to "
|
||||
f"{end_date}--------------------")
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get('fit_live_predictions', 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
@@ -53,12 +56,15 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
self.data_cleaning_train(dk)
|
||||
|
||||
logger.info(
|
||||
f'Training model on {len(dk.data_dictionary["train_features"].columns)}'
|
||||
f' features and {len(data_dictionary["train_features"])} data points'
|
||||
f"Training model on {len(dk.data_dictionary['train_features'].columns)} features"
|
||||
)
|
||||
logger.info(f"Training model on {len(data_dictionary['train_features'])} data points")
|
||||
|
||||
model = self.fit(data_dictionary, dk)
|
||||
|
||||
logger.info(f"--------------------done training {pair}--------------------")
|
||||
end_time = time()
|
||||
|
||||
logger.info(f"-------------------- Done training {pair} "
|
||||
f"({end_time - start_time:.2f} secs) --------------------")
|
||||
|
||||
return model
|
||||
|
@@ -1,4 +1,3 @@
|
||||
|
||||
from joblib import Parallel
|
||||
from sklearn.multioutput import MultiOutputRegressor, _fit_estimator
|
||||
from sklearn.utils.fixes import delayed
|
||||
|
@@ -314,6 +314,7 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
|
||||
dk.find_features(dataframe)
|
||||
dk.find_labels(dataframe)
|
||||
|
||||
full_labels = dk.label_list + dk.unique_class_list
|
||||
|
||||
@@ -377,7 +378,27 @@ class FreqaiDataDrawer:
|
||||
if self.config.get("freqai", {}).get("purge_old_models", False):
|
||||
self.purge_old_models()
|
||||
|
||||
# Functions pulled back from FreqaiDataKitchen because they relied on DataDrawer
|
||||
def save_metadata(self, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Saves only metadata for backtesting studies if user prefers
|
||||
not to save model data. This saves tremendous amounts of space
|
||||
for users generating huge studies.
|
||||
This is only active when `save_backtest_models`: false (not default)
|
||||
"""
|
||||
if not dk.data_path.is_dir():
|
||||
dk.data_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
save_path = Path(dk.data_path)
|
||||
|
||||
dk.data["data_path"] = str(dk.data_path)
|
||||
dk.data["model_filename"] = str(dk.model_filename)
|
||||
dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
|
||||
dk.data["label_list"] = dk.label_list
|
||||
|
||||
with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp:
|
||||
rapidjson.dump(dk.data, fp, default=self.np_encoder, number_mode=rapidjson.NM_NATIVE)
|
||||
|
||||
return
|
||||
|
||||
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
@@ -406,7 +427,7 @@ class FreqaiDataDrawer:
|
||||
|
||||
dk.data["data_path"] = str(dk.data_path)
|
||||
dk.data["model_filename"] = str(dk.model_filename)
|
||||
dk.data["training_features_list"] = list(dk.data_dictionary["train_features"].columns)
|
||||
dk.data["training_features_list"] = dk.training_features_list
|
||||
dk.data["label_list"] = dk.label_list
|
||||
# store the metadata
|
||||
with open(save_path / f"{dk.model_filename}_metadata.json", "w") as fp:
|
||||
@@ -434,6 +455,16 @@ class FreqaiDataDrawer:
|
||||
|
||||
return
|
||||
|
||||
def load_metadata(self, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Load only metadata into datakitchen to increase performance during
|
||||
presaved backtesting (prediction file loading).
|
||||
"""
|
||||
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
|
||||
dk.data = json.load(fp)
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
dk.label_list = dk.data["label_list"]
|
||||
|
||||
def load_data(self, coin: str, dk: FreqaiDataKitchen) -> Any:
|
||||
"""
|
||||
loads all data required to make a prediction on a sub-train time range
|
||||
|
@@ -138,20 +138,15 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
feat_dict = self.freqai_config["feature_parameters"]
|
||||
|
||||
if 'shuffle' not in self.freqai_config['data_split_parameters']:
|
||||
self.freqai_config["data_split_parameters"].update({'shuffle': False})
|
||||
|
||||
weights: npt.ArrayLike
|
||||
if feat_dict.get("weight_factor", 0) > 0:
|
||||
weights = self.set_weights_higher_recent(len(filtered_dataframe))
|
||||
else:
|
||||
weights = np.ones(len(filtered_dataframe))
|
||||
|
||||
if feat_dict.get("stratify_training_data", 0) > 0:
|
||||
stratification = np.zeros(len(filtered_dataframe))
|
||||
for i in range(1, len(stratification)):
|
||||
if i % feat_dict.get("stratify_training_data", 0) == 0:
|
||||
stratification[i] = 1
|
||||
else:
|
||||
stratification = None
|
||||
|
||||
if self.freqai_config.get('data_split_parameters', {}).get('test_size', 0.1) != 0:
|
||||
(
|
||||
train_features,
|
||||
@@ -164,7 +159,6 @@ class FreqaiDataKitchen:
|
||||
filtered_dataframe[: filtered_dataframe.shape[0]],
|
||||
labels,
|
||||
weights,
|
||||
stratify=stratification,
|
||||
**self.config["freqai"]["data_split_parameters"],
|
||||
)
|
||||
else:
|
||||
@@ -214,7 +208,7 @@ class FreqaiDataKitchen:
|
||||
filtered_df = unfiltered_df.filter(training_feature_list, axis=1)
|
||||
filtered_df = filtered_df.replace([np.inf, -np.inf], np.nan)
|
||||
|
||||
drop_index = pd.isnull(filtered_df).any(1) # get the rows that have NaNs,
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1) # get the rows that have NaNs,
|
||||
drop_index = drop_index.replace(True, 1).replace(False, 0) # pep8 requirement.
|
||||
if (training_filter):
|
||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||
@@ -225,7 +219,7 @@ class FreqaiDataKitchen:
|
||||
# about removing any row with NaNs
|
||||
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
||||
labels = unfiltered_df.filter(label_list, axis=1)
|
||||
drop_index_labels = pd.isnull(labels).any(1)
|
||||
drop_index_labels = pd.isnull(labels).any(axis=1)
|
||||
drop_index_labels = drop_index_labels.replace(True, 1).replace(False, 0)
|
||||
dates = unfiltered_df['date']
|
||||
filtered_df = filtered_df[
|
||||
@@ -253,7 +247,7 @@ class FreqaiDataKitchen:
|
||||
else:
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_df).any(1)
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1)
|
||||
self.data["filter_drop_index_prediction"] = drop_index
|
||||
filtered_df.fillna(0, inplace=True)
|
||||
# replacing all NaNs with zeros to avoid issues in 'prediction', but any prediction
|
||||
@@ -470,27 +464,6 @@ class FreqaiDataKitchen:
|
||||
|
||||
return df
|
||||
|
||||
def remove_training_from_backtesting(
|
||||
self
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Function which takes the backtesting time range and
|
||||
remove training data from dataframe, keeping only the
|
||||
startup_candle_count candles
|
||||
"""
|
||||
startup_candle_count = self.config.get('startup_candle_count', 0)
|
||||
tf = self.config['timeframe']
|
||||
tr = self.config["timerange"]
|
||||
|
||||
backtesting_timerange = TimeRange.parse_timerange(tr)
|
||||
if startup_candle_count > 0 and backtesting_timerange:
|
||||
backtesting_timerange.subtract_start(timeframe_to_seconds(tf) * startup_candle_count)
|
||||
|
||||
start = datetime.fromtimestamp(backtesting_timerange.startts, tz=timezone.utc)
|
||||
df = self.return_dataframe
|
||||
df = df.loc[df["date"] >= start, :]
|
||||
return df
|
||||
|
||||
def principal_component_analysis(self) -> None:
|
||||
"""
|
||||
Performs Principal Component Analysis on the data for dimensionality reduction
|
||||
@@ -833,7 +806,7 @@ class FreqaiDataKitchen:
|
||||
:, :no_prev_pts
|
||||
]
|
||||
distances = distances.replace([np.inf, -np.inf], np.nan)
|
||||
drop_index = pd.isnull(distances).any(1)
|
||||
drop_index = pd.isnull(distances).any(axis=1)
|
||||
distances = distances[drop_index == 0]
|
||||
|
||||
inliers = pd.DataFrame(index=distances.index)
|
||||
@@ -856,7 +829,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
inlier_metric = pd.DataFrame(
|
||||
data=inliers.sum(axis=1) / no_prev_pts,
|
||||
columns=['inlier_metric'],
|
||||
columns=['%-inlier_metric'],
|
||||
index=compute_df.index
|
||||
)
|
||||
|
||||
@@ -906,11 +879,15 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
column_names = dataframe.columns
|
||||
features = [c for c in column_names if "%" in c]
|
||||
labels = [c for c in column_names if "&" in c]
|
||||
|
||||
if not features:
|
||||
raise OperationalException("Could not find any features!")
|
||||
|
||||
self.training_features_list = features
|
||||
|
||||
def find_labels(self, dataframe: DataFrame) -> None:
|
||||
column_names = dataframe.columns
|
||||
labels = [c for c in column_names if "&" in c]
|
||||
self.label_list = labels
|
||||
|
||||
def check_if_pred_in_training_spaces(self) -> None:
|
||||
@@ -998,8 +975,6 @@ class FreqaiDataKitchen:
|
||||
|
||||
to_keep = [col for col in dataframe.columns if not col.startswith("&")]
|
||||
self.return_dataframe = pd.concat([dataframe[to_keep], self.full_df], axis=1)
|
||||
|
||||
self.return_dataframe = self.remove_training_from_backtesting()
|
||||
self.full_df = DataFrame()
|
||||
|
||||
return
|
||||
@@ -1233,7 +1208,8 @@ class FreqaiDataKitchen:
|
||||
|
||||
def get_unique_classes_from_labels(self, dataframe: DataFrame) -> None:
|
||||
|
||||
self.find_features(dataframe)
|
||||
# self.find_features(dataframe)
|
||||
self.find_labels(dataframe)
|
||||
|
||||
for key in self.label_list:
|
||||
if dataframe[key].dtype == object:
|
||||
|
@@ -66,7 +66,7 @@ class IFreqaiModel(ABC):
|
||||
self.first = True
|
||||
self.set_full_path()
|
||||
self.follow_mode: bool = self.freqai_info.get("follow_mode", False)
|
||||
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", False)
|
||||
self.save_backtest_models: bool = self.freqai_info.get("save_backtest_models", True)
|
||||
if self.save_backtest_models:
|
||||
logger.info('Backtesting module configured to save all models.')
|
||||
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
|
||||
@@ -93,6 +93,7 @@ class IFreqaiModel(ABC):
|
||||
self.begin_time_train: float = 0
|
||||
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
|
||||
self.continual_learning = self.freqai_info.get('continual_learning', False)
|
||||
self.plot_features = self.ft_params.get("plot_feature_importances", 0)
|
||||
|
||||
self._threads: List[threading.Thread] = []
|
||||
self._stop_event = threading.Event()
|
||||
@@ -258,7 +259,8 @@ class IFreqaiModel(ABC):
|
||||
# following tr_train. Both of these windows slide through the
|
||||
# entire backtest
|
||||
for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
|
||||
(_, _, _) = self.dd.get_pair_dict_info(metadata["pair"])
|
||||
pair = metadata["pair"]
|
||||
(_, _, _) = self.dd.get_pair_dict_info(pair)
|
||||
train_it += 1
|
||||
total_trains = len(dk.backtesting_timeranges)
|
||||
self.training_timerange = tr_train
|
||||
@@ -273,39 +275,41 @@ class IFreqaiModel(ABC):
|
||||
tr_train.stopts,
|
||||
tz=timezone.utc).strftime(DATETIME_PRINT_FORMAT)
|
||||
logger.info(
|
||||
f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
|
||||
f"Training {pair}, {self.pair_it}/{self.total_pairs} pairs"
|
||||
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
|
||||
"trains"
|
||||
)
|
||||
|
||||
trained_timestamp_int = int(trained_timestamp.stopts)
|
||||
dk.data_path = Path(
|
||||
dk.full_path
|
||||
/
|
||||
f"sub-train-{metadata['pair'].split('/')[0]}_{trained_timestamp_int}"
|
||||
dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
|
||||
)
|
||||
|
||||
dk.set_new_model_names(metadata["pair"], trained_timestamp)
|
||||
dk.set_new_model_names(pair, trained_timestamp)
|
||||
|
||||
if dk.check_if_backtest_prediction_exists():
|
||||
self.dd.load_metadata(dk)
|
||||
dk.find_features(dataframe_train)
|
||||
self.check_if_feature_list_matches_strategy(dk)
|
||||
append_df = dk.get_backtesting_prediction()
|
||||
dk.append_predictions(append_df)
|
||||
else:
|
||||
if not self.model_exists(
|
||||
metadata["pair"], dk, trained_timestamp=trained_timestamp_int
|
||||
):
|
||||
if not self.model_exists(dk):
|
||||
dk.find_features(dataframe_train)
|
||||
self.model = self.train(dataframe_train, metadata["pair"], dk)
|
||||
self.dd.pair_dict[metadata["pair"]]["trained_timestamp"] = int(
|
||||
dk.find_labels(dataframe_train)
|
||||
self.model = self.train(dataframe_train, pair, dk)
|
||||
self.dd.pair_dict[pair]["trained_timestamp"] = int(
|
||||
trained_timestamp.stopts)
|
||||
|
||||
if self.plot_features:
|
||||
plot_feature_importance(self.model, pair, dk, self.plot_features)
|
||||
if self.save_backtest_models:
|
||||
logger.info('Saving backtest model to disk.')
|
||||
self.dd.save_data(self.model, metadata["pair"], dk)
|
||||
self.dd.save_data(self.model, pair, dk)
|
||||
else:
|
||||
logger.info('Saving metadata to disk.')
|
||||
self.dd.save_metadata(dk)
|
||||
else:
|
||||
self.model = self.dd.load_data(metadata["pair"], dk)
|
||||
|
||||
self.check_if_feature_list_matches_strategy(dataframe_train, dk)
|
||||
self.model = self.dd.load_data(pair, dk)
|
||||
|
||||
pred_df, do_preds = self.predict(dataframe_backtest, dk)
|
||||
append_df = dk.get_predictions_to_append(pred_df, do_preds)
|
||||
@@ -385,8 +389,7 @@ class IFreqaiModel(ABC):
|
||||
self.dd.return_null_values_to_strategy(dataframe, dk)
|
||||
return dk
|
||||
|
||||
# ensure user is feeding the correct indicators to the model
|
||||
self.check_if_feature_list_matches_strategy(dataframe, dk)
|
||||
dk.find_labels(dataframe)
|
||||
|
||||
self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)
|
||||
|
||||
@@ -431,7 +434,7 @@ class IFreqaiModel(ABC):
|
||||
return
|
||||
|
||||
def check_if_feature_list_matches_strategy(
|
||||
self, dataframe: DataFrame, dk: FreqaiDataKitchen
|
||||
self, dk: FreqaiDataKitchen
|
||||
) -> None:
|
||||
"""
|
||||
Ensure user is passing the proper feature set if they are reusing an `identifier` pointing
|
||||
@@ -440,18 +443,21 @@ class IFreqaiModel(ABC):
|
||||
:param dk: FreqaiDataKitchen = non-persistent data container/analyzer for
|
||||
current coin/bot loop
|
||||
"""
|
||||
dk.find_features(dataframe)
|
||||
|
||||
if "training_features_list_raw" in dk.data:
|
||||
feature_list = dk.data["training_features_list_raw"]
|
||||
else:
|
||||
feature_list = dk.training_features_list
|
||||
feature_list = dk.data['training_features_list']
|
||||
|
||||
if dk.training_features_list != feature_list:
|
||||
raise OperationalException(
|
||||
"Trying to access pretrained model with `identifier` "
|
||||
"but found different features furnished by current strategy."
|
||||
"Change `identifier` to train from scratch, or ensure the"
|
||||
"strategy is furnishing the same features as the pretrained"
|
||||
"model"
|
||||
"model. In case of --strategy-list, please be aware that FreqAI "
|
||||
"requires all strategies to maintain identical "
|
||||
"populate_any_indicator() functions"
|
||||
)
|
||||
|
||||
def data_cleaning_train(self, dk: FreqaiDataKitchen) -> None:
|
||||
@@ -490,20 +496,23 @@ class IFreqaiModel(ABC):
|
||||
if self.freqai_info["feature_parameters"].get('noise_standard_deviation', 0):
|
||||
dk.add_noise_to_training_features()
|
||||
|
||||
def data_cleaning_predict(self, dk: FreqaiDataKitchen, dataframe: DataFrame) -> None:
|
||||
def data_cleaning_predict(self, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Base data cleaning method for predict.
|
||||
Functions here are complementary to the functions of data_cleaning_train.
|
||||
"""
|
||||
ft_params = self.freqai_info["feature_parameters"]
|
||||
|
||||
# ensure user is feeding the correct indicators to the model
|
||||
self.check_if_feature_list_matches_strategy(dk)
|
||||
|
||||
if ft_params.get('inlier_metric_window', 0):
|
||||
dk.compute_inlier_metric(set_='predict')
|
||||
|
||||
if ft_params.get(
|
||||
"principal_component_analysis", False
|
||||
):
|
||||
dk.pca_transform(self.dk.data_dictionary['prediction_features'])
|
||||
dk.pca_transform(dk.data_dictionary['prediction_features'])
|
||||
|
||||
if ft_params.get("use_SVM_to_remove_outliers", False):
|
||||
dk.use_SVM_to_remove_outliers(predict=True)
|
||||
@@ -514,14 +523,7 @@ class IFreqaiModel(ABC):
|
||||
if ft_params.get("use_DBSCAN_to_remove_outliers", False):
|
||||
dk.use_DBSCAN_to_remove_outliers(predict=True)
|
||||
|
||||
def model_exists(
|
||||
self,
|
||||
pair: str,
|
||||
dk: FreqaiDataKitchen,
|
||||
trained_timestamp: int = None,
|
||||
model_filename: str = "",
|
||||
scanning: bool = False,
|
||||
) -> bool:
|
||||
def model_exists(self, dk: FreqaiDataKitchen) -> bool:
|
||||
"""
|
||||
Given a pair and path, check if a model already exists
|
||||
:param pair: pair e.g. BTC/USD
|
||||
@@ -529,11 +531,11 @@ class IFreqaiModel(ABC):
|
||||
:return:
|
||||
:boolean: whether the model file exists or not.
|
||||
"""
|
||||
path_to_modelfile = Path(dk.data_path / f"{model_filename}_model.joblib")
|
||||
path_to_modelfile = Path(dk.data_path / f"{dk.model_filename}_model.joblib")
|
||||
file_exists = path_to_modelfile.is_file()
|
||||
if file_exists and not scanning:
|
||||
if file_exists:
|
||||
logger.info("Found model at %s", dk.data_path / dk.model_filename)
|
||||
elif not scanning:
|
||||
else:
|
||||
logger.info("Could not find model at %s", dk.data_path / dk.model_filename)
|
||||
return file_exists
|
||||
|
||||
@@ -580,16 +582,16 @@ class IFreqaiModel(ABC):
|
||||
|
||||
# find the features indicated by strategy and store in datakitchen
|
||||
dk.find_features(unfiltered_dataframe)
|
||||
# import pytest
|
||||
# pytest.set_trace()
|
||||
dk.find_labels(unfiltered_dataframe)
|
||||
|
||||
model = self.train(unfiltered_dataframe, pair, dk)
|
||||
|
||||
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
|
||||
dk.set_new_model_names(pair, new_trained_timerange)
|
||||
self.dd.save_data(model, pair, dk)
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("plot_feature_importance", False):
|
||||
plot_feature_importance(model, pair, dk)
|
||||
if self.plot_features:
|
||||
plot_feature_importance(model, pair, dk, self.plot_features)
|
||||
|
||||
if self.freqai_info.get("purge_old_models", False):
|
||||
self.dd.purge_old_models()
|
||||
|
@@ -170,7 +170,7 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
|
||||
|
||||
# Data preparation
|
||||
fi_df = pd.DataFrame({
|
||||
"feature_names": np.array(dk.training_features_list),
|
||||
"feature_names": np.array(dk.data_dictionary['train_features'].columns),
|
||||
"feature_importance": np.array(feature_importance)
|
||||
})
|
||||
fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
|
||||
|
Reference in New Issue
Block a user