merge from origin develop; and fix conflicts
This commit is contained in:
@@ -78,7 +78,7 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
@@ -77,7 +77,7 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
@@ -1,14 +1,15 @@
|
||||
import collections
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Tuple, TypedDict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psutil
|
||||
import rapidjson
|
||||
from joblib import dump, load
|
||||
from joblib.externals import cloudpickle
|
||||
@@ -65,6 +66,8 @@ class FreqaiDataDrawer:
|
||||
self.pair_dict: Dict[str, pair_info] = {}
|
||||
# dictionary holding all actively inferenced models in memory given a model filename
|
||||
self.model_dictionary: Dict[str, Any] = {}
|
||||
# all additional metadata that we want to keep in ram
|
||||
self.meta_data_dictionary: Dict[str, Dict[str, Any]] = {}
|
||||
self.model_return_values: Dict[str, DataFrame] = {}
|
||||
self.historic_data: Dict[str, Dict[str, DataFrame]] = {}
|
||||
self.historic_predictions: Dict[str, DataFrame] = {}
|
||||
@@ -78,30 +81,60 @@ class FreqaiDataDrawer:
|
||||
self.historic_predictions_bkp_path = Path(
|
||||
self.full_path / "historic_predictions.backup.pkl")
|
||||
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
|
||||
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
|
||||
self.follow_mode = follow_mode
|
||||
if follow_mode:
|
||||
self.create_follower_dict()
|
||||
self.load_drawer_from_disk()
|
||||
self.load_historic_predictions_from_disk()
|
||||
self.load_metric_tracker_from_disk()
|
||||
self.training_queue: Dict[str, int] = {}
|
||||
self.history_lock = threading.Lock()
|
||||
self.save_lock = threading.Lock()
|
||||
self.pair_dict_lock = threading.Lock()
|
||||
self.metric_tracker_lock = threading.Lock()
|
||||
self.old_DBSCAN_eps: Dict[str, float] = {}
|
||||
self.empty_pair_dict: pair_info = {
|
||||
"model_filename": "", "trained_timestamp": 0,
|
||||
"data_path": "", "extras": {}}
|
||||
self.metric_tracker: Dict[str, Dict[str, Dict[str, list]]] = {}
|
||||
|
||||
def update_metric_tracker(self, metric: str, value: float, pair: str) -> None:
|
||||
"""
|
||||
General utility for adding and updating custom metrics. Typically used
|
||||
for adding training performance, train timings, inferenc timings, cpu loads etc.
|
||||
"""
|
||||
with self.metric_tracker_lock:
|
||||
if pair not in self.metric_tracker:
|
||||
self.metric_tracker[pair] = {}
|
||||
if metric not in self.metric_tracker[pair]:
|
||||
self.metric_tracker[pair][metric] = {'timestamp': [], 'value': []}
|
||||
|
||||
timestamp = int(datetime.now(timezone.utc).timestamp())
|
||||
self.metric_tracker[pair][metric]['value'].append(value)
|
||||
self.metric_tracker[pair][metric]['timestamp'].append(timestamp)
|
||||
|
||||
def collect_metrics(self, time_spent: float, pair: str):
|
||||
"""
|
||||
Add metrics to the metric tracker dictionary
|
||||
"""
|
||||
load1, load5, load15 = psutil.getloadavg()
|
||||
cpus = psutil.cpu_count()
|
||||
self.update_metric_tracker('train_time', time_spent, pair)
|
||||
self.update_metric_tracker('cpu_load1min', load1 / cpus, pair)
|
||||
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
|
||||
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
|
||||
|
||||
def load_drawer_from_disk(self):
|
||||
"""
|
||||
Locate and load a previously saved data drawer full of all pair model metadata in
|
||||
present model folder.
|
||||
:return: bool - whether or not the drawer was located
|
||||
Load any existing metric tracker that may be present.
|
||||
"""
|
||||
exists = self.pair_dictionary_path.is_file()
|
||||
if exists:
|
||||
with open(self.pair_dictionary_path, "r") as fp:
|
||||
self.pair_dict = json.load(fp)
|
||||
self.pair_dict = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
elif not self.follow_mode:
|
||||
logger.info("Could not find existing datadrawer, starting from scratch")
|
||||
else:
|
||||
@@ -110,7 +143,18 @@ class FreqaiDataDrawer:
|
||||
"sending null values back to strategy"
|
||||
)
|
||||
|
||||
return exists
|
||||
def load_metric_tracker_from_disk(self):
|
||||
"""
|
||||
Tries to load an existing metrics dictionary if the user
|
||||
wants to collect metrics.
|
||||
"""
|
||||
if self.freqai_info.get('write_metrics_to_disk', False):
|
||||
exists = self.metric_tracker_path.is_file()
|
||||
if exists:
|
||||
with open(self.metric_tracker_path, "r") as fp:
|
||||
self.metric_tracker = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
else:
|
||||
logger.info("Could not find existing metric tracker, starting from scratch")
|
||||
|
||||
def load_historic_predictions_from_disk(self):
|
||||
"""
|
||||
@@ -146,7 +190,7 @@ class FreqaiDataDrawer:
|
||||
|
||||
def save_historic_predictions_to_disk(self):
|
||||
"""
|
||||
Save data drawer full of all pair model metadata in present model folder.
|
||||
Save historic predictions pickle to disk
|
||||
"""
|
||||
with open(self.historic_predictions_path, "wb") as fp:
|
||||
cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
|
||||
@@ -154,6 +198,15 @@ class FreqaiDataDrawer:
|
||||
# create a backup
|
||||
shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path)
|
||||
|
||||
def save_metric_tracker_to_disk(self):
|
||||
"""
|
||||
Save metric tracker of all pair metrics collected.
|
||||
"""
|
||||
with self.save_lock:
|
||||
with open(self.metric_tracker_path, 'w') as fp:
|
||||
rapidjson.dump(self.metric_tracker, fp, default=self.np_encoder,
|
||||
number_mode=rapidjson.NM_NATIVE)
|
||||
|
||||
def save_drawer_to_disk(self):
|
||||
"""
|
||||
Save data drawer full of all pair model metadata in present model folder.
|
||||
@@ -412,9 +465,8 @@ class FreqaiDataDrawer:
|
||||
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Saves all data associated with a model for a single sub-train time range
|
||||
:params:
|
||||
:model: User trained model which can be reused for inferencing to generate
|
||||
predictions
|
||||
:param model: User trained model which can be reused for inferencing to generate
|
||||
predictions
|
||||
"""
|
||||
|
||||
if not dk.data_path.is_dir():
|
||||
@@ -454,9 +506,14 @@ class FreqaiDataDrawer:
|
||||
)
|
||||
|
||||
# if self.live:
|
||||
# store as much in ram as possible to increase performance
|
||||
self.model_dictionary[coin] = model
|
||||
self.pair_dict[coin]["model_filename"] = dk.model_filename
|
||||
self.pair_dict[coin]["data_path"] = str(dk.data_path)
|
||||
if coin not in self.meta_data_dictionary:
|
||||
self.meta_data_dictionary[coin] = {}
|
||||
self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"]
|
||||
self.meta_data_dictionary[coin]["meta_data"] = dk.data
|
||||
self.save_drawer_to_disk()
|
||||
|
||||
return
|
||||
@@ -467,7 +524,7 @@ class FreqaiDataDrawer:
|
||||
presaved backtesting (prediction file loading).
|
||||
"""
|
||||
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
|
||||
dk.data = json.load(fp)
|
||||
dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
dk.label_list = dk.data["label_list"]
|
||||
|
||||
@@ -493,14 +550,19 @@ class FreqaiDataDrawer:
|
||||
/ dk.data_path.parts[-1]
|
||||
)
|
||||
|
||||
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
|
||||
dk.data = json.load(fp)
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
dk.label_list = dk.data["label_list"]
|
||||
if coin in self.meta_data_dictionary:
|
||||
dk.data = self.meta_data_dictionary[coin]["meta_data"]
|
||||
dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"]
|
||||
else:
|
||||
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
|
||||
dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
|
||||
dk.data_dictionary["train_features"] = pd.read_pickle(
|
||||
dk.data_path / f"{dk.model_filename}_trained_df.pkl.bz2"
|
||||
)
|
||||
dk.data_dictionary["train_features"] = pd.read_pickle(
|
||||
dk.data_path / f"{dk.model_filename}_trained_df.pkl.bz2"
|
||||
)
|
||||
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
dk.label_list = dk.data["label_list"]
|
||||
|
||||
# try to access model in memory instead of loading object from disk to save time
|
||||
if dk.live and coin in self.model_dictionary:
|
||||
@@ -532,8 +594,7 @@ class FreqaiDataDrawer:
|
||||
Append new candles to our stores historic data (in memory) so that
|
||||
we do not need to load candle history from disk and we dont need to
|
||||
pinging exchange multiple times for the same candle.
|
||||
:params:
|
||||
dataframe: DataFrame = strategy provided dataframe
|
||||
:param dataframe: DataFrame = strategy provided dataframe
|
||||
"""
|
||||
feat_params = self.freqai_info["feature_parameters"]
|
||||
with self.history_lock:
|
||||
@@ -579,9 +640,8 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
Load pair histories for all whitelist and corr_pairlist pairs.
|
||||
Only called once upon startup of bot.
|
||||
:params:
|
||||
timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
:param timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
"""
|
||||
history_data = self.historic_data
|
||||
|
||||
@@ -604,10 +664,9 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
Searches through our historic_data in memory and returns the dataframes relevant
|
||||
to the present pair.
|
||||
:params:
|
||||
timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
:param timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
"""
|
||||
with self.history_lock:
|
||||
corr_dataframes: Dict[Any, Any] = {}
|
||||
@@ -631,22 +690,3 @@ class FreqaiDataDrawer:
|
||||
).reset_index(drop=True)
|
||||
|
||||
return corr_dataframes, base_dataframes
|
||||
|
||||
# to be used if we want to send predictions directly to the follower instead of forcing
|
||||
# follower to load models and inference
|
||||
# def save_model_return_values_to_disk(self) -> None:
|
||||
# with open(self.full_path / str('model_return_values.json'), "w") as fp:
|
||||
# json.dump(self.model_return_values, fp, default=self.np_encoder)
|
||||
|
||||
# def load_model_return_values_from_disk(self, dk: FreqaiDataKitchen) -> FreqaiDataKitchen:
|
||||
# exists = Path(self.full_path / str('model_return_values.json')).resolve().exists()
|
||||
# if exists:
|
||||
# with open(self.full_path / str('model_return_values.json'), "r") as fp:
|
||||
# self.model_return_values = json.load(fp)
|
||||
# elif not self.follow_mode:
|
||||
# logger.info("Could not find existing datadrawer, starting from scratch")
|
||||
# else:
|
||||
# logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
|
||||
# 'sending null values back to strategy')
|
||||
|
||||
# return exists, dk
|
||||
|
@@ -107,9 +107,8 @@ class FreqaiDataKitchen:
|
||||
) -> None:
|
||||
"""
|
||||
Set the paths to the data for the present coin/botloop
|
||||
:params:
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
trained_timestamp: int = timestamp of most recent training
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:param trained_timestamp: int = timestamp of most recent training
|
||||
"""
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
|
||||
@@ -129,8 +128,8 @@ class FreqaiDataKitchen:
|
||||
Given the dataframe for the full history for training, split the data into
|
||||
training and test data according to user specified parameters in configuration
|
||||
file.
|
||||
:filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:labels: cleaned labels ready to be split.
|
||||
:param filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:param labels: cleaned labels ready to be split.
|
||||
"""
|
||||
feat_dict = self.freqai_config["feature_parameters"]
|
||||
|
||||
@@ -189,13 +188,14 @@ class FreqaiDataKitchen:
|
||||
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
|
||||
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
|
||||
row that had a NaN and will shield user from that prediction.
|
||||
:params:
|
||||
:unfiltered_df: the full dataframe for the present training period
|
||||
:training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified parameters in the configuration file.
|
||||
:labels: the labels for the dataset
|
||||
:training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
|
||||
:param unfiltered_df: the full dataframe for the present training period
|
||||
:param training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified
|
||||
parameters in the configuration file.
|
||||
:param labels: the labels for the dataset
|
||||
:param training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
:returns:
|
||||
:filtered_df: dataframe cleaned of NaNs and only containing the user
|
||||
requested feature set.
|
||||
@@ -241,6 +241,7 @@ class FreqaiDataKitchen:
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else:
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1)
|
||||
@@ -285,8 +286,8 @@ class FreqaiDataKitchen:
|
||||
def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||
"""
|
||||
Normalize all data in the data_dictionary according to the training dataset
|
||||
:params:
|
||||
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
|
||||
:param data_dictionary: dictionary containing the cleaned and
|
||||
split training/test data/labels
|
||||
:returns:
|
||||
:data_dictionary: updated dictionary with standardized values.
|
||||
"""
|
||||
@@ -460,6 +461,24 @@ class FreqaiDataKitchen:
|
||||
|
||||
return df
|
||||
|
||||
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Check that prediction feature labels match training feature labels.
|
||||
:params:
|
||||
:df_predictions: incoming predictions
|
||||
"""
|
||||
train_labels = self.data_dictionary["train_features"].columns
|
||||
pred_labels = df_predictions.columns
|
||||
num_diffs = len(pred_labels.difference(train_labels))
|
||||
if num_diffs != 0:
|
||||
df_predictions = df_predictions[train_labels]
|
||||
logger.warning(
|
||||
f"Removed {num_diffs} features from prediction features, "
|
||||
f"these were likely considered constant values during most recent training."
|
||||
)
|
||||
|
||||
return df_predictions
|
||||
|
||||
def principal_component_analysis(self) -> None:
|
||||
"""
|
||||
Performs Principal Component Analysis on the data for dimensionality reduction
|
||||
@@ -516,8 +535,7 @@ class FreqaiDataKitchen:
|
||||
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
|
||||
"""
|
||||
Use an existing pca transform to transform data into components
|
||||
:params:
|
||||
filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
:param filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
"""
|
||||
pca_components = self.pca.transform(filtered_dataframe)
|
||||
self.data_dictionary["prediction_features"] = pd.DataFrame(
|
||||
@@ -561,8 +579,7 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
Build/inference a Support Vector Machine to detect outliers
|
||||
in training data and prediction
|
||||
:params:
|
||||
predict: bool = If true, inference an existing SVM model, else construct one
|
||||
:param predict: bool = If true, inference an existing SVM model, else construct one
|
||||
"""
|
||||
|
||||
if self.keras:
|
||||
@@ -647,11 +664,11 @@ class FreqaiDataKitchen:
|
||||
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
|
||||
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
|
||||
pct of training data that they want to be considered outliers.
|
||||
:params:
|
||||
predict: bool = If False (training), iterate to find the best hyper parameters to match
|
||||
user requested outlier percent target. If True (prediction), use the parameters
|
||||
determined from the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
:param predict: bool = If False (training), iterate to find the best hyper parameters
|
||||
to match user requested outlier percent target.
|
||||
If True (prediction), use the parameters determined from
|
||||
the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
"""
|
||||
|
||||
if predict:
|
||||
@@ -1132,15 +1149,13 @@ class FreqaiDataKitchen:
|
||||
prediction_dataframe: DataFrame = pd.DataFrame(),
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Use the user defined strategy for populating indicators during
|
||||
retrain
|
||||
:params:
|
||||
strategy: IStrategy = user defined strategy object
|
||||
corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
Use the user defined strategy for populating indicators during retrain
|
||||
:param strategy: IStrategy = user defined strategy object
|
||||
:param corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:returns:
|
||||
dataframe: DataFrame = dataframe containing populated indicators
|
||||
"""
|
||||
|
@@ -7,7 +7,7 @@ from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from threading import Lock
|
||||
from typing import Any, Dict, List, Tuple
|
||||
from typing import Any, Dict, List, Literal, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
@@ -144,7 +144,7 @@ class IFreqaiModel(ABC):
|
||||
dataframe = dk.remove_features_from_df(dk.return_dataframe)
|
||||
self.clean_up()
|
||||
if self.live:
|
||||
self.inference_timer('stop')
|
||||
self.inference_timer('stop', metadata["pair"])
|
||||
return dataframe
|
||||
|
||||
def clean_up(self):
|
||||
@@ -196,16 +196,15 @@ class IFreqaiModel(ABC):
|
||||
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
|
||||
|
||||
dk = FreqaiDataKitchen(self.config, self.live, pair)
|
||||
dk.set_paths(pair, trained_timestamp)
|
||||
(
|
||||
retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange,
|
||||
) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
|
||||
if retrain:
|
||||
self.train_timer('start')
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
try:
|
||||
self.extract_data_and_train_model(
|
||||
new_trained_timerange, pair, strategy, dk, data_load_timerange
|
||||
@@ -214,12 +213,14 @@ class IFreqaiModel(ABC):
|
||||
logger.warning(f"Training {pair} raised exception {msg.__class__.__name__}. "
|
||||
f"Message: {msg}, skipping.")
|
||||
|
||||
self.train_timer('stop')
|
||||
self.train_timer('stop', pair)
|
||||
|
||||
# only rotate the queue after the first has been trained.
|
||||
self.train_queue.rotate(-1)
|
||||
|
||||
self.dd.save_historic_predictions_to_disk()
|
||||
if self.freqai_info.get('write_metrics_to_disk', False):
|
||||
self.dd.save_metric_tracker_to_disk()
|
||||
|
||||
def start_backtesting(
|
||||
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
|
||||
@@ -268,9 +269,7 @@ class IFreqaiModel(ABC):
|
||||
)
|
||||
|
||||
trained_timestamp_int = int(trained_timestamp.stopts)
|
||||
dk.data_path = Path(
|
||||
dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
|
||||
)
|
||||
dk.set_paths(pair, trained_timestamp_int)
|
||||
|
||||
dk.set_new_model_names(pair, trained_timestamp)
|
||||
|
||||
@@ -603,11 +602,11 @@ class IFreqaiModel(ABC):
|
||||
If the user reuses an identifier on a subsequent instance,
|
||||
this function will not be called. In that case, "real" predictions
|
||||
will be appended to the loaded set of historic predictions.
|
||||
:param: df: DataFrame = the dataframe containing the training feature data
|
||||
:param: model: Any = A model which was `fit` using a common library such as
|
||||
catboost or lightgbm
|
||||
:param: dk: FreqaiDataKitchen = object containing methods for data analysis
|
||||
:param: pair: str = current pair
|
||||
:param df: DataFrame = the dataframe containing the training feature data
|
||||
:param model: Any = A model which was `fit` using a common library such as
|
||||
catboost or lightgbm
|
||||
:param dk: FreqaiDataKitchen = object containing methods for data analysis
|
||||
:param pair: str = current pair
|
||||
"""
|
||||
|
||||
self.dd.historic_predictions[pair] = pred_df
|
||||
@@ -658,7 +657,7 @@ class IFreqaiModel(ABC):
|
||||
|
||||
return
|
||||
|
||||
def inference_timer(self, do='start'):
|
||||
def inference_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
|
||||
"""
|
||||
Timer designed to track the cumulative time spent in FreqAI for one pass through
|
||||
the whitelist. This will check if the time spent is more than 1/4 the time
|
||||
@@ -669,7 +668,10 @@ class IFreqaiModel(ABC):
|
||||
self.begin_time = time.time()
|
||||
elif do == 'stop':
|
||||
end = time.time()
|
||||
self.inference_time += (end - self.begin_time)
|
||||
time_spent = (end - self.begin_time)
|
||||
if self.freqai_info.get('write_metrics_to_disk', False):
|
||||
self.dd.update_metric_tracker('inference_time', time_spent, pair)
|
||||
self.inference_time += time_spent
|
||||
if self.pair_it == self.total_pairs:
|
||||
logger.info(
|
||||
f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds')
|
||||
@@ -680,7 +682,7 @@ class IFreqaiModel(ABC):
|
||||
self.inference_time = 0
|
||||
return
|
||||
|
||||
def train_timer(self, do='start'):
|
||||
def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
|
||||
"""
|
||||
Timer designed to track the cumulative time spent training the full pairlist in
|
||||
FreqAI.
|
||||
@@ -690,7 +692,11 @@ class IFreqaiModel(ABC):
|
||||
self.begin_time_train = time.time()
|
||||
elif do == 'stop':
|
||||
end = time.time()
|
||||
self.train_time += (end - self.begin_time_train)
|
||||
time_spent = (end - self.begin_time_train)
|
||||
if self.freqai_info.get('write_metrics_to_disk', False):
|
||||
self.dd.collect_metrics(time_spent, pair)
|
||||
|
||||
self.train_time += time_spent
|
||||
if self.pair_it_train == self.total_pairs:
|
||||
logger.info(
|
||||
f'Total time spent training pairlist {self.train_time:.2f} seconds')
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostClassifier, Pool
|
||||
@@ -20,9 +21,8 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
train_data = Pool(
|
||||
@@ -30,15 +30,24 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
label=data_dictionary["train_labels"],
|
||||
weight=data_dictionary["train_weights"],
|
||||
)
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
test_data = None
|
||||
else:
|
||||
test_data = Pool(
|
||||
data=data_dictionary["test_features"],
|
||||
label=data_dictionary["test_labels"],
|
||||
weight=data_dictionary["test_weights"],
|
||||
)
|
||||
|
||||
cbr = CatBoostClassifier(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
loss_function='MultiClass',
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
cbr.fit(train_data, init_model=init_model)
|
||||
cbr.fit(X=train_data, eval_set=test_data, init_model=init_model)
|
||||
|
||||
return cbr
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
@@ -41,7 +42,8 @@ class CatboostRegressor(BaseRegressionModel):
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
|
@@ -1,4 +1,5 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
@@ -26,7 +27,8 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
"""
|
||||
|
||||
cbr = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
|
@@ -20,9 +20,8 @@ class LightGBMClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
|
||||
|
@@ -26,9 +26,8 @@ class XGBoostClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"].to_numpy()
|
||||
@@ -65,7 +64,7 @@ class XGBoostClassifier(BaseClassifierModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
85
freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
Normal file
85
freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import logging
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from pandas.api.types import is_integer_dtype
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from xgboost import XGBRFClassifier
|
||||
|
||||
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostRFClassifier(BaseClassifierModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
functions, predict(), train(), fit(). The class inherits ModelHandler which
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"].to_numpy()
|
||||
y = data_dictionary["train_labels"].to_numpy()[:, 0]
|
||||
|
||||
le = LabelEncoder()
|
||||
if not is_integer_dtype(y):
|
||||
y = pd.Series(le.fit_transform(y), dtype="int64")
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
|
||||
eval_set = None
|
||||
else:
|
||||
test_features = data_dictionary["test_features"].to_numpy()
|
||||
test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]
|
||||
|
||||
if not is_integer_dtype(test_labels):
|
||||
test_labels = pd.Series(le.transform(test_labels), dtype="int64")
|
||||
|
||||
eval_set = [(test_features, test_labels)]
|
||||
|
||||
train_weights = data_dictionary["train_weights"]
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = XGBRFClassifier(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
|
||||
xgb_model=init_model)
|
||||
|
||||
return model
|
||||
|
||||
def predict(
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
"""
|
||||
|
||||
(pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)
|
||||
|
||||
le = LabelEncoder()
|
||||
label = dk.label_list[0]
|
||||
labels_before = list(dk.data['labels_std'].keys())
|
||||
labels_after = le.fit_transform(labels_before).tolist()
|
||||
pred_df[label] = le.inverse_transform(pred_df[label])
|
||||
pred_df = pred_df.rename(
|
||||
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
|
||||
|
||||
return (pred_df, dk.do_predict)
|
45
freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
Normal file
45
freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from xgboost import XGBRFRegressor
|
||||
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostRFRegressor(BaseRegressionModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
functions, predict(), train(), fit(). The class inherits ModelHandler which
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
eval_set = None
|
||||
else:
|
||||
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
|
||||
eval_weights = [data_dictionary['test_weights']]
|
||||
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
xgb_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = XGBRFRegressor(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
|
||||
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
|
||||
|
||||
return model
|
Reference in New Issue
Block a user