merge develop into feat/freqai-rl-dev
This commit is contained in:
@@ -51,7 +51,7 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
@@ -78,7 +78,7 @@ class BaseClassifierModel(IFreqaiModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
@@ -50,7 +50,7 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
@@ -77,7 +77,7 @@ class BaseRegressionModel(IFreqaiModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
@@ -47,7 +47,7 @@ class BaseTensorFlowModel(IFreqaiModel):
|
||||
f"{end_date} --------------------")
|
||||
# split data into train/test data.
|
||||
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
|
||||
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
|
||||
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
|
||||
dk.fit_labels()
|
||||
# normalize all data based on train_dataset only
|
||||
data_dictionary = dk.normalize_data(data_dictionary)
|
||||
|
@@ -1,14 +1,15 @@
|
||||
import collections
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Tuple, TypedDict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import psutil
|
||||
import rapidjson
|
||||
from joblib import dump, load
|
||||
from joblib.externals import cloudpickle
|
||||
@@ -65,6 +66,8 @@ class FreqaiDataDrawer:
|
||||
self.pair_dict: Dict[str, pair_info] = {}
|
||||
# dictionary holding all actively inferenced models in memory given a model filename
|
||||
self.model_dictionary: Dict[str, Any] = {}
|
||||
# all additional metadata that we want to keep in ram
|
||||
self.meta_data_dictionary: Dict[str, Dict[str, Any]] = {}
|
||||
self.model_return_values: Dict[str, DataFrame] = {}
|
||||
self.historic_data: Dict[str, Dict[str, DataFrame]] = {}
|
||||
self.historic_predictions: Dict[str, DataFrame] = {}
|
||||
@@ -78,19 +81,49 @@ class FreqaiDataDrawer:
|
||||
self.historic_predictions_bkp_path = Path(
|
||||
self.full_path / "historic_predictions.backup.pkl")
|
||||
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
|
||||
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
|
||||
self.follow_mode = follow_mode
|
||||
if follow_mode:
|
||||
self.create_follower_dict()
|
||||
self.load_drawer_from_disk()
|
||||
self.load_historic_predictions_from_disk()
|
||||
self.load_metric_tracker_from_disk()
|
||||
self.training_queue: Dict[str, int] = {}
|
||||
self.history_lock = threading.Lock()
|
||||
self.save_lock = threading.Lock()
|
||||
self.pair_dict_lock = threading.Lock()
|
||||
self.metric_tracker_lock = threading.Lock()
|
||||
self.old_DBSCAN_eps: Dict[str, float] = {}
|
||||
self.empty_pair_dict: pair_info = {
|
||||
"model_filename": "", "trained_timestamp": 0,
|
||||
"data_path": "", "extras": {}}
|
||||
self.metric_tracker: Dict[str, Dict[str, Dict[str, list]]] = {}
|
||||
|
||||
def update_metric_tracker(self, metric: str, value: float, pair: str) -> None:
|
||||
"""
|
||||
General utility for adding and updating custom metrics. Typically used
|
||||
for adding training performance, train timings, inferenc timings, cpu loads etc.
|
||||
"""
|
||||
with self.metric_tracker_lock:
|
||||
if pair not in self.metric_tracker:
|
||||
self.metric_tracker[pair] = {}
|
||||
if metric not in self.metric_tracker[pair]:
|
||||
self.metric_tracker[pair][metric] = {'timestamp': [], 'value': []}
|
||||
|
||||
timestamp = int(datetime.now(timezone.utc).timestamp())
|
||||
self.metric_tracker[pair][metric]['value'].append(value)
|
||||
self.metric_tracker[pair][metric]['timestamp'].append(timestamp)
|
||||
|
||||
def collect_metrics(self, time_spent: float, pair: str):
|
||||
"""
|
||||
Add metrics to the metric tracker dictionary
|
||||
"""
|
||||
load1, load5, load15 = psutil.getloadavg()
|
||||
cpus = psutil.cpu_count()
|
||||
self.update_metric_tracker('train_time', time_spent, pair)
|
||||
self.update_metric_tracker('cpu_load1min', load1 / cpus, pair)
|
||||
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
|
||||
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
|
||||
self.limit_ram_use = self.freqai_info.get('limit_ram_usage', False)
|
||||
if 'rl_config' in self.freqai_info:
|
||||
self.model_type = 'stable_baselines'
|
||||
@@ -103,12 +136,12 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
Locate and load a previously saved data drawer full of all pair model metadata in
|
||||
present model folder.
|
||||
:return: bool - whether or not the drawer was located
|
||||
Load any existing metric tracker that may be present.
|
||||
"""
|
||||
exists = self.pair_dictionary_path.is_file()
|
||||
if exists:
|
||||
with open(self.pair_dictionary_path, "r") as fp:
|
||||
self.pair_dict = json.load(fp)
|
||||
self.pair_dict = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
elif not self.follow_mode:
|
||||
logger.info("Could not find existing datadrawer, starting from scratch")
|
||||
else:
|
||||
@@ -117,7 +150,18 @@ class FreqaiDataDrawer:
|
||||
"sending null values back to strategy"
|
||||
)
|
||||
|
||||
return exists
|
||||
def load_metric_tracker_from_disk(self):
|
||||
"""
|
||||
Tries to load an existing metrics dictionary if the user
|
||||
wants to collect metrics.
|
||||
"""
|
||||
if self.freqai_info.get('write_metrics_to_disk', False):
|
||||
exists = self.metric_tracker_path.is_file()
|
||||
if exists:
|
||||
with open(self.metric_tracker_path, "r") as fp:
|
||||
self.metric_tracker = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
else:
|
||||
logger.info("Could not find existing metric tracker, starting from scratch")
|
||||
|
||||
def load_historic_predictions_from_disk(self):
|
||||
"""
|
||||
@@ -153,7 +197,7 @@ class FreqaiDataDrawer:
|
||||
|
||||
def save_historic_predictions_to_disk(self):
|
||||
"""
|
||||
Save data drawer full of all pair model metadata in present model folder.
|
||||
Save historic predictions pickle to disk
|
||||
"""
|
||||
with open(self.historic_predictions_path, "wb") as fp:
|
||||
cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
|
||||
@@ -161,6 +205,15 @@ class FreqaiDataDrawer:
|
||||
# create a backup
|
||||
shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path)
|
||||
|
||||
def save_metric_tracker_to_disk(self):
|
||||
"""
|
||||
Save metric tracker of all pair metrics collected.
|
||||
"""
|
||||
with self.save_lock:
|
||||
with open(self.metric_tracker_path, 'w') as fp:
|
||||
rapidjson.dump(self.metric_tracker, fp, default=self.np_encoder,
|
||||
number_mode=rapidjson.NM_NATIVE)
|
||||
|
||||
def save_drawer_to_disk(self):
|
||||
"""
|
||||
Save data drawer full of all pair model metadata in present model folder.
|
||||
@@ -419,9 +472,8 @@ class FreqaiDataDrawer:
|
||||
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
|
||||
"""
|
||||
Saves all data associated with a model for a single sub-train time range
|
||||
:params:
|
||||
:model: User trained model which can be reused for inferencing to generate
|
||||
predictions
|
||||
:param model: User trained model which can be reused for inferencing to generate
|
||||
predictions
|
||||
"""
|
||||
|
||||
if not dk.data_path.is_dir():
|
||||
@@ -466,6 +518,10 @@ class FreqaiDataDrawer:
|
||||
self.model_dictionary[coin] = model
|
||||
self.pair_dict[coin]["model_filename"] = dk.model_filename
|
||||
self.pair_dict[coin]["data_path"] = str(dk.data_path)
|
||||
if coin not in self.meta_data_dictionary:
|
||||
self.meta_data_dictionary[coin] = {}
|
||||
self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"]
|
||||
self.meta_data_dictionary[coin]["meta_data"] = dk.data
|
||||
self.save_drawer_to_disk()
|
||||
|
||||
return
|
||||
@@ -476,7 +532,7 @@ class FreqaiDataDrawer:
|
||||
presaved backtesting (prediction file loading).
|
||||
"""
|
||||
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
|
||||
dk.data = json.load(fp)
|
||||
dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
dk.label_list = dk.data["label_list"]
|
||||
|
||||
@@ -502,14 +558,19 @@ class FreqaiDataDrawer:
|
||||
/ dk.data_path.parts[-1]
|
||||
)
|
||||
|
||||
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
|
||||
dk.data = json.load(fp)
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
dk.label_list = dk.data["label_list"]
|
||||
if coin in self.meta_data_dictionary:
|
||||
dk.data = self.meta_data_dictionary[coin]["meta_data"]
|
||||
dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"]
|
||||
else:
|
||||
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
|
||||
dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
|
||||
|
||||
dk.data_dictionary["train_features"] = pd.read_pickle(
|
||||
dk.data_path / f"{dk.model_filename}_trained_df.pkl"
|
||||
)
|
||||
dk.data_dictionary["train_features"] = pd.read_pickle(
|
||||
dk.data_path / f"{dk.model_filename}_trained_df.pkl"
|
||||
)
|
||||
|
||||
dk.training_features_list = dk.data["training_features_list"]
|
||||
dk.label_list = dk.data["label_list"]
|
||||
|
||||
# try to access model in memory instead of loading object from disk to save time
|
||||
if dk.live and coin in self.model_dictionary and not self.limit_ram_use:
|
||||
@@ -549,8 +610,7 @@ class FreqaiDataDrawer:
|
||||
Append new candles to our stores historic data (in memory) so that
|
||||
we do not need to load candle history from disk and we dont need to
|
||||
pinging exchange multiple times for the same candle.
|
||||
:params:
|
||||
dataframe: DataFrame = strategy provided dataframe
|
||||
:param dataframe: DataFrame = strategy provided dataframe
|
||||
"""
|
||||
feat_params = self.freqai_info["feature_parameters"]
|
||||
with self.history_lock:
|
||||
@@ -596,9 +656,8 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
Load pair histories for all whitelist and corr_pairlist pairs.
|
||||
Only called once upon startup of bot.
|
||||
:params:
|
||||
timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
:param timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
"""
|
||||
history_data = self.historic_data
|
||||
|
||||
@@ -621,10 +680,9 @@ class FreqaiDataDrawer:
|
||||
"""
|
||||
Searches through our historic_data in memory and returns the dataframes relevant
|
||||
to the present pair.
|
||||
:params:
|
||||
timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
:param timerange: TimeRange = full timerange required to populate all indicators
|
||||
for training according to user defined train_period_days
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
"""
|
||||
with self.history_lock:
|
||||
corr_dataframes: Dict[Any, Any] = {}
|
||||
@@ -635,7 +693,8 @@ class FreqaiDataDrawer:
|
||||
)
|
||||
|
||||
for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
|
||||
base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf])
|
||||
base_dataframes[tf] = dk.slice_dataframe(
|
||||
timerange, historic_data[pair][tf]).reset_index(drop=True)
|
||||
if pairs:
|
||||
for p in pairs:
|
||||
if pair in p:
|
||||
@@ -644,6 +703,6 @@ class FreqaiDataDrawer:
|
||||
corr_dataframes[p] = {}
|
||||
corr_dataframes[p][tf] = dk.slice_dataframe(
|
||||
timerange, historic_data[p][tf]
|
||||
)
|
||||
).reset_index(drop=True)
|
||||
|
||||
return corr_dataframes, base_dataframes
|
||||
|
@@ -111,9 +111,8 @@ class FreqaiDataKitchen:
|
||||
) -> None:
|
||||
"""
|
||||
Set the paths to the data for the present coin/botloop
|
||||
:params:
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
trained_timestamp: int = timestamp of most recent training
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:param trained_timestamp: int = timestamp of most recent training
|
||||
"""
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
|
||||
@@ -133,8 +132,8 @@ class FreqaiDataKitchen:
|
||||
Given the dataframe for the full history for training, split the data into
|
||||
training and test data according to user specified parameters in configuration
|
||||
file.
|
||||
:filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:labels: cleaned labels ready to be split.
|
||||
:param filtered_dataframe: cleaned dataframe ready to be split.
|
||||
:param labels: cleaned labels ready to be split.
|
||||
"""
|
||||
feat_dict = self.freqai_config["feature_parameters"]
|
||||
|
||||
@@ -193,13 +192,14 @@ class FreqaiDataKitchen:
|
||||
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
|
||||
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
|
||||
row that had a NaN and will shield user from that prediction.
|
||||
:params:
|
||||
:unfiltered_df: the full dataframe for the present training period
|
||||
:training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified parameters in the configuration file.
|
||||
:labels: the labels for the dataset
|
||||
:training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
|
||||
:param unfiltered_df: the full dataframe for the present training period
|
||||
:param training_feature_list: list, the training feature list constructed by
|
||||
self.build_feature_list() according to user specified
|
||||
parameters in the configuration file.
|
||||
:param labels: the labels for the dataset
|
||||
:param training_filter: boolean which lets the function know if it is training data or
|
||||
prediction data to be filtered.
|
||||
:returns:
|
||||
:filtered_df: dataframe cleaned of NaNs and only containing the user
|
||||
requested feature set.
|
||||
@@ -214,7 +214,10 @@ class FreqaiDataKitchen:
|
||||
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
|
||||
if const_cols:
|
||||
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
|
||||
self.data['constant_features_list'] = const_cols
|
||||
logger.warning(f"Removed features {const_cols} with constant values.")
|
||||
else:
|
||||
self.data['constant_features_list'] = []
|
||||
# we don't care about total row number (total no. datapoints) in training, we only care
|
||||
# about removing any row with NaNs
|
||||
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
|
||||
@@ -245,6 +248,8 @@ class FreqaiDataKitchen:
|
||||
self.data["filter_drop_index_training"] = drop_index
|
||||
|
||||
else:
|
||||
if len(self.data['constant_features_list']):
|
||||
filtered_df = self.check_pred_labels(filtered_df)
|
||||
# we are backtesting so we need to preserve row number to send back to strategy,
|
||||
# so now we use do_predict to avoid any prediction based on a NaN
|
||||
drop_index = pd.isnull(filtered_df).any(axis=1)
|
||||
@@ -289,8 +294,8 @@ class FreqaiDataKitchen:
|
||||
def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
|
||||
"""
|
||||
Normalize all data in the data_dictionary according to the training dataset
|
||||
:params:
|
||||
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
|
||||
:param data_dictionary: dictionary containing the cleaned and
|
||||
split training/test data/labels
|
||||
:returns:
|
||||
:data_dictionary: updated dictionary with standardized values.
|
||||
"""
|
||||
@@ -464,6 +469,22 @@ class FreqaiDataKitchen:
|
||||
|
||||
return df
|
||||
|
||||
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
|
||||
"""
|
||||
Check that prediction feature labels match training feature labels.
|
||||
:param df_predictions: incoming predictions
|
||||
"""
|
||||
constant_labels = self.data['constant_features_list']
|
||||
df_predictions = df_predictions.filter(
|
||||
df_predictions.columns.difference(constant_labels)
|
||||
)
|
||||
logger.warning(
|
||||
f"Removed {len(constant_labels)} features from prediction features, "
|
||||
f"these were considered constant values during most recent training."
|
||||
)
|
||||
|
||||
return df_predictions
|
||||
|
||||
def principal_component_analysis(self) -> None:
|
||||
"""
|
||||
Performs Principal Component Analysis on the data for dimensionality reduction
|
||||
@@ -520,8 +541,7 @@ class FreqaiDataKitchen:
|
||||
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
|
||||
"""
|
||||
Use an existing pca transform to transform data into components
|
||||
:params:
|
||||
filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
:param filtered_dataframe: DataFrame = the cleaned dataframe
|
||||
"""
|
||||
pca_components = self.pca.transform(filtered_dataframe)
|
||||
self.data_dictionary["prediction_features"] = pd.DataFrame(
|
||||
@@ -565,8 +585,7 @@ class FreqaiDataKitchen:
|
||||
"""
|
||||
Build/inference a Support Vector Machine to detect outliers
|
||||
in training data and prediction
|
||||
:params:
|
||||
predict: bool = If true, inference an existing SVM model, else construct one
|
||||
:param predict: bool = If true, inference an existing SVM model, else construct one
|
||||
"""
|
||||
|
||||
if self.keras:
|
||||
@@ -651,11 +670,11 @@ class FreqaiDataKitchen:
|
||||
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
|
||||
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
|
||||
pct of training data that they want to be considered outliers.
|
||||
:params:
|
||||
predict: bool = If False (training), iterate to find the best hyper parameters to match
|
||||
user requested outlier percent target. If True (prediction), use the parameters
|
||||
determined from the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
:param predict: bool = If False (training), iterate to find the best hyper parameters
|
||||
to match user requested outlier percent target.
|
||||
If True (prediction), use the parameters determined from
|
||||
the previous training to estimate if the current prediction point
|
||||
is an outlier.
|
||||
"""
|
||||
|
||||
if predict:
|
||||
@@ -944,6 +963,9 @@ class FreqaiDataKitchen:
|
||||
append_df[f"{label}_mean"] = self.data["labels_mean"][label]
|
||||
append_df[f"{label}_std"] = self.data["labels_std"][label]
|
||||
|
||||
for extra_col in self.data["extra_returns_per_train"]:
|
||||
append_df["{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
|
||||
|
||||
append_df["do_predict"] = do_predict
|
||||
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
|
||||
append_df["DI_values"] = self.DI_values
|
||||
@@ -1122,15 +1144,13 @@ class FreqaiDataKitchen:
|
||||
prediction_dataframe: DataFrame = pd.DataFrame(),
|
||||
) -> DataFrame:
|
||||
"""
|
||||
Use the user defined strategy for populating indicators during
|
||||
retrain
|
||||
:params:
|
||||
strategy: IStrategy = user defined strategy object
|
||||
corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
metadata: dict = strategy furnished pair metadata
|
||||
Use the user defined strategy for populating indicators during retrain
|
||||
:param strategy: IStrategy = user defined strategy object
|
||||
:param corr_dataframes: dict = dict containing the informative pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param base_dataframes: dict = dict containing the current pair dataframes
|
||||
(for user defined timeframes)
|
||||
:param metadata: dict = strategy furnished pair metadata
|
||||
:returns:
|
||||
dataframe: DataFrame = dataframe containing populated indicators
|
||||
"""
|
||||
|
@@ -1,5 +1,4 @@
|
||||
import logging
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
@@ -7,7 +6,7 @@ from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from threading import Lock
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, List, Optional, Literal, Tuple
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
@@ -22,7 +21,7 @@ from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.utils import plot_feature_importance
|
||||
from freqtrade.freqai.utils import plot_feature_importance, record_params
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
|
||||
|
||||
@@ -62,6 +61,7 @@ class IFreqaiModel(ABC):
|
||||
"data_split_parameters", {})
|
||||
self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
|
||||
"model_training_parameters", {})
|
||||
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
|
||||
self.retrain = False
|
||||
self.first = True
|
||||
self.set_full_path()
|
||||
@@ -70,7 +70,6 @@ class IFreqaiModel(ABC):
|
||||
if self.save_backtest_models:
|
||||
logger.info('Backtesting module configured to save all models.')
|
||||
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
|
||||
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
|
||||
self.scanning = False
|
||||
self.ft_params = self.freqai_info["feature_parameters"]
|
||||
self.keras: bool = self.freqai_info.get("keras", False)
|
||||
@@ -100,12 +99,13 @@ class IFreqaiModel(ABC):
|
||||
self.strategy: Optional[IStrategy] = None
|
||||
self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
|
||||
|
||||
record_params(config, self.full_path)
|
||||
|
||||
def __getstate__(self):
|
||||
"""
|
||||
Return an empty state to be pickled in hyperopt
|
||||
"""
|
||||
return ({})
|
||||
self.strategy: Optional[IStrategy] = None
|
||||
|
||||
def assert_config(self, config: Config) -> None:
|
||||
|
||||
@@ -149,7 +149,7 @@ class IFreqaiModel(ABC):
|
||||
dataframe = dk.remove_features_from_df(dk.return_dataframe)
|
||||
self.clean_up()
|
||||
if self.live:
|
||||
self.inference_timer('stop')
|
||||
self.inference_timer('stop', metadata["pair"])
|
||||
return dataframe
|
||||
|
||||
def clean_up(self):
|
||||
@@ -210,29 +210,31 @@ class IFreqaiModel(ABC):
|
||||
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
|
||||
|
||||
dk = FreqaiDataKitchen(self.config, self.live, pair)
|
||||
dk.set_paths(pair, trained_timestamp)
|
||||
(
|
||||
retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange,
|
||||
) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
|
||||
if retrain:
|
||||
self.train_timer('start')
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
try:
|
||||
self.extract_data_and_train_model(
|
||||
new_trained_timerange, pair, strategy, dk, data_load_timerange
|
||||
)
|
||||
except Exception as msg:
|
||||
logger.warning(f'Training {pair} raised exception {msg}, skipping.')
|
||||
logger.warning(f"Training {pair} raised exception {msg.__class__.__name__}. "
|
||||
f"Message: {msg}, skipping.")
|
||||
|
||||
self.train_timer('stop')
|
||||
self.train_timer('stop', pair)
|
||||
|
||||
# only rotate the queue after the first has been trained.
|
||||
self.train_queue.rotate(-1)
|
||||
|
||||
self.dd.save_historic_predictions_to_disk()
|
||||
if self.freqai_info.get('write_metrics_to_disk', False):
|
||||
self.dd.save_metric_tracker_to_disk()
|
||||
|
||||
def start_backtesting(
|
||||
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
|
||||
@@ -281,9 +283,7 @@ class IFreqaiModel(ABC):
|
||||
)
|
||||
|
||||
trained_timestamp_int = int(trained_timestamp.stopts)
|
||||
dk.data_path = Path(
|
||||
dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
|
||||
)
|
||||
dk.set_paths(pair, trained_timestamp_int)
|
||||
|
||||
dk.set_new_model_names(pair, trained_timestamp)
|
||||
|
||||
@@ -540,14 +540,13 @@ class IFreqaiModel(ABC):
|
||||
return file_exists
|
||||
|
||||
def set_full_path(self) -> None:
|
||||
"""
|
||||
Creates and sets the full path for the identifier
|
||||
"""
|
||||
self.full_path = Path(
|
||||
self.config["user_data_dir"] / "models" / f"{self.freqai_info['identifier']}"
|
||||
self.config["user_data_dir"] / "models" / f"{self.identifier}"
|
||||
)
|
||||
self.full_path.mkdir(parents=True, exist_ok=True)
|
||||
shutil.copy(
|
||||
self.config["config_files"][0],
|
||||
Path(self.full_path, Path(self.config["config_files"][0]).name),
|
||||
)
|
||||
|
||||
def extract_data_and_train_model(
|
||||
self,
|
||||
@@ -616,11 +615,11 @@ class IFreqaiModel(ABC):
|
||||
If the user reuses an identifier on a subsequent instance,
|
||||
this function will not be called. In that case, "real" predictions
|
||||
will be appended to the loaded set of historic predictions.
|
||||
:param: df: DataFrame = the dataframe containing the training feature data
|
||||
:param: model: Any = A model which was `fit` using a common library such as
|
||||
catboost or lightgbm
|
||||
:param: dk: FreqaiDataKitchen = object containing methods for data analysis
|
||||
:param: pair: str = current pair
|
||||
:param df: DataFrame = the dataframe containing the training feature data
|
||||
:param model: Any = A model which was `fit` using a common library such as
|
||||
catboost or lightgbm
|
||||
:param dk: FreqaiDataKitchen = object containing methods for data analysis
|
||||
:param pair: str = current pair
|
||||
"""
|
||||
|
||||
self.dd.historic_predictions[pair] = pred_df
|
||||
@@ -671,7 +670,7 @@ class IFreqaiModel(ABC):
|
||||
|
||||
return
|
||||
|
||||
def inference_timer(self, do='start'):
|
||||
def inference_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
|
||||
"""
|
||||
Timer designed to track the cumulative time spent in FreqAI for one pass through
|
||||
the whitelist. This will check if the time spent is more than 1/4 the time
|
||||
@@ -682,7 +681,10 @@ class IFreqaiModel(ABC):
|
||||
self.begin_time = time.time()
|
||||
elif do == 'stop':
|
||||
end = time.time()
|
||||
self.inference_time += (end - self.begin_time)
|
||||
time_spent = (end - self.begin_time)
|
||||
if self.freqai_info.get('write_metrics_to_disk', False):
|
||||
self.dd.update_metric_tracker('inference_time', time_spent, pair)
|
||||
self.inference_time += time_spent
|
||||
if self.pair_it == self.total_pairs:
|
||||
logger.info(
|
||||
f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds')
|
||||
@@ -693,7 +695,7 @@ class IFreqaiModel(ABC):
|
||||
self.inference_time = 0
|
||||
return
|
||||
|
||||
def train_timer(self, do='start'):
|
||||
def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
|
||||
"""
|
||||
Timer designed to track the cumulative time spent training the full pairlist in
|
||||
FreqAI.
|
||||
@@ -703,7 +705,11 @@ class IFreqaiModel(ABC):
|
||||
self.begin_time_train = time.time()
|
||||
elif do == 'stop':
|
||||
end = time.time()
|
||||
self.train_time += (end - self.begin_time_train)
|
||||
time_spent = (end - self.begin_time_train)
|
||||
if self.freqai_info.get('write_metrics_to_disk', False):
|
||||
self.dd.collect_metrics(time_spent, pair)
|
||||
|
||||
self.train_time += time_spent
|
||||
if self.pair_it_train == self.total_pairs:
|
||||
logger.info(
|
||||
f'Total time spent training pairlist {self.train_time:.2f} seconds')
|
||||
|
@@ -1,4 +1,6 @@
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostClassifier, Pool
|
||||
@@ -20,9 +22,8 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
train_data = Pool(
|
||||
@@ -30,15 +31,25 @@ class CatboostClassifier(BaseClassifierModel):
|
||||
label=data_dictionary["train_labels"],
|
||||
weight=data_dictionary["train_weights"],
|
||||
)
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
test_data = None
|
||||
else:
|
||||
test_data = Pool(
|
||||
data=data_dictionary["test_features"],
|
||||
label=data_dictionary["test_labels"],
|
||||
weight=data_dictionary["test_weights"],
|
||||
)
|
||||
|
||||
cbr = CatBoostClassifier(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
loss_function='MultiClass',
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
cbr.fit(train_data, init_model=init_model)
|
||||
cbr.fit(X=train_data, eval_set=test_data, init_model=init_model,
|
||||
log_cout=sys.stdout, log_cerr=sys.stderr)
|
||||
|
||||
return cbr
|
||||
|
@@ -1,4 +1,6 @@
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
@@ -41,10 +43,12 @@ class CatboostRegressor(BaseRegressionModel):
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
model.fit(X=train_data, eval_set=test_data, init_model=init_model)
|
||||
model.fit(X=train_data, eval_set=test_data, init_model=init_model,
|
||||
log_cout=sys.stdout, log_cerr=sys.stderr)
|
||||
|
||||
return model
|
||||
|
@@ -1,4 +1,6 @@
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
from catboost import CatBoostRegressor, Pool
|
||||
@@ -26,7 +28,8 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
"""
|
||||
|
||||
cbr = CatBoostRegressor(
|
||||
allow_writing_files=False,
|
||||
allow_writing_files=True,
|
||||
train_dir=Path(dk.data_path),
|
||||
**self.model_training_parameters,
|
||||
)
|
||||
|
||||
@@ -56,8 +59,10 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
|
||||
|
||||
fit_params = []
|
||||
for i in range(len(eval_sets)):
|
||||
fit_params.append(
|
||||
{'eval_set': eval_sets[i], 'init_model': init_models[i]})
|
||||
fit_params.append({
|
||||
'eval_set': eval_sets[i], 'init_model': init_models[i],
|
||||
'log_cout': sys.stdout, 'log_cerr': sys.stderr,
|
||||
})
|
||||
|
||||
model = FreqaiMultiOutputRegressor(estimator=cbr)
|
||||
thread_training = self.freqai_info.get('multitarget_parallel_training', False)
|
||||
|
@@ -20,9 +20,8 @@ class LightGBMClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
|
||||
|
@@ -26,9 +26,8 @@ class XGBoostClassifier(BaseClassifierModel):
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:params:
|
||||
:data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"].to_numpy()
|
||||
@@ -65,7 +64,7 @@ class XGBoostClassifier(BaseClassifierModel):
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param: unfiltered_df: Full dataframe for the current backtest period.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
|
84
freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
Normal file
84
freqtrade/freqai/prediction_models/XGBoostRFClassifier.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import logging
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from pandas.api.types import is_integer_dtype
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from xgboost import XGBRFClassifier
|
||||
|
||||
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostRFClassifier(BaseClassifierModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
functions, predict(), train(), fit(). The class inherits ModelHandler which
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"].to_numpy()
|
||||
y = data_dictionary["train_labels"].to_numpy()[:, 0]
|
||||
|
||||
le = LabelEncoder()
|
||||
if not is_integer_dtype(y):
|
||||
y = pd.Series(le.fit_transform(y), dtype="int64")
|
||||
|
||||
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
|
||||
eval_set = None
|
||||
else:
|
||||
test_features = data_dictionary["test_features"].to_numpy()
|
||||
test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]
|
||||
|
||||
if not is_integer_dtype(test_labels):
|
||||
test_labels = pd.Series(le.transform(test_labels), dtype="int64")
|
||||
|
||||
eval_set = [(test_features, test_labels)]
|
||||
|
||||
train_weights = data_dictionary["train_weights"]
|
||||
|
||||
init_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = XGBRFClassifier(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
|
||||
xgb_model=init_model)
|
||||
|
||||
return model
|
||||
|
||||
def predict(
|
||||
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
|
||||
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
|
||||
"""
|
||||
Filter the prediction features data and predict with it.
|
||||
:param unfiltered_df: Full dataframe for the current backtest period.
|
||||
:return:
|
||||
:pred_df: dataframe containing the predictions
|
||||
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
|
||||
data (NaNs) or felt uncertain about data (PCA and DI index)
|
||||
"""
|
||||
|
||||
(pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)
|
||||
|
||||
le = LabelEncoder()
|
||||
label = dk.label_list[0]
|
||||
labels_before = list(dk.data['labels_std'].keys())
|
||||
labels_after = le.fit_transform(labels_before).tolist()
|
||||
pred_df[label] = le.inverse_transform(pred_df[label])
|
||||
pred_df = pred_df.rename(
|
||||
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
|
||||
|
||||
return (pred_df, dk.do_predict)
|
46
freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
Normal file
46
freqtrade/freqai/prediction_models/XGBoostRFRegressor.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import logging
|
||||
from typing import Any, Dict
|
||||
|
||||
from xgboost import XGBRFRegressor
|
||||
|
||||
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostRFRegressor(BaseRegressionModel):
|
||||
"""
|
||||
User created prediction model. The class needs to override three necessary
|
||||
functions, predict(), train(), fit(). The class inherits ModelHandler which
|
||||
has its own DataHandler where data is held, saved, loaded, and managed.
|
||||
"""
|
||||
|
||||
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
|
||||
"""
|
||||
User sets up the training and test data to fit their desired model here
|
||||
:param data_dictionary: the dictionary constructed by DataHandler to hold
|
||||
all the training and test data/labels.
|
||||
"""
|
||||
|
||||
X = data_dictionary["train_features"]
|
||||
y = data_dictionary["train_labels"]
|
||||
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
eval_set = None
|
||||
eval_weights = None
|
||||
else:
|
||||
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
|
||||
eval_weights = [data_dictionary['test_weights']]
|
||||
|
||||
sample_weight = data_dictionary["train_weights"]
|
||||
|
||||
xgb_model = self.get_init_model(dk.pair)
|
||||
|
||||
model = XGBRFRegressor(**self.model_training_parameters)
|
||||
|
||||
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
|
||||
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
|
||||
|
||||
return model
|
@@ -29,6 +29,7 @@ class XGBoostRegressor(BaseRegressionModel):
|
||||
|
||||
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
|
||||
eval_set = None
|
||||
eval_weights = None
|
||||
else:
|
||||
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
|
||||
eval_weights = [data_dictionary['test_weights']]
|
||||
|
@@ -1,9 +1,11 @@
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import rapidjson
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
@@ -191,3 +193,28 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
|
||||
fig.update_layout(title_text=f"Best and worst features by importance {pair}")
|
||||
label = label.replace('&', '').replace('%', '') # escape two FreqAI specific characters
|
||||
store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path)
|
||||
|
||||
|
||||
def record_params(config: Dict[str, Any], full_path: Path) -> None:
|
||||
"""
|
||||
Records run params in the full path for reproducibility
|
||||
"""
|
||||
params_record_path = full_path / "run_params.json"
|
||||
|
||||
run_params = {
|
||||
"freqai": config.get('freqai', {}),
|
||||
"timeframe": config.get('timeframe'),
|
||||
"stake_amount": config.get('stake_amount'),
|
||||
"stake_currency": config.get('stake_currency'),
|
||||
"max_open_trades": config.get('max_open_trades'),
|
||||
"pairs": config.get('exchange', {}).get('pair_whitelist')
|
||||
}
|
||||
|
||||
with open(params_record_path, "w") as handle:
|
||||
rapidjson.dump(
|
||||
run_params,
|
||||
handle,
|
||||
indent=4,
|
||||
default=str,
|
||||
number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN
|
||||
)
|
||||
|
Reference in New Issue
Block a user