merge develop into feat/freqai-rl-dev

This commit is contained in:
robcaulk
2022-10-30 10:13:03 +01:00
129 changed files with 2648 additions and 1004 deletions

View File

@@ -51,7 +51,7 @@ class BaseClassifierModel(IFreqaiModel):
f"{end_date} --------------------")
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
@@ -78,7 +78,7 @@ class BaseClassifierModel(IFreqaiModel):
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_df: Full dataframe for the current backtest period.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove

View File

@@ -50,7 +50,7 @@ class BaseRegressionModel(IFreqaiModel):
f"{end_date} --------------------")
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)
@@ -77,7 +77,7 @@ class BaseRegressionModel(IFreqaiModel):
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_df: Full dataframe for the current backtest period.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove

View File

@@ -47,7 +47,7 @@ class BaseTensorFlowModel(IFreqaiModel):
f"{end_date} --------------------")
# split data into train/test data.
data_dictionary = dk.make_train_test_datasets(features_filtered, labels_filtered)
if not self.freqai_info.get("fit_live_predictions", 0) or not self.live:
if not self.freqai_info.get("fit_live_predictions_candles", 0) or not self.live:
dk.fit_labels()
# normalize all data based on train_dataset only
data_dictionary = dk.normalize_data(data_dictionary)

View File

@@ -1,14 +1,15 @@
import collections
import json
import logging
import re
import shutil
import threading
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Tuple, TypedDict
import numpy as np
import pandas as pd
import psutil
import rapidjson
from joblib import dump, load
from joblib.externals import cloudpickle
@@ -65,6 +66,8 @@ class FreqaiDataDrawer:
self.pair_dict: Dict[str, pair_info] = {}
# dictionary holding all actively inferenced models in memory given a model filename
self.model_dictionary: Dict[str, Any] = {}
# all additional metadata that we want to keep in ram
self.meta_data_dictionary: Dict[str, Dict[str, Any]] = {}
self.model_return_values: Dict[str, DataFrame] = {}
self.historic_data: Dict[str, Dict[str, DataFrame]] = {}
self.historic_predictions: Dict[str, DataFrame] = {}
@@ -78,19 +81,49 @@ class FreqaiDataDrawer:
self.historic_predictions_bkp_path = Path(
self.full_path / "historic_predictions.backup.pkl")
self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json")
self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")
self.follow_mode = follow_mode
if follow_mode:
self.create_follower_dict()
self.load_drawer_from_disk()
self.load_historic_predictions_from_disk()
self.load_metric_tracker_from_disk()
self.training_queue: Dict[str, int] = {}
self.history_lock = threading.Lock()
self.save_lock = threading.Lock()
self.pair_dict_lock = threading.Lock()
self.metric_tracker_lock = threading.Lock()
self.old_DBSCAN_eps: Dict[str, float] = {}
self.empty_pair_dict: pair_info = {
"model_filename": "", "trained_timestamp": 0,
"data_path": "", "extras": {}}
self.metric_tracker: Dict[str, Dict[str, Dict[str, list]]] = {}
def update_metric_tracker(self, metric: str, value: float, pair: str) -> None:
"""
General utility for adding and updating custom metrics. Typically used
for adding training performance, train timings, inferenc timings, cpu loads etc.
"""
with self.metric_tracker_lock:
if pair not in self.metric_tracker:
self.metric_tracker[pair] = {}
if metric not in self.metric_tracker[pair]:
self.metric_tracker[pair][metric] = {'timestamp': [], 'value': []}
timestamp = int(datetime.now(timezone.utc).timestamp())
self.metric_tracker[pair][metric]['value'].append(value)
self.metric_tracker[pair][metric]['timestamp'].append(timestamp)
def collect_metrics(self, time_spent: float, pair: str):
"""
Add metrics to the metric tracker dictionary
"""
load1, load5, load15 = psutil.getloadavg()
cpus = psutil.cpu_count()
self.update_metric_tracker('train_time', time_spent, pair)
self.update_metric_tracker('cpu_load1min', load1 / cpus, pair)
self.update_metric_tracker('cpu_load5min', load5 / cpus, pair)
self.update_metric_tracker('cpu_load15min', load15 / cpus, pair)
self.limit_ram_use = self.freqai_info.get('limit_ram_usage', False)
if 'rl_config' in self.freqai_info:
self.model_type = 'stable_baselines'
@@ -103,12 +136,12 @@ class FreqaiDataDrawer:
"""
Locate and load a previously saved data drawer full of all pair model metadata in
present model folder.
:return: bool - whether or not the drawer was located
Load any existing metric tracker that may be present.
"""
exists = self.pair_dictionary_path.is_file()
if exists:
with open(self.pair_dictionary_path, "r") as fp:
self.pair_dict = json.load(fp)
self.pair_dict = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
elif not self.follow_mode:
logger.info("Could not find existing datadrawer, starting from scratch")
else:
@@ -117,7 +150,18 @@ class FreqaiDataDrawer:
"sending null values back to strategy"
)
return exists
def load_metric_tracker_from_disk(self):
"""
Tries to load an existing metrics dictionary if the user
wants to collect metrics.
"""
if self.freqai_info.get('write_metrics_to_disk', False):
exists = self.metric_tracker_path.is_file()
if exists:
with open(self.metric_tracker_path, "r") as fp:
self.metric_tracker = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
else:
logger.info("Could not find existing metric tracker, starting from scratch")
def load_historic_predictions_from_disk(self):
"""
@@ -153,7 +197,7 @@ class FreqaiDataDrawer:
def save_historic_predictions_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
Save historic predictions pickle to disk
"""
with open(self.historic_predictions_path, "wb") as fp:
cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL)
@@ -161,6 +205,15 @@ class FreqaiDataDrawer:
# create a backup
shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path)
def save_metric_tracker_to_disk(self):
"""
Save metric tracker of all pair metrics collected.
"""
with self.save_lock:
with open(self.metric_tracker_path, 'w') as fp:
rapidjson.dump(self.metric_tracker, fp, default=self.np_encoder,
number_mode=rapidjson.NM_NATIVE)
def save_drawer_to_disk(self):
"""
Save data drawer full of all pair model metadata in present model folder.
@@ -419,9 +472,8 @@ class FreqaiDataDrawer:
def save_data(self, model: Any, coin: str, dk: FreqaiDataKitchen) -> None:
"""
Saves all data associated with a model for a single sub-train time range
:params:
:model: User trained model which can be reused for inferencing to generate
predictions
:param model: User trained model which can be reused for inferencing to generate
predictions
"""
if not dk.data_path.is_dir():
@@ -466,6 +518,10 @@ class FreqaiDataDrawer:
self.model_dictionary[coin] = model
self.pair_dict[coin]["model_filename"] = dk.model_filename
self.pair_dict[coin]["data_path"] = str(dk.data_path)
if coin not in self.meta_data_dictionary:
self.meta_data_dictionary[coin] = {}
self.meta_data_dictionary[coin]["train_df"] = dk.data_dictionary["train_features"]
self.meta_data_dictionary[coin]["meta_data"] = dk.data
self.save_drawer_to_disk()
return
@@ -476,7 +532,7 @@ class FreqaiDataDrawer:
presaved backtesting (prediction file loading).
"""
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
dk.data = json.load(fp)
dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
dk.training_features_list = dk.data["training_features_list"]
dk.label_list = dk.data["label_list"]
@@ -502,14 +558,19 @@ class FreqaiDataDrawer:
/ dk.data_path.parts[-1]
)
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
dk.data = json.load(fp)
dk.training_features_list = dk.data["training_features_list"]
dk.label_list = dk.data["label_list"]
if coin in self.meta_data_dictionary:
dk.data = self.meta_data_dictionary[coin]["meta_data"]
dk.data_dictionary["train_features"] = self.meta_data_dictionary[coin]["train_df"]
else:
with open(dk.data_path / f"{dk.model_filename}_metadata.json", "r") as fp:
dk.data = rapidjson.load(fp, number_mode=rapidjson.NM_NATIVE)
dk.data_dictionary["train_features"] = pd.read_pickle(
dk.data_path / f"{dk.model_filename}_trained_df.pkl"
)
dk.data_dictionary["train_features"] = pd.read_pickle(
dk.data_path / f"{dk.model_filename}_trained_df.pkl"
)
dk.training_features_list = dk.data["training_features_list"]
dk.label_list = dk.data["label_list"]
# try to access model in memory instead of loading object from disk to save time
if dk.live and coin in self.model_dictionary and not self.limit_ram_use:
@@ -549,8 +610,7 @@ class FreqaiDataDrawer:
Append new candles to our stores historic data (in memory) so that
we do not need to load candle history from disk and we dont need to
pinging exchange multiple times for the same candle.
:params:
dataframe: DataFrame = strategy provided dataframe
:param dataframe: DataFrame = strategy provided dataframe
"""
feat_params = self.freqai_info["feature_parameters"]
with self.history_lock:
@@ -596,9 +656,8 @@ class FreqaiDataDrawer:
"""
Load pair histories for all whitelist and corr_pairlist pairs.
Only called once upon startup of bot.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period_days
:param timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period_days
"""
history_data = self.historic_data
@@ -621,10 +680,9 @@ class FreqaiDataDrawer:
"""
Searches through our historic_data in memory and returns the dataframes relevant
to the present pair.
:params:
timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period_days
metadata: dict = strategy furnished pair metadata
:param timerange: TimeRange = full timerange required to populate all indicators
for training according to user defined train_period_days
:param metadata: dict = strategy furnished pair metadata
"""
with self.history_lock:
corr_dataframes: Dict[Any, Any] = {}
@@ -635,7 +693,8 @@ class FreqaiDataDrawer:
)
for tf in self.freqai_info["feature_parameters"].get("include_timeframes"):
base_dataframes[tf] = dk.slice_dataframe(timerange, historic_data[pair][tf])
base_dataframes[tf] = dk.slice_dataframe(
timerange, historic_data[pair][tf]).reset_index(drop=True)
if pairs:
for p in pairs:
if pair in p:
@@ -644,6 +703,6 @@ class FreqaiDataDrawer:
corr_dataframes[p] = {}
corr_dataframes[p][tf] = dk.slice_dataframe(
timerange, historic_data[p][tf]
)
).reset_index(drop=True)
return corr_dataframes, base_dataframes

View File

@@ -111,9 +111,8 @@ class FreqaiDataKitchen:
) -> None:
"""
Set the paths to the data for the present coin/botloop
:params:
metadata: dict = strategy furnished pair metadata
trained_timestamp: int = timestamp of most recent training
:param metadata: dict = strategy furnished pair metadata
:param trained_timestamp: int = timestamp of most recent training
"""
self.full_path = Path(
self.config["user_data_dir"] / "models" / str(self.freqai_config.get("identifier"))
@@ -133,8 +132,8 @@ class FreqaiDataKitchen:
Given the dataframe for the full history for training, split the data into
training and test data according to user specified parameters in configuration
file.
:filtered_dataframe: cleaned dataframe ready to be split.
:labels: cleaned labels ready to be split.
:param filtered_dataframe: cleaned dataframe ready to be split.
:param labels: cleaned labels ready to be split.
"""
feat_dict = self.freqai_config["feature_parameters"]
@@ -193,13 +192,14 @@ class FreqaiDataKitchen:
remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
0s in the prediction dataset. However, prediction dataset do_predict will reflect any
row that had a NaN and will shield user from that prediction.
:params:
:unfiltered_df: the full dataframe for the present training period
:training_feature_list: list, the training feature list constructed by
self.build_feature_list() according to user specified parameters in the configuration file.
:labels: the labels for the dataset
:training_filter: boolean which lets the function know if it is training data or
prediction data to be filtered.
:param unfiltered_df: the full dataframe for the present training period
:param training_feature_list: list, the training feature list constructed by
self.build_feature_list() according to user specified
parameters in the configuration file.
:param labels: the labels for the dataset
:param training_filter: boolean which lets the function know if it is training data or
prediction data to be filtered.
:returns:
:filtered_df: dataframe cleaned of NaNs and only containing the user
requested feature set.
@@ -214,7 +214,10 @@ class FreqaiDataKitchen:
const_cols = list((filtered_df.nunique() == 1).loc[lambda x: x].index)
if const_cols:
filtered_df = filtered_df.filter(filtered_df.columns.difference(const_cols))
self.data['constant_features_list'] = const_cols
logger.warning(f"Removed features {const_cols} with constant values.")
else:
self.data['constant_features_list'] = []
# we don't care about total row number (total no. datapoints) in training, we only care
# about removing any row with NaNs
# if labels has multiple columns (user wants to train multiple modelEs), we detect here
@@ -245,6 +248,8 @@ class FreqaiDataKitchen:
self.data["filter_drop_index_training"] = drop_index
else:
if len(self.data['constant_features_list']):
filtered_df = self.check_pred_labels(filtered_df)
# we are backtesting so we need to preserve row number to send back to strategy,
# so now we use do_predict to avoid any prediction based on a NaN
drop_index = pd.isnull(filtered_df).any(axis=1)
@@ -289,8 +294,8 @@ class FreqaiDataKitchen:
def normalize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
"""
Normalize all data in the data_dictionary according to the training dataset
:params:
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
:param data_dictionary: dictionary containing the cleaned and
split training/test data/labels
:returns:
:data_dictionary: updated dictionary with standardized values.
"""
@@ -464,6 +469,22 @@ class FreqaiDataKitchen:
return df
def check_pred_labels(self, df_predictions: DataFrame) -> DataFrame:
"""
Check that prediction feature labels match training feature labels.
:param df_predictions: incoming predictions
"""
constant_labels = self.data['constant_features_list']
df_predictions = df_predictions.filter(
df_predictions.columns.difference(constant_labels)
)
logger.warning(
f"Removed {len(constant_labels)} features from prediction features, "
f"these were considered constant values during most recent training."
)
return df_predictions
def principal_component_analysis(self) -> None:
"""
Performs Principal Component Analysis on the data for dimensionality reduction
@@ -520,8 +541,7 @@ class FreqaiDataKitchen:
def pca_transform(self, filtered_dataframe: DataFrame) -> None:
"""
Use an existing pca transform to transform data into components
:params:
filtered_dataframe: DataFrame = the cleaned dataframe
:param filtered_dataframe: DataFrame = the cleaned dataframe
"""
pca_components = self.pca.transform(filtered_dataframe)
self.data_dictionary["prediction_features"] = pd.DataFrame(
@@ -565,8 +585,7 @@ class FreqaiDataKitchen:
"""
Build/inference a Support Vector Machine to detect outliers
in training data and prediction
:params:
predict: bool = If true, inference an existing SVM model, else construct one
:param predict: bool = If true, inference an existing SVM model, else construct one
"""
if self.keras:
@@ -651,11 +670,11 @@ class FreqaiDataKitchen:
Use DBSCAN to cluster training data and remove "noisy" data (read outliers).
User controls this via the config param `DBSCAN_outlier_pct` which indicates the
pct of training data that they want to be considered outliers.
:params:
predict: bool = If False (training), iterate to find the best hyper parameters to match
user requested outlier percent target. If True (prediction), use the parameters
determined from the previous training to estimate if the current prediction point
is an outlier.
:param predict: bool = If False (training), iterate to find the best hyper parameters
to match user requested outlier percent target.
If True (prediction), use the parameters determined from
the previous training to estimate if the current prediction point
is an outlier.
"""
if predict:
@@ -944,6 +963,9 @@ class FreqaiDataKitchen:
append_df[f"{label}_mean"] = self.data["labels_mean"][label]
append_df[f"{label}_std"] = self.data["labels_std"][label]
for extra_col in self.data["extra_returns_per_train"]:
append_df["{extra_col}"] = self.data["extra_returns_per_train"][extra_col]
append_df["do_predict"] = do_predict
if self.freqai_config["feature_parameters"].get("DI_threshold", 0) > 0:
append_df["DI_values"] = self.DI_values
@@ -1122,15 +1144,13 @@ class FreqaiDataKitchen:
prediction_dataframe: DataFrame = pd.DataFrame(),
) -> DataFrame:
"""
Use the user defined strategy for populating indicators during
retrain
:params:
strategy: IStrategy = user defined strategy object
corr_dataframes: dict = dict containing the informative pair dataframes
(for user defined timeframes)
base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
metadata: dict = strategy furnished pair metadata
Use the user defined strategy for populating indicators during retrain
:param strategy: IStrategy = user defined strategy object
:param corr_dataframes: dict = dict containing the informative pair dataframes
(for user defined timeframes)
:param base_dataframes: dict = dict containing the current pair dataframes
(for user defined timeframes)
:param metadata: dict = strategy furnished pair metadata
:returns:
dataframe: DataFrame = dataframe containing populated indicators
"""

View File

@@ -1,5 +1,4 @@
import logging
import shutil
import threading
import time
from abc import ABC, abstractmethod
@@ -7,7 +6,7 @@ from collections import deque
from datetime import datetime, timezone
from pathlib import Path
from threading import Lock
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Literal, Tuple
import numpy as np
import pandas as pd
@@ -22,7 +21,7 @@ from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.utils import plot_feature_importance
from freqtrade.freqai.utils import plot_feature_importance, record_params
from freqtrade.strategy.interface import IStrategy
@@ -62,6 +61,7 @@ class IFreqaiModel(ABC):
"data_split_parameters", {})
self.model_training_parameters: Dict[str, Any] = config.get("freqai", {}).get(
"model_training_parameters", {})
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.retrain = False
self.first = True
self.set_full_path()
@@ -70,7 +70,6 @@ class IFreqaiModel(ABC):
if self.save_backtest_models:
logger.info('Backtesting module configured to save all models.')
self.dd = FreqaiDataDrawer(Path(self.full_path), self.config, self.follow_mode)
self.identifier: str = self.freqai_info.get("identifier", "no_id_provided")
self.scanning = False
self.ft_params = self.freqai_info["feature_parameters"]
self.keras: bool = self.freqai_info.get("keras", False)
@@ -100,12 +99,13 @@ class IFreqaiModel(ABC):
self.strategy: Optional[IStrategy] = None
self.max_system_threads = max(int(psutil.cpu_count() * 2 - 2), 1)
record_params(config, self.full_path)
def __getstate__(self):
"""
Return an empty state to be pickled in hyperopt
"""
return ({})
self.strategy: Optional[IStrategy] = None
def assert_config(self, config: Config) -> None:
@@ -149,7 +149,7 @@ class IFreqaiModel(ABC):
dataframe = dk.remove_features_from_df(dk.return_dataframe)
self.clean_up()
if self.live:
self.inference_timer('stop')
self.inference_timer('stop', metadata["pair"])
return dataframe
def clean_up(self):
@@ -210,29 +210,31 @@ class IFreqaiModel(ABC):
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
dk = FreqaiDataKitchen(self.config, self.live, pair)
dk.set_paths(pair, trained_timestamp)
(
retrain,
new_trained_timerange,
data_load_timerange,
) = dk.check_if_new_training_required(trained_timestamp)
dk.set_paths(pair, new_trained_timerange.stopts)
if retrain:
self.train_timer('start')
dk.set_paths(pair, new_trained_timerange.stopts)
try:
self.extract_data_and_train_model(
new_trained_timerange, pair, strategy, dk, data_load_timerange
)
except Exception as msg:
logger.warning(f'Training {pair} raised exception {msg}, skipping.')
logger.warning(f"Training {pair} raised exception {msg.__class__.__name__}. "
f"Message: {msg}, skipping.")
self.train_timer('stop')
self.train_timer('stop', pair)
# only rotate the queue after the first has been trained.
self.train_queue.rotate(-1)
self.dd.save_historic_predictions_to_disk()
if self.freqai_info.get('write_metrics_to_disk', False):
self.dd.save_metric_tracker_to_disk()
def start_backtesting(
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
@@ -281,9 +283,7 @@ class IFreqaiModel(ABC):
)
trained_timestamp_int = int(trained_timestamp.stopts)
dk.data_path = Path(
dk.full_path / f"sub-train-{pair.split('/')[0]}_{trained_timestamp_int}"
)
dk.set_paths(pair, trained_timestamp_int)
dk.set_new_model_names(pair, trained_timestamp)
@@ -540,14 +540,13 @@ class IFreqaiModel(ABC):
return file_exists
def set_full_path(self) -> None:
"""
Creates and sets the full path for the identifier
"""
self.full_path = Path(
self.config["user_data_dir"] / "models" / f"{self.freqai_info['identifier']}"
self.config["user_data_dir"] / "models" / f"{self.identifier}"
)
self.full_path.mkdir(parents=True, exist_ok=True)
shutil.copy(
self.config["config_files"][0],
Path(self.full_path, Path(self.config["config_files"][0]).name),
)
def extract_data_and_train_model(
self,
@@ -616,11 +615,11 @@ class IFreqaiModel(ABC):
If the user reuses an identifier on a subsequent instance,
this function will not be called. In that case, "real" predictions
will be appended to the loaded set of historic predictions.
:param: df: DataFrame = the dataframe containing the training feature data
:param: model: Any = A model which was `fit` using a common library such as
catboost or lightgbm
:param: dk: FreqaiDataKitchen = object containing methods for data analysis
:param: pair: str = current pair
:param df: DataFrame = the dataframe containing the training feature data
:param model: Any = A model which was `fit` using a common library such as
catboost or lightgbm
:param dk: FreqaiDataKitchen = object containing methods for data analysis
:param pair: str = current pair
"""
self.dd.historic_predictions[pair] = pred_df
@@ -671,7 +670,7 @@ class IFreqaiModel(ABC):
return
def inference_timer(self, do='start'):
def inference_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
"""
Timer designed to track the cumulative time spent in FreqAI for one pass through
the whitelist. This will check if the time spent is more than 1/4 the time
@@ -682,7 +681,10 @@ class IFreqaiModel(ABC):
self.begin_time = time.time()
elif do == 'stop':
end = time.time()
self.inference_time += (end - self.begin_time)
time_spent = (end - self.begin_time)
if self.freqai_info.get('write_metrics_to_disk', False):
self.dd.update_metric_tracker('inference_time', time_spent, pair)
self.inference_time += time_spent
if self.pair_it == self.total_pairs:
logger.info(
f'Total time spent inferencing pairlist {self.inference_time:.2f} seconds')
@@ -693,7 +695,7 @@ class IFreqaiModel(ABC):
self.inference_time = 0
return
def train_timer(self, do='start'):
def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):
"""
Timer designed to track the cumulative time spent training the full pairlist in
FreqAI.
@@ -703,7 +705,11 @@ class IFreqaiModel(ABC):
self.begin_time_train = time.time()
elif do == 'stop':
end = time.time()
self.train_time += (end - self.begin_time_train)
time_spent = (end - self.begin_time_train)
if self.freqai_info.get('write_metrics_to_disk', False):
self.dd.collect_metrics(time_spent, pair)
self.train_time += time_spent
if self.pair_it_train == self.total_pairs:
logger.info(
f'Total time spent training pairlist {self.train_time:.2f} seconds')

View File

@@ -1,4 +1,6 @@
import logging
import sys
from pathlib import Path
from typing import Any, Dict
from catboost import CatBoostClassifier, Pool
@@ -20,9 +22,8 @@ class CatboostClassifier(BaseClassifierModel):
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:params:
:data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
train_data = Pool(
@@ -30,15 +31,25 @@ class CatboostClassifier(BaseClassifierModel):
label=data_dictionary["train_labels"],
weight=data_dictionary["train_weights"],
)
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
test_data = None
else:
test_data = Pool(
data=data_dictionary["test_features"],
label=data_dictionary["test_labels"],
weight=data_dictionary["test_weights"],
)
cbr = CatBoostClassifier(
allow_writing_files=False,
allow_writing_files=True,
loss_function='MultiClass',
train_dir=Path(dk.data_path),
**self.model_training_parameters,
)
init_model = self.get_init_model(dk.pair)
cbr.fit(train_data, init_model=init_model)
cbr.fit(X=train_data, eval_set=test_data, init_model=init_model,
log_cout=sys.stdout, log_cerr=sys.stderr)
return cbr

View File

@@ -1,4 +1,6 @@
import logging
import sys
from pathlib import Path
from typing import Any, Dict
from catboost import CatBoostRegressor, Pool
@@ -41,10 +43,12 @@ class CatboostRegressor(BaseRegressionModel):
init_model = self.get_init_model(dk.pair)
model = CatBoostRegressor(
allow_writing_files=False,
allow_writing_files=True,
train_dir=Path(dk.data_path),
**self.model_training_parameters,
)
model.fit(X=train_data, eval_set=test_data, init_model=init_model)
model.fit(X=train_data, eval_set=test_data, init_model=init_model,
log_cout=sys.stdout, log_cerr=sys.stderr)
return model

View File

@@ -1,4 +1,6 @@
import logging
import sys
from pathlib import Path
from typing import Any, Dict
from catboost import CatBoostRegressor, Pool
@@ -26,7 +28,8 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
"""
cbr = CatBoostRegressor(
allow_writing_files=False,
allow_writing_files=True,
train_dir=Path(dk.data_path),
**self.model_training_parameters,
)
@@ -56,8 +59,10 @@ class CatboostRegressorMultiTarget(BaseRegressionModel):
fit_params = []
for i in range(len(eval_sets)):
fit_params.append(
{'eval_set': eval_sets[i], 'init_model': init_models[i]})
fit_params.append({
'eval_set': eval_sets[i], 'init_model': init_models[i],
'log_cout': sys.stdout, 'log_cerr': sys.stderr,
})
model = FreqaiMultiOutputRegressor(estimator=cbr)
thread_training = self.freqai_info.get('multitarget_parallel_training', False)

View File

@@ -20,9 +20,8 @@ class LightGBMClassifier(BaseClassifierModel):
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:params:
:data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:

View File

@@ -26,9 +26,8 @@ class XGBoostClassifier(BaseClassifierModel):
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:params:
:data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
X = data_dictionary["train_features"].to_numpy()
@@ -65,7 +64,7 @@ class XGBoostClassifier(BaseClassifierModel):
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_df: Full dataframe for the current backtest period.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove

View File

@@ -0,0 +1,84 @@
import logging
from typing import Any, Dict, Tuple
import numpy as np
import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from pandas.api.types import is_integer_dtype
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRFClassifier
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class XGBoostRFClassifier(BaseClassifierModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
X = data_dictionary["train_features"].to_numpy()
y = data_dictionary["train_labels"].to_numpy()[:, 0]
le = LabelEncoder()
if not is_integer_dtype(y):
y = pd.Series(le.fit_transform(y), dtype="int64")
if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
eval_set = None
else:
test_features = data_dictionary["test_features"].to_numpy()
test_labels = data_dictionary["test_labels"].to_numpy()[:, 0]
if not is_integer_dtype(test_labels):
test_labels = pd.Series(le.transform(test_labels), dtype="int64")
eval_set = [(test_features, test_labels)]
train_weights = data_dictionary["train_weights"]
init_model = self.get_init_model(dk.pair)
model = XGBRFClassifier(**self.model_training_parameters)
model.fit(X=X, y=y, eval_set=eval_set, sample_weight=train_weights,
xgb_model=init_model)
return model
def predict(
self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
"""
Filter the prediction features data and predict with it.
:param unfiltered_df: Full dataframe for the current backtest period.
:return:
:pred_df: dataframe containing the predictions
:do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
(pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)
le = LabelEncoder()
label = dk.label_list[0]
labels_before = list(dk.data['labels_std'].keys())
labels_after = le.fit_transform(labels_before).tolist()
pred_df[label] = le.inverse_transform(pred_df[label])
pred_df = pred_df.rename(
columns={labels_after[i]: labels_before[i] for i in range(len(labels_before))})
return (pred_df, dk.do_predict)

View File

@@ -0,0 +1,46 @@
import logging
from typing import Any, Dict
from xgboost import XGBRFRegressor
from freqtrade.freqai.base_models.BaseRegressionModel import BaseRegressionModel
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
class XGBoostRFRegressor(BaseRegressionModel):
"""
User created prediction model. The class needs to override three necessary
functions, predict(), train(), fit(). The class inherits ModelHandler which
has its own DataHandler where data is held, saved, loaded, and managed.
"""
def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
"""
User sets up the training and test data to fit their desired model here
:param data_dictionary: the dictionary constructed by DataHandler to hold
all the training and test data/labels.
"""
X = data_dictionary["train_features"]
y = data_dictionary["train_labels"]
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
eval_weights = None
else:
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
eval_weights = [data_dictionary['test_weights']]
sample_weight = data_dictionary["train_weights"]
xgb_model = self.get_init_model(dk.pair)
model = XGBRFRegressor(**self.model_training_parameters)
model.fit(X=X, y=y, sample_weight=sample_weight, eval_set=eval_set,
sample_weight_eval_set=eval_weights, xgb_model=xgb_model)
return model

View File

@@ -29,6 +29,7 @@ class XGBoostRegressor(BaseRegressionModel):
if self.freqai_info.get("data_split_parameters", {}).get("test_size", 0.1) == 0:
eval_set = None
eval_weights = None
else:
eval_set = [(data_dictionary["test_features"], data_dictionary["test_labels"])]
eval_weights = [data_dictionary['test_weights']]

View File

@@ -1,9 +1,11 @@
import logging
from datetime import datetime, timezone
from typing import Any
from pathlib import Path
from typing import Any, Dict
import numpy as np
import pandas as pd
import rapidjson
from freqtrade.configuration import TimeRange
from freqtrade.constants import Config
@@ -191,3 +193,28 @@ def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
fig.update_layout(title_text=f"Best and worst features by importance {pair}")
label = label.replace('&', '').replace('%', '') # escape two FreqAI specific characters
store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path)
def record_params(config: Dict[str, Any], full_path: Path) -> None:
"""
Records run params in the full path for reproducibility
"""
params_record_path = full_path / "run_params.json"
run_params = {
"freqai": config.get('freqai', {}),
"timeframe": config.get('timeframe'),
"stake_amount": config.get('stake_amount'),
"stake_currency": config.get('stake_currency'),
"max_open_trades": config.get('max_open_trades'),
"pairs": config.get('exchange', {}).get('pair_whitelist')
}
with open(params_record_path, "w") as handle:
rapidjson.dump(
run_params,
handle,
indent=4,
default=str,
number_mode=rapidjson.NM_NATIVE | rapidjson.NM_NAN
)