Merge branch 'develop' into dev-merge-rl
This commit is contained in:
@@ -16,6 +16,7 @@ from numpy.typing import NDArray
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
from freqtrade.data.history import load_pair_history
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
@@ -27,9 +28,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
class pair_info(TypedDict):
|
||||
model_filename: str
|
||||
first: bool
|
||||
trained_timestamp: int
|
||||
priority: int
|
||||
data_path: str
|
||||
extras: dict
|
||||
|
||||
@@ -58,7 +57,7 @@ class FreqaiDataDrawer:
|
||||
Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert
|
||||
"""
|
||||
|
||||
def __init__(self, full_path: Path, config: dict, follow_mode: bool = False):
|
||||
def __init__(self, full_path: Path, config: Config, follow_mode: bool = False):
|
||||
|
||||
self.config = config
|
||||
self.freqai_info = config.get("freqai", {})
|
||||
@@ -91,7 +90,7 @@ class FreqaiDataDrawer:
|
||||
self.old_DBSCAN_eps: Dict[str, float] = {}
|
||||
self.empty_pair_dict: pair_info = {
|
||||
"model_filename": "", "trained_timestamp": 0,
|
||||
"priority": 1, "first": True, "data_path": "", "extras": {}}
|
||||
"data_path": "", "extras": {}}
|
||||
self.limit_ram_use = self.freqai_info.get('limit_ram_usage', False)
|
||||
|
||||
def load_drawer_from_disk(self):
|
||||
@@ -217,7 +216,6 @@ class FreqaiDataDrawer:
|
||||
self.pair_dict[pair] = self.empty_pair_dict.copy()
|
||||
model_filename = ""
|
||||
trained_timestamp = 0
|
||||
self.pair_dict[pair]["priority"] = len(self.pair_dict)
|
||||
|
||||
if not data_path_set and self.follow_mode:
|
||||
logger.warning(
|
||||
@@ -237,18 +235,9 @@ class FreqaiDataDrawer:
|
||||
return
|
||||
else:
|
||||
self.pair_dict[metadata["pair"]] = self.empty_pair_dict.copy()
|
||||
self.pair_dict[metadata["pair"]]["priority"] = len(self.pair_dict)
|
||||
|
||||
return
|
||||
|
||||
def pair_to_end_of_training_queue(self, pair: str) -> None:
    """
    Move `pair` to the back of the training queue.

    Every entry in `self.pair_dict` has its "priority" lowered by one, after
    which `pair` is assigned the highest number (the size of `self.pair_dict`).
    The update runs while holding `self.pair_dict_lock`.

    :param pair: str = key of the entry in `self.pair_dict` to send to the end
    """
    with self.pair_dict_lock:
        # Shift every pair one step towards the front of the queue ...
        for entry in self.pair_dict.values():
            entry["priority"] -= 1
        # ... then place the requested pair at the very end.
        queue_length = len(self.pair_dict)
        self.pair_dict[pair]["priority"] = queue_length
|
||||
|
||||
def set_initial_return_values(self, pair: str, pred_df: DataFrame) -> None:
|
||||
"""
|
||||
Set the initial return values to the historical predictions dataframe. This avoids needing
|
||||
@@ -356,7 +345,7 @@ class FreqaiDataDrawer:
|
||||
for dir in model_folders:
|
||||
result = pattern.match(str(dir.name))
|
||||
if result is None:
|
||||
break
|
||||
continue
|
||||
coin = result.group(1)
|
||||
timestamp = result.group(2)
|
||||
|
||||
|
@@ -18,6 +18,7 @@ from sklearn.model_selection import train_test_split
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
@@ -57,7 +58,7 @@ class FreqaiDataKitchen:
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
config: Dict[str, Any],
|
||||
config: Config,
|
||||
live: bool = False,
|
||||
pair: str = "",
|
||||
):
|
||||
@@ -774,12 +775,22 @@ class FreqaiDataKitchen:
|
||||
|
||||
def compute_inlier_metric(self, set_='train') -> None:
|
||||
"""
|
||||
|
||||
Compute inlier metric from backwards distance distributions.
|
||||
This metric defines how well features from a timepoint fit
|
||||
into previous timepoints.
|
||||
"""
|
||||
|
||||
def normalise(dataframe: DataFrame, key: str) -> DataFrame:
    """
    Min-max scale `dataframe` using bounds associated with `key`.

    When the enclosing call runs with set_ == 'train', the bounds are taken
    from `dataframe` itself and stored in self.data under '<key>_min' and
    '<key>_max'. For any other set_, the previously stored bounds are read
    back from self.data and applied unchanged.
    """
    fitting = set_ == 'train'
    lower = dataframe.min() if fitting else self.data[f'{key}_min']
    upper = dataframe.max() if fitting else self.data[f'{key}_max']
    if fitting:
        # Remember the training bounds so later sets are scaled identically.
        self.data[f'{key}_min'] = lower
        self.data[f'{key}_max'] = upper
    shifted = dataframe - lower
    return shifted / (upper - lower)
|
||||
|
||||
no_prev_pts = self.freqai_config["feature_parameters"]["inlier_metric_window"]
|
||||
|
||||
if set_ == 'train':
|
||||
@@ -824,7 +835,12 @@ class FreqaiDataKitchen:
|
||||
inliers = pd.DataFrame(index=distances.index)
|
||||
for key in distances.keys():
|
||||
current_distances = distances[key].dropna()
|
||||
fit_params = stats.weibull_min.fit(current_distances)
|
||||
current_distances = normalise(current_distances, key)
|
||||
if set_ == 'train':
|
||||
fit_params = stats.weibull_min.fit(current_distances)
|
||||
self.data[f'{key}_fit_params'] = fit_params
|
||||
else:
|
||||
fit_params = self.data[f'{key}_fit_params']
|
||||
quantiles = stats.weibull_min.cdf(current_distances, *fit_params)
|
||||
|
||||
df_inlier = pd.DataFrame(
|
||||
|
@@ -3,6 +3,7 @@ import shutil
|
||||
import threading
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import deque
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from threading import Lock
|
||||
@@ -14,12 +15,13 @@ from numpy.typing import NDArray
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import DATETIME_PRINT_FORMAT
|
||||
from freqtrade.constants import DATETIME_PRINT_FORMAT, Config
|
||||
from freqtrade.enums import RunMode
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.freqai.data_drawer import FreqaiDataDrawer
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.freqai.utils import plot_feature_importance
|
||||
from freqtrade.strategy.interface import IStrategy
|
||||
|
||||
|
||||
@@ -50,7 +52,7 @@ class IFreqaiModel(ABC):
|
||||
Juha Nykänen @suikula, Wagner Costa @wagnercosta, Johan Vlugt @Jooopieeert
|
||||
"""
|
||||
|
||||
def __init__(self, config: Dict[str, Any]) -> None:
|
||||
def __init__(self, config: Config) -> None:
|
||||
|
||||
self.config = config
|
||||
self.assert_config(self.config)
|
||||
@@ -80,6 +82,7 @@ class IFreqaiModel(ABC):
|
||||
self.pair_it = 0
|
||||
self.pair_it_train = 0
|
||||
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
|
||||
self.train_queue = self._set_train_queue()
|
||||
self.last_trade_database_summary: DataFrame = {}
|
||||
self.current_trade_database_summary: DataFrame = {}
|
||||
self.analysis_lock = Lock()
|
||||
@@ -101,7 +104,7 @@ class IFreqaiModel(ABC):
|
||||
return ({})
|
||||
self.strategy: Optional[IStrategy] = None
|
||||
|
||||
def assert_config(self, config: Dict[str, Any]) -> None:
|
||||
def assert_config(self, config: Config) -> None:
|
||||
|
||||
if not config.get("freqai", {}):
|
||||
raise OperationalException("No freqai parameters found in configuration file.")
|
||||
@@ -184,29 +187,40 @@ class IFreqaiModel(ABC):
|
||||
"""
|
||||
while not self._stop_event.is_set():
|
||||
time.sleep(1)
|
||||
for pair in self.config.get("exchange", {}).get("pair_whitelist"):
|
||||
pair = self.train_queue[0]
|
||||
|
||||
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
|
||||
# ensure pair is available in dp
|
||||
if pair not in strategy.dp.current_whitelist():
|
||||
self.train_queue.popleft()
|
||||
logger.warning(f'{pair} not in current whitelist, removing from train queue.')
|
||||
continue
|
||||
|
||||
if self.dd.pair_dict[pair]["priority"] != 1:
|
||||
continue
|
||||
dk = FreqaiDataKitchen(self.config, self.live, pair)
|
||||
dk.set_paths(pair, trained_timestamp)
|
||||
(
|
||||
retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange,
|
||||
) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
(_, trained_timestamp, _) = self.dd.get_pair_dict_info(pair)
|
||||
|
||||
if retrain:
|
||||
self.train_timer('start')
|
||||
dk = FreqaiDataKitchen(self.config, self.live, pair)
|
||||
dk.set_paths(pair, trained_timestamp)
|
||||
(
|
||||
retrain,
|
||||
new_trained_timerange,
|
||||
data_load_timerange,
|
||||
) = dk.check_if_new_training_required(trained_timestamp)
|
||||
dk.set_paths(pair, new_trained_timerange.stopts)
|
||||
|
||||
if retrain:
|
||||
self.train_timer('start')
|
||||
try:
|
||||
self.extract_data_and_train_model(
|
||||
new_trained_timerange, pair, strategy, dk, data_load_timerange
|
||||
)
|
||||
self.train_timer('stop')
|
||||
except Exception as msg:
|
||||
logger.warning(f'Training {pair} raised exception {msg}, skipping.')
|
||||
|
||||
self.dd.save_historic_predictions_to_disk()
|
||||
self.train_timer('stop')
|
||||
|
||||
# only rotate the queue after the first has been trained.
|
||||
self.train_queue.rotate(-1)
|
||||
|
||||
self.dd.save_historic_predictions_to_disk()
|
||||
|
||||
def start_backtesting(
|
||||
self, dataframe: DataFrame, metadata: dict, dk: FreqaiDataKitchen
|
||||
@@ -561,11 +575,11 @@ class IFreqaiModel(ABC):
|
||||
|
||||
self.dd.pair_dict[pair]["trained_timestamp"] = new_trained_timerange.stopts
|
||||
dk.set_new_model_names(pair, new_trained_timerange)
|
||||
self.dd.pair_dict[pair]["first"] = False
|
||||
if self.dd.pair_dict[pair]["priority"] == 1 and self.scanning:
|
||||
self.dd.pair_to_end_of_training_queue(pair)
|
||||
self.dd.save_data(model, pair, dk)
|
||||
|
||||
if self.freqai_info["feature_parameters"].get("plot_feature_importance", False):
|
||||
plot_feature_importance(model, pair, dk)
|
||||
|
||||
if self.freqai_info.get("purge_old_models", False):
|
||||
self.dd.purge_old_models()
|
||||
|
||||
@@ -689,6 +703,32 @@ class IFreqaiModel(ABC):
|
||||
|
||||
return init_model
|
||||
|
||||
def _set_train_queue(self):
    """
    Build the initial training queue.

    If `self.dd.pair_dict` is empty, the queue is simply the configured
    whitelist. Otherwise whitelisted pairs found in `self.dd.pair_dict` are
    ordered by ascending 'trained_timestamp', and whitelisted pairs that
    have no entry yet are pushed to the front of the queue.

    :return: deque of pair names
    """
    current_pairlist = self.config.get("exchange", {}).get("pair_whitelist")

    # Nothing is known about earlier trainings: use the whitelist as-is.
    if not self.dd.pair_dict:
        logger.info('Set fresh train queue from whitelist. '
                    f'Queue: {current_pairlist}')
        return deque(current_pairlist)

    # Smallest 'trained_timestamp' first.
    by_timestamp = sorted(self.dd.pair_dict.items(),
                          key=lambda item: item[1]['trained_timestamp'])
    best_queue = deque(
        name for name, _ in by_timestamp if name in current_pairlist
    )

    # Whitelisted pairs without an entry in pair_dict go to the front.
    for candidate in current_pairlist:
        if candidate in best_queue:
            continue
        best_queue.appendleft(candidate)

    logger.info('Set existing queue from trained timestamps. '
                f'Best approximation queue: {best_queue}')
    return best_queue
|
||||
|
||||
# Following methods which are overridden by user made prediction models.
|
||||
# See freqai/prediction_models/CatboostPredictionModel.py for an example.
|
||||
|
||||
|
85
freqtrade/freqai/prediction_models/XGBoostClassifier.py
Normal file
85
freqtrade/freqai/prediction_models/XGBoostClassifier.py
Normal file
@@ -0,0 +1,85 @@
|
||||
import logging
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
import pandas as pd
|
||||
from pandas import DataFrame
|
||||
from pandas.api.types import is_integer_dtype
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from xgboost import XGBClassifier
|
||||
|
||||
from freqtrade.freqai.base_models.BaseClassifierModel import BaseClassifierModel
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class XGBoostClassifier(BaseClassifierModel):
    """
    Prediction model built on xgboost's XGBClassifier.

    It extends BaseClassifierModel by overriding fit() to train the
    classifier and predict() to translate encoded predictions back to the
    original label names.
    """

    def fit(self, data_dictionary: Dict, dk: FreqaiDataKitchen, **kwargs) -> Any:
        """
        Train an XGBClassifier on the data held in `data_dictionary`.
        :param data_dictionary: dictionary holding the training and test
                                features, labels and the training weights.
        :param dk: FreqaiDataKitchen = data container for the current pair;
                   `dk.pair` is used to look up an initial model.
        :return: the fitted XGBClassifier
        """

        features = data_dictionary["train_features"].to_numpy()
        # Only the first label column is used as classification target.
        targets = data_dictionary["train_labels"].to_numpy()[:, 0]

        encoder = LabelEncoder()
        if not is_integer_dtype(targets):
            # Non-integer labels are mapped to integer codes.
            targets = pd.Series(encoder.fit_transform(targets), dtype="int64")

        split_params = self.freqai_info.get('data_split_parameters', {})
        eval_set = None
        if split_params.get('test_size', 0.1) != 0:
            eval_features = data_dictionary["test_features"].to_numpy()
            eval_targets = data_dictionary["test_labels"].to_numpy()[:, 0]

            if not is_integer_dtype(eval_targets):
                # Reuse the encoder fitted above so the codes agree with training.
                eval_targets = pd.Series(encoder.transform(eval_targets), dtype="int64")

            eval_set = [(eval_features, eval_targets)]

        sample_weights = data_dictionary["train_weights"]

        starting_model = self.get_init_model(dk.pair)

        classifier = XGBClassifier(**self.model_training_parameters)

        classifier.fit(
            X=features,
            y=targets,
            eval_set=eval_set,
            sample_weight=sample_weights,
            xgb_model=starting_model,
        )

        return classifier

    def predict(
        self, unfiltered_df: DataFrame, dk: FreqaiDataKitchen, **kwargs
    ) -> Tuple[DataFrame, npt.NDArray[np.int_]]:
        """
        Run the parent class prediction, then decode the predicted labels.
        :param unfiltered_df: Full dataframe for the current backtest period.
        :param dk: FreqaiDataKitchen = data container for the current pair.
        :return:
        :pred_df: dataframe containing the predictions
        :do_predict: the `dk.do_predict` array set by the parent's predict()
        """

        (pred_df, dk.do_predict) = super().predict(unfiltered_df, dk, **kwargs)

        encoder = LabelEncoder()
        target_column = dk.label_list[0]
        # NOTE(review): the keys of dk.data['labels_std'] are presumably the
        # original class names - confirm against the data kitchen.
        original_names = list(dk.data['labels_std'].keys())
        encoded_names = encoder.fit_transform(original_names).tolist()
        pred_df[target_column] = encoder.inverse_transform(pred_df[target_column])
        # Columns named by the encoded value get their original label back.
        pred_df = pred_df.rename(columns=dict(zip(encoded_names, original_names)))

        return (pred_df, dk.do_predict)
|
@@ -1,19 +1,25 @@
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import Config
|
||||
from freqtrade.data.dataprovider import DataProvider
|
||||
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
from freqtrade.exchange.exchange import market_is_active
|
||||
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
|
||||
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
|
||||
def download_all_data_for_training(dp: DataProvider, config: Config) -> None:
|
||||
"""
|
||||
Called only once upon start of bot to download the necessary data for
|
||||
populating indicators and training the model.
|
||||
@@ -47,9 +53,7 @@ def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
|
||||
)
|
||||
|
||||
|
||||
def get_required_data_timerange(
|
||||
config: dict
|
||||
) -> TimeRange:
|
||||
def get_required_data_timerange(config: Config) -> TimeRange:
|
||||
"""
|
||||
Used to compute the required data download time range
|
||||
for auto data-download in FreqAI
|
||||
@@ -86,7 +90,7 @@ def get_required_data_timerange(
|
||||
|
||||
|
||||
# Keep below for when we wish to download heterogeneously lengthed data for FreqAI.
|
||||
# def download_all_data_for_training(dp: DataProvider, config: dict) -> None:
|
||||
# def download_all_data_for_training(dp: DataProvider, config: Config) -> None:
|
||||
# """
|
||||
# Called only once upon start of bot to download the necessary data for
|
||||
# populating indicators and training a FreqAI model.
|
||||
@@ -132,3 +136,58 @@ def get_required_data_timerange(
|
||||
# trading_mode=config.get("trading_mode", "spot"),
|
||||
# prepend=config.get("prepend_data", False),
|
||||
# )
|
||||
|
||||
|
||||
def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
                            count_max: int = 25) -> None:
    """
    Plot Best and worst features by importance for a single sub-train.

    One HTML file is stored per label. Plotting stops (with an info log) at
    the first model whose class is not recognised as catboost, lightgbm or
    xgboost.

    :param model: Any = A model which was `fit` using a common library
                        such as catboost, lightgbm or xgboost
    :param pair: str = pair e.g. BTC/USD
    :param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
    :param count_max: int = the amount of features to be loaded per column
    """
    from freqtrade.plot.plotting import go, make_subplots, store_plot_file

    def add_feature_trace(fig, fi_df, col):
        # Horizontal bar chart of importances placed in subplot column `col`.
        return fig.add_trace(
            go.Bar(
                x=fi_df["feature_importance"],
                y=fi_df["feature_names"],
                orientation='h', showlegend=False
            ), row=1, col=col
        )

    # Extract feature importance from model
    if 'FreqaiMultiOutputRegressor' in str(model.__class__):
        # One estimator per label, paired in order.
        models = dict(zip(dk.label_list, model.estimators_))
    else:
        models = {dk.label_list[0]: model}

    for label, mdl in models.items():
        model_type = str(mdl.__class__)
        if "catboost.core" in model_type:
            feature_importance = mdl.get_feature_importance()
        # Each substring must be tested explicitly: a bare non-empty string
        # literal in an `or` is always truthy and would make this branch
        # match every model, leaving the `else` below unreachable.
        elif "lightgbm.sklearn" in model_type or "xgb" in model_type:
            feature_importance = mdl.feature_importances_
        else:
            logger.info('Model type not support for generating feature importances.')
            return

        # Data preparation
        fi_df = pd.DataFrame({
            "feature_names": np.array(dk.training_features_list),
            "feature_importance": np.array(feature_importance)
        })
        fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
        fi_df_worst = fi_df.nsmallest(count_max, "feature_importance")[::-1]

        # Plotting
        fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.5)
        fig = add_feature_trace(fig, fi_df_top, 1)
        fig = add_feature_trace(fig, fi_df_worst, 2)
        fig.update_layout(title_text=f"Best and worst features by importance {pair}")
        # escape two FreqAI specific characters
        file_label = label.replace('&', '').replace('%', '')
        store_plot_file(fig, f"{dk.model_filename}-{file_label}.html", dk.data_path)
|
||||
|
Reference in New Issue
Block a user