Merge pull request #7431 from initrv/add-plot-feature-importance

Add plot feature importance
This commit is contained in:
Matthias 2022-09-19 08:41:10 +02:00 committed by GitHub
commit 225f7cd5f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 101 additions and 1 deletions

View File

@ -77,7 +77,8 @@
"indicator_periods_candles": [ "indicator_periods_candles": [
10, 10,
20 20
] ],
"plot_feature_importance": true
}, },
"data_split_parameters": { "data_split_parameters": {
"test_size": 0.33, "test_size": 0.33,

View File

@ -20,6 +20,7 @@ from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds from freqtrade.exchange import timeframe_to_seconds
from freqtrade.freqai.data_drawer import FreqaiDataDrawer from freqtrade.freqai.data_drawer import FreqaiDataDrawer
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.freqai.utils import plot_feature_importance
from freqtrade.strategy.interface import IStrategy from freqtrade.strategy.interface import IStrategy
@ -562,6 +563,9 @@ class IFreqaiModel(ABC):
self.dd.pair_to_end_of_training_queue(pair) self.dd.pair_to_end_of_training_queue(pair)
self.dd.save_data(model, pair, dk) self.dd.save_data(model, pair, dk)
if self.freqai_info["feature_parameters"].get("plot_feature_importance", True):
plot_feature_importance(model, pair, dk)
if self.freqai_info.get("purge_old_models", False): if self.freqai_info.get("purge_old_models", False):
self.dd.purge_old_models() self.dd.purge_old_models()

View File

@ -1,5 +1,9 @@
import logging import logging
from datetime import datetime, timezone from datetime import datetime, timezone
from typing import Any
import numpy as np
import pandas as pd
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
from freqtrade.constants import Config from freqtrade.constants import Config
@ -8,6 +12,7 @@ from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.exceptions import OperationalException from freqtrade.exceptions import OperationalException
from freqtrade.exchange import timeframe_to_seconds from freqtrade.exchange import timeframe_to_seconds
from freqtrade.exchange.exchange import market_is_active from freqtrade.exchange.exchange import market_is_active
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist from freqtrade.plugins.pairlist.pairlist_helpers import dynamic_expand_pairlist
@ -131,3 +136,58 @@ def get_required_data_timerange(config: Config) -> TimeRange:
# trading_mode=config.get("trading_mode", "spot"), # trading_mode=config.get("trading_mode", "spot"),
# prepend=config.get("prepend_data", False), # prepend=config.get("prepend_data", False),
# ) # )
def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
count_max: int = 25) -> None:
"""
Plot Best and worst features by importance for a single sub-train.
:param model: Any = A model which was `fit` using a common library
such as catboost or lightgbm
:param pair: str = pair e.g. BTC/USD
:param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
:param count_max: int = the amount of features to be loaded per column
"""
from freqtrade.plot.plotting import go, make_subplots, store_plot_file
# Extract feature importance from model
models = {}
if 'FreqaiMultiOutputRegressor' in str(model.__class__):
for estimator, label in zip(model.estimators_, dk.label_list):
models[label] = estimator
else:
models[dk.label_list[0]] = model
for label in models:
mdl = models[label]
if "catboost.core" in str(mdl.__class__):
feature_importance = mdl.get_feature_importance()
elif "lightgbm.sklearn" or "xgb" in str(mdl.__class__):
feature_importance = mdl.feature_importances_
else:
logger.info('Model type not support for generating feature importances.')
return
# Data preparation
fi_df = pd.DataFrame({
"feature_names": np.array(dk.training_features_list),
"feature_importance": np.array(feature_importance)
})
fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
fi_df_worst = fi_df.nsmallest(count_max, "feature_importance")[::-1]
# Plotting
def add_feature_trace(fig, fi_df, col):
return fig.add_trace(
go.Bar(
x=fi_df["feature_importance"],
y=fi_df["feature_names"],
orientation='h', showlegend=False
), row=1, col=col
)
fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.5)
fig = add_feature_trace(fig, fi_df_top, 1)
fig = add_feature_trace(fig, fi_df_worst, 2)
fig.update_layout(title_text=f"Best and worst features by importance {pair}")
label = label.replace('&', '').replace('%', '') # escape two FreqAI specific characters
store_plot_file(fig, f"{dk.model_filename}-{label}.html", dk.data_path)

View File

@ -318,6 +318,41 @@ def test_principal_component_analysis(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path)) shutil.rmtree(Path(freqai.dk.full_path))
def test_plot_feature_importance(mocker, freqai_conf):
from freqtrade.freqai.utils import plot_feature_importance
freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.get("freqai", {}).get("feature_parameters", {}).update(
{"princpial_component_analysis": "true"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange)
strategy.freqai_info = freqai_conf.get("freqai", {})
freqai = strategy.freqai
freqai.live = True
freqai.dk = FreqaiDataKitchen(freqai_conf)
timerange = TimeRange.parse_timerange("20180110-20180130")
freqai.dd.load_all_pair_histories(timerange, freqai.dk)
freqai.dd.pair_dict = MagicMock()
data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
new_timerange = TimeRange.parse_timerange("20180120-20180130")
freqai.extract_data_and_train_model(
new_timerange, "ADA/BTC", strategy, freqai.dk, data_load_timerange)
model = freqai.dd.load_data("ADA/BTC", freqai.dk)
plot_feature_importance(model, "ADA/BTC", freqai.dk)
assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}.html")
shutil.rmtree(Path(freqai.dk.full_path))
@pytest.mark.parametrize('timeframes,corr_pairs', [ @pytest.mark.parametrize('timeframes,corr_pairs', [
(['5m'], ['ADA/BTC', 'DASH/BTC']), (['5m'], ['ADA/BTC', 'DASH/BTC']),
(['5m'], ['ADA/BTC', 'DASH/BTC', 'ETH/USDT']), (['5m'], ['ADA/BTC', 'DASH/BTC', 'ETH/USDT']),