diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json index 12eb30128..9494ba0e1 100644 --- a/config_examples/config_freqai.example.json +++ b/config_examples/config_freqai.example.json @@ -77,7 +77,8 @@ "indicator_periods_candles": [ 10, 20 - ] + ], + "plot_feature_importance": true }, "data_split_parameters": { "test_size": 0.33, diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py index 9a2c64cc3..be1231a53 100644 --- a/freqtrade/freqai/freqai_interface.py +++ b/freqtrade/freqai/freqai_interface.py @@ -20,6 +20,7 @@ from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds from freqtrade.freqai.data_drawer import FreqaiDataDrawer from freqtrade.freqai.data_kitchen import FreqaiDataKitchen +from freqtrade.freqai.utils import plot_feature_importance from freqtrade.strategy.interface import IStrategy @@ -562,6 +563,9 @@ class IFreqaiModel(ABC): self.dd.pair_to_end_of_training_queue(pair) self.dd.save_data(model, pair, dk) + if self.freqai_info["feature_parameters"].get("plot_feature_importance", True): + plot_feature_importance(model, pair, dk) + if self.freqai_info.get("purge_old_models", False): self.dd.purge_old_models() diff --git a/freqtrade/freqai/utils.py b/freqtrade/freqai/utils.py index 063965ded..f6358925c 100644 --- a/freqtrade/freqai/utils.py +++ b/freqtrade/freqai/utils.py @@ -1,5 +1,9 @@ import logging from datetime import datetime, timezone +from typing import Any + +import numpy as np +import pandas as pd from freqtrade.configuration import TimeRange from freqtrade.constants import Config @@ -8,6 +12,7 @@ from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds from freqtrade.exchange.exchange import market_is_active +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen from 
def plot_feature_importance(model: Any, pair: str, dk: FreqaiDataKitchen,
                            count_max: int = 25) -> None:
    """
    Plot Best and worst features by importance for a single sub-train.

    For every label handled by ``model`` a two-column horizontal bar chart is built
    (left: the ``count_max`` most important features, right: the ``count_max`` least
    important ones) and handed to ``store_plot_file`` with the file name
    ``<dk.model_filename>-<label>.html`` and the directory ``dk.data_path``.

    If a model exposes no feature importances, an info message is logged and the
    function returns without plotting that label or any label after it.

    :param model: Any = A model which was `fit` using a common library
                        such as catboost or lightgbm
    :param pair: str = pair e.g. BTC/USD
    :param dk: FreqaiDataKitchen = non-persistent data container for current coin/loop
    :param count_max: int = the amount of features to be loaded per column
    """
    from freqtrade.plot.plotting import go, make_subplots, store_plot_file

    def add_feature_trace(fig, fi_df, col):
        # Horizontal bars: importance on the x axis, feature name on the y axis.
        return fig.add_trace(
            go.Bar(
                x=fi_df["feature_importance"],
                y=fi_df["feature_names"],
                orientation='h', showlegend=False
            ), row=1, col=col
        )

    # Map every label to the estimator that predicts it. A multi-output wrapper
    # carries one fitted estimator per label; otherwise the model serves the first label.
    if 'FreqaiMultiOutputRegressor' in str(model.__class__):
        models = dict(zip(dk.label_list, model.estimators_))
    else:
        models = {dk.label_list[0]: model}

    for label, mdl in models.items():
        # Extract feature importance from model
        if "catboost.core" in str(mdl.__class__):
            feature_importance = mdl.get_feature_importance()
        elif hasattr(mdl, "feature_importances_"):
            # lightgbm / xgboost style attribute. Checking for the attribute itself
            # (rather than `"a" or "b" in name`, which is always truthy) keeps every
            # model that exposes importances working and lets models without them
            # reach the branch below instead of raising AttributeError.
            feature_importance = mdl.feature_importances_
        else:
            logger.info('Model type not support for generating feature importances.')
            return

        # Data preparation
        fi_df = pd.DataFrame({
            "feature_names": np.array(dk.training_features_list),
            "feature_importance": np.array(feature_importance)
        })
        # Reverse the selections so the bars appear in the opposite order
        # to the one returned by nlargest / nsmallest.
        fi_df_top = fi_df.nlargest(count_max, "feature_importance")[::-1]
        fi_df_worst = fi_df.nsmallest(count_max, "feature_importance")[::-1]

        # Plotting
        fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.5)
        fig = add_feature_trace(fig, fi_df_top, 1)
        fig = add_feature_trace(fig, fi_df_worst, 2)
        fig.update_layout(title_text=f"Best and worst features by importance {pair}")

        # Strip the two FreqAI specific characters from the label before using it
        # in a file name.
        file_label = label.replace('&', '').replace('%', '')
        store_plot_file(fig, f"{dk.model_filename}-{file_label}.html", dk.data_path)
'DASH/BTC']), (['5m'], ['ADA/BTC', 'DASH/BTC', 'ETH/USDT']),