From eb8bde37c16d201cae16fa9f13883924cd8e697a Mon Sep 17 00:00:00 2001
From: robcaulk <rob.caulk@gmail.com>
Date: Sat, 6 Aug 2022 17:51:21 +0200
Subject: [PATCH] Add lightgbm classifier, add classifier check test, fix
 classifier bug.

---
 freqtrade/freqai/freqai_interface.py          |   2 +
 .../prediction_models/CatboostClassifier.py   |   3 -
 .../prediction_models/LightGBMClassifier.py   |  38 +++++
 freqtrade/templates/FreqaiExampleStrategy.py  |   4 +
 tests/freqai/test_freqai_interface.py         |  63 ++++++++
 tests/rpc/test_rpc_apiserver.py               |   1 +
 .../strategy/strats/freqai_test_classifier.py | 138 ++++++++++++++++++
 tests/strategy/test_strategy_loading.py       |   6 +-
 8 files changed, 249 insertions(+), 6 deletions(-)
 create mode 100644 freqtrade/freqai/prediction_models/LightGBMClassifier.py
 create mode 100644 tests/strategy/strats/freqai_test_classifier.py

diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py
index 248484b05..097ecf553 100644
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@@ -566,6 +566,8 @@ class IFreqaiModel(ABC):
         num_candles = self.freqai_info.get("fit_live_predictions_candles", 100)
         dk.data["labels_mean"], dk.data["labels_std"] = {}, {}
         for label in dk.label_list:
+            if self.dd.historic_predictions[dk.pair][label].dtype == object:
+                continue
             f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles))
             dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]
 
diff --git a/freqtrade/freqai/prediction_models/CatboostClassifier.py b/freqtrade/freqai/prediction_models/CatboostClassifier.py
index d003744fb..ac1386eee 100644
--- a/freqtrade/freqai/prediction_models/CatboostClassifier.py
+++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py
@@ -32,9 +32,6 @@ class CatboostClassifier(BaseRegressionModel):
 
         cbr = CatBoostClassifier(
             allow_writing_files=False,
-            gpu_ram_part=0.5,
-            verbose=100,
-            early_stopping_rounds=400,
             loss_function='MultiClass',
             **self.model_training_parameters,
         )
diff --git a/freqtrade/freqai/prediction_models/LightGBMClassifier.py b/freqtrade/freqai/prediction_models/LightGBMClassifier.py
new file mode 100644
index 000000000..782dbce35
--- /dev/null
+++ b/freqtrade/freqai/prediction_models/LightGBMClassifier.py
@@ -0,0 +1,38 @@
+import logging
+from typing import Any, Dict
+
+from lightgbm import LGBMClassifier
+
+from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
+
+
+logger = logging.getLogger(__name__)
+
+
+class LightGBMClassifier(BaseRegressionModel):
+    """
+    LightGBM classification model for FreqAI. Only fit() is overridden here;
+    all other behaviour is inherited from BaseRegressionModel. The fitted
+    estimator is a lightgbm LGBMClassifier built from model_training_parameters.
+    """
+
+    def fit(self, data_dictionary: Dict) -> Any:
+        """
+        Fit an LGBMClassifier on the training split held in data_dictionary.
+        :param data_dictionary: dict providing "train_features"/"train_labels" and,
+        unless test_size is 0, "test_features"/"test_labels" used as the eval set.
+        :return: the fitted LGBMClassifier instance.
+        """
+        # A configured test_size of exactly 0 means no evaluation set is passed to fit().
+        if self.freqai_info.get('data_split_parameters', {}).get('test_size', 0.1) == 0:
+            eval_set = None
+        else:
+            eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])
+        X = data_dictionary["train_features"]
+        y = data_dictionary["train_labels"]
+
+        model = LGBMClassifier(**self.model_training_parameters)
+
+        model.fit(X=X, y=y, eval_set=eval_set)
+
+        return model
diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
index 4f632f907..ee1602725 100644
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -155,6 +155,10 @@ class FreqaiExampleStrategy(IStrategy):
                     - 1
                 )
 
+                # Classifiers are typically set up with strings as targets:
+                # df['&s-up_or_down'] = np.where(df["close"].shift(-100) >
+                #                                df["close"], 'up', 'down')
+
                 # If user wishes to use multiple targets, they can add more by
                 # appending more columns with '&'. User should keep in mind that multi targets
                 # requires a multioutput prediction model such as
diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py
index 1f96cf6df..676f451ab 100644
--- a/tests/freqai/test_freqai_interface.py
+++ b/tests/freqai/test_freqai_interface.py
@@ -103,6 +103,69 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
     shutil.rmtree(Path(freqai.dk.full_path))
 
 
+@pytest.mark.skipif("arm" in platform.uname()[-1], reason="no ARM for Catboost ...")
+def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
+    freqai_conf.update({"timerange": "20180110-20180130"})
+    freqai_conf.update({"freqaimodel": "CatboostClassifier"})
+    freqai_conf.update({"strategy": "freqai_test_classifier"})
+    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
+    exchange = get_patched_exchange(mocker, freqai_conf)
+    strategy.dp = DataProvider(freqai_conf, exchange)
+
+    strategy.freqai_info = freqai_conf.get("freqai", {})
+    freqai = strategy.freqai
+    freqai.live = True
+    freqai.dk = FreqaiDataKitchen(freqai_conf)
+    timerange = TimeRange.parse_timerange("20180110-20180130")
+    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
+    # NOTE(review): presumably stubs out per-pair bookkeeping during training - confirm
+    freqai.dd.pair_dict = MagicMock()
+
+    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
+    new_timerange = TimeRange.parse_timerange("20180120-20180130")
+
+    freqai.train_model_in_series(new_timerange, "ADA/BTC",
+                                 strategy, freqai.dk, data_load_timerange)
+    # training is expected to persist the model, metadata, training frame and SVM model
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists()
+    # clean up: delete the freqai.dk.full_path directory tree
+    shutil.rmtree(Path(freqai.dk.full_path))
+
+
+def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
+    freqai_conf.update({"timerange": "20180110-20180130"})
+    freqai_conf.update({"freqaimodel": "LightGBMClassifier"})
+    freqai_conf.update({"strategy": "freqai_test_classifier"})
+    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
+    exchange = get_patched_exchange(mocker, freqai_conf)
+    strategy.dp = DataProvider(freqai_conf, exchange)
+
+    strategy.freqai_info = freqai_conf.get("freqai", {})
+    freqai = strategy.freqai
+    freqai.live = True
+    freqai.dk = FreqaiDataKitchen(freqai_conf)
+    timerange = TimeRange.parse_timerange("20180110-20180130")
+    freqai.dd.load_all_pair_histories(timerange, freqai.dk)
+    # NOTE(review): presumably stubs out per-pair bookkeeping during training - confirm
+    freqai.dd.pair_dict = MagicMock()
+
+    data_load_timerange = TimeRange.parse_timerange("20180110-20180130")
+    new_timerange = TimeRange.parse_timerange("20180120-20180130")
+
+    freqai.train_model_in_series(new_timerange, "ADA/BTC",
+                                 strategy, freqai.dk, data_load_timerange)
+    # training is expected to persist the model, metadata, training frame and SVM model
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_model.joblib").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_metadata.json").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_trained_df.pkl").exists()
+    assert Path(freqai.dk.data_path / f"{freqai.dk.model_filename}_svm_model.joblib").exists()
+    # clean up: delete the freqai.dk.full_path directory tree
+    shutil.rmtree(Path(freqai.dk.full_path))
+
+
 def test_start_backtesting(mocker, freqai_conf):
     freqai_conf.update({"timerange": "20180120-20180130"})
     strategy = get_patched_freqai_strategy(mocker, freqai_conf)
diff --git a/tests/rpc/test_rpc_apiserver.py b/tests/rpc/test_rpc_apiserver.py
index e6b7c4dd2..2b66ed332 100644
--- a/tests/rpc/test_rpc_apiserver.py
+++ b/tests/rpc/test_rpc_apiserver.py
@@ -1403,6 +1403,7 @@ def test_api_strategies(botclient):
         'StrategyTestV2',
         'StrategyTestV3',
         'StrategyTestV3Futures',
+        'freqai_test_classifier',
         'freqai_test_multimodel_strat',
         'freqai_test_strat'
     ]}
diff --git a/tests/strategy/strats/freqai_test_classifier.py b/tests/strategy/strats/freqai_test_classifier.py
new file mode 100644
index 000000000..c333ac818
--- /dev/null
+++ b/tests/strategy/strats/freqai_test_classifier.py
@@ -0,0 +1,138 @@
+import logging
+from functools import reduce
+
+import pandas as pd
+import talib.abstract as ta
+from pandas import DataFrame
+import numpy as np
+from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair
+
+
+logger = logging.getLogger(__name__)
+
+
+class freqai_test_classifier(IStrategy):
+    """
+    Test strategy - used for testing freqAI functionalities.
+    Do not use in production.
+    """
+
+    minimal_roi = {"0": 0.1, "240": -1}
+
+    plot_config = {
+        "main_plot": {},
+        "subplots": {
+            "prediction": {"prediction": {"color": "blue"}},
+            "target_roi": {
+                "target_roi": {"color": "brown"},
+            },
+            "do_predict": {
+                "do_predict": {"color": "brown"},
+            },
+        },
+    }
+
+    process_only_new_candles = True
+    stoploss = -0.05
+    use_exit_signal = True
+    startup_candle_count: int = 300
+    can_short = False
+
+    linear_roi_offset = DecimalParameter(
+        0.00, 0.02, default=0.005, space="sell", optimize=False, load=True
+    )
+    max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True)
+
+    def informative_pairs(self):  # returns (pair, timeframe) tuples
+        whitelist_pairs = self.dp.current_whitelist()
+        corr_pairs = self.config["freqai"]["feature_parameters"]["include_corr_pairlist"]
+        informative_pairs = []
+        for tf in self.config["freqai"]["feature_parameters"]["include_timeframes"]:
+            for pair in whitelist_pairs:
+                informative_pairs.append((pair, tf))
+            for pair in corr_pairs:
+                if pair in whitelist_pairs:
+                    continue  # already appended by the whitelist loop above
+                informative_pairs.append((pair, tf))
+        return informative_pairs
+
+    def populate_any_indicators(
+        self, pair, df, tf, informative=None, set_generalized_indicators=False
+    ):
+        """Merge "%"-prefixed features for (pair, tf) into df and return the merged frame."""
+        coin = pair.split('/')[0]
+
+        with self.freqai.lock:
+            if informative is None:
+                informative = self.dp.get_pair_dataframe(pair, tf)
+
+            # first loop is automatically duplicating indicators for time periods
+            for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:
+                # talib's abstract API takes the lookback as "timeperiod"
+                t = int(t)
+                informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
+                informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
+                informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, timeperiod=t)
+
+            informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
+            informative[f"%-{coin}raw_volume"] = informative["volume"]
+            informative[f"%-{coin}raw_price"] = informative["close"]
+
+            indicators = [col for col in informative if col.startswith("%")]
+            # This loop duplicates and shifts all indicators to add a sense of recency to data
+            for n in range(self.freqai_info["feature_parameters"]["include_shifted_candles"] + 1):
+                if n == 0:
+                    continue
+                informative_shift = informative[indicators].shift(n)
+                informative_shift = informative_shift.add_suffix("_shift-" + str(n))
+                informative = pd.concat((informative, informative_shift), axis=1)
+
+            df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
+            skip_columns = [
+                (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]
+            ]
+            df = df.drop(columns=skip_columns)
+
+            # Add generalized indicators here (because in live, it will call this
+            # function to populate indicators during training). Notice how we ensure not to
+            # add them multiple times
+            if set_generalized_indicators:
+                df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
+                df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25
+
+                # user adds targets here by prepending them with &- (see convention below)
+                # If user wishes to use multiple targets, a multioutput prediction model
+                # needs to be used such as templates/CatboostPredictionMultiModel.py
+                df['&s-up_or_down'] = np.where(df["close"].shift(-100) > df["close"], 'up', 'down')
+
+        return df
+
+    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+        """Cache config["freqai"] on self, then return what self.freqai.start() produces."""
+        self.freqai_info = self.config["freqai"]
+
+        dataframe = self.freqai.start(dataframe, metadata, self)
+
+        return dataframe
+
+    def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
+        """Set enter_long/enter_short (and enter_tag) from the '&s-up_or_down' column of df."""
+        enter_long_conditions = [df['&s-up_or_down'] == 'up']
+
+        if enter_long_conditions:
+            df.loc[
+                reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
+            ] = (1, "long")
+        # NOTE(review): can_short is False on this strategy - confirm short entries are intended
+        enter_short_conditions = [df['&s-up_or_down'] == 'down']
+
+        if enter_short_conditions:
+            df.loc[
+                reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"]
+            ] = (1, "short")
+
+        return df
+
+    def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
+        """Return df unchanged; this strategy sets no exit signals of its own."""
+        return df
diff --git a/tests/strategy/test_strategy_loading.py b/tests/strategy/test_strategy_loading.py
index aaad26e5b..5b6f15d11 100644
--- a/tests/strategy/test_strategy_loading.py
+++ b/tests/strategy/test_strategy_loading.py
@@ -34,7 +34,7 @@ def test_search_all_strategies_no_failed():
     directory = Path(__file__).parent / "strats"
     strategies = StrategyResolver.search_all_objects(directory, enum_failed=False)
     assert isinstance(strategies, list)
-    assert len(strategies) == 8
+    assert len(strategies) == 9
     assert isinstance(strategies[0], dict)
 
 
@@ -42,10 +42,10 @@ def test_search_all_strategies_with_failed():
     directory = Path(__file__).parent / "strats"
     strategies = StrategyResolver.search_all_objects(directory, enum_failed=True)
     assert isinstance(strategies, list)
-    assert len(strategies) == 9
+    assert len(strategies) == 10
     # with enum_failed=True search_all_objects() shall find 2 good strategies
     # and 1 which fails to load
-    assert len([x for x in strategies if x['class'] is not None]) == 8
+    assert len([x for x in strategies if x['class'] is not None]) == 9
     assert len([x for x in strategies if x['class'] is None]) == 1