Add lightgbm classifier, add classifier check test, fix classifier bug.

This commit is contained in:
robcaulk 2022-08-06 17:51:21 +02:00
parent 47a30047eb
commit eb8bde37c1
8 changed files with 249 additions and 6 deletions

View File

@ -566,6 +566,8 @@ class IFreqaiModel(ABC):
num_candles = self.freqai_info.get("fit_live_predictions_candles", 100)
dk.data["labels_mean"], dk.data["labels_std"] = {}, {}
for label in dk.label_list:
if self.dd.historic_predictions[dk.pair][label].dtype == object:
continue
f = spy.stats.norm.fit(self.dd.historic_predictions[dk.pair][label].tail(num_candles))
dk.data["labels_mean"][label], dk.data["labels_std"][label] = f[0], f[1]

View File

@ -32,9 +32,6 @@ class CatboostClassifier(BaseRegressionModel):
cbr = CatBoostClassifier(
allow_writing_files=False,
gpu_ram_part=0.5,
verbose=100,
early_stopping_rounds=400,
loss_function='MultiClass',
**self.model_training_parameters,
)

View File

@ -0,0 +1,38 @@
import logging
from typing import Any, Dict
from lightgbm import LGBMClassifier
from freqtrade.freqai.prediction_models.BaseRegressionModel import BaseRegressionModel
logger = logging.getLogger(__name__)
class LightGBMClassifier(BaseRegressionModel):
    """
    Classification model backed by lightgbm's LGBMClassifier.

    Only fit() is defined in this class; all other behaviour is inherited
    from BaseRegressionModel.
    """

    def fit(self, data_dictionary: Dict) -> Any:
        """
        Create an LGBMClassifier from the configured training parameters and
        fit it on the training split.
        :params:
        :data_dictionary: dictionary providing "train_features" and
        "train_labels" and, when a test split is configured, "test_features"
        and "test_labels".
        :returns: the fitted LGBMClassifier instance.
        """
        split_parameters = self.freqai_info.get('data_split_parameters', {})

        # When test_size is configured as 0, no evaluation set is handed to fit().
        eval_set = None
        if split_parameters.get('test_size', 0.1) != 0:
            eval_set = (data_dictionary["test_features"], data_dictionary["test_labels"])

        train_features = data_dictionary["train_features"]
        train_labels = data_dictionary["train_labels"]

        classifier = LGBMClassifier(**self.model_training_parameters)
        classifier.fit(X=train_features, y=train_labels, eval_set=eval_set)

        return classifier

View File

@ -155,6 +155,10 @@ class FreqaiExampleStrategy(IStrategy):
- 1
)
# Classifiers are typically set up with strings as targets:
# df['&s-up_or_down'] = np.where( df["close"].shift(-100) >
# df["close"], 'up', 'down')
# If user wishes to use multiple targets, they can add more by
# appending more columns with '&'. User should keep in mind that multi targets
# requires a multioutput prediction model such as

View File

@ -103,6 +103,69 @@ def test_train_model_in_series_Catboost(mocker, freqai_conf):
shutil.rmtree(Path(freqai.dk.full_path))
@pytest.mark.skipif("arm" in platform.uname()[-1], reason="no ARM for Catboost ...")
def test_train_model_in_series_CatboostClassifier(mocker, freqai_conf):
    """
    Train the CatboostClassifier model for ADA/BTC with the classifier test
    strategy and check that the model, metadata, trained dataframe and SVM
    model files were written. The output folder is removed afterwards.
    """
    freqai_conf.update({
        "timerange": "20180110-20180130",
        "freqaimodel": "CatboostClassifier",
        "strategy": "freqai_test_classifier",
    })

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})

    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    history_range = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(history_range, freqai.dk)
    freqai.dd.pair_dict = MagicMock()

    load_range = TimeRange.parse_timerange("20180110-20180130")
    train_range = TimeRange.parse_timerange("20180120-20180130")
    freqai.train_model_in_series(train_range, "ADA/BTC", strategy, freqai.dk, load_range)

    # Every artifact written by a training run must be present on disk.
    expected_artifacts = [
        f"{freqai.dk.model_filename}_model.joblib",
        f"{freqai.dk.model_filename}_metadata.json",
        f"{freqai.dk.model_filename}_trained_df.pkl",
        f"{freqai.dk.model_filename}_svm_model.joblib",
    ]
    for artifact in expected_artifacts:
        assert Path(freqai.dk.data_path / artifact).exists()

    shutil.rmtree(Path(freqai.dk.full_path))
def test_train_model_in_series_LightGBMClassifier(mocker, freqai_conf):
    """
    Train the LightGBMClassifier model for ADA/BTC with the classifier test
    strategy and check that the model, metadata, trained dataframe and SVM
    model files were written. The output folder is removed afterwards.
    """
    freqai_conf.update({
        "timerange": "20180110-20180130",
        "freqaimodel": "LightGBMClassifier",
        "strategy": "freqai_test_classifier",
    })

    strategy = get_patched_freqai_strategy(mocker, freqai_conf)
    exchange = get_patched_exchange(mocker, freqai_conf)
    strategy.dp = DataProvider(freqai_conf, exchange)
    strategy.freqai_info = freqai_conf.get("freqai", {})

    freqai = strategy.freqai
    freqai.live = True
    freqai.dk = FreqaiDataKitchen(freqai_conf)

    history_range = TimeRange.parse_timerange("20180110-20180130")
    freqai.dd.load_all_pair_histories(history_range, freqai.dk)
    freqai.dd.pair_dict = MagicMock()

    load_range = TimeRange.parse_timerange("20180110-20180130")
    train_range = TimeRange.parse_timerange("20180120-20180130")
    freqai.train_model_in_series(train_range, "ADA/BTC", strategy, freqai.dk, load_range)

    # Every artifact written by a training run must be present on disk.
    expected_artifacts = [
        f"{freqai.dk.model_filename}_model.joblib",
        f"{freqai.dk.model_filename}_metadata.json",
        f"{freqai.dk.model_filename}_trained_df.pkl",
        f"{freqai.dk.model_filename}_svm_model.joblib",
    ]
    for artifact in expected_artifacts:
        assert Path(freqai.dk.data_path / artifact).exists()

    shutil.rmtree(Path(freqai.dk.full_path))
def test_start_backtesting(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180120-20180130"})
strategy = get_patched_freqai_strategy(mocker, freqai_conf)

View File

@ -1403,6 +1403,7 @@ def test_api_strategies(botclient):
'StrategyTestV2',
'StrategyTestV3',
'StrategyTestV3Futures',
'freqai_test_classifier',
'freqai_test_multimodel_strat',
'freqai_test_strat'
]}

View File

@ -0,0 +1,138 @@
import logging
from functools import reduce
import pandas as pd
import talib.abstract as ta
from pandas import DataFrame
import numpy as np
from freqtrade.strategy import DecimalParameter, IntParameter, IStrategy, merge_informative_pair
logger = logging.getLogger(__name__)
class freqai_test_classifier(IStrategy):
    """
    Test strategy - used for testing freqAI classifier functionalities.
    Do not use in production.

    The single target column, '&s-up_or_down', holds the strings 'up' and
    'down' instead of a numeric value.
    """

    minimal_roi = {"0": 0.1, "240": -1}

    plot_config = {
        "main_plot": {},
        "subplots": {
            "prediction": {"prediction": {"color": "blue"}},
            "target_roi": {
                "target_roi": {"color": "brown"},
            },
            "do_predict": {
                "do_predict": {"color": "brown"},
            },
        },
    }

    process_only_new_candles = True
    stoploss = -0.05
    use_exit_signal = True
    startup_candle_count: int = 300
    can_short = False

    linear_roi_offset = DecimalParameter(
        0.00, 0.02, default=0.005, space="sell", optimize=False, load=True
    )
    max_roi_time_long = IntParameter(0, 800, default=400, space="sell", optimize=False, load=True)

    def informative_pairs(self):
        """
        Build the list of (pair, timeframe) tuples for every configured
        timeframe: all whitelisted pairs plus the correlated pairs that are
        not already whitelisted.
        """
        whitelist_pairs = self.dp.current_whitelist()
        corr_pairs = self.config["freqai"]["feature_parameters"]["include_corr_pairlist"]
        informative_pairs = []
        for tf in self.config["freqai"]["feature_parameters"]["include_timeframes"]:
            for pair in whitelist_pairs:
                informative_pairs.append((pair, tf))
            for pair in corr_pairs:
                if pair in whitelist_pairs:
                    continue  # avoid duplication
                informative_pairs.append((pair, tf))
        return informative_pairs

    def populate_any_indicators(
        self, pair, df, tf, informative=None, set_generalized_indicators=False
    ):
        """
        Add the feature columns (prefixed with '%') for one pair/timeframe to df.

        :param pair: pair name; the part before '/' is used in the column names.
        :param df: strategy dataframe the informative features are merged into.
        :param tf: timeframe of the informative data.
        :param informative: dataframe for pair/tf; fetched from the data provider
            when None.
        :param set_generalized_indicators: when True, also add the day-of-week and
            hour-of-day features and the '&s-up_or_down' target.
        :return: df with the merged feature columns (and the target if requested).
        """
        coin = pair.split('/')[0]

        with self.freqai.lock:
            if informative is None:
                informative = self.dp.get_pair_dataframe(pair, tf)

            # first loop is automatically duplicating indicators for time periods
            for t in self.freqai_info["feature_parameters"]["indicator_periods_candles"]:
                t = int(t)
                informative[f"%-{coin}rsi-period_{t}"] = ta.RSI(informative, timeperiod=t)
                informative[f"%-{coin}mfi-period_{t}"] = ta.MFI(informative, timeperiod=t)
                # ADX's period argument is `timeperiod` (as for RSI/MFI above);
                # `window` is not an ADX parameter.
                informative[f"%-{coin}adx-period_{t}"] = ta.ADX(informative, timeperiod=t)

            informative[f"%-{coin}pct-change"] = informative["close"].pct_change()
            informative[f"%-{coin}raw_volume"] = informative["volume"]
            informative[f"%-{coin}raw_price"] = informative["close"]

            indicators = [col for col in informative if col.startswith("%")]
            # This loop duplicates and shifts all indicators to add a sense of recency to data
            shifted_candles = self.freqai_info["feature_parameters"]["include_shifted_candles"]
            for n in range(1, shifted_candles + 1):
                informative_shift = informative[indicators].shift(n)
                informative_shift = informative_shift.add_suffix("_shift-" + str(n))
                informative = pd.concat((informative, informative_shift), axis=1)

            df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
            # Drop the raw candle columns of the informative timeframe after the merge.
            skip_columns = [
                (s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]
            ]
            df = df.drop(columns=skip_columns)

            # Add generalized indicators here (because in live, it will call this
            # function to populate indicators during training). Notice how we ensure not to
            # add them multiple times
            if set_generalized_indicators:
                df["%-day_of_week"] = (df["date"].dt.dayofweek + 1) / 7
                df["%-hour_of_day"] = (df["date"].dt.hour + 1) / 25

                # user adds targets here by prepending them with &-
                # If user wishes to use multiple targets, a multioutput prediction model
                # needs to be used such as templates/CatboostPredictionMultiModel.py
                # Target is 'up' when the close 100 candles ahead is above the
                # current close, otherwise 'down'.
                df['&s-up_or_down'] = np.where(df["close"].shift(-100) > df["close"], 'up', 'down')

        return df

    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Store the freqai config section on the strategy and return the
        dataframe produced by self.freqai.start().
        """
        self.freqai_info = self.config["freqai"]

        dataframe = self.freqai.start(dataframe, metadata, self)

        return dataframe

    def populate_entry_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
        """
        Set enter_long where '&s-up_or_down' equals 'up' and enter_short where
        it equals 'down'.
        """
        enter_long_conditions = [df['&s-up_or_down'] == 'up']

        if enter_long_conditions:
            df.loc[
                reduce(lambda x, y: x & y, enter_long_conditions), ["enter_long", "enter_tag"]
            ] = (1, "long")

        enter_short_conditions = [df['&s-up_or_down'] == 'down']

        if enter_short_conditions:
            df.loc[
                reduce(lambda x, y: x & y, enter_short_conditions), ["enter_short", "enter_tag"]
            ] = (1, "short")

        return df

    def populate_exit_trend(self, df: DataFrame, metadata: dict) -> DataFrame:
        """Return df unchanged; this test strategy defines no exit signals."""
        return df

View File

@ -34,7 +34,7 @@ def test_search_all_strategies_no_failed():
directory = Path(__file__).parent / "strats"
strategies = StrategyResolver.search_all_objects(directory, enum_failed=False)
assert isinstance(strategies, list)
assert len(strategies) == 8
assert len(strategies) == 9
assert isinstance(strategies[0], dict)
@ -42,10 +42,10 @@ def test_search_all_strategies_with_failed():
directory = Path(__file__).parent / "strats"
strategies = StrategyResolver.search_all_objects(directory, enum_failed=True)
assert isinstance(strategies, list)
assert len(strategies) == 9
assert len(strategies) == 10
# with enum_failed=True search_all_objects() shall find 2 good strategies
# and 1 which fails to load
assert len([x for x in strategies if x['class'] is not None]) == 8
assert len([x for x in strategies if x['class'] is not None]) == 9
assert len([x for x in strategies if x['class'] is None]) == 1