alleviate FutureWarning in sklearn about ensuring svm model features are passed with identical order

This commit is contained in:
robcaulk 2022-05-24 14:46:16 +02:00
parent 255d35976e
commit 31ae2b3060
4 changed files with 61 additions and 11 deletions

View File

@ -105,11 +105,11 @@ config setup includes:
### Building the feature set
Most of these parameters are controlling the feature data set. Features are added by the user
inside the `populate_any_indicators()` method of the strategy by prepending indicators with `%`:
Features are added by the user inside the `populate_any_indicators()` method of the strategy
by prepending indicators with `%`:
```python
def populate_any_indicators(self, pair, df, tf, informative=None, coin=""):
def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
informative['%-' + coin + "rsi"] = ta.RSI(informative, timeperiod=14)
informative['%-' + coin + "mfi"] = ta.MFI(informative, timeperiod=25)
informative['%-' + coin + "adx"] = ta.ADX(informative, window=20)
@ -120,11 +120,46 @@ inside the `populate_any_indicators()` method of the strategy by prepending indi
informative['%-' + coin + "bb_width"] = (
informative[coin + "bb_upperband"] - informative[coin + "bb_lowerband"]
) / informative[coin + "bb_middleband"]
# The following code automatically adds features according to the `shift` parameter passed
# in the config. Do not remove
indicators = [col for col in informative if col.startswith('%')]
for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
if n == 0:
continue
informative_shift = informative[indicators].shift(n)
informative_shift = informative_shift.add_suffix("_shift-" + str(n))
informative = pd.concat((informative, informative_shift), axis=1)
# The following code safely merges into the base timeframe.
# Do not remove.
df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]]
df = df.drop(columns=skip_columns)
```
The user of the present example does not want to pass the `bb_lowerband` as a feature to the model,
and has therefore not prepended it with `%`. The user does, however, wish to pass `bb_width` to the
model for training/prediction and has therefore prepended it with `%`.
Note: features **must** be defined in `populate_any_indicators()`. Making features in `populate_indicators()`
will fail in live/dry. If the user wishes to add generalized features that are not associated with
a specific pair or timeframe, they should use the following structure inside `populate_any_indicators()`
(as exemplified in `freqtrade/templates/FreqaiExampleStrategy.py`):
```python
def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
# Add generalized indicators here (because in live, it will call only this function to populate
# indicators for retraining). Notice how we ensure not to add them multiple times by associating
# these generalized indicators to the basepair/timeframe
if pair == metadata['pair'] and tf == self.timeframe:
df['%-day_of_week'] = (df["date"].dt.dayofweek + 1) / 7
df['%-hour_of_day'] = (df['date'].dt.hour + 1) / 25
(Please see the example script located in `freqtrade/templates/FreqaiExampleStrategy.py` for a full example of `populate_any_indicators()`)
The `timeframes` from the example config above are the timeframes of each `populate_any_indicators()`

View File

@ -823,7 +823,9 @@ class FreqaiDataKitchen:
pairs = self.freqai_config.get("corr_pairlist", [])
for tf in self.freqai_config.get("timeframes"):
dataframe = strategy.populate_any_indicators(metadata['pair'],
dataframe = strategy.populate_any_indicators(
metadata,
metadata['pair'],
dataframe.copy(),
tf,
base_dataframes[tf],
@ -833,7 +835,9 @@ class FreqaiDataKitchen:
for i in pairs:
if metadata['pair'] in i:
continue # dont repeat anything from whitelist
dataframe = strategy.populate_any_indicators(i,
dataframe = strategy.populate_any_indicators(
metadata,
i,
dataframe.copy(),
tf,
corr_dataframes[i][tf],

View File

@ -532,7 +532,7 @@ class IStrategy(ABC, HyperStrategyMixin):
"""
return None
def populate_any_indicators(self, pair: str, df: DataFrame, tf: str,
def populate_any_indicators(self, metadata: dict, pair: str, df: DataFrame, tf: str,
informative: DataFrame = None, coin: str = "") -> DataFrame:
"""
Function designed to automatically generate, name and merge features

View File

@ -63,7 +63,7 @@ class FreqaiExampleStrategy(IStrategy):
def bot_start(self):
self.model = CustomModel(self.config)
def populate_any_indicators(self, pair, df, tf, informative=None, coin=""):
def populate_any_indicators(self, metadata, pair, df, tf, informative=None, coin=""):
"""
Function designed to automatically generate, name and merge features
from user indicated timeframes in the configuration file. User controls the indicators
@ -124,8 +124,9 @@ class FreqaiExampleStrategy(IStrategy):
informative[coin + "pct-change"] = informative["close"].pct_change()
# The following code automatically adds features according to the `shift` parameter passed
# in the config. Do not remove
indicators = [col for col in informative if col.startswith('%')]
for n in range(self.freqai_info["feature_parameters"]["shift"] + 1):
if n == 0:
continue
@ -133,28 +134,38 @@ class FreqaiExampleStrategy(IStrategy):
informative_shift = informative_shift.add_suffix("_shift-" + str(n))
informative = pd.concat((informative, informative_shift), axis=1)
# The following code safely merges into the base timeframe.
# Do not remove.
df = merge_informative_pair(df, informative, self.config["timeframe"], tf, ffill=True)
skip_columns = [(s + "_" + tf) for s in ["date", "open", "high", "low", "close", "volume"]]
df = df.drop(columns=skip_columns)
# Add generalized indicators (not associated to any individual coin or timeframe) here
# because in live, it will call this function to populate
# indicators during training. Notice how we ensure not to add them multiple times
if pair == metadata['pair'] and tf == self.timeframe:
df['%-day_of_week'] = (df["date"].dt.dayofweek + 1) / 7
df['%-hour_of_day'] = (df['date'].dt.hour + 1) / 25
return df
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
# the configuration file parameters are stored here
self.freqai_info = self.config["freqai"]
self.pair = metadata['pair']
# the following loops are necessary for building the features
# indicated by the user in the configuration file.
# All indicators must be populated by populate_any_indicators() for live functionality
# to work correctly.
for tf in self.freqai_info["timeframes"]:
dataframe = self.populate_any_indicators(self.pair, dataframe.copy(), tf,
dataframe = self.populate_any_indicators(metadata, self.pair, dataframe.copy(), tf,
coin=self.pair.split("/")[0] + "-")
for pair in self.freqai_info["corr_pairlist"]:
if metadata['pair'] in pair:
continue # do not include whitelisted pair twice if it is in corr_pairlist
dataframe = self.populate_any_indicators(
pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
metadata, pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
)
# the model will return 4 values, its prediction, an indication of whether or not the