improve flexibility of user defined prediction dataframe

This commit is contained in:
Robert Caulk 2022-08-06 13:51:19 +02:00
parent fdc82f8302
commit c172ce1011
6 changed files with 42 additions and 31 deletions

View File

@ -62,13 +62,13 @@ pip install -r requirements-freqai.txt
## Running from the example files ## Running from the example files
An example strategy, an example prediction model, and example config can all be found in An example strategy, an example prediction model, and example config can all be found in
`freqtrade/templates/FreqaiExampleStrategy.py`, `freqtrade/freqai/prediction_models/LightGBMPredictionModel.py`, `freqtrade/templates/FreqaiExampleStrategy.py`, `freqtrade/freqai/prediction_models/LightGBMRegressor.py`,
`config_examples/config_freqai.example.json`, respectively. `config_examples/config_freqai.example.json`, respectively.
Assuming the user has downloaded the necessary data, Freqai can be executed from these templates with: Assuming the user has downloaded the necessary data, Freqai can be executed from these templates with:
```bash ```bash
freqtrade backtesting --config config_examples/config_freqai.example.json --strategy FreqaiExampleStrategy --freqaimodel LightGBMPredictionModel --strategy-path freqtrade/templates --timerange 20220101-20220201 freqtrade backtesting --config config_examples/config_freqai.example.json --strategy FreqaiExampleStrategy --freqaimodel LightGBMRegressor --strategy-path freqtrade/templates --timerange 20220101-20220201
``` ```
## Configuring the bot ## Configuring the bot
@ -111,7 +111,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
| `test_size` | Fraction of data that should be used for testing instead of training. <br> **Datatype:** positive float below 1. | `test_size` | Fraction of data that should be used for testing instead of training. <br> **Datatype:** positive float below 1.
| `shuffle` | Shuffle the training data points during training. Typically for time-series forecasting, this is set to False. <br> **Datatype:** boolean. | `shuffle` | Shuffle the training data points during training. Typically for time-series forecasting, this is set to False. <br> **Datatype:** boolean.
| | **Model training parameters** | | **Model training parameters**
| `model_training_parameters` | A flexible dictionary that includes all parameters available in the user-selected library. For example, if the user uses `LightGBMPredictionModel`, then this dictionary can contain any parameter accepted by the `LightGBMRegressor` [here](https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html). If the user selects a different model, then this dictionary can contain any parameter from that different model. <br> **Datatype:** dictionary. | `model_training_parameters` | A flexible dictionary that includes all parameters available in the user-selected library. For example, if the user uses `LightGBMRegressor`, then this dictionary can contain any parameter accepted by the `LightGBMRegressor` [here](https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMRegressor.html). If the user selects a different model, then this dictionary can contain any parameter from that different model. <br> **Datatype:** dictionary.
| `n_estimators` | A common parameter among regressors which sets the number of boosted trees to fit <br> **Datatype:** integer. | `n_estimators` | A common parameter among regressors which sets the number of boosted trees to fit <br> **Datatype:** integer.
| `learning_rate` | A common parameter among regressors which sets the boosting learning rate. <br> **Datatype:** float. | `learning_rate` | A common parameter among regressors which sets the boosting learning rate. <br> **Datatype:** float.
| `n_jobs`, `thread_count`, `task_type` | Different libraries use different parameter names to control the number of threads used for parallel processing or whether or not it is a `task_type` of `gpu` or `cpu`. <br> **Datatype:** float. | `n_jobs`, `thread_count`, `task_type` | Different libraries use different parameter names to control the number of threads used for parallel processing or whether or not it is a `task_type` of `gpu` or `cpu`. <br> **Datatype:** float.
@ -356,7 +356,7 @@ and adding this to the `train_period_days`. The units need to be in the base can
The freqai training/backtesting module can be executed with the following command: The freqai training/backtesting module can be executed with the following command:
```bash ```bash
freqtrade backtesting --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel LightGBMPredictionModel --timerange 20210501-20210701 freqtrade backtesting --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel LightGBMRegressor --timerange 20210501-20210701
``` ```
If this command has never been executed with the existing config file, then it will train a new model If this command has never been executed with the existing config file, then it will train a new model

View File

@ -245,7 +245,7 @@ class FreqaiDataDrawer:
logger.info(f'Setting initial FreqUI plots from historical data for {pair}.') logger.info(f'Setting initial FreqUI plots from historical data for {pair}.')
else: else:
for label in dk.label_list: for label in pred_df.columns:
mrv_df[label] = pred_df[label] mrv_df[label] = pred_df[label]
if mrv_df[label].dtype == object: if mrv_df[label].dtype == object:
continue continue
@ -278,15 +278,16 @@ class FreqaiDataDrawer:
# strat seems to feed us variable sized dataframes - and since we are trying to build our # strat seems to feed us variable sized dataframes - and since we are trying to build our
# own return array in the same shape, we need to figure out how the size has changed # own return array in the same shape, we need to figure out how the size has changed
# and adapt our stored/returned info accordingly. # and adapt our stored/returned info accordingly.
length_difference = len(self.model_return_values[pair]) - len_df
i = 0
if length_difference == 0: # length_difference = len(self.model_return_values[pair]) - len_df
i = 1 # i = 0
elif length_difference > 0:
i = length_difference + 1
df = self.model_return_values[pair] = self.model_return_values[pair].shift(-i) # if length_difference == 0:
# i = 1
# elif length_difference > 0:
# i = length_difference + 1
df = self.model_return_values[pair] = self.model_return_values[pair].shift(-1)
if pair in self.historic_predictions: if pair in self.historic_predictions:
hp_df = self.historic_predictions[pair] hp_df = self.historic_predictions[pair]
@ -296,7 +297,8 @@ class FreqaiDataDrawer:
hp_df = pd.concat([hp_df, nan_df], ignore_index=True, axis=0) hp_df = pd.concat([hp_df, nan_df], ignore_index=True, axis=0)
self.historic_predictions[pair] = hp_df[:-1] self.historic_predictions[pair] = hp_df[:-1]
for label in dk.label_list: # in case user adds additional "predictions" e.g. predict_proba output:
for label in predictions.columns:
df[label].iloc[-1] = predictions[label].iloc[-1] df[label].iloc[-1] = predictions[label].iloc[-1]
if df[label].dtype == object: if df[label].dtype == object:
continue continue
@ -318,11 +320,11 @@ class FreqaiDataDrawer:
for key in df.keys(): for key in df.keys():
self.historic_predictions[pair][key].iloc[-1] = df[key].iloc[-1] self.historic_predictions[pair][key].iloc[-1] = df[key].iloc[-1]
if length_difference < 0: # if length_difference < 0:
prepend_df = pd.DataFrame( # prepend_df = pd.DataFrame(
np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns # np.zeros((abs(length_difference) - 1, len(df.columns))), columns=df.columns
) # )
df = pd.concat([prepend_df, df], axis=0) # df = pd.concat([prepend_df, df], axis=0)
def attach_return_values_to_return_dataframe( def attach_return_values_to_return_dataframe(
self, pair: str, dataframe: DataFrame) -> DataFrame: self, pair: str, dataframe: DataFrame) -> DataFrame:
@ -343,7 +345,12 @@ class FreqaiDataDrawer:
dk.find_features(dataframe) dk.find_features(dataframe)
for label in dk.label_list: if self.freqai_info.get('predict_proba', []):
full_labels = dk.label_list + self.freqai_info['predict_proba']
else:
full_labels = dk.label_list
for label in full_labels:
dataframe[label] = 0 dataframe[label] = 0
dataframe[f"{label}_mean"] = 0 dataframe[f"{label}_mean"] = 0
dataframe[f"{label}_std"] = 0 dataframe[f"{label}_std"] = 0

View File

@ -342,7 +342,7 @@ class FreqaiDataKitchen:
:df: Dataframe of predictions to be denormalized :df: Dataframe of predictions to be denormalized
""" """
for label in self.label_list: for label in df.columns:
if df[label].dtype == object: if df[label].dtype == object:
continue continue
df[label] = ( df[label] = (
@ -716,14 +716,16 @@ class FreqaiDataKitchen:
weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1] weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
return weights return weights
def append_predictions(self, predictions, do_predict, len_dataframe): def append_predictions(self, predictions: DataFrame, do_predict: npt.ArrayLike) -> None:
""" """
Append backtest prediction from current backtest period to all previous periods Append backtest prediction from current backtest period to all previous periods
""" """
append_df = DataFrame() append_df = DataFrame()
for label in self.label_list: for label in predictions.columns:
append_df[label] = predictions[label] append_df[label] = predictions[label]
if append_df[label].dtype == object:
continue
append_df[f"{label}_mean"] = self.data["labels_mean"][label] append_df[f"{label}_mean"] = self.data["labels_mean"][label]
append_df[f"{label}_std"] = self.data["labels_std"][label] append_df[f"{label}_std"] = self.data["labels_std"][label]
@ -1009,7 +1011,7 @@ class FreqaiDataKitchen:
import scipy as spy import scipy as spy
self.data["labels_mean"], self.data["labels_std"] = {}, {} self.data["labels_mean"], self.data["labels_std"] = {}, {}
for label in self.label_list: for label in self.data_dictionary["train_labels"].columns:
if self.data_dictionary["train_labels"][label].dtype == object: if self.data_dictionary["train_labels"][label].dtype == object:
continue continue
f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label]) f = spy.stats.norm.fit(self.data_dictionary["train_labels"][label])

View File

@ -221,7 +221,7 @@ class IFreqaiModel(ABC):
pred_df, do_preds = self.predict(dataframe_backtest, dk) pred_df, do_preds = self.predict(dataframe_backtest, dk)
dk.append_predictions(pred_df, do_preds, len(dataframe_backtest)) dk.append_predictions(pred_df, do_preds)
dk.fill_predictions(dataframe) dk.fill_predictions(dataframe)
@ -543,15 +543,17 @@ class IFreqaiModel(ABC):
self.dd.historic_predictions[pair] = pred_df self.dd.historic_predictions[pair] = pred_df
hist_preds_df = self.dd.historic_predictions[pair] hist_preds_df = self.dd.historic_predictions[pair]
for label in hist_preds_df.columns:
if hist_preds_df[label].dtype == object:
continue
hist_preds_df[f'{label}_mean'] = 0
hist_preds_df[f'{label}_std'] = 0
hist_preds_df['do_predict'] = 0 hist_preds_df['do_predict'] = 0
if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0: if self.freqai_info['feature_parameters'].get('DI_threshold', 0) > 0:
hist_preds_df['DI_values'] = 0 hist_preds_df['DI_values'] = 0
for label in dk.data['labels_mean']:
hist_preds_df[f'{label}_mean'] = 0
hist_preds_df[f'{label}_std'] = 0
for return_str in dk.data['extra_returns_per_train']: for return_str in dk.data['extra_returns_per_train']:
hist_preds_df[return_str] = 0 hist_preds_df[return_str] = 0

View File

@ -47,7 +47,7 @@ def freqai_conf(default_conf, tmpdir):
"indicator_periods_candles": [10], "indicator_periods_candles": [10],
}, },
"data_split_parameters": {"test_size": 0.33, "random_state": 1}, "data_split_parameters": {"test_size": 0.33, "random_state": 1},
"model_training_parameters": {"n_estimators": 100, "verbosity": 0}, "model_training_parameters": {"n_estimators": 100},
}, },
"config_files": [Path('config_examples', 'config_freqai.example.json')] "config_files": [Path('config_examples', 'config_freqai.example.json')]
} }

View File

@ -74,8 +74,8 @@ def test_train_model_in_series_LightGBMMultiModel(mocker, freqai_conf):
def test_train_model_in_series_Catboost(mocker, freqai_conf): def test_train_model_in_series_Catboost(mocker, freqai_conf):
freqai_conf.update({"timerange": "20180110-20180130"}) freqai_conf.update({"timerange": "20180110-20180130"})
freqai_conf.update({"freqaimodel": "CatboostRegressor"}) freqai_conf.update({"freqaimodel": "CatboostRegressor"})
freqai_conf.get('freqai', {}).update( # freqai_conf.get('freqai', {}).update(
{'model_training_parameters': {"n_estimators": 100, "verbose": 0}}) # {'model_training_parameters': {"n_estimators": 100, "verbose": 0}})
strategy = get_patched_freqai_strategy(mocker, freqai_conf) strategy = get_patched_freqai_strategy(mocker, freqai_conf)
exchange = get_patched_exchange(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf)
strategy.dp = DataProvider(freqai_conf, exchange) strategy.dp = DataProvider(freqai_conf, exchange)