fix bug for target_mean/std array merging in backtesting

This commit is contained in:
robcaulk 2022-05-26 21:07:50 +02:00
parent ff531c416f
commit 6193205012
6 changed files with 186 additions and 110 deletions

View File

@ -1,7 +1,7 @@
{ {
"max_open_trades": 1, "max_open_trades": 1,
"stake_currency": "USDT", "stake_currency": "USDT",
"stake_amount": 800, "stake_amount": 900,
"tradable_balance_ratio": 1, "tradable_balance_ratio": 1,
"fiat_display_currency": "USD", "fiat_display_currency": "USD",
"dry_run": true, "dry_run": true,
@ -24,8 +24,7 @@
"rateLimit": 200 "rateLimit": 200
}, },
"pair_whitelist": [ "pair_whitelist": [
"BTC/USDT", "BTC/USDT"
"ETH/USDT"
], ],
"pair_blacklist": [] "pair_blacklist": []
}, },
@ -55,7 +54,7 @@
"15m" "15m"
], ],
"train_period": 30, "train_period": 30,
"backtest_period": 10, "backtest_period": 7,
"identifier": "example", "identifier": "example",
"live_trained_timestamp": 0, "live_trained_timestamp": 0,
"corr_pairlist": [ "corr_pairlist": [
@ -64,16 +63,16 @@
"DOT/USDT" "DOT/USDT"
], ],
"feature_parameters": { "feature_parameters": {
"period": 12, "period": 24,
"shift": 1, "shift": 1,
"DI_threshold": 1, "DI_threshold": 0,
"weight_factor": 0, "weight_factor": 0.9,
"principal_component_analysis": false, "principal_component_analysis": false,
"use_SVM_to_remove_outliers": false, "use_SVM_to_remove_outliers": true,
"stratify": 0 "stratify": 3
}, },
"data_split_parameters": { "data_split_parameters": {
"test_size": 0.25, "test_size": 0.33,
"random_state": 1 "random_state": 1
}, },
"model_training_parameters": { "model_training_parameters": {

View File

@ -221,19 +221,29 @@ This way, the user can return to using any model they wish by simply changing th
### Building a freqai strategy ### Building a freqai strategy
The Freqai strategy requires the user to include the following lines of code in `populate_any_indicators()` The Freqai strategy requires the user to include the following lines of code in the strategy:
```python ```python
from freqtrade.freqai.strategy_bridge import CustomModel from freqtrade.freqai.strategy_bridge import CustomModel
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame: def informative_pairs(self):
# the configuration file parameters are stored here whitelist_pairs = self.dp.current_whitelist()
self.freqai_info = self.config['freqai'] corr_pairs = self.config["freqai"]["corr_pairlist"]
informative_pairs = []
for tf in self.config["freqai"]["timeframes"]:
for pair in whitelist_pairs:
informative_pairs.append((pair, tf))
for pair in corr_pairs:
if pair in whitelist_pairs:
continue # avoid duplication
informative_pairs.append((pair, tf))
return informative_pairs
# the model is instantiated here def bot_start(self):
self.model = CustomModel(self.config) self.model = CustomModel(self.config)
print('Populating indicators...') def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
self.freqai_info = self.config['freqai']
# the following loops are necessary for building the features # the following loops are necessary for building the features
# indicated by the user in the configuration file. # indicated by the user in the configuration file.
@ -314,7 +324,7 @@ data point and all other training data points:
$$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$ $$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$
where $d_{ab}$ is the distance between the standardized points $a$ and $b$. $p$ where $d_{ab}$ is the distance between the normalized points $a$ and $b$. $p$
is the number of features i.e. the length of the vector $X$. The is the number of features i.e. the length of the vector $X$. The
characteristic distance, $\overline{d}$ for a set of training data points is simply the mean characteristic distance, $\overline{d}$ for a set of training data points is simply the mean
of the average distances: of the average distances:
@ -394,11 +404,61 @@ which will split the data chronologically so that every X data points is a testing
present example, the user is asking for every third data point in the dataframe to be used for present example, the user is asking for every third data point in the dataframe to be used for
testing, the other points are used for training. testing, the other points are used for training.
<!-- ## Dynamic target expectation
The labels used for model training have a unique statistical distribution for each separate model training.
We can use this information to know if our current prediction is in the realm of what the model was trained on,
and if so, what is the statistical probability of the current prediction. With this information, we can
make more informed predictions.
FreqAI builds this label distribution and provides a quantile to the strategy, which can be optionally used as a
dynamic threshold. The `target_quantile: X` means that X% of the labels are below this value. So setting:
```json
"freqai": {
"feature_parameters" : {
"target_quantile": 0.9
}
}
```
This means the strategy will receive the label threshold below which 90% of the labels fell.
An example usage in the strategy may look something like:
```python
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
# ... #
(
dataframe["prediction"],
dataframe["do_predict"],
dataframe["target_upper_quantile"],
dataframe["target_lower_quantile"],
) = self.model.bridge.start(dataframe, metadata, self)
return dataframe
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
buy_conditions = [
(dataframe["prediction"] > dataframe["target_upper_quantile"]) & (dataframe["do_predict"] == 1)
]
if buy_conditions:
dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), "buy"] = 1
return dataframe
``` -->
## Additional information ## Additional information
### Feature standardization ### Feature normalization
The feature set created by the user is automatically standardized to the training The feature set created by the user is automatically normalized to the training
data only. This includes all test data and unseen prediction data (dry/live/backtest). data only. This includes all test data and unseen prediction data (dry/live/backtest).
### File structure ### File structure

View File

@ -141,7 +141,7 @@ class FreqaiDataKitchen:
:model: User trained model which can be inferenced for new predictions :model: User trained model which can be inferenced for new predictions
""" """
# if self.live: if self.live:
self.model_filename = self.data_drawer.pair_dict[coin]['model_filename'] self.model_filename = self.data_drawer.pair_dict[coin]['model_filename']
self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path']) self.data_path = Path(self.data_drawer.pair_dict[coin]['data_path'])
@ -329,42 +329,6 @@ class FreqaiDataKitchen:
:data_dictionary: updated dictionary with standardized values. :data_dictionary: updated dictionary with standardized values.
""" """
# standardize the data by training stats # standardize the data by training stats
train_mean = data_dictionary["train_features"].mean()
train_std = data_dictionary["train_features"].std()
data_dictionary["train_features"] = (
data_dictionary["train_features"] - train_mean
) / train_std
data_dictionary["test_features"] = (
data_dictionary["test_features"] - train_mean
) / train_std
train_labels_std = data_dictionary["train_labels"].std()
train_labels_mean = data_dictionary["train_labels"].mean()
data_dictionary["train_labels"] = (
data_dictionary["train_labels"] - train_labels_mean
) / train_labels_std
data_dictionary["test_labels"] = (
data_dictionary["test_labels"] - train_labels_mean
) / train_labels_std
for item in train_std.keys():
self.data[item + "_std"] = train_std[item]
self.data[item + "_mean"] = train_mean[item]
self.data["labels_std"] = train_labels_std
self.data["labels_mean"] = train_labels_mean
return data_dictionary
def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
"""
Standardize all data in the data_dictionary according to the training dataset
:params:
:data_dictionary: dictionary containing the cleaned and split training/test data/labels
:returns:
:data_dictionary: updated dictionary with standardized values.
"""
# standardize the data by training stats
train_max = data_dictionary["train_features"].max() train_max = data_dictionary["train_features"].max()
train_min = data_dictionary["train_features"].min() train_min = data_dictionary["train_features"].min()
data_dictionary["train_features"] = 2 * ( data_dictionary["train_features"] = 2 * (
@ -392,9 +356,9 @@ class FreqaiDataKitchen:
return data_dictionary return data_dictionary
def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame: def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
""" """
Standardizes a set of data using the mean and standard deviation from Normalize a set of data using the mean and standard deviation from
the associated training data. the associated training data.
:params: :params:
:df: Dataframe to be standardized :df: Dataframe to be standardized
@ -406,19 +370,6 @@ class FreqaiDataKitchen:
return df return df
def normalize_data_from_metadata(self, df: DataFrame) -> DataFrame:
"""
Normalizes a set of data using the mean and standard deviation from
the associated training data.
:params:
:df: Dataframe to be standardized
"""
for item in df.keys():
df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
return df
def split_timerange( def split_timerange(
self, tr: str, train_split: int = 28, bt_split: int = 7 self, tr: str, train_split: int = 28, bt_split: int = 7
) -> Tuple[list, list]: ) -> Tuple[list, list]:
@ -657,12 +608,12 @@ class FreqaiDataKitchen:
""" """
ones = np.ones(len_dataframe) ones = np.ones(len_dataframe)
s_mean, s_std = ones * self.data["s_mean"], ones * self.data["s_std"] target_mean, target_std = ones * self.data["target_mean"], ones * self.data["target_std"]
self.full_predictions = np.append(self.full_predictions, predictions) self.full_predictions = np.append(self.full_predictions, predictions)
self.full_do_predict = np.append(self.full_do_predict, do_predict) self.full_do_predict = np.append(self.full_do_predict, do_predict)
self.full_target_mean = np.append(self.full_target_mean, s_mean) self.full_target_mean = np.append(self.full_target_mean, target_mean)
self.full_target_std = np.append(self.full_target_std, s_std) self.full_target_std = np.append(self.full_target_std, target_std)
return return
@ -827,6 +778,23 @@ class FreqaiDataKitchen:
return dataframe return dataframe
def fit_labels(self) -> None:
import scipy as spy
f = spy.stats.norm.fit(self.data_dictionary["train_labels"])
# KEEPME incase we want to let user start to grab quantiles.
# upper_q = spy.stats.norm.ppf(self.freqai_config['feature_parameters'][
# 'target_quantile'], *f)
# lower_q = spy.stats.norm.ppf(1 - self.freqai_config['feature_parameters'][
# 'target_quantile'], *f)
self.data["target_mean"], self.data["target_std"] = f[0], f[1]
# self.data["upper_quantile"] = upper_q
# self.data["lower_quantile"] = lower_q
return
def np_encoder(self, object): def np_encoder(self, object):
if isinstance(object, np.generic): if isinstance(object, np.generic):
return object.item() return object.item()
@ -968,3 +936,52 @@ class FreqaiDataKitchen:
# ) # )
# return # return
# def standardize_data(self, data_dictionary: Dict) -> Dict[Any, Any]:
# """
# standardize all data in the data_dictionary according to the training dataset
# :params:
# :data_dictionary: dictionary containing the cleaned and split training/test data/labels
# :returns:
# :data_dictionary: updated dictionary with standardized values.
# """
# # standardize the data by training stats
# train_mean = data_dictionary["train_features"].mean()
# train_std = data_dictionary["train_features"].std()
# data_dictionary["train_features"] = (
# data_dictionary["train_features"] - train_mean
# ) / train_std
# data_dictionary["test_features"] = (
# data_dictionary["test_features"] - train_mean
# ) / train_std
# train_labels_std = data_dictionary["train_labels"].std()
# train_labels_mean = data_dictionary["train_labels"].mean()
# data_dictionary["train_labels"] = (
# data_dictionary["train_labels"] - train_labels_mean
# ) / train_labels_std
# data_dictionary["test_labels"] = (
# data_dictionary["test_labels"] - train_labels_mean
# ) / train_labels_std
# for item in train_std.keys():
# self.data[item + "_std"] = train_std[item]
# self.data[item + "_mean"] = train_mean[item]
# self.data["labels_std"] = train_labels_std
# self.data["labels_mean"] = train_labels_mean
# return data_dictionary
# def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
# """
# Normalizes a set of data using the mean and standard deviation from
# the associated training data.
# :params:
# :df: Dataframe to be standardized
# """
# for item in df.keys():
# df[item] = (df[item] - self.data[item + "_mean"]) / self.data[item + "_std"]
# return df

View File

@ -158,12 +158,7 @@ class IFreqaiModel(ABC):
else: else:
self.model = dh.load_data(metadata['pair']) self.model = dh.load_data(metadata['pair'])
# strategy_provided_features = self.dh.find_features(dataframe_train) self.check_if_feature_list_matches_strategy(dataframe_train, dh)
# # FIXME doesnt work with PCA
# if strategy_provided_features != self.dh.training_features_list:
# logger.info("User changed input features, retraining model.")
# self.model = self.train(dataframe_train, metadata)
# self.dh.save_data(self.model)
preds, do_preds = self.predict(dataframe_backtest, dh) preds, do_preds = self.predict(dataframe_backtest, dh)
@ -220,16 +215,23 @@ class IFreqaiModel(ABC):
self.model = dh.load_data(coin=metadata['pair']) self.model = dh.load_data(coin=metadata['pair'])
# FIXME self.check_if_feature_list_matches_strategy(dataframe, dh)
# strategy_provided_features = dh.find_features(dataframe)
# if strategy_provided_features != dh.training_features_list:
# self.train_model_in_series(new_trained_timerange, metadata, strategy)
preds, do_preds = self.predict(dataframe, dh) preds, do_preds = self.predict(dataframe, dh)
dh.append_predictions(preds, do_preds, len(dataframe)) dh.append_predictions(preds, do_preds, len(dataframe))
return dh return dh
def check_if_feature_list_matches_strategy(self, dataframe: DataFrame,
dh: FreqaiDataKitchen) -> None:
strategy_provided_features = dh.find_features(dataframe)
if strategy_provided_features != dh.training_features_list:
raise OperationalException("Trying to access pretrained model with `identifier` "
"but found different features furnished by current strategy."
"Change `identifer` to train from scratch, or ensure the"
"strategy is furnishing the same features as the pretrained"
"model")
def data_cleaning_train(self, dh: FreqaiDataKitchen) -> None: def data_cleaning_train(self, dh: FreqaiDataKitchen) -> None:
""" """
Base data cleaning method for train Base data cleaning method for train
@ -237,6 +239,7 @@ class IFreqaiModel(ABC):
based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example based on user decided logic. See FreqaiDataKitchen::remove_outliers() for an example
of how outlier data points are dropped from the dataframe used for training. of how outlier data points are dropped from the dataframe used for training.
""" """
if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'): if self.freqai_info.get('feature_parameters', {}).get('principal_component_analysis'):
dh.principal_component_analysis() dh.principal_component_analysis()

View File

@ -33,10 +33,6 @@ class CatboostPredictionModel(IFreqaiModel):
/ dataframe["close"] / dataframe["close"]
- 1 - 1
) )
dh.data["s_mean"] = dataframe["s"].mean()
dh.data["s_std"] = dataframe["s"].std()
# logger.info("label mean", dh.data["s_mean"], "label std", dh.data["s_std"])
return dataframe["s"] return dataframe["s"]
@ -68,8 +64,9 @@ class CatboostPredictionModel(IFreqaiModel):
# split data into train/test data. # split data into train/test data.
data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered) data_dictionary = dh.make_train_test_datasets(features_filtered, labels_filtered)
# standardize all data based on train_dataset only dh.fit_labels() # fit labels to a normal distribution so we know what to expect in strategy
data_dictionary = dh.standardize_data(data_dictionary) # normalize all data based on train_dataset only
data_dictionary = dh.normalize_data(data_dictionary)
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
self.data_cleaning_train(dh) self.data_cleaning_train(dh)
@ -128,7 +125,7 @@ class CatboostPredictionModel(IFreqaiModel):
filtered_dataframe, _ = dh.filter_features( filtered_dataframe, _ = dh.filter_features(
unfiltered_dataframe, original_feature_list, training_filter=False unfiltered_dataframe, original_feature_list, training_filter=False
) )
filtered_dataframe = dh.standardize_data_from_metadata(filtered_dataframe) filtered_dataframe = dh.normalize_data_from_metadata(filtered_dataframe)
dh.data_dictionary["prediction_features"] = filtered_dataframe dh.data_dictionary["prediction_features"] = filtered_dataframe
# optional additional data cleaning/analysis # optional additional data cleaning/analysis
@ -136,7 +133,7 @@ class CatboostPredictionModel(IFreqaiModel):
predictions = self.model.predict(dh.data_dictionary["prediction_features"]) predictions = self.model.predict(dh.data_dictionary["prediction_features"])
# compute the non-standardized predictions # compute the non-normalized predictions
dh.predictions = (predictions + 1) * (dh.data["labels_max"] - dh.predictions = (predictions + 1) * (dh.data["labels_max"] -
dh.data["labels_min"]) / 2 + dh.data["labels_min"] dh.data["labels_min"]) / 2 + dh.data["labels_min"]

View File

@ -178,8 +178,8 @@ class FreqaiExampleStrategy(IStrategy):
dataframe["target_std"], dataframe["target_std"],
) = self.model.bridge.start(dataframe, metadata, self) ) = self.model.bridge.start(dataframe, metadata, self)
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 1.5 dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"]
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1 dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"]
return dataframe return dataframe
def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame: