create more flexible whitelist, avoid duplicating whitelist features into corr_pairlist, update docs

This commit is contained in:
robcaulk 2022-05-09 17:01:49 +02:00
parent 22bd5556ed
commit 9b3e5faebe
6 changed files with 119 additions and 39 deletions

View File

@ -24,7 +24,8 @@
"rateLimit": 200
},
"pair_whitelist": [
"BTC/USDT"
"BTC/USDT",
"ETH/USDT"
],
"pair_blacklist": []
},
@ -55,7 +56,7 @@
],
"train_period": 30,
"backtest_period": 7,
"identifier": "livetest5",
"identifier": "new_corrlist",
"live_trained_timerange": "20220330-20220429",
"live_full_backtestrange": "20220302-20220501",
"base_features": [

View File

@ -65,8 +65,6 @@ config setup includes:
"feature_parameters" : {
"period": 24,
"shift": 2,
"drop_features": false,
"DI_threshold": 1,
"weight_factor": 0,
},
"data_split_parameters" : {
@ -79,8 +77,7 @@ config setup includes:
"learning_rate": 0.02,
"task_type": "CPU",
},
},
}
```
### Building the feature set
@ -153,8 +150,6 @@ The Freqai strategy requires the user to include the following lines of code in
# the following loops are necessary for building the features
# indicated by the user in the configuration file.
for tf in self.freqai_info['timeframes']:
dataframe = self.populate_any_indicators(metadata['pair'],
dataframe.copy(), tf)
for i in self.freqai_info['corr_pairlist']:
dataframe = self.populate_any_indicators(i,
dataframe.copy(), tf, coin=i.split("/")[0]+'-')
@ -177,8 +172,36 @@ and `make_labels()` to let them customize various aspects of their training proc
### Running the model live
TODO: Freqai is not automated for live yet.
Freqai can be run dry/live using the following command
```bash
freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel ExamplePredictionModel
```
By default, Freqai will not find any existing models and will start by training a new one
given the user configuration settings. Following training, it will use that model to predict for the
duration of `backtest_period`. After a full `backtest_period` has elapsed, Freqai will auto retrain
a new model, and begin making predictions with the updated model.
If the user wishes to start dry/live from a saved model, the following configuration
parameters need to be set:
```json
"freqai": {
"identifier": "example",
"live_trained_timerange": "20220330-20220429",
"live_full_backtestrange": "20220302-20220501"
}
```
Where the `identifier` is the same identifier which was set during the backtesting/training. Meanwhile,
the `live_trained_timerange` is the sub-trained timerange (the training window) which was set
during backtesting/training. These are available to the user inside `user_data/models/*/sub-train-*`.
`live_full_backtestrange` was the full data range associated with the backtest/training (the full time
window that the training window and backtesting windows slide through). These values can be located
inside the `user_data/models/` directory. In this case, although Freqai will initiate with a
pretrained model, if a full `backtest_period` has elapsed since the end of the user set
`live_trained_timerange`, it will self retrain.
## Data analysis techniques
### Controlling the model learning process
@ -226,12 +249,49 @@ $$ DI_k = d_k/\overline{d} $$
Equity and crypto markets suffer from a high level of non-patterned noise in the
form of outlier data points. The dissimilarity index allows predictions which
are outliers and not existent in the model feature space, to be thrown out due
to low levels of certainty. The user can tweak the DI with `DI_threshold` to increase
or decrease the extrapolation of the trained model.
to low levels of certainty. Activating the Dissimilarity Index can be achieved with:
```json
"freqai": {
"feature_parameters" : {
"DI_threshold": 1
}
}
```
The user can tweak the DI with `DI_threshold` to increase or decrease the extrapolation of the
trained model.
### Reducing data dimensionality with Principal Component Analysis
TO BE WRITTEN
Users can reduce the dimensionality of their features by activating the `principal_component_analysis`:
```json
"freqai": {
"feature_parameters" : {
"principal_component_analysis": true
}
}
```
Which will perform PCA on the features and reduce the dimensionality of the data so that the explained
variance of the data set is >= 0.999.
### Removing outliers based on feature statistical distributions
The user can tell Freqai to remove outlier data points from the training/test data sets by setting:
```json
"freqai": {
"feature_parameters" : {
"remove_outliers": true
}
}
```
Freqai will check the statistical distributions of each feature (or component if the user activated
`principal_component_analysis`) and remove any data point that sits more than 3 standard deviations away
from the mean.
## Additional information
### Feature standardization
@ -242,5 +302,5 @@ data only. This includes all test data and unseen prediction data (dry/live/back
### File structure
`user_data_dir/models/` contains all the data associated with the trainings and
backtestings. This file structure is heavily controlled and read by the `DataHandler()`
backtestings. This file structure is heavily controlled and read by the `FreqaiDataKitchen()`
and should thus not be modified.

View File

@ -485,7 +485,7 @@ class FreqaiDataKitchen:
return
def build_feature_list(self, config: dict) -> list:
def build_feature_list(self, config: dict, metadata: dict) -> list:
"""
Build the list of features that will be used to filter
the full dataframe. Feature list is constructed from the
@ -501,8 +501,10 @@ class FreqaiDataKitchen:
shift = ""
if n > 0:
shift = "_shift-" + str(n)
# features.append(ft + shift + "_" + tf)
features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
for p in config["freqai"]["corr_pairlist"]:
if metadata['pair'] in p:
continue # avoid duplicate features
features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
# logger.info("number of features %s", len(features))
@ -640,9 +642,10 @@ class FreqaiDataKitchen:
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
self.config, validate=False)
pairs = self.freqai_config['corr_pairlist'] + [metadata['pair']]
pairs = self.freqai_config['corr_pairlist']
if metadata['pair'] not in pairs:
pairs += metadata['pair'] # dont include pair twice
timerange = TimeRange.parse_timerange(new_timerange)
# data_handler = get_datahandler(datadir, data_format)
refresh_backtest_ohlcv_data(
exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
@ -656,33 +659,45 @@ class FreqaiDataKitchen:
def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any],
DataFrame]:
corr_dataframes: Dict[Any, Any] = {}
# pair_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']]
timerange = TimeRange.parse_timerange(new_timerange)
for p in pairs:
corr_dataframes[p] = {}
for tf in self.freqai_config['timeframes']:
for tf in self.freqai_config['timeframes']:
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
timeframe=tf,
pair=metadata['pair'], timerange=timerange)
for p in pairs:
if metadata['pair'] in p:
continue # dont repeat anything from whitelist
corr_dataframes[p] = {}
corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
timeframe=tf,
pair=p, timerange=timerange)
base_dataframe = [dataframe for key, dataframe in corr_dataframes.items()
if metadata['pair'] in key]
# base_dataframe = [dataframe for key, dataframe in corr_dataframes.items()
# if metadata['pair'] in key]
# [0] indexes the lowest tf for the basepair
return corr_dataframes, base_dataframe[0][self.config['timeframe']]
return corr_dataframes, base_dataframes
def use_strategy_to_populate_indicators(self, strategy: IStrategy, metadata: dict,
def use_strategy_to_populate_indicators(self, strategy: IStrategy,
corr_dataframes: dict,
dataframe: DataFrame) -> DataFrame:
base_dataframes: dict,
metadata: dict) -> DataFrame:
# dataframe = pair_dataframes[0] # this is the base tf pair df
dataframe = base_dataframes[self.config['timeframe']]
for tf in self.freqai_config["timeframes"]:
# dataframe = strategy.populate_any_indicators(metadata["pair"], dataframe.copy,
# tf, pair_dataframes[tf])
dataframe = strategy.populate_any_indicators(metadata['pair'],
dataframe.copy(),
tf,
base_dataframes[tf],
coin=metadata['pair'].split("/")[0] + "-"
)
for i in self.freqai_config["corr_pairlist"]:
if metadata['pair'] in i:
continue # dont repeat anything from whitelist
dataframe = strategy.populate_any_indicators(i,
dataframe.copy(),
tf,

View File

@ -93,7 +93,7 @@ class IFreqaiModel(ABC):
else:
self.model = self.dh.load_data()
preds, do_preds = self.predict(dataframe_backtest)
preds, do_preds = self.predict(dataframe_backtest, metadata)
self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))
print('predictions', len(self.dh.full_predictions),
@ -120,13 +120,13 @@ class IFreqaiModel(ABC):
if retrain or not file_exists:
self.dh.download_new_data_for_retraining(new_trained_timerange, metadata)
# dataframe = download-data
corr_dataframes, pair_dataframes = self.dh.load_pairs_histories(new_trained_timerange,
corr_dataframes, base_dataframes = self.dh.load_pairs_histories(new_trained_timerange,
metadata)
unfiltered_dataframe = self.dh.use_strategy_to_populate_indicators(strategy,
metadata,
corr_dataframes,
pair_dataframes)
base_dataframes,
metadata)
self.model = self.train(unfiltered_dataframe, metadata)
self.dh.save_data(self.model)
@ -134,7 +134,7 @@ class IFreqaiModel(ABC):
self.freqai_info
self.model = self.dh.load_data()
preds, do_preds = self.predict(dataframe)
preds, do_preds = self.predict(dataframe, metadata)
self.dh.append_predictions(preds, do_preds, len(dataframe))
# dataframe should have len 1 here
@ -175,7 +175,7 @@ class IFreqaiModel(ABC):
return
@abstractmethod
def predict(self, dataframe: DataFrame) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
def predict(self, dataframe: DataFrame, metadata: dict) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.

View File

@ -53,7 +53,7 @@ class ExamplePredictionModel(IFreqaiModel):
logger.info("--------------------Starting training--------------------")
# create the full feature list based on user config info
self.dh.training_features_list = self.dh.build_feature_list(self.config)
self.dh.training_features_list = self.dh.build_feature_list(self.config, metadata)
unfiltered_labels = self.make_labels(unfiltered_dataframe)
# filter the features requested by user in the configuration file and elegantly handle NaNs
@ -114,7 +114,8 @@ class ExamplePredictionModel(IFreqaiModel):
return model
def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[DataFrame, DataFrame]:
def predict(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame,
DataFrame]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
@ -126,7 +127,7 @@ class ExamplePredictionModel(IFreqaiModel):
# logger.info("--------------------Starting prediction--------------------")
original_feature_list = self.dh.build_feature_list(self.config)
original_feature_list = self.dh.build_feature_list(self.config, metadata)
filtered_dataframe, _ = self.dh.filter_features(
unfiltered_dataframe, original_feature_list, training_filter=False
)

View File

@ -142,8 +142,11 @@ class FreqaiExampleStrategy(IStrategy):
# the following loops are necessary for building the features
# indicated by the user in the configuration file.
for tf in self.freqai_info["timeframes"]:
# dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf)
dataframe = self.populate_any_indicators(self.pair, dataframe.copy(), tf,
coin=self.pair.split("/")[0] + "-")
for pair in self.freqai_info["corr_pairlist"]:
if metadata['pair'] in pair:
continue # do not include whitelisted pair twice if it is in corr_pairlist
dataframe = self.populate_any_indicators(
pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
)