create more flexible whitelist, avoid duplicating whitelist features into corr_pairlist, update docs
This commit is contained in:
parent
22bd5556ed
commit
9b3e5faebe
@ -24,7 +24,8 @@
|
|||||||
"rateLimit": 200
|
"rateLimit": 200
|
||||||
},
|
},
|
||||||
"pair_whitelist": [
|
"pair_whitelist": [
|
||||||
"BTC/USDT"
|
"BTC/USDT",
|
||||||
|
"ETH/USDT"
|
||||||
],
|
],
|
||||||
"pair_blacklist": []
|
"pair_blacklist": []
|
||||||
},
|
},
|
||||||
@ -55,7 +56,7 @@
|
|||||||
],
|
],
|
||||||
"train_period": 30,
|
"train_period": 30,
|
||||||
"backtest_period": 7,
|
"backtest_period": 7,
|
||||||
"identifier": "livetest5",
|
"identifier": "new_corrlist",
|
||||||
"live_trained_timerange": "20220330-20220429",
|
"live_trained_timerange": "20220330-20220429",
|
||||||
"live_full_backtestrange": "20220302-20220501",
|
"live_full_backtestrange": "20220302-20220501",
|
||||||
"base_features": [
|
"base_features": [
|
||||||
|
@ -65,8 +65,6 @@ config setup includes:
|
|||||||
"feature_parameters" : {
|
"feature_parameters" : {
|
||||||
"period": 24,
|
"period": 24,
|
||||||
"shift": 2,
|
"shift": 2,
|
||||||
"drop_features": false,
|
|
||||||
"DI_threshold": 1,
|
|
||||||
"weight_factor": 0,
|
"weight_factor": 0,
|
||||||
},
|
},
|
||||||
"data_split_parameters" : {
|
"data_split_parameters" : {
|
||||||
@ -79,8 +77,7 @@ config setup includes:
|
|||||||
"learning_rate": 0.02,
|
"learning_rate": 0.02,
|
||||||
"task_type": "CPU",
|
"task_type": "CPU",
|
||||||
},
|
},
|
||||||
},
|
}
|
||||||
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### Building the feature set
|
### Building the feature set
|
||||||
@ -153,8 +150,6 @@ The Freqai strategy requires the user to include the following lines of code in
|
|||||||
# the following loops are necessary for building the features
|
# the following loops are necessary for building the features
|
||||||
# indicated by the user in the configuration file.
|
# indicated by the user in the configuration file.
|
||||||
for tf in self.freqai_info['timeframes']:
|
for tf in self.freqai_info['timeframes']:
|
||||||
dataframe = self.populate_any_indicators(metadata['pair'],
|
|
||||||
dataframe.copy(), tf)
|
|
||||||
for i in self.freqai_info['corr_pairlist']:
|
for i in self.freqai_info['corr_pairlist']:
|
||||||
dataframe = self.populate_any_indicators(i,
|
dataframe = self.populate_any_indicators(i,
|
||||||
dataframe.copy(), tf, coin=i.split("/")[0]+'-')
|
dataframe.copy(), tf, coin=i.split("/")[0]+'-')
|
||||||
@ -177,8 +172,36 @@ and `make_labels()` to let them customize various aspects of their training proc
|
|||||||
|
|
||||||
### Running the model live
|
### Running the model live
|
||||||
|
|
||||||
TODO: Freqai is not automated for live yet.
|
Freqai can be run dry/live using the following command
|
||||||
|
|
||||||
|
```bash
|
||||||
|
freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel ExamplePredictionModel
|
||||||
|
```
|
||||||
|
|
||||||
|
By default, Freqai will not find any existing models and will start by training a new one
|
||||||
|
given the user configuration settings. Following training, it will use that model to predict for the
|
||||||
|
duration of `backtest_period`. After a full `backtest_period` has elapsed, Freqai will auto retrain
|
||||||
|
a new model, and begin making predictions with the updated model.
|
||||||
|
|
||||||
|
If the user wishes to start dry/live from a saved model, the following configuration
|
||||||
|
parameters need to be set:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"freqai": {
|
||||||
|
"identifier": "example",
|
||||||
|
"live_trained_timerange": "20220330-20220429",
|
||||||
|
"live_full_backtestrange": "20220302-20220501"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Where the `identifier` is the same identifier which was set during the backtesting/training. Meanwhile,
|
||||||
|
the `live_trained_timerange` is the sub-trained timerange (the training window) which was set
|
||||||
|
during backtesting/training. These are available to the user inside `user_data/models/*/sub-train-*`.
|
||||||
|
`live_full_backtestrange` was the full data range associated with the backtest/training (the full time
|
||||||
|
window that the training window and backtesting windows slide through). These values can be located
|
||||||
|
inside the `user_data/models/` directory. In this case, although Freqai will initiate with a
|
||||||
|
pretrained model, if a full `backtest_period` has elapsed since the end of the user set
|
||||||
|
`live_trained_timerange`, it will self retrain.
|
||||||
|
|
||||||
## Data analysis techniques
|
## Data analysis techniques
|
||||||
### Controlling the model learning process
|
### Controlling the model learning process
|
||||||
@ -226,12 +249,49 @@ $$ DI_k = d_k/\overline{d} $$
|
|||||||
Equity and crypto markets suffer from a high level of non-patterned noise in the
|
Equity and crypto markets suffer from a high level of non-patterned noise in the
|
||||||
form of outlier data points. The dissimilarity index allows predictions which
|
form of outlier data points. The dissimilarity index allows predictions which
|
||||||
are outliers and not existent in the model feature space, to be thrown out due
|
are outliers and not existent in the model feature space, to be thrown out due
|
||||||
to low levels of certainty. The user can tweak the DI with `DI_threshold` to increase
|
to low levels of certainty. Activating the Dissimilarity Index can be achieved with:
|
||||||
or decrease the extrapolation of the trained model.
|
|
||||||
|
```json
|
||||||
|
"freqai": {
|
||||||
|
"feature_parameters" : {
|
||||||
|
"DI_threshold": 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The user can tweak the DI with `DI_threshold` to increase or decrease the extrapolation of the
|
||||||
|
trained model.
|
||||||
|
|
||||||
### Reducing data dimensionality with Principal Component Analysis
|
### Reducing data dimensionality with Principal Component Analysis
|
||||||
|
|
||||||
TO BE WRITTEN
|
Users can reduce the dimensionality of their features by activating the `principal_component_analysis`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"freqai": {
|
||||||
|
"feature_parameters" : {
|
||||||
|
"principal_component_analysis": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Which will perform PCA on the features and reduce the dimensionality of the data so that the explained
|
||||||
|
variance of the data set is >= 0.999.
|
||||||
|
|
||||||
|
### Removing outliers based on feature statistical distributions
|
||||||
|
|
||||||
|
The user can tell Freqai to remove outlier data points from the training/test data sets by setting:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"freqai": {
|
||||||
|
"feature_parameters" : {
|
||||||
|
"remove_outliers": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Freqai will check the statistical distributions of each feature (or component if the user activated
|
||||||
|
`principal_component_analysis`) and remove any data point that sits more than 3 standard deviations away
|
||||||
|
from the mean.
|
||||||
|
|
||||||
## Additional information
|
## Additional information
|
||||||
### Feature standardization
|
### Feature standardization
|
||||||
@ -242,5 +302,5 @@ data only. This includes all test data and unseen prediction data (dry/live/back
|
|||||||
### File structure
|
### File structure
|
||||||
|
|
||||||
`user_data_dir/models/` contains all the data associated with the trainings and
|
`user_data_dir/models/` contains all the data associated with the trainings and
|
||||||
backtestings. This file structure is heavily controlled and read by the `DataHandler()`
|
backtestings. This file structure is heavily controlled and read by the `FreqaiDataKitchen()`
|
||||||
and should thus not be modified.
|
and should thus not be modified.
|
||||||
|
@ -485,7 +485,7 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
def build_feature_list(self, config: dict) -> list:
|
def build_feature_list(self, config: dict, metadata: dict) -> list:
|
||||||
"""
|
"""
|
||||||
Build the list of features that will be used to filter
|
Build the list of features that will be used to filter
|
||||||
the full dataframe. Feature list is construced from the
|
the full dataframe. Feature list is construced from the
|
||||||
@ -501,8 +501,10 @@ class FreqaiDataKitchen:
|
|||||||
shift = ""
|
shift = ""
|
||||||
if n > 0:
|
if n > 0:
|
||||||
shift = "_shift-" + str(n)
|
shift = "_shift-" + str(n)
|
||||||
# features.append(ft + shift + "_" + tf)
|
features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
|
||||||
for p in config["freqai"]["corr_pairlist"]:
|
for p in config["freqai"]["corr_pairlist"]:
|
||||||
|
if metadata['pair'] in p:
|
||||||
|
continue # avoid duplicate features
|
||||||
features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
|
features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
|
||||||
|
|
||||||
# logger.info("number of features %s", len(features))
|
# logger.info("number of features %s", len(features))
|
||||||
@ -640,9 +642,10 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
|
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
|
||||||
self.config, validate=False)
|
self.config, validate=False)
|
||||||
pairs = self.freqai_config['corr_pairlist'] + [metadata['pair']]
|
pairs = self.freqai_config['corr_pairlist']
|
||||||
|
if metadata['pair'] not in pairs:
|
||||||
|
pairs += metadata['pair'] # dont include pair twice
|
||||||
timerange = TimeRange.parse_timerange(new_timerange)
|
timerange = TimeRange.parse_timerange(new_timerange)
|
||||||
# data_handler = get_datahandler(datadir, data_format)
|
|
||||||
|
|
||||||
refresh_backtest_ohlcv_data(
|
refresh_backtest_ohlcv_data(
|
||||||
exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
|
exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
|
||||||
@ -656,33 +659,45 @@ class FreqaiDataKitchen:
|
|||||||
def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any],
|
def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any],
|
||||||
DataFrame]:
|
DataFrame]:
|
||||||
corr_dataframes: Dict[Any, Any] = {}
|
corr_dataframes: Dict[Any, Any] = {}
|
||||||
# pair_dataframes: Dict[Any, Any] = {}
|
base_dataframes: Dict[Any, Any] = {}
|
||||||
pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']]
|
pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']]
|
||||||
timerange = TimeRange.parse_timerange(new_timerange)
|
timerange = TimeRange.parse_timerange(new_timerange)
|
||||||
|
|
||||||
for p in pairs:
|
for tf in self.freqai_config['timeframes']:
|
||||||
corr_dataframes[p] = {}
|
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
|
||||||
for tf in self.freqai_config['timeframes']:
|
timeframe=tf,
|
||||||
|
pair=metadata['pair'], timerange=timerange)
|
||||||
|
for p in pairs:
|
||||||
|
if metadata['pair'] in p:
|
||||||
|
continue # dont repeat anything from whitelist
|
||||||
|
corr_dataframes[p] = {}
|
||||||
corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
|
corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
|
||||||
timeframe=tf,
|
timeframe=tf,
|
||||||
pair=p, timerange=timerange)
|
pair=p, timerange=timerange)
|
||||||
|
|
||||||
base_dataframe = [dataframe for key, dataframe in corr_dataframes.items()
|
# base_dataframe = [dataframe for key, dataframe in corr_dataframes.items()
|
||||||
if metadata['pair'] in key]
|
# if metadata['pair'] in key]
|
||||||
|
|
||||||
# [0] indexes the lowest tf for the basepair
|
# [0] indexes the lowest tf for the basepair
|
||||||
return corr_dataframes, base_dataframe[0][self.config['timeframe']]
|
return corr_dataframes, base_dataframes
|
||||||
|
|
||||||
def use_strategy_to_populate_indicators(self, strategy: IStrategy, metadata: dict,
|
def use_strategy_to_populate_indicators(self, strategy: IStrategy,
|
||||||
corr_dataframes: dict,
|
corr_dataframes: dict,
|
||||||
dataframe: DataFrame) -> DataFrame:
|
base_dataframes: dict,
|
||||||
|
metadata: dict) -> DataFrame:
|
||||||
|
|
||||||
# dataframe = pair_dataframes[0] # this is the base tf pair df
|
dataframe = base_dataframes[self.config['timeframe']]
|
||||||
|
|
||||||
for tf in self.freqai_config["timeframes"]:
|
for tf in self.freqai_config["timeframes"]:
|
||||||
# dataframe = strategy.populate_any_indicators(metadata["pair"], dataframe.copy,
|
dataframe = strategy.populate_any_indicators(metadata['pair'],
|
||||||
# tf, pair_dataframes[tf])
|
dataframe.copy(),
|
||||||
|
tf,
|
||||||
|
base_dataframes[tf],
|
||||||
|
coin=metadata['pair'].split("/")[0] + "-"
|
||||||
|
)
|
||||||
for i in self.freqai_config["corr_pairlist"]:
|
for i in self.freqai_config["corr_pairlist"]:
|
||||||
|
if metadata['pair'] in i:
|
||||||
|
continue # dont repeat anything from whitelist
|
||||||
dataframe = strategy.populate_any_indicators(i,
|
dataframe = strategy.populate_any_indicators(i,
|
||||||
dataframe.copy(),
|
dataframe.copy(),
|
||||||
tf,
|
tf,
|
||||||
|
@ -93,7 +93,7 @@ class IFreqaiModel(ABC):
|
|||||||
else:
|
else:
|
||||||
self.model = self.dh.load_data()
|
self.model = self.dh.load_data()
|
||||||
|
|
||||||
preds, do_preds = self.predict(dataframe_backtest)
|
preds, do_preds = self.predict(dataframe_backtest, metadata)
|
||||||
|
|
||||||
self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))
|
self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))
|
||||||
print('predictions', len(self.dh.full_predictions),
|
print('predictions', len(self.dh.full_predictions),
|
||||||
@ -120,13 +120,13 @@ class IFreqaiModel(ABC):
|
|||||||
if retrain or not file_exists:
|
if retrain or not file_exists:
|
||||||
self.dh.download_new_data_for_retraining(new_trained_timerange, metadata)
|
self.dh.download_new_data_for_retraining(new_trained_timerange, metadata)
|
||||||
# dataframe = download-data
|
# dataframe = download-data
|
||||||
corr_dataframes, pair_dataframes = self.dh.load_pairs_histories(new_trained_timerange,
|
corr_dataframes, base_dataframes = self.dh.load_pairs_histories(new_trained_timerange,
|
||||||
metadata)
|
metadata)
|
||||||
|
|
||||||
unfiltered_dataframe = self.dh.use_strategy_to_populate_indicators(strategy,
|
unfiltered_dataframe = self.dh.use_strategy_to_populate_indicators(strategy,
|
||||||
metadata,
|
|
||||||
corr_dataframes,
|
corr_dataframes,
|
||||||
pair_dataframes)
|
base_dataframes,
|
||||||
|
metadata)
|
||||||
|
|
||||||
self.model = self.train(unfiltered_dataframe, metadata)
|
self.model = self.train(unfiltered_dataframe, metadata)
|
||||||
self.dh.save_data(self.model)
|
self.dh.save_data(self.model)
|
||||||
@ -134,7 +134,7 @@ class IFreqaiModel(ABC):
|
|||||||
self.freqai_info
|
self.freqai_info
|
||||||
|
|
||||||
self.model = self.dh.load_data()
|
self.model = self.dh.load_data()
|
||||||
preds, do_preds = self.predict(dataframe)
|
preds, do_preds = self.predict(dataframe, metadata)
|
||||||
self.dh.append_predictions(preds, do_preds, len(dataframe))
|
self.dh.append_predictions(preds, do_preds, len(dataframe))
|
||||||
# dataframe should have len 1 here
|
# dataframe should have len 1 here
|
||||||
|
|
||||||
@ -175,7 +175,7 @@ class IFreqaiModel(ABC):
|
|||||||
return
|
return
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def predict(self, dataframe: DataFrame) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
|
def predict(self, dataframe: DataFrame, metadata: dict) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
|
||||||
"""
|
"""
|
||||||
Filter the prediction features data and predict with it.
|
Filter the prediction features data and predict with it.
|
||||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||||
|
@ -53,7 +53,7 @@ class ExamplePredictionModel(IFreqaiModel):
|
|||||||
logger.info("--------------------Starting training--------------------")
|
logger.info("--------------------Starting training--------------------")
|
||||||
|
|
||||||
# create the full feature list based on user config info
|
# create the full feature list based on user config info
|
||||||
self.dh.training_features_list = self.dh.build_feature_list(self.config)
|
self.dh.training_features_list = self.dh.build_feature_list(self.config, metadata)
|
||||||
unfiltered_labels = self.make_labels(unfiltered_dataframe)
|
unfiltered_labels = self.make_labels(unfiltered_dataframe)
|
||||||
|
|
||||||
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
# filter the features requested by user in the configuration file and elegantly handle NaNs
|
||||||
@ -114,7 +114,8 @@ class ExamplePredictionModel(IFreqaiModel):
|
|||||||
|
|
||||||
return model
|
return model
|
||||||
|
|
||||||
def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[DataFrame, DataFrame]:
|
def predict(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Tuple[DataFrame,
|
||||||
|
DataFrame]:
|
||||||
"""
|
"""
|
||||||
Filter the prediction features data and predict with it.
|
Filter the prediction features data and predict with it.
|
||||||
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
:param: unfiltered_dataframe: Full dataframe for the current backtest period.
|
||||||
@ -126,7 +127,7 @@ class ExamplePredictionModel(IFreqaiModel):
|
|||||||
|
|
||||||
# logger.info("--------------------Starting prediction--------------------")
|
# logger.info("--------------------Starting prediction--------------------")
|
||||||
|
|
||||||
original_feature_list = self.dh.build_feature_list(self.config)
|
original_feature_list = self.dh.build_feature_list(self.config, metadata)
|
||||||
filtered_dataframe, _ = self.dh.filter_features(
|
filtered_dataframe, _ = self.dh.filter_features(
|
||||||
unfiltered_dataframe, original_feature_list, training_filter=False
|
unfiltered_dataframe, original_feature_list, training_filter=False
|
||||||
)
|
)
|
||||||
|
@ -142,8 +142,11 @@ class FreqaiExampleStrategy(IStrategy):
|
|||||||
# the following loops are necessary for building the features
|
# the following loops are necessary for building the features
|
||||||
# indicated by the user in the configuration file.
|
# indicated by the user in the configuration file.
|
||||||
for tf in self.freqai_info["timeframes"]:
|
for tf in self.freqai_info["timeframes"]:
|
||||||
# dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf)
|
dataframe = self.populate_any_indicators(self.pair, dataframe.copy(), tf,
|
||||||
|
coin=self.pair.split("/")[0] + "-")
|
||||||
for pair in self.freqai_info["corr_pairlist"]:
|
for pair in self.freqai_info["corr_pairlist"]:
|
||||||
|
if metadata['pair'] in pair:
|
||||||
|
continue # do not include whitelisted pair twice if it is in corr_pairlist
|
||||||
dataframe = self.populate_any_indicators(
|
dataframe = self.populate_any_indicators(
|
||||||
pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
|
pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
|
||||||
)
|
)
|
||||||
|
Loading…
Reference in New Issue
Block a user