add freqai backend machinery, user interface, documentation
This commit is contained in:
parent
ebab02fce3
commit
fc837c4daa
3
.gitignore
vendored
3
.gitignore
vendored
@ -7,6 +7,8 @@ logfile.txt
|
|||||||
user_data/*
|
user_data/*
|
||||||
!user_data/strategy/sample_strategy.py
|
!user_data/strategy/sample_strategy.py
|
||||||
!user_data/notebooks
|
!user_data/notebooks
|
||||||
|
!user_data/models
|
||||||
|
user_data/models/*
|
||||||
user_data/notebooks/*
|
user_data/notebooks/*
|
||||||
freqtrade-plot.html
|
freqtrade-plot.html
|
||||||
freqtrade-profit-plot.html
|
freqtrade-profit-plot.html
|
||||||
@ -105,3 +107,4 @@ target/
|
|||||||
!config_examples/config_ftx.example.json
|
!config_examples/config_ftx.example.json
|
||||||
!config_examples/config_full.example.json
|
!config_examples/config_full.example.json
|
||||||
!config_examples/config_kraken.example.json
|
!config_examples/config_kraken.example.json
|
||||||
|
!config_examples/config_freqai.example.json
|
||||||
|
100
config_examples/config_freqai.example.json
Normal file
100
config_examples/config_freqai.example.json
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
{
|
||||||
|
"max_open_trades": 1,
|
||||||
|
"stake_currency": "USDT",
|
||||||
|
"stake_amount": 800,
|
||||||
|
"tradable_balance_ratio": 1,
|
||||||
|
"fiat_display_currency": "USD",
|
||||||
|
"dry_run": true,
|
||||||
|
"timeframe": "5m",
|
||||||
|
"dry_run_wallet":1000,
|
||||||
|
"cancel_open_orders_on_exit": true,
|
||||||
|
"unfilledtimeout": {
|
||||||
|
"entry": 10,
|
||||||
|
"exit": 30
|
||||||
|
},
|
||||||
|
"exchange": {
|
||||||
|
"name": "ftx",
|
||||||
|
"key": "",
|
||||||
|
"secret": "",
|
||||||
|
"ccxt_config": {"enableRateLimit": true},
|
||||||
|
"ccxt_async_config": {
|
||||||
|
"enableRateLimit": true,
|
||||||
|
"rateLimit": 200
|
||||||
|
},
|
||||||
|
"pair_whitelist": [
|
||||||
|
"BTC/USDT"
|
||||||
|
],
|
||||||
|
"pair_blacklist": [
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"entry_pricing": {
|
||||||
|
"price_side": "same",
|
||||||
|
"use_order_book": true,
|
||||||
|
"order_book_top": 1,
|
||||||
|
"price_last_balance": 0.0,
|
||||||
|
"check_depth_of_market": {
|
||||||
|
"enabled": false,
|
||||||
|
"bids_to_ask_delta": 1
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"exit_pricing": {
|
||||||
|
"price_side": "same",
|
||||||
|
"use_order_book": true,
|
||||||
|
"order_book_top": 1
|
||||||
|
},
|
||||||
|
"pairlists": [
|
||||||
|
{"method": "StaticPairList"}
|
||||||
|
],
|
||||||
|
|
||||||
|
"freqai": {
|
||||||
|
"btc_pair" : "BTC/USDT",
|
||||||
|
"timeframes" : ["5m","15m","1h"],
|
||||||
|
"full_timerange" : "20210601-20220101",
|
||||||
|
"train_period" : 30,
|
||||||
|
"backtest_period" : 7,
|
||||||
|
"identifier" : "example",
|
||||||
|
"base_features": [
|
||||||
|
"rsi",
|
||||||
|
"close_over_20sma",
|
||||||
|
"relative_volume",
|
||||||
|
"bb_width",
|
||||||
|
"mfi",
|
||||||
|
"roc",
|
||||||
|
"pct-change",
|
||||||
|
"adx",
|
||||||
|
"macd"
|
||||||
|
],
|
||||||
|
"corr_pairlist": [
|
||||||
|
"ETH/USDT",
|
||||||
|
"LINK/USDT",
|
||||||
|
"DOT/USDT"
|
||||||
|
],
|
||||||
|
"training_timerange" : "20211220-20220117",
|
||||||
|
|
||||||
|
"feature_parameters" : {
|
||||||
|
"period": 12,
|
||||||
|
"shift": 2,
|
||||||
|
"drop_features": false,
|
||||||
|
"DI_threshold": 1,
|
||||||
|
"weight_factor": 0,
|
||||||
|
"principal_component_analysis": false,
|
||||||
|
"remove_outliers": false
|
||||||
|
},
|
||||||
|
"data_split_parameters" : {
|
||||||
|
"test_size": 0.25,
|
||||||
|
"random_state": 1
|
||||||
|
},
|
||||||
|
"model_training_parameters" : {
|
||||||
|
"n_estimators": 2000,
|
||||||
|
"random_state": 1,
|
||||||
|
"learning_rate": 0.02,
|
||||||
|
"task_type": "CPU"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bot_name": "",
|
||||||
|
"initial_state": "running",
|
||||||
|
"forcebuy_enable": false,
|
||||||
|
"internals": {
|
||||||
|
"process_throttle_secs": 5
|
||||||
|
}
|
||||||
|
}
|
265
docs/freqai.md
Normal file
265
docs/freqai.md
Normal file
@ -0,0 +1,265 @@
|
|||||||
|
# Freqai
|
||||||
|
|
||||||
|
!!! Note
|
||||||
|
Freqai is still experimental, and should be used at the user's own discretion.
|
||||||
|
|
||||||
|
Freqai is a module designed to automate a variety of tasks associated with
|
||||||
|
training a regressor to predict signals based on input features. Among the
|
||||||
|
features included are:
|
||||||
|
|
||||||
|
* Easy large feature set construction based on simple user input
|
||||||
|
* Sweep model training and backtesting to simulate consistent model retraining through time
|
||||||
|
* Smart outlier removal of data points from prediction sets using a Dissimilarity Index.
|
||||||
|
* Data dimensionality reduction with Principal Component Analysis
|
||||||
|
* Automatic file management for storage of models to be reused during live
|
||||||
|
* Smart and safe data standardization
|
||||||
|
* Cleaning of NaNs from the data set before training and prediction.
|
||||||
|
|
||||||
|
TODO:
|
||||||
|
* live is not automated, still some architectural work to be done
|
||||||
|
|
||||||
|
## Background and vocabulary
|
||||||
|
|
||||||
|
**Features** are the quantities with which a model is trained. $X_i$ represents the
|
||||||
|
vector of all features for a single candle. In Freqai, the user
|
||||||
|
builds the features from anything they can construct in the strategy.
|
||||||
|
|
||||||
|
**Labels** are the target values with which the weights inside a model are trained
|
||||||
|
toward. Each set of features is associated with a single label, which is also
|
||||||
|
defined within the strategy by the user. These labels look forward into the
|
||||||
|
future, and are not available to the model during dryrun/live/backtesting.
|
||||||
|
|
||||||
|
**Training** refers to the process of feeding individual feature sets into the
|
||||||
|
model with associated labels with the goal of matching input feature sets to
|
||||||
|
associated labels.
|
||||||
|
|
||||||
|
**Train data** is a subset of the historic data which is fed to the model during
|
||||||
|
training to adjust weights. This data directly influences weight connections
|
||||||
|
in the model.
|
||||||
|
|
||||||
|
**Test data** is a subset of the historic data which is used to evaluate the
|
||||||
|
intermediate performance of the model during training. This data does not
|
||||||
|
directly influence nodal weights within the model.
|
||||||
|
|
||||||
|
## Configuring the bot
|
||||||
|
### Example config file
|
||||||
|
The user interface is isolated to the typical config file. A typical Freqai
|
||||||
|
config setup includes:
|
||||||
|
|
||||||
|
```json
|
||||||
|
"freqai": {
|
||||||
|
"timeframes" : ["5m","15m","4h"],
|
||||||
|
"full_timerange" : "20211220-20220220",
|
||||||
|
"train_period" : "month",
|
||||||
|
"backtest_period" : "week",
|
||||||
|
"identifier" : "unique-id",
|
||||||
|
"base_features": [
|
||||||
|
"rsi",
|
||||||
|
"mfi",
|
||||||
|
"roc",
|
||||||
|
],
|
||||||
|
"corr_pairlist": [
|
||||||
|
"ETH/USD",
|
||||||
|
"LINK/USD",
|
||||||
|
"BNB/USD"
|
||||||
|
],
|
||||||
|
"feature_parameters" : {
|
||||||
|
"period": 24,
|
||||||
|
"shift": 2,
|
||||||
|
"drop_features": false,
|
||||||
|
"DI_threshold": 1,
|
||||||
|
"weight_factor": 0,
|
||||||
|
},
|
||||||
|
"data_split_parameters" : {
|
||||||
|
"test_size": 0.25,
|
||||||
|
"random_state": 42
|
||||||
|
},
|
||||||
|
"model_training_parameters" : {
|
||||||
|
"n_estimators": 100,
|
||||||
|
"random_state": 42,
|
||||||
|
"learning_rate": 0.02,
|
||||||
|
"task_type": "CPU",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### Building the feature set
|
||||||
|
|
||||||
|
Most of these parameters are controlling the feature data set. The `base_features`
|
||||||
|
indicates the basic indicators the user wishes to include in the feature set.
|
||||||
|
The `timeframes` are the timeframes of each base_feature that the user wishes to
|
||||||
|
include in the feature set. In the present case, the user is asking for the
|
||||||
|
`5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, `roc`, etc. to be included
|
||||||
|
in the feature set.
|
||||||
|
|
||||||
|
In addition, the user can ask for each of these features to be included from
|
||||||
|
informative pairs using the `corr_pairlist`. This means that the present feature
|
||||||
|
set will include all the `base_features` on all the `timeframes` for each of
|
||||||
|
`ETH/USD`, `LINK/USD`, and `BNB/USD`.
|
||||||
|
|
||||||
|
`shift` is another user controlled parameter which indicates the number of previous
|
||||||
|
candles to include in the present feature set. In other words, `shift: 2`, tells
|
||||||
|
Freqai to include the past 2 candles for each of the features included
|
||||||
|
in the dataset.
|
||||||
|
|
||||||
|
In total, the number of features the present user has created is:
|
||||||
|
|
||||||
|
no. `timeframes` * no. `base_features` * no. `corr_pairlist` * no. `shift`
|
||||||
|
3 * 3 * 3 * 2 = 54.
|
||||||
|
|
||||||
|
### Deciding the sliding training window and backtesting duration
|
||||||
|
|
||||||
|
`full_timerange` lets the user set the full backtesting range to train and
|
||||||
|
backtest through. Meanwhile `train_period` is the sliding training window and
|
||||||
|
`backtest_period` is the sliding backtesting window. In the present example,
|
||||||
|
the user is asking Freqai to train and backtest through the range `20211220-20220220`, using a training window of one `month`.
|
||||||
|
The user wishes to backtest each `week` with a newly trained model. This means that
|
||||||
|
Freqai will train 8 separate models (because the full range comprises 8 weeks),
|
||||||
|
and then backtest the subsequent week associated with each of the 8 training
|
||||||
|
data set timerange months. Users can think of this as a "sliding window" which
|
||||||
|
emulates Freqai retraining itself once per week in live using the previous
|
||||||
|
month of data.
|
||||||
|
|
||||||
|
|
||||||
|
## Running Freqai
|
||||||
|
### Training and backtesting
|
||||||
|
|
||||||
|
The freqai training/backtesting module can be executed with the following command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
freqtrade backtesting --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel ExamplePredictionModel
|
||||||
|
```
|
||||||
|
|
||||||
|
where the user needs to have a FreqaiExampleStrategy that fits to the requirements outlined
|
||||||
|
below. The ExamplePredictionModel is a user built class which lets users design their
|
||||||
|
own training procedures and data analysis.
|
||||||
|
|
||||||
|
### Building a freqai strategy
|
||||||
|
|
||||||
|
The Freqai strategy requires the user to include the following lines of code in `populate_indicators()`:
|
||||||
|
|
||||||
|
```python
|
||||||
|
from freqtrade.freqai.strategy_bridge import CustomModel
|
||||||
|
|
||||||
|
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
|
||||||
|
# the configuration file parameters are stored here
|
||||||
|
self.freqai_info = self.config['freqai']
|
||||||
|
|
||||||
|
# the model is instantiated here
|
||||||
|
self.model = CustomModel(self.config)
|
||||||
|
|
||||||
|
print('Populating indicators...')
|
||||||
|
|
||||||
|
# the following loops are necessary for building the features
|
||||||
|
# indicated by the user in the configuration file.
|
||||||
|
for tf in self.freqai_info['timeframes']:
|
||||||
|
dataframe = self.populate_any_indicators(metadata['pair'],
|
||||||
|
dataframe.copy(), tf)
|
||||||
|
for i in self.freqai_info['corr_pairlist']:
|
||||||
|
dataframe = self.populate_any_indicators(i,
|
||||||
|
dataframe.copy(), tf, coin=i.split("/")[0]+'-')
|
||||||
|
|
||||||
|
# the model will return 4 values, its prediction, an indication of whether or not the prediction
|
||||||
|
# should be accepted, the target mean/std values from the labels used during each training period.
|
||||||
|
(dataframe['prediction'], dataframe['do_predict'],
|
||||||
|
dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)
|
||||||
|
|
||||||
|
return dataframe
|
||||||
|
```
|
||||||
|
The user should also include `populate_any_indicators()` from `templates/FreqaiExampleStrategy.py` which builds
|
||||||
|
the feature set with a proper naming convention for the IFreqaiModel to use later.
|
||||||
|
|
||||||
|
### Building an IFreqaiModel
|
||||||
|
|
||||||
|
Freqai has a base example model in `templates/ExamplePredictionModel.py`, but users can customize and create
|
||||||
|
their own prediction models using the `IFreqaiModel` class. Users are encouraged to inherit `train()`, `predict()`,
|
||||||
|
and `make_labels()` to let them customize various aspects of their training procedures.
|
||||||
|
|
||||||
|
### Running the model live
|
||||||
|
|
||||||
|
After the user has designed a desirable featureset, Freqai can be run in dry/live
|
||||||
|
using the typical trade command:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example.json --training_timerange '20211220-20220120'
|
||||||
|
```
|
||||||
|
|
||||||
|
Where the user has now specified exactly which of the models from the sliding window
|
||||||
|
that they wish to run live using `--training_timerange` (typically this would be the most
|
||||||
|
recent model trained). As of right now, freqai will
|
||||||
|
not automatically retrain itself, so the user needs to manually retrain and then
|
||||||
|
reload the config file with a new `--training_timerange` in order to update the
|
||||||
|
model.
|
||||||
|
|
||||||
|
|
||||||
|
## Data analysis techniques
|
||||||
|
### Controlling the model learning process
|
||||||
|
|
||||||
|
The user can define model settings for the data split `data_split_parameters` and learning parameters
|
||||||
|
`model_training_parameters`. Users are encouraged to visit the Catboost documentation
|
||||||
|
for more information on how to select these values. `n_estimators` increases the
|
||||||
|
computational effort and the fit to the training data. If a user has a GPU
|
||||||
|
installed in their system, they may benefit from changing `task_type` to `GPU`.
|
||||||
|
The `weight_factor` allows the user to weight more recent data more strongly
|
||||||
|
than past data via an exponential function:
|
||||||
|
|
||||||
|
$$ W_i = \exp(\frac{-i}{\alpha*n}) $$
|
||||||
|
|
||||||
|
where $W_i$ is the weight of data point $i$ in a total set of $n$ data points and $\alpha$ is the `weight_factor`.
|
||||||
|
|
||||||
|
`drop_features` tells Freqai to train the model on the user defined features,
|
||||||
|
followed by a feature importance evaluation where it drops the top and bottom
|
||||||
|
performing features (there is evidence to suggest the top features may not be
|
||||||
|
helpful in equity/crypto trading since the ultimate objective is to predict low
|
||||||
|
frequency patterns, source: numerai).
|
||||||
|
|
||||||
|
Finally, `period` defines the offset used for the `labels`. In the present example,
|
||||||
|
the user is asking for `labels` that are 24 candles in the future.
|
||||||
|
|
||||||
|
### Removing outliers with the Dissimilarity Index
|
||||||
|
|
||||||
|
The Dissimilarity Index (DI) aims to quantify the uncertainty associated with each
|
||||||
|
prediction by the model. To do so, Freqai measures the distance between each training
|
||||||
|
data point and all other training data points:
|
||||||
|
|
||||||
|
$$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$
|
||||||
|
|
||||||
|
where $d_{ab}$ is the distance between the standardized points $a$ and $b$. $p$
|
||||||
|
is the number of features i.e. the length of the vector $X$. The
|
||||||
|
characteristic distance, $\overline{d}$ for a set of training data points is simply the mean
|
||||||
|
of the average distances:
|
||||||
|
|
||||||
|
$$ \overline{d} = \sum_{a=1}^n(\sum_{b=1}^n(d_{ab}/n)/n) $$
|
||||||
|
|
||||||
|
$\overline{d}$ quantifies the spread of the training data, which is compared to
|
||||||
|
the distance between the new prediction feature vectors, $X_k$ and all the training
|
||||||
|
data:
|
||||||
|
|
||||||
|
$$ d_k = \min_i d_{k,i} $$
|
||||||
|
|
||||||
|
which enables the estimation of a Dissimilarity Index:
|
||||||
|
|
||||||
|
$$ DI_k = d_k/\overline{d} $$
|
||||||
|
|
||||||
|
Equity and crypto markets suffer from a high level of non-patterned noise in the
|
||||||
|
form of outlier data points. The dissimilarity index allows predictions which
|
||||||
|
are outliers and not existent in the model feature space, to be thrown out due
|
||||||
|
to low levels of certainty. The user can tweak the DI with `DI_threshold` to increase
|
||||||
|
or decrease the extrapolation of the trained model.
|
||||||
|
|
||||||
|
### Reducing data dimensionality with Principal Component Analysis
|
||||||
|
|
||||||
|
TO BE WRITTEN
|
||||||
|
|
||||||
|
## Additional information
|
||||||
|
### Feature standardization
|
||||||
|
|
||||||
|
The feature set created by the user is automatically standardized to the training
|
||||||
|
data only. This includes all test data and unseen prediction data (dry/live/backtest).
|
||||||
|
|
||||||
|
### File structure
|
||||||
|
|
||||||
|
`user_data_dir/models/` contains all the data associated with the trainings and
|
||||||
|
backtestings. This file structure is heavily controlled and read by the `DataHandler()`
|
||||||
|
and should thus not be modified.
|
@ -19,6 +19,7 @@ from freqtrade.commands.list_commands import (start_list_exchanges, start_list_m
|
|||||||
start_show_trades)
|
start_show_trades)
|
||||||
from freqtrade.commands.optimize_commands import (start_backtesting, start_backtesting_show,
|
from freqtrade.commands.optimize_commands import (start_backtesting, start_backtesting_show,
|
||||||
start_edge, start_hyperopt)
|
start_edge, start_hyperopt)
|
||||||
|
from freqtrade.commands.freqai_commands import (start_training)
|
||||||
from freqtrade.commands.pairlist_commands import start_test_pairlist
|
from freqtrade.commands.pairlist_commands import start_test_pairlist
|
||||||
from freqtrade.commands.plot_commands import start_plot_dataframe, start_plot_profit
|
from freqtrade.commands.plot_commands import start_plot_dataframe, start_plot_profit
|
||||||
from freqtrade.commands.trade_commands import start_trading
|
from freqtrade.commands.trade_commands import start_trading
|
||||||
|
@ -12,7 +12,7 @@ from freqtrade.constants import DEFAULT_CONFIG
|
|||||||
|
|
||||||
ARGS_COMMON = ["verbosity", "logfile", "version", "config", "datadir", "user_data_dir"]
|
ARGS_COMMON = ["verbosity", "logfile", "version", "config", "datadir", "user_data_dir"]
|
||||||
|
|
||||||
ARGS_STRATEGY = ["strategy", "strategy_path", "recursive_strategy_search"]
|
ARGS_STRATEGY = ["strategy", "strategy_path", "recursive_strategy_search", "freqaimodel", "freqaimodel_path"]
|
||||||
|
|
||||||
ARGS_TRADE = ["db_url", "sd_notify", "dry_run", "dry_run_wallet", "fee"]
|
ARGS_TRADE = ["db_url", "sd_notify", "dry_run", "dry_run_wallet", "fee"]
|
||||||
|
|
||||||
@ -190,7 +190,8 @@ class Arguments:
|
|||||||
start_list_markets, start_list_strategies,
|
start_list_markets, start_list_strategies,
|
||||||
start_list_timeframes, start_new_config, start_new_strategy,
|
start_list_timeframes, start_new_config, start_new_strategy,
|
||||||
start_plot_dataframe, start_plot_profit, start_show_trades,
|
start_plot_dataframe, start_plot_profit, start_show_trades,
|
||||||
start_test_pairlist, start_trading, start_webserver)
|
start_test_pairlist, start_trading, start_webserver,
|
||||||
|
start_training)
|
||||||
|
|
||||||
subparsers = self.parser.add_subparsers(dest='command',
|
subparsers = self.parser.add_subparsers(dest='command',
|
||||||
# Use custom message when no subhandler is added
|
# Use custom message when no subhandler is added
|
||||||
|
@ -614,4 +614,16 @@ AVAILABLE_CLI_OPTIONS = {
|
|||||||
"that do not contain any parameters."),
|
"that do not contain any parameters."),
|
||||||
action="store_true",
|
action="store_true",
|
||||||
),
|
),
|
||||||
|
|
||||||
|
"freqaimodel": Arg(
|
||||||
|
'--freqaimodel',
|
||||||
|
help='Specify a custom freqaimodels.',
|
||||||
|
metavar='NAME',
|
||||||
|
),
|
||||||
|
|
||||||
|
"freqaimodel_path": Arg(
|
||||||
|
'--freqaimodel-path',
|
||||||
|
help='Specify additional lookup path for freqaimodels.',
|
||||||
|
metavar='PATH',
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
24
freqtrade/commands/freqai_commands.py
Normal file
24
freqtrade/commands/freqai_commands.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Any, Dict
|
||||||
|
|
||||||
|
from freqtrade import constants
|
||||||
|
from freqtrade.configuration import setup_utils_configuration
|
||||||
|
from freqtrade.enums import RunMode
|
||||||
|
from freqtrade.exceptions import OperationalException
|
||||||
|
from freqtrade.misc import round_coin_value
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def start_training(args: Dict[str, Any]) -> None:
    """
    Entry point for training a model that predicts signals.

    The configuration is built from the CLI arguments in ``RunMode.FREQAI`` and
    handed to a ``Training`` instance, which is then started.

    :param args: Cli args from Arguments()
    :return: None
    """
    # function-scope import, kept where the original placed it
    from freqtrade.freqai.training import Training

    Training(setup_utils_configuration(args, RunMode.FREQAI)).start()
|
@ -95,6 +95,8 @@ class Configuration:
|
|||||||
|
|
||||||
self._process_data_options(config)
|
self._process_data_options(config)
|
||||||
|
|
||||||
|
self._process_freqai_options(config)
|
||||||
|
|
||||||
# Check if the exchange set by the user is supported
|
# Check if the exchange set by the user is supported
|
||||||
check_exchange(config, config.get('experimental', {}).get('block_bad_exchanges', True))
|
check_exchange(config, config.get('experimental', {}).get('block_bad_exchanges', True))
|
||||||
|
|
||||||
@ -446,6 +448,16 @@ class Configuration:
|
|||||||
|
|
||||||
config.update({'runmode': self.runmode})
|
config.update({'runmode': self.runmode})
|
||||||
|
|
||||||
|
def _process_freqai_options(self, config: Dict[str, Any]) -> None:
    """
    Forward the freqai related arguments to ``self._args_to_config``.

    Each argument name is passed along with its ``logstring`` template, first
    ``freqaimodel`` and then ``freqaimodel_path``.

    :param config: configuration dictionary handed to ``_args_to_config``
    """
    freqai_arguments = (
        ('freqaimodel', 'Using freqaimodel class name: {}'),
        ('freqaimodel_path', 'Using freqaimodel path: {}'),
    )
    for name, template in freqai_arguments:
        self._args_to_config(config, argname=name, logstring=template)
|
||||||
|
|
||||||
def _args_to_config(self, config: Dict[str, Any], argname: str,
|
def _args_to_config(self, config: Dict[str, Any], argname: str,
|
||||||
logstring: str, logfun: Optional[Callable] = None,
|
logstring: str, logfun: Optional[Callable] = None,
|
||||||
deprecated_msg: Optional[str] = None) -> None:
|
deprecated_msg: Optional[str] = None) -> None:
|
||||||
|
@ -55,6 +55,7 @@ FTHYPT_FILEVERSION = 'fthypt_fileversion'
|
|||||||
USERPATH_HYPEROPTS = 'hyperopts'
|
USERPATH_HYPEROPTS = 'hyperopts'
|
||||||
USERPATH_STRATEGIES = 'strategies'
|
USERPATH_STRATEGIES = 'strategies'
|
||||||
USERPATH_NOTEBOOKS = 'notebooks'
|
USERPATH_NOTEBOOKS = 'notebooks'
|
||||||
|
USERPATH_FREQAIMODELS = 'freqaimodels'
|
||||||
|
|
||||||
TELEGRAM_SETTING_OPTIONS = ['on', 'off', 'silent']
|
TELEGRAM_SETTING_OPTIONS = ['on', 'off', 'silent']
|
||||||
WEBHOOK_FORMAT_OPTIONS = ['form', 'json', 'raw']
|
WEBHOOK_FORMAT_OPTIONS = ['form', 'json', 'raw']
|
||||||
|
@ -15,9 +15,10 @@ class RunMode(Enum):
|
|||||||
UTIL_NO_EXCHANGE = "util_no_exchange"
|
UTIL_NO_EXCHANGE = "util_no_exchange"
|
||||||
PLOT = "plot"
|
PLOT = "plot"
|
||||||
WEBSERVER = "webserver"
|
WEBSERVER = "webserver"
|
||||||
|
FREQAI = "freqai"
|
||||||
OTHER = "other"
|
OTHER = "other"
|
||||||
|
|
||||||
|
|
||||||
TRADING_MODES = [RunMode.LIVE, RunMode.DRY_RUN]
|
TRADING_MODES = [RunMode.LIVE, RunMode.DRY_RUN]
|
||||||
OPTIMIZE_MODES = [RunMode.BACKTEST, RunMode.EDGE, RunMode.HYPEROPT]
|
OPTIMIZE_MODES = [RunMode.BACKTEST, RunMode.EDGE, RunMode.HYPEROPT, RunMode.FREQAI]
|
||||||
NON_UTIL_MODES = TRADING_MODES + OPTIMIZE_MODES
|
NON_UTIL_MODES = TRADING_MODES + OPTIMIZE_MODES
|
||||||
|
434
freqtrade/freqai/data_handler.py
Normal file
434
freqtrade/freqai/data_handler.py
Normal file
@ -0,0 +1,434 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import copy
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from pandas import DataFrame
|
||||||
|
from joblib import dump
|
||||||
|
from joblib import load
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics.pairwise import pairwise_distances
|
||||||
|
import datetime
|
||||||
|
from typing import Any, Dict, List, Tuple
|
||||||
|
import pickle as pk
|
||||||
|
from freqtrade.configuration import TimeRange
|
||||||
|
|
||||||
|
SECONDS_IN_DAY = 86400
|
||||||
|
|
||||||
|
class DataHandler:
|
||||||
|
"""
|
||||||
|
Class designed to handle all the data for the IFreqaiModel class model.
|
||||||
|
Functionalities include holding, saving, loading, and analyzing the data.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: Dict[str, Any], dataframe: DataFrame, data: Dict[str, Any]):
    """
    Store the inputs and split the configured full timerange into sub-ranges.

    :params:
    :config: full configuration; its 'freqai' section must provide
    'full_timerange', 'train_period' and 'backtest_period'.
    :dataframe: dataframe kept as self.full_dataframe.
    :data: metadata container kept as self.data. The other methods of this class
    index it with string keys, hence the mapping annotation.
    """
    self.full_dataframe = dataframe
    freqai_config = config['freqai']
    # split_timerange() is called before the remaining attributes are assigned,
    # exactly as in the original statement order.
    (self.training_timeranges,
     self.backtesting_timeranges) = self.split_timerange(
        freqai_config['full_timerange'],
        freqai_config['train_period'],
        freqai_config['backtest_period'])
    self.data = data
    self.data_dictionary: Dict[str, Any] = {}
    self.config = config
    self.freq_config = freqai_config
|
||||||
|
|
||||||
|
def save_data(self, model: Any) -> None:
    """
    Saves all data associated with a model for a single sub-train time range.

    Three files are written, each named self.model_path + self.model_filename plus
    a suffix: "_model.joblib" (the model), "_metadata.json" (self.data) and
    "_trained_df.pkl" (the training features).
    :params:
    :model: User trained model which can be reused for inferencing to generate
    predictions
    """
    # makedirs(exist_ok=True) also creates missing parent folders and does not fail
    # when the folder shows up between an existence check and the creation.
    os.makedirs(self.model_path, exist_ok=True)

    # plain string concatenation is kept on purpose; presumably model_path already
    # ends with a path separator - TODO confirm against the caller
    save_path = self.model_path + self.model_filename

    # Save the trained model
    dump(model, save_path + "_model.joblib")

    self.data['model_path'] = self.model_path
    self.data['model_filename'] = self.model_filename
    self.data['training_features_list'] = list(self.data_dictionary['train_features'].columns)

    # store the metadata
    with open(save_path + "_metadata.json", 'w') as fp:
        json.dump(self.data, fp, default=self.np_encoder)

    # save the train data to file so we can check preds for area of applicability later
    self.data_dictionary['train_features'].to_pickle(save_path + "_trained_df.pkl")
|
||||||
|
|
||||||
|
def load_data(self) -> Any:
    """
    loads all data required to make a prediction on a sub-train time range

    Reads the "_model.joblib", "_metadata.json" and "_trained_df.pkl" files named
    self.model_path + self.model_filename plus suffix, and the "_pca_object.pkl"
    file when principal component analysis is enabled in the configuration.
    :returns:
    :model: User trained model which can be inferenced for new predictions
    """
    load_path = self.model_path + self.model_filename
    model = load(load_path + "_model.joblib")

    with open(load_path + "_metadata.json", 'r') as fp:
        self.data = json.load(fp)

    if self.data.get('training_features_list'):
        self.training_features_list = list(self.data.get('training_features_list'))

    self.data_dictionary['train_features'] = pd.read_pickle(load_path + "_trained_df.pkl")

    # from here on the location stored in the metadata takes over
    self.model_path = self.data['model_path']
    self.model_filename = self.data['model_filename']

    if self.config['freqai']['feature_parameters']['principal_component_analysis']:
        pca_path = self.model_path + self.model_filename + "_pca_object.pkl"
        # NOTE(review): unpickling runs arbitrary code - only load files this class wrote.
        # The context manager closes the handle that was previously left open.
        with open(pca_path, "rb") as pca_file:
            self.pca = pk.load(pca_file)

    return model
|
||||||
|
|
||||||
|
def make_train_test_datasets(self, filtered_dataframe: DataFrame,
                             labels: DataFrame) -> Dict[Any, Any]:
    '''
    Given the dataframe for the full history for training, split the data into
    training and test data according to user specified parameters in configuration
    file.
    :params:
    :filtered_dataframe: cleaned dataframe ready to be split.
    :labels: cleaned labels ready to be split.
    :returns:
    :data_dictionary: the dictionary produced by self.build_data_dictionary() from
    the split features, labels and weights.
    '''
    # one weight per row: produced by set_weights_higher_recent() when the configured
    # weight_factor is positive, otherwise every row gets a weight of 1
    if self.config['freqai']['feature_parameters']['weight_factor'] > 0:
        weights = self.set_weights_higher_recent(len(filtered_dataframe))
    else:
        weights = np.ones(len(filtered_dataframe))

    # features, labels and weights go through the same call so they are split
    # consistently; the split options come straight from the configuration
    (train_features, test_features, train_labels,
     test_labels, train_weights, test_weights) = train_test_split(
        filtered_dataframe,
        labels,
        weights,
        **self.config['freqai']['data_split_parameters']
    )

    return self.build_data_dictionary(
        train_features, test_features,
        train_labels, test_labels,
        train_weights, test_weights)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def filter_features(self, unfiltered_dataframe: DataFrame, training_feature_list: List,
                    labels: DataFrame = None, training_filter: bool = True
                    ) -> Tuple[DataFrame, DataFrame]:
    '''
    Filter the unfiltered dataframe to extract the user requested features and properly
    remove all NaNs. Any row with a NaN is removed from training dataset or replaced with
    0s in the prediction dataset. However, prediction dataset do_predict will reflect any
    row that had a NaN and will shield user from that prediction.
    :params:
    :unfiltered_dataframe: the full dataframe for the present training period
    :training_feature_list: list, the names of the feature columns to keep.
    :labels: the labels for the dataset
    :training_filter: boolean which lets the function know if it is training data or
    prediction data to be filtered.
    :returns:
    :filtered_dataframe: dataframe cleaned of NaNs and only containing the user
    requested feature set.
    :labels: labels cleaned of NaNs (returned untouched when training_filter is False).
    '''
    filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
    # True for every row holding at least one NaN feature. The axis is given by keyword
    # because pandas 2.x no longer accepts it positionally.
    drop_index = pd.isnull(filtered_dataframe).any(axis=1)

    if training_filter:
        # the row count does not matter for training, so every row with a NaN in
        # either the features or the labels is simply removed
        keep_index = ~drop_index & pd.notnull(labels)
        filtered_dataframe = filtered_dataframe[keep_index]
        # assuming the labels depend entirely on the dataframe here.
        labels = labels[keep_index]
        print('dropped', len(unfiltered_dataframe) - len(filtered_dataframe),
              'training data points due to NaNs, ensure you have downloaded all historical training data')
        self.data['filter_drop_index_training'] = drop_index
    else:
        # predictions are sent back to the strategy row by row, so the row count is
        # preserved: NaNs become 0 and do_predict flags the rows that contained one
        self.data['filter_drop_index_prediction'] = drop_index
        filtered_dataframe = filtered_dataframe.fillna(0)
        # 1 = row had no NaN, 0 = row had a NaN; explicit integer cast
        self.do_predict = np.array((~drop_index).astype(int))
        print('dropped', len(self.do_predict) - self.do_predict.sum(), 'of', len(filtered_dataframe),
              'prediction data points due to NaNs. These are protected from prediction with do_predict vector returned to strategy.')

    return filtered_dataframe, labels
|
||||||
|
|
||||||
|
def build_data_dictionary(self, train_df: DataFrame, test_df: DataFrame,
                          train_labels: DataFrame, test_labels: DataFrame,
                          train_weights: Any, test_weights: Any) -> Dict:
    """
    Bundle the split features, labels and weights into a single dictionary.
    :params:
    :train_df: training features
    :test_df: test features
    :train_labels: training labels
    :test_labels: test labels
    :train_weights: per-sample weights for the training set
    :test_weights: per-sample weights for the test set
    :returns:
    :data_dictionary: the dictionary, which is also stored on the instance
    as self.data_dictionary (the very same object is returned).
    """
    keys = ('train_features', 'test_features',
            'train_labels', 'test_labels',
            'train_weights', 'test_weights')
    values = (train_df, test_df,
              train_labels, test_labels,
              train_weights, test_weights)

    # zip keeps the key order of the original literal
    self.data_dictionary = dict(zip(keys, values))
    return self.data_dictionary
|
||||||
|
|
||||||
|
def standardize_data(self, data_dictionary: Dict) -> Dict:
    """
    Standardize all data in the data_dictionary according to the training dataset
    :params:
    :data_dictionary: dictionary containing the cleaned and split training/test data/labels
    :returns:
    :data_dictionary: the same dictionary object, with features and labels replaced by
    their standardized values.

    Side effects: for every feature column the training mean/std are stored in
    self.data as '<column>_mean' / '<column>_std', and the label statistics are
    stored as 'labels_mean' / 'labels_std', so that prediction data can later be
    standardized with exactly the same statistics.
    """
    # statistics come from the training split only; the test split is
    # transformed with them, never with its own statistics
    train_mean = data_dictionary['train_features'].mean()
    train_std = data_dictionary['train_features'].std()
    for key in ('train_features', 'test_features'):
        data_dictionary[key] = (data_dictionary[key] - train_mean) / train_std

    train_labels_std = data_dictionary['train_labels'].std()
    train_labels_mean = data_dictionary['train_labels'].mean()
    for key in ('train_labels', 'test_labels'):
        data_dictionary[key] = (data_dictionary[key] - train_labels_mean) / train_labels_std

    # remember the per-feature statistics for standardize_data_from_metadata()
    for item in train_std.keys():
        self.data[item + '_std'] = train_std[item]
        self.data[item + '_mean'] = train_mean[item]

    self.data['labels_std'] = train_labels_std
    self.data['labels_mean'] = train_labels_mean

    return data_dictionary
|
||||||
|
|
||||||
|
def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame:
    """
    Standardize a dataframe with the per-column mean and standard deviation
    that were stored in self.data (keys '<column>_mean' / '<column>_std').
    :params:
    :df: Dataframe to be standardized. It is modified in place.
    :returns:
    :df: the same dataframe object, standardized column by column.
    """
    stats = self.data
    for column in df.columns:
        centered = df[column] - stats[column + '_mean']
        df[column] = centered / stats[column + '_std']

    return df
|
||||||
|
|
||||||
|
def split_timerange(self, tr: str, train_split: int = 28, bt_split: int = 7) -> tuple:
    """
    Function which takes a single time range (tr) and splits it
    into sub timeranges to train and backtest on based on user input
    :params:
    :tr: str, full timerange to train on (parsed with TimeRange.parse_timerange)
    :train_split: the period length for each training (days). Specified in user
    configuration file
    :bt_split: the backtesting length (days). Specified in user configuration file
    :returns:
    :tr_training_list: list of 'YYYYMMDD-YYYYMMDD' training timerange strings
    :tr_backtesting_list: list of 'YYYYMMDD-YYYYMMDD' backtest timerange strings,
    each one starting where the matching training timerange stops.
    :raises:
    ValueError if bt_split is not positive (the window could never slide forward).
    """
    if bt_split <= 0:
        # the training window advances by bt_split per iteration; without a
        # positive step the loop below would never terminate
        raise ValueError("bt_split must be a positive number of days")

    train_period = train_split * SECONDS_IN_DAY
    bt_period = bt_split * SECONDS_IN_DAY

    full_timerange = TimeRange.parse_timerange(tr)
    timerange_train = copy.deepcopy(full_timerange)
    timerange_backtest = copy.deepcopy(full_timerange)

    def _as_range_string(startts, stopts):
        # timezone-aware conversion, consistent with slice_dataframe()
        start = datetime.datetime.fromtimestamp(startts, tz=datetime.timezone.utc)
        stop = datetime.datetime.fromtimestamp(stopts, tz=datetime.timezone.utc)
        return start.strftime("%Y%m%d") + '-' + stop.strftime("%Y%m%d")

    tr_training_list = []
    tr_backtesting_list = []
    first = True
    while True:
        # the first window starts at the very beginning of the full timerange,
        # every following window is shifted by one backtest period
        if not first:
            timerange_train.startts = timerange_train.startts + bt_period
        timerange_train.stopts = timerange_train.startts + train_period

        # if a full training period doesnt fit, we stop
        if timerange_train.stopts > full_timerange.stopts:
            break
        first = False
        tr_training_list.append(
            _as_range_string(timerange_train.startts, timerange_train.stopts))

        # associated backtest period
        timerange_backtest.startts = timerange_train.stopts
        timerange_backtest.stopts = timerange_backtest.startts + bt_period
        tr_backtesting_list.append(
            _as_range_string(timerange_backtest.startts, timerange_backtest.stopts))

    return tr_training_list, tr_backtesting_list
|
||||||
|
|
||||||
|
def slice_dataframe(self, tr: str, df: DataFrame) -> DataFrame:
    """
    Cut the requested time window out of a full dataframe.
    :params:
    :tr: timerange string that we wish to extract from df
    :df: Dataframe containing all candles to run the entire backtest. Only the
    rows whose 'date' lies inside the window (both ends inclusive) are kept.
    :returns:
    :df: the rows of df that fall inside the timerange.
    """
    window = TimeRange.parse_timerange(tr)
    utc = datetime.timezone.utc
    lower = datetime.datetime.fromtimestamp(window.startts, tz=utc)
    upper = datetime.datetime.fromtimestamp(window.stopts, tz=utc)

    inside_window = (df['date'] >= lower) & (df['date'] <= upper)
    return df.loc[inside_window, :]
|
||||||
|
|
||||||
|
def principal_component_analysis(self) -> None:
    """
    Performs Principal Component Analysis on the data for dimensionality reduction
    and outlier detection (see self.remove_outliers())
    No parameters or returns, it acts on the data_dictionary held by the DataHandler.

    The smallest number of leading components whose cumulative explained variance
    reaches 99.9% is kept. Train and test features are replaced by their
    projections (columns 'PC0', 'PC1', ...), the number of kept components is
    stored in self.data['n_kept_components'], the fitted PCA is kept as self.pca
    and pickled to '<model_path><model_filename>_pca_object.pkl'.
    """

    from sklearn.decomposition import PCA  # avoid importing if we dont need it

    train_features = self.data_dictionary['train_features']
    test_features = self.data_dictionary['test_features']

    n_components = train_features.shape[1]
    pca = PCA(n_components=n_components)
    pca = pca.fit(train_features)

    # cumulative_variance[k] is the variance explained by the first k+1
    # components, so the index of the first entry >= 0.999 has to be
    # incremented by one to obtain a component *count*.
    cumulative_variance = pca.explained_variance_ratio_.cumsum()
    n_keep_components = int(np.searchsorted(cumulative_variance, 0.999)) + 1
    # guard against rounding leaving the total marginally below the threshold
    n_keep_components = min(n_keep_components, len(cumulative_variance))

    pca2 = PCA(n_components=n_keep_components)
    pca2 = pca2.fit(train_features)
    print('reduced feature dimension by', n_components - n_keep_components)
    print("explained variance", np.sum(pca2.explained_variance_ratio_))

    component_names = ['PC' + str(i) for i in range(0, n_keep_components)]
    self.data_dictionary['train_features'] = pd.DataFrame(
        data=pca2.transform(train_features),
        columns=component_names,
        index=train_features.index)
    self.data_dictionary['test_features'] = pd.DataFrame(
        data=pca2.transform(test_features),
        columns=component_names,
        index=test_features.index)

    self.data['n_kept_components'] = n_keep_components
    self.pca = pca2

    # create missing parent folders too, and make sure the file handle is closed
    os.makedirs(self.model_path, exist_ok=True)
    with open(self.model_path + self.model_filename + "_pca_object.pkl", "wb") as fp:
        pk.dump(pca2, fp)

    return None
|
||||||
|
|
||||||
|
def compute_distances(self) -> float:
    """
    Compute the average, over all training points, of each point's mean
    pairwise distance to the training set.
    :returns:
    :avg_mean_dist: the mean of the row-wise means of the pairwise distance matrix
    of self.data_dictionary['train_features'].
    """
    print('computing average mean distance for all training points')
    distance_matrix = pairwise_distances(self.data_dictionary['train_features'], n_jobs=-1)
    mean_distance_per_point = distance_matrix.mean(axis=1)
    avg_mean_dist = mean_distance_per_point.mean()
    print('avg_mean_dist', avg_mean_dist)

    return avg_mean_dist
|
||||||
|
|
||||||
|
def remove_outliers(self, predict: bool) -> None:
    """
    Remove data that looks like an outlier based on the distribution of each
    variable. A row counts as an outlier when any of its features is not strictly
    inside the 0.1% - 99.9% quantile band of the training features.
    :params:
    :predict: boolean which tells the function if this is prediction data or
    training data coming in.

    For prediction data no rows are removed (row count must be preserved);
    instead self.do_predict is lowered for the offending rows. For training
    data the outlier rows are dropped from the train and test features, labels
    and weights held in self.data_dictionary.
    """

    lower_quantile = self.data_dictionary['train_features'].quantile(0.001)
    upper_quantile = self.data_dictionary['train_features'].quantile(0.999)

    def _outlier_rows(features):
        # out-of-band cells become NaN after masking; a row is an outlier as
        # soon as one cell is NaN. 'axis' must be passed by keyword (pandas 2).
        inside = features[(features < upper_quantile) & (features > lower_quantile)]
        return pd.isnull(inside).any(axis=1)

    if predict:
        drop_index = _outlier_rows(self.data_dictionary['prediction_features'])
        self.data_dictionary['prediction_features'].fillna(0, inplace=True)
        do_predict = (~drop_index).to_numpy().astype(int)  # 1 = keep, 0 = outlier

        print('remove_outliers() tossed', len(do_predict) - do_predict.sum(),
              'predictions because they were outside the 0.1%-99.9% quantile range'
              ' of the training data.')
        # combine with the existing flags: only rows that were 1 before and
        # are 1 here end up at 1
        self.do_predict += do_predict
        self.do_predict -= 1

    else:
        # same treatment for the train and the test split; the mask is computed
        # once per split, before any of its entries are replaced
        for split in ('train', 'test'):
            keep = (~_outlier_rows(self.data_dictionary[split + '_features'])).to_numpy()
            for part in ('_features', '_labels', '_weights'):
                key = split + part
                self.data_dictionary[key] = self.data_dictionary[key][keep]

    return
|
||||||
|
|
||||||
|
def build_feature_list(self, config: dict) -> list:
    """
    Build the list of features that will be used to filter
    the full dataframe. Feature list is constructed from the
    user configuration file.
    :params:
    :config: Canonical freqtrade config file containing all
    user defined input in config['freqai'] dictionary.
    :returns:
    :features: list of feature column names. For every timeframe, base feature
    and shift (0..shift) the name '<feature>[_shift-<n>]_<timeframe>' is added,
    followed by '<coin>-<feature>[_shift-<n>]_<timeframe>' for every pair in
    corr_pairlist (coin being the part of the pair before '/').
    """
    freqai_config = config['freqai']
    features = []
    for tf in freqai_config['timeframes']:
        for ft in freqai_config['base_features']:
            for n in range(freqai_config['feature_parameters']['shift'] + 1):
                # unshifted features carry no shift marker in their name
                shift = '_shift-' + str(n) if n > 0 else ''
                features.append(ft + shift + '_' + tf)
                for p in freqai_config['corr_pairlist']:
                    features.append(p.split("/")[0] + '-' + ft + shift + '_' + tf)

    print('number of features', len(features))
    return features
|
||||||
|
|
||||||
|
def check_if_pred_in_training_spaces(self) -> None:
    """
    Compares the distance from each prediction point to each training data
    point. The distance to the nearest training point, divided by
    self.data['avg_mean_dist'], is used as a Dissimilarity Index (DI);
    predictions whose DI is not below the configured DI_threshold are
    flagged in self.do_predict so that they are not acted upon.
    """

    print('checking if prediction features are in AOA')
    train_to_prediction = pairwise_distances(self.data_dictionary['train_features'],
                                             self.data_dictionary['prediction_features'],
                                             n_jobs=-1)

    # one column per prediction point -> minimum over the training axis
    nearest_training_distance = train_to_prediction.min(axis=0)
    dissimilarity_index = nearest_training_distance / self.data['avg_mean_dist']
    threshold = self.config['freqai']['feature_parameters']['DI_threshold']
    do_predict = np.where(dissimilarity_index < threshold, 1, 0)

    print('Distance checker tossed', len(do_predict) - do_predict.sum(),
          'predictions for being too far from training data')

    # merge with the flags that are already present
    self.do_predict += do_predict
    self.do_predict -= 1
|
||||||
|
|
||||||
|
def set_weights_higher_recent(self, num_weights: int) -> np.ndarray:
    """
    Set weights so that recent data is more heavily weighted during
    training than older data.
    :params:
    :num_weights: number of samples to create weights for
    :returns:
    :weights: array of length num_weights. The sample at position j (0 = oldest)
    gets exp(-(num_weights - j) / (weight_factor * num_weights)), where
    weight_factor is read from config['freqai']['feature_parameters'].
    """
    weight_factor = self.config['freqai']['feature_parameters']['weight_factor']
    # age counts down from num_weights (oldest sample) to 1 (most recent), so
    # every sample - including the very first one - gets a strictly positive
    # weight instead of being silently zeroed out
    age = np.arange(num_weights, 0, -1)
    return np.exp(-age / (weight_factor * num_weights))
|
||||||
|
|
||||||
|
def append_predictions(self, predictions, do_predict, len_dataframe):
    """
    Extend the accumulated backtest results with those of the current backtest period.
    :params:
    :predictions: predictions of the current period
    :do_predict: do_predict flags of the current period
    :len_dataframe: number of rows of the current backtest dataframe; the label
    mean/std stored in self.data ('s_mean', 's_std') are repeated that many times.
    """
    constant = np.ones(len_dataframe)
    additions = {
        'predictions': predictions,
        'do_predict': do_predict,
        'target_mean': constant * self.data['s_mean'],
        'target_std': constant * self.data['s_std'],
    }

    # every accumulator grows by the values of this period
    for attribute, new_values in additions.items():
        setattr(self, attribute, np.append(getattr(self, attribute), new_values))

    return
|
||||||
|
|
||||||
|
def fill_predictions(self, len_dataframe):
    """
    Left-pad the accumulated results with zeros so that they have as many entries
    as the dataframe that goes back to the strategy. The padded rows lie before
    the backtesting range and are not included in the backtest.
    :params:
    :len_dataframe: number of rows of the full strategy dataframe
    """
    padding = np.zeros(len_dataframe - len(self.predictions))  # startup_candle_count

    for attribute in ('predictions', 'do_predict', 'target_mean', 'target_std'):
        setattr(self, attribute, np.append(padding, getattr(self, attribute)))

    return
|
||||||
|
|
||||||
|
def np_encoder(self, object):
    """
    Convert a numpy scalar into the equivalent built-in Python value.
    Anything that is not a numpy scalar yields None.
    """
    return object.item() if isinstance(object, np.generic) else None
|
158
freqtrade/freqai/freqai_interface.py
Normal file
158
freqtrade/freqai/freqai_interface.py
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
|
||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from pandas import DataFrame
|
||||||
|
import shutil
|
||||||
|
import gc
|
||||||
|
from typing import Any, Dict, Optional, Tuple
|
||||||
|
from abc import ABC
|
||||||
|
from freqtrade.freqai.data_handler import DataHandler
|
||||||
|
|
||||||
|
pd.options.mode.chained_assignment = None
|
||||||
|
|
||||||
|
class IFreqaiModel(ABC):
    """
    Class containing all tools for training and prediction in the strategy.
    User models should inherit from this class as shown in
    templates/ExamplePredictionModel.py where the user overrides
    train(), predict(), fit(), and make_labels().
    """

    def __init__(self, config: Dict[str, Any]) -> None:
        """
        Store the relevant parts of the configuration and prepare the model folder.
        :params:
        :config: canonical freqtrade configuration; must contain config['freqai'],
        config['user_data_dir'] and config['config_files'].
        """

        self.config = config
        self.freqai_info = config['freqai']
        self.data_split_parameters = config['freqai']['data_split_parameters']
        self.model_training_parameters = config['freqai']['model_training_parameters']
        self.feature_parameters = config['freqai']['feature_parameters']
        self.full_path = (str(config['user_data_dir']) +
                          "/models/" + self.freqai_info['full_timerange'] +
                          '-' + self.freqai_info['identifier'])
        self.metadata = {}
        self.data = {}
        self.time_last_trained = None
        self.current_time = None
        self.model = None
        self.predictions = None

        if not os.path.exists(self.full_path):
            # also creates a missing 'models' parent folder
            os.makedirs(self.full_path, exist_ok=True)
            # keep a copy of the configuration next to the models. Only the file
            # name is used for the destination, so configs given with a
            # directory part (or an absolute path) are copied correctly.
            config_file = self.config['config_files'][0]
            shutil.copy(config_file,
                        self.full_path + "/" + os.path.basename(config_file))

    def start(self, dataframe: DataFrame,
              metadata: dict) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
        """
        Entry point to the FreqaiModel, it will train a new model if
        necessary before making the prediction.
        The backtesting and training paradigm is a sliding training window
        with a following backtest window. Both windows slide according to the
        length of the backtest window. This function is not intended to be
        overridden by children of IFreqaiModel, but technically, it can be
        if the user wishes to make deeper changes to the sliding window
        logic.
        :params:
        :dataframe: Full dataframe coming from strategy - it contains entire
        backtesting timerange + additional historical data necessary to train
        the model.
        :metadata: pair metadata coming from strategy.
        :returns:
        :predictions, do_predict, target_mean, target_std: the arrays accumulated
        by the DataHandler over all backtest windows, padded to len(dataframe).
        """
        self.pair = metadata['pair']
        self.dh = DataHandler(self.config, dataframe, self.data)

        print('going to train', len(self.dh.training_timeranges),
              'timeranges:', self.dh.training_timeranges)

        # Loop enforcing the sliding window training/backtesting paradigm
        # tr_train is the training time range e.g. 1 historical month
        # tr_backtest is the backtesting time range e.g. the week directly
        # following tr_train. Both of these windows slide through the
        # entire backtest
        for tr_train, tr_backtest in zip(self.dh.training_timeranges,
                                         self.dh.backtesting_timeranges):
            gc.collect()
            self.dh.data = {}  # clean the pair specific data between models
            self.freqai_info['training_timerange'] = tr_train
            dataframe_train = self.dh.slice_dataframe(tr_train, dataframe)
            dataframe_backtest = self.dh.slice_dataframe(tr_backtest, dataframe)
            print("training", self.pair, "for", tr_train)
            self.dh.model_path = self.full_path + "/" + 'sub-train' + '-' + str(tr_train) + '/'
            # train only when no saved model exists for this pair and window
            if not self.model_exists(self.pair, training_timerange=tr_train):
                self.model = self.train(dataframe_train, metadata)
                self.dh.save_data(self.model)
            else:
                self.model = self.dh.load_data(self.dh.model_path)

            preds, do_preds = self.predict(dataframe_backtest)

            self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))

        self.dh.fill_predictions(len(dataframe))

        return self.dh.predictions, self.dh.do_predict, self.dh.target_mean, self.dh.target_std

    def make_labels(self, dataframe: DataFrame) -> DataFrame:
        """
        User defines the labels here (target values).
        :params:
        :dataframe: the full dataframe for the present training period
        :returns:
        The base implementation returns the dataframe unchanged.
        """

        return dataframe

    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datahandler
        for storing, saving, loading, and managing.
        :params:
        :unfiltered_dataframe: Full dataframe for the current training period
        :metadata: pair metadata from strategy.
        :returns:
        :model: Trained model which can be used to inference (self.predict).
        The base implementation is a placeholder that returns the input
        dataframe twice; user models are expected to override it.
        """

        return unfiltered_dataframe, unfiltered_dataframe

    def fit(self, data_dictionary: Dict = None) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by Freqai.
        :params:
        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        :returns:
        The base implementation is a placeholder and returns None.
        """

        return None

    def predict(self, unfiltered_dataframe: DataFrame = None) -> Optional[Tuple[Any, Any]]:
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        The base implementation is a placeholder and returns None.
        """

        return None

    def model_exists(self, pair: str, training_timerange: Optional[str] = None) -> bool:
        """
        Check if a saved model already exists for a pair and training timerange.
        As a side effect self.dh.model_filename is set to the file name stem that
        belongs to this pair/timerange; self.dh.model_path must already be set.
        :param pair: pair e.g. BTC/USD
        :param training_timerange: training timerange string; although it defaults
        to None, a string is required because it is part of the file name.
        :return: True if '<model_path><model_filename>_model.joblib' is a file.
        """
        coin, _ = pair.split('/')
        self.dh.model_filename = ("cb_" + coin.lower() + "_" +
                                  self.freqai_info['trained_stake'] + "_" + training_timerange)
        file_exists = os.path.isfile(self.dh.model_path +
                                     self.dh.model_filename + "_model.joblib")
        if file_exists:
            print("Found model at", self.dh.model_path + self.dh.model_filename)
        else:
            print("Could not find model at",
                  self.dh.model_path + self.dh.model_filename)
        return file_exists
|
12
freqtrade/freqai/strategy_bridge.py
Normal file
12
freqtrade/freqai/strategy_bridge.py
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver
|
||||||
|
|
||||||
|
|
||||||
|
class CustomModel:
    """
    Thin connector that gives a strategy access to the user defined
    IFreqaiModel: the resolved model instance is exposed as self.bridge.
    """

    def __init__(self, config):
        """
        :params:
        :config: freqtrade configuration handed to FreqaiModelResolver.load_freqaimodel
        """
        resolved_model = FreqaiModelResolver.load_freqaimodel(config)
        self.bridge = resolved_model
|
@ -204,6 +204,12 @@ class Backtesting:
|
|||||||
"""
|
"""
|
||||||
self.progress.init_step(BacktestState.DATALOAD, 1)
|
self.progress.init_step(BacktestState.DATALOAD, 1)
|
||||||
|
|
||||||
|
if self.config['freqaimodel']:
|
||||||
|
self.required_startup += int((self.config['freqai']['train_period']*86400) /
|
||||||
|
timeframe_to_seconds(self.config['timeframe']))
|
||||||
|
self.config['startup_candle_count'] = self.required_startup
|
||||||
|
|
||||||
|
|
||||||
data = history.load_data(
|
data = history.load_data(
|
||||||
datadir=self.config['datadir'],
|
datadir=self.config['datadir'],
|
||||||
pairs=self.pairlists.whitelist,
|
pairs=self.pairlists.whitelist,
|
||||||
|
45
freqtrade/resolvers/freqaimodel_resolver.py
Normal file
45
freqtrade/resolvers/freqaimodel_resolver.py
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
# pragma pylint: disable=attribute-defined-outside-init
|
||||||
|
|
||||||
|
"""
|
||||||
|
This module loads a custom model for freqai
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Dict
|
||||||
|
|
||||||
|
from freqtrade.constants import USERPATH_FREQAIMODELS
|
||||||
|
from freqtrade.exceptions import OperationalException
|
||||||
|
from freqtrade.freqai.freqai_interface import IFreqaiModel
|
||||||
|
from freqtrade.resolvers import IResolver
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class FreqaiModelResolver(IResolver):
    """
    Resolver that locates and instantiates the user's custom IFreqaiModel class.
    """
    object_type = IFreqaiModel
    object_type_str = "FreqaiModel"
    user_subdir = USERPATH_FREQAIMODELS
    # NOTE(review): the built-in search path points at the 'optimize' package -
    # confirm that this is where bundled freqai models are meant to live.
    initial_search_path = Path(__file__).parent.parent.joinpath('optimize').resolve()

    @staticmethod
    def load_freqaimodel(config: Dict) -> IFreqaiModel:
        """
        Load the custom class named by config['freqaimodel'].
        :param config: configuration dictionary; 'freqaimodel_path' is used as an
        additional search directory when present.
        :return: the object returned by load_object for the requested class, which
        receives the configuration as its 'config' keyword argument.
        :raises OperationalException: if no freqaimodel is configured.
        """
        model_name = config.get('freqaimodel')
        if model_name:
            return FreqaiModelResolver.load_object(
                model_name,
                config,
                kwargs={'config': config},
                extra_dir=config.get('freqaimodel_path'),
            )

        raise OperationalException(
            "No freqaimodel set. Please use `--freqaimodel` to "
            "specify the FreqaiModel class to use.\n"
        )
|
139
freqtrade/templates/ExamplePredictionModel.py
Normal file
139
freqtrade/templates/ExamplePredictionModel.py
Normal file
@ -0,0 +1,139 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from catboost import CatBoostRegressor, Pool
|
||||||
|
from pandas import DataFrame
|
||||||
|
from typing import Any, Dict, Tuple
|
||||||
|
from freqtrade.freqai.freqai_interface import IFreqaiModel
|
||||||
|
|
||||||
|
class ExamplePredictionModel(IFreqaiModel):
    """
    User created prediction model. The class overrides make_labels(), train(),
    fit() and predict(). It inherits from IFreqaiModel, which holds a DataHandler
    (self.dh) where data is held, saved, loaded, and managed.
    """

    def make_labels(self, dataframe: DataFrame) -> DataFrame:
        """
        User defines the labels here (target values).
        :params:
        :dataframe: the full dataframe for the present training period. A label
        column 's' is added to it.
        :returns:
        the 's' column: maximum close over the coming `period` candles relative to
        the current close, minus 1. Its mean and standard deviation are stored in
        self.dh.data as 's_mean' / 's_std'.
        """

        dataframe['s'] = (dataframe['close'].shift(-self.feature_parameters['period']).rolling(
            self.feature_parameters['period']).max() / dataframe['close'] - 1)
        self.dh.data['s_mean'] = dataframe['s'].mean()
        self.dh.data['s_std'] = dataframe['s'].std()

        print('label mean', self.dh.data['s_mean'], 'label std', self.dh.data['s_std'])

        return dataframe['s']

    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the datahandler
        for storing, saving, loading, and managing.
        :params:
        :unfiltered_dataframe: Full dataframe for the current training period
        :metadata: pair metadata from strategy.
        :returns:
        :model: Trained model which can be used to inference (self.predict)
        """
        print("--------------------Starting training--------------------")

        # create the full feature list based on user config info
        self.dh.training_features_list = self.dh.build_feature_list(self.config)
        unfiltered_labels = self.make_labels(unfiltered_dataframe)

        # filter the features requested by user in the configuration file and elegantly handle NaNs
        features_filtered, labels_filtered = self.dh.filter_features(
            unfiltered_dataframe,
            self.dh.training_features_list, unfiltered_labels, training_filter=True)

        # split data into train/test data.
        data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
        # standardize all data based on train_dataset only
        data_dictionary = self.dh.standardize_data(data_dictionary)

        # optional additional data cleaning
        if self.feature_parameters['principal_component_analysis']:
            self.dh.principal_component_analysis()
        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=False)
        if self.feature_parameters['DI_threshold']:
            self.dh.data['avg_mean_dist'] = self.dh.compute_distances()

        print("length of train data", len(data_dictionary['train_features']))

        model = self.fit(data_dictionary)

        print('Finished training')
        print(f'--------------------done training {metadata["pair"]}--------------------')

        return model

    def fit(self, data_dictionary: Dict) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by Freqai.
        :params:
        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        :returns:
        :model: the fitted CatBoostRegressor
        """

        train_data = Pool(
            data=data_dictionary['train_features'],
            label=data_dictionary['train_labels'],
            weight=data_dictionary['train_weights']
        )

        test_data = Pool(
            data=data_dictionary['test_features'],
            label=data_dictionary['test_labels'],
            weight=data_dictionary['test_weights']
        )

        # defaults first, user configuration second: a 'verbose' or
        # 'early_stopping_rounds' entry in the user config overrides the default
        # instead of raising "got multiple values for keyword argument"
        regressor_parameters = {'verbose': 100, 'early_stopping_rounds': 400}
        regressor_parameters.update(self.model_training_parameters)

        model = CatBoostRegressor(**regressor_parameters)
        model.fit(X=train_data, eval_set=test_data)

        return model

    def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[np.ndarray, np.ndarray]:
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        print("--------------------Starting prediction--------------------")

        original_feature_list = self.dh.build_feature_list(self.config)
        filtered_dataframe, _ = self.dh.filter_features(
            unfiltered_dataframe, original_feature_list, training_filter=False)
        filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
        self.dh.data_dictionary['prediction_features'] = filtered_dataframe

        # optional additional data cleaning
        if self.feature_parameters['principal_component_analysis']:
            pca_components = self.dh.pca.transform(filtered_dataframe)
            self.dh.data_dictionary['prediction_features'] = pd.DataFrame(
                data=pca_components,
                columns=['PC' + str(i) for i in range(0, self.dh.data['n_kept_components'])],
                index=filtered_dataframe.index)

        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=True)  # creates dropped index

        if self.feature_parameters['DI_threshold']:
            self.dh.check_if_pred_in_training_spaces()  # sets do_predict

        predictions = self.model.predict(self.dh.data_dictionary['prediction_features'])

        # compute the non-standardized predictions
        predictions = predictions * self.dh.data['labels_std'] + self.dh.data['labels_mean']

        print("--------------------Finished prediction--------------------")

        return (predictions, self.dh.do_predict)
|
179
freqtrade/templates/FreqaiExampleStrategy.py
Normal file
179
freqtrade/templates/FreqaiExampleStrategy.py
Normal file
@ -0,0 +1,179 @@
|
|||||||
|
import logging
|
||||||
|
import talib.abstract as ta
|
||||||
|
from pandas import DataFrame
|
||||||
|
import pandas as pd
|
||||||
|
from technical import qtpylib
|
||||||
|
import numpy as np
|
||||||
|
from freqtrade.strategy import (merge_informative_pair)
|
||||||
|
from freqtrade.strategy.interface import IStrategy
|
||||||
|
from freqtrade.freqai.strategy_bridge import CustomModel
|
||||||
|
from functools import reduce
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class FreqaiExampleStrategy(IStrategy):
    """
    Example strategy showing how the user connects their own
    IFreqaiModel to the strategy. Namely, the user uses:
    self.model = CustomModel(self.config)
    self.model.bridge.start(dataframe, metadata)

    to make predictions on their data. populate_any_indicators() automatically
    generates the variety of features indicated by the user in the
    canonical freqtrade configuration file under config['freqai'].
    """

    minimal_roi = {
        "0": 0.01,
        "240": -1
    }

    plot_config = {
        'main_plot': {
        },
        'subplots': {
            "prediction": {
                'prediction': {'color': 'blue'}
            },
            "target_roi": {
                'target_roi': {'color': 'brown'},
            },
            "do_predict": {
                'do_predict': {'color': 'brown'},
            },
        }
    }

    stoploss = -0.05
    use_sell_signal = True
    startup_candle_count: int = 1000

    def informative_pairs(self):
        """
        Build the (pair, timeframe) combinations used as informative data.

        The pairs and timeframes are read directly from config['freqai']
        ('corr_pairlist' and 'timeframes') so that this method does not depend
        on populate_indicators() having been called first.

        :return: flat list of (pair, timeframe) tuples, grouped by timeframe.
        """
        freqai_info = self.config['freqai']
        return [(pair, tf)
                for tf in freqai_info['timeframes']
                for pair in freqai_info['corr_pairlist']]

    def populate_any_indicators(self, pair, df, tf, informative=None, coin=''):
        """
        Function designed to automatically generate, name and merge features
        from user indicated timeframes in the configuration file. User can add
        additional features here, but must follow the naming convention.
        :params:
        :pair: pair to be used as informative
        :df: strategy dataframe which will receive merges from informatives
        :tf: timeframe of the dataframe which will modify the feature names
        :informative: the dataframe associated with the informative pair
        :coin: the name of the coin which will modify the feature names.
        :return: df with the informative features (and their shifted copies)
        merged in and the raw informative OHLCV/date columns dropped.
        """
        if informative is None:
            informative = self.dp.get_pair_dataframe(pair, tf)

        informative[coin+'rsi'] = ta.RSI(informative, timeperiod=14)
        informative[coin+'mfi'] = ta.MFI(informative, timeperiod=25)
        # NOTE(review): this was previously called with `window=`, which is not
        # a TA-Lib ADX parameter name; `timeperiod` is. 14 is the value of the
        # last (effective) assignment in the previous code - confirm lookback.
        informative[coin+'adx'] = ta.ADX(informative, timeperiod=14)

        informative[coin+'20sma'] = ta.SMA(informative, timeperiod=20)
        informative[coin+'21ema'] = ta.EMA(informative, timeperiod=21)
        informative[coin+'bmsb'] = np.where(
            informative[coin+'20sma'].lt(informative[coin+'21ema']), 1, 0)
        informative[coin+'close_over_20sma'] = (
            informative['close'] / informative[coin+'20sma'])

        informative[coin+'ema21'] = ta.EMA(informative, timeperiod=21)
        informative[coin+'sma20'] = ta.SMA(informative, timeperiod=20)
        stoch = ta.STOCHRSI(informative, 15, 20, 2, 2)
        informative[coin+'srsi-fk'] = stoch['fastk']
        informative[coin+'srsi-fd'] = stoch['fastd']

        bollinger = qtpylib.bollinger_bands(
            qtpylib.typical_price(informative), window=14, stds=2.2)
        informative[coin+'bb_lowerband'] = bollinger['lower']
        informative[coin+'bb_middleband'] = bollinger['mid']
        informative[coin+'bb_upperband'] = bollinger['upper']
        informative[coin+'bb_width'] = (
            (informative[coin+"bb_upperband"] - informative[coin+"bb_lowerband"])
            / informative[coin+"bb_middleband"])
        informative[coin+'close-bb_lower'] = (
            informative['close'] / informative[coin+'bb_lowerband'])

        informative[coin+'roc'] = ta.ROC(informative, timeperiod=3)

        macd = ta.MACD(informative)
        informative[coin+'macd'] = macd['macd']
        informative[coin+'pct-change'] = informative['close'].pct_change()
        informative[coin+'relative_volume'] = (
            informative['volume'] / informative['volume'].rolling(10).mean())

        # every column carrying the coin prefix is treated as a feature; with
        # the default coin='' this matches all columns of the informative frame
        indicators = [col for col in informative if col.startswith(coin)]

        # append lagged copies (1..shift candles) of every feature, built once
        # and concatenated in a single call instead of once per lag
        n_shifts = self.config['freqai']['feature_parameters']['shift']
        shifted = [informative[indicators].shift(n).add_suffix('_shift-'+str(n))
                   for n in range(1, n_shifts + 1)]
        if shifted:
            informative = pd.concat([informative] + shifted, axis=1)

        df = merge_informative_pair(df, informative, self.config['timeframe'], tf, ffill=True)
        # the merge suffixes informative columns with '_<tf>'; the raw candle
        # columns of the informative pair are not features, so drop them
        skip_columns = [(s + '_'+tf) for s in
                        ['date', 'open', 'high', 'low', 'close', 'volume']]
        df = df.drop(columns=skip_columns)

        return df

    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Build all configured features and attach the model outputs.

        :param dataframe: strategy dataframe for the pair in metadata['pair']
        :param metadata: dict holding at least the 'pair' key
        :return: dataframe with the generated features plus the 'prediction',
        'do_predict', 'target_mean', 'target_std', 'target_roi' and 'sell_roi'
        columns.
        """
        # the configuration file parameters are stored here
        self.freqai_info = self.config['freqai']

        # the model is instantiated here
        self.model = CustomModel(self.config)

        logger.info('Populating indicators...')

        # the following loops are necessary for building the features
        # indicated by the user in the configuration file.
        for tf in self.freqai_info['timeframes']:
            dataframe = self.populate_any_indicators(metadata['pair'],
                                                     dataframe.copy(), tf)
            for i in self.freqai_info['corr_pairlist']:
                dataframe = self.populate_any_indicators(
                    i, dataframe.copy(), tf, coin=i.split("/")[0]+'-')

        # the model will return 4 values, its prediction, an indication of whether or not the
        # prediction should be accepted, the target mean/std values from the labels used during
        # each training period.
        (dataframe['prediction'], dataframe['do_predict'],
         dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(
            dataframe, metadata)

        # entry/exit thresholds derived from the label statistics
        dataframe['target_roi'] = dataframe['target_mean']+dataframe['target_std']*0.5
        dataframe['sell_roi'] = dataframe['target_mean']-dataframe['target_std']*1.5
        return dataframe

    def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Flag rows with buy = 1 where the prediction exceeds 'target_roi' and
        the model marked the prediction as acceptable (do_predict == 1).

        :param dataframe: dataframe returned by populate_indicators()
        :param metadata: pair metadata (unused here)
        :return: the same dataframe with the 'buy' column set on matching rows
        """
        # each list entry is an alternative entry condition; entries are OR-ed
        buy_conditions = [
            (dataframe['prediction'] > dataframe['target_roi'])
            &
            (dataframe['do_predict'] == 1)
        ]

        if buy_conditions:
            dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), 'buy'] = 1

        return dataframe

    def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Flag rows with sell = 1 where the prediction falls below 'sell_roi' and
        the model marked the prediction as acceptable (do_predict == 1).

        :param dataframe: dataframe returned by populate_indicators()
        :param metadata: pair metadata (unused here)
        :return: the same dataframe with the 'sell' column set on matching rows
        """
        # each list entry is an alternative exit condition; entries are OR-ed
        sell_conditions = [
            (dataframe['prediction'] < dataframe['sell_roi'])
            &
            (dataframe['do_predict'] == 1)
        ]

        if sell_conditions:
            dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), 'sell'] = 1

        return dataframe

    def get_ticker_indicator(self):
        """
        Return config['timeframe'] without its last character, as an int
        (e.g. '5m' -> 5).
        """
        return int(self.config['timeframe'][:-1])
|
@ -36,6 +36,7 @@ nav:
|
|||||||
- Advanced Strategy: strategy-advanced.md
|
- Advanced Strategy: strategy-advanced.md
|
||||||
- Advanced Hyperopt: advanced-hyperopt.md
|
- Advanced Hyperopt: advanced-hyperopt.md
|
||||||
- Sandbox Testing: sandbox-testing.md
|
- Sandbox Testing: sandbox-testing.md
|
||||||
|
- Freqai: freqai.md
|
||||||
- FAQ: faq.md
|
- FAQ: faq.md
|
||||||
- SQL Cheat-sheet: sql_cheatsheet.md
|
- SQL Cheat-sheet: sql_cheatsheet.md
|
||||||
- Strategy migration: strategy_migration.md
|
- Strategy migration: strategy_migration.md
|
||||||
|
8
requirements-freqai.txt
Normal file
8
requirements-freqai.txt
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
# Include all requirements to run the bot.
|
||||||
|
-r requirements.txt
|
||||||
|
|
||||||
|
# Required for freqai
|
||||||
|
scikit-learn==1.0.2
|
||||||
|
scikit-optimize==0.9.0
|
||||||
|
joblib==1.1.0
|
||||||
|
catboost==1.0.4
|
Loading…
Reference in New Issue
Block a user