From fc837c4daa27a18ff0e86128f4d52089b88fa5fb Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Tue, 3 May 2022 10:14:17 +0200
Subject: [PATCH] add freqai backend machinery, user interface, documentation

---
 .gitignore                                    |   3 +
 config_examples/config_freqai.example.json    | 100 ++++
 docs/freqai.md                                | 265 +++++++++++
 freqtrade/commands/__init__.py                |   1 +
 freqtrade/commands/arguments.py               |   5 +-
 freqtrade/commands/cli_options.py             |  12 +
 freqtrade/commands/freqai_commands.py         |  24 +
 freqtrade/configuration/configuration.py      |  12 +
 freqtrade/constants.py                        |   1 +
 freqtrade/enums/runmode.py                    |   3 +-
 freqtrade/freqai/data_handler.py              | 434 ++++++++++++++++++
 freqtrade/freqai/freqai_interface.py          | 158 +++++++
 freqtrade/freqai/strategy_bridge.py           |  12 +
 freqtrade/optimize/backtesting.py             |   6 +
 freqtrade/resolvers/freqaimodel_resolver.py   |  45 ++
 freqtrade/templates/ExamplePredictionModel.py | 139 ++++++
 freqtrade/templates/FreqaiExampleStrategy.py  | 179 ++++++++
 mkdocs.yml                                    |   1 +
 requirements-freqai.txt                       |   8 +
 19 files changed, 1405 insertions(+), 3 deletions(-)
 create mode 100644 config_examples/config_freqai.example.json
 create mode 100644 docs/freqai.md
 create mode 100644 freqtrade/commands/freqai_commands.py
 create mode 100644 freqtrade/freqai/data_handler.py
 create mode 100644 freqtrade/freqai/freqai_interface.py
 create mode 100644 freqtrade/freqai/strategy_bridge.py
 create mode 100644 freqtrade/resolvers/freqaimodel_resolver.py
 create mode 100644 freqtrade/templates/ExamplePredictionModel.py
 create mode 100644 freqtrade/templates/FreqaiExampleStrategy.py
 create mode 100644 requirements-freqai.txt

diff --git a/.gitignore b/.gitignore
index 97f77f779..17823f642 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,8 @@ logfile.txt
 user_data/*
 !user_data/strategy/sample_strategy.py
 !user_data/notebooks
+!user_data/models
+user_data/models/*
 user_data/notebooks/*
 freqtrade-plot.html
 freqtrade-profit-plot.html
@@ -105,3 +107,4 @@ target/
 !config_examples/config_ftx.example.json
 !config_examples/config_full.example.json
 !config_examples/config_kraken.example.json
+!config_examples/config_freqai.example.json

diff --git a/config_examples/config_freqai.example.json b/config_examples/config_freqai.example.json
new file mode 100644
index 000000000..0092a8c51
--- /dev/null
+++ b/config_examples/config_freqai.example.json
@@ -0,0 +1,100 @@
{
    "max_open_trades": 1,
    "stake_currency": "USDT",
    "stake_amount": 800,
    "tradable_balance_ratio": 1,
    "fiat_display_currency": "USD",
    "dry_run": true,
    "timeframe": "5m",
    "dry_run_wallet": 1000,
    "cancel_open_orders_on_exit": true,
    "unfilledtimeout": {
        "entry": 10,
        "exit": 30
    },
    "exchange": {
        "name": "ftx",
        "key": "",
        "secret": "",
        "ccxt_config": {"enableRateLimit": true},
        "ccxt_async_config": {
            "enableRateLimit": true,
            "rateLimit": 200
        },
        "pair_whitelist": [
            "BTC/USDT"
        ],
        "pair_blacklist": []
    },
    "entry_pricing": {
        "price_side": "same",
        "use_order_book": true,
        "order_book_top": 1,
        "price_last_balance": 0.0,
        "check_depth_of_market": {
            "enabled": false,
            "bids_to_ask_delta": 1
        }
    },
    "exit_pricing": {
        "price_side": "same",
        "use_order_book": true,
        "order_book_top": 1
    },
    "pairlists": [
        {"method": "StaticPairList"}
    ],

    "freqai": {
        "btc_pair": "BTC/USDT",
        "timeframes": ["5m", "15m", "1h"],
        "full_timerange": "20210601-20220101",
        "train_period": 30,
        "backtest_period": 7,
        "identifier": "example",
        "base_features": [
            "rsi",
            "close_over_20sma",
            "relative_volume",
            "bb_width",
            "mfi",
            "roc",
"pct-change", + "adx", + "macd" + ], + "corr_pairlist": [ + "ETH/USDT", + "LINK/USDT", + "DOT/USDT" + ], + "training_timerange" : "20211220-20220117", + + "feature_parameters" : { + "period": 12, + "shift": 2, + "drop_features": false, + "DI_threshold": 1, + "weight_factor": 0, + "principal_component_analysis": false, + "remove_outliers": false + }, + "data_split_parameters" : { + "test_size": 0.25, + "random_state": 1 + }, + "model_training_parameters" : { + "n_estimators": 2000, + "random_state": 1, + "learning_rate": 0.02, + "task_type": "CPU" + } + }, + "bot_name": "", + "initial_state": "running", + "forcebuy_enable": false, + "internals": { + "process_throttle_secs": 5 + } +} diff --git a/docs/freqai.md b/docs/freqai.md new file mode 100644 index 000000000..6bc1e9365 --- /dev/null +++ b/docs/freqai.md @@ -0,0 +1,265 @@ +# Freqai + +!!! Note + Freqai is still experimental, and should be used at the user's own discretion. + +Freqai is a module designed to automate a variety of tasks associated with +training a regressor to predict signals based on input features. Among the +the features includes: + +* Easy large feature set construction based on simple user input +* Sweep model training and backtesting to simulate consistent model retraining through time +* Smart outlier removal of data points from prediction sets using a Dissimilarity Index. +* Data dimensionality reduction with Principal Component Analysis +* Automatic file management for storage of models to be reused during live +* Smart and safe data standardization +* Cleaning of NaNs from the data set before training and prediction. + +TODO: +* live is not automated, still some architectural work to be done + +## Background and vocabulary + +**Features** are the quantities with which a model is trained. $X_i$ represents the +vector of all features for a single candle. In Freqai, the user +builds the features from anything they can construct in the strategy. + +**Labels** are the target values with which the weights inside a model are trained +toward. Each set of features is associated with a single label, which is also +defined within the strategy by the user. These labels look forward into the +future, and are not available to the model during dryrun/live/backtesting. + +**Training** refers to the process of feeding individual feature sets into the +model with associated labels with the goal of matching input feature sets to +associated labels. + +**Train data** is a subset of the historic data which is fed to the model during +training to adjust weights. This data directly influences weight connections +in the model. + +**Test data** is a subset of the historic data which is used to evaluate the +intermediate performance of the model during training. This data does not +directly influence nodal weights within the model. + +## Configuring the bot +### Example config file +The user interface is isolated to the typical config file. 
A typical Freqai
config setup includes:

```json
    "freqai": {
        "timeframes": ["5m", "15m", "4h"],
        "full_timerange": "20211220-20220220",
        "train_period": "month",
        "backtest_period": "week",
        "identifier": "unique-id",
        "base_features": [
            "rsi",
            "mfi",
            "roc"
        ],
        "corr_pairlist": [
            "ETH/USD",
            "LINK/USD",
            "BNB/USD"
        ],
        "feature_parameters": {
            "period": 24,
            "shift": 2,
            "drop_features": false,
            "DI_threshold": 1,
            "weight_factor": 0
        },
        "data_split_parameters": {
            "test_size": 0.25,
            "random_state": 42
        },
        "model_training_parameters": {
            "n_estimators": 100,
            "random_state": 42,
            "learning_rate": 0.02,
            "task_type": "CPU"
        }
    },
```

### Building the feature set

Most of these parameters control the feature data set. `base_features`
indicates the basic indicators the user wishes to include in the feature set.
`timeframes` are the timeframes of each base feature that the user wishes to
include in the feature set. In the present case, the user is asking for the
`5m`, `15m`, and `4h` timeframes of the `rsi`, `mfi`, and `roc` to be included
in the feature set.

In addition, the user can ask for each of these features to be included from
informative pairs using the `corr_pairlist`. This means that the present feature
set will also include all the `base_features` on all the `timeframes` for each of
`ETH/USD`, `LINK/USD`, and `BNB/USD`.

`shift` is another user-controlled parameter which indicates the number of previous
candles to include in the present feature set. In other words, `shift: 2` tells
Freqai to include the past 2 candles for each of the features included
in the dataset.

In total, the number of features the present user has created is:

no. `timeframes` * no. `base_features` * no. `corr_pairlist` * no. `shift` =
3 * 3 * 3 * 2 = 54.

### Deciding the sliding training window and backtesting duration

`full_timerange` lets the user set the full backtesting range to train and
backtest through. Meanwhile, `train_period` is the sliding training window and
`backtest_period` is the sliding backtesting window. In the present example,
the user is asking Freqai to train and backtest over the range `20211220-20220220`,
using a sliding training window of one `month`, and to backtest each subsequent
`week` with a newly trained model. This means that Freqai will train 8 separate
models (because the full range comprises 8 backtest weeks), and then backtest the
week following each of the 8 sliding training timeranges. Users can think of this
as a "sliding window" which emulates Freqai retraining itself once per week in
live mode using the previous month of data.


## Running Freqai
### Training and backtesting

The Freqai training/backtesting module can be executed with the following command:

```bash
freqtrade backtesting --strategy FreqaiExampleStrategy --config config_freqai.example.json --freqaimodel ExamplePredictionModel
```

where the user needs a `FreqaiExampleStrategy` that conforms to the requirements outlined
below. `ExamplePredictionModel` is a user-built class which lets users design their
own training procedures and data analysis.
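
To see concretely which sliding windows a given configuration produces, the short
sketch below (illustrative only - the actual logic lives in
`DataHandler.split_timerange()` in `freqtrade/freqai/data_handler.py`) prints the
train/backtest pairs for the example `full_timerange` above, assuming a 30 day
`train_period` and a 7 day `backtest_period`:

```python
from datetime import datetime, timedelta

# Sketch of Freqai's sliding window: both windows advance by the
# length of the backtest window on each iteration.
full_start, full_stop = datetime(2021, 12, 20), datetime(2022, 2, 20)
train_period, backtest_period = timedelta(days=30), timedelta(days=7)

train_start = full_start
while train_start + train_period <= full_stop:
    train_stop = train_start + train_period
    print(f"train {train_start:%Y%m%d}-{train_stop:%Y%m%d} -> "
          f"backtest {train_stop:%Y%m%d}-{(train_stop + backtest_period):%Y%m%d}")
    train_start += backtest_period
```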
### Building a freqai strategy

The Freqai strategy requires the user to include the following lines of code in
`populate_indicators()`:

```python
    from freqtrade.freqai.strategy_bridge import CustomModel

    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        # the configuration file parameters are stored here
        self.freqai_info = self.config['freqai']

        # the model is instantiated here
        self.model = CustomModel(self.config)

        print('Populating indicators...')

        # the following loops are necessary for building the features
        # indicated by the user in the configuration file.
        for tf in self.freqai_info['timeframes']:
            dataframe = self.populate_any_indicators(metadata['pair'],
                                                     dataframe.copy(), tf)
            for i in self.freqai_info['corr_pairlist']:
                dataframe = self.populate_any_indicators(i,
                                                         dataframe.copy(), tf,
                                                         coin=i.split("/")[0] + '-')

        # the model will return 4 values: its prediction, an indication of whether or not
        # the prediction should be accepted, and the target mean/std values from the
        # labels used during each training period.
        (dataframe['prediction'], dataframe['do_predict'],
         dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)

        return dataframe
```

The user should also include `populate_any_indicators()` from `templates/FreqaiExampleStrategy.py`, which builds
the feature set with a proper naming convention for the IFreqaiModel to use later.

### Building an IFreqaiModel

Freqai has a base example model in `templates/ExamplePredictionModel.py`, but users can customize and create
their own prediction models using the `IFreqaiModel` class. Users are encouraged to override `train()`, `predict()`,
and `make_labels()` to customize various aspects of their training procedures.

### Running the model live

After the user has designed a desirable feature set, Freqai can be run in dry/live mode
using the typical trade command:

```bash
freqtrade trade --strategy FreqaiExampleStrategy --config config_freqai.example.json --training_timerange '20211220-20220120'
```

Here the user specifies exactly which of the models from the sliding window
they wish to run live using `--training_timerange` (typically this would be the most
recently trained model). As of right now, Freqai will
not automatically retrain itself, so the user needs to manually retrain and then
reload the config file with a new `--training_timerange` in order to update the
model.


## Data analysis techniques
### Controlling the model learning process

The user can define model settings for the data split with `data_split_parameters` and
learning parameters with `model_training_parameters`. Users are encouraged to visit the
CatBoost documentation for more information on how to select these values. Increasing
`n_estimators` increases both the computational effort and the fit to the training data.
If a user has a GPU installed in their system, they may benefit from changing `task_type` to `GPU`.
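
As a minimal sketch of how these parameters reach the model (mirroring what
`templates/ExamplePredictionModel.py` does inside `fit()`), the
`model_training_parameters` dictionary is unpacked directly into the regressor:

```python
from catboost import CatBoostRegressor

# values taken from config['freqai']['model_training_parameters']
model_training_parameters = {
    "n_estimators": 2000,   # more trees: more compute and a tighter fit to train data
    "learning_rate": 0.02,
    "random_state": 1,
    "task_type": "CPU",     # change to "GPU" if one is available
}

# ExamplePredictionModel.fit() passes the dictionary straight through
model = CatBoostRegressor(verbose=100, early_stopping_rounds=400,
                          **model_training_parameters)
```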
The `weight_factor` allows the user to weight more recent data more strongly
than past data via an exponential function:

$$ W_i = \exp(\frac{-i}{\alpha n}) $$

where $W_i$ is the weight of data point $i$ in a total set of $n$ data points,
and $\alpha$ is the user-set `weight_factor`.

`drop_features` tells Freqai to train the model on the user-defined features,
followed by a feature importance evaluation where it drops the top and bottom
performing features (there is evidence to suggest the top features may not be
helpful in equity/crypto trading since the ultimate objective is to predict low
frequency patterns, source: numerai).

Finally, `period` defines the offset used for the `labels`. In the present example,
the user is asking for `labels` that are 24 candles in the future.

### Removing outliers with the Dissimilarity Index

The Dissimilarity Index (DI) aims to quantify the uncertainty associated with each
prediction made by the model. To do so, Freqai measures the distance between each training
data point and all other training data points:

$$ d_{ab} = \sqrt{\sum_{j=1}^p(X_{a,j}-X_{b,j})^2} $$

where $d_{ab}$ is the distance between the standardized points $a$ and $b$, and $p$
is the number of features, i.e. the length of the vector $X$. The
characteristic distance, $\overline{d}$, for a set of training data points is simply the mean
of the average distances:

$$ \overline{d} = \sum_{a=1}^n(\sum_{b=1}^n(d_{ab}/n)/n) $$

$\overline{d}$ quantifies the spread of the training data, which is compared to
the distance between each new prediction feature vector, $X_k$, and its nearest
training data point:

$$ d_k = \min_i d_{k,i} $$

which enables the estimation of a Dissimilarity Index:

$$ DI_k = d_k/\overline{d} $$

Equity and crypto markets suffer from a high level of non-patterned noise in the
form of outlier data points. The Dissimilarity Index allows predictions which
are outliers, i.e. not existent in the model feature space, to be thrown out due
to low levels of certainty. The user can tweak the DI with `DI_threshold` to increase
or decrease the extrapolation of the trained model.

### Reducing data dimensionality with Principal Component Analysis

TO BE WRITTEN

## Additional information
### Feature standardization

The feature set created by the user is automatically standardized using statistics
computed from the training data only. The same transformation is then applied to all
test data and unseen prediction data (dry/live/backtest).

### File structure

`user_data_dir/models/` contains all the data associated with training and
backtesting. This file structure is heavily controlled and read by the `DataHandler()`
and should thus not be modified.
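
As an illustration of that structure (derived from the save/load logic in
`DataHandler` and `IFreqaiModel` in this patch - the exact file names depend on the
configured pair, stake, identifier, and timeranges, so treat this as a hypothetical
example), a populated models directory looks roughly like:

```
user_data/models/
└── 20211220-20220220-unique-id/            # full_timerange + identifier
    ├── config_freqai.example.json          # copy of the config used for training
    ├── sub-train-20211220-20220119/
    │   ├── cb_btc_usdt_20211220-20220119_model.joblib
    │   ├── cb_btc_usdt_20211220-20220119_metadata.json
    │   └── cb_btc_usdt_20211220-20220119_trained_df.pkl
    └── sub-train-20211227-20220126/
        └── ...
```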
diff --git a/freqtrade/commands/__init__.py b/freqtrade/commands/__init__.py
index 0e637c487..d5aea62be 100644
--- a/freqtrade/commands/__init__.py
+++ b/freqtrade/commands/__init__.py
@@ -19,6 +19,7 @@ from freqtrade.commands.list_commands import (start_list_exchanges, start_list_m
                                               start_show_trades)
 from freqtrade.commands.optimize_commands import (start_backtesting, start_backtesting_show,
                                                   start_edge, start_hyperopt)
+from freqtrade.commands.freqai_commands import start_training
 from freqtrade.commands.pairlist_commands import start_test_pairlist
 from freqtrade.commands.plot_commands import start_plot_dataframe, start_plot_profit
 from freqtrade.commands.trade_commands import start_trading

diff --git a/freqtrade/commands/arguments.py b/freqtrade/commands/arguments.py
index 815e28175..4388e84e4 100644
--- a/freqtrade/commands/arguments.py
+++ b/freqtrade/commands/arguments.py
@@ -12,7 +12,7 @@ from freqtrade.constants import DEFAULT_CONFIG

 ARGS_COMMON = ["verbosity", "logfile", "version", "config", "datadir", "user_data_dir"]

-ARGS_STRATEGY = ["strategy", "strategy_path", "recursive_strategy_search"]
+ARGS_STRATEGY = ["strategy", "strategy_path", "recursive_strategy_search",
+                 "freqaimodel", "freqaimodel_path"]

 ARGS_TRADE = ["db_url", "sd_notify", "dry_run", "dry_run_wallet", "fee"]

@@ -190,7 +190,8 @@ class Arguments:
                                 start_list_markets, start_list_strategies,
                                 start_list_timeframes,
                                 start_new_config, start_new_strategy,
                                 start_plot_dataframe, start_plot_profit, start_show_trades,
-                                start_test_pairlist, start_trading, start_webserver)
+                                start_test_pairlist, start_trading, start_webserver,
+                                start_training)

         subparsers = self.parser.add_subparsers(dest='command',
                                                 # Use custom message when no subhandler is added

diff --git a/freqtrade/commands/cli_options.py b/freqtrade/commands/cli_options.py
index aac9f5713..4061418f7 100644
--- a/freqtrade/commands/cli_options.py
+++ b/freqtrade/commands/cli_options.py
@@ -614,4 +614,16 @@ AVAILABLE_CLI_OPTIONS = {
                 "that do not contain any parameters."),
         action="store_true",
     ),
+
+    "freqaimodel": Arg(
+        '--freqaimodel',
+        help='Specify a custom freqaimodel.',
+        metavar='NAME',
+    ),
+
+    "freqaimodel_path": Arg(
+        '--freqaimodel-path',
+        help='Specify additional lookup path for freqaimodels.',
+        metavar='PATH',
+    ),
 }

diff --git a/freqtrade/commands/freqai_commands.py b/freqtrade/commands/freqai_commands.py
new file mode 100644
index 000000000..2733c851a
--- /dev/null
+++ b/freqtrade/commands/freqai_commands.py
@@ -0,0 +1,24 @@
import logging
from typing import Any, Dict

from freqtrade.configuration import setup_utils_configuration
from freqtrade.enums import RunMode


logger = logging.getLogger(__name__)


def start_training(args: Dict[str, Any]) -> None:
    """
    Train a model for predicting signals
    :param args: Cli args from Arguments()
    :return: None
    """
    from freqtrade.freqai.training import Training

    config = setup_utils_configuration(args, RunMode.FREQAI)

    training = Training(config)
    training.start()

diff --git a/freqtrade/configuration/configuration.py b/freqtrade/configuration/configuration.py
index 96b585cd1..e13985270 100644
--- a/freqtrade/configuration/configuration.py
+++ b/freqtrade/configuration/configuration.py
@@ -95,6 +95,8 @@ class Configuration:

         self._process_data_options(config)

+        self._process_freqai_options(config)
+
         # Check if the exchange set by the user is supported
         check_exchange(config, config.get('experimental', {}).get('block_bad_exchanges', True))

@@ -446,6 +448,16 @@ class Configuration:

         config.update({'runmode': self.runmode})

+    def _process_freqai_options(self, config: Dict[str, Any]) -> None:
+
+        self._args_to_config(config, argname='freqaimodel',
+                             logstring='Using freqaimodel class name: {}')
+
+        self._args_to_config(config, argname='freqaimodel_path',
+                             logstring='Using freqaimodel path: {}')
+
+        return
+
     def _args_to_config(self, config: Dict[str, Any], argname: str,
                         logstring: str, logfun: Optional[Callable] = None,
                         deprecated_msg: Optional[str] = None) -> None:

diff --git a/freqtrade/constants.py b/freqtrade/constants.py
index 372472db8..f8a9dc06d 100644
--- a/freqtrade/constants.py
+++ b/freqtrade/constants.py
@@ -55,6 +55,7 @@ FTHYPT_FILEVERSION = 'fthypt_fileversion'
 USERPATH_HYPEROPTS = 'hyperopts'
 USERPATH_STRATEGIES = 'strategies'
 USERPATH_NOTEBOOKS = 'notebooks'
+USERPATH_FREQAIMODELS = 'freqaimodels'

 TELEGRAM_SETTING_OPTIONS = ['on', 'off', 'silent']
 WEBHOOK_FORMAT_OPTIONS = ['form', 'json', 'raw']

diff --git a/freqtrade/enums/runmode.py b/freqtrade/enums/runmode.py
index 6545aaec7..c280edf7c 100644
--- a/freqtrade/enums/runmode.py
+++ b/freqtrade/enums/runmode.py
@@ -15,9 +15,10 @@ class RunMode(Enum):
     UTIL_NO_EXCHANGE = "util_no_exchange"
     PLOT = "plot"
     WEBSERVER = "webserver"
+    FREQAI = "freqai"
     OTHER = "other"


 TRADING_MODES = [RunMode.LIVE, RunMode.DRY_RUN]
-OPTIMIZE_MODES = [RunMode.BACKTEST, RunMode.EDGE, RunMode.HYPEROPT]
+OPTIMIZE_MODES = [RunMode.BACKTEST, RunMode.EDGE, RunMode.HYPEROPT, RunMode.FREQAI]
 NON_UTIL_MODES = TRADING_MODES + OPTIMIZE_MODES

diff --git a/freqtrade/freqai/data_handler.py b/freqtrade/freqai/data_handler.py
new file mode 100644
index 000000000..d399cd12b
--- /dev/null
+++ b/freqtrade/freqai/data_handler.py
@@ -0,0 +1,434 @@
import copy
import datetime
import json
import os
import pickle as pk
from typing import Any, Dict, List, Tuple

import numpy as np
import pandas as pd
from joblib import dump, load
from pandas import DataFrame
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.model_selection import train_test_split

from freqtrade.configuration import TimeRange

SECONDS_IN_DAY = 86400


class DataHandler:
    """
    Class designed to handle all the data for the IFreqaiModel.
    Functionalities include holding, saving, loading, and analyzing the data.
+ """ + + def __init__(self, config: Dict[str, Any], dataframe: DataFrame, data: List): + self.full_dataframe = dataframe + (self.training_timeranges, + self.backtesting_timeranges) = self.split_timerange( + config['freqai']['full_timerange'], + config['freqai']['train_period'], + config['freqai']['backtest_period']) + self.data = data + self.data_dictionary = {} + self.config = config + self.freq_config = config['freqai'] + + def save_data(self, model: Any) -> None: + """ + Saves all data associated with a model for a single sub-train time range + :params: + :model: User trained model which can be reused for inferencing to generate + predictions + """ + + if not os.path.exists(self.model_path): os.mkdir(self.model_path) + save_path = self.model_path + self.model_filename + # Save the trained model + dump(model, save_path+"_model.joblib") + self.data['model_path'] = self.model_path + self.data['model_filename'] = self.model_filename + self.data['training_features_list'] = list(self.data_dictionary['train_features'].columns) + # store the metadata + with open(save_path+"_metadata.json", 'w') as fp: + json.dump(self.data, fp, default=self.np_encoder) + + # save the train data to file so we can check preds for area of applicability later + self.data_dictionary['train_features'].to_pickle(save_path+"_trained_df.pkl") + + return + + def load_data(self) -> Any: + """ + loads all data required to make a prediction on a sub-train time range + :returns: + :model: User trained model which can be inferenced for new predictions + """ + model = load(self.model_path+self.model_filename+"_model.joblib") + + with open(self.model_path+self.model_filename+"_metadata.json", 'r') as fp: + self.data = json.load(fp) + if self.data.get('training_features_list'): + self.training_features_list = [*self.data.get('training_features_list')] + + self.data_dictionary['train_features'] = pd.read_pickle(self.model_path+ + self.model_filename+"_trained_df.pkl") + + self.model_path = self.data['model_path'] + self.model_filename = self.data['model_filename'] + if self.config['freqai']['feature_parameters']['principal_component_analysis']: + self.pca = pk.load(open(self.model_path+self.model_filename+"_pca_object.pkl","rb")) + + return model + + def make_train_test_datasets(self, filtered_dataframe: DataFrame, labels: DataFrame) -> None: + ''' + Given the dataframe for the full history for training, split the data into + training and test data according to user specified parameters in configuration + file. + :filtered_dataframe: cleaned dataframe ready to be split. + :labels: cleaned labels ready to be split. + ''' + + if self.config['freqai']['feature_parameters']['weight_factor'] > 0: + weights = self.set_weights_higher_recent(len(filtered_dataframe)) + else: weights = np.ones(len(filtered_dataframe)) + + (train_features, test_features, train_labels, + test_labels, train_weights, test_weights) = train_test_split( + filtered_dataframe[:filtered_dataframe.shape[0]], + labels, + weights, + **self.config['freqai']['data_split_parameters'] + ) + + return self.build_data_dictionary( + train_features,test_features, + train_labels,test_labels, + train_weights,test_weights) + + + + def filter_features(self, unfiltered_dataframe: DataFrame, training_feature_list: List, + labels: DataFrame = None, training_filter: bool=True) -> Tuple[DataFrame, DataFrame]: + ''' + Filter the unfiltered dataframe to extract the user requested features and properly + remove all NaNs. 
        Any row with a NaN is removed from the training dataset or replaced with
        0s in the prediction dataset. However, for the prediction dataset, do_predict
        will reflect any row that had a NaN and will shield the user from that prediction.
        :params:
        :unfiltered_dataframe: the full dataframe for the present training period
        :training_feature_list: list, the training feature list constructed by
        self.build_feature_list() according to user specified parameters in the
        configuration file.
        :labels: the labels for the dataset
        :training_filter: boolean which lets the function know if it is training data or
        prediction data to be filtered.
        :returns:
        :filtered_dataframe: dataframe cleaned of NaNs and only containing the user
        requested feature set.
        :labels: labels cleaned of NaNs.
        '''
        filtered_dataframe = unfiltered_dataframe.filter(training_feature_list, axis=1)
        drop_index = pd.isnull(filtered_dataframe).any(1)  # get the rows that have NaNs

        if training_filter:
            # we don't care about total row number (total no. datapoints) in training,
            # we only care about removing any row with NaNs
            drop_index_labels = pd.isnull(labels)
            filtered_dataframe = filtered_dataframe[(drop_index == False) &
                                                    (drop_index_labels == False)]  # dropping values
            labels = labels[(drop_index == False) &
                            (drop_index_labels == False)]  # assuming the labels depend entirely on the dataframe here
            print('dropped', len(unfiltered_dataframe) - len(filtered_dataframe),
                  'training data points due to NaNs, ensure you have downloaded all historical training data')
            self.data['filter_drop_index_training'] = drop_index

        else:
            # we are backtesting, so we need to preserve row number to send back to the
            # strategy; we use do_predict to avoid any prediction based on a NaN
            drop_index = pd.isnull(filtered_dataframe).any(1)
            self.data['filter_drop_index_prediction'] = drop_index
            # replace all NaNs with zeros to avoid issues in 'prediction'; any prediction
            # that was based on a NaN is ultimately protected from buys with do_predict
            filtered_dataframe.fillna(0, inplace=True)
            drop_index = ~drop_index
            self.do_predict = np.array(drop_index.replace(True, 1).replace(False, 0))
            print('dropped', len(self.do_predict) - self.do_predict.sum(), 'of', len(filtered_dataframe),
                  'prediction data points due to NaNs. These are protected from prediction',
                  'with the do_predict vector returned to strategy.')

        return filtered_dataframe, labels

    def build_data_dictionary(self, train_df: DataFrame, test_df: DataFrame,
                              train_labels: DataFrame, test_labels: DataFrame,
                              train_weights: Any, test_weights: Any) -> Dict:

        self.data_dictionary = {'train_features': train_df,
                                'test_features': test_df,
                                'train_labels': train_labels,
                                'test_labels': test_labels,
                                'train_weights': train_weights,
                                'test_weights': test_weights}

        return self.data_dictionary

    def standardize_data(self, data_dictionary: Dict) -> Dict:
        '''
        Standardize all data in the data_dictionary according to the training dataset
        :params:
        :data_dictionary: dictionary containing the cleaned and split training/test data/labels
        :returns:
        :data_dictionary: updated dictionary with standardized values.
+ ''' + # standardize the data by training stats + train_mean = data_dictionary['train_features'].mean() + train_std = data_dictionary['train_features'].std() + data_dictionary['train_features'] = (data_dictionary['train_features'] - train_mean) / train_std + data_dictionary['test_features'] = (data_dictionary['test_features'] - train_mean) / train_std + + train_labels_std = data_dictionary['train_labels'].std() + train_labels_mean = data_dictionary['train_labels'].mean() + data_dictionary['train_labels'] = (data_dictionary['train_labels'] - train_labels_mean) / train_labels_std + data_dictionary['test_labels'] = (data_dictionary['test_labels'] - train_labels_mean) / train_labels_std + + for item in train_std.keys(): + self.data[item+'_std'] = train_std[item] + self.data[item+'_mean'] = train_mean[item] + + self.data['labels_std'] = train_labels_std + self.data['labels_mean'] = train_labels_mean + + return data_dictionary + + def standardize_data_from_metadata(self, df: DataFrame) -> DataFrame: + ''' + Standardizes a set of data using the mean and standard deviation from + the associated training data. + :params: + :df: Dataframe to be standardized + ''' + + for item in df.keys(): + df[item] = (df[item] - self.data[item+'_mean']) / self.data[item+'_std'] + + return df + + def split_timerange(self, tr: Dict, train_split: int=28, bt_split: int=7) -> list: + ''' + Function which takes a single time range (tr) and splits it + into sub timeranges to train and backtest on based on user input + tr: str, full timerange to train on + train_split: the period length for the each training (days). Specified in user + configuration file + bt_split: the backtesting length (dats). Specified in user configuration file + ''' + + train_period = train_split * SECONDS_IN_DAY + bt_period = bt_split * SECONDS_IN_DAY + + full_timerange = TimeRange.parse_timerange(tr) + timerange_train = copy.deepcopy(full_timerange) + timerange_backtest = copy.deepcopy(full_timerange) + + tr_training_list = [] + tr_backtesting_list = [] + first = True + while True: + if not first: timerange_train.startts = timerange_train.startts + bt_period + timerange_train.stopts = timerange_train.startts + train_period + + # if a full training period doesnt fit, we stop + if timerange_train.stopts > full_timerange.stopts: break + first = False + start = datetime.datetime.utcfromtimestamp(timerange_train.startts) + stop = datetime.datetime.utcfromtimestamp(timerange_train.stopts) + tr_training_list.append(start.strftime("%Y%m%d")+'-'+stop.strftime("%Y%m%d")) + + ## associated backtest period + timerange_backtest.startts = timerange_train.stopts + timerange_backtest.stopts = timerange_backtest.startts + bt_period + start = datetime.datetime.utcfromtimestamp(timerange_backtest.startts) + stop = datetime.datetime.utcfromtimestamp(timerange_backtest.stopts) + tr_backtesting_list.append(start.strftime("%Y%m%d")+'-'+stop.strftime("%Y%m%d")) + + return tr_training_list, tr_backtesting_list + + def slice_dataframe(self, tr: str, df: DataFrame) -> DataFrame: + """ + Given a full dataframe, extract the user desired window + :params: + :tr: timerange string that we wish to extract from df + :df: Dataframe containing all candles to run the entire backtest. Here + it is sliced down to just the present training period. 
+ """ + timerange = TimeRange.parse_timerange(tr) + start = datetime.datetime.fromtimestamp(timerange.startts, tz=datetime.timezone.utc) + stop = datetime.datetime.fromtimestamp(timerange.stopts, tz=datetime.timezone.utc) + df = df.loc[df['date'] >= start, :] + df = df.loc[df['date'] <= stop, :] + + return df + + def principal_component_analysis(self) -> None: + """ + Performs Principal Component Analysis on the data for dimensionality reduction + and outlier detection (see self.remove_outliers()) + No parameters or returns, it acts on the data_dictionary held by the DataHandler. + """ + + from sklearn.decomposition import PCA # avoid importing if we dont need it + + n_components = self.data_dictionary['train_features'].shape[1] + pca = PCA(n_components=n_components) + pca = pca.fit(self.data_dictionary['train_features']) + n_keep_components = np.argmin(pca.explained_variance_ratio_.cumsum() < 0.999) + pca2 = PCA(n_components=n_keep_components) + self.data['n_kept_components'] = n_keep_components + pca2 = pca2.fit(self.data_dictionary['train_features']) + print('reduced feature dimension by',n_components-n_keep_components) + print("explained variance",np.sum(pca2.explained_variance_ratio_)) + train_components = pca2.transform(self.data_dictionary['train_features']) + test_components = pca2.transform(self.data_dictionary['test_features']) + + self.data_dictionary['train_features'] = pd.DataFrame(data=train_components, + columns = ['PC'+str(i) for i in range(0,n_keep_components)], + index = self.data_dictionary['train_features'].index) + + self.data_dictionary['test_features'] = pd.DataFrame(data=test_components, + columns = ['PC'+str(i) for i in range(0,n_keep_components)], + index = self.data_dictionary['test_features'].index) + + self.data['n_kept_components'] = n_keep_components + self.pca = pca2 + if not os.path.exists(self.model_path): os.mkdir(self.model_path) + pk.dump(pca2, open(self.model_path + self.model_filename+"_pca_object.pkl","wb")) + + return None + + def compute_distances(self) -> float: + print('computing average mean distance for all training points') + pairwise = pairwise_distances(self.data_dictionary['train_features'],n_jobs=-1) + avg_mean_dist = pairwise.mean(axis=1).mean() + print('avg_mean_dist',avg_mean_dist) + + return avg_mean_dist + + def remove_outliers(self,predict: bool) -> None: + """ + Remove data that looks like an outlier based on the distribution of each + variable. + :params: + :predict: boolean which tells the function if this is prediction data or + training data coming in. 
+ """ + + lower_quantile = self.data_dictionary['train_features'].quantile(0.001) + upper_quantile = self.data_dictionary['train_features'].quantile(0.999) + + if predict: + + df = self.data_dictionary['prediction_features'][(self.data_dictionary['prediction_features']lower_quantile)] + drop_index = pd.isnull(df).any(1) + self.data_dictionary['prediction_features'].fillna(0,inplace=True) + drop_index = ~drop_index + do_predict = np.array(drop_index.replace(True,1).replace(False,0)) + + print('remove_outliers() tossed',len(do_predict)-do_predict.sum(),'predictions because they were beyond 3 std deviations from training data.') + self.do_predict += do_predict + self.do_predict -= 1 + + else: + + filter_train_df = self.data_dictionary['train_features'][(self.data_dictionary['train_features']lower_quantile)] + drop_index = pd.isnull(filter_train_df).any(1) + self.data_dictionary['train_features'] = self.data_dictionary['train_features'][(drop_index==False)] + self.data_dictionary['train_labels'] = self.data_dictionary['train_labels'][(drop_index==False)] + self.data_dictionary['train_weights'] = self.data_dictionary['train_weights'][(drop_index==False)] + + # do the same for the test data + filter_test_df = self.data_dictionary['test_features'][(self.data_dictionary['test_features']lower_quantile)] + drop_index = pd.isnull(filter_test_df).any(1) + #pdb.set_trace() + self.data_dictionary['test_labels'] = self.data_dictionary['test_labels'][(drop_index==False)] + self.data_dictionary['test_features'] = self.data_dictionary['test_features'][(drop_index==False)] + self.data_dictionary['test_weights'] = self.data_dictionary['test_weights'][(drop_index==False)] + + return + + def build_feature_list(self, config: dict) -> int: + """ + Build the list of features that will be used to filter + the full dataframe. Feature list is construced from the + user configuration file. + :params: + :config: Canonical freqtrade config file containing all + user defined input in config['freqai] dictionary. + """ + features = [] + for tf in config['freqai']['timeframes']: + for ft in config['freqai']['base_features']: + for n in range(config['freqai']['feature_parameters']['shift']+1): + shift='' + if n>0: shift = '_shift-'+str(n) + features.append(ft+shift+'_'+tf) + for p in config['freqai']['corr_pairlist']: + features.append(p.split("/")[0]+'-'+ft+shift+'_'+tf) + + print('number of features',len(features)) + return features + + def check_if_pred_in_training_spaces(self) -> None: + """ + Compares the distance from each prediction point to each training data + point. It uses this information to estimate a Dissimilarity Index (DI) + and avoid making predictions on any points that are too far away + from the training data set. + """ + + print('checking if prediction features are in AOA') + distance = pairwise_distances(self.data_dictionary['train_features'], + self.data_dictionary['prediction_features'],n_jobs=-1) + + do_predict = np.where(distance.min(axis=0) / + self.data['avg_mean_dist'] < self.config['freqai']['feature_parameters']['DI_threshold'],1,0) + + print('Distance checker tossed',len(do_predict)-do_predict.sum(), + 'predictions for being too far from training data') + + self.do_predict += do_predict + self.do_predict -= 1 + + def set_weights_higher_recent(self, num_weights: int) -> int: + """ + Set weights so that recent data is more heavily weighted during + training than older data. 
+ """ + weights = np.zeros(num_weights) + for i in range(1, len(weights)): + weights[len(weights) - i] = np.exp(-i/ + (self.config['freqai']['feature_parameters']['weight_factor']*num_weights)) + return weights + + def append_predictions(self, predictions, do_predict, len_dataframe): + """ + Append backtest prediction from current backtest period to all previous periods + """ + + ones = np.ones(len_dataframe) + s_mean, s_std = ones*self.data['s_mean'], ones*self.data['s_std'] + + self.predictions = np.append(self.predictions,predictions) + self.do_predict = np.append(self.do_predict,do_predict) + self.target_mean = np.append(self.target_mean,s_mean) + self.target_std = np.append(self.target_std,s_std) + + return + + def fill_predictions(self, len_dataframe): + """ + Back fill values to before the backtesting range so that the dataframe matches size + when it goes back to the strategy. These rows are not included in the backtest. + """ + + filler = np.zeros(len_dataframe -len(self.predictions)) # startup_candle_count + self.predictions = np.append(filler,self.predictions) + self.do_predict = np.append(filler,self.do_predict) + self.target_mean = np.append(filler,self.target_mean) + self.target_std = np.append(filler,self.target_std) + + return + + def np_encoder(self, object): + if isinstance(object, np.generic): + return object.item() diff --git a/freqtrade/freqai/freqai_interface.py b/freqtrade/freqai/freqai_interface.py new file mode 100644 index 000000000..eb15e7e49 --- /dev/null +++ b/freqtrade/freqai/freqai_interface.py @@ -0,0 +1,158 @@ + +import os +import numpy as np +import pandas as pd +from pandas import DataFrame +import shutil +import gc +from typing import Any, Dict, Optional, Tuple +from abc import ABC +from freqtrade.freqai.data_handler import DataHandler + +pd.options.mode.chained_assignment = None + +class IFreqaiModel(ABC): + """ + Class containing all tools for training and prediction in the strategy. + User models should inherit from this class as shown in + templates/ExamplePredictionModel.py where the user overrides + train(), predict(), fit(), and make_labels(). + """ + + def __init__(self, config: Dict[str, Any]) -> None: + + self.config = config + self.freqai_info = config['freqai'] + self.data_split_parameters = config['freqai']['data_split_parameters'] + self.model_training_parameters = config['freqai']['model_training_parameters'] + self.feature_parameters = config['freqai']['feature_parameters'] + self.full_path = (str(config['user_data_dir'])+ + "/models/"+self.freqai_info['full_timerange']+ + '-'+self.freqai_info['identifier']) + self.metadata = {} + self.data = {} + self.time_last_trained = None + self.current_time = None + self.model = None + self.predictions = None + + if not os.path.exists(self.full_path): + os.mkdir(self.full_path) + shutil.copy(self.config['config_files'][0],self.full_path+"/"+self.config['config_files'][0]) + + def start(self, dataframe: DataFrame, metadata: dict) -> DataFrame: + """ + Entry point to the FreqaiModel, it will train a new model if + necesssary before making the prediction. + The backtesting and training paradigm is a sliding training window + with a following backtest window. Both windows slide according to the + length of the backtest window. This function is not intended to be + overridden by children of IFreqaiModel, but technically, it can be + if the user wishes to make deeper changes to the sliding window + logic. 
        :params:
        :dataframe: Full dataframe coming from strategy - it contains entire
        backtesting timerange + additional historical data necessary to train
        the model.
        :metadata: pair metadata coming from strategy.
        """
        self.pair = metadata['pair']
        self.dh = DataHandler(self.config, dataframe, self.data)

        print('going to train', len(self.dh.training_timeranges),
              'timeranges:', self.dh.training_timeranges)
        predictions = np.array([])
        do_predict = np.array([])
        target_mean = np.array([])
        target_std = np.array([])

        # Loop enforcing the sliding window training/backtesting paradigm
        # tr_train is the training time range e.g. 1 historical month
        # tr_backtest is the backtesting time range e.g. the week directly
        # following tr_train. Both of these windows slide through the
        # entire backtest
        for tr_train, tr_backtest in zip(self.dh.training_timeranges,
                                         self.dh.backtesting_timeranges):
            gc.collect()
            self.dh.data = {}  # clean the pair specific data between models
            self.freqai_info['training_timerange'] = tr_train
            dataframe_train = self.dh.slice_dataframe(tr_train, dataframe)
            dataframe_backtest = self.dh.slice_dataframe(tr_backtest, dataframe)
            print("training", self.pair, "for", tr_train)
            self.dh.model_path = self.full_path + "/" + 'sub-train' + '-' + str(tr_train) + '/'
            if not self.model_exists(self.pair, training_timerange=tr_train):
                self.model = self.train(dataframe_train, metadata)
                self.dh.save_data(self.model)
            else:
                self.model = self.dh.load_data()

            preds, do_preds = self.predict(dataframe_backtest)

            self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))

        self.dh.fill_predictions(len(dataframe))

        return self.dh.predictions, self.dh.do_predict, self.dh.target_mean, self.dh.target_std

    def make_labels(self, dataframe: DataFrame) -> DataFrame:
        """
        User defines the labels here (target values).
        :params:
        :dataframe: the full dataframe for the present training period
        """

        return dataframe

    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
        """
        Filter the training data and train a model on it. Train makes heavy use of the
        DataHandler for storing, saving, loading, and analyzing the data.
        :params:
        :unfiltered_dataframe: Full dataframe for the current training period
        :metadata: pair metadata from strategy.
        :returns:
        :model: Trained model which can be used for inference (self.predict)
        """

        return unfiltered_dataframe, unfiltered_dataframe

    def fit(self) -> Any:
        """
        Most regressors use the same function names and arguments, e.g. the user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by Freqai.
        :params:
        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        """

        return None

    def predict(self) -> Optional[Tuple[DataFrame, DataFrame]]:
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        return None

    def model_exists(self, pair: str, training_timerange: Optional[str] = None) -> bool:
        """
        Given a pair and path, check if a model already exists
        :param pair: pair e.g. BTC/USD
        :param path: path to model
        """
        coin, _ = pair.split('/')
        self.dh.model_filename = ("cb_" + coin.lower() + "_" +
                                  self.freqai_info['trained_stake'] + "_" + training_timerange)
        file_exists = os.path.isfile(self.dh.model_path +
                                     self.dh.model_filename + "_model.joblib")
        if file_exists:
            print("Found model at", self.dh.model_path + self.dh.model_filename)
        else:
            print("Could not find model at", self.dh.model_path + self.dh.model_filename)
        return file_exists

diff --git a/freqtrade/freqai/strategy_bridge.py b/freqtrade/freqai/strategy_bridge.py
new file mode 100644
index 000000000..c336e3c84
--- /dev/null
+++ b/freqtrade/freqai/strategy_bridge.py
@@ -0,0 +1,12 @@
from freqtrade.resolvers.freqaimodel_resolver import FreqaiModelResolver


class CustomModel:
    """
    A bridge between the user defined IFreqaiModel class
    and the strategy.
    """

    def __init__(self, config):

        self.bridge = FreqaiModelResolver.load_freqaimodel(config)

diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py
index 621812b0a..5051a8db0 100755
--- a/freqtrade/optimize/backtesting.py
+++ b/freqtrade/optimize/backtesting.py
@@ -204,6 +204,12 @@ class Backtesting:
         """
         self.progress.init_step(BacktestState.DATALOAD, 1)

+        if self.config.get('freqaimodel'):
+            self.required_startup += int((self.config['freqai']['train_period'] * 86400) /
+                                         timeframe_to_seconds(self.config['timeframe']))
+            self.config['startup_candle_count'] = self.required_startup
+
         data = history.load_data(
             datadir=self.config['datadir'],
             pairs=self.pairlists.whitelist,

diff --git a/freqtrade/resolvers/freqaimodel_resolver.py b/freqtrade/resolvers/freqaimodel_resolver.py
new file mode 100644
index 000000000..9545afd24
--- /dev/null
+++ b/freqtrade/resolvers/freqaimodel_resolver.py
@@ -0,0 +1,45 @@
# pragma pylint: disable=attribute-defined-outside-init

"""
This module loads a custom model for freqai
"""
import logging
from pathlib import Path
from typing import Dict

from freqtrade.constants import USERPATH_FREQAIMODELS
from freqtrade.exceptions import OperationalException
from freqtrade.freqai.freqai_interface import IFreqaiModel
from freqtrade.resolvers import IResolver

logger = logging.getLogger(__name__)


class FreqaiModelResolver(IResolver):
    """
    This class contains all the logic to load a custom FreqaiModel class
    """
    object_type = IFreqaiModel
    object_type_str = "FreqaiModel"
    user_subdir = USERPATH_FREQAIMODELS
    initial_search_path = Path(__file__).parent.parent.joinpath('optimize').resolve()

    @staticmethod
    def load_freqaimodel(config: Dict) -> IFreqaiModel:
        """
        Load the custom class from config parameter
        :param config: configuration dictionary
        """

        freqaimodel_name = config.get('freqaimodel')
        if not freqaimodel_name:
            raise OperationalException(
                "No freqaimodel set. Please use `--freqaimodel` to "
                "specify the FreqaiModel class to use.\n"
            )
        freqaimodel = FreqaiModelResolver.load_object(freqaimodel_name,
                                                      config, kwargs={'config': config},
                                                      extra_dir=config.get('freqaimodel_path'))

        return freqaimodel

diff --git a/freqtrade/templates/ExamplePredictionModel.py b/freqtrade/templates/ExamplePredictionModel.py
new file mode 100644
index 000000000..a5370b5ac
--- /dev/null
+++ b/freqtrade/templates/ExamplePredictionModel.py
@@ -0,0 +1,139 @@
from typing import Any, Dict, Tuple

import numpy as np
import pandas as pd
from catboost import CatBoostRegressor, Pool
from pandas import DataFrame

from freqtrade.freqai.freqai_interface import IFreqaiModel


class ExamplePredictionModel(IFreqaiModel):
    """
    User created prediction model. The class needs to override three necessary
    functions: predict(), train(), and fit(). The class inherits IFreqaiModel,
    which has its own DataHandler where data is held, saved, loaded, and managed.
    """

    def make_labels(self, dataframe: DataFrame) -> DataFrame:
        """
        User defines the labels here (target values).
        :params:
        :dataframe: the full dataframe for the present training period
        """

        dataframe['s'] = (dataframe['close'].shift(-self.feature_parameters['period']).rolling(
            self.feature_parameters['period']).max() / dataframe['close'] - 1)
        self.dh.data['s_mean'] = dataframe['s'].mean()
        self.dh.data['s_std'] = dataframe['s'].std()

        print('label mean', self.dh.data['s_mean'], 'label std', self.dh.data['s_std'])

        return dataframe['s']

    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
        """
        Filter the training data and train a model on it. Train makes heavy use of the
        DataHandler for storing, saving, loading, and analyzing the data.
        :params:
        :unfiltered_dataframe: Full dataframe for the current training period
        :metadata: pair metadata from strategy.
        :returns:
        :model: Trained model which can be used for inference (self.predict)
        """
        print("--------------------Starting training--------------------")

        # create the full feature list based on user config info
        self.dh.training_features_list = self.dh.build_feature_list(self.config)
        unfiltered_labels = self.make_labels(unfiltered_dataframe)

        # filter the features requested by the user in the configuration file
        # and elegantly handle NaNs
        features_filtered, labels_filtered = self.dh.filter_features(
            unfiltered_dataframe,
            self.dh.training_features_list,
            unfiltered_labels,
            training_filter=True)

        # split data into train/test data.
        data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
        # standardize all data based on train_dataset only
        data_dictionary = self.dh.standardize_data(data_dictionary)

        # optional additional data cleaning
        if self.feature_parameters['principal_component_analysis']:
            self.dh.principal_component_analysis()
        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=False)
        if self.feature_parameters['DI_threshold']:
            self.dh.data['avg_mean_dist'] = self.dh.compute_distances()

        print("length of train data", len(data_dictionary['train_features']))

        model = self.fit(data_dictionary)

        print('Finished training')
        print(f'--------------------done training {metadata["pair"]}--------------------')

        return model

    def fit(self, data_dictionary: Dict) -> Any:
        """
        Most regressors use the same function names and arguments, e.g. the user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by Freqai.
        :params:
        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels.
        """

        train_data = Pool(
            data=data_dictionary['train_features'],
            label=data_dictionary['train_labels'],
            weight=data_dictionary['train_weights']
        )

        test_data = Pool(
            data=data_dictionary['test_features'],
            label=data_dictionary['test_labels'],
            weight=data_dictionary['test_weights']
        )

        model = CatBoostRegressor(verbose=100, early_stopping_rounds=400,
                                  **self.model_training_parameters)
        model.fit(X=train_data, eval_set=test_data)

        return model

    def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[DataFrame, DataFrame]:
        """
        Filter the prediction features data and predict with it.
        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """

        print("--------------------Starting prediction--------------------")

        original_feature_list = self.dh.build_feature_list(self.config)
        filtered_dataframe, _ = self.dh.filter_features(unfiltered_dataframe,
                                                        original_feature_list,
                                                        training_filter=False)
        filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
        self.dh.data_dictionary['prediction_features'] = filtered_dataframe

        # optional additional data cleaning
        if self.feature_parameters['principal_component_analysis']:
            pca_components = self.dh.pca.transform(filtered_dataframe)
            self.dh.data_dictionary['prediction_features'] = pd.DataFrame(
                data=pca_components,
                columns=['PC' + str(i) for i in range(0, self.dh.data['n_kept_components'])],
                index=filtered_dataframe.index)

        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=True)  # creates dropped index

        if self.feature_parameters['DI_threshold']:
            self.dh.check_if_pred_in_training_spaces()  # sets do_predict

        predictions = self.model.predict(self.dh.data_dictionary['prediction_features'])

        # compute the non-standardized predictions
        predictions = predictions * self.dh.data['labels_std'] + self.dh.data['labels_mean']

        print("--------------------Finished prediction--------------------")

        return (predictions, self.dh.do_predict)

diff --git a/freqtrade/templates/FreqaiExampleStrategy.py b/freqtrade/templates/FreqaiExampleStrategy.py
new file mode 100644
index 000000000..d6b1295ec
--- /dev/null
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -0,0 +1,179 @@
import logging
from functools import reduce

import numpy as np
import pandas as pd
import talib.abstract as ta
from pandas import DataFrame
from technical import qtpylib

from freqtrade.freqai.strategy_bridge import CustomModel
from freqtrade.strategy import merge_informative_pair
from freqtrade.strategy.interface import IStrategy

logger = logging.getLogger(__name__)


class FreqaiExampleStrategy(IStrategy):
    """
    Example strategy showing how the user connects their own
    IFreqaiModel to the strategy. Namely, the user uses:
    self.model = CustomModel(self.config)
    self.model.bridge.start(dataframe, metadata)

    to make predictions on their data.
    populate_any_indicators() automatically
    generates the variety of features indicated by the user in the
    canonical freqtrade configuration file under config['freqai'].
    """

    minimal_roi = {
        "0": 0.01,
        "240": -1
    }

    plot_config = {
        'main_plot': {
        },
        'subplots': {
            "prediction": {
                'prediction': {'color': 'blue'}
            },
            "target_roi": {
                'target_roi': {'color': 'brown'},
            },
            "do_predict": {
                'do_predict': {'color': 'brown'},
            },
        }
    }

    stoploss = -0.05
    use_sell_signal = True
    startup_candle_count: int = 1000

    def informative_pairs(self):
        pairs = self.config['freqai']['corr_pairlist']
        informative_pairs = []
        for tf in self.config['freqai']['timeframes']:
            informative_pairs.append([(pair, tf) for pair in pairs])
        return informative_pairs

    def populate_any_indicators(self, pair, df, tf, informative=None, coin=''):
        """
        Function designed to automatically generate, name, and merge features
        from user indicated timeframes in the configuration file. The user can add
        additional features here, but must follow the naming convention.
        :params:
        :pair: pair to be used as informative
        :df: strategy dataframe which will receive merges from informatives
        :tf: timeframe of the dataframe which will modify the feature names
        :informative: the dataframe associated with the informative pair
        :coin: the name of the coin which will modify the feature names.
        """
        if informative is None:
            informative = self.dp.get_pair_dataframe(pair, tf)

        informative[coin + 'rsi'] = ta.RSI(informative, timeperiod=14)
        informative[coin + 'mfi'] = ta.MFI(informative, timeperiod=25)
        informative[coin + 'adx'] = ta.ADX(informative, window=20)

        informative[coin + '20sma'] = ta.SMA(informative, timeperiod=20)
        informative[coin + '21ema'] = ta.EMA(informative, timeperiod=21)
        informative[coin + 'bmsb'] = np.where(
            informative[coin + '20sma'].lt(informative[coin + '21ema']), 1, 0)
        informative[coin + 'close_over_20sma'] = informative['close'] / informative[coin + '20sma']

        informative[coin + 'ema21'] = ta.EMA(informative, timeperiod=21)
        informative[coin + 'sma20'] = ta.SMA(informative, timeperiod=20)
        stoch = ta.STOCHRSI(informative, 15, 20, 2, 2)
        informative[coin + 'srsi-fk'] = stoch['fastk']
        informative[coin + 'srsi-fd'] = stoch['fastd']

        bollinger = qtpylib.bollinger_bands(qtpylib.typical_price(informative), window=14, stds=2.2)
        informative[coin + 'bb_lowerband'] = bollinger['lower']
        informative[coin + 'bb_middleband'] = bollinger['mid']
        informative[coin + 'bb_upperband'] = bollinger['upper']
        informative[coin + 'bb_width'] = ((informative[coin + "bb_upperband"] -
                                           informative[coin + "bb_lowerband"]) /
                                          informative[coin + "bb_middleband"])
        informative[coin + 'close-bb_lower'] = informative['close'] / informative[coin + 'bb_lowerband']

        informative[coin + 'roc'] = ta.ROC(informative, timeperiod=3)
        informative[coin + 'adx'] = ta.ADX(informative, window=14)

        macd = ta.MACD(informative)
        informative[coin + 'macd'] = macd['macd']
        informative[coin + 'pct-change'] = informative['close'].pct_change()
        informative[coin + 'relative_volume'] = (informative['volume'] /
                                                 informative['volume'].rolling(10).mean())

        indicators = [col for col in informative if col.startswith(coin)]

        for n in range(self.freqai_info['feature_parameters']['shift'] + 1):
            if n == 0:
                continue
            informative_shift = informative[indicators].shift(n)
            informative_shift = informative_shift.add_suffix('_shift-' + str(n))
            informative = pd.concat((informative, informative_shift), axis=1)
        df = merge_informative_pair(df, informative, self.config['timeframe'], tf, ffill=True)
        skip_columns = [(s + '_' + tf) for s in
                        ['date', 'open', 'high', 'low', 'close', 'volume']]
        df = df.drop(columns=skip_columns)

        return df

    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:

        # the configuration file parameters are stored here
        self.freqai_info = self.config['freqai']

        # the model is instantiated here
        self.model = CustomModel(self.config)

        print('Populating indicators...')

        # the following loops are necessary for building the features
        # indicated by the user in the configuration file.
        for tf in self.freqai_info['timeframes']:
            dataframe = self.populate_any_indicators(metadata['pair'],
                                                     dataframe.copy(), tf)
            for i in self.freqai_info['corr_pairlist']:
                dataframe = self.populate_any_indicators(i,
                                                         dataframe.copy(), tf,
                                                         coin=i.split("/")[0] + '-')

        # the model will return 4 values: its prediction, an indication of whether or not
        # the prediction should be accepted, and the target mean/std values from the
        # labels used during each training period.
        (dataframe['prediction'], dataframe['do_predict'],
         dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(dataframe, metadata)

        dataframe['target_roi'] = dataframe['target_mean'] + dataframe['target_std'] * 0.5
        dataframe['sell_roi'] = dataframe['target_mean'] - dataframe['target_std'] * 1.5
        return dataframe

    def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:

        buy_conditions = [
            (dataframe['prediction'] > dataframe['target_roi'])
            &
            (dataframe['do_predict'] == 1)
        ]

        if buy_conditions:
            dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), 'buy'] = 1

        return dataframe

    def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:

        sell_conditions = [
            (dataframe['prediction'] < dataframe['sell_roi'])
            &
            (dataframe['do_predict'] == 1)
        ]
        if sell_conditions:
            dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), 'sell'] = 1

        return dataframe

    def get_ticker_indicator(self):
        return int(self.config['timeframe'][:-1])

diff --git a/mkdocs.yml b/mkdocs.yml
index a43322f78..64d78363d 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -36,6 +36,7 @@ nav:
         - Advanced Strategy: strategy-advanced.md
         - Advanced Hyperopt: advanced-hyperopt.md
         - Sandbox Testing: sandbox-testing.md
+        - Freqai: freqai.md
     - FAQ: faq.md
     - SQL Cheat-sheet: sql_cheatsheet.md
     - Strategy migration: strategy_migration.md

diff --git a/requirements-freqai.txt b/requirements-freqai.txt
new file mode 100644
index 000000000..f84d3df07
--- /dev/null
+++ b/requirements-freqai.txt
@@ -0,0 +1,8 @@
# Include all requirements to run the bot.
-r requirements.txt

# Required for freqai
scikit-learn==1.0.2
scikit-optimize==0.9.0
joblib==1.1.0
catboost==1.0.4