add freqao backend machinery, user interface, documentation

2022-05-03 10:14:17 +02:00
parent ebab02fce3
commit fc837c4daa
19 changed files with 1405 additions and 3 deletions
--- a/freqtrade/templates/ExamplePredictionModel.py
+++ b/freqtrade/templates/ExamplePredictionModel.py
@@ -0,0 +1,139 @@
+import numpy as np
+import pandas as pd
+from catboost import CatBoostRegressor, Pool
+from pandas import DataFrame
+from typing import Any, Dict, Tuple
+from freqtrade.freqai.freqai_interface import IFreqaiModel
+
+class ExamplePredictionModel(IFreqaiModel):
+    """
+    User created prediction model. The class needs to override three necessary
+    functions, predict(), train(), fit(). The class inherits IFreqaiModel and
+    works with the data handler available as self.dh, where data is held,
+    saved, loaded, and managed.
+    """
+
+    def make_labels(self, dataframe: DataFrame) -> pd.Series:
+        """
+        User defines the labels here (target values).
+
+        The label 's' is the highest close of the next `period` candles
+        expressed as a fractional change relative to the current close, where
+        `period` is self.feature_parameters['period']. The column is written
+        into `dataframe` in place, and its mean and standard deviation are
+        stored in self.dh.data under 's_mean' and 's_std'.
+        :params:
+        :dataframe: the full dataframe for the present training period
+        :returns:
+        :labels: the 's' column of `dataframe` (a pandas Series)
+        """
+        period = self.feature_parameters['period']
+
+        # shift(-period) pulls the future closes back by `period` rows, so the
+        # rolling max at row t spans close[t+1] .. close[t+period].
+        future_max_close = dataframe['close'].shift(-period).rolling(period).max()
+        dataframe['s'] = future_max_close / dataframe['close'] - 1
+
+        self.dh.data['s_mean'] = dataframe['s'].mean()
+        self.dh.data['s_std'] = dataframe['s'].std()
+
+        print('label mean',self.dh.data['s_mean'],'label std',self.dh.data['s_std'])
+
+        return dataframe['s']
+
+
+    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
+        """
+        Filter the training data and train a model to it. Train makes heavy use of the datahandler
+        for storing, saving, loading, and managing data.
+        :params:
+        :unfiltered_dataframe: Full dataframe for the current training period
+        :metadata: pair metadata from strategy; metadata["pair"] is used in the final log line.
+        :returns:
+        :model: Trained model (whatever self.fit returns) which can be used to
+        inference (self.predict)
+        """
+        print("--------------------Starting training--------------------")
+
+        # create the full feature list based on user config info
+        self.dh.training_features_list = self.dh.build_feature_list(self.config)
+        unfiltered_labels = self.make_labels(unfiltered_dataframe)
+
+        # filter the features requested by user in the configuration file and elegantly handle NaNs
+        features_filtered, labels_filtered = self.dh.filter_features(
+            unfiltered_dataframe,
+            self.dh.training_features_list,
+            unfiltered_labels,
+            training_filter=True,
+        )
+
+        # split data into train/test data.
+        data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
+        # standardize all data based on train_dataset only
+        data_dictionary = self.dh.standardize_data(data_dictionary)
+
+        # optional additional data cleaning, each step switched on by the user configuration
+        if self.feature_parameters['principal_component_analysis']:
+            self.dh.principal_component_analysis()
+        if self.feature_parameters["remove_outliers"]:
+            self.dh.remove_outliers(predict=False)
+        if self.feature_parameters['DI_threshold']:
+            self.dh.data['avg_mean_dist'] = self.dh.compute_distances()
+
+        print("length of train data", len(data_dictionary['train_features']))
+
+        model = self.fit(data_dictionary)
+
+        print('Finished training')
+        print(f'--------------------done training {metadata["pair"]}--------------------')
+
+        return model
+
+    def fit(self, data_dictionary: Dict) -> Any:
+        """
+        Most regressors use the same function names and arguments e.g. user
+        can drop in LGBMRegressor in place of CatBoostRegressor and all data
+        management will be properly handled by Freqai.
+        :params:
+        :data_dictionary: the dictionary constructed by DataHandler to hold
+        all the training and test data/labels. The keys read here are
+        'train_features', 'train_labels', 'train_weights', 'test_features',
+        'test_labels' and 'test_weights'.
+        :returns:
+        :model: the fitted CatBoostRegressor
+        """
+
+        train_data = Pool(
+            data=data_dictionary['train_features'],
+            label=data_dictionary['train_labels'],
+            weight=data_dictionary['train_weights'],
+        )
+
+        test_data = Pool(
+            data=data_dictionary['test_features'],
+            label=data_dictionary['test_labels'],
+            weight=data_dictionary['test_weights'],
+        )
+
+        # Defaults come first so that a user supplied 'verbose' or
+        # 'early_stopping_rounds' overrides them instead of raising
+        # "got multiple values for keyword argument".
+        regressor_parameters = {
+            'verbose': 100,
+            'early_stopping_rounds': 400,
+            **self.model_training_parameters,
+        }
+
+        model = CatBoostRegressor(**regressor_parameters)
+        model.fit(X=train_data, eval_set=test_data)
+
+        return model
+
+    def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[np.ndarray, np.ndarray]:
+        """
+        Filter the prediction features data and predict with it.
+        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
+        :return:
+        :predictions: np.array of predictions, rescaled with the 'labels_std' and
+        'labels_mean' values held in self.dh.data
+        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
+        data (NaNs) or felt uncertain about data (PCA and DI index)
+        """
+
+        print("--------------------Starting prediction--------------------")
+
+        original_feature_list = self.dh.build_feature_list(self.config)
+        filtered_dataframe, _ = self.dh.filter_features(
+            unfiltered_dataframe, original_feature_list, training_filter=False)
+        filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
+        self.dh.data_dictionary['prediction_features'] = filtered_dataframe
+
+        # optional additional data cleaning
+        if self.feature_parameters['principal_component_analysis']:
+            # project the features with the PCA held by the data handler and
+            # name the resulting columns PC0, PC1, ...
+            pca_components = self.dh.pca.transform(filtered_dataframe)
+            component_names = [f'PC{i}' for i in range(self.dh.data['n_kept_components'])]
+            self.dh.data_dictionary['prediction_features'] = pd.DataFrame(
+                data=pca_components,
+                columns=component_names,
+                index=filtered_dataframe.index,
+            )
+
+        if self.feature_parameters["remove_outliers"]:
+            self.dh.remove_outliers(predict=True)  # creates dropped index
+
+        if self.feature_parameters['DI_threshold']:
+            self.dh.check_if_pred_in_training_spaces()  # sets do_predict
+
+        predictions = self.model.predict(self.dh.data_dictionary['prediction_features'])
+
+        # compute the non-standardized predictions
+        predictions = predictions * self.dh.data['labels_std'] + self.dh.data['labels_mean']
+
+        print("--------------------Finished prediction--------------------")
+
+        return (predictions, self.dh.do_predict)
--- a/freqtrade/templates/FreqaiExampleStrategy.py
+++ b/freqtrade/templates/FreqaiExampleStrategy.py
@@ -0,0 +1,179 @@
+import logging
+import talib.abstract as ta
+from pandas import DataFrame
+import pandas as pd
+from technical import qtpylib
+import numpy as np
+from freqtrade.strategy import (merge_informative_pair)
+from freqtrade.strategy.interface import IStrategy
+from freqtrade.freqai.strategy_bridge import CustomModel
+from functools import reduce
+logger = logging.getLogger(__name__)
+
+class FreqaiExampleStrategy(IStrategy):
+    """
+    Example strategy showing how the user connects their own 
+    IFreqaiModel to the strategy. Namely, the user uses:
+    self.model = CustomModel(self.config)
+    self.model.bridge.start(dataframe, metadata)
+
+    to make predictions on their data. populate_any_indicators() automatically 
+    generates the variety of features indicated by the user in the
+    canonical freqtrade configuration file under config['freqai'].
+    """
+
+    minimal_roi = {
+          "0": 0.01,
+          "240": -1
+     }
+
+    plot_config = {
+        'main_plot': {
+        },
+        'subplots': {
+            "prediction":{
+                'prediction':{'color':'blue'}
+            },
+            "target_roi":{
+                'target_roi':{'color':'brown'},
+            },
+            "do_predict":{
+                'do_predict':{'color':'brown'},
+            },
+        }
+    }
+
+    stoploss = -0.05
+    use_sell_signal = True
+    startup_candle_count: int = 1000 
+
+
+    def informative_pairs(self):
+        """
+        List the (pair, timeframe) combinations built from the freqai
+        configuration: every pair in config['freqai']['corr_pairlist'] crossed
+        with every timeframe in config['freqai']['timeframes'].
+
+        The configuration is read straight from self.config because
+        self.freqai_info is only assigned inside populate_indicators().
+        :returns:
+        :informative_pairs: flat list of (pair, timeframe) tuples, grouped by
+        timeframe.
+        """
+        freqai_config = self.config['freqai']
+        return [
+            (pair, tf)
+            for tf in freqai_config['timeframes']
+            for pair in freqai_config['corr_pairlist']
+        ]
+
+    def populate_any_indicators(self, pair, df, tf, informative=None,coin=''):
+        """
+        Function designed to automatically generate, name and merge features
+        from user indicated timeframes in the configuration file. User can add
+        additional features here, but must follow the naming convention.
+        :params:
+        :pair: pair to be used as informative
+        :df: strategy dataframe which will receive merges from informatives
+        :tf: timeframe of the dataframe which will modify the feature names
+        :informative: the dataframe associated with the informative pair; it is
+        fetched from the data provider when not given.
+        :coin: the name of the coin which will modify the feature names.
+        :returns:
+        :df: `df` merged with the generated features, without the informative
+        date/open/high/low/close/volume columns of timeframe `tf`.
+        """
+        if informative is None:
+            informative = self.dp.get_pair_dataframe(pair, tf)
+
+        informative[f'{coin}rsi'] = ta.RSI(informative, timeperiod=14)
+        informative[f'{coin}mfi'] = ta.MFI(informative, timeperiod=25)
+        # TA-Lib's period keyword is `timeperiod`; there is no `window` option.
+        # 14 is the period of the assignment that previously overwrote this column.
+        informative[f'{coin}adx'] = ta.ADX(informative, timeperiod=14)
+
+        informative[f'{coin}20sma'] = ta.SMA(informative, timeperiod=20)
+        informative[f'{coin}21ema'] = ta.EMA(informative, timeperiod=21)
+        informative[f'{coin}bmsb'] = np.where(
+            informative[f'{coin}20sma'].lt(informative[f'{coin}21ema']), 1, 0)
+        informative[f'{coin}close_over_20sma'] = (
+            informative['close'] / informative[f'{coin}20sma'])
+
+        # 'ema21'/'sma20' carry the same values as '21ema'/'20sma'; both
+        # spellings are kept so every existing feature name stays available.
+        informative[f'{coin}ema21'] = informative[f'{coin}21ema']
+        informative[f'{coin}sma20'] = informative[f'{coin}20sma']
+
+        stoch = ta.STOCHRSI(informative, 15, 20, 2, 2)
+        informative[f'{coin}srsi-fk'] = stoch['fastk']
+        informative[f'{coin}srsi-fd'] = stoch['fastd']
+
+        bollinger = qtpylib.bollinger_bands(
+            qtpylib.typical_price(informative), window=14, stds=2.2)
+        informative[f'{coin}bb_lowerband'] = bollinger['lower']
+        informative[f'{coin}bb_middleband'] = bollinger['mid']
+        informative[f'{coin}bb_upperband'] = bollinger['upper']
+        informative[f'{coin}bb_width'] = (
+            (informative[f'{coin}bb_upperband'] - informative[f'{coin}bb_lowerband'])
+            / informative[f'{coin}bb_middleband'])
+        informative[f'{coin}close-bb_lower'] = (
+            informative['close'] / informative[f'{coin}bb_lowerband'])
+
+        informative[f'{coin}roc'] = ta.ROC(informative, timeperiod=3)
+
+        macd = ta.MACD(informative)
+        informative[f'{coin}macd'] = macd['macd']
+        informative[f'{coin}pct-change'] = informative['close'].pct_change()
+        informative[f'{coin}relative_volume'] = (
+            informative['volume'] / informative['volume'].rolling(10).mean())
+
+        # NOTE(review): with the default coin='' every column matches, so the raw
+        # date/open/high/low/close/volume columns are shifted as well - confirm
+        # that this is intended.
+        indicators = [col for col in informative if col.startswith(coin)]
+
+        # append one lagged copy of every indicator per shift step 1..shift
+        frames = [informative]
+        for n in range(1, self.freqai_info['feature_parameters']['shift'] + 1):
+            frames.append(informative[indicators].shift(n).add_suffix('_shift-' + str(n)))
+        if len(frames) > 1:
+            informative = pd.concat(frames, axis=1)
+
+        df = merge_informative_pair(df, informative, self.config['timeframe'], tf, ffill=True)
+        skip_columns = [(s + '_' + tf) for s in
+                        ['date', 'open', 'high', 'low', 'close', 'volume']]
+        df = df.drop(columns=skip_columns)
+
+        return df
+
+
+    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+        """
+        Build the configured features, run the model and attach its outputs.
+
+        Features are generated for metadata['pair'] and for every pair in the
+        'corr_pairlist' entry of the freqai configuration, once per configured
+        timeframe. The model results are stored in the 'prediction',
+        'do_predict', 'target_mean' and 'target_std' columns, from which the
+        'target_roi' and 'sell_roi' columns are derived.
+        :params:
+        :dataframe: strategy dataframe for the pair
+        :metadata: pair metadata; metadata['pair'] names the traded pair
+        :returns:
+        :dataframe: the dataframe extended with features and model outputs
+        """
+
+        # keep the freqai section of the configuration on the instance
+        self.freqai_info = self.config['freqai']
+
+        # a model wrapper is created on every call
+        self.model = CustomModel(self.config)
+
+        print('Populating indicators...')
+
+        # one pass per timeframe: first the traded pair itself, then every
+        # correlated pair with its base currency as feature name prefix
+        for tf in self.freqai_info['timeframes']:
+            dataframe = self.populate_any_indicators(metadata['pair'], dataframe.copy(), tf)
+            for corr_pair in self.freqai_info['corr_pairlist']:
+                coin_prefix = corr_pair.split("/")[0] + '-'
+                dataframe = self.populate_any_indicators(
+                    corr_pair, dataframe.copy(), tf, coin=coin_prefix)
+
+        # the model hands back four values: the prediction, a flag telling
+        # whether the prediction should be accepted, and the mean/std of the
+        # labels used during each training period
+        prediction, do_predict, target_mean, target_std = self.model.bridge.start(
+            dataframe, metadata)
+        dataframe['prediction'] = prediction
+        dataframe['do_predict'] = do_predict
+        dataframe['target_mean'] = target_mean
+        dataframe['target_std'] = target_std
+
+        dataframe['target_roi'] = dataframe['target_mean'] + dataframe['target_std'] * 0.5
+        dataframe['sell_roi'] = dataframe['target_mean'] - dataframe['target_std'] * 1.5
+        return dataframe
+
+
+    def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+        """
+        Set 'buy' to 1 on rows where the prediction is above 'target_roi' and
+        'do_predict' equals 1.
+        :params:
+        :dataframe: dataframe holding 'prediction', 'target_roi' and 'do_predict'
+        :metadata: pair metadata (not used here)
+        :returns:
+        :dataframe: the same dataframe with the 'buy' column set on matching rows
+        """
+        above_target = dataframe['prediction'] > dataframe['target_roi']
+        prediction_accepted = dataframe['do_predict'] == 1
+
+        # further alternatives appended to this list are OR-ed together below
+        buy_conditions = [above_target & prediction_accepted]
+
+        if buy_conditions:
+            entry_mask = reduce(lambda acc, cond: acc | cond, buy_conditions)
+            dataframe.loc[entry_mask, 'buy'] = 1
+
+        return dataframe
+
+
+    def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
+        """
+        Set 'sell' to 1 on rows where the prediction is below 'sell_roi' and
+        'do_predict' equals 1.
+        :params:
+        :dataframe: dataframe holding 'prediction', 'sell_roi' and 'do_predict'
+        :metadata: pair metadata (not used here)
+        :returns:
+        :dataframe: the same dataframe with the 'sell' column set on matching rows
+        """
+        below_exit_level = dataframe['prediction'] < dataframe['sell_roi']
+        prediction_accepted = dataframe['do_predict'] == 1
+
+        # further alternatives appended to this list are OR-ed together below
+        sell_conditions = [below_exit_level & prediction_accepted]
+
+        if sell_conditions:
+            exit_mask = reduce(lambda acc, cond: acc | cond, sell_conditions)
+            dataframe.loc[exit_mask, 'sell'] = 1
+
+        return dataframe
+
+    def get_ticker_indicator(self):
+        """
+        Return the configured timeframe without its last character, as an int.
+
+        Only the final character of config['timeframe'] is stripped before the
+        conversion, so the unit is discarded ('5m' and '5h' both give 5).
+        """
+        timeframe = self.config['timeframe']
+        numeric_part = timeframe[:-1]
+        return int(numeric_part)