add freqai backend machinery, user interface, documentation

This commit is contained in:
robcaulk
2022-05-03 10:14:17 +02:00
parent ebab02fce3
commit fc837c4daa
19 changed files with 1405 additions and 3 deletions

View File

@@ -0,0 +1,139 @@
import numpy as np
import pandas as pd
from catboost import CatBoostRegressor, Pool
from pandas import DataFrame
from typing import Any, Dict, Tuple
from freqtrade.freqai.freqai_interface import IFreqaiModel
class ExamplePredictionModel(IFreqaiModel):
    """
    User created prediction model.

    The class provides the functions a user model is expected to supply:
    make_labels(), train(), fit() and predict(). It inherits from
    IFreqaiModel and works through the data handler exposed as ``self.dh``,
    where data is held, saved, loaded, and managed.
    """

    def make_labels(self, dataframe: DataFrame) -> pd.Series:
        """
        User defines the labels here (target values).

        The label ``s`` is the rolling maximum of the close price, taken over
        ``period`` candles of the close series shifted ``period`` candles
        back, divided by the current close, minus one. The label's mean and
        standard deviation are stored in ``self.dh.data`` under ``s_mean``
        and ``s_std``.

        Note: the label column ``'s'`` is written into ``dataframe`` in place.

        :params:
        :dataframe: the full dataframe for the present training period
        :returns:
        :labels: the ``'s'`` column of ``dataframe``
        """
        period = self.feature_parameters['period']
        future_max = dataframe['close'].shift(-period).rolling(period).max()
        dataframe['s'] = future_max / dataframe['close'] - 1

        self.dh.data['s_mean'] = dataframe['s'].mean()
        self.dh.data['s_std'] = dataframe['s'].std()

        print('label mean', self.dh.data['s_mean'], 'label std', self.dh.data['s_std'])

        return dataframe['s']

    def train(self, unfiltered_dataframe: DataFrame, metadata: dict) -> Any:
        """
        Filter the training data and train a model to it. Train makes heavy use of the
        datahandler for storing, saving, loading, and managing.

        :params:
        :unfiltered_dataframe: Full dataframe for the current training period
        :metadata: pair metadata from strategy.
        :returns:
        :model: Trained model which can be used to inference (self.predict)
        """
        print("--------------------Starting training--------------------")

        # create the full feature list based on user config info
        self.dh.training_features_list = self.dh.build_feature_list(self.config)
        unfiltered_labels = self.make_labels(unfiltered_dataframe)

        # filter the features requested by user in the configuration file and handle NaNs
        features_filtered, labels_filtered = self.dh.filter_features(
            unfiltered_dataframe,
            self.dh.training_features_list,
            unfiltered_labels,
            training_filter=True,
        )

        # split data into train/test data.
        data_dictionary = self.dh.make_train_test_datasets(features_filtered, labels_filtered)
        # standardize all data based on train_dataset only
        data_dictionary = self.dh.standardize_data(data_dictionary)

        # optional additional data cleaning
        # NOTE(review): these helpers take no arguments, so they presumably operate on
        # data held by the data handler itself - confirm they affect `data_dictionary`.
        if self.feature_parameters['principal_component_analysis']:
            self.dh.principal_component_analysis()
        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=False)
        if self.feature_parameters['DI_threshold']:
            self.dh.data['avg_mean_dist'] = self.dh.compute_distances()

        print("length of train data", len(data_dictionary['train_features']))

        model = self.fit(data_dictionary)

        print('Finished training')
        print(f'--------------------done training {metadata["pair"]}--------------------')

        return model

    def fit(self, data_dictionary: Dict) -> Any:
        """
        Most regressors use the same function names and arguments e.g. user
        can drop in LGBMRegressor in place of CatBoostRegressor and all data
        management will be properly handled by Freqai.

        :params:
        :data_dictionary: the dictionary constructed by DataHandler to hold
        all the training and test data/labels. The keys read here are
        ``{train,test}_features``, ``{train,test}_labels`` and
        ``{train,test}_weights``.
        :returns:
        :model: the fitted CatBoostRegressor
        """
        train_data = Pool(
            data=data_dictionary['train_features'],
            label=data_dictionary['train_labels'],
            weight=data_dictionary['train_weights'],
        )

        test_data = Pool(
            data=data_dictionary['test_features'],
            label=data_dictionary['test_labels'],
            weight=data_dictionary['test_weights'],
        )

        # user supplied training parameters are forwarded verbatim to the regressor
        model = CatBoostRegressor(verbose=100, early_stopping_rounds=400,
                                  **self.model_training_parameters)
        model.fit(X=train_data, eval_set=test_data)

        return model

    def predict(self, unfiltered_dataframe: DataFrame) -> Tuple[Any, Any]:
        """
        Filter the prediction features data and predict with it.

        :param: unfiltered_dataframe: Full dataframe for the current backtest period.
        :return:
        :predictions: np.array of predictions
        :do_predict: np.array of 1s and 0s to indicate places where freqai needed to remove
        data (NaNs) or felt uncertain about data (PCA and DI index)
        """
        print("--------------------Starting prediction--------------------")

        original_feature_list = self.dh.build_feature_list(self.config)
        filtered_dataframe, _ = self.dh.filter_features(
            unfiltered_dataframe, original_feature_list, training_filter=False
        )
        filtered_dataframe = self.dh.standardize_data_from_metadata(filtered_dataframe)
        self.dh.data_dictionary['prediction_features'] = filtered_dataframe

        # optional additional data cleaning
        if self.feature_parameters['principal_component_analysis']:
            # project the features with the PCA object kept by the data handler and
            # keep the original index so predictions stay aligned with the candles
            pca_components = self.dh.pca.transform(filtered_dataframe)
            n_components = self.dh.data['n_kept_components']
            self.dh.data_dictionary['prediction_features'] = pd.DataFrame(
                data=pca_components,
                columns=['PC' + str(i) for i in range(n_components)],
                index=filtered_dataframe.index,
            )

        if self.feature_parameters["remove_outliers"]:
            self.dh.remove_outliers(predict=True)  # creates dropped index

        if self.feature_parameters['DI_threshold']:
            self.dh.check_if_pred_in_training_spaces()  # sets do_predict

        predictions = self.model.predict(self.dh.data_dictionary['prediction_features'])

        # compute the non-standardized predictions
        predictions = predictions * self.dh.data['labels_std'] + self.dh.data['labels_mean']

        print("--------------------Finished prediction--------------------")

        return (predictions, self.dh.do_predict)

View File

@@ -0,0 +1,179 @@
import logging
import talib.abstract as ta
from pandas import DataFrame
import pandas as pd
from technical import qtpylib
import numpy as np
from freqtrade.strategy import (merge_informative_pair)
from freqtrade.strategy.interface import IStrategy
from freqtrade.freqai.strategy_bridge import CustomModel
from functools import reduce
logger = logging.getLogger(__name__)


class FreqaiExampleStrategy(IStrategy):
    """
    Example strategy showing how the user connects their own
    IFreqaiModel to the strategy. Namely, the user uses:
    self.model = CustomModel(self.config)
    self.model.bridge.start(dataframe, metadata)

    to make predictions on their data. populate_any_indicators() automatically
    generates the variety of features indicated by the user in the
    canonical freqtrade configuration file under config['freqai'].
    """

    minimal_roi = {
        "0": 0.01,
        "240": -1
    }

    plot_config = {
        'main_plot': {
        },
        'subplots': {
            "prediction": {
                'prediction': {'color': 'blue'}
            },
            "target_roi": {
                'target_roi': {'color': 'brown'},
            },
            "do_predict": {
                'do_predict': {'color': 'brown'},
            },
        }
    }

    stoploss = -0.05
    use_sell_signal = True
    startup_candle_count: int = 1000

    def informative_pairs(self):
        """
        Return the (pair, timeframe) combinations needed as informative data.

        Every pair in config['freqai']['corr_pairlist'] is combined with every
        timeframe in config['freqai']['timeframes'].

        :returns: flat list of (pair, timeframe) tuples
        """
        # read straight from the config: self.freqai_info is only assigned in
        # populate_indicators(), so it cannot be relied upon here.
        freqai_info = self.config['freqai']
        return [
            (pair, tf)
            for tf in freqai_info['timeframes']
            for pair in freqai_info['corr_pairlist']
        ]

    def populate_any_indicators(self, pair, df, tf, informative=None, coin=''):
        """
        Function designed to automatically generate, name and merge features
        from user indicated timeframes in the configuration file. User can add
        additional features here, but must follow the naming convention.

        :params:
        :pair: pair to be used as informative
        :df: strategy dataframe which will receive merges from informatives
        :tf: timeframe of the dataframe which will modify the feature names
        :informative: the dataframe associated with the informative pair
        :coin: the name of the coin which will modify the feature names.
        :returns:
        :df: the strategy dataframe with the new feature columns merged in and
        the informative date/OHLCV columns for ``tf`` dropped
        """
        if informative is None:
            informative = self.dp.get_pair_dataframe(pair, tf)

        informative[coin+'rsi'] = ta.RSI(informative, timeperiod=14)
        informative[coin+'mfi'] = ta.MFI(informative, timeperiod=25)
        # `timeperiod` is the ADX period argument; 14 is the value the feature
        # ended up with in the original (last assignment).
        informative[coin+'adx'] = ta.ADX(informative, timeperiod=14)
        informative[coin+'20sma'] = ta.SMA(informative, timeperiod=20)
        informative[coin+'21ema'] = ta.EMA(informative, timeperiod=21)
        informative[coin+'bmsb'] = np.where(
            informative[coin+'20sma'].lt(informative[coin+'21ema']), 1, 0)
        informative[coin+'close_over_20sma'] = informative['close'] / informative[coin+'20sma']
        # same indicators under their alternative names, kept so that feature
        # lists referring to either spelling continue to work
        informative[coin+'ema21'] = informative[coin+'21ema']
        informative[coin+'sma20'] = informative[coin+'20sma']

        stoch = ta.STOCHRSI(informative, 15, 20, 2, 2)
        informative[coin+'srsi-fk'] = stoch['fastk']
        informative[coin+'srsi-fd'] = stoch['fastd']

        bollinger = qtpylib.bollinger_bands(
            qtpylib.typical_price(informative), window=14, stds=2.2)
        informative[coin+'bb_lowerband'] = bollinger['lower']
        informative[coin+'bb_middleband'] = bollinger['mid']
        informative[coin+'bb_upperband'] = bollinger['upper']
        informative[coin+'bb_width'] = (
            (informative[coin+"bb_upperband"] - informative[coin+"bb_lowerband"])
            / informative[coin+"bb_middleband"]
        )
        informative[coin+'close-bb_lower'] = (
            informative['close'] / informative[coin+'bb_lowerband'])

        informative[coin+'roc'] = ta.ROC(informative, timeperiod=3)

        macd = ta.MACD(informative)
        informative[coin+'macd'] = macd['macd']
        informative[coin+'pct-change'] = informative['close'].pct_change()
        informative[coin+'relative_volume'] = (
            informative['volume'] / informative['volume'].rolling(10).mean())

        # NOTE(review): with the default coin='' every column matches, so the
        # raw date/OHLCV columns are shifted as well - confirm this is intended.
        indicators = [col for col in informative if col.startswith(coin)]

        # append lagged copies of every indicator, one set per shift 1..N
        n_shifts = self.freqai_info['feature_parameters']['shift']
        shifted_frames = [
            informative[indicators].shift(n).add_suffix('_shift-' + str(n))
            for n in range(1, n_shifts + 1)
        ]
        if shifted_frames:
            informative = pd.concat([informative, *shifted_frames], axis=1)

        df = merge_informative_pair(df, informative, self.config['timeframe'], tf, ffill=True)

        # the merged informative date/OHLCV columns are not features
        skip_columns = [(s + '_' + tf) for s in
                        ['date', 'open', 'high', 'low', 'close', 'volume']]
        df = df.drop(columns=skip_columns)

        return df

    def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Build all features and attach the model output to the dataframe.

        Adds the columns 'prediction', 'do_predict', 'target_mean' and
        'target_std' returned by the model bridge, plus the derived
        'target_roi' and 'sell_roi' thresholds.

        :params:
        :dataframe: strategy dataframe for the current pair
        :metadata: pair metadata; metadata['pair'] is read here
        :returns:
        :dataframe: the dataframe with features and model output columns
        """
        # the configuration file parameters are stored here
        self.freqai_info = self.config['freqai']

        # the model is instantiated here
        self.model = CustomModel(self.config)

        logger.info('Populating indicators...')

        # the following loops are necessary for building the features
        # indicated by the user in the configuration file.
        for tf in self.freqai_info['timeframes']:
            dataframe = self.populate_any_indicators(metadata['pair'],
                                                     dataframe.copy(), tf)
            # correlated pairs get their coin name as feature prefix, e.g. 'BTC-'
            for corr_pair in self.freqai_info['corr_pairlist']:
                dataframe = self.populate_any_indicators(
                    corr_pair, dataframe.copy(), tf, coin=corr_pair.split("/")[0] + '-')

        # the model will return 4 values, its prediction, an indication of whether or not
        # the prediction should be accepted, the target mean/std values from the labels
        # used during each training period.
        (dataframe['prediction'], dataframe['do_predict'],
         dataframe['target_mean'], dataframe['target_std']) = self.model.bridge.start(
            dataframe, metadata)

        # entry/exit thresholds expressed relative to the label statistics
        dataframe['target_roi'] = dataframe['target_mean'] + dataframe['target_std'] * 0.5
        dataframe['sell_roi'] = dataframe['target_mean'] - dataframe['target_std'] * 1.5
        return dataframe

    def populate_buy_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Set 'buy' to 1 where the prediction exceeds 'target_roi' and the model
        flagged the prediction as acceptable (do_predict == 1).
        """
        buy_conditions = [
            (dataframe['prediction'] > dataframe['target_roi'])
            &
            (dataframe['do_predict'] == 1)
        ]

        if buy_conditions:
            dataframe.loc[reduce(lambda x, y: x | y, buy_conditions), 'buy'] = 1

        return dataframe

    def populate_sell_trend(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
        """
        Set 'sell' to 1 where the prediction falls below 'sell_roi' and the
        model flagged the prediction as acceptable (do_predict == 1).
        """
        sell_conditions = [
            (dataframe['prediction'] < dataframe['sell_roi'])
            &
            (dataframe['do_predict'] == 1)
        ]
        if sell_conditions:
            dataframe.loc[reduce(lambda x, y: x | y, sell_conditions), 'sell'] = 1

        return dataframe

    def get_ticker_indicator(self):
        """
        Return the configured timeframe with its last character stripped,
        converted to int (e.g. '5m' -> 5). The unit character is discarded.
        """
        return int(self.config['timeframe'][:-1])