add self-retraining functionality for live/dry

This commit is contained in:
robcaulk 2022-05-09 15:25:00 +02:00
parent 178c2014b0
commit 22bd5556ed
7 changed files with 218 additions and 44 deletions

View File

@ -13,7 +13,7 @@
"exit": 30
},
"exchange": {
"name": "ftx",
"name": "binance",
"key": "",
"secret": "",
"ccxt_config": {
@ -55,7 +55,9 @@
],
"train_period": 30,
"backtest_period": 7,
"identifier": "example",
"identifier": "livetest5",
"live_trained_timerange": "20220330-20220429",
"live_full_backtestrange": "20220302-20220501",
"base_features": [
"rsi",
"close_over_20sma",
@ -68,6 +70,7 @@
"macd"
],
"corr_pairlist": [
"BTC/USDT",
"ETH/USDT",
"LINK/USDT",
"DOT/USDT"

View File

@ -440,6 +440,8 @@ CONF_SCHEMA = {
"train_period": {"type": "integer", "default": 0},
"backtest_period": {"type": "integer", "default": 7},
"identifier": {"type": "str", "default": "example"},
"live_trained_timerange": {"type": "str"},
"live_full_backtestrange": {"type": "str"},
"base_features": {"type": "list"},
"corr_pairlist": {"type": "list"},
"feature_parameters": {

View File

@ -16,6 +16,10 @@ from sklearn.metrics.pairwise import pairwise_distances
from sklearn.model_selection import train_test_split
from freqtrade.configuration import TimeRange
from freqtrade.data.history import load_pair_history
from freqtrade.data.history.history_utils import refresh_backtest_ohlcv_data
from freqtrade.resolvers import ExchangeResolver
from freqtrade.strategy.interface import IStrategy
SECONDS_IN_DAY = 86400
@ -30,7 +34,7 @@ class FreqaiDataKitchen:
author: Robert Caulk, rob.caulk@gmail.com
"""
def __init__(self, config: Dict[str, Any], dataframe: DataFrame):
def __init__(self, config: Dict[str, Any], dataframe: DataFrame, live: bool = False):
self.full_dataframe = dataframe
self.data: Dict[Any, Any] = {}
self.data_dictionary: Dict[Any, Any] = {}
@ -45,17 +49,29 @@ class FreqaiDataKitchen:
self.full_target_mean: npt.ArrayLike = np.array([])
self.full_target_std: npt.ArrayLike = np.array([])
self.model_path = Path()
self.model_filename = ""
self.model_filename: str = ""
self.full_timerange = self.create_fulltimerange(
self.config["timerange"], self.freqai_config["train_period"]
)
if not live:
self.full_timerange = self.create_fulltimerange(self.config["timerange"],
self.freqai_config["train_period"]
)
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
self.full_timerange,
config["freqai"]["train_period"],
config["freqai"]["backtest_period"],
)
(self.training_timeranges, self.backtesting_timeranges) = self.split_timerange(
self.full_timerange,
config["freqai"]["train_period"],
config["freqai"]["backtest_period"],
)
def set_paths(self) -> None:
    """
    Point the kitchen at the folders used while running in live/dry mode.

    ``full_path`` is set to
    ``<user_data_dir>/models/<live_full_backtestrange><identifier>`` and
    ``model_path`` to the ``sub-train-<live_trained_timerange>`` folder inside
    it. Only the attributes are assigned; nothing is created on disk here.
    """
    models_root = self.config['user_data_dir'] / "models"
    run_folder = str(self.freqai_config['live_full_backtestrange'] +
                     self.freqai_config['identifier'])
    self.full_path = Path(models_root / run_folder)

    sub_folder = str("sub-train" + "-" + str(self.freqai_config['live_trained_timerange']))
    self.model_path = Path(self.full_path / sub_folder)
def save_data(self, model: Any) -> None:
"""
@ -187,10 +203,10 @@ class FreqaiDataKitchen:
labels = labels[
(drop_index == 0) & (drop_index_labels == 0)
] # assuming the labels depend entirely on the dataframe here.
logger.info(
"dropped %s training points due to NaNs, ensure all historical data downloaded",
len(unfiltered_dataframe) - len(filtered_dataframe),
)
# logger.info(
# "dropped %s training points due to NaNs, ensure all historical data downloaded",
# len(unfiltered_dataframe) - len(filtered_dataframe),
# )
self.data["filter_drop_index_training"] = drop_index
else:
@ -485,11 +501,11 @@ class FreqaiDataKitchen:
shift = ""
if n > 0:
shift = "_shift-" + str(n)
features.append(ft + shift + "_" + tf)
# features.append(ft + shift + "_" + tf)
for p in config["freqai"]["corr_pairlist"]:
features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
logger.info("number of features %s", len(features))
# logger.info("number of features %s", len(features))
return features
def check_if_pred_in_training_spaces(self) -> None:
@ -513,10 +529,10 @@ class FreqaiDataKitchen:
0,
)
logger.info(
"Distance checker tossed %s predictions for being too far from training data",
len(do_predict) - do_predict.sum(),
)
# logger.info(
# "Distance checker tossed %s predictions for being too far from training data",
# len(do_predict) - do_predict.sum(),
# )
self.do_predict += do_predict
self.do_predict -= 1
@ -577,15 +593,105 @@ class FreqaiDataKitchen:
/ str(full_timerange + self.freqai_config["identifier"])
)
config_path = Path(self.config["config_files"][0])
if not self.full_path.is_dir():
self.full_path.mkdir(parents=True, exist_ok=True)
shutil.copy(
Path(self.config["config_files"][0]).name,
Path(self.full_path / self.config["config_files"][0]),
config_path.name,
Path(self.full_path / config_path.parts[-1]),
)
return full_timerange
def check_if_new_training_required(self, training_timerange: str,
                                   metadata: dict) -> Tuple[bool, str]:
    """
    Check whether the current live model is older than one backtest period and,
    if it is, repoint the kitchen at the next training window.

    :param training_timerange: timerange string ("YYYYMMDD-YYYYMMDD") the current
        model was trained on.
    :param metadata: pair metadata coming from the strategy; only
        ``metadata['pair']`` is read, and only when a retrain is required.
    :return: ``(retrain, new_trained_timerange)``. ``new_trained_timerange`` is
        ``training_timerange`` with both ends moved forward by ``backtest_period``
        days; it is computed and returned even when ``retrain`` is False.
    """
    now = datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
    backtest_period = self.freqai_config['backtest_period']
    shift_seconds = backtest_period * SECONDS_IN_DAY

    trained_timerange = TimeRange.parse_timerange(training_timerange)
    # age of the model in days, measured from the end of its training window
    elapsed_days = (now - trained_timerange.stopts) / SECONDS_IN_DAY

    # NOTE(review): the window only moves by a single backtest_period, even if
    # several periods have elapsed - confirm this is the intended behaviour.
    trained_timerange.startts += shift_seconds
    trained_timerange.stopts += shift_seconds

    # timezone-aware replacement for the deprecated datetime.utcfromtimestamp();
    # the formatted UTC dates are the same.
    start = datetime.datetime.fromtimestamp(trained_timerange.startts,
                                            tz=datetime.timezone.utc)
    stop = datetime.datetime.fromtimestamp(trained_timerange.stopts,
                                           tz=datetime.timezone.utc)
    new_trained_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")

    retrain = elapsed_days > backtest_period

    if retrain:
        # base currency of the pair, extracted the same way the feature names are built
        coin = metadata['pair'].split("/")[0]
        # set the new model_path
        self.model_path = Path(self.full_path / str("sub-train" + "-" +
                                                    str(new_trained_timerange)))
        self.model_filename = "cb_" + coin.lower() + "_" + new_trained_timerange
        # this is not persistent at the moment TODO
        self.freqai_config['live_trained_timerange'] = new_trained_timerange
        # enables persistence, but not fully implemented into save/load data yet
        self.data['live_trained_timerange'] = new_trained_timerange

    return retrain, new_trained_timerange
def download_new_data_for_retraining(self, new_timerange: str, metadata: dict) -> None:
    """
    Refresh the stored OHLCV data needed to train on ``new_timerange``.

    Data is requested for every pair in ``corr_pairlist`` plus the pair in
    ``metadata['pair']``, on every timeframe listed in the freqai config.

    :param new_timerange: timerange string ("YYYYMMDD-YYYYMMDD") to download.
    :param metadata: pair metadata coming from the strategy.
    """
    exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
                                              self.config, validate=False)
    # The traded pair may already be listed in corr_pairlist; dict.fromkeys drops
    # the duplicate (keeping order) so the same pair is not requested twice.
    pairs = list(dict.fromkeys(self.freqai_config['corr_pairlist'] + [metadata['pair']]))
    timerange = TimeRange.parse_timerange(new_timerange)

    refresh_backtest_ohlcv_data(
        exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
        datadir=self.config['datadir'], timerange=timerange,
        new_pairs_days=self.config['new_pairs_days'],
        erase=False, data_format=self.config['dataformat_ohlcv'],
        trading_mode=self.config.get('trading_mode', 'spot'),
        prepend=self.config.get('prepend_data', False)
    )
def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any],
                                                                            DataFrame]:
    """
    Load the stored candle history used for (re)training.

    :param new_timerange: timerange string ("YYYYMMDD-YYYYMMDD") to load.
    :param metadata: pair metadata coming from the strategy.
    :return: ``(corr_dataframes, base_dataframe)`` where ``corr_dataframes`` maps
        ``pair -> timeframe -> DataFrame`` for every pair in ``corr_pairlist`` and
        every freqai timeframe, and ``base_dataframe`` is the history of
        ``metadata['pair']`` on the strategy timeframe (``config['timeframe']``).
    """
    timerange = TimeRange.parse_timerange(new_timerange)
    datadir = self.config['datadir']

    corr_dataframes: Dict[Any, Any] = {}
    for pair in self.freqai_config['corr_pairlist']:
        corr_dataframes[pair] = {
            tf: load_pair_history(datadir=datadir, timeframe=tf,
                                  pair=pair, timerange=timerange)
            for tf in self.freqai_config['timeframes']
        }

    base_pair = metadata['pair']
    base_tf = self.config['timeframe']
    # Exact key lookup: a substring test would let "BTC/USDT" match "WBTC/USDT".
    base_frames = corr_dataframes.get(base_pair, {})
    if base_tf in base_frames:
        base_dataframe = base_frames[base_tf]
    else:
        # The traded pair (or the strategy timeframe) is not part of the
        # correlated set; load it directly instead of failing on a missing entry.
        base_dataframe = load_pair_history(datadir=datadir, timeframe=base_tf,
                                           pair=base_pair, timerange=timerange)

    return corr_dataframes, base_dataframe
def use_strategy_to_populate_indicators(self, strategy: IStrategy, metadata: dict,
                                        corr_dataframes: dict,
                                        dataframe: DataFrame) -> DataFrame:
    """
    Run the strategy's ``populate_any_indicators`` for every freqai timeframe and
    every pair in ``corr_pairlist``, threading the dataframe through each call.

    :param strategy: strategy object providing ``populate_any_indicators``.
    :param metadata: pair metadata coming from the strategy (not read here).
    :param corr_dataframes: ``pair -> timeframe -> DataFrame`` lookup handed to
        the strategy as the informative data.
    :param dataframe: starting dataframe; a copy is passed on each call.
    :return: the dataframe returned by the last ``populate_any_indicators`` call,
        or ``dataframe`` itself when there is nothing to iterate over.
    """
    populated = dataframe
    for timeframe in self.freqai_config["timeframes"]:
        for corr_pair in self.freqai_config["corr_pairlist"]:
            populated = strategy.populate_any_indicators(
                corr_pair,
                populated.copy(),
                timeframe,
                corr_dataframes[corr_pair][timeframe],
                coin=corr_pair.split("/")[0] + "-",
            )
    return populated
def np_encoder(self, object):
if isinstance(object, np.generic):
return object.item()

View File

@ -8,9 +8,9 @@ import numpy.typing as npt
import pandas as pd
from pandas import DataFrame
from freqtrade.data.dataprovider import DataProvider
from freqtrade.enums import RunMode
from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
from freqtrade.strategy.interface import IStrategy
pd.options.mode.chained_assignment = None
@ -33,15 +33,14 @@ class IFreqaiModel(ABC):
self.data_split_parameters = config["freqai"]["data_split_parameters"]
self.model_training_parameters = config["freqai"]["model_training_parameters"]
self.feature_parameters = config["freqai"]["feature_parameters"]
self.backtest_timerange = config["timerange"]
# self.backtest_timerange = config["timerange"]
self.time_last_trained = None
self.current_time = None
self.model = None
self.predictions = None
self.live_trained_timerange = None
def start(self, dataframe: DataFrame, metadata: dict, dp: DataProvider) -> DataFrame:
def start(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> DataFrame:
"""
Entry point to the FreqaiModel, it will train a new model if
necessary before making the prediction.
@ -57,11 +56,18 @@ class IFreqaiModel(ABC):
the model.
:metadata: pair metadata coming from strategy.
"""
self.pair = metadata["pair"]
self.dh = FreqaiDataKitchen(self.config, dataframe)
if dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE):
logger.info('testing live')
live = strategy.dp.runmode in (RunMode.DRY_RUN, RunMode.LIVE)
self.pair = metadata["pair"]
self.dh = FreqaiDataKitchen(self.config, dataframe, live)
if live:
# logger.info('testing live')
self.start_live(dataframe, metadata, strategy)
return (self.dh.full_predictions, self.dh.full_do_predict,
self.dh.full_target_mean, self.dh.full_target_std)
logger.info("going to train %s timeranges", len(self.dh.training_timeranges))
@ -98,6 +104,42 @@ class IFreqaiModel(ABC):
return (self.dh.full_predictions, self.dh.full_do_predict,
self.dh.full_target_mean, self.dh.full_target_std)
def start_live(self, dataframe: DataFrame, metadata: dict, strategy: IStrategy) -> None:
    """
    Live/dry-run entry point: (re)train the model if needed, then predict.

    A new model is trained when the data kitchen reports that retraining is due
    or when no saved model is found for the configured ``live_trained_timerange``.
    The model is then loaded through the data kitchen, used to predict on
    ``dataframe``, and the predictions are appended to the kitchen.

    :param dataframe: strategy dataframe to predict on.
    :param metadata: pair metadata coming from the strategy.
    :param strategy: strategy object, used to populate indicators on freshly
        loaded training data.
    """
    self.dh.set_paths()

    file_exists = self.model_exists(
        metadata['pair'],
        training_timerange=self.freqai_info['live_trained_timerange'],
    )

    retrain, new_trained_timerange = self.dh.check_if_new_training_required(
        self.freqai_info['live_trained_timerange'], metadata
    )

    if retrain or not file_exists:
        self.dh.download_new_data_for_retraining(new_trained_timerange, metadata)
        corr_dataframes, pair_dataframes = self.dh.load_pairs_histories(
            new_trained_timerange, metadata
        )
        unfiltered_dataframe = self.dh.use_strategy_to_populate_indicators(
            strategy, metadata, corr_dataframes, pair_dataframes
        )
        self.model = self.train(unfiltered_dataframe, metadata)
        self.dh.save_data(self.model)

    # The model is always (re)loaded through the data kitchen, including right
    # after a fresh training run.
    self.model = self.dh.load_data()
    preds, do_preds = self.predict(dataframe)
    # dataframe should have len 1 here
    self.dh.append_predictions(preds, do_preds, len(dataframe))
def make_labels(self, dataframe: DataFrame) -> DataFrame:
"""
User defines the labels here (target values).

View File

@ -532,6 +532,22 @@ class IStrategy(ABC, HyperStrategyMixin):
"""
return None
def populate_any_indicators(self, pair: str, df: DataFrame, tf: str,
                            informative: DataFrame = None, coin: str = "") -> DataFrame:
    """
    Hook for automatically generating, naming and merging features for the
    timeframes the user lists in the configuration file.
    Strategies may add their own features here, but must keep to the naming
    convention. The hook lives on IStrategy because Freqai needs to know it
    exists; this base version hands ``df`` back unchanged.
    :params:
    :pair: pair to be used as informative
    :df: strategy dataframe which will receive merges from informatives
    :tf: timeframe of the dataframe, which will modify the feature names
    :informative: the dataframe associated with the informative pair
    :coin: the name of the coin, which will modify the feature names
    :return: the strategy dataframe
    """
    return df
###
# END - Intended to be overridden by strategy
###

View File

@ -124,7 +124,7 @@ class ExamplePredictionModel(IFreqaiModel):
data (NaNs) or felt uncertain about data (PCA and DI index)
"""
logger.info("--------------------Starting prediction--------------------")
# logger.info("--------------------Starting prediction--------------------")
original_feature_list = self.dh.build_feature_list(self.config)
filtered_dataframe, _ = self.dh.filter_features(
@ -151,8 +151,8 @@ class ExamplePredictionModel(IFreqaiModel):
predictions = self.model.predict(self.dh.data_dictionary["prediction_features"])
# compute the non-standardized predictions
predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"]
self.dh.predictions = predictions * self.dh.data["labels_std"] + self.dh.data["labels_mean"]
logger.info("--------------------Finished prediction--------------------")
# logger.info("--------------------Finished prediction--------------------")
return (predictions, self.dh.do_predict)
return (self.dh.predictions, self.dh.do_predict)

View File

@ -44,13 +44,16 @@ class FreqaiExampleStrategy(IStrategy):
stoploss = -0.05
use_sell_signal = True
startup_candle_count: int = 1000
startup_candle_count: int = 300
def informative_pairs(self):
pairs = self.freqai_info["corr_pairlist"]
pairs = self.config["freqai"]["corr_pairlist"]
informative_pairs = []
for tf in self.timeframes:
informative_pairs.append([(pair, tf) for pair in pairs])
for tf in self.config["freqai"]["timeframes"]:
# informative_pairs.append((self.pair, tf))
# informative_pairs.append([(pair, tf) for pair in pairs])
for pair in pairs:
informative_pairs.append((pair, tf))
return informative_pairs
def populate_any_indicators(self, pair, df, tf, informative=None, coin=""):
@ -129,6 +132,7 @@ class FreqaiExampleStrategy(IStrategy):
# the configuration file parameters are stored here
self.freqai_info = self.config["freqai"]
self.pair = metadata['pair']
# the model is instantiated here
self.model = CustomModel(self.config)
@ -138,12 +142,13 @@ class FreqaiExampleStrategy(IStrategy):
# the following loops are necessary for building the features
# indicated by the user in the configuration file.
for tf in self.freqai_info["timeframes"]:
dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf)
for i in self.freqai_info["corr_pairlist"]:
# dataframe = self.populate_any_indicators(metadata["pair"], dataframe.copy(), tf)
for pair in self.freqai_info["corr_pairlist"]:
dataframe = self.populate_any_indicators(
i, dataframe.copy(), tf, coin=i.split("/")[0] + "-"
pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
)
print('dataframe_built')
# the model will return 4 values, its prediction, an indication of whether or not the
# prediction should be accepted, the target mean/std values from the labels used during
# each training period.
@ -152,7 +157,7 @@ class FreqaiExampleStrategy(IStrategy):
dataframe["do_predict"],
dataframe["target_mean"],
dataframe["target_std"],
) = self.model.bridge.start(dataframe, metadata)
) = self.model.bridge.start(dataframe, metadata, self)
dataframe["target_roi"] = dataframe["target_mean"] + dataframe["target_std"] * 0.5
dataframe["sell_roi"] = dataframe["target_mean"] - dataframe["target_std"] * 1.5