fix conflicts

This commit is contained in:
longyu
2022-07-22 17:17:57 +02:00
parent 5ad8d08b84
commit 38841e30b8
16 changed files with 838 additions and 180 deletions


@@ -301,7 +301,7 @@ class FreqaiDataDrawer:
model_folders = [x for x in self.full_path.iterdir() if x.is_dir()]
pattern = re.compile(r"sub-train-(\w+)(\d{10})")
pattern = re.compile(r"sub-train-(\w+)_(\d{10})")
delete_dict: Dict[str, Any] = {}
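The pattern change above adds an explicit "_" separator between the coin name and the
ten-digit timestamp in the sub-train folder names. A minimal sketch, using a hypothetical
folder name, of what the updated regex captures:

import re

# hypothetical folder name following the new "sub-train-<COIN>_<timestamp>" scheme
folder = "sub-train-ETH_1657836000"
pattern = re.compile(r"sub-train-(\w+)_(\d{10})")
match = pattern.match(folder)
if match:
    coin, timestamp = match.groups()
    print(coin, timestamp)  # ETH 1657836000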


@@ -88,7 +88,8 @@ class FreqaiDataKitchen:
)
self.data_path = Path(
self.full_path / str("sub-train" + "-" + pair.split("/")[0] + str(trained_timestamp))
self.full_path
/ str("sub-train" + "-" + pair.split("/")[0] + "_" + str(trained_timestamp))
)
return
@@ -179,6 +180,7 @@ class FreqaiDataKitchen:
model = load(self.data_path / str(self.model_filename + "_model.joblib"))
else:
from tensorflow import keras
model = keras.models.load_model(self.data_path / str(self.model_filename + "_model.h5"))
if Path(self.data_path / str(self.model_filename + "_svm_model.joblib")).resolve().exists():
@@ -410,6 +412,10 @@ class FreqaiDataKitchen:
bt_split: the backtesting length (days). Specified in user configuration file
"""
if not isinstance(train_split, int) or train_split < 1:
raise OperationalException(
"train_period_days must be an integer greater than 0. " f"Got {train_split}."
)
train_period_days = train_split * SECONDS_IN_DAY
bt_period = bt_split * SECONDS_IN_DAY
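A standalone sketch of the validation and day-to-second conversion above, assuming
SECONDS_IN_DAY is 86400 and using ValueError in place of freqtrade's OperationalException:

SECONDS_IN_DAY = 86400  # assumed value of the constant used above

def to_periods(train_split: int, bt_split: int):
    # mirrors the check above: the training window must be a positive whole number of days
    if not isinstance(train_split, int) or train_split < 1:
        raise ValueError(f"train_period_days must be an integer greater than 0. Got {train_split}.")
    return train_split * SECONDS_IN_DAY, bt_split * SECONDS_IN_DAY

print(to_periods(30, 7))  # (2592000, 604800)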
@@ -561,8 +567,10 @@ class FreqaiDataKitchen:
"""
if self.keras:
logger.warning("SVM outlier removal not currently supported for Keras based models. "
"Skipping user requested function.")
logger.warning(
"SVM outlier removal not currently supported for Keras based models. "
"Skipping user requested function."
)
if predict:
self.do_predict = np.ones(len(self.data_dictionary["prediction_features"]))
return
@@ -676,8 +684,7 @@ class FreqaiDataKitchen:
training than older data.
"""
wfactor = self.config["freqai"]["feature_parameters"]["weight_factor"]
weights = np.exp(
- np.arange(num_weights) / (wfactor * num_weights))[::-1]
weights = np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]
return weights
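The reformatted expression above is the recency weighting applied to training data: weights
decay exponentially with age and the array is reversed so the newest rows weigh the most.
A minimal sketch, where wfactor stands in for the "weight_factor" config value:

import numpy as np

def exponential_weights(num_weights: int, wfactor: float) -> np.ndarray:
    return np.exp(-np.arange(num_weights) / (wfactor * num_weights))[::-1]

print(exponential_weights(5, 0.9).round(2))  # [0.41 0.51 0.64 0.8  1.  ] -> newest row weighted highest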
def append_predictions(self, predictions, do_predict, len_dataframe):
@@ -685,8 +692,6 @@ class FreqaiDataKitchen:
Append backtest prediction from current backtest period to all previous periods
"""
# ones = np.ones(len(predictions))
# target_mean, target_std = ones * self.data["target_mean"], ones * self.data["target_std"]
self.append_df = DataFrame()
for label in self.label_list:
self.append_df[label] = predictions[label]
@@ -702,13 +707,6 @@ class FreqaiDataKitchen:
else:
self.full_df = pd.concat([self.full_df, self.append_df], axis=0)
# self.full_predictions = np.append(self.full_predictions, predictions)
# self.full_do_predict = np.append(self.full_do_predict, do_predict)
# if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
# self.full_DI_values = np.append(self.full_DI_values, self.DI_values)
# self.full_target_mean = np.append(self.full_target_mean, target_mean)
# self.full_target_std = np.append(self.full_target_std, target_std)
return
def fill_predictions(self, dataframe):
@@ -729,25 +727,34 @@ class FreqaiDataKitchen:
self.append_df = DataFrame()
self.full_df = DataFrame()
# self.full_predictions = np.append(filler, self.full_predictions)
# self.full_do_predict = np.append(filler, self.full_do_predict)
# if self.freqai_config.get("feature_parameters", {}).get("DI_threshold", 0) > 0:
# self.full_DI_values = np.append(filler, self.full_DI_values)
# self.full_target_mean = np.append(filler, self.full_target_mean)
# self.full_target_std = np.append(filler, self.full_target_std)
return
def create_fulltimerange(self, backtest_tr: str, backtest_period_days: int) -> str:
if not isinstance(backtest_period_days, int):
raise OperationalException("backtest_period_days must be an integer")
if backtest_period_days < 0:
raise OperationalException("backtest_period_days must be positive")
backtest_timerange = TimeRange.parse_timerange(backtest_tr)
if backtest_timerange.stopts == 0:
backtest_timerange.stopts = int(
datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
)
# typically, open-ended timeranges do work; however, there are some edge cases where
# they do not. Accommodating these edge cases just to allow open-ended timeranges is
# not a high enough priority to warrant the effort. It is safer for now to simply ask
# the user to add their end date.
raise OperationalException("FreqAI backtesting does not allow open ended timeranges. "
"Please indicate the end date of your desired backtesting. "
"timerange.")
# backtest_timerange.stopts = int(
# datetime.datetime.now(tz=datetime.timezone.utc).timestamp()
# )
backtest_timerange.startts = (backtest_timerange.startts
- backtest_period_days * SECONDS_IN_DAY)
backtest_timerange.startts = (
backtest_timerange.startts - backtest_period_days * SECONDS_IN_DAY
)
start = datetime.datetime.utcfromtimestamp(backtest_timerange.startts)
stop = datetime.datetime.utcfromtimestamp(backtest_timerange.stopts)
full_timerange = start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d")
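A small sketch of the date arithmetic in create_fulltimerange(), with hypothetical inputs
and SECONDS_IN_DAY assumed to be 86400:

import datetime

SECONDS_IN_DAY = 86400
backtest_period_days = 30

# hypothetical closed backtest range: 2021-06-01 to 2021-07-01 (UTC)
startts = int(datetime.datetime(2021, 6, 1, tzinfo=datetime.timezone.utc).timestamp())
stopts = int(datetime.datetime(2021, 7, 1, tzinfo=datetime.timezone.utc).timestamp())

# mirror of the adjustment above: the start is pushed back by backtest_period_days
startts -= backtest_period_days * SECONDS_IN_DAY

start = datetime.datetime.utcfromtimestamp(startts)
stop = datetime.datetime.utcfromtimestamp(stopts)
print(start.strftime("%Y%m%d") + "-" + stop.strftime("%Y%m%d"))  # 20210502-20210701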
@@ -793,8 +800,9 @@ class FreqaiDataKitchen:
data_load_timerange = TimeRange()
# find the max indicator length required
max_timeframe_chars = self.freqai_config.get(
"feature_parameters", {}).get("include_timeframes")[-1]
max_timeframe_chars = self.freqai_config.get("feature_parameters", {}).get(
"include_timeframes"
)[-1]
max_period = self.freqai_config.get("feature_parameters", {}).get(
"indicator_max_period_candles", 50
)
@@ -861,35 +869,11 @@ class FreqaiDataKitchen:
coin, _ = pair.split("/")
self.data_path = Path(
self.full_path
/ str("sub-train" + "-" + pair.split("/")[0] + str(int(trained_timerange.stopts)))
/ str("sub-train" + "-" + pair.split("/")[0] + "_" + str(int(trained_timerange.stopts)))
)
self.model_filename = "cb_" + coin.lower() + "_" + str(int(trained_timerange.stopts))
# self.freqai_config['live_trained_timerange'] = str(int(trained_timerange.stopts))
# enables persistence, but not fully implemented into save/load data yer
# self.data['live_trained_timerange'] = str(int(trained_timerange.stopts))
# SUPERCEDED
# def download_new_data_for_retraining(self, timerange: TimeRange, metadata: dict,
# strategy: IStrategy) -> None:
# exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
# self.config, validate=False, freqai=True)
# # exchange = strategy.dp._exchange # closes ccxt session
# pairs = copy.deepcopy(self.freqai_config.get('corr_pairlist', []))
# if str(metadata['pair']) not in pairs:
# pairs.append(str(metadata['pair']))
# refresh_backtest_ohlcv_data(
# exchange, pairs=pairs, timeframes=self.freqai_config.get('timeframes'),
# datadir=self.config['datadir'], timerange=timerange,
# new_pairs_days=self.config['new_pairs_days'],
# erase=False, data_format=self.config.get('dataformat_ohlcv', 'json'),
# trading_mode=self.config.get('trading_mode', 'spot'),
# prepend=self.config.get('prepend_data', False)
# )
def download_all_data_for_training(self, timerange: TimeRange) -> None:
"""
Called only once upon start of bot to download the necessary data for
@@ -969,8 +953,9 @@ class FreqaiDataKitchen:
def set_all_pairs(self) -> None:
self.all_pairs = copy.deepcopy(self.freqai_config.get(
'feature_parameters', {}).get('include_corr_pairlist', []))
self.all_pairs = copy.deepcopy(
self.freqai_config.get("feature_parameters", {}).get("include_corr_pairlist", [])
)
for pair in self.config.get("exchange", "").get("pair_whitelist"):
if pair not in self.all_pairs:
self.all_pairs.append(pair)
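The rewrapped call above feeds a simple de-duplicating merge of the corr pairlist and the
exchange whitelist; a toy sketch with hypothetical pair lists:

include_corr_pairlist = ["BTC/USDT", "ETH/USDT"]
pair_whitelist = ["ETH/USDT", "SOL/USDT"]

all_pairs = list(include_corr_pairlist)
for pair in pair_whitelist:
    if pair not in all_pairs:
        all_pairs.append(pair)
print(all_pairs)  # ['BTC/USDT', 'ETH/USDT', 'SOL/USDT']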
@@ -1014,8 +999,9 @@ class FreqaiDataKitchen:
corr_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
historic_data = self.dd.historic_data
pairs = self.freqai_config.get('feature_parameters', {}).get(
'include_corr_pairlist', [])
pairs = self.freqai_config.get("feature_parameters", {}).get(
"include_corr_pairlist", []
)
for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
base_dataframes[tf] = self.slice_dataframe(timerange, historic_data[pair][tf])
@@ -1031,40 +1017,13 @@ class FreqaiDataKitchen:
return corr_dataframes, base_dataframes
# SUPERCEDED
# def load_pairs_histories(self, timerange: TimeRange, metadata: dict) -> Tuple[Dict[Any, Any],
# DataFrame]:
# corr_dataframes: Dict[Any, Any] = {}
# base_dataframes: Dict[Any, Any] = {}
# pairs = self.freqai_config.get('include_corr_pairlist', []) # + [metadata['pair']]
# # timerange = TimeRange.parse_timerange(new_timerange)
# for tf in self.freqai_config.get('timeframes'):
# base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
# timeframe=tf,
# pair=metadata['pair'], timerange=timerange,
# data_format=self.config.get(
# 'dataformat_ohlcv', 'json'),
# candle_type=self.config.get(
# 'trading_mode', 'spot'))
# if pairs:
# for p in pairs:
# if metadata['pair'] in p:
# continue # dont repeat anything from whitelist
# if p not in corr_dataframes:
# corr_dataframes[p] = {}
# corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
# timeframe=tf,
# pair=p, timerange=timerange,
# data_format=self.config.get(
# 'dataformat_ohlcv', 'json'),
# candle_type=self.config.get(
# 'trading_mode', 'spot'))
# return corr_dataframes, base_dataframes
def use_strategy_to_populate_indicators(
self, strategy: IStrategy, corr_dataframes: dict, base_dataframes: dict, pair: str
self,
strategy: IStrategy,
corr_dataframes: dict = {},
base_dataframes: dict = {},
pair: str = "",
prediction_dataframe: DataFrame = pd.DataFrame(),
) -> DataFrame:
"""
Use the user defined strategy for populating indicators during
@@ -1079,16 +1038,31 @@ class FreqaiDataKitchen:
:returns:
dataframe: DataFrame = dataframe containing populated indicators
"""
dataframe = base_dataframes[self.config["timeframe"]].copy()
pairs = self.freqai_config.get('feature_parameters', {}).get('include_corr_pairlist', [])
# for prediction dataframe creation, we let the dataprovider handle everything in the
# strategy, so we create empty dictionaries, which allows us to pass None to
# `populate_any_indicators()`, signaling that we want the dp to give us the live dataframe.
tfs = self.freqai_config.get("feature_parameters", {}).get("include_timeframes")
pairs = self.freqai_config.get("feature_parameters", {}).get("include_corr_pairlist", [])
if not prediction_dataframe.empty:
dataframe = prediction_dataframe.copy()
for tf in tfs:
base_dataframes[tf] = None
for p in pairs:
if p not in corr_dataframes:
corr_dataframes[p] = {}
corr_dataframes[p][tf] = None
else:
dataframe = base_dataframes[self.config["timeframe"]].copy()
sgi = True
for tf in self.freqai_config.get("feature_parameters", {}).get("include_timeframes"):
for tf in tfs:
dataframe = strategy.populate_any_indicators(
pair,
pair,
dataframe.copy(),
tf,
base_dataframes[tf],
informative=base_dataframes[tf],
coin=pair.split("/")[0] + "-",
set_generalized_indicators=sgi,
)
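A sketch, assumed rather than taken from this diff, of how a strategy's
populate_any_indicators() can honour the informative=None signal described in the comment
above by pulling the live dataframe from the dataprovider (self.dp.get_pair_dataframe is
the existing DataProvider call):

def populate_any_indicators(
    self, pair, df, tf, informative=None, coin="", set_generalized_indicators=False
):
    if informative is None:
        # None signals that the dataprovider should supply the live dataframe
        informative = self.dp.get_pair_dataframe(pair, tf)
    # ... compute features on `informative` and merge them back into `df` ...
    return df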
@@ -1102,7 +1076,7 @@ class FreqaiDataKitchen:
i,
dataframe.copy(),
tf,
corr_dataframes[i][tf],
informative=corr_dataframes[i][tf],
coin=i.split("/")[0] + "-",
)
@@ -1113,7 +1087,8 @@ class FreqaiDataKitchen:
Fit the labels with a gaussian distribution
"""
import scipy as spy
num_candles = self.freqai_config.get('fit_live_predictions_candles', 100)
num_candles = self.freqai_config.get("fit_live_predictions_candles", 100)
self.data["labels_mean"], self.data["labels_std"] = {}, {}
for label in self.label_list:
f = spy.stats.norm.fit(self.dd.historic_predictions[self.pair][label].tail(num_candles))
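fit_labels() above uses scipy's maximum-likelihood normal fit to summarise recent label
values; a minimal sketch with synthetic data standing in for the historic predictions:

import numpy as np
from scipy.stats import norm

# synthetic stand-in for historic_predictions[pair][label].tail(num_candles)
recent_labels = np.random.default_rng(0).normal(loc=0.002, scale=0.01, size=100)
labels_mean, labels_std = norm.fit(recent_labels)  # returns (mean, std) estimates
print(labels_mean, labels_std)  # roughly 0.002 and 0.01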


@@ -73,6 +73,8 @@ class IFreqaiModel(ABC):
self.freqai_info["feature_parameters"]["DI_threshold"] = 0
logger.warning("DI threshold is not configured for Keras models yet. Deactivating.")
self.CONV_WIDTH = self.freqai_info.get("conv_width", 2)
self.pair_it = 0
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
def assert_config(self, config: Dict[str, Any]) -> None:
@@ -106,6 +108,10 @@ class IFreqaiModel(ABC):
elif not self.follow_mode:
self.dk = FreqaiDataKitchen(self.config, self.dd, self.live, metadata["pair"])
logger.info(f"Training {len(self.dk.training_timeranges)} timeranges")
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
dk = self.start_backtesting(dataframe, metadata, self.dk)
dataframe = self.remove_features_from_df(dk.return_dataframe)
@@ -160,6 +166,8 @@ class IFreqaiModel(ABC):
dk: FreqaiDataKitchen = Data management/analysis tool associated with the present pair only
"""
self.pair_it += 1
train_it = 0
# Loop enforcing the sliding window training/backtesting paradigm
# tr_train is the training time range e.g. 1 historical month
# tr_backtest is the backtesting time range e.g. the week directly
@@ -167,22 +175,26 @@ class IFreqaiModel(ABC):
# entire backtest
for tr_train, tr_backtest in zip(dk.training_timeranges, dk.backtesting_timeranges):
(_, _, _, _) = self.dd.get_pair_dict_info(metadata["pair"])
train_it += 1
total_trains = len(dk.backtesting_timeranges)
gc.collect()
dk.data = {} # clean the pair specific data between training window sliding
self.training_timerange = tr_train
# self.training_timerange_timerange = tr_train
dataframe_train = dk.slice_dataframe(tr_train, dataframe)
dataframe_backtest = dk.slice_dataframe(tr_backtest, dataframe)
trained_timestamp = tr_train # TimeRange.parse_timerange(tr_train)
trained_timestamp = tr_train
tr_train_startts_str = datetime.datetime.utcfromtimestamp(tr_train.startts).strftime(
"%Y-%m-%d %H:%M:%S"
)
tr_train_stopts_str = datetime.datetime.utcfromtimestamp(tr_train.stopts).strftime(
"%Y-%m-%d %H:%M:%S"
)
logger.info("Training %s", metadata["pair"])
logger.info(f"Training {tr_train_startts_str} to {tr_train_stopts_str}")
logger.info(
f"Training {metadata['pair']}, {self.pair_it}/{self.total_pairs} pairs"
f" from {tr_train_startts_str} to {tr_train_stopts_str}, {train_it}/{total_trains} "
"trains"
)
dk.data_path = Path(
dk.full_path
@@ -190,6 +202,7 @@ class IFreqaiModel(ABC):
"sub-train"
+ "-"
+ metadata["pair"].split("/")[0]
+ "_"
+ str(int(trained_timestamp.stopts))
)
)
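The progress log in this hunk reports the sliding window enforced by the surrounding loop:
each training time range is paired with the backtesting range that follows it. A toy
illustration with hypothetical window strings:

training_timeranges = ["20220101-20220201", "20220108-20220208", "20220115-20220215"]
backtesting_timeranges = ["20220201-20220208", "20220208-20220215", "20220215-20220222"]
total_trains = len(backtesting_timeranges)
for train_it, (tr_train, tr_backtest) in enumerate(
    zip(training_timeranges, backtesting_timeranges), start=1
):
    print(f"train on {tr_train}, backtest on {tr_backtest} ({train_it}/{total_trains} trains)")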
@@ -281,6 +294,10 @@ class IFreqaiModel(ABC):
# load the model and associated data into the data kitchen
self.model = dk.load_data(coin=metadata["pair"])
dataframe = self.dk.use_strategy_to_populate_indicators(
strategy, prediction_dataframe=dataframe, pair=metadata["pair"]
)
if not self.model:
logger.warning(
f"No model ready for {metadata['pair']}, returning null values to strategy."


@@ -171,32 +171,15 @@ class FreqaiExampleStrategy(IStrategy):
def populate_indicators(self, dataframe: DataFrame, metadata: dict) -> DataFrame:
self.freqai_info = self.config["freqai"]
self.pair = metadata["pair"]
sgi = True
# the following loops are necessary for building the features
# indicated by the user in the configuration file.
# All indicators must be populated by populate_any_indicators() for live functionality
# to work correctly.
for tf in self.freqai_info["feature_parameters"]["include_timeframes"]:
dataframe = self.populate_any_indicators(
metadata,
self.pair,
dataframe.copy(),
tf,
coin=self.pair.split("/")[0] + "-",
set_generalized_indicators=sgi,
)
sgi = False
for pair in self.freqai_info["feature_parameters"]["include_corr_pairlist"]:
if metadata["pair"] in pair:
continue # do not include whitelisted pair twice if it is in corr_pairlist
dataframe = self.populate_any_indicators(
metadata, pair, dataframe.copy(), tf, coin=pair.split("/")[0] + "-"
)
# the model will return 4 values, its prediction, an indication of whether or not the
# prediction should be accepted, the target mean/std values from the labels used during
# each training period.
# the model will return all labels created by the user in `populate_any_indicators`
# (& appended targets), an indication of whether or not the prediction should be accepted,
# the target mean/std values for each of the labels created by the user in
# `populate_any_indicators()` for each training period.
dataframe = self.model.bridge.start(dataframe, metadata, self)
dataframe["target_roi"] = dataframe["&-s_close_mean"] + dataframe["&-s_close_std"] * 1.25