create more flexible whitelist, avoid duplicating whitelist features into corr_pairlist, update docs

This commit is contained in:
robcaulk
2022-05-09 17:01:49 +02:00
parent 22bd5556ed
commit 9b3e5faebe
6 changed files with 119 additions and 39 deletions

View File

@@ -485,7 +485,7 @@ class FreqaiDataKitchen:
return
def build_feature_list(self, config: dict) -> list:
def build_feature_list(self, config: dict, metadata: dict) -> list:
"""
Build the list of features that will be used to filter
the full dataframe. Feature list is constructed from the
@@ -501,8 +501,10 @@ class FreqaiDataKitchen:
shift = ""
if n > 0:
shift = "_shift-" + str(n)
# features.append(ft + shift + "_" + tf)
features.append(metadata['pair'].split("/")[0] + "-" + ft + shift + "_" + tf)
for p in config["freqai"]["corr_pairlist"]:
if metadata['pair'] in p:
continue # avoid duplicate features
features.append(p.split("/")[0] + "-" + ft + shift + "_" + tf)
# logger.info("number of features %s", len(features))
@@ -640,9 +642,10 @@ class FreqaiDataKitchen:
exchange = ExchangeResolver.load_exchange(self.config['exchange']['name'],
self.config, validate=False)
pairs = self.freqai_config['corr_pairlist'] + [metadata['pair']]
pairs = self.freqai_config['corr_pairlist']
if metadata['pair'] not in pairs:
pairs += metadata['pair'] # dont include pair twice
timerange = TimeRange.parse_timerange(new_timerange)
# data_handler = get_datahandler(datadir, data_format)
refresh_backtest_ohlcv_data(
exchange, pairs=pairs, timeframes=self.freqai_config['timeframes'],
@@ -656,33 +659,45 @@ class FreqaiDataKitchen:
def load_pairs_histories(self, new_timerange: str, metadata: dict) -> Tuple[Dict[Any, Any],
DataFrame]:
corr_dataframes: Dict[Any, Any] = {}
# pair_dataframes: Dict[Any, Any] = {}
base_dataframes: Dict[Any, Any] = {}
pairs = self.freqai_config['corr_pairlist'] # + [metadata['pair']]
timerange = TimeRange.parse_timerange(new_timerange)
for p in pairs:
corr_dataframes[p] = {}
for tf in self.freqai_config['timeframes']:
for tf in self.freqai_config['timeframes']:
base_dataframes[tf] = load_pair_history(datadir=self.config['datadir'],
timeframe=tf,
pair=metadata['pair'], timerange=timerange)
for p in pairs:
if metadata['pair'] in p:
continue # dont repeat anything from whitelist
corr_dataframes[p] = {}
corr_dataframes[p][tf] = load_pair_history(datadir=self.config['datadir'],
timeframe=tf,
pair=p, timerange=timerange)
base_dataframe = [dataframe for key, dataframe in corr_dataframes.items()
if metadata['pair'] in key]
# base_dataframe = [dataframe for key, dataframe in corr_dataframes.items()
# if metadata['pair'] in key]
# [0] indexes the lowest tf for the basepair
return corr_dataframes, base_dataframe[0][self.config['timeframe']]
return corr_dataframes, base_dataframes
def use_strategy_to_populate_indicators(self, strategy: IStrategy, metadata: dict,
def use_strategy_to_populate_indicators(self, strategy: IStrategy,
corr_dataframes: dict,
dataframe: DataFrame) -> DataFrame:
base_dataframes: dict,
metadata: dict) -> DataFrame:
# dataframe = pair_dataframes[0] # this is the base tf pair df
dataframe = base_dataframes[self.config['timeframe']]
for tf in self.freqai_config["timeframes"]:
# dataframe = strategy.populate_any_indicators(metadata["pair"], dataframe.copy,
# tf, pair_dataframes[tf])
dataframe = strategy.populate_any_indicators(metadata['pair'],
dataframe.copy(),
tf,
base_dataframes[tf],
coin=metadata['pair'].split("/")[0] + "-"
)
for i in self.freqai_config["corr_pairlist"]:
if metadata['pair'] in i:
continue # dont repeat anything from whitelist
dataframe = strategy.populate_any_indicators(i,
dataframe.copy(),
tf,

View File

@@ -93,7 +93,7 @@ class IFreqaiModel(ABC):
else:
self.model = self.dh.load_data()
preds, do_preds = self.predict(dataframe_backtest)
preds, do_preds = self.predict(dataframe_backtest, metadata)
self.dh.append_predictions(preds, do_preds, len(dataframe_backtest))
print('predictions', len(self.dh.full_predictions),
@@ -120,13 +120,13 @@ class IFreqaiModel(ABC):
if retrain or not file_exists:
self.dh.download_new_data_for_retraining(new_trained_timerange, metadata)
# dataframe = download-data
corr_dataframes, pair_dataframes = self.dh.load_pairs_histories(new_trained_timerange,
corr_dataframes, base_dataframes = self.dh.load_pairs_histories(new_trained_timerange,
metadata)
unfiltered_dataframe = self.dh.use_strategy_to_populate_indicators(strategy,
metadata,
corr_dataframes,
pair_dataframes)
base_dataframes,
metadata)
self.model = self.train(unfiltered_dataframe, metadata)
self.dh.save_data(self.model)
@@ -134,7 +134,7 @@ class IFreqaiModel(ABC):
self.freqai_info
self.model = self.dh.load_data()
preds, do_preds = self.predict(dataframe)
preds, do_preds = self.predict(dataframe, metadata)
self.dh.append_predictions(preds, do_preds, len(dataframe))
# dataframe should have len 1 here
@@ -175,7 +175,7 @@ class IFreqaiModel(ABC):
return
@abstractmethod
def predict(self, dataframe: DataFrame) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
def predict(self, dataframe: DataFrame, metadata: dict) -> Tuple[npt.ArrayLike, npt.ArrayLike]:
"""
Filter the prediction features data and predict with it.
:param: unfiltered_dataframe: Full dataframe for the current backtest period.