avoid redundant indicator population for corr_pair list

This commit is contained in:
rcaulk 2022-10-20 16:30:32 +02:00
parent 7192ed7be6
commit a9db668082
2 changed files with 65 additions and 10 deletions

View File

@ -1126,6 +1126,49 @@ class FreqaiDataKitchen:
if pair not in self.all_pairs: if pair not in self.all_pairs:
self.all_pairs.append(pair) self.all_pairs.append(pair)
def extract_corr_pair_columns_from_populated_indicators(
    self,
    dataframe: DataFrame
) -> Dict[str, DataFrame]:
    """
    Split the columns belonging to each corr_pairlist entry out of a populated
    dataframe so they can be cached and re-attached to other pairs.
    :params:
    :dataframe: fully populated dataframe (current pair + corr_pairs)
    :return:
    :corr_dataframes: dictionary keyed by corr pair, each value holding only the
    columns whose name contains that pair's base coin (the text before the '/').
    Empty when "include_corr_pairlist" is not configured.
    """
    feature_parameters = self.freqai_config["feature_parameters"]
    corr_pairlist = feature_parameters.get("include_corr_pairlist", [])

    # Column selection is a plain substring match on the base coin, so any column
    # name that contains the coin text is picked up for that pair.
    return {
        corr_pair: dataframe.filter(
            [name for name in dataframe.columns if corr_pair.split('/')[0] in name],
            axis=1,
        )
        for corr_pair in corr_pairlist
    }
def attach_corr_pair_columns(self, dataframe: DataFrame,
                             corr_dataframes: Dict[str, DataFrame],
                             current_pair: str) -> DataFrame:
    """
    Attach the existing corr_pair dataframes to the current pair dataframe before training
    :params:
    :dataframe: current pair strategy dataframe, indicators populated already
    :corr_dataframes: dictionary of saved dataframes from earlier in the same candle,
    keyed by corr pair. Must contain an entry for every attached pair, otherwise a
    KeyError is raised.
    :current_pair: current pair to which we will attach corr pair dataframe
    :return:
    :dataframe: current pair dataframe of populated indicators, concatenated with corr_pairs
    ready for training. The input object is returned untouched when there is
    nothing to attach.
    """
    pairs = self.freqai_config["feature_parameters"].get("include_corr_pairlist", [])

    # Skip the corr pair that matches the current pair: its columns are already
    # part of `dataframe`. Note this is a substring test, not strict equality.
    frames_to_attach = [
        corr_dataframes[pair] for pair in pairs if current_pair not in pair
    ]
    if not frames_to_attach:
        return dataframe

    # Concatenate once instead of once per pair: every pd.concat call copies the
    # data, so growing the frame inside a loop repeats that copy needlessly.
    return pd.concat([dataframe, *frames_to_attach], axis=1)
def use_strategy_to_populate_indicators( def use_strategy_to_populate_indicators(
self, self,
strategy: IStrategy, strategy: IStrategy,
@ -1133,6 +1176,7 @@ class FreqaiDataKitchen:
base_dataframes: dict = {}, base_dataframes: dict = {},
pair: str = "", pair: str = "",
prediction_dataframe: DataFrame = pd.DataFrame(), prediction_dataframe: DataFrame = pd.DataFrame(),
do_corr_pairs: bool = True,
) -> DataFrame: ) -> DataFrame:
""" """
Use the user defined strategy for populating indicators during retrain Use the user defined strategy for populating indicators during retrain
@ -1173,10 +1217,13 @@ class FreqaiDataKitchen:
informative=base_dataframes[tf], informative=base_dataframes[tf],
set_generalized_indicators=sgi set_generalized_indicators=sgi
) )
if pairs:
for i in pairs: # ensure corr pairs are always last
if pair in i: for i in pairs:
continue # dont repeat anything from whitelist if pair in i:
continue # dont repeat anything from whitelist
for tf in tfs:
if pairs and do_corr_pairs:
dataframe = strategy.populate_any_indicators( dataframe = strategy.populate_any_indicators(
i, i,
dataframe.copy(), dataframe.copy(),

View File

@ -83,8 +83,6 @@ class IFreqaiModel(ABC):
self.pair_it_train = 0 self.pair_it_train = 0
self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist")) self.total_pairs = len(self.config.get("exchange", {}).get("pair_whitelist"))
self.train_queue = self._set_train_queue() self.train_queue = self._set_train_queue()
self.last_trade_database_summary: DataFrame = {}
self.current_trade_database_summary: DataFrame = {}
self.analysis_lock = Lock() self.analysis_lock = Lock()
self.inference_time: float = 0 self.inference_time: float = 0
self.train_time: float = 0 self.train_time: float = 0
@ -93,6 +91,8 @@ class IFreqaiModel(ABC):
self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe']) self.base_tf_seconds = timeframe_to_seconds(self.config['timeframe'])
self.continual_learning = self.freqai_info.get('continual_learning', False) self.continual_learning = self.freqai_info.get('continual_learning', False)
self.plot_features = self.ft_params.get("plot_feature_importances", 0) self.plot_features = self.ft_params.get("plot_feature_importances", 0)
self.corr_dataframes: Dict[str, DataFrame] = {}
self.get_corr_dataframes: bool = True
self._threads: List[threading.Thread] = [] self._threads: List[threading.Thread] = []
self._stop_event = threading.Event() self._stop_event = threading.Event()
@ -363,10 +363,10 @@ class IFreqaiModel(ABC):
# load the model and associated data into the data kitchen # load the model and associated data into the data kitchen
self.model = self.dd.load_data(metadata["pair"], dk) self.model = self.dd.load_data(metadata["pair"], dk)
with self.analysis_lock: dataframe = dk.use_strategy_to_populate_indicators(
dataframe = self.dk.use_strategy_to_populate_indicators( strategy, prediction_dataframe=dataframe, pair=metadata["pair"],
strategy, prediction_dataframe=dataframe, pair=metadata["pair"] do_corr_pairs=self.get_corr_dataframes
) )
if not self.model: if not self.model:
logger.warning( logger.warning(
@ -375,6 +375,13 @@ class IFreqaiModel(ABC):
self.dd.return_null_values_to_strategy(dataframe, dk) self.dd.return_null_values_to_strategy(dataframe, dk)
return dk return dk
if self.get_corr_dataframes:
self.corr_dataframes = dk.extract_corr_pair_columns_from_populated_indicators(dataframe)
self.get_corr_dataframes = False
else:
dataframe = dk.attach_corr_pair_columns(
dataframe, self.corr_dataframes, metadata["pair"])
dk.find_labels(dataframe) dk.find_labels(dataframe)
self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp) self.build_strategy_return_arrays(dataframe, dk, metadata["pair"], trained_timestamp)
@ -680,6 +687,7 @@ class IFreqaiModel(ABC):
" avoid blinding open trades and degrading performance.") " avoid blinding open trades and degrading performance.")
self.pair_it = 0 self.pair_it = 0
self.inference_time = 0 self.inference_time = 0
self.get_corr_dataframes = True
return return
def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''): def train_timer(self, do: Literal['start', 'stop'] = 'start', pair: str = ''):