Ref: timeseries friendly merge_ordered in merge_informative_pair function

This commit is contained in:
Patel Kaushal 2022-04-21 18:35:41 +05:30 committed by GitHub
parent e4629a2730
commit ba305e93ed
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -56,12 +56,18 @@ def merge_informative_pair(dataframe: pd.DataFrame, informative: pd.DataFrame,
# Combine the 2 dataframes # Combine the 2 dataframes
# all indicators on the informative sample MUST be calculated before this point # all indicators on the informative sample MUST be calculated before this point
dataframe = pd.merge(dataframe, informative, left_on='date', if ffill:
right_on=date_merge, how='left') # https://pandas.pydata.org/docs/user_guide/merging.html#timeseries-friendly-merging
# merge_ordered - ffill method is 2.5x faster than separate ffill()
dataframe = pd.merge_ordered(dataframe, informative, fill_method="ffill", left_on='date',
right_on=date_merge, how='left')
else:
dataframe = pd.merge(dataframe, informative, left_on='date',
right_on=date_merge, how='left')
dataframe = dataframe.drop(date_merge, axis=1) dataframe = dataframe.drop(date_merge, axis=1)
if ffill: # if ffill:
dataframe = dataframe.ffill() # dataframe = dataframe.ffill()
return dataframe return dataframe