Implement merge_informative_pairs helper
parent 79ea8cf771
commit bd4f3d838a
@@ -2,3 +2,4 @@
 from freqtrade.exchange import (timeframe_to_minutes, timeframe_to_prev_date,
                                 timeframe_to_seconds, timeframe_to_next_date, timeframe_to_msecs)
 from freqtrade.strategy.interface import IStrategy
+from freqtrade.strategy.strategy_helper import merge_informative_pairs
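The added line re-exports the helper at package level, so it can be imported straight from freqtrade.strategy; the new test module further down relies on exactly this:

# Available after this commit (the test module below uses the same import)
from freqtrade.strategy import merge_informative_pairs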
freqtrade/strategy/strategy_helper.py (new file, 39 lines)
@@ -0,0 +1,39 @@
import pandas as pd
from freqtrade.exchange import timeframe_to_minutes


def merge_informative_pairs(dataframe: pd.DataFrame, informative: pd.DataFrame,
                            timeframe_inf: str, ffill: bool = True) -> pd.DataFrame:
    """
    Correctly merge informative samples to the original dataframe, avoiding lookahead bias.

    Since dates are candle open dates, merging a 15m candle that starts at 15:00 with the
    1h candle that also starts at 15:00 would let those 15m candles know that hour's close
    (only available at 16:00), which they should not know.

    Moves the date of the informative pair by 1 time interval forward.
    This way, the 14:00 1h candle is merged to the 15:00 15m candle, since the 14:00 1h candle
    is the last candle that is closed at 15:00, 15:15, 15:30 or 15:45.

    :param dataframe: Original dataframe
    :param informative: Informative pair, most likely loaded via dp.get_pair_dataframe
    :param timeframe_inf: Timeframe of the informative pair sample.
    :param ffill: Forwardfill missing values - optional but usually required
    """
    # Shift the informative dates forward by one candle so the merge is lookahead-safe
    minutes = timeframe_to_minutes(timeframe_inf)
    informative['date_merge'] = informative["date"] + pd.to_timedelta(minutes, 'm')

    # Rename columns to be unique
    informative.columns = [f"{col}_{timeframe_inf}" for col in informative.columns]

    # Combine the 2 dataframes
    # all indicators on the informative sample MUST be calculated before this point
    dataframe = pd.merge(dataframe, informative, left_on='date',
                         right_on=f'date_merge_{timeframe_inf}', how='left')
    dataframe = dataframe.drop(f'date_merge_{timeframe_inf}', axis=1)

    if ffill:
        dataframe = dataframe.ffill()

    return dataframe
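For context (not part of the diff): a sketch of how a strategy might call the new helper from populate_indicators, loading the informative pair via the dataprovider mentioned in the docstring. The 1h timeframe, the sma20 column and the self.dp attribute are illustrative assumptions, not taken from this commit.

# Illustrative usage only - not part of this commit.
# Assumed to live inside an IStrategy subclass with the dataprovider (self.dp) attached.
import pandas as pd
from freqtrade.strategy import merge_informative_pairs

def populate_indicators(self, dataframe: pd.DataFrame, metadata: dict) -> pd.DataFrame:
    # Load the informative pair via the dataprovider, as the docstring suggests
    informative = self.dp.get_pair_dataframe(pair=metadata['pair'], timeframe='1h')
    # Indicators on the informative sample must be calculated BEFORE the merge
    informative['sma20'] = informative['close'].rolling(20).mean()
    # Merge without lookahead bias; informative columns come back suffixed with '_1h'
    dataframe = merge_informative_pairs(dataframe, informative, '1h', ffill=True)
    return dataframe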
tests/strategy/test_strategy_helpers.py (new file, 61 lines)
@@ -0,0 +1,61 @@
import pandas as pd
import numpy as np

from freqtrade.strategy import merge_informative_pairs, timeframe_to_minutes


def generate_test_data(timeframe: str, size: int):
    np.random.seed(42)
    tf_mins = timeframe_to_minutes(timeframe)

    base = np.random.normal(20, 2, size=size)

    date = pd.period_range('2020-07-05', periods=size, freq=f'{tf_mins}min').to_timestamp()
    df = pd.DataFrame({
        'date': date,
        'open': base,
        'high': base + np.random.normal(2, 1, size=size),
        'low': base - np.random.normal(2, 1, size=size),
        'close': base + np.random.normal(0, 1, size=size),
        'volume': np.random.normal(200, size=size)
    }
    )
    df = df.dropna()
    return df


def test_merge_informative_pairs():
    data = generate_test_data('15m', 40)
    informative = generate_test_data('1h', 40)

    result = merge_informative_pairs(data, informative, '1h', ffill=True)
    assert isinstance(result, pd.DataFrame)
    assert len(result) == len(data)
    assert 'date' in result.columns
    assert result['date'].equals(data['date'])
    assert 'date_1h' in result.columns

    assert 'open' in result.columns
    assert 'open_1h' in result.columns
    assert result['open'].equals(data['open'])

    assert 'close' in result.columns
    assert 'close_1h' in result.columns
    assert result['close'].equals(data['close'])

    assert 'volume' in result.columns
    assert 'volume_1h' in result.columns
    assert result['volume'].equals(data['volume'])

    # First 4 rows are empty - no 1h candle has closed yet
    assert result.iloc[0]['date_1h'] is pd.NaT
    assert result.iloc[1]['date_1h'] is pd.NaT
    assert result.iloc[2]['date_1h'] is pd.NaT
    assert result.iloc[3]['date_1h'] is pd.NaT
    # Next 4 rows contain the starting date (0:00)
    assert result.iloc[4]['date_1h'] == result.iloc[0]['date']
    assert result.iloc[5]['date_1h'] == result.iloc[0]['date']
    assert result.iloc[6]['date_1h'] == result.iloc[0]['date']
    assert result.iloc[7]['date_1h'] == result.iloc[0]['date']
    # Next 4 rows contain the next hourly date (the original date at row 4)
    assert result.iloc[8]['date_1h'] == result.iloc[4]['date']