From bd4f3d838ab959c3c8ff9f0e39daaeb62c3bddce Mon Sep 17 00:00:00 2001 From: Matthias Date: Fri, 4 Sep 2020 19:44:35 +0200 Subject: [PATCH] Implement merge_informative_pairs helper --- freqtrade/strategy/__init__.py | 1 + freqtrade/strategy/strategy_helper.py | 39 ++++++++++++++++ tests/strategy/test_strategy_helpers.py | 61 +++++++++++++++++++++++++ 3 files changed, 101 insertions(+) create mode 100644 freqtrade/strategy/strategy_helper.py create mode 100644 tests/strategy/test_strategy_helpers.py diff --git a/freqtrade/strategy/__init__.py b/freqtrade/strategy/__init__.py index 91ea0e075..5758bbbcc 100644 --- a/freqtrade/strategy/__init__.py +++ b/freqtrade/strategy/__init__.py @@ -2,3 +2,4 @@ from freqtrade.exchange import (timeframe_to_minutes, timeframe_to_prev_date, timeframe_to_seconds, timeframe_to_next_date, timeframe_to_msecs) from freqtrade.strategy.interface import IStrategy +from freqtrade.strategy.strategy_helper import merge_informative_pairs diff --git a/freqtrade/strategy/strategy_helper.py b/freqtrade/strategy/strategy_helper.py new file mode 100644 index 000000000..ce98cccba --- /dev/null +++ b/freqtrade/strategy/strategy_helper.py @@ -0,0 +1,39 @@ +import pandas as pd +from freqtrade.exchange import timeframe_to_minutes + + +def merge_informative_pairs(dataframe: pd.DataFrame, informative: pd.DataFrame, + timeframe_inf: str, ffill: bool = True) -> pd.DataFrame: + """ + Correctly merge informative samples to the original dataframe, avoiding lookahead bias. + + Since dates are candle open dates, merging a 15m candle that starts at 15:00, and a + 1h candle that starts at 15:00 will result in all candles to know the close at 16:00 + which they should not know. + + Moves the date of the informative pair by 1 time interval forward. + This way, the 14:00 1h candle is merged to 15:00 15m candle, since the 14:00 1h candle is the + last candle that's closed at 15:00, 15:15, 15:30 or 15:45. + + :param dataframe: Original dataframe + :param informative: Informative pair, most likely loaded via dp.get_pair_dataframe + :param timeframe_inf: Timeframe of the informative pair sample. + :param ffill: Forwardfill missing values - optional but usually required + """ + # Rename columns to be unique + + minutes = timeframe_to_minutes(timeframe_inf) + informative['date_merge'] = informative["date"] + pd.to_timedelta(minutes, 'm') + + informative.columns = [f"{col}_{timeframe_inf}" for col in informative.columns] + + # Combine the 2 dataframes + # all indicators on the informative sample MUST be calculated before this point + dataframe = pd.merge(dataframe, informative, left_on='date', + right_on=f'date_merge_{timeframe_inf}', how='left') + dataframe = dataframe.drop(f'date_merge_{timeframe_inf}', axis=1) + + if ffill: + dataframe = dataframe.ffill() + + return dataframe diff --git a/tests/strategy/test_strategy_helpers.py b/tests/strategy/test_strategy_helpers.py new file mode 100644 index 000000000..89bbba2c1 --- /dev/null +++ b/tests/strategy/test_strategy_helpers.py @@ -0,0 +1,61 @@ +import pandas as pd +import numpy as np + +from freqtrade.strategy import merge_informative_pairs, timeframe_to_minutes + + +def generate_test_data(timeframe: str, size: int): + np.random.seed(42) + tf_mins = timeframe_to_minutes(timeframe) + + base = np.random.normal(20, 2, size=size) + + date = pd.period_range('2020-07-05', periods=size, freq=f'{tf_mins}min').to_timestamp() + df = pd.DataFrame({ + 'date': date, + 'open': base, + 'high': base + np.random.normal(2, 1, size=size), + 'low': base - np.random.normal(2, 1, size=size), + 'close': base + np.random.normal(0, 1, size=size), + 'volume': np.random.normal(200, size=size) + } + ) + df = df.dropna() + return df + + +def test_merge_informative_pairs(): + data = generate_test_data('15m', 40) + informative = generate_test_data('1h', 40) + + result = merge_informative_pairs(data, informative, '1h', ffill=True) + assert isinstance(result, pd.DataFrame) + assert len(result) == len(data) + assert 'date' in result.columns + assert result['date'].equals(data['date']) + assert 'date_1h' in result.columns + + assert 'open' in result.columns + assert 'open_1h' in result.columns + assert result['open'].equals(data['open']) + + assert 'close' in result.columns + assert 'close_1h' in result.columns + assert result['close'].equals(data['close']) + + assert 'volume' in result.columns + assert 'volume_1h' in result.columns + assert result['volume'].equals(data['volume']) + + # First 4 rows are empty + assert result.iloc[0]['date_1h'] is pd.NaT + assert result.iloc[1]['date_1h'] is pd.NaT + assert result.iloc[2]['date_1h'] is pd.NaT + assert result.iloc[3]['date_1h'] is pd.NaT + # Next 4 rows contain the starting date (0:00) + assert result.iloc[4]['date_1h'] == result.iloc[0]['date'] + assert result.iloc[5]['date_1h'] == result.iloc[0]['date'] + assert result.iloc[6]['date_1h'] == result.iloc[0]['date'] + assert result.iloc[7]['date_1h'] == result.iloc[0]['date'] + # Next 4 rows contain the next Hourly date original date row 4 + assert result.iloc[8]['date_1h'] == result.iloc[4]['date']