Merge pull request #3744 from freqtrade/fix/infomrativesample

fix Informative pair documentation
This commit is contained in:
Matthias 2020-09-08 16:38:08 +02:00 committed by GitHub
commit aa2d1e9cca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 212 additions and 12 deletions

View File

@ -483,6 +483,9 @@ if self.dp:
### Complete Data-provider sample
```python
from freqtrade.strategy import IStrategy, merge_informative_pairs
from pandas import DataFrame
class SampleStrategy(IStrategy):
# strategy init stuff...
@ -513,17 +516,12 @@ class SampleStrategy(IStrategy):
# Get the 14 day rsi
informative['rsi'] = ta.RSI(informative, timeperiod=14)
# Rename columns to be unique
informative.columns = [f"{col}_{inf_tf}" for col in informative.columns]
# Assuming inf_tf = '1d' - then the columns will now be:
# date_1d, open_1d, high_1d, low_1d, close_1d, rsi_1d
# Combine the 2 dataframes
# all indicators on the informative sample MUST be calculated before this point
dataframe = pd.merge(dataframe, informative, left_on='date', right_on=f'date_{inf_tf}', how='left')
# FFill to have the 1d value available in every row throughout the day.
# Without this, comparisons would only work once per day.
dataframe = dataframe.ffill()
# Use the helper function merge_informative_pair to safely merge the pair
# Automatically renames the columns and merges a shorter timeframe dataframe and a longer timeframe informative pair
# use ffill to have the 1d value available in every row throughout the day.
# Without this, comparisons between columns of the original and the informative pair would only work once per day.
# Full documentation of this method, see below
dataframe = merge_informative_pair(dataframe, informative, self.timeframe, inf_tf, ffill=True)
# Calculate rsi of the original dataframe (5m timeframe)
dataframe['rsi'] = ta.RSI(dataframe, timeperiod=14)
@ -547,6 +545,68 @@ class SampleStrategy(IStrategy):
***
## Helper functions
### *merge_informative_pair()*
This method helps you merge an informative pair to a regular dataframe without lookahead bias.
It's there to help you merge the dataframe in a safe and consistent way.
Options:
- Rename the columns for you to create unique columns
- Merge the dataframe without lookahead bias
- Forward-fill (optional)
All columns of the informative dataframe will be available on the returning dataframe in a renamed fashion:
!!! Example "Column renaming"
Assuming `inf_tf = '1d'` the resulting columns will be:
``` python
'date', 'open', 'high', 'low', 'close', 'rsi' # from the original dataframe
'date_1d', 'open_1d', 'high_1d', 'low_1d', 'close_1d', 'rsi_1d' # from the informative dataframe
```
??? Example "Column renaming - 1h"
Assuming `inf_tf = '1h'` the resulting columns will be:
``` python
'date', 'open', 'high', 'low', 'close', 'rsi' # from the original dataframe
'date_1h', 'open_1h', 'high_1h', 'low_1h', 'close_1h', 'rsi_1h' # from the informative dataframe
```
??? Example "Custom implementation"
A custom implementation for this is possible, and can be done as follows:
``` python
# Rename columns to be unique
informative.columns = [f"{col}_{inf_tf}" for col in informative.columns]
# Assuming inf_tf = '1d' - then the columns will now be:
# date_1d, open_1d, high_1d, low_1d, close_1d, rsi_1d
# Shift date by 1 candle
# This is necessary since the data is always the "open date"
# and a 15m candle starting at 12:15 should not know the close of the 1h candle from 12:00 to 13:00
minutes = timeframe_to_minutes(inf_tf)
# Only do this if the timeframes are different:
informative['date_merge'] = informative["date"] + pd.to_timedelta(minutes, 'm')
# Combine the 2 dataframes
# all indicators on the informative sample MUST be calculated before this point
dataframe = pd.merge(dataframe, informative, left_on='date', right_on=f'date_merge_{inf_tf}', how='left')
# FFill to have the 1d value available in every row throughout the day.
# Without this, comparisons would only work once per day.
dataframe = dataframe.ffill()
```
!!! Warning "Informative timeframe < timeframe"
Using informative timeframes smaller than the dataframe timeframe is not recommended with this method, as it will not use any of the additional information this would provide.
To use the more detailed information properly, more advanced methods should be applied (which are out of scope for freqtrade documentation, as it'll depend on the respective need).
***
## Additional data (Wallets)
The strategy provides access to the `Wallets` object. This contains the current balances on the exchange.

View File

@ -1 +1,5 @@
from freqtrade.strategy.interface import IStrategy # noqa: F401
# flake8: noqa: F401
from freqtrade.exchange import (timeframe_to_minutes, timeframe_to_prev_date,
timeframe_to_seconds, timeframe_to_next_date, timeframe_to_msecs)
from freqtrade.strategy.interface import IStrategy
from freqtrade.strategy.strategy_helper import merge_informative_pair

View File

@ -0,0 +1,48 @@
import pandas as pd
from freqtrade.exchange import timeframe_to_minutes
def merge_informative_pair(dataframe: pd.DataFrame, informative: pd.DataFrame,
timeframe: str, timeframe_inf: str, ffill: bool = True) -> pd.DataFrame:
"""
Correctly merge informative samples to the original dataframe, avoiding lookahead bias.
Since dates are candle open dates, merging a 15m candle that starts at 15:00, and a
1h candle that starts at 15:00 will result in all candles to know the close at 16:00
which they should not know.
Moves the date of the informative pair by 1 time interval forward.
This way, the 14:00 1h candle is merged to 15:00 15m candle, since the 14:00 1h candle is the
last candle that's closed at 15:00, 15:15, 15:30 or 15:45.
Assuming inf_tf = '1d' - then the resulting columns will be:
date_1d, open_1d, high_1d, low_1d, close_1d, rsi_1d
:param dataframe: Original dataframe
:param informative: Informative pair, most likely loaded via dp.get_pair_dataframe
:param timeframe: Timeframe of the original pair sample.
:param timeframe_inf: Timeframe of the informative pair sample.
:param ffill: Forwardfill missing values - optional but usually required
"""
minutes_inf = timeframe_to_minutes(timeframe_inf)
minutes = timeframe_to_minutes(timeframe)
if minutes >= minutes_inf:
# No need to forwardshift if the timeframes are identical
informative['date_merge'] = informative["date"]
else:
informative['date_merge'] = informative["date"] + pd.to_timedelta(minutes_inf, 'm')
# Rename columns to be unique
informative.columns = [f"{col}_{timeframe_inf}" for col in informative.columns]
# Combine the 2 dataframes
# all indicators on the informative sample MUST be calculated before this point
dataframe = pd.merge(dataframe, informative, left_on='date',
right_on=f'date_merge_{timeframe_inf}', how='left')
dataframe = dataframe.drop(f'date_merge_{timeframe_inf}', axis=1)
if ffill:
dataframe = dataframe.ffill()
return dataframe

View File

@ -0,0 +1,88 @@
import pandas as pd
import numpy as np
from freqtrade.strategy import merge_informative_pair, timeframe_to_minutes
def generate_test_data(timeframe: str, size: int):
np.random.seed(42)
tf_mins = timeframe_to_minutes(timeframe)
base = np.random.normal(20, 2, size=size)
date = pd.period_range('2020-07-05', periods=size, freq=f'{tf_mins}min').to_timestamp()
df = pd.DataFrame({
'date': date,
'open': base,
'high': base + np.random.normal(2, 1, size=size),
'low': base - np.random.normal(2, 1, size=size),
'close': base + np.random.normal(0, 1, size=size),
'volume': np.random.normal(200, size=size)
}
)
df = df.dropna()
return df
def test_merge_informative_pair():
data = generate_test_data('15m', 40)
informative = generate_test_data('1h', 40)
result = merge_informative_pair(data, informative, '15m', '1h', ffill=True)
assert isinstance(result, pd.DataFrame)
assert len(result) == len(data)
assert 'date' in result.columns
assert result['date'].equals(data['date'])
assert 'date_1h' in result.columns
assert 'open' in result.columns
assert 'open_1h' in result.columns
assert result['open'].equals(data['open'])
assert 'close' in result.columns
assert 'close_1h' in result.columns
assert result['close'].equals(data['close'])
assert 'volume' in result.columns
assert 'volume_1h' in result.columns
assert result['volume'].equals(data['volume'])
# First 4 rows are empty
assert result.iloc[0]['date_1h'] is pd.NaT
assert result.iloc[1]['date_1h'] is pd.NaT
assert result.iloc[2]['date_1h'] is pd.NaT
assert result.iloc[3]['date_1h'] is pd.NaT
# Next 4 rows contain the starting date (0:00)
assert result.iloc[4]['date_1h'] == result.iloc[0]['date']
assert result.iloc[5]['date_1h'] == result.iloc[0]['date']
assert result.iloc[6]['date_1h'] == result.iloc[0]['date']
assert result.iloc[7]['date_1h'] == result.iloc[0]['date']
# Next 4 rows contain the next Hourly date original date row 4
assert result.iloc[8]['date_1h'] == result.iloc[4]['date']
def test_merge_informative_pair_same():
data = generate_test_data('15m', 40)
informative = generate_test_data('15m', 40)
result = merge_informative_pair(data, informative, '15m', '15m', ffill=True)
assert isinstance(result, pd.DataFrame)
assert len(result) == len(data)
assert 'date' in result.columns
assert result['date'].equals(data['date'])
assert 'date_15m' in result.columns
assert 'open' in result.columns
assert 'open_15m' in result.columns
assert result['open'].equals(data['open'])
assert 'close' in result.columns
assert 'close_15m' in result.columns
assert result['close'].equals(data['close'])
assert 'volume' in result.columns
assert 'volume_15m' in result.columns
assert result['volume'].equals(data['volume'])
# Dates match 1:1
assert result['date_15m'].equals(result['date'])