Add documentation for merge_informative_pair helper

This commit is contained in:
Matthias 2020-09-04 20:02:31 +02:00
parent bd4f3d838a
commit 7bc8927914
4 changed files with 71 additions and 23 deletions

View File

@ -483,9 +483,8 @@ if self.dp:
### Complete Data-provider sample ### Complete Data-provider sample
```python ```python
from freqtrade.strategy import IStrategy, timeframe_to_minutes from freqtrade.strategy import IStrategy, merge_informative_pair
from pandas import DataFrame from pandas import DataFrame
import pandas as pd
class SampleStrategy(IStrategy): class SampleStrategy(IStrategy):
# strategy init stuff... # strategy init stuff...
@ -517,23 +516,12 @@ class SampleStrategy(IStrategy):
# Get the 14 day rsi # Get the 14 day rsi
informative['rsi'] = ta.RSI(informative, timeperiod=14) informative['rsi'] = ta.RSI(informative, timeperiod=14)
# Rename columns to be unique # Use the helper function merge_informative_pair to safely merge the pair
informative.columns = [f"{col}_{inf_tf}" for col in informative.columns] # Automatically renames the columns and merges a shorter timeframe dataframe and a longer timeframe informative pair
# Assuming inf_tf = '1d' - then the columns will now be:
# date_1d, open_1d, high_1d, low_1d, close_1d, rsi_1d
# Shift date by 1 candle
# This is necessary since the data is always the "open date"
# and a 15m candle starting at 12:15 should not know the close of the 1h candle from 12:00 to 13:00
minutes = timeframe_to_minutes(inf_tf)
informative['date_merge'] = informative["date"] + pd.to_timedelta(minutes, 'm')
# Combine the 2 dataframes
# all indicators on the informative sample MUST be calculated before this point
dataframe = pd.merge(dataframe, informative, left_on='date', right_on=f'date_merge_{inf_tf}', how='left')
# FFill to have the 1d value available in every row throughout the day. # FFill to have the 1d value available in every row throughout the day.
# Without this, comparisons would only work once per day. # Without this, comparisons would only work once per day.
dataframe = dataframe.ffill() # See below for the full documentation of this method
dataframe = merge_informative_pair(dataframe, informative, inf_tf, ffill=True)
# Calculate rsi of the original dataframe (5m timeframe) # Calculate rsi of the original dataframe (5m timeframe)
dataframe['rsi'] = ta.RSI(dataframe, timeperiod=14) dataframe['rsi'] = ta.RSI(dataframe, timeperiod=14)
@ -557,6 +545,63 @@ class SampleStrategy(IStrategy):
*** ***
## Helper functions
### *merge_informative_pair()*
This method helps you merge an informative pair to a regular dataframe without lookahead bias.
It's there to help you merge the dataframe in a safe and consistent way.
Options:
- Rename the columns for you to create unique columns
- Merge the dataframe without lookahead bias
- Forward-fill (optional)
All columns of the informative dataframe will be available on the returned dataframe under renamed column names:
!!! Example "Column renaming"
Assuming `inf_tf = '1d'` the resulting columns will be:
``` python
'date', 'open', 'high', 'low', 'close', 'rsi' # from the original dataframe
'date_1d', 'open_1d', 'high_1d', 'low_1d', 'close_1d', 'rsi_1d' # from the informative dataframe
```
??? Example "Column renaming - 1h"
Assuming `inf_tf = '1h'` the resulting columns will be:
``` python
'date', 'open', 'high', 'low', 'close', 'rsi' # from the original dataframe
'date_1h', 'open_1h', 'high_1h', 'low_1h', 'close_1h', 'rsi_1h' # from the informative dataframe
```
??? Example "Custom implementation"
A custom implementation for this is possible, and can be done as follows:
``` python
# Rename columns to be unique
informative.columns = [f"{col}_{inf_tf}" for col in informative.columns]
# Assuming inf_tf = '1d' - then the columns will now be:
# date_1d, open_1d, high_1d, low_1d, close_1d, rsi_1d
# Shift date by 1 candle
# This is necessary since the data is always the "open date"
# and a 15m candle starting at 12:15 should not know the close of the 1h candle from 12:00 to 13:00
minutes = timeframe_to_minutes(inf_tf)
informative[f'date_merge_{inf_tf}'] = informative[f"date_{inf_tf}"] + pd.to_timedelta(minutes, 'm')
# Combine the 2 dataframes
# all indicators on the informative sample MUST be calculated before this point
dataframe = pd.merge(dataframe, informative, left_on='date', right_on=f'date_merge_{inf_tf}', how='left')
# FFill to have the 1d value available in every row throughout the day.
# Without this, comparisons would only work once per day.
dataframe = dataframe.ffill()
```
***
## Additional data (Wallets) ## Additional data (Wallets)
The strategy provides access to the `Wallets` object. This contains the current balances on the exchange. The strategy provides access to the `Wallets` object. This contains the current balances on the exchange.

View File

@ -2,4 +2,4 @@
from freqtrade.exchange import (timeframe_to_minutes, timeframe_to_prev_date, from freqtrade.exchange import (timeframe_to_minutes, timeframe_to_prev_date,
timeframe_to_seconds, timeframe_to_next_date, timeframe_to_msecs) timeframe_to_seconds, timeframe_to_next_date, timeframe_to_msecs)
from freqtrade.strategy.interface import IStrategy from freqtrade.strategy.interface import IStrategy
from freqtrade.strategy.strategy_helper import merge_informative_pairs from freqtrade.strategy.strategy_helper import merge_informative_pair

View File

@ -2,7 +2,7 @@ import pandas as pd
from freqtrade.exchange import timeframe_to_minutes from freqtrade.exchange import timeframe_to_minutes
def merge_informative_pairs(dataframe: pd.DataFrame, informative: pd.DataFrame, def merge_informative_pair(dataframe: pd.DataFrame, informative: pd.DataFrame,
timeframe_inf: str, ffill: bool = True) -> pd.DataFrame: timeframe_inf: str, ffill: bool = True) -> pd.DataFrame:
""" """
Correctly merge informative samples to the original dataframe, avoiding lookahead bias. Correctly merge informative samples to the original dataframe, avoiding lookahead bias.
@ -15,6 +15,9 @@ def merge_informative_pairs(dataframe: pd.DataFrame, informative: pd.DataFrame,
This way, the 14:00 1h candle is merged to 15:00 15m candle, since the 14:00 1h candle is the This way, the 14:00 1h candle is merged to 15:00 15m candle, since the 14:00 1h candle is the
last candle that's closed at 15:00, 15:15, 15:30 or 15:45. last candle that's closed at 15:00, 15:15, 15:30 or 15:45.
Assuming timeframe_inf = '1d' - then the resulting columns will be:
date_1d, open_1d, high_1d, low_1d, close_1d, rsi_1d
:param dataframe: Original dataframe :param dataframe: Original dataframe
:param informative: Informative pair, most likely loaded via dp.get_pair_dataframe :param informative: Informative pair, most likely loaded via dp.get_pair_dataframe
:param timeframe_inf: Timeframe of the informative pair sample. :param timeframe_inf: Timeframe of the informative pair sample.

View File

@ -1,7 +1,7 @@
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from freqtrade.strategy import merge_informative_pairs, timeframe_to_minutes from freqtrade.strategy import merge_informative_pair, timeframe_to_minutes
def generate_test_data(timeframe: str, size: int): def generate_test_data(timeframe: str, size: int):
@ -24,11 +24,11 @@ def generate_test_data(timeframe: str, size: int):
return df return df
def test_merge_informative_pairs(): def test_merge_informative_pair():
data = generate_test_data('15m', 40) data = generate_test_data('15m', 40)
informative = generate_test_data('1h', 40) informative = generate_test_data('1h', 40)
result = merge_informative_pairs(data, informative, '1h', ffill=True) result = merge_informative_pair(data, informative, '1h', ffill=True)
assert isinstance(result, pd.DataFrame) assert isinstance(result, pd.DataFrame)
assert len(result) == len(data) assert len(result) == len(data)
assert 'date' in result.columns assert 'date' in result.columns