stable/freqtrade/data/converter.py

116 lines
4.2 KiB
Python
Raw Normal View History

2017-11-18 07:34:32 +00:00
"""
2018-12-12 18:57:25 +00:00
Functions to convert data from one format to another
2017-11-18 07:34:32 +00:00
"""
2018-03-25 19:37:14 +00:00
import logging
2018-08-05 04:41:06 +00:00
import pandas as pd
2018-03-02 15:22:00 +00:00
from pandas import DataFrame, to_datetime
2018-03-17 21:44:47 +00:00
2018-12-30 15:07:47 +00:00
2018-03-25 19:37:14 +00:00
logger = logging.getLogger(__name__)
2019-06-09 12:35:58 +00:00
def parse_ticker_dataframe(ticker: list, ticker_interval: str, *,
                           fill_missing: bool = True,
                           drop_incomplete: bool = True) -> DataFrame:
    """
    Converts a ticker-list (format ccxt.fetch_ohlcv) to a Dataframe
    :param ticker: ticker list, as returned by exchange.async_get_candle_history.
        Each entry is read as [date (epoch milliseconds), open, high, low, close, volume]
    :param ticker_interval: ticker_interval (e.g. 5m). Used to fill up eventual missing data
    :param fill_missing: fill up missing candles with 0 candles
        (see ohlcv_fill_up_missing_data for details)
    :param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete
    :return: DataFrame with the columns date, open, high, low, close, volume
    """
    logger.debug("Parsing tickerlist to dataframe")
    cols = ['date', 'open', 'high', 'low', 'close', 'volume']
    frame = DataFrame(ticker, columns=cols)

    # The date column holds epoch milliseconds, so only `unit` is needed.
    # `infer_datetime_format` has no effect on numeric input and is deprecated
    # in pandas >= 2.0 (it emits a warning there), hence it is not passed.
    frame['date'] = to_datetime(frame['date'], unit='ms', utc=True)

    # Some exchanges return int values for volume and even for ohlc.
    # Convert them since TA-LIB indicators used in the strategy assume floats
    # and fail with exception...
    frame = frame.astype(dtype={'open': 'float', 'high': 'float', 'low': 'float',
                                'close': 'float', 'volume': 'float'})

    # group by date and aggregate results to eliminate duplicate ticks
    frame = frame.groupby(by='date', as_index=False, sort=True).agg({
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'max',
    })

    # eliminate partial candle
    if drop_incomplete:
        frame = frame.drop(frame.tail(1).index)
        logger.debug('Dropping last candle')

    if fill_missing:
        return ohlcv_fill_up_missing_data(frame, ticker_interval)
    return frame
2018-08-05 04:41:06 +00:00
2018-12-30 15:07:47 +00:00
def ohlcv_fill_up_missing_data(dataframe: DataFrame, ticker_interval: str) -> DataFrame:
    """
    Fills up missing data with 0 volume rows,
    using the previous close as price for "open", "high" "low" and "close", volume is set to 0
    :param dataframe: OHLCV dataframe with a 'date' column plus
        'open', 'high', 'low', 'close' and 'volume' columns
    :param ticker_interval: ticker_interval (e.g. 5m), converted to minutes to
        build the resampling frequency
    :return: resampled DataFrame with 'date' restored as a regular column
    """
    # Function-scope import, kept as in the original
    # (NOTE(review): presumably avoids a circular import - confirm).
    from freqtrade.exchange import timeframe_to_minutes

    ohlc_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum',
    }
    ticker_minutes = timeframe_to_minutes(ticker_interval)

    # Resample to create "NAN" values for intervals without a candle
    df = dataframe.resample(f'{ticker_minutes}min', on='date').agg(ohlc_dict)

    # Forwardfill close for missing rows.
    # `.ffill()` replaces `fillna(method='ffill')`, which is deprecated in pandas >= 2.1.
    df['close'] = df['close'].ffill()

    # Use the (forward-filled) close for missing "open, high, low"
    for column in ('open', 'high', 'low'):
        df[column] = df[column].fillna(df['close'])

    df.reset_index(inplace=True)

    len_before = len(dataframe)
    len_after = len(df)
    if len_before != len_after:
        logger.info("Missing data fillup: before: %s - after: %s", len_before, len_after)
    return df
2018-08-05 13:08:07 +00:00
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
    """
    Combines both sides of an order book into one dataframe, laid out as
    (format per suggestion by creslin):
    -------------------------------------------------------------------
     b_sum       b_size       bids       asks       a_size       a_sum
    -------------------------------------------------------------------
    :param bids: list of bid entries, each read as [bids, b_size]
    :param asks: list of ask entries, each read as [asks, a_size]
    :return: DataFrame with the columns b_sum, b_size, bids, asks, a_size, a_sum,
        where b_sum / a_sum are the cumulative sums of b_size / a_size
    """
    bid_side = DataFrame(bids, columns=['bids', 'b_size'])
    # running total of the bid sizes
    bid_side['b_sum'] = bid_side['b_size'].cumsum()

    ask_side = DataFrame(asks, columns=['asks', 'a_size'])
    # running total of the ask sizes
    ask_side['a_sum'] = ask_side['a_size'].cumsum()

    ordered_columns = [
        bid_side['b_sum'], bid_side['b_size'], bid_side['bids'],
        ask_side['asks'], ask_side['a_size'], ask_side['a_sum'],
    ]
    column_names = ['b_sum', 'b_size', 'bids', 'asks', 'a_size', 'a_sum']
    return pd.concat(ordered_columns, axis=1, keys=column_names)