2017-11-18 07:34:32 +00:00
|
|
|
"""
|
2018-12-12 18:57:25 +00:00
|
|
|
Functions to convert data from one format to another
|
2017-11-18 07:34:32 +00:00
|
|
|
"""
|
2018-03-25 19:37:14 +00:00
|
|
|
import logging
|
2019-12-25 14:47:04 +00:00
|
|
|
from datetime import datetime, timezone
|
2019-04-09 09:27:35 +00:00
|
|
|
|
2018-08-05 04:41:06 +00:00
|
|
|
import pandas as pd
|
2018-03-02 15:22:00 +00:00
|
|
|
from pandas import DataFrame, to_datetime
|
2018-03-17 21:44:47 +00:00
|
|
|
|
2019-12-25 14:47:04 +00:00
|
|
|
from freqtrade.configuration.timerange import TimeRange
|
2018-12-30 15:07:47 +00:00
|
|
|
|
2018-03-25 19:37:14 +00:00
|
|
|
# Module-level logger, named after this module via __name__
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2019-11-02 19:19:13 +00:00
|
|
|
def parse_ticker_dataframe(ticker: list, timeframe: str, pair: str, *,
                           fill_missing: bool = True,
                           drop_incomplete: bool = True) -> DataFrame:
    """
    Converts a ticker-list (format ccxt.fetch_ohlcv) to a Dataframe
    :param ticker: ticker list, as returned by exchange.async_get_candle_history
    :param timeframe: timeframe (e.g. 5m). Used to fill up eventual missing data
    :param pair: Pair this data is for (used to warn if fillup was necessary)
    :param fill_missing: fill up missing candles with 0 candles
                         (see ohlcv_fill_up_missing_data for details)
    :param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete
    :return: DataFrame
    """
    logger.debug("Parsing tickerlist to dataframe")
    cols = ['date', 'open', 'high', 'low', 'close', 'volume']
    frame = DataFrame(ticker, columns=cols)

    # The 'date' values are interpreted as epoch milliseconds and made UTC-aware.
    # No format inference is requested: with an explicit unit there is nothing
    # to infer, and the `infer_datetime_format` flag is deprecated in pandas 2.x.
    frame['date'] = to_datetime(frame['date'], unit='ms', utc=True)

    # Some exchanges return int values for volume and even for ohlc.
    # Convert them since TA-LIB indicators used in the strategy assume floats
    # and fail with exception...
    frame = frame.astype(dtype={'open': 'float', 'high': 'float', 'low': 'float',
                                'close': 'float', 'volume': 'float'})

    # De-duplication, dropping the last candle and gap filling are delegated.
    return clean_ohlcv_dataframe(frame, timeframe, pair,
                                 fill_missing=fill_missing,
                                 drop_incomplete=drop_incomplete)
|
2019-02-10 18:52:33 +00:00
|
|
|
|
2019-12-25 12:24:09 +00:00
|
|
|
|
|
|
|
def clean_ohlcv_dataframe(data: DataFrame, timeframe: str, pair: str, *,
                          fill_missing: bool = True,
                          drop_incomplete: bool = True) -> DataFrame:
    """
    Cleanse an ohlcv dataframe by
      * Grouping it by date (removes duplicate ticks)
      * Dropping the last candle if requested
      * Filling up missing data (if requested)
    :param data: DataFrame containing ohlcv data.
    :param timeframe: timeframe (e.g. 5m). Used to fill up eventual missing data
    :param pair: Pair this data is for (used to warn if fillup was necessary)
    :param fill_missing: fill up missing candles with 0 candles
                         (see ohlcv_fill_up_missing_data for details)
    :param drop_incomplete: Drop the last candle of the dataframe, assuming it's incomplete
    :return: DataFrame
    """
    # How rows sharing the same date are merged into a single candle
    duplicate_rules = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'max',
    }
    by_date = data.groupby(by='date', as_index=False, sort=True)
    data = by_date.agg(duplicate_rules)

    if drop_incomplete:
        # The grouped frame is sorted by date, so the final row is the newest candle
        newest = data.tail(1).index
        data.drop(newest, inplace=True)
        logger.debug('Dropping last candle')

    if not fill_missing:
        return data
    return ohlcv_fill_up_missing_data(data, timeframe, pair)
|
2018-08-05 04:41:06 +00:00
|
|
|
|
|
|
|
|
2019-11-02 19:19:13 +00:00
|
|
|
def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str) -> DataFrame:
    """
    Fills up missing data with 0 volume rows,
    using the previous close as price for "open", "high" "low" and "close", volume is set to 0

    :param dataframe: DataFrame with a 'date' column plus open/high/low/close/volume columns
    :param timeframe: timeframe (e.g. 5m) that defines the resampling interval
    :param pair: Pair this data is for (only used in the log message)
    :return: resampled DataFrame with 'date' as a regular column again
    """
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from freqtrade.exchange import timeframe_to_minutes

    ohlc_dict = {
        'open': 'first',
        'high': 'max',
        'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }
    ticker_minutes = timeframe_to_minutes(timeframe)
    # Resample to create "NAN" values
    df = dataframe.resample(f'{ticker_minutes}min', on='date').agg(ohlc_dict)

    # Forwardfill close for missing columns.
    # Series.ffill() replaces the deprecated fillna(method='ffill') spelling.
    df['close'] = df['close'].ffill()
    # Use close for "open, high, low"
    df.loc[:, ['open', 'high', 'low']] = df[['open', 'high', 'low']].fillna(
        value={'open': df['close'],
               'high': df['close'],
               'low': df['close'],
               })
    # resample() moved 'date' into the index; turn it back into a column
    df.reset_index(inplace=True)

    # Only report when the resample actually changed the number of rows
    len_before = len(dataframe)
    len_after = len(df)
    if len_before != len_after:
        logger.info(f"Missing data fillup for {pair}: before: {len_before} - after: {len_after}")
    return df
|
|
|
|
|
|
|
|
|
2019-12-25 14:47:04 +00:00
|
|
|
def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
    """
    Restrict a dataframe to the rows that fall inside a timerange (bounds inclusive)
    :param df: Dataframe to trim
    :param timerange: timerange; a bound is only applied when its type is 'date'
    :param df_date_col: Column in the dataframe to use as Date column
    :return: trimmed dataframe
    """
    if timerange.starttype == 'date':
        # startts is read as a unix timestamp in seconds and made UTC-aware
        lower_bound = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
        not_too_early = df[df_date_col].ge(lower_bound)
        df = df.loc[not_too_early, :]

    if timerange.stoptype == 'date':
        upper_bound = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
        not_too_late = df[df_date_col].le(upper_bound)
        df = df.loc[not_too_late, :]

    return df
|
|
|
|
|
|
|
|
|
2018-08-05 13:08:07 +00:00
|
|
|
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
    """
    Combine order book bids and asks into one dataframe, laid out as suggested by creslin
    -------------------------------------------------------------------
     b_sum       b_size       bids       asks       a_size       a_sum
    -------------------------------------------------------------------
    :param bids: rows of two values each, loaded as the 'bids' and 'b_size' columns
    :param asks: rows of two values each, loaded as the 'asks' and 'a_size' columns
    :return: DataFrame with the six columns shown above
    """
    bid_side = DataFrame(bids, columns=['bids', 'b_size'])
    # Running total of the bid sizes, in the order the rows were given
    bid_side['b_sum'] = bid_side['b_size'].cumsum()

    ask_side = DataFrame(asks, columns=['asks', 'a_size'])
    # Running total of the ask sizes, in the order the rows were given
    ask_side['a_sum'] = ask_side['a_size'].cumsum()

    # Bid columns go on the left and ask columns on the right, mirrored around the prices
    bid_layout = ['b_sum', 'b_size', 'bids']
    ask_layout = ['asks', 'a_size', 'a_sum']
    pieces = [bid_side[name] for name in bid_layout]
    pieces += [ask_side[name] for name in ask_layout]
    return pd.concat(pieces, axis=1, keys=bid_layout + ask_layout)
|
2019-10-13 17:21:27 +00:00
|
|
|
|
|
|
|
|
2019-10-14 04:19:59 +00:00
|
|
|
def trades_to_ohlcv(trades: list, timeframe: str) -> list:
    """
    Converts trades list to ohlcv list
    :param trades: List of trades, as returned by ccxt.fetch_trades.
                   The 'datetime', 'price' and 'amount' fields of each trade are used.
    :param timeframe: Ticker timeframe to resample data to
    :return: ohlcv timeframe as list (as returned by ccxt.fetch_ohlcv),
             one (date, open, high, low, close, volume) tuple per candle.
             An empty trades list results in an empty list.
    """
    # No trades means no candles. Without this guard an empty list produces a
    # DataFrame without columns and the 'datetime' lookup below raises KeyError.
    if len(trades) == 0:
        return []

    # Imported inside the function rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from freqtrade.exchange import timeframe_to_minutes

    ticker_minutes = timeframe_to_minutes(timeframe)
    df = pd.DataFrame(trades)
    df['datetime'] = pd.to_datetime(df['datetime'])
    df = df.set_index('datetime')

    df_new = df['price'].resample(f'{ticker_minutes}min').ohlc()
    df_new['volume'] = df['amount'].resample(f'{ticker_minutes}min').sum()
    # Integer view of the index divided down to milliseconds
    # NOTE(review): assumes the index has nanosecond resolution - confirm
    df_new['date'] = df_new.index.astype("int64") // 10 ** 6
    # Drop candles without trades (their open/high/low/close are NaN)
    df_new = df_new.dropna()
    columns = ["date", "open", "high", "low", "close", "volume"]
    # Transpose the per-column lists into one tuple per candle
    return list(zip(*[df_new[x].values.tolist() for x in columns]))
|