Add support for download-data "until"
parent 4580127fa8
commit 11d447cd5a
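The change threads an optional end bound through the download path: a timerange stop becomes `end`/`end_ms` in `_load_cached_data_for_updating()`, is handed onwards as `until_ms`, and finally caps the exchange's candle-fetch loop. A minimal sketch of that timestamp plumbing, using a hypothetical stop value rather than the committed code:

    from datetime import datetime, timezone

    # Hypothetical stop timestamp in seconds (2021-01-01 00:00:00 UTC).
    stopts = 1_609_459_200

    # What the patched _load_cached_data_for_updating() computes:
    end = datetime.fromtimestamp(stopts, tz=timezone.utc)
    end_ms = int(end.timestamp() * 1000) if end else None

    # _download_pair_history() forwards this as until_ms to the exchange.
    until_ms = end_ms
    assert until_ms == 1_609_459_200_000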
diff --git a/freqtrade/data/history/history_utils.py b/freqtrade/data/history/history_utils.py
@@ -139,8 +139,9 @@ def _load_cached_data_for_updating(
     timeframe: str,
     timerange: Optional[TimeRange],
     data_handler: IDataHandler,
-    candle_type: CandleType
-) -> Tuple[DataFrame, Optional[int]]:
+    candle_type: CandleType,
+    prepend: bool = False,
+) -> Tuple[DataFrame, Optional[int], Optional[int]]:
     """
     Load cached data to download more data.
     If timerange is passed in, checks whether data from an before the stored data will be
@@ -150,9 +151,12 @@ def _load_cached_data_for_updating(
     Note: Only used by download_pair_history().
     """
     start = None
+    end = None
     if timerange:
         if timerange.starttype == 'date':
             start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
+        if timerange.stoptype == 'date':
+            end = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
 
     # Intentionally don't pass timerange in - since we need to load the full dataset.
     data = data_handler.ohlcv_load(pair, timeframe=timeframe,
@@ -160,14 +164,18 @@ def _load_cached_data_for_updating(
                                    drop_incomplete=True, warn_no_data=False,
                                    candle_type=candle_type)
     if not data.empty:
-        if start and start < data.iloc[0]['date']:
+        if not prepend and start and start < data.iloc[0]['date']:
             # Earlier data than existing data requested, redownload all
             data = DataFrame(columns=DEFAULT_DATAFRAME_COLUMNS)
         else:
-            start = data.iloc[-1]['date']
+            if prepend:
+                end = data.iloc[0]['date']
+            else:
+                start = data.iloc[-1]['date']
 
     start_ms = int(start.timestamp() * 1000) if start else None
-    return data, start_ms
+    end_ms = int(end.timestamp() * 1000) if end else None
+    return data, start_ms, end_ms
 
 
 def _download_pair_history(pair: str, *,
@@ -208,9 +216,12 @@ def _download_pair_history(pair: str, *,
             f'candle type: {candle_type} and store in {datadir}.'
         )
 
-        data, since_ms = _load_cached_data_for_updating(pair, timeframe, timerange,
-                                                        data_handler=data_handler,
-                                                        candle_type=candle_type)
+        data, since_ms, until_ms = _load_cached_data_for_updating(
+            pair, timeframe, timerange,
+            data_handler=data_handler,
+            candle_type=candle_type,
+            prepend=False)
+        # TODO: Prepend should come from a param
 
         logger.debug("Current Start: %s",
                      f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
@@ -225,6 +236,7 @@ def _download_pair_history(pair: str, *,
                                                   days=-new_pairs_days).int_timestamp * 1000,
                                               is_new_pair=data.empty,
                                               candle_type=candle_type,
+                                              until_ms=until_ms if until_ms else None
                                               )
         # TODO: Maybe move parsing to exchange class (?)
         new_dataframe = ohlcv_to_dataframe(new_data, timeframe, pair,
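The `prepend` branch above picks which side of the cached range to extend. A simplified, standalone restatement of that decision (assuming pandas Timestamps in a 'date' column; a sketch, not the committed code):

    from pandas import DataFrame, Timestamp

    def resolve_bounds(data: DataFrame, start, end, prepend: bool):
        # With cached data present, prepend=True caps `end` at the oldest
        # cached candle (fetch only the missing earlier history), while
        # prepend=False moves `start` to the newest cached candle (fetch
        # only the missing later history).
        if not data.empty:
            if prepend:
                end = data.iloc[0]['date']
            else:
                start = data.iloc[-1]['date']
        return start, end

    cached = DataFrame({'date': [Timestamp('2021-01-01', tz='UTC'),
                                 Timestamp('2021-01-02', tz='UTC')]})
    print(resolve_bounds(cached, None, None, prepend=True))   # (None, 2021-01-01)
    print(resolve_bounds(cached, None, None, prepend=False))  # (2021-01-02, None)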
diff --git a/freqtrade/exchange/binance.py b/freqtrade/exchange/binance.py
@@ -95,6 +95,7 @@ class Binance(Exchange):
     async def _async_get_historic_ohlcv(self, pair: str, timeframe: str,
                                         since_ms: int, candle_type: CandleType,
                                         is_new_pair: bool = False, raise_: bool = False,
+                                        until_ms: int = None
                                         ) -> Tuple[str, str, str, List]:
         """
         Overwrite to introduce "fast new pair" functionality by detecting the pair's listing date
@@ -115,7 +116,8 @@ class Binance(Exchange):
             since_ms=since_ms,
             is_new_pair=is_new_pair,
             raise_=raise_,
-            candle_type=candle_type
+            candle_type=candle_type,
+            until_ms=until_ms,
         )
 
     def funding_fee_cutoff(self, open_date: datetime):
diff --git a/freqtrade/exchange/exchange.py b/freqtrade/exchange/exchange.py
@@ -1645,7 +1645,8 @@ class Exchange:
 
     def get_historic_ohlcv(self, pair: str, timeframe: str,
                            since_ms: int, candle_type: CandleType,
-                           is_new_pair: bool = False) -> List:
+                           is_new_pair: bool = False,
+                           until_ms: int = None) -> List:
         """
         Get candle history using asyncio and returns the list of candles.
         Handles all async work for this.
@@ -1653,13 +1654,14 @@ class Exchange:
         :param pair: Pair to download
         :param timeframe: Timeframe to get data for
         :param since_ms: Timestamp in milliseconds to get history from
+        :param until_ms: Timestamp in milliseconds to get history up to
         :param candle_type: '', mark, index, premiumIndex, or funding_rate
         :return: List with candle (OHLCV) data
         """
         pair, _, _, data = self.loop.run_until_complete(
             self._async_get_historic_ohlcv(pair=pair, timeframe=timeframe,
-                                           since_ms=since_ms, is_new_pair=is_new_pair,
-                                           candle_type=candle_type))
+                                           since_ms=since_ms, until_ms=until_ms,
+                                           is_new_pair=is_new_pair, candle_type=candle_type))
         logger.info(f"Downloaded data for {pair} with length {len(data)}.")
         return data
 
@@ -1680,6 +1682,7 @@ class Exchange:
     async def _async_get_historic_ohlcv(self, pair: str, timeframe: str,
                                         since_ms: int, candle_type: CandleType,
                                         is_new_pair: bool = False, raise_: bool = False,
+                                        until_ms: int = None
                                         ) -> Tuple[str, str, str, List]:
         """
         Download historic ohlcv
@@ -1695,7 +1698,7 @@ class Exchange:
         )
         input_coroutines = [self._async_get_candle_history(
             pair, timeframe, candle_type, since) for since in
-            range(since_ms, arrow.utcnow().int_timestamp * 1000, one_call)]
+            range(since_ms, until_ms or (arrow.utcnow().int_timestamp * 1000), one_call)]
 
         data: List = []
         # Chunk requests into batches of 100 to avoid overwelming ccxt Throttling
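The behavioural change in `_async_get_historic_ohlcv()` is confined to the upper bound of the `range()` call: previously always "now", it is now `until_ms` when one is given. A sketch of the resulting request windows (`candle_windows` and the example numbers are illustrative; `arrow` is the library the hunk itself uses):

    import arrow

    def candle_windows(since_ms: int, until_ms, one_call: int):
        # one_call is the span a single candle request covers, in ms.
        stop = until_ms or (arrow.utcnow().int_timestamp * 1000)
        return list(range(since_ms, stop, one_call))

    # 1m candles, 500 candles per call -> one_call = 500 * 60_000 ms.
    windows = candle_windows(1_609_459_200_000, 1_609_545_600_000, 500 * 60_000)
    # Each entry becomes the `since` of one _async_get_candle_history() call.
    assert windows[0] == 1_609_459_200_000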
diff --git a/tests/data/test_history.py b/tests/data/test_history.py
@@ -223,42 +223,65 @@ def test_load_cached_data_for_updating(mocker, testdatadir) -> None:
     # timeframe starts earlier than the cached data
     # should fully update data
     timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
-    data, start_ts = _load_cached_data_for_updating(
+    data, start_ts, end_ts = _load_cached_data_for_updating(
         'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
     assert data.empty
     assert start_ts == test_data[0][0] - 1000
+    assert end_ts is None
+
+    # timeframe starts earlier than the cached data - prepending
+
+    timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
+    data, start_ts, end_ts = _load_cached_data_for_updating(
+        'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT, True)
+    assert_frame_equal(data, test_data_df.iloc[:-1])
+    assert start_ts == test_data[0][0] - 1000
+    assert end_ts == test_data[0][0]
 
     # timeframe starts in the center of the cached data
     # should return the cached data w/o the last item
     timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
-    data, start_ts = _load_cached_data_for_updating(
+    data, start_ts, end_ts = _load_cached_data_for_updating(
         'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
 
     assert_frame_equal(data, test_data_df.iloc[:-1])
     assert test_data[-2][0] <= start_ts < test_data[-1][0]
+    assert end_ts is None
 
     # timeframe starts after the cached data
     # should return the cached data w/o the last item
     timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 100, 0)
-    data, start_ts = _load_cached_data_for_updating(
+    data, start_ts, end_ts = _load_cached_data_for_updating(
         'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
     assert_frame_equal(data, test_data_df.iloc[:-1])
     assert test_data[-2][0] <= start_ts < test_data[-1][0]
+    assert end_ts is None
 
     # no datafile exist
     # should return timestamp start time
     timerange = TimeRange('date', None, now_ts - 10000, 0)
-    data, start_ts = _load_cached_data_for_updating(
+    data, start_ts, end_ts = _load_cached_data_for_updating(
         'NONEXIST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
     assert data.empty
     assert start_ts == (now_ts - 10000) * 1000
+    assert end_ts is None
+
+    # no datafile exist
+    # should return timestamp start and end time time
+    timerange = TimeRange('date', 'date', now_ts - 1000000, now_ts - 100000)
+    data, start_ts, end_ts = _load_cached_data_for_updating(
+        'NONEXIST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
+    assert data.empty
+    assert start_ts == (now_ts - 1000000) * 1000
+    assert end_ts == (now_ts - 100000) * 1000
 
     # no datafile exist, no timeframe is set
     # should return an empty array and None
-    data, start_ts = _load_cached_data_for_updating(
+    data, start_ts, end_ts = _load_cached_data_for_updating(
         'NONEXIST/BTC', '1m', None, data_handler, CandleType.SPOT)
     assert data.empty
     assert start_ts is None
+    assert end_ts is None
 
 
 @pytest.mark.parametrize('candle_type,subdir,file_tail', [
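For reference, the bounded case exercised by the new test corresponds to a `TimeRange` carrying both a start and a stop type. A minimal sketch using the constructor order seen in the tests above (the import path is the usual freqtrade one, assumed here):

    from freqtrade.configuration import TimeRange

    # TimeRange(starttype, stoptype, startts, stopts), timestamps in seconds.
    bounded = TimeRange('date', 'date', 1_609_459_200, 1_612_137_600)
    open_ended = TimeRange('date', None, 1_609_459_200, 0)

    assert bounded.stoptype == 'date'   # -> end_ts becomes stopts * 1000
    assert open_ended.stoptype is None  # -> end_ts stays None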