Add support for download-data "until"

This commit is contained in:
Matthias 2022-04-30 15:28:01 +02:00
parent 4580127fa8
commit 11d447cd5a
4 changed files with 58 additions and 18 deletions

View File

@ -139,8 +139,9 @@ def _load_cached_data_for_updating(
timeframe: str, timeframe: str,
timerange: Optional[TimeRange], timerange: Optional[TimeRange],
data_handler: IDataHandler, data_handler: IDataHandler,
candle_type: CandleType candle_type: CandleType,
) -> Tuple[DataFrame, Optional[int]]: prepend: bool = False,
) -> Tuple[DataFrame, Optional[int], Optional[int]]:
""" """
Load cached data to download more data. Load cached data to download more data.
If timerange is passed in, checks whether data from an before the stored data will be If timerange is passed in, checks whether data from an before the stored data will be
@ -150,9 +151,12 @@ def _load_cached_data_for_updating(
Note: Only used by download_pair_history(). Note: Only used by download_pair_history().
""" """
start = None start = None
end = None
if timerange: if timerange:
if timerange.starttype == 'date': if timerange.starttype == 'date':
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
if timerange.stoptype == 'date':
end = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
# Intentionally don't pass timerange in - since we need to load the full dataset. # Intentionally don't pass timerange in - since we need to load the full dataset.
data = data_handler.ohlcv_load(pair, timeframe=timeframe, data = data_handler.ohlcv_load(pair, timeframe=timeframe,
@ -160,14 +164,18 @@ def _load_cached_data_for_updating(
drop_incomplete=True, warn_no_data=False, drop_incomplete=True, warn_no_data=False,
candle_type=candle_type) candle_type=candle_type)
if not data.empty: if not data.empty:
if start and start < data.iloc[0]['date']: if not prepend and start and start < data.iloc[0]['date']:
# Earlier data than existing data requested, redownload all # Earlier data than existing data requested, redownload all
data = DataFrame(columns=DEFAULT_DATAFRAME_COLUMNS) data = DataFrame(columns=DEFAULT_DATAFRAME_COLUMNS)
else: else:
start = data.iloc[-1]['date'] if prepend:
end = data.iloc[0]['date']
else:
start = data.iloc[-1]['date']
start_ms = int(start.timestamp() * 1000) if start else None start_ms = int(start.timestamp() * 1000) if start else None
return data, start_ms end_ms = int(end.timestamp() * 1000) if end else None
return data, start_ms, end_ms
def _download_pair_history(pair: str, *, def _download_pair_history(pair: str, *,
@ -208,9 +216,12 @@ def _download_pair_history(pair: str, *,
f'candle type: {candle_type} and store in {datadir}.' f'candle type: {candle_type} and store in {datadir}.'
) )
data, since_ms = _load_cached_data_for_updating(pair, timeframe, timerange, data, since_ms, until_ms = _load_cached_data_for_updating(
data_handler=data_handler, pair, timeframe, timerange,
candle_type=candle_type) data_handler=data_handler,
candle_type=candle_type,
prepend=False)
# TODO: Prepend should come from a param
logger.debug("Current Start: %s", logger.debug("Current Start: %s",
f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None') f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
@ -225,6 +236,7 @@ def _download_pair_history(pair: str, *,
days=-new_pairs_days).int_timestamp * 1000, days=-new_pairs_days).int_timestamp * 1000,
is_new_pair=data.empty, is_new_pair=data.empty,
candle_type=candle_type, candle_type=candle_type,
until_ms=until_ms if until_ms else None
) )
# TODO: Maybe move parsing to exchange class (?) # TODO: Maybe move parsing to exchange class (?)
new_dataframe = ohlcv_to_dataframe(new_data, timeframe, pair, new_dataframe = ohlcv_to_dataframe(new_data, timeframe, pair,

View File

@ -95,6 +95,7 @@ class Binance(Exchange):
async def _async_get_historic_ohlcv(self, pair: str, timeframe: str, async def _async_get_historic_ohlcv(self, pair: str, timeframe: str,
since_ms: int, candle_type: CandleType, since_ms: int, candle_type: CandleType,
is_new_pair: bool = False, raise_: bool = False, is_new_pair: bool = False, raise_: bool = False,
until_ms: int = None
) -> Tuple[str, str, str, List]: ) -> Tuple[str, str, str, List]:
""" """
Overwrite to introduce "fast new pair" functionality by detecting the pair's listing date Overwrite to introduce "fast new pair" functionality by detecting the pair's listing date
@ -115,7 +116,8 @@ class Binance(Exchange):
since_ms=since_ms, since_ms=since_ms,
is_new_pair=is_new_pair, is_new_pair=is_new_pair,
raise_=raise_, raise_=raise_,
candle_type=candle_type candle_type=candle_type,
until_ms=until_ms,
) )
def funding_fee_cutoff(self, open_date: datetime): def funding_fee_cutoff(self, open_date: datetime):

View File

@ -1645,7 +1645,8 @@ class Exchange:
def get_historic_ohlcv(self, pair: str, timeframe: str, def get_historic_ohlcv(self, pair: str, timeframe: str,
since_ms: int, candle_type: CandleType, since_ms: int, candle_type: CandleType,
is_new_pair: bool = False) -> List: is_new_pair: bool = False,
until_ms: int = None) -> List:
""" """
Get candle history using asyncio and returns the list of candles. Get candle history using asyncio and returns the list of candles.
Handles all async work for this. Handles all async work for this.
@ -1653,13 +1654,14 @@ class Exchange:
:param pair: Pair to download :param pair: Pair to download
:param timeframe: Timeframe to get data for :param timeframe: Timeframe to get data for
:param since_ms: Timestamp in milliseconds to get history from :param since_ms: Timestamp in milliseconds to get history from
:param until_ms: Timestamp in milliseconds to get history up to
:param candle_type: '', mark, index, premiumIndex, or funding_rate :param candle_type: '', mark, index, premiumIndex, or funding_rate
:return: List with candle (OHLCV) data :return: List with candle (OHLCV) data
""" """
pair, _, _, data = self.loop.run_until_complete( pair, _, _, data = self.loop.run_until_complete(
self._async_get_historic_ohlcv(pair=pair, timeframe=timeframe, self._async_get_historic_ohlcv(pair=pair, timeframe=timeframe,
since_ms=since_ms, is_new_pair=is_new_pair, since_ms=since_ms, until_ms=until_ms,
candle_type=candle_type)) is_new_pair=is_new_pair, candle_type=candle_type))
logger.info(f"Downloaded data for {pair} with length {len(data)}.") logger.info(f"Downloaded data for {pair} with length {len(data)}.")
return data return data
@ -1680,6 +1682,7 @@ class Exchange:
async def _async_get_historic_ohlcv(self, pair: str, timeframe: str, async def _async_get_historic_ohlcv(self, pair: str, timeframe: str,
since_ms: int, candle_type: CandleType, since_ms: int, candle_type: CandleType,
is_new_pair: bool = False, raise_: bool = False, is_new_pair: bool = False, raise_: bool = False,
until_ms: int = None
) -> Tuple[str, str, str, List]: ) -> Tuple[str, str, str, List]:
""" """
Download historic ohlcv Download historic ohlcv
@ -1695,7 +1698,7 @@ class Exchange:
) )
input_coroutines = [self._async_get_candle_history( input_coroutines = [self._async_get_candle_history(
pair, timeframe, candle_type, since) for since in pair, timeframe, candle_type, since) for since in
range(since_ms, arrow.utcnow().int_timestamp * 1000, one_call)] range(since_ms, until_ms or (arrow.utcnow().int_timestamp * 1000), one_call)]
data: List = [] data: List = []
# Chunk requests into batches of 100 to avoid overwelming ccxt Throttling # Chunk requests into batches of 100 to avoid overwelming ccxt Throttling

View File

@ -223,42 +223,65 @@ def test_load_cached_data_for_updating(mocker, testdatadir) -> None:
# timeframe starts earlier than the cached data # timeframe starts earlier than the cached data
# should fully update data # should fully update data
timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0) timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
data, start_ts = _load_cached_data_for_updating( data, start_ts, end_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT) 'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert data.empty assert data.empty
assert start_ts == test_data[0][0] - 1000 assert start_ts == test_data[0][0] - 1000
assert end_ts is None
# timeframe starts earlier than the cached data - prepending
timerange = TimeRange('date', None, test_data[0][0] / 1000 - 1, 0)
data, start_ts, end_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT, True)
assert_frame_equal(data, test_data_df.iloc[:-1])
assert start_ts == test_data[0][0] - 1000
assert end_ts == test_data[0][0]
# timeframe starts in the center of the cached data # timeframe starts in the center of the cached data
# should return the cached data w/o the last item # should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0) timerange = TimeRange('date', None, test_data[0][0] / 1000 + 1, 0)
data, start_ts = _load_cached_data_for_updating( data, start_ts, end_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT) 'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert_frame_equal(data, test_data_df.iloc[:-1]) assert_frame_equal(data, test_data_df.iloc[:-1])
assert test_data[-2][0] <= start_ts < test_data[-1][0] assert test_data[-2][0] <= start_ts < test_data[-1][0]
assert end_ts is None
# timeframe starts after the cached data # timeframe starts after the cached data
# should return the cached data w/o the last item # should return the cached data w/o the last item
timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 100, 0) timerange = TimeRange('date', None, test_data[-1][0] / 1000 + 100, 0)
data, start_ts = _load_cached_data_for_updating( data, start_ts, end_ts = _load_cached_data_for_updating(
'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT) 'UNITTEST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert_frame_equal(data, test_data_df.iloc[:-1]) assert_frame_equal(data, test_data_df.iloc[:-1])
assert test_data[-2][0] <= start_ts < test_data[-1][0] assert test_data[-2][0] <= start_ts < test_data[-1][0]
assert end_ts is None
# no datafile exist # no datafile exist
# should return timestamp start time # should return timestamp start time
timerange = TimeRange('date', None, now_ts - 10000, 0) timerange = TimeRange('date', None, now_ts - 10000, 0)
data, start_ts = _load_cached_data_for_updating( data, start_ts, end_ts = _load_cached_data_for_updating(
'NONEXIST/BTC', '1m', timerange, data_handler, CandleType.SPOT) 'NONEXIST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert data.empty assert data.empty
assert start_ts == (now_ts - 10000) * 1000 assert start_ts == (now_ts - 10000) * 1000
assert end_ts is None
# no datafile exist
# should return timestamp start and end time time
timerange = TimeRange('date', 'date', now_ts - 1000000, now_ts - 100000)
data, start_ts, end_ts = _load_cached_data_for_updating(
'NONEXIST/BTC', '1m', timerange, data_handler, CandleType.SPOT)
assert data.empty
assert start_ts == (now_ts - 1000000) * 1000
assert end_ts == (now_ts - 100000) * 1000
# no datafile exist, no timeframe is set # no datafile exist, no timeframe is set
# should return an empty array and None # should return an empty array and None
data, start_ts = _load_cached_data_for_updating( data, start_ts, end_ts = _load_cached_data_for_updating(
'NONEXIST/BTC', '1m', None, data_handler, CandleType.SPOT) 'NONEXIST/BTC', '1m', None, data_handler, CandleType.SPOT)
assert data.empty assert data.empty
assert start_ts is None assert start_ts is None
assert end_ts is None
@pytest.mark.parametrize('candle_type,subdir,file_tail', [ @pytest.mark.parametrize('candle_type,subdir,file_tail', [