Added intermediate files to help with progressive downloads

This commit is contained in:
Mohammad Dashti 2021-09-16 08:48:22 -07:00
parent 90ad178932
commit 0439ae0072
3 changed files with 89 additions and 15 deletions

View File

@ -21,6 +21,11 @@ def create_datadir(config: Dict[str, Any], datadir: Optional[str] = None) -> Pat
if not folder.is_dir(): if not folder.is_dir():
folder.mkdir(parents=True) folder.mkdir(parents=True)
logger.info(f'Created data directory: {datadir}') logger.info(f'Created data directory: {datadir}')
intermediate_dir = folder.joinpath('trades-intermediate-parts')
if not intermediate_dir.is_dir():
intermediate_dir.mkdir(parents=True)
logger.info(f'Created intermediate data directory: {intermediate_dir}')
return folder return folder

View File

@ -306,6 +306,7 @@ def _download_trades_history(exchange: Exchange,
since=since, since=since,
until=until, until=until,
from_id=from_id, from_id=from_id,
datadir=data_handler._datadir
) )
trades.extend(new_trades[1]) trades.extend(new_trades[1])
# Remove duplicates to make sure we're not storing data we don't need # Remove duplicates to make sure we're not storing data we don't need

View File

@ -13,6 +13,10 @@ from typing import Any, Dict, List, Optional, Tuple
import arrow import arrow
import ccxt import ccxt
import json
import os
from pathlib import Path
import ccxt.async_support as ccxt_async import ccxt.async_support as ccxt_async
from cachetools import TTLCache from cachetools import TTLCache
from ccxt.base.decimal_to_precision import (ROUND_DOWN, ROUND_UP, TICK_SIZE, TRUNCATE, from ccxt.base.decimal_to_precision import (ROUND_DOWN, ROUND_UP, TICK_SIZE, TRUNCATE,
@ -1369,11 +1373,32 @@ class Exchange:
f'for pair {pair}. Message: {e}') from e f'for pair {pair}. Message: {e}') from e
# Fetch historic trades # Fetch historic trades
def _intermediate_trades_dir(self, datadir: str, pair: str, from_id: int) -> str:
    """
    Build the path of the intermediate (partial download) trades file for a batch.

    The resulting layout is
    ``<datadir>/trades-intermediate-parts/<PAIR>/<from_id // 1000000>/<PAIR>_<from_id>.json``
    where ``<PAIR>`` is ``pair`` with every "/" replaced by "-".
    The containing directory is created if it does not exist yet; the file itself
    is not created here.

    :param datadir: Base data directory.
    :param pair: Pair the trades belong to (e.g. "BTC/USDT").
    :param from_id: Id of the first trade of the batch. May be an int or a
        string holding an integer (it is passed through ``int()``).
    :return: Path of the intermediate json file as string.
    :raises ValueError: If ``from_id`` cannot be converted to an integer.
    """
    pair_name = pair.replace("/", "-")
    # Group files into buckets of one million ids to keep directories small.
    bucket = str(int(from_id) // 1000000)
    target_dir = os.path.join(datadir, "trades-intermediate-parts", pair_name, bucket)
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    # str() is required here: from_id may be an int, and "str + int" raises TypeError.
    return os.path.join(target_dir, f"{pair_name}_{str(from_id)}.json")
@retrier_async
async def _async_fetch_trades_from_file(self, datafile: str) -> List[List]:
    """
    Load a list of trades from a json file.

    :param datafile: Path of the json file to read.
    :return: The decoded json content of the file.
    """
    # The context manager guarantees the handle is closed even if reading
    # or decoding the file fails.
    with open(datafile, mode='r') as trades_file:
        return json.load(trades_file)
@retrier_async @retrier_async
async def _async_fetch_trades(self, pair: str, async def _async_fetch_trades(self, pair: str,
since: Optional[int] = None, since: Optional[int] = None,
params: Optional[dict] = None) -> List[List]: params: Optional[dict] = None,
datadir: Optional[str] = None) -> List[List]:
""" """
Asyncronously gets trade history using fetch_trades. Asyncronously gets trade history using fetch_trades.
Handles exchange errors, does one call to the exchange. Handles exchange errors, does one call to the exchange.
@ -1385,15 +1410,30 @@ class Exchange:
# fetch trades asynchronously # fetch trades asynchronously
if params: if params:
logger.debug("Fetching trades for pair %s, params: %s ", pair, params) logger.debug("Fetching trades for pair %s, params: %s ", pair, params)
trades = await self._api_async.fetch_trades(pair, params=params, limit=1000) trades = await self._api_async.fetch_trades(pair, params=params, limit=self.batch_size())
else: else:
logger.debug( logger.debug(
"Fetching trades for pair %s, since %s %s...", "Fetching trades for pair %s, since %s %s...",
pair, since, pair, since,
'(' + arrow.get(since // 1000).isoformat() + ') ' if since is not None else '' '(' + arrow.get(since // 1000).isoformat() + ') ' if since is not None else ''
) )
trades = await self._api_async.fetch_trades(pair, since=since, limit=1000) trades = await self._api_async.fetch_trades(pair, since=since, limit=self.batch_size())
return trades_dict_to_list(trades)
trades_list = trades_dict_to_list(trades)
if trades_list and len(trades_list) == self.batch_size() and datadir:
from_id = trades_list[0][1]
tmpdata_file = self._intermediate_trades_dir(datadir, pair, from_id)
json_string = json.dumps(trades_list)
with open(tmpdata_file, "w") as text_file:
text_file.write(json_string)
logger.debug("Cached the intermediate trades in %s", tmpdata_file)
else:
from_id = trades_list[0][1] if trades_list else 0
tmpdata_file = self._intermediate_trades_dir(datadir, pair, from_id)
logger.debug("DID NOT CACHE the intermediate trades in %s with len=%s", tmpdata_file, len(trades_list))
return trades_list
except ccxt.NotSupported as e: except ccxt.NotSupported as e:
raise OperationalException( raise OperationalException(
f'Exchange {self._api.name} does not support fetching historical trade data.' f'Exchange {self._api.name} does not support fetching historical trade data.'
@ -1406,10 +1446,15 @@ class Exchange:
except ccxt.BaseError as e: except ccxt.BaseError as e:
raise OperationalException(f'Could not fetch trade data. Msg: {e}') from e raise OperationalException(f'Could not fetch trade data. Msg: {e}') from e
def batch_size(self) -> int:
    """
    Return the number of trades requested per fetch call.

    :return: Batch size (number of trades).
    """
    trades_per_request = 1000
    return trades_per_request
async def _async_get_trade_history_id(self, pair: str, async def _async_get_trade_history_id(self, pair: str,
until: int, until: int,
since: Optional[int] = None, since: Optional[int] = None,
from_id: Optional[str] = None) -> Tuple[str, List[List]]: from_id: Optional[str] = None,
datadir: Optional[str] = None
) -> Tuple[str, List[List]]:
""" """
Asyncronously gets trade history using fetch_trades Asyncronously gets trade history using fetch_trades
use this when exchange uses id-based iteration (check `self._trades_pagination`) use this when exchange uses id-based iteration (check `self._trades_pagination`)
@ -1428,14 +1473,32 @@ class Exchange:
# of up to an hour. # of up to an hour.
# e.g. Binance returns the "last 1000" candles within a 1h time interval # e.g. Binance returns the "last 1000" candles within a 1h time interval
# - so we will miss the first trades. # - so we will miss the first trades.
t = await self._async_fetch_trades(pair, since=since) t = await self._async_fetch_trades(pair, since=since, datadir=datadir)
# DEFAULT_TRADES_COLUMNS: 0 -> timestamp # DEFAULT_TRADES_COLUMNS: 0 -> timestamp
# DEFAULT_TRADES_COLUMNS: 1 -> id # DEFAULT_TRADES_COLUMNS: 1 -> id
from_id = t[-1][1] from_id = t[-1][1]
trades.extend(t[:-1]) trades.extend(t[:-1])
while True: while True:
t = await self._async_fetch_trades(pair, tmpdata_file = self._intermediate_trades_dir(datadir, pair, from_id)
params={self._trades_pagination_arg: from_id})
t = []
success_cache_read = False
if os.path.isfile(tmpdata_file):
t = await self._async_fetch_trades_from_file(tmpdata_file)
if len(t) < self.batch_size():
success_cache_read = False
logger.debug("Read from cache %s", tmpdata_file)
else:
success_cache_read = True
from_id = t[0][1]
to_id = t[-1][1]
logger.debug("Read from cache %s from %s to %s", tmpdata_file, from_id, to_id)
if not success_cache_read:
t = await self._async_fetch_trades(pair,
params={self._trades_pagination_arg: from_id},
datadir=datadir)
if t: if t:
# Skip last id since its the key for the next call # Skip last id since its the key for the next call
trades.extend(t[:-1]) trades.extend(t[:-1])
@ -1453,7 +1516,9 @@ class Exchange:
return (pair, trades) return (pair, trades)
async def _async_get_trade_history_time(self, pair: str, until: int, async def _async_get_trade_history_time(self, pair: str, until: int,
since: Optional[int] = None) -> Tuple[str, List[List]]: since: Optional[int] = None,
datadir: Optional[str] = None
) -> Tuple[str, List[List]]:
""" """
Asyncronously gets trade history using fetch_trades, Asyncronously gets trade history using fetch_trades,
when the exchange uses time-based iteration (check `self._trades_pagination`) when the exchange uses time-based iteration (check `self._trades_pagination`)
@ -1467,7 +1532,7 @@ class Exchange:
# DEFAULT_TRADES_COLUMNS: 0 -> timestamp # DEFAULT_TRADES_COLUMNS: 0 -> timestamp
# DEFAULT_TRADES_COLUMNS: 1 -> id # DEFAULT_TRADES_COLUMNS: 1 -> id
while True: while True:
t = await self._async_fetch_trades(pair, since=since) t = await self._async_fetch_trades(pair, since=since, datadir=datadir)
if t: if t:
since = t[-1][0] since = t[-1][0]
trades.extend(t) trades.extend(t)
@ -1484,7 +1549,9 @@ class Exchange:
async def _async_get_trade_history(self, pair: str, async def _async_get_trade_history(self, pair: str,
since: Optional[int] = None, since: Optional[int] = None,
until: Optional[int] = None, until: Optional[int] = None,
from_id: Optional[str] = None) -> Tuple[str, List[List]]: from_id: Optional[str] = None,
datadir: Optional[str] = None
) -> Tuple[str, List[List]]:
""" """
Async wrapper handling downloading trades using either time or id based methods. Async wrapper handling downloading trades using either time or id based methods.
""" """
@ -1498,10 +1565,10 @@ class Exchange:
if self._trades_pagination == 'time': if self._trades_pagination == 'time':
return await self._async_get_trade_history_time( return await self._async_get_trade_history_time(
pair=pair, since=since, until=until) pair=pair, since=since, until=until, datadir=datadir)
elif self._trades_pagination == 'id': elif self._trades_pagination == 'id':
return await self._async_get_trade_history_id( return await self._async_get_trade_history_id(
pair=pair, since=since, until=until, from_id=from_id pair=pair, since=since, until=until, from_id=from_id, datadir=datadir
) )
else: else:
raise OperationalException(f"Exchange {self.name} does use neither time, " raise OperationalException(f"Exchange {self.name} does use neither time, "
@ -1510,7 +1577,8 @@ class Exchange:
def get_historic_trades(self, pair: str, def get_historic_trades(self, pair: str,
since: Optional[int] = None, since: Optional[int] = None,
until: Optional[int] = None, until: Optional[int] = None,
from_id: Optional[str] = None) -> Tuple[str, List]: from_id: Optional[str] = None,
datadir: Optional[str] = None) -> Tuple[str, List]:
""" """
Get trade history data using asyncio. Get trade history data using asyncio.
Handles all async work and returns the list of candles. Handles all async work and returns the list of candles.
@ -1526,7 +1594,7 @@ class Exchange:
return asyncio.get_event_loop().run_until_complete( return asyncio.get_event_loop().run_until_complete(
self._async_get_trade_history(pair=pair, since=since, self._async_get_trade_history(pair=pair, since=since,
until=until, from_id=from_id)) until=until, from_id=from_id, datadir=datadir))
def is_exchange_known_ccxt(exchange_name: str, ccxt_module: CcxtModuleType = None) -> bool: def is_exchange_known_ccxt(exchange_name: str, ccxt_module: CcxtModuleType = None) -> bool: