Added intermediate files to help with progressive downloads

parent 90ad178932
commit 0439ae0072
@@ -21,6 +21,11 @@ def create_datadir(config: Dict[str, Any], datadir: Optional[str] = None) -> Path:
     if not folder.is_dir():
         folder.mkdir(parents=True)
         logger.info(f'Created data directory: {datadir}')
+
+    intermediate_dir = folder.joinpath('trades-intermediate-parts')
+    if not intermediate_dir.is_dir():
+        intermediate_dir.mkdir(parents=True)
+        logger.info(f'Created intermediate data directory: {intermediate_dir}')
     return folder
 
 
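The hunk above only pre-creates the top-level trades-intermediate-parts folder next to the regular trade data; the per-pair and per-id-range subfolders are created lazily by the new Exchange helper further down. A minimal sketch of the resulting layout, assuming freqtrade's usual user_data/data/<exchange> location (the exchange name and pair are hypothetical):

from pathlib import Path

# Hypothetical data directory; create_datadir() derives the real one from the config.
datadir = Path("user_data/data/binance")
intermediate_dir = datadir.joinpath("trades-intermediate-parts")
intermediate_dir.mkdir(parents=True, exist_ok=True)
# Later, the exchange helper adds files such as
#   user_data/data/binance/trades-intermediate-parts/ETH-BTC/123/ETH-BTC_123456789.json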
@@ -306,6 +306,7 @@ def _download_trades_history(exchange: Exchange,
                                                   since=since,
                                                   until=until,
                                                   from_id=from_id,
+                                                  datadir=data_handler._datadir
                                                   )
         trades.extend(new_trades[1])
         # Remove duplicates to make sure we're not storing data we don't need
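The only change here is that the download path now forwards the data directory into the exchange call, so the exchange layer knows where to keep its intermediate part files. A hedged sketch of the surrounding call (the enclosing statement is outside the hunk; judging from the later hunks it is exchange.get_historic_trades, and the timestamps are placeholders):

# Illustrative only: the enclosing call as it presumably looks with the new keyword.
new_trades = exchange.get_historic_trades(pair=pair,
                                          since=1609459200000,   # placeholder ms timestamp
                                          until=1612137600000,   # placeholder ms timestamp
                                          from_id=from_id,
                                          datadir=data_handler._datadir
                                          )
trades.extend(new_trades[1])   # element 1 is the list of trades, element 0 the pair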
@@ -13,6 +13,10 @@ from typing import Any, Dict, List, Optional, Tuple
 
 import arrow
 import ccxt
+import json
+import os
+from pathlib import Path
+
 import ccxt.async_support as ccxt_async
 from cachetools import TTLCache
 from ccxt.base.decimal_to_precision import (ROUND_DOWN, ROUND_UP, TICK_SIZE, TRUNCATE,
@@ -1369,11 +1373,32 @@ class Exchange:
                 f'for pair {pair}. Message: {e}') from e
 
     # Fetch historic trades
 
+    def _intermediate_trades_dir(self, datadir: str, pair: str, from_id: int) -> str:
+        tmpdata_file = os.path.join(datadir, "trades-intermediate-parts")
+        tmpdata_file = os.path.join(tmpdata_file, pair.replace("/", "-"))
+        tmpdata_file = os.path.join(tmpdata_file, str(int(from_id) // 1000000))
+        Path(tmpdata_file).mkdir(parents=True, exist_ok=True)
+        tmpdata_file = os.path.join(tmpdata_file, pair.replace("/", "-") + "_" + from_id + ".json")
+        return tmpdata_file
+
+    @retrier_async
+    async def _async_fetch_trades_from_file(self, datafile: str) -> List[List]:
+        # Open the cached intermediate file
+        file = open(datafile, mode='r')
+        # read all lines at once
+        json_string = file.read()
+        # close the file
+        file.close()
+        trades_list = json.loads(json_string)
+        return trades_list
+
     @retrier_async
     async def _async_fetch_trades(self, pair: str,
                                   since: Optional[int] = None,
-                                  params: Optional[dict] = None) -> List[List]:
+                                  params: Optional[dict] = None,
+                                  datadir: Optional[str] = None) -> List[List]:
         """
         Asyncronously gets trade history using fetch_trades.
         Handles exchange errors, does one call to the exchange.
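_intermediate_trades_dir buckets the cached batches per pair and per range of one million trade ids, creating the folders on demand. Note that from_id is concatenated straight into the file name, so in practice it is the trade-id string returned by the exchange rather than an int as the annotation suggests. A standalone sketch of the same path scheme (paths and ids are illustrative):

import os
from pathlib import Path


def intermediate_trades_path(datadir: str, pair: str, from_id: str) -> str:
    bucket = str(int(from_id) // 1000000)      # group roughly one million trade ids per folder
    safe_pair = pair.replace("/", "-")         # "ETH/BTC" -> "ETH-BTC"
    folder = os.path.join(datadir, "trades-intermediate-parts", safe_pair, bucket)
    Path(folder).mkdir(parents=True, exist_ok=True)
    return os.path.join(folder, safe_pair + "_" + from_id + ".json")


print(intermediate_trades_path("user_data/data/binance", "ETH/BTC", "123456789"))
# user_data/data/binance/trades-intermediate-parts/ETH-BTC/123/ETH-BTC_123456789.json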
@@ -1385,15 +1410,30 @@
             # fetch trades asynchronously
             if params:
                 logger.debug("Fetching trades for pair %s, params: %s ", pair, params)
-                trades = await self._api_async.fetch_trades(pair, params=params, limit=1000)
+                trades = await self._api_async.fetch_trades(pair, params=params, limit=self.batch_size())
             else:
                 logger.debug(
                     "Fetching trades for pair %s, since %s %s...",
                     pair, since,
                     '(' + arrow.get(since // 1000).isoformat() + ') ' if since is not None else ''
                 )
-                trades = await self._api_async.fetch_trades(pair, since=since, limit=1000)
-            return trades_dict_to_list(trades)
+                trades = await self._api_async.fetch_trades(pair, since=since, limit=self.batch_size())
+
+            trades_list = trades_dict_to_list(trades)
+            if trades_list and len(trades_list) == self.batch_size() and datadir:
+                from_id = trades_list[0][1]
+                tmpdata_file = self._intermediate_trades_dir(datadir, pair, from_id)
+
+                json_string = json.dumps(trades_list)
+                with open(tmpdata_file, "w") as text_file:
+                    text_file.write(json_string)
+                logger.debug("Cached the intermediate trades in %s", tmpdata_file)
+            else:
+                from_id = trades_list[0][1] if trades_list else 0
+                tmpdata_file = self._intermediate_trades_dir(datadir, pair, from_id)
+                logger.debug("DID NOT CACHE the intermediate trades in %s with len=%s", tmpdata_file, len(trades_list))
+            return trades_list
         except ccxt.NotSupported as e:
             raise OperationalException(
                 f'Exchange {self._api.name} does not support fetching historical trade data.'
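_async_fetch_trades now writes each downloaded batch to disk, but only when the batch is exactly batch_size() long and a datadir was supplied; a shorter batch presumably marks the moving end of the trade stream and is left uncached so the next run fetches it again. The write-side rule in isolation (names are illustrative, not part of the diff):

import json


def maybe_cache_batch(trades_list: list, batch_size: int, path: str) -> bool:
    # Persist only a completely full batch; a partial batch is still growing
    # on the exchange side and would be stale on the next download.
    if trades_list and len(trades_list) == batch_size:
        with open(path, "w") as part_file:
            part_file.write(json.dumps(trades_list))
        return True
    return False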
@@ -1406,10 +1446,15 @@
         except ccxt.BaseError as e:
             raise OperationalException(f'Could not fetch trade data. Msg: {e}') from e
 
+    def batch_size(self) -> int:
+        return 1000
+
     async def _async_get_trade_history_id(self, pair: str,
                                           until: int,
                                           since: Optional[int] = None,
-                                          from_id: Optional[str] = None) -> Tuple[str, List[List]]:
+                                          from_id: Optional[str] = None,
+                                          datadir: Optional[str] = None
+                                          ) -> Tuple[str, List[List]]:
         """
         Asyncronously gets trade history using fetch_trades
         use this when exchange uses id-based iteration (check `self._trades_pagination`)
@@ -1428,14 +1473,32 @@
             # of up to an hour.
             # e.g. Binance returns the "last 1000" candles within a 1h time interval
             # - so we will miss the first trades.
-            t = await self._async_fetch_trades(pair, since=since)
+            t = await self._async_fetch_trades(pair, since=since, datadir=datadir)
             # DEFAULT_TRADES_COLUMNS: 0 -> timestamp
             # DEFAULT_TRADES_COLUMNS: 1 -> id
             from_id = t[-1][1]
             trades.extend(t[:-1])
         while True:
-            t = await self._async_fetch_trades(pair,
-                                               params={self._trades_pagination_arg: from_id})
+            tmpdata_file = self._intermediate_trades_dir(datadir, pair, from_id)
+
+            t = []
+            success_cache_read = False
+            if os.path.isfile(tmpdata_file):
+                t = await self._async_fetch_trades_from_file(tmpdata_file)
+
+                if len(t) < self.batch_size():
+                    success_cache_read = False
+                    logger.debug("Read from cache %s", tmpdata_file)
+                else:
+                    success_cache_read = True
+                    from_id = t[0][1]
+                    to_id = t[-1][1]
+                    logger.debug("Read from cache %s from %s to %s", tmpdata_file, from_id, to_id)
+
+            if not success_cache_read:
+                t = await self._async_fetch_trades(pair,
+                                                   params={self._trades_pagination_arg: from_id},
+                                                   datadir=datadir)
             if t:
                 # Skip last id since its the key for the next call
                 trades.extend(t[:-1])
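In the id-paginated loop, each iteration now looks for a previously saved part file for the current from_id before touching the exchange; only a full batch read back from disk counts as a hit, otherwise the exchange is queried again and, per the earlier hunk, the fresh full batch is re-cached. The read-through pattern in isolation, with the two fetch coroutines passed in as stand-ins for the methods used in the diff:

import os


async def next_batch(part_file, from_id, batch_size, fetch_from_file, fetch_from_exchange):
    # Try the on-disk intermediate part first.
    if os.path.isfile(part_file):
        cached = await fetch_from_file(part_file)
        if len(cached) >= batch_size:          # only a full batch is a usable cache hit
            return cached
    # Cache miss or partial file: fall back to the exchange, paginating by trade id.
    return await fetch_from_exchange(from_id)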
@@ -1453,7 +1516,9 @@
         return (pair, trades)
 
     async def _async_get_trade_history_time(self, pair: str, until: int,
-                                            since: Optional[int] = None) -> Tuple[str, List[List]]:
+                                            since: Optional[int] = None,
+                                            datadir: Optional[str] = None
+                                            ) -> Tuple[str, List[List]]:
         """
         Asyncronously gets trade history using fetch_trades,
         when the exchange uses time-based iteration (check `self._trades_pagination`)
@@ -1467,7 +1532,7 @@
         # DEFAULT_TRADES_COLUMNS: 0 -> timestamp
         # DEFAULT_TRADES_COLUMNS: 1 -> id
         while True:
-            t = await self._async_fetch_trades(pair, since=since)
+            t = await self._async_fetch_trades(pair, since=since, datadir=datadir)
             if t:
                 since = t[-1][0]
                 trades.extend(t)
@@ -1484,7 +1549,9 @@
     async def _async_get_trade_history(self, pair: str,
                                        since: Optional[int] = None,
                                        until: Optional[int] = None,
-                                       from_id: Optional[str] = None) -> Tuple[str, List[List]]:
+                                       from_id: Optional[str] = None,
+                                       datadir: Optional[str] = None
+                                       ) -> Tuple[str, List[List]]:
         """
         Async wrapper handling downloading trades using either time or id based methods.
         """
@@ -1498,10 +1565,10 @@
 
         if self._trades_pagination == 'time':
             return await self._async_get_trade_history_time(
-                pair=pair, since=since, until=until)
+                pair=pair, since=since, until=until, datadir=datadir)
         elif self._trades_pagination == 'id':
             return await self._async_get_trade_history_id(
-                pair=pair, since=since, until=until, from_id=from_id
+                pair=pair, since=since, until=until, from_id=from_id, datadir=datadir
             )
         else:
             raise OperationalException(f"Exchange {self.name} does use neither time, "
@@ -1510,7 +1577,8 @@
     def get_historic_trades(self, pair: str,
                             since: Optional[int] = None,
                             until: Optional[int] = None,
-                            from_id: Optional[str] = None) -> Tuple[str, List]:
+                            from_id: Optional[str] = None,
+                            datadir: Optional[str] = None) -> Tuple[str, List]:
         """
         Get trade history data using asyncio.
         Handles all async work and returns the list of candles.
@@ -1526,7 +1594,7 @@
 
         return asyncio.get_event_loop().run_until_complete(
             self._async_get_trade_history(pair=pair, since=since,
-                                          until=until, from_id=from_id))
+                                          until=until, from_id=from_id, datadir=datadir))
 
 
 def is_exchange_known_ccxt(exchange_name: str, ccxt_module: CcxtModuleType = None) -> bool: