Merge pull request #3129 from freqtrade/trades_to_list
convert dl-trades datadownload to list
This commit is contained in:
@@ -1,14 +1,17 @@
|
||||
"""
|
||||
Functions to convert data from one format to another
|
||||
"""
|
||||
import itertools
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict
|
||||
from operator import itemgetter
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import pandas as pd
|
||||
from pandas import DataFrame, to_datetime
|
||||
|
||||
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS
|
||||
from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS,
|
||||
DEFAULT_TRADES_COLUMNS)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -154,7 +157,27 @@ def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
|
||||
return frame
|
||||
|
||||
|
||||
def trades_to_ohlcv(trades: list, timeframe: str) -> DataFrame:
|
||||
def trades_remove_duplicates(trades: List[List]) -> List[List]:
    """
    Removes duplicates from the trades list.
    Uses itertools.groupby to avoid converting to pandas.

    The trades are sorted by timestamp (column 0, stable sort) and then
    deduplicated within each group of rows sharing a timestamp. Comparing only
    adjacent rows is not sufficient: when overlapping downloads are appended,
    identical rows with the same timestamp can be separated by other trades
    of that timestamp and would otherwise survive.

    :param trades: List of Lists with constants.DEFAULT_TRADES_COLUMNS as columns.
                   Row values are assumed to be hashable scalars.
    :return: same format as above, sorted by timestamp, with duplicates removed
             (the first occurrence of each row is kept)
    """
    by_timestamp = itemgetter(0)
    unique_trades: List[List] = []
    for _, same_timestamp in itertools.groupby(sorted(trades, key=by_timestamp),
                                               key=by_timestamp):
        # The "seen" set is reset per timestamp group, so the extra memory is
        # bounded by the largest group rather than by the whole trades list.
        seen = set()
        for trade in same_timestamp:
            fingerprint = tuple(trade)
            if fingerprint not in seen:
                seen.add(fingerprint)
                unique_trades.append(trade)
    return unique_trades
|
||||
|
||||
|
||||
def trades_dict_to_list(trades: List[Dict]) -> List[List]:
    """
    Turn a fetch_trades result into a list of rows (to be more memory efficient).
    Each trade dict becomes one list holding the values of the keys named in
    constants.DEFAULT_TRADES_COLUMNS, in that order.
    :param trades: List of trades, as returned by ccxt.fetch_trades.
    :return: List of Lists, with constants.DEFAULT_TRADES_COLUMNS as columns
    """
    rows: List[List] = []
    for trade in trades:
        # Plain key lookup: a trade missing one of the columns raises KeyError.
        rows.append([trade[column] for column in DEFAULT_TRADES_COLUMNS])
    return rows
|
||||
|
||||
|
||||
def trades_to_ohlcv(trades: List, timeframe: str) -> DataFrame:
|
||||
"""
|
||||
Converts trades list to OHLCV list
|
||||
TODO: This should get a dedicated test
|
||||
@@ -164,9 +187,10 @@ def trades_to_ohlcv(trades: list, timeframe: str) -> DataFrame:
|
||||
"""
|
||||
from freqtrade.exchange import timeframe_to_minutes
|
||||
timeframe_minutes = timeframe_to_minutes(timeframe)
|
||||
df = pd.DataFrame(trades)
|
||||
df['datetime'] = pd.to_datetime(df['datetime'])
|
||||
df = df.set_index('datetime')
|
||||
df = pd.DataFrame(trades, columns=DEFAULT_TRADES_COLUMNS)
|
||||
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms',
|
||||
utc=True,)
|
||||
df = df.set_index('timestamp')
|
||||
|
||||
df_new = df['price'].resample(f'{timeframe_minutes}min').ohlc()
|
||||
df_new['volume'] = df['amount'].resample(f'{timeframe_minutes}min').sum()
|
||||
|
@@ -9,10 +9,13 @@ from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS
|
||||
from freqtrade.data.converter import ohlcv_to_dataframe, trades_to_ohlcv
|
||||
from freqtrade.data.converter import (ohlcv_to_dataframe,
|
||||
trades_remove_duplicates,
|
||||
trades_to_ohlcv)
|
||||
from freqtrade.data.history.idatahandler import IDataHandler, get_datahandler
|
||||
from freqtrade.exceptions import OperationalException
|
||||
from freqtrade.exchange import Exchange
|
||||
from freqtrade.misc import format_ms_time
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -257,27 +260,40 @@ def _download_trades_history(exchange: Exchange,
|
||||
"""
|
||||
try:
|
||||
|
||||
since = timerange.startts * 1000 if timerange and timerange.starttype == 'date' else None
|
||||
since = timerange.startts * 1000 if \
|
||||
(timerange and timerange.starttype == 'date') else int(arrow.utcnow().shift(
|
||||
days=-30).float_timestamp) * 1000
|
||||
|
||||
trades = data_handler.trades_load(pair)
|
||||
|
||||
from_id = trades[-1]['id'] if trades else None
|
||||
# TradesList columns are defined in constants.DEFAULT_TRADES_COLUMNS
|
||||
# DEFAULT_TRADES_COLUMNS: 0 -> timestamp
|
||||
# DEFAULT_TRADES_COLUMNS: 1 -> id
|
||||
|
||||
logger.debug("Current Start: %s", trades[0]['datetime'] if trades else 'None')
|
||||
logger.debug("Current End: %s", trades[-1]['datetime'] if trades else 'None')
|
||||
from_id = trades[-1][1] if trades else None
|
||||
if trades and since < trades[-1][0]:
|
||||
# Reset since to the last available point
|
||||
# - 5 seconds (to ensure we're getting all trades)
|
||||
since = trades[-1][0] - (5 * 1000)
|
||||
logger.info(f"Using last trade date -5s - Downloading trades for {pair} "
|
||||
f"since: {format_ms_time(since)}.")
|
||||
|
||||
logger.debug(f"Current Start: {format_ms_time(trades[0][0]) if trades else 'None'}")
|
||||
logger.debug(f"Current End: {format_ms_time(trades[-1][0]) if trades else 'None'}")
|
||||
logger.info(f"Current Amount of trades: {len(trades)}")
|
||||
|
||||
# Default since_ms to 30 days if nothing is given
|
||||
new_trades = exchange.get_historic_trades(pair=pair,
|
||||
since=since if since else
|
||||
int(arrow.utcnow().shift(
|
||||
days=-30).float_timestamp) * 1000,
|
||||
since=since,
|
||||
from_id=from_id,
|
||||
)
|
||||
trades.extend(new_trades[1])
|
||||
# Remove duplicates to make sure we're not storing data we don't need
|
||||
trades = trades_remove_duplicates(trades)
|
||||
data_handler.trades_store(pair, data=trades)
|
||||
|
||||
logger.debug("New Start: %s", trades[0]['datetime'])
|
||||
logger.debug("New End: %s", trades[-1]['datetime'])
|
||||
logger.debug(f"New Start: {format_ms_time(trades[0][0])}")
|
||||
logger.debug(f"New End: {format_ms_time(trades[-1][0])}")
|
||||
logger.info(f"New Amount of trades: {len(trades)}")
|
||||
return True
|
||||
|
||||
|
@@ -8,16 +8,20 @@ from abc import ABC, abstractclassmethod, abstractmethod
|
||||
from copy import deepcopy
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Type
|
||||
from typing import List, Optional, Type
|
||||
|
||||
from pandas import DataFrame
|
||||
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.data.converter import clean_ohlcv_dataframe, trim_dataframe
|
||||
from freqtrade.data.converter import (clean_ohlcv_dataframe,
|
||||
trades_remove_duplicates, trim_dataframe)
|
||||
from freqtrade.exchange import timeframe_to_seconds
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Type for trades list
|
||||
TradeList = List[List]
|
||||
|
||||
|
||||
class IDataHandler(ABC):
|
||||
|
||||
@@ -89,23 +93,25 @@ class IDataHandler(ABC):
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def trades_store(self, pair: str, data: List[Dict]) -> None:
|
||||
def trades_store(self, pair: str, data: TradeList) -> None:
    """
    Store trades data (list of Lists) to file
    :param pair: Pair - used for filename
    :param data: List of Lists containing trade data,
                 column sequence as in DEFAULT_TRADES_COLUMNS
    """
|
||||
|
||||
@abstractmethod
|
||||
def trades_append(self, pair: str, data: List[Dict]):
|
||||
def trades_append(self, pair: str, data: TradeList):
    """
    Append data to existing files
    :param pair: Pair - used for filename
    :param data: List of Lists containing trade data,
                 column sequence as in DEFAULT_TRADES_COLUMNS
    """
|
||||
|
||||
@abstractmethod
|
||||
def trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> List[Dict]:
|
||||
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
|
||||
"""
|
||||
Load a pair from file, either .json.gz or .json
|
||||
:param pair: Load trades for this pair
|
||||
@@ -121,6 +127,16 @@ class IDataHandler(ABC):
|
||||
:return: True when deleted, false if file did not exist.
|
||||
"""
|
||||
|
||||
def trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
    """
    Load the trades of a pair through the handler's _trades_load
    and remove duplicates from the result before returning it.
    :param pair: Load trades for this pair
    :param timerange: Timerange to load trades for - currently not implemented
    :return: List of trades
    """
    loaded_trades = self._trades_load(pair, timerange=timerange)
    return trades_remove_duplicates(loaded_trades)
|
||||
|
||||
def ohlcv_load(self, pair, timeframe: str,
|
||||
timerange: Optional[TimeRange] = None,
|
||||
fill_missing: bool = True,
|
||||
|
@@ -1,6 +1,7 @@
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
from typing import List, Optional
|
||||
|
||||
import numpy as np
|
||||
from pandas import DataFrame, read_json, to_datetime
|
||||
@@ -8,8 +9,11 @@ from pandas import DataFrame, read_json, to_datetime
|
||||
from freqtrade import misc
|
||||
from freqtrade.configuration import TimeRange
|
||||
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS
|
||||
from freqtrade.data.converter import trades_dict_to_list
|
||||
|
||||
from .idatahandler import IDataHandler
|
||||
from .idatahandler import IDataHandler, TradeList
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class JsonDataHandler(IDataHandler):
|
||||
@@ -113,24 +117,26 @@ class JsonDataHandler(IDataHandler):
|
||||
# Check if regex found something and only return these results to avoid exceptions.
|
||||
return [match[0].replace('_', '/') for match in _tmp if match]
|
||||
|
||||
def trades_store(self, pair: str, data: List[Dict]) -> None:
|
||||
def trades_store(self, pair: str, data: TradeList) -> None:
    """
    Store trades data (list of Lists) to file
    The target file is derived from the data directory and the pair; the
    data is dumped as json, zipped when the handler is set to use zip.
    :param pair: Pair - used for filename
    :param data: List of Lists containing trade data,
                 column sequence as in DEFAULT_TRADES_COLUMNS
    """
    misc.file_dump_json(
        self._pair_trades_filename(self._datadir, pair),
        data,
        is_zip=self._use_zip,
    )
|
||||
|
||||
def trades_append(self, pair: str, data: List[Dict]):
|
||||
def trades_append(self, pair: str, data: TradeList):
    """
    Append data to existing files
    Not supported by this handler - always raises NotImplementedError.
    :param pair: Pair - used for filename
    :param data: List of Lists containing trade data,
                 column sequence as in DEFAULT_TRADES_COLUMNS
    :raises NotImplementedError: always
    """
    raise NotImplementedError()
|
||||
|
||||
def trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> List[Dict]:
|
||||
def _trades_load(self, pair: str, timerange: Optional[TimeRange] = None) -> TradeList:
|
||||
"""
|
||||
Load a pair from file, either .json.gz or .json
|
||||
# TODO: respect timerange ...
|
||||
@@ -140,9 +146,15 @@ class JsonDataHandler(IDataHandler):
|
||||
"""
|
||||
filename = self._pair_trades_filename(self._datadir, pair)
|
||||
tradesdata = misc.file_load_json(filename)
|
||||
|
||||
if not tradesdata:
|
||||
return []
|
||||
|
||||
if isinstance(tradesdata[0], dict):
|
||||
# Convert trades dict to list
|
||||
logger.info("Old trades format detected - converting")
|
||||
tradesdata = trades_dict_to_list(tradesdata)
|
||||
pass
|
||||
return tradesdata
|
||||
|
||||
def trades_purge(self, pair: str) -> bool:
|
||||
|
Reference in New Issue
Block a user