Implement trades_remove_dulicates

This commit is contained in:
Matthias 2020-04-01 07:23:43 +02:00
parent 59f1a061f7
commit 0506caf986
2 changed files with 26 additions and 4 deletions

View File

@ -1,15 +1,17 @@
"""
Functions to convert data from one format to another
"""
import itertools
import logging
from datetime import datetime, timezone
from operator import itemgetter
from typing import Any, Dict, List
import pandas as pd
from pandas import DataFrame, to_datetime
from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS
from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS,
DEFAULT_TRADES_COLUMNS)
logger = logging.getLogger(__name__)
@ -155,6 +157,17 @@ def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
return frame
def trades_remove_duplicates(trades: List[List]) -> List[List]:
"""
Removes duplicates from the trades list.
Uses itertools.groupby to avoid converting to pandas.
Tests show it as being pretty efficient on lists of 4M Lists.
:param trades: List of Lists with constants.DEFAULT_TRADES_COLUMNS as columns
:return: same format as above, but with duplicates removed
"""
return [i for i, _ in itertools.groupby(sorted(trades, key=itemgetter(0)))]
def trades_dict_to_list(trades: List[Dict]) -> List[List]:
"""
Convert fetch_trades result into a List (to be more memory efficient).

View File

@ -6,7 +6,7 @@ from freqtrade.data.converter import (convert_ohlcv_format,
convert_trades_format,
ohlcv_fill_up_missing_data,
ohlcv_to_dataframe, trades_dict_to_list,
trim_dataframe)
trades_remove_duplicates, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data,
load_pair_history, validate_backtest_data)
from tests.conftest import log_has
@ -195,7 +195,16 @@ def test_trim_dataframe(testdatadir) -> None:
assert all(data_modify.iloc[0] == data.iloc[25])
def test_trades_dict_to_list(mocker, fetch_trades_result):
def test_trades_remove_duplicates(trades_history):
trades_history1 = trades_history * 3
assert len(trades_history1) == len(trades_history) * 3
res = trades_remove_duplicates(trades_history1)
assert len(res) == len(trades_history)
for i, t in enumerate(res):
assert t == trades_history[i]
def test_trades_dict_to_list(fetch_trades_result):
res = trades_dict_to_list(fetch_trades_result)
assert isinstance(res, list)
assert isinstance(res[0], list)