From 0506caf986af7e6bc82e6584e5238e74232edca0 Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 1 Apr 2020 07:23:43 +0200 Subject: [PATCH] Implement trades_remove_duplicates --- freqtrade/data/converter.py | 17 +++++++++++++++-- tests/data/test_converter.py | 13 +++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/freqtrade/data/converter.py b/freqtrade/data/converter.py index 90db82f74..d2108b9be 100644 --- a/freqtrade/data/converter.py +++ b/freqtrade/data/converter.py @@ -1,15 +1,17 @@ """ Functions to convert data from one format to another """ +import itertools import logging from datetime import datetime, timezone +from operator import itemgetter from typing import Any, Dict, List import pandas as pd from pandas import DataFrame, to_datetime -from freqtrade.constants import DEFAULT_DATAFRAME_COLUMNS, DEFAULT_TRADES_COLUMNS - +from freqtrade.constants import (DEFAULT_DATAFRAME_COLUMNS, + DEFAULT_TRADES_COLUMNS) logger = logging.getLogger(__name__) @@ -155,6 +157,17 @@ def order_book_to_dataframe(bids: list, asks: list) -> DataFrame: return frame +def trades_remove_duplicates(trades: List[List]) -> List[List]: + """ + Removes duplicates from the trades list. + Uses itertools.groupby to avoid converting to pandas. + Tests show it as being pretty efficient on lists of 4M Lists. + :param trades: List of Lists with constants.DEFAULT_TRADES_COLUMNS as columns + :return: same format as above, but with duplicates removed + """ + return [i for i, _ in itertools.groupby(sorted(trades, key=itemgetter(0)))] + + def trades_dict_to_list(trades: List[Dict]) -> List[List]: """ Convert fetch_trades result into a List (to be more memory efficient). 
diff --git a/tests/data/test_converter.py b/tests/data/test_converter.py index 1f7837e46..463700d0c 100644 --- a/tests/data/test_converter.py +++ b/tests/data/test_converter.py @@ -6,7 +6,7 @@ from freqtrade.data.converter import (convert_ohlcv_format, convert_trades_format, ohlcv_fill_up_missing_data, ohlcv_to_dataframe, trades_dict_to_list, - trim_dataframe) + trades_remove_duplicates, trim_dataframe) from freqtrade.data.history import (get_timerange, load_data, load_pair_history, validate_backtest_data) from tests.conftest import log_has @@ -195,7 +195,16 @@ def test_trim_dataframe(testdatadir) -> None: assert all(data_modify.iloc[0] == data.iloc[25]) -def test_trades_dict_to_list(mocker, fetch_trades_result): +def test_trades_remove_duplicates(trades_history): + trades_history1 = trades_history * 3 + assert len(trades_history1) == len(trades_history) * 3 + res = trades_remove_duplicates(trades_history1) + assert len(res) == len(trades_history) + for i, t in enumerate(res): + assert t == trades_history[i] + + +def test_trades_dict_to_list(fetch_trades_result): res = trades_dict_to_list(fetch_trades_result) assert isinstance(res, list) assert isinstance(res[0], list)