From 416517b0c901f3438dcdc1af54fe5b32a77e38ba Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 25 Dec 2019 15:47:04 +0100 Subject: [PATCH] Move trim_dataframe from history to converter --- freqtrade/data/converter.py | 19 ++++++++ .../data/datahandlers/jsondatahandler.py | 3 +- freqtrade/data/history.py | 26 ---------- freqtrade/optimize/backtesting.py | 3 +- freqtrade/optimize/hyperopt.py | 3 +- freqtrade/plot/plotting.py | 3 +- tests/data/test_converter.py | 47 ++++++++++++++++++- tests/data/test_history.py | 46 +----------------- 8 files changed, 73 insertions(+), 77 deletions(-) diff --git a/freqtrade/data/converter.py b/freqtrade/data/converter.py index 6b44a6b20..52ce3c593 100644 --- a/freqtrade/data/converter.py +++ b/freqtrade/data/converter.py @@ -2,10 +2,12 @@ Functions to convert data from one format to another """ import logging +from datetime import datetime, timezone import pandas as pd from pandas import DataFrame, to_datetime +from freqtrade.configuration.timerange import TimeRange logger = logging.getLogger(__name__) @@ -112,6 +114,23 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str) return df +def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame: + """ + Trim dataframe based on given timerange + :param df: Dataframe to trim + :param timerange: timerange (use start and end date if available) + :param: df_date_col: Column in the dataframe to use as Date column + :return: trimmed dataframe + """ + if timerange.starttype == 'date': + start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) + df = df.loc[df[df_date_col] >= start, :] + if timerange.stoptype == 'date': + stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) + df = df.loc[df[df_date_col] <= stop, :] + return df + + def order_book_to_dataframe(bids: list, asks: list) -> DataFrame: """ Gets order book list, returns dataframe with below format per suggested by creslin diff --git a/freqtrade/data/datahandlers/jsondatahandler.py b/freqtrade/data/datahandlers/jsondatahandler.py index 2893393b1..c799784d0 100644 --- a/freqtrade/data/datahandlers/jsondatahandler.py +++ b/freqtrade/data/datahandlers/jsondatahandler.py @@ -7,8 +7,7 @@ from pandas import DataFrame, read_json, to_datetime from freqtrade import misc from freqtrade.configuration import TimeRange -from freqtrade.data.converter import clean_ohlcv_dataframe -from freqtrade.data.history import trim_dataframe +from freqtrade.data.converter import clean_ohlcv_dataframe, trim_dataframe from .idatahandler import IDataHandler diff --git a/freqtrade/data/history.py b/freqtrade/data/history.py index f09fe3d6a..cc82217a0 100644 --- a/freqtrade/data/history.py +++ b/freqtrade/data/history.py @@ -50,23 +50,6 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]: return tickerlist[start_index:stop_index] -def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame: - """ - Trim dataframe based on given timerange - :param df: Dataframe to trim - :param timerange: timerange (use start and end date if available) - :param: df_date_col: Column in the dataframe to use as Date column - :return: trimmed dataframe - """ - if timerange.starttype == 'date': - start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc) - df = df.loc[df[df_date_col] >= start, :] - if timerange.stoptype == 'date': - stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc) - df = df.loc[df[df_date_col] <= stop, :] - return df - - def load_tickerdata_file(datadir: Path, pair: str, timeframe: str, timerange: Optional[TimeRange] = None) -> List[Dict]: """ @@ -115,15 +98,6 @@ def store_trades_file(datadir: Path, pair: str, misc.file_dump_json(filename, data, is_zip=is_zip) -def _validate_pairdata(pair, pairdata, timerange: TimeRange): - if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000: - logger.warning('Missing data at start for pair %s, data starts at %s', - pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S')) - if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000: - logger.warning('Missing data at end for pair %s, data ends at %s', - pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S')) - - def load_pair_history(pair: str, timeframe: str, datadir: Path, *, diff --git a/freqtrade/optimize/backtesting.py b/freqtrade/optimize/backtesting.py index a8fe90a06..98ee71a60 100644 --- a/freqtrade/optimize/backtesting.py +++ b/freqtrade/optimize/backtesting.py @@ -16,6 +16,7 @@ from freqtrade import OperationalException from freqtrade.configuration import (TimeRange, remove_credentials, validate_config_consistency) from freqtrade.data import history +from freqtrade.data.converter import trim_dataframe from freqtrade.data.dataprovider import DataProvider from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds from freqtrade.misc import file_dump_json @@ -482,7 +483,7 @@ class Backtesting: # Trim startup period from analyzed dataframe for pair, df in preprocessed.items(): - preprocessed[pair] = history.trim_dataframe(df, timerange) + preprocessed[pair] = trim_dataframe(df, timerange) min_date, max_date = history.get_timerange(preprocessed) logger.info( diff --git a/freqtrade/optimize/hyperopt.py b/freqtrade/optimize/hyperopt.py index 48f883ac5..d29508b49 100644 --- a/freqtrade/optimize/hyperopt.py +++ b/freqtrade/optimize/hyperopt.py @@ -23,7 +23,8 @@ from joblib import (Parallel, cpu_count, delayed, dump, load, from pandas import DataFrame from freqtrade import OperationalException -from freqtrade.data.history import get_timerange, trim_dataframe +from freqtrade.data.history import get_timerange +from freqtrade.data.converter import trim_dataframe from freqtrade.misc import plural, round_dict from freqtrade.optimize.backtesting import Backtesting # Import IHyperOpt and IHyperOptLoss to allow unpickling classes from these modules diff --git a/freqtrade/plot/plotting.py b/freqtrade/plot/plotting.py index db4637ee5..0ef71ed82 100644 --- a/freqtrade/plot/plotting.py +++ b/freqtrade/plot/plotting.py @@ -5,6 +5,7 @@ from typing import Any, Dict, List import pandas as pd from freqtrade.configuration import TimeRange from freqtrade.data import history +from freqtrade.data.converter import trim_dataframe from freqtrade.data.btanalysis import (combine_tickers_with_mean, create_cum_profit, extract_trades_of_period, load_trades) @@ -47,7 +48,7 @@ def init_plotscript(config): db_url=config.get('db_url'), exportfilename=config.get('exportfilename'), ) - trades = history.trim_dataframe(trades, timerange, 'open_time') + trades = trim_dataframe(trades, timerange, 'open_time') return {"tickers": tickers, "trades": trades, "pairs": pairs, diff --git a/tests/data/test_converter.py b/tests/data/test_converter.py index 414551c95..eb8a8e513 100644 --- a/tests/data/test_converter.py +++ b/tests/data/test_converter.py @@ -1,8 +1,11 @@ # pragma pylint: disable=missing-docstring, C0103 import logging -from freqtrade.data.converter import parse_ticker_dataframe, ohlcv_fill_up_missing_data -from freqtrade.data.history import load_pair_history, validate_backtest_data, get_timerange +from freqtrade.configuration.timerange import TimeRange +from freqtrade.data.converter import (ohlcv_fill_up_missing_data, + parse_ticker_dataframe, trim_dataframe) +from freqtrade.data.history import (get_timerange, load_data, + load_pair_history, validate_backtest_data) from tests.conftest import log_has @@ -145,3 +148,43 @@ def test_ohlcv_drop_incomplete(caplog): assert len(data) == 3 assert log_has("Dropping last candle", caplog) + + +def test_trim_dataframe(testdatadir) -> None: + data = load_data( + datadir=testdatadir, + timeframe='1m', + pairs=['UNITTEST/BTC'] + )['UNITTEST/BTC'] + min_date = int(data.iloc[0]['date'].timestamp()) + max_date = int(data.iloc[-1]['date'].timestamp()) + data_modify = data.copy() + + # Remove first 30 minutes (1800 s) + tr = TimeRange('date', None, min_date + 1800, 0) + data_modify = trim_dataframe(data_modify, tr) + assert not data_modify.equals(data) + assert len(data_modify) < len(data) + assert len(data_modify) == len(data) - 30 + assert all(data_modify.iloc[-1] == data.iloc[-1]) + assert all(data_modify.iloc[0] == data.iloc[30]) + + data_modify = data.copy() + # Remove last 30 minutes (1800 s) + tr = TimeRange(None, 'date', 0, max_date - 1800) + data_modify = trim_dataframe(data_modify, tr) + assert not data_modify.equals(data) + assert len(data_modify) < len(data) + assert len(data_modify) == len(data) - 30 + assert all(data_modify.iloc[0] == data.iloc[0]) + assert all(data_modify.iloc[-1] == data.iloc[-31]) + + data_modify = data.copy() + # Remove first 25 and last 30 minutes (1800 s) + tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800) + data_modify = trim_dataframe(data_modify, tr) + assert not data_modify.equals(data) + assert len(data_modify) < len(data) + assert len(data_modify) == len(data) - 55 + # first row matches 25th original row + assert all(data_modify.iloc[0] == data.iloc[25]) diff --git a/tests/data/test_history.py b/tests/data/test_history.py index f13e386f8..5f7d0f5a2 100644 --- a/tests/data/test_history.py +++ b/tests/data/test_history.py @@ -18,10 +18,8 @@ from freqtrade.data.history import (_download_pair_history, load_tickerdata_file, pair_data_filename, pair_trades_filename, refresh_backtest_ohlcv_data, - refresh_backtest_trades_data, - refresh_data, - trim_dataframe, trim_tickerlist, - validate_backtest_data) + refresh_backtest_trades_data, refresh_data, + trim_tickerlist, validate_backtest_data) from freqtrade.exchange import timeframe_to_minutes from freqtrade.misc import file_dump_json from freqtrade.strategy.default_strategy import DefaultStrategy @@ -444,46 +442,6 @@ def test_trim_tickerlist(testdatadir) -> None: assert not ticker -def test_trim_dataframe(testdatadir) -> None: - data = load_data( - datadir=testdatadir, - timeframe='1m', - pairs=['UNITTEST/BTC'] - )['UNITTEST/BTC'] - min_date = int(data.iloc[0]['date'].timestamp()) - max_date = int(data.iloc[-1]['date'].timestamp()) - data_modify = data.copy() - - # Remove first 30 minutes (1800 s) - tr = TimeRange('date', None, min_date + 1800, 0) - data_modify = trim_dataframe(data_modify, tr) - assert not data_modify.equals(data) - assert len(data_modify) < len(data) - assert len(data_modify) == len(data) - 30 - assert all(data_modify.iloc[-1] == data.iloc[-1]) - assert all(data_modify.iloc[0] == data.iloc[30]) - - data_modify = data.copy() - # Remove last 30 minutes (1800 s) - tr = TimeRange(None, 'date', 0, max_date - 1800) - data_modify = trim_dataframe(data_modify, tr) - assert not data_modify.equals(data) - assert len(data_modify) < len(data) - assert len(data_modify) == len(data) - 30 - assert all(data_modify.iloc[0] == data.iloc[0]) - assert all(data_modify.iloc[-1] == data.iloc[-31]) - - data_modify = data.copy() - # Remove first 25 and last 30 minutes (1800 s) - tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800) - data_modify = trim_dataframe(data_modify, tr) - assert not data_modify.equals(data) - assert len(data_modify) < len(data) - assert len(data_modify) == len(data) - 55 - # first row matches 25th original row - assert all(data_modify.iloc[0] == data.iloc[25]) - - def test_file_dump_json_tofile(testdatadir) -> None: file = testdatadir / 'test_{id}.json'.format(id=str(uuid.uuid4())) data = {'bar': 'foo'}