Move trim_dataframe from history to converter

This commit is contained in:
Matthias 2019-12-25 15:47:04 +01:00
parent 9d8ea2f13b
commit 416517b0c9
8 changed files with 73 additions and 77 deletions

View File

@ -2,10 +2,12 @@
Functions to convert data from one format to another Functions to convert data from one format to another
""" """
import logging import logging
from datetime import datetime, timezone
import pandas as pd import pandas as pd
from pandas import DataFrame, to_datetime from pandas import DataFrame, to_datetime
from freqtrade.configuration.timerange import TimeRange
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -112,6 +114,23 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str)
return df return df
def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
    """
    Trim dataframe based on given timerange.

    Only bounds whose type is 'date' are applied; both bounds are inclusive.
    If neither bound is of type 'date', the dataframe is returned unchanged.
    :param df: Dataframe to trim
    :param timerange: timerange (use start and end date if available)
    :param df_date_col: Column in the dataframe to use as Date column
    :return: trimmed dataframe
    """
    use_start = timerange.starttype == 'date'
    use_stop = timerange.stoptype == 'date'
    if not (use_start or use_stop):
        # Nothing to trim - hand back the very same object, untouched.
        return df

    dates = df[df_date_col]
    mask = None
    if use_start:
        # startts is an epoch timestamp in seconds; compare as tz-aware UTC datetime.
        start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
        mask = dates >= start
    if use_stop:
        stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
        stop_mask = dates <= stop
        mask = stop_mask if mask is None else mask & stop_mask

    # Apply both bounds in one selection instead of filtering the frame twice.
    return df.loc[mask, :]
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame: def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
""" """
Gets order book list, returns dataframe with below format per suggested by creslin Gets order book list, returns dataframe with below format per suggested by creslin

View File

@ -7,8 +7,7 @@ from pandas import DataFrame, read_json, to_datetime
from freqtrade import misc from freqtrade import misc
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
from freqtrade.data.converter import clean_ohlcv_dataframe from freqtrade.data.converter import clean_ohlcv_dataframe, trim_dataframe
from freqtrade.data.history import trim_dataframe
from .idatahandler import IDataHandler from .idatahandler import IDataHandler

View File

@ -50,23 +50,6 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
return tickerlist[start_index:stop_index] return tickerlist[start_index:stop_index]
def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
"""
Trim dataframe based on given timerange
:param df: Dataframe to trim
:param timerange: timerange (use start and end date if available)
:param df_date_col: Column in the dataframe to use as Date column
:return: trimmed dataframe
"""
# Bounds are applied only when their type is 'date'; both comparisons are inclusive
# and use tz-aware UTC datetimes built from the epoch timestamps.
if timerange.starttype == 'date':
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
df = df.loc[df[df_date_col] >= start, :]
if timerange.stoptype == 'date':
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
df = df.loc[df[df_date_col] <= stop, :]
return df
def load_tickerdata_file(datadir: Path, pair: str, timeframe: str, def load_tickerdata_file(datadir: Path, pair: str, timeframe: str,
timerange: Optional[TimeRange] = None) -> List[Dict]: timerange: Optional[TimeRange] = None) -> List[Dict]:
""" """
@ -115,15 +98,6 @@ def store_trades_file(datadir: Path, pair: str,
misc.file_dump_json(filename, data, is_zip=is_zip) misc.file_dump_json(filename, data, is_zip=is_zip)
# Log a warning when the loaded pair data does not reach the requested 'date' bounds.
# Compares the first field of the first/last row against startts/stopts * 1000
# (presumably a millisecond timestamp - confirm against the data loader).
def _validate_pairdata(pair, pairdata, timerange: TimeRange):
if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
logger.warning('Missing data at start for pair %s, data starts at %s',
pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
logger.warning('Missing data at end for pair %s, data ends at %s',
pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
def load_pair_history(pair: str, def load_pair_history(pair: str,
timeframe: str, timeframe: str,
datadir: Path, *, datadir: Path, *,

View File

@ -16,6 +16,7 @@ from freqtrade import OperationalException
from freqtrade.configuration import (TimeRange, remove_credentials, from freqtrade.configuration import (TimeRange, remove_credentials,
validate_config_consistency) validate_config_consistency)
from freqtrade.data import history from freqtrade.data import history
from freqtrade.data.converter import trim_dataframe
from freqtrade.data.dataprovider import DataProvider from freqtrade.data.dataprovider import DataProvider
from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds
from freqtrade.misc import file_dump_json from freqtrade.misc import file_dump_json
@ -482,7 +483,7 @@ class Backtesting:
# Trim startup period from analyzed dataframe # Trim startup period from analyzed dataframe
for pair, df in preprocessed.items(): for pair, df in preprocessed.items():
preprocessed[pair] = history.trim_dataframe(df, timerange) preprocessed[pair] = trim_dataframe(df, timerange)
min_date, max_date = history.get_timerange(preprocessed) min_date, max_date = history.get_timerange(preprocessed)
logger.info( logger.info(

View File

@ -23,7 +23,8 @@ from joblib import (Parallel, cpu_count, delayed, dump, load,
from pandas import DataFrame from pandas import DataFrame
from freqtrade import OperationalException from freqtrade import OperationalException
from freqtrade.data.history import get_timerange, trim_dataframe from freqtrade.data.history import get_timerange
from freqtrade.data.converter import trim_dataframe
from freqtrade.misc import plural, round_dict from freqtrade.misc import plural, round_dict
from freqtrade.optimize.backtesting import Backtesting from freqtrade.optimize.backtesting import Backtesting
# Import IHyperOpt and IHyperOptLoss to allow unpickling classes from these modules # Import IHyperOpt and IHyperOptLoss to allow unpickling classes from these modules

View File

@ -5,6 +5,7 @@ from typing import Any, Dict, List
import pandas as pd import pandas as pd
from freqtrade.configuration import TimeRange from freqtrade.configuration import TimeRange
from freqtrade.data import history from freqtrade.data import history
from freqtrade.data.converter import trim_dataframe
from freqtrade.data.btanalysis import (combine_tickers_with_mean, from freqtrade.data.btanalysis import (combine_tickers_with_mean,
create_cum_profit, create_cum_profit,
extract_trades_of_period, load_trades) extract_trades_of_period, load_trades)
@ -47,7 +48,7 @@ def init_plotscript(config):
db_url=config.get('db_url'), db_url=config.get('db_url'),
exportfilename=config.get('exportfilename'), exportfilename=config.get('exportfilename'),
) )
trades = history.trim_dataframe(trades, timerange, 'open_time') trades = trim_dataframe(trades, timerange, 'open_time')
return {"tickers": tickers, return {"tickers": tickers,
"trades": trades, "trades": trades,
"pairs": pairs, "pairs": pairs,

View File

@ -1,8 +1,11 @@
# pragma pylint: disable=missing-docstring, C0103 # pragma pylint: disable=missing-docstring, C0103
import logging import logging
from freqtrade.data.converter import parse_ticker_dataframe, ohlcv_fill_up_missing_data from freqtrade.configuration.timerange import TimeRange
from freqtrade.data.history import load_pair_history, validate_backtest_data, get_timerange from freqtrade.data.converter import (ohlcv_fill_up_missing_data,
parse_ticker_dataframe, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data,
load_pair_history, validate_backtest_data)
from tests.conftest import log_has from tests.conftest import log_has
@ -145,3 +148,43 @@ def test_ohlcv_drop_incomplete(caplog):
assert len(data) == 3 assert len(data) == 3
assert log_has("Dropping last candle", caplog) assert log_has("Dropping last candle", caplog)
def test_trim_dataframe(testdatadir) -> None:
    """
    Check trim_dataframe with a start-only, a stop-only and a start+stop timerange.

    The row-count assertions assume the UNITTEST/BTC 1m fixture holds one
    candle per minute without gaps around both ends.
    """
    data = load_data(
        datadir=testdatadir,
        timeframe='1m',
        pairs=['UNITTEST/BTC']
    )['UNITTEST/BTC']
    min_date = int(data.iloc[0]['date'].timestamp())
    max_date = int(data.iloc[-1]['date'].timestamp())

    data_modify = data.copy()
    # Remove first 30 minutes (1800 s)
    tr = TimeRange('date', None, min_date + 1800, 0)
    data_modify = trim_dataframe(data_modify, tr)
    assert not data_modify.equals(data)
    assert len(data_modify) < len(data)
    assert len(data_modify) == len(data) - 30
    assert all(data_modify.iloc[-1] == data.iloc[-1])
    assert all(data_modify.iloc[0] == data.iloc[30])

    data_modify = data.copy()
    # Remove last 30 minutes (1800 s)
    tr = TimeRange(None, 'date', 0, max_date - 1800)
    data_modify = trim_dataframe(data_modify, tr)
    assert not data_modify.equals(data)
    assert len(data_modify) < len(data)
    assert len(data_modify) == len(data) - 30
    assert all(data_modify.iloc[0] == data.iloc[0])
    assert all(data_modify.iloc[-1] == data.iloc[-31])

    data_modify = data.copy()
    # Remove first 25 minutes (1500 s) and last 30 minutes (1800 s)
    tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
    data_modify = trim_dataframe(data_modify, tr)
    assert not data_modify.equals(data)
    assert len(data_modify) < len(data)
    assert len(data_modify) == len(data) - 55
    # first remaining row is the original row at index 25 (i.e. the 26th row)
    assert all(data_modify.iloc[0] == data.iloc[25])
    # the stop bound is the same as in the stop-only case, so the last row matches too
    assert all(data_modify.iloc[-1] == data.iloc[-31])

View File

@ -18,10 +18,8 @@ from freqtrade.data.history import (_download_pair_history,
load_tickerdata_file, pair_data_filename, load_tickerdata_file, pair_data_filename,
pair_trades_filename, pair_trades_filename,
refresh_backtest_ohlcv_data, refresh_backtest_ohlcv_data,
refresh_backtest_trades_data, refresh_backtest_trades_data, refresh_data,
refresh_data, trim_tickerlist, validate_backtest_data)
trim_dataframe, trim_tickerlist,
validate_backtest_data)
from freqtrade.exchange import timeframe_to_minutes from freqtrade.exchange import timeframe_to_minutes
from freqtrade.misc import file_dump_json from freqtrade.misc import file_dump_json
from freqtrade.strategy.default_strategy import DefaultStrategy from freqtrade.strategy.default_strategy import DefaultStrategy
@ -444,46 +442,6 @@ def test_trim_tickerlist(testdatadir) -> None:
assert not ticker assert not ticker
def test_trim_dataframe(testdatadir) -> None:
data = load_data(
datadir=testdatadir,
timeframe='1m',
pairs=['UNITTEST/BTC']
)['UNITTEST/BTC']
min_date = int(data.iloc[0]['date'].timestamp())
max_date = int(data.iloc[-1]['date'].timestamp())
data_modify = data.copy()
# Remove first 30 minutes (1800 s)
tr = TimeRange('date', None, min_date + 1800, 0)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[-1] == data.iloc[-1])
assert all(data_modify.iloc[0] == data.iloc[30])
data_modify = data.copy()
# Remove last 30 minutes (1800 s)
tr = TimeRange(None, 'date', 0, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[0] == data.iloc[0])
assert all(data_modify.iloc[-1] == data.iloc[-31])
data_modify = data.copy()
# Remove first 25 minutes (1500 s) and last 30 minutes (1800 s)
tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 55
# first remaining row is the original row at index 25 (i.e. the 26th row)
assert all(data_modify.iloc[0] == data.iloc[25])
def test_file_dump_json_tofile(testdatadir) -> None: def test_file_dump_json_tofile(testdatadir) -> None:
file = testdatadir / 'test_{id}.json'.format(id=str(uuid.uuid4())) file = testdatadir / 'test_{id}.json'.format(id=str(uuid.uuid4()))
data = {'bar': 'foo'} data = {'bar': 'foo'}