Move trim_dataframe from history to converter

This commit is contained in:
Matthias 2019-12-25 15:47:04 +01:00
parent 9d8ea2f13b
commit 416517b0c9
8 changed files with 73 additions and 77 deletions

View File

@ -2,10 +2,12 @@
Functions to convert data from one format to another
"""
import logging
from datetime import datetime, timezone
import pandas as pd
from pandas import DataFrame, to_datetime
from freqtrade.configuration.timerange import TimeRange
logger = logging.getLogger(__name__)
@ -112,6 +114,23 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str)
return df
def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
"""
Trim dataframe based on given timerange
:param df: Dataframe to trim
:param timerange: timerange (use start and end date if available)
:param: df_date_col: Column in the dataframe to use as Date column
:return: trimmed dataframe
"""
if timerange.starttype == 'date':
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
df = df.loc[df[df_date_col] >= start, :]
if timerange.stoptype == 'date':
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
df = df.loc[df[df_date_col] <= stop, :]
return df
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
"""
Gets order book list, returns dataframe with below format per suggested by creslin

View File

@ -7,8 +7,7 @@ from pandas import DataFrame, read_json, to_datetime
from freqtrade import misc
from freqtrade.configuration import TimeRange
from freqtrade.data.converter import clean_ohlcv_dataframe
from freqtrade.data.history import trim_dataframe
from freqtrade.data.converter import clean_ohlcv_dataframe, trim_dataframe
from .idatahandler import IDataHandler

View File

@ -50,23 +50,6 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
return tickerlist[start_index:stop_index]
def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
"""
Trim dataframe based on given timerange
:param df: Dataframe to trim
:param timerange: timerange (use start and end date if available)
:param: df_date_col: Column in the dataframe to use as Date column
:return: trimmed dataframe
"""
if timerange.starttype == 'date':
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
df = df.loc[df[df_date_col] >= start, :]
if timerange.stoptype == 'date':
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
df = df.loc[df[df_date_col] <= stop, :]
return df
def load_tickerdata_file(datadir: Path, pair: str, timeframe: str,
timerange: Optional[TimeRange] = None) -> List[Dict]:
"""
@ -115,15 +98,6 @@ def store_trades_file(datadir: Path, pair: str,
misc.file_dump_json(filename, data, is_zip=is_zip)
def _validate_pairdata(pair, pairdata, timerange: TimeRange):
if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
logger.warning('Missing data at start for pair %s, data starts at %s',
pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
logger.warning('Missing data at end for pair %s, data ends at %s',
pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
def load_pair_history(pair: str,
timeframe: str,
datadir: Path, *,

View File

@ -16,6 +16,7 @@ from freqtrade import OperationalException
from freqtrade.configuration import (TimeRange, remove_credentials,
validate_config_consistency)
from freqtrade.data import history
from freqtrade.data.converter import trim_dataframe
from freqtrade.data.dataprovider import DataProvider
from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds
from freqtrade.misc import file_dump_json
@ -482,7 +483,7 @@ class Backtesting:
# Trim startup period from analyzed dataframe
for pair, df in preprocessed.items():
preprocessed[pair] = history.trim_dataframe(df, timerange)
preprocessed[pair] = trim_dataframe(df, timerange)
min_date, max_date = history.get_timerange(preprocessed)
logger.info(

View File

@ -23,7 +23,8 @@ from joblib import (Parallel, cpu_count, delayed, dump, load,
from pandas import DataFrame
from freqtrade import OperationalException
from freqtrade.data.history import get_timerange, trim_dataframe
from freqtrade.data.history import get_timerange
from freqtrade.data.converter import trim_dataframe
from freqtrade.misc import plural, round_dict
from freqtrade.optimize.backtesting import Backtesting
# Import IHyperOpt and IHyperOptLoss to allow unpickling classes from these modules

View File

@ -5,6 +5,7 @@ from typing import Any, Dict, List
import pandas as pd
from freqtrade.configuration import TimeRange
from freqtrade.data import history
from freqtrade.data.converter import trim_dataframe
from freqtrade.data.btanalysis import (combine_tickers_with_mean,
create_cum_profit,
extract_trades_of_period, load_trades)
@ -47,7 +48,7 @@ def init_plotscript(config):
db_url=config.get('db_url'),
exportfilename=config.get('exportfilename'),
)
trades = history.trim_dataframe(trades, timerange, 'open_time')
trades = trim_dataframe(trades, timerange, 'open_time')
return {"tickers": tickers,
"trades": trades,
"pairs": pairs,

View File

@ -1,8 +1,11 @@
# pragma pylint: disable=missing-docstring, C0103
import logging
from freqtrade.data.converter import parse_ticker_dataframe, ohlcv_fill_up_missing_data
from freqtrade.data.history import load_pair_history, validate_backtest_data, get_timerange
from freqtrade.configuration.timerange import TimeRange
from freqtrade.data.converter import (ohlcv_fill_up_missing_data,
parse_ticker_dataframe, trim_dataframe)
from freqtrade.data.history import (get_timerange, load_data,
load_pair_history, validate_backtest_data)
from tests.conftest import log_has
@ -145,3 +148,43 @@ def test_ohlcv_drop_incomplete(caplog):
assert len(data) == 3
assert log_has("Dropping last candle", caplog)
def test_trim_dataframe(testdatadir) -> None:
data = load_data(
datadir=testdatadir,
timeframe='1m',
pairs=['UNITTEST/BTC']
)['UNITTEST/BTC']
min_date = int(data.iloc[0]['date'].timestamp())
max_date = int(data.iloc[-1]['date'].timestamp())
data_modify = data.copy()
# Remove first 30 minutes (1800 s)
tr = TimeRange('date', None, min_date + 1800, 0)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[-1] == data.iloc[-1])
assert all(data_modify.iloc[0] == data.iloc[30])
data_modify = data.copy()
# Remove last 30 minutes (1800 s)
tr = TimeRange(None, 'date', 0, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[0] == data.iloc[0])
assert all(data_modify.iloc[-1] == data.iloc[-31])
data_modify = data.copy()
# Remove first 25 and last 30 minutes (1800 s)
tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 55
# first row matches 25th original row
assert all(data_modify.iloc[0] == data.iloc[25])

View File

@ -18,10 +18,8 @@ from freqtrade.data.history import (_download_pair_history,
load_tickerdata_file, pair_data_filename,
pair_trades_filename,
refresh_backtest_ohlcv_data,
refresh_backtest_trades_data,
refresh_data,
trim_dataframe, trim_tickerlist,
validate_backtest_data)
refresh_backtest_trades_data, refresh_data,
trim_tickerlist, validate_backtest_data)
from freqtrade.exchange import timeframe_to_minutes
from freqtrade.misc import file_dump_json
from freqtrade.strategy.default_strategy import DefaultStrategy
@ -444,46 +442,6 @@ def test_trim_tickerlist(testdatadir) -> None:
assert not ticker
def test_trim_dataframe(testdatadir) -> None:
data = load_data(
datadir=testdatadir,
timeframe='1m',
pairs=['UNITTEST/BTC']
)['UNITTEST/BTC']
min_date = int(data.iloc[0]['date'].timestamp())
max_date = int(data.iloc[-1]['date'].timestamp())
data_modify = data.copy()
# Remove first 30 minutes (1800 s)
tr = TimeRange('date', None, min_date + 1800, 0)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[-1] == data.iloc[-1])
assert all(data_modify.iloc[0] == data.iloc[30])
data_modify = data.copy()
# Remove last 30 minutes (1800 s)
tr = TimeRange(None, 'date', 0, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 30
assert all(data_modify.iloc[0] == data.iloc[0])
assert all(data_modify.iloc[-1] == data.iloc[-31])
data_modify = data.copy()
# Remove first 25 and last 30 minutes (1800 s)
tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
data_modify = trim_dataframe(data_modify, tr)
assert not data_modify.equals(data)
assert len(data_modify) < len(data)
assert len(data_modify) == len(data) - 55
# first row matches 25th original row
assert all(data_modify.iloc[0] == data.iloc[25])
def test_file_dump_json_tofile(testdatadir) -> None:
file = testdatadir / 'test_{id}.json'.format(id=str(uuid.uuid4()))
data = {'bar': 'foo'}