Move trim_dataframe from history to converter
This commit is contained in:
parent
9d8ea2f13b
commit
416517b0c9
@ -2,10 +2,12 @@
|
|||||||
Functions to convert data from one format to another
|
Functions to convert data from one format to another
|
||||||
"""
|
"""
|
||||||
import logging
|
import logging
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas import DataFrame, to_datetime
|
from pandas import DataFrame, to_datetime
|
||||||
|
|
||||||
|
from freqtrade.configuration.timerange import TimeRange
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -112,6 +114,23 @@ def ohlcv_fill_up_missing_data(dataframe: DataFrame, timeframe: str, pair: str)
|
|||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
|
||||||
|
"""
|
||||||
|
Trim dataframe based on given timerange
|
||||||
|
:param df: Dataframe to trim
|
||||||
|
:param timerange: timerange (use start and end date if available)
|
||||||
|
:param df_date_col: Column in the dataframe to use as Date column
|
||||||
|
:return: trimmed dataframe
|
||||||
|
"""
|
||||||
|
if timerange.starttype == 'date':
|
||||||
|
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
|
||||||
|
df = df.loc[df[df_date_col] >= start, :]
|
||||||
|
if timerange.stoptype == 'date':
|
||||||
|
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
|
||||||
|
df = df.loc[df[df_date_col] <= stop, :]
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
|
def order_book_to_dataframe(bids: list, asks: list) -> DataFrame:
|
||||||
"""
|
"""
|
||||||
Gets order book list, returns dataframe with below format per suggested by creslin
|
Gets order book list, returns dataframe with below format per suggested by creslin
|
||||||
|
@ -7,8 +7,7 @@ from pandas import DataFrame, read_json, to_datetime
|
|||||||
|
|
||||||
from freqtrade import misc
|
from freqtrade import misc
|
||||||
from freqtrade.configuration import TimeRange
|
from freqtrade.configuration import TimeRange
|
||||||
from freqtrade.data.converter import clean_ohlcv_dataframe
|
from freqtrade.data.converter import clean_ohlcv_dataframe, trim_dataframe
|
||||||
from freqtrade.data.history import trim_dataframe
|
|
||||||
|
|
||||||
from .idatahandler import IDataHandler
|
from .idatahandler import IDataHandler
|
||||||
|
|
||||||
|
@ -50,23 +50,6 @@ def trim_tickerlist(tickerlist: List[Dict], timerange: TimeRange) -> List[Dict]:
|
|||||||
return tickerlist[start_index:stop_index]
|
return tickerlist[start_index:stop_index]
|
||||||
|
|
||||||
|
|
||||||
def trim_dataframe(df: DataFrame, timerange: TimeRange, df_date_col: str = 'date') -> DataFrame:
|
|
||||||
"""
|
|
||||||
Trim dataframe based on given timerange
|
|
||||||
:param df: Dataframe to trim
|
|
||||||
:param timerange: timerange (use start and end date if available)
|
|
||||||
:param: df_date_col: Column in the dataframe to use as Date column
|
|
||||||
:return: trimmed dataframe
|
|
||||||
"""
|
|
||||||
if timerange.starttype == 'date':
|
|
||||||
start = datetime.fromtimestamp(timerange.startts, tz=timezone.utc)
|
|
||||||
df = df.loc[df[df_date_col] >= start, :]
|
|
||||||
if timerange.stoptype == 'date':
|
|
||||||
stop = datetime.fromtimestamp(timerange.stopts, tz=timezone.utc)
|
|
||||||
df = df.loc[df[df_date_col] <= stop, :]
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
|
||||||
def load_tickerdata_file(datadir: Path, pair: str, timeframe: str,
|
def load_tickerdata_file(datadir: Path, pair: str, timeframe: str,
|
||||||
timerange: Optional[TimeRange] = None) -> List[Dict]:
|
timerange: Optional[TimeRange] = None) -> List[Dict]:
|
||||||
"""
|
"""
|
||||||
@ -115,15 +98,6 @@ def store_trades_file(datadir: Path, pair: str,
|
|||||||
misc.file_dump_json(filename, data, is_zip=is_zip)
|
misc.file_dump_json(filename, data, is_zip=is_zip)
|
||||||
|
|
||||||
|
|
||||||
def _validate_pairdata(pair, pairdata, timerange: TimeRange):
|
|
||||||
if timerange.starttype == 'date' and pairdata[0][0] > timerange.startts * 1000:
|
|
||||||
logger.warning('Missing data at start for pair %s, data starts at %s',
|
|
||||||
pair, arrow.get(pairdata[0][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
|
|
||||||
if timerange.stoptype == 'date' and pairdata[-1][0] < timerange.stopts * 1000:
|
|
||||||
logger.warning('Missing data at end for pair %s, data ends at %s',
|
|
||||||
pair, arrow.get(pairdata[-1][0] // 1000).strftime('%Y-%m-%d %H:%M:%S'))
|
|
||||||
|
|
||||||
|
|
||||||
def load_pair_history(pair: str,
|
def load_pair_history(pair: str,
|
||||||
timeframe: str,
|
timeframe: str,
|
||||||
datadir: Path, *,
|
datadir: Path, *,
|
||||||
|
@ -16,6 +16,7 @@ from freqtrade import OperationalException
|
|||||||
from freqtrade.configuration import (TimeRange, remove_credentials,
|
from freqtrade.configuration import (TimeRange, remove_credentials,
|
||||||
validate_config_consistency)
|
validate_config_consistency)
|
||||||
from freqtrade.data import history
|
from freqtrade.data import history
|
||||||
|
from freqtrade.data.converter import trim_dataframe
|
||||||
from freqtrade.data.dataprovider import DataProvider
|
from freqtrade.data.dataprovider import DataProvider
|
||||||
from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds
|
from freqtrade.exchange import timeframe_to_minutes, timeframe_to_seconds
|
||||||
from freqtrade.misc import file_dump_json
|
from freqtrade.misc import file_dump_json
|
||||||
@ -482,7 +483,7 @@ class Backtesting:
|
|||||||
|
|
||||||
# Trim startup period from analyzed dataframe
|
# Trim startup period from analyzed dataframe
|
||||||
for pair, df in preprocessed.items():
|
for pair, df in preprocessed.items():
|
||||||
preprocessed[pair] = history.trim_dataframe(df, timerange)
|
preprocessed[pair] = trim_dataframe(df, timerange)
|
||||||
min_date, max_date = history.get_timerange(preprocessed)
|
min_date, max_date = history.get_timerange(preprocessed)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
@ -23,7 +23,8 @@ from joblib import (Parallel, cpu_count, delayed, dump, load,
|
|||||||
from pandas import DataFrame
|
from pandas import DataFrame
|
||||||
|
|
||||||
from freqtrade import OperationalException
|
from freqtrade import OperationalException
|
||||||
from freqtrade.data.history import get_timerange, trim_dataframe
|
from freqtrade.data.history import get_timerange
|
||||||
|
from freqtrade.data.converter import trim_dataframe
|
||||||
from freqtrade.misc import plural, round_dict
|
from freqtrade.misc import plural, round_dict
|
||||||
from freqtrade.optimize.backtesting import Backtesting
|
from freqtrade.optimize.backtesting import Backtesting
|
||||||
# Import IHyperOpt and IHyperOptLoss to allow unpickling classes from these modules
|
# Import IHyperOpt and IHyperOptLoss to allow unpickling classes from these modules
|
||||||
|
@ -5,6 +5,7 @@ from typing import Any, Dict, List
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
from freqtrade.configuration import TimeRange
|
from freqtrade.configuration import TimeRange
|
||||||
from freqtrade.data import history
|
from freqtrade.data import history
|
||||||
|
from freqtrade.data.converter import trim_dataframe
|
||||||
from freqtrade.data.btanalysis import (combine_tickers_with_mean,
|
from freqtrade.data.btanalysis import (combine_tickers_with_mean,
|
||||||
create_cum_profit,
|
create_cum_profit,
|
||||||
extract_trades_of_period, load_trades)
|
extract_trades_of_period, load_trades)
|
||||||
@ -47,7 +48,7 @@ def init_plotscript(config):
|
|||||||
db_url=config.get('db_url'),
|
db_url=config.get('db_url'),
|
||||||
exportfilename=config.get('exportfilename'),
|
exportfilename=config.get('exportfilename'),
|
||||||
)
|
)
|
||||||
trades = history.trim_dataframe(trades, timerange, 'open_time')
|
trades = trim_dataframe(trades, timerange, 'open_time')
|
||||||
return {"tickers": tickers,
|
return {"tickers": tickers,
|
||||||
"trades": trades,
|
"trades": trades,
|
||||||
"pairs": pairs,
|
"pairs": pairs,
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
# pragma pylint: disable=missing-docstring, C0103
|
# pragma pylint: disable=missing-docstring, C0103
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from freqtrade.data.converter import parse_ticker_dataframe, ohlcv_fill_up_missing_data
|
from freqtrade.configuration.timerange import TimeRange
|
||||||
from freqtrade.data.history import load_pair_history, validate_backtest_data, get_timerange
|
from freqtrade.data.converter import (ohlcv_fill_up_missing_data,
|
||||||
|
parse_ticker_dataframe, trim_dataframe)
|
||||||
|
from freqtrade.data.history import (get_timerange, load_data,
|
||||||
|
load_pair_history, validate_backtest_data)
|
||||||
from tests.conftest import log_has
|
from tests.conftest import log_has
|
||||||
|
|
||||||
|
|
||||||
@ -145,3 +148,43 @@ def test_ohlcv_drop_incomplete(caplog):
|
|||||||
assert len(data) == 3
|
assert len(data) == 3
|
||||||
|
|
||||||
assert log_has("Dropping last candle", caplog)
|
assert log_has("Dropping last candle", caplog)
|
||||||
|
|
||||||
|
|
||||||
|
def test_trim_dataframe(testdatadir) -> None:
|
||||||
|
data = load_data(
|
||||||
|
datadir=testdatadir,
|
||||||
|
timeframe='1m',
|
||||||
|
pairs=['UNITTEST/BTC']
|
||||||
|
)['UNITTEST/BTC']
|
||||||
|
min_date = int(data.iloc[0]['date'].timestamp())
|
||||||
|
max_date = int(data.iloc[-1]['date'].timestamp())
|
||||||
|
data_modify = data.copy()
|
||||||
|
|
||||||
|
# Remove first 30 minutes (1800 s)
|
||||||
|
tr = TimeRange('date', None, min_date + 1800, 0)
|
||||||
|
data_modify = trim_dataframe(data_modify, tr)
|
||||||
|
assert not data_modify.equals(data)
|
||||||
|
assert len(data_modify) < len(data)
|
||||||
|
assert len(data_modify) == len(data) - 30
|
||||||
|
assert all(data_modify.iloc[-1] == data.iloc[-1])
|
||||||
|
assert all(data_modify.iloc[0] == data.iloc[30])
|
||||||
|
|
||||||
|
data_modify = data.copy()
|
||||||
|
# Remove last 30 minutes (1800 s)
|
||||||
|
tr = TimeRange(None, 'date', 0, max_date - 1800)
|
||||||
|
data_modify = trim_dataframe(data_modify, tr)
|
||||||
|
assert not data_modify.equals(data)
|
||||||
|
assert len(data_modify) < len(data)
|
||||||
|
assert len(data_modify) == len(data) - 30
|
||||||
|
assert all(data_modify.iloc[0] == data.iloc[0])
|
||||||
|
assert all(data_modify.iloc[-1] == data.iloc[-31])
|
||||||
|
|
||||||
|
data_modify = data.copy()
|
||||||
|
# Remove first 25 minutes (1500 s) and last 30 minutes (1800 s)
|
||||||
|
tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
|
||||||
|
data_modify = trim_dataframe(data_modify, tr)
|
||||||
|
assert not data_modify.equals(data)
|
||||||
|
assert len(data_modify) < len(data)
|
||||||
|
assert len(data_modify) == len(data) - 55
|
||||||
|
# first row matches the original row at index 25
|
||||||
|
assert all(data_modify.iloc[0] == data.iloc[25])
|
||||||
|
@ -18,10 +18,8 @@ from freqtrade.data.history import (_download_pair_history,
|
|||||||
load_tickerdata_file, pair_data_filename,
|
load_tickerdata_file, pair_data_filename,
|
||||||
pair_trades_filename,
|
pair_trades_filename,
|
||||||
refresh_backtest_ohlcv_data,
|
refresh_backtest_ohlcv_data,
|
||||||
refresh_backtest_trades_data,
|
refresh_backtest_trades_data, refresh_data,
|
||||||
refresh_data,
|
trim_tickerlist, validate_backtest_data)
|
||||||
trim_dataframe, trim_tickerlist,
|
|
||||||
validate_backtest_data)
|
|
||||||
from freqtrade.exchange import timeframe_to_minutes
|
from freqtrade.exchange import timeframe_to_minutes
|
||||||
from freqtrade.misc import file_dump_json
|
from freqtrade.misc import file_dump_json
|
||||||
from freqtrade.strategy.default_strategy import DefaultStrategy
|
from freqtrade.strategy.default_strategy import DefaultStrategy
|
||||||
@ -444,46 +442,6 @@ def test_trim_tickerlist(testdatadir) -> None:
|
|||||||
assert not ticker
|
assert not ticker
|
||||||
|
|
||||||
|
|
||||||
def test_trim_dataframe(testdatadir) -> None:
|
|
||||||
data = load_data(
|
|
||||||
datadir=testdatadir,
|
|
||||||
timeframe='1m',
|
|
||||||
pairs=['UNITTEST/BTC']
|
|
||||||
)['UNITTEST/BTC']
|
|
||||||
min_date = int(data.iloc[0]['date'].timestamp())
|
|
||||||
max_date = int(data.iloc[-1]['date'].timestamp())
|
|
||||||
data_modify = data.copy()
|
|
||||||
|
|
||||||
# Remove first 30 minutes (1800 s)
|
|
||||||
tr = TimeRange('date', None, min_date + 1800, 0)
|
|
||||||
data_modify = trim_dataframe(data_modify, tr)
|
|
||||||
assert not data_modify.equals(data)
|
|
||||||
assert len(data_modify) < len(data)
|
|
||||||
assert len(data_modify) == len(data) - 30
|
|
||||||
assert all(data_modify.iloc[-1] == data.iloc[-1])
|
|
||||||
assert all(data_modify.iloc[0] == data.iloc[30])
|
|
||||||
|
|
||||||
data_modify = data.copy()
|
|
||||||
# Remove last 30 minutes (1800 s)
|
|
||||||
tr = TimeRange(None, 'date', 0, max_date - 1800)
|
|
||||||
data_modify = trim_dataframe(data_modify, tr)
|
|
||||||
assert not data_modify.equals(data)
|
|
||||||
assert len(data_modify) < len(data)
|
|
||||||
assert len(data_modify) == len(data) - 30
|
|
||||||
assert all(data_modify.iloc[0] == data.iloc[0])
|
|
||||||
assert all(data_modify.iloc[-1] == data.iloc[-31])
|
|
||||||
|
|
||||||
data_modify = data.copy()
|
|
||||||
# Remove first 25 and last 30 minutes (1800 s)
|
|
||||||
tr = TimeRange('date', 'date', min_date + 1500, max_date - 1800)
|
|
||||||
data_modify = trim_dataframe(data_modify, tr)
|
|
||||||
assert not data_modify.equals(data)
|
|
||||||
assert len(data_modify) < len(data)
|
|
||||||
assert len(data_modify) == len(data) - 55
|
|
||||||
# first row matches 25th original row
|
|
||||||
assert all(data_modify.iloc[0] == data.iloc[25])
|
|
||||||
|
|
||||||
|
|
||||||
def test_file_dump_json_tofile(testdatadir) -> None:
|
def test_file_dump_json_tofile(testdatadir) -> None:
|
||||||
file = testdatadir / 'test_{id}.json'.format(id=str(uuid.uuid4()))
|
file = testdatadir / 'test_{id}.json'.format(id=str(uuid.uuid4()))
|
||||||
data = {'bar': 'foo'}
|
data = {'bar': 'foo'}
|
||||||
|
Loading…
Reference in New Issue
Block a user