updated historic data filenames to include the candle type

This commit is contained in:
Sam Germain
2021-11-07 00:35:27 -06:00
parent c8162479d6
commit ee2ad8ca97
8 changed files with 247 additions and 125 deletions

View File

@@ -34,7 +34,12 @@ class HDF5DataHandler(IDataHandler):
if match and len(match.groups()) > 1]
@classmethod
def ohlcv_get_pairs(
    cls,
    datadir: Path,
    timeframe: str,
    candle_type: Optional[str] = ""
) -> List[str]:
    """
    Returns a list of all pairs with ohlcv data available in this datadir
    for the specified timeframe
    :param datadir: Directory to search for ohlcv files
    :param timeframe: Timeframe to search pairs for
    :param candle_type: Candle type that is part of the filename.
        Empty string or None selects files without a candle-type suffix.
    :return: List of Pairs
    """
    suffix = f"-{candle_type}" if candle_type else ""
    # The glob has to carry the same suffix as the regex: files are named
    # "<pair>-<timeframe>[-<candle_type>].h5", so globbing only for
    # "*<timeframe>.h5" would never yield a file with a candle-type suffix.
    # timeframe / suffix / dot are escaped so they are matched literally.
    pattern = re.compile(
        r'^(\S+)(?=\-' + re.escape(timeframe) + re.escape(suffix) + r'\.h5)')
    _tmp = [pattern.search(p.name)
            for p in datadir.glob(f"*{timeframe}{suffix}.h5")]
    # Check if regex found something and only return these results
    return [match[0].replace('_', '/') for match in _tmp if match]
def ohlcv_store(self, pair: str, timeframe: str, data: pd.DataFrame) -> None:
def ohlcv_store(
self,
pair: str,
timeframe: str,
data: pd.DataFrame,
candle_type: Optional[str] = ""
) -> None:
"""
Store data in hdf5 file.
:param pair: Pair - used to generate filename
@@ -59,7 +75,7 @@ class HDF5DataHandler(IDataHandler):
key = self._pair_ohlcv_key(pair, timeframe)
_data = data.copy()
filename = self._pair_data_filename(self._datadir, pair, timeframe)
filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)
ds = pd.HDFStore(filename, mode='a', complevel=9, complib='blosc')
ds.put(key, _data.loc[:, self._columns], format='table', data_columns=['date'])
@@ -67,7 +83,8 @@ class HDF5DataHandler(IDataHandler):
ds.close()
def _ohlcv_load(self, pair: str, timeframe: str,
timerange: Optional[TimeRange] = None) -> pd.DataFrame:
timerange: Optional[TimeRange] = None,
candle_type: Optional[str] = "") -> pd.DataFrame:
"""
Internal method used to load data for one pair from disk.
Implements the loading and conversion to a Pandas dataframe.
@@ -80,7 +97,12 @@ class HDF5DataHandler(IDataHandler):
:return: DataFrame with ohlcv data, or empty DataFrame
"""
key = self._pair_ohlcv_key(pair, timeframe)
filename = self._pair_data_filename(self._datadir, pair, timeframe)
filename = self._pair_data_filename(
self._datadir,
pair,
timeframe,
candle_type=candle_type
)
if not filename.exists():
return pd.DataFrame(columns=self._columns)
@@ -99,20 +121,26 @@ class HDF5DataHandler(IDataHandler):
'low': 'float', 'close': 'float', 'volume': 'float'})
return pairdata
def ohlcv_purge(self, pair: str, timeframe: str, candle_type: Optional[str] = "") -> bool:
    """
    Remove data for this pair
    :param pair: Delete data for this pair.
    :param timeframe: Timeframe (e.g. "5m")
    :param candle_type: Candle type, forwarded to the filename lookup so the
        file belonging to that candle type is the one removed.
    :return: True when deleted, false if file did not exist.
    """
    filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type)
    # Attempt the removal directly instead of exists() + unlink(): this avoids
    # failing when the file disappears between the check and the delete.
    try:
        filename.unlink()
    except FileNotFoundError:
        return False
    return True
def ohlcv_append(self, pair: str, timeframe: str, data: pd.DataFrame) -> None:
def ohlcv_append(
self,
pair: str,
timeframe: str,
data: pd.DataFrame,
candle_type: Optional[str] = ""
) -> None:
"""
Append data to existing data structures
:param pair: Pair
@@ -201,9 +229,17 @@ class HDF5DataHandler(IDataHandler):
return f"{pair}/trades"
@classmethod
def _pair_data_filename(
    cls,
    datadir: Path,
    pair: str,
    timeframe: str,
    candle_type: Optional[str] = ""
) -> Path:
    """
    Build the path of the hdf5 file holding ohlcv data for a pair.
    :param datadir: Directory the data file is located in
    :param pair: Pair - converted via misc.pair_to_filename
    :param timeframe: Timeframe, part of the filename
    :param candle_type: Candle type, appended as "-<candle_type>" when set.
        Empty string or None adds no suffix.
    :return: Path "<datadir>/<pair>-<timeframe>[-<candle_type>].h5"
    """
    pair_s = misc.pair_to_filename(pair)
    # Use a separate, always-string suffix: interpolating candle_type directly
    # would render None as the literal text "None" inside the filename.
    suffix = f"-{candle_type}" if candle_type else ""
    filename = datadir.joinpath(f'{pair_s}-{timeframe}{suffix}.h5')
    return filename
@classmethod

View File

@@ -161,7 +161,8 @@ def _download_pair_history(pair: str, *,
process: str = '',
new_pairs_days: int = 30,
data_handler: IDataHandler = None,
timerange: Optional[TimeRange] = None) -> bool:
timerange: Optional[TimeRange] = None,
candle_type: Optional[str] = "") -> bool:
"""
Download latest candles from the exchange for the pair and timeframe passed in parameters
The data is downloaded starting from the last correct data that
@@ -198,25 +199,28 @@ def _download_pair_history(pair: str, *,
since_ms=since_ms if since_ms else
arrow.utcnow().shift(
days=-new_pairs_days).int_timestamp * 1000,
is_new_pair=data.empty
is_new_pair=data.empty,
candle_type=candle_type,
)
# TODO: Maybe move parsing to exchange class (?)
new_dataframe = ohlcv_to_dataframe(new_data, timeframe, pair,
fill_missing=False, drop_incomplete=True)
fill_missing=False, drop_incomplete=True,
candle_type=candle_type)
if data.empty:
data = new_dataframe
else:
# Run cleaning again to ensure there were no duplicate candles
# Especially between existing and new data.
data = clean_ohlcv_dataframe(data.append(new_dataframe), timeframe, pair,
fill_missing=False, drop_incomplete=False)
fill_missing=False, drop_incomplete=False,
candle_type=candle_type)
logger.debug("New Start: %s",
f"{data.iloc[0]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
logger.debug("New End: %s",
f"{data.iloc[-1]['date']:%Y-%m-%d %H:%M:%S}" if not data.empty else 'None')
data_handler.ohlcv_store(pair, timeframe, data=data)
data_handler.ohlcv_store(pair, timeframe, data=data, candle_type=candle_type)
return True
except Exception:
@@ -229,7 +233,8 @@ def _download_pair_history(pair: str, *,
def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes: List[str],
datadir: Path, timerange: Optional[TimeRange] = None,
new_pairs_days: int = 30, erase: bool = False,
data_format: str = None) -> List[str]:
data_format: str = None,
candle_type: Optional[str] = "") -> List[str]:
"""
Refresh stored ohlcv data for backtesting and hyperopt operations.
Used by freqtrade download-data subcommand.
@@ -245,7 +250,7 @@ def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes
for timeframe in timeframes:
if erase:
if data_handler.ohlcv_purge(pair, timeframe):
if data_handler.ohlcv_purge(pair, timeframe, candle_type=candle_type):
logger.info(
f'Deleting existing data for pair {pair}, interval {timeframe}.')
@@ -254,7 +259,8 @@ def refresh_backtest_ohlcv_data(exchange: Exchange, pairs: List[str], timeframes
_download_pair_history(pair=pair, process=process,
datadir=datadir, exchange=exchange,
timerange=timerange, data_handler=data_handler,
timeframe=str(timeframe), new_pairs_days=new_pairs_days)
timeframe=str(timeframe), new_pairs_days=new_pairs_days,
candle_type=candle_type)
return pairs_not_available
@@ -353,10 +359,16 @@ def refresh_backtest_trades_data(exchange: Exchange, pairs: List[str], datadir:
return pairs_not_available
def convert_trades_to_ohlcv(pairs: List[str], timeframes: List[str],
datadir: Path, timerange: TimeRange, erase: bool = False,
data_format_ohlcv: str = 'json',
data_format_trades: str = 'jsongz') -> None:
def convert_trades_to_ohlcv(
pairs: List[str],
timeframes: List[str],
datadir: Path,
timerange: TimeRange,
erase: bool = False,
data_format_ohlcv: str = 'json',
data_format_trades: str = 'jsongz',
candle_type: Optional[str] = ""
) -> None:
"""
Convert stored trades data to ohlcv data
"""
@@ -367,12 +379,12 @@ def convert_trades_to_ohlcv(pairs: List[str], timeframes: List[str],
trades = data_handler_trades.trades_load(pair)
for timeframe in timeframes:
if erase:
if data_handler_ohlcv.ohlcv_purge(pair, timeframe):
if data_handler_ohlcv.ohlcv_purge(pair, timeframe, candle_type=candle_type):
logger.info(f'Deleting existing data for pair {pair}, interval {timeframe}.')
try:
ohlcv = trades_to_ohlcv(trades, timeframe)
# Store ohlcv
data_handler_ohlcv.ohlcv_store(pair, timeframe, data=ohlcv)
data_handler_ohlcv.ohlcv_store(pair, timeframe, data=ohlcv, candle_type=candle_type)
except ValueError:
logger.exception(f'Could not convert {pair} to OHLCV.')

View File

@@ -35,7 +35,12 @@ class IDataHandler(ABC):
"""
@abstractclassmethod
def ohlcv_get_pairs(cls, datadir: Path, timeframe: str) -> List[str]:
def ohlcv_get_pairs(
cls,
datadir: Path,
timeframe: str,
candle_type: Optional[str] = ""
) -> List[str]:
"""
Returns a list of all pairs with ohlcv data available in this datadir
for the specified timeframe
@@ -45,7 +50,13 @@ class IDataHandler(ABC):
"""
@abstractmethod
def ohlcv_store(self, pair: str, timeframe: str, data: DataFrame) -> None:
def ohlcv_store(
self,
pair: str,
timeframe: str,
data: DataFrame,
candle_type: Optional[str] = ""
) -> None:
"""
Store ohlcv data.
:param pair: Pair - used to generate filename
@@ -57,6 +68,7 @@ class IDataHandler(ABC):
@abstractmethod
def _ohlcv_load(self, pair: str, timeframe: str,
timerange: Optional[TimeRange] = None,
candle_type: Optional[str] = ""
) -> DataFrame:
"""
Internal method used to load data for one pair from disk.
@@ -71,7 +83,7 @@ class IDataHandler(ABC):
"""
@abstractmethod
def ohlcv_purge(self, pair: str, timeframe: str) -> bool:
def ohlcv_purge(self, pair: str, timeframe: str, candle_type: Optional[str] = "") -> bool:
"""
Remove data for this pair
:param pair: Delete data for this pair.
@@ -80,7 +92,13 @@ class IDataHandler(ABC):
"""
@abstractmethod
def ohlcv_append(self, pair: str, timeframe: str, data: DataFrame) -> None:
def ohlcv_append(
self,
pair: str,
timeframe: str,
data: DataFrame,
candle_type: Optional[str] = ""
) -> None:
"""
Append data to existing data structures
:param pair: Pair
@@ -146,7 +164,8 @@ class IDataHandler(ABC):
fill_missing: bool = True,
drop_incomplete: bool = True,
startup_candles: int = 0,
warn_no_data: bool = True
warn_no_data: bool = True,
candle_type: Optional[str] = ""
) -> DataFrame:
"""
Load cached candle (OHLCV) data for the given pair.
@@ -165,9 +184,13 @@ class IDataHandler(ABC):
if startup_candles > 0 and timerange_startup:
timerange_startup.subtract_start(timeframe_to_seconds(timeframe) * startup_candles)
pairdf = self._ohlcv_load(pair, timeframe,
timerange=timerange_startup)
if self._check_empty_df(pairdf, pair, timeframe, warn_no_data):
pairdf = self._ohlcv_load(
pair,
timeframe,
timerange=timerange_startup,
candle_type=candle_type
)
if self._check_empty_df(pairdf, pair, timeframe, warn_no_data, candle_type):
return pairdf
else:
enddate = pairdf.iloc[-1]['date']
@@ -175,7 +198,13 @@ class IDataHandler(ABC):
if timerange_startup:
self._validate_pairdata(pair, pairdf, timerange_startup)
pairdf = trim_dataframe(pairdf, timerange_startup)
if self._check_empty_df(pairdf, pair, timeframe, warn_no_data):
if self._check_empty_df(
pairdf,
pair,
timeframe,
warn_no_data,
candle_type
):
return pairdf
# incomplete candles should only be dropped if we didn't trim the end beforehand.
@@ -183,11 +212,19 @@ class IDataHandler(ABC):
pair=pair,
fill_missing=fill_missing,
drop_incomplete=(drop_incomplete and
enddate == pairdf.iloc[-1]['date']))
self._check_empty_df(pairdf, pair, timeframe, warn_no_data)
enddate == pairdf.iloc[-1]['date']),
candle_type=candle_type)
self._check_empty_df(pairdf, pair, timeframe, warn_no_data, candle_type=candle_type)
return pairdf
def _check_empty_df(self, pairdf: DataFrame, pair: str, timeframe: str, warn_no_data: bool):
def _check_empty_df(
self,
pairdf: DataFrame,
pair: str,
timeframe: str,
warn_no_data: bool,
candle_type: Optional[str] = ""
):
"""
Warn on empty dataframe
"""
@@ -200,7 +237,13 @@ class IDataHandler(ABC):
return True
return False
def _validate_pairdata(self, pair, pairdata: DataFrame, timerange: TimeRange):
def _validate_pairdata(
self,
pair,
pairdata: DataFrame,
timerange: TimeRange,
candle_type: Optional[str] = ""
):
"""
Validates pairdata for missing data at start and end and logs warnings.
:param pairdata: Dataframe to validate

View File

@@ -35,7 +35,12 @@ class JsonDataHandler(IDataHandler):
if match and len(match.groups()) > 1]
@classmethod
def ohlcv_get_pairs(
    cls,
    datadir: Path,
    timeframe: str,
    candle_type: Optional[str] = ""
) -> List[str]:
    """
    Returns a list of all pairs with ohlcv data available in this datadir
    for the specified timeframe
    :param datadir: Directory to search for ohlcv files
    :param timeframe: Timeframe to search pairs for
    :param candle_type: Candle type that is part of the filename.
        Empty string or None selects files without a candle-type suffix.
    :return: List of Pairs
    """
    suffix = f"-{candle_type}" if candle_type else ""
    extension = cls._get_file_extension()
    # Glob and regex are built from the same pieces so they cannot disagree:
    # files are named "<pair>-<timeframe>[-<candle_type>].<extension>", and a
    # glob without the suffix would never yield candle-type specific files.
    pattern = re.compile(
        r'^(\S+)(?=\-' + re.escape(timeframe) + re.escape(suffix)
        + r'\.' + re.escape(extension) + r')')
    _tmp = [pattern.search(p.name)
            for p in datadir.glob(f"*{timeframe}{suffix}.{extension}")]
    # Check if regex found something and only return these results
    return [match[0].replace('_', '/') for match in _tmp if match]
def ohlcv_store(self, pair: str, timeframe: str, data: DataFrame) -> None:
def ohlcv_store(
self,
pair: str,
timeframe: str,
data: DataFrame,
candle_type: Optional[str] = ""
) -> None:
"""
Store data in json format "values".
format looks as follows:
@@ -59,7 +74,12 @@ class JsonDataHandler(IDataHandler):
:param data: Dataframe containing OHLCV data
:return: None
"""
filename = self._pair_data_filename(self._datadir, pair, timeframe)
filename = self._pair_data_filename(
self._datadir,
pair,
timeframe,
candle_type
)
_data = data.copy()
# Convert date to int
_data['date'] = _data['date'].view(np.int64) // 1000 // 1000
@@ -71,6 +91,7 @@ class JsonDataHandler(IDataHandler):
def _ohlcv_load(self, pair: str, timeframe: str,
timerange: Optional[TimeRange] = None,
candle_type: Optional[str] = ""
) -> DataFrame:
"""
Internal method used to load data for one pair from disk.
@@ -83,7 +104,7 @@ class JsonDataHandler(IDataHandler):
all data where possible.
:return: DataFrame with ohlcv data, or empty DataFrame
"""
filename = self._pair_data_filename(self._datadir, pair, timeframe)
filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type=candle_type)
if not filename.exists():
return DataFrame(columns=self._columns)
try:
@@ -100,20 +121,26 @@ class JsonDataHandler(IDataHandler):
infer_datetime_format=True)
return pairdata
def ohlcv_purge(self, pair: str, timeframe: str, candle_type: Optional[str] = "") -> bool:
    """
    Remove data for this pair
    :param pair: Delete data for this pair.
    :param timeframe: Timeframe (e.g. "5m")
    :param candle_type: Candle type, forwarded to the filename lookup so the
        file belonging to that candle type is the one removed.
    :return: True when deleted, false if file did not exist.
    """
    filename = self._pair_data_filename(self._datadir, pair, timeframe, candle_type=candle_type)
    # Attempt the removal directly instead of exists() + unlink(): this avoids
    # failing when the file disappears between the check and the delete.
    try:
        filename.unlink()
    except FileNotFoundError:
        return False
    return True
def ohlcv_append(self, pair: str, timeframe: str, data: DataFrame) -> None:
def ohlcv_append(
self,
pair: str,
timeframe: str,
data: DataFrame,
candle_type: Optional[str] = ""
) -> None:
"""
Append data to existing data structures
:param pair: Pair
@@ -187,9 +214,18 @@ class JsonDataHandler(IDataHandler):
return False
@classmethod
def _pair_data_filename(
    cls,
    datadir: Path,
    pair: str,
    timeframe: str,
    candle_type: Optional[str] = ""
) -> Path:
    """
    Build the path of the file holding ohlcv data for a pair.
    :param datadir: Directory the data file is located in
    :param pair: Pair - converted via misc.pair_to_filename
    :param timeframe: Timeframe, part of the filename
    :param candle_type: Candle type, appended as "-<candle_type>" when set.
        Empty string or None adds no suffix.
    :return: Path "<datadir>/<pair>-<timeframe>[-<candle_type>].<extension>",
        with the extension taken from cls._get_file_extension()
    """
    pair_s = misc.pair_to_filename(pair)
    # Use a separate, always-string suffix: interpolating candle_type directly
    # would render None as the literal text "None" inside the filename.
    suffix = f"-{candle_type}" if candle_type else ""
    filename = datadir.joinpath(
        f'{pair_s}-{timeframe}{suffix}.{cls._get_file_extension()}')
    return filename
@classmethod