diff --git a/config_examples/config_full.example.json b/config_examples/config_full.example.json index 5a5096f81..b60957b58 100644 --- a/config_examples/config_full.example.json +++ b/config_examples/config_full.example.json @@ -204,6 +204,7 @@ "strategy_path": "user_data/strategies/", "recursive_strategy_search": false, "add_config_files": [], + "reduce_df_footprint": false, "dataformat_ohlcv": "json", "dataformat_trades": "jsongz" } diff --git a/docs/configuration.md b/docs/configuration.md index e773e1878..9dbfe7932 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -253,6 +253,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `add_config_files` | Additional config files. These files will be loaded and merged with the current config file. The files are resolved relative to the initial file.
*Defaults to `[]`*.
**Datatype:** List of strings | `dataformat_ohlcv` | Data format to use to store historical candle (OHLCV) data.
*Defaults to `json`*.
**Datatype:** String | `dataformat_trades` | Data format to use to store historical trades data.
*Defaults to `jsongz`*.
**Datatype:** String +| `reduce_df_footprint` | Recast numeric columns (except the OHLCV columns) to float32/int32, with the objective of reducing RAM/disk usage (and decreasing training/inference time in FreqAI). Currently only affects FreqAI use-cases.
**Datatype:** Boolean.
Default: `False`. ### Parameters in the strategy diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index 8a240c372..c027a12b1 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -50,3 +50,4 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | | **Extraneous parameters** | `keras` | If the selected model makes use of Keras (typical for Tensorflow-based prediction models), this flag needs to be activated so that the model save/loading follows Keras standards.
**Datatype:** Boolean.
Default: `False`. | `conv_width` | The width of a convolutional neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction.
**Datatype:** Integer.
Default: `2`. +| `reduce_df_footprint` | Recast numeric columns (except the OHLCV columns) to float32/int32, with the objective of reducing RAM/disk usage and decreasing training/inference time. This parameter is set at the top level of the Freqtrade configuration file (not inside the `freqai` section).
**Datatype:** Boolean.
def reduce_dataframe_footprint(df: DataFrame) -> DataFrame:
    """
    Downcast 64-bit numeric columns to 32-bit to reduce the memory footprint of a dataframe.

    float64 columns are converted to float32 and int64 columns to int32.
    The OHLCV columns ('open', 'high', 'low', 'close', 'volume') and columns of any other
    dtype are left untouched. int64 columns containing values outside of the int32 range
    are left untouched as well, since casting them would overflow.
    The incoming dataframe is not modified - a converted copy is returned.
    :param df: Dataframe to be converted to float/int 32s
    :return: Dataframe converted to float/int 32s
    """

    logger.debug(f"Memory usage of dataframe is "
                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")

    protected_columns = ('open', 'high', 'low', 'close', 'volume')
    int32_limits = np.iinfo(np.int32)

    # Collect the conversions in a separate mapping instead of mutating the
    # dtypes Series while iterating over it.
    target_dtypes = {}
    for column, dtype in df.dtypes.items():
        if column in protected_columns:
            continue
        if dtype == np.float64:
            target_dtypes[column] = np.float32
        elif dtype == np.int64:
            values = df[column]
            # Only downcast when every value is representable as int32
            # (e.g. millisecond timestamps are not). Empty columns are always safe.
            if values.empty or (values.min() >= int32_limits.min
                                and values.max() <= int32_limits.max):
                target_dtypes[column] = np.int32

    # astype returns a new dataframe; columns missing from the mapping keep their dtype.
    df = df.astype(target_dtypes)

    logger.debug(f"Memory usage after optimization is: "
                 f"{df.memory_usage().sum() / 1024**2:.2f} MB")

    return df
def test_reduce_dataframe_footprint():
    """
    reduce_dataframe_footprint must downcast additional float64/int64 columns to 32 bit,
    leave the OHLCV columns alone and must not modify the dataframe passed in.
    """
    data = generate_test_data('15m', 40)

    data['open_copy'] = data['open']
    data['close_copy'] = data['close']
    # Small integers, explicitly int64 so the starting dtype is platform independent.
    data['int_copy'] = np.arange(len(data), dtype=np.int64)

    assert data['open'].dtype == np.float64
    assert data['open_copy'].dtype == np.float64
    assert data['close_copy'].dtype == np.float64
    assert data['int_copy'].dtype == np.int64

    df2 = reduce_dataframe_footprint(data)

    # Does not modify original dataframe
    assert data['open'].dtype == np.float64
    assert data['open_copy'].dtype == np.float64
    assert data['close_copy'].dtype == np.float64
    assert data['int_copy'].dtype == np.int64

    # skips ohlcv columns
    assert df2['open'].dtype == np.float64
    assert df2['high'].dtype == np.float64
    assert df2['low'].dtype == np.float64
    assert df2['close'].dtype == np.float64
    assert df2['volume'].dtype == np.float64

    # Changes dtype of returned dataframe
    assert df2['open_copy'].dtype == np.float32
    assert df2['close_copy'].dtype == np.float32
    assert df2['int_copy'].dtype == np.int32
    # Values of the downcast integer column are preserved
    assert (df2['int_copy'] == data['int_copy']).all()
True), - ('XGBoostRFRegressor', False, False), - ('CatboostRegressor', False, False), +@pytest.mark.parametrize('model, pca, dbscan, float32', [ + ('LightGBMRegressor', True, False, True), + ('XGBoostRegressor', False, True, False), + ('XGBoostRFRegressor', False, False, False), + ('CatboostRegressor', False, False, False), ]) -def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, dbscan): +def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, dbscan, float32): if is_arm() and model == 'CatboostRegressor': pytest.skip("CatBoost is not supported on ARM") @@ -43,6 +43,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, freqai_conf.update({"strategy": "freqai_test_strat"}) freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca}) freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan}) + freqai_conf.update({"reduce_df_footprint": float32}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf)