From 3ccc120f924b9fec07a08bdf0e3794b27b557e97 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 4 Nov 2022 17:42:10 +0100 Subject: [PATCH 1/9] add option to force single precision --- freqtrade/freqai/data_kitchen.py | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index e06709b2c..1e3a518d0 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1246,6 +1246,9 @@ class FreqaiDataKitchen: self.get_unique_classes_from_labels(dataframe) + if self.freqai_config.get('convert_df_to_float32', False): + dataframe = self.reduce_dataframe_footprint(dataframe) + return dataframe def fit_labels(self) -> None: @@ -1344,3 +1347,35 @@ class FreqaiDataKitchen: f"Could not find backtesting prediction file at {path_to_predictionfile}" ) return False + + def reduce_dataframe_footprint(self, df: DataFrame) -> DataFrame: + """ + Ensure all values are float32 + """ + start_mem = df.memory_usage().sum() / 1024**2 + print("Memory usage of dataframe is {:.2f} MB".format(start_mem)) + + for col in df.columns[1:]: + col_type = df[col].dtype + + if col_type != object: + c_min = df[col].min() + c_max = df[col].max() + if str(col_type)[:3] == "int": + if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max: + df[col] = df[col].astype(np.int8) + elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max: + df[col] = df[col].astype(np.int16) + elif c_min > np.iinfo(np.int32).min: + df[col] = df[col].astype(np.int32) + else: + if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max: + df[col] = df[col].astype(np.float16) + elif c_min > np.finfo(np.float32).min: + df[col] = df[col].astype(np.float32) + + end_mem = df.memory_usage().sum() / 1024**2 + print("Memory usage after optimization is: {:.2f} MB".format(end_mem)) + print("Decreased by {:.1f}%".format(100 * (start_mem - end_mem) / start_mem)) + + return df From 257c83383146f430d334839b6958a94dcd56657b Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 4 Nov 2022 18:10:04 +0100 Subject: [PATCH 2/9] add doc for single precision, dont allow half precision, add test --- docs/freqai-parameter-table.md | 1 + freqtrade/freqai/data_kitchen.py | 15 ++------------- tests/freqai/test_freqai_interface.py | 13 +++++++------ 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index 28a15913b..f4fbcbf1c 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -18,6 +18,7 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)).
**Datatype:** Positive integer. | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean.
Default: `False`. | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)).
**Datatype:** Boolean.
Default: `False`. +| `convert_df_to_float32` | Recast all numeric columns to float32, with the objective of reducing ram/disk usage and decreasing train/inference timing.
**Datatype:** Boolean.
Default: `False`. | | **Feature parameters** | `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](freqai-feature-engineering.md).
**Datatype:** Dictionary. | `include_timeframes` | A list of timeframes that all indicators in `populate_any_indicators` will be created for. The list is added as features to the base indicators dataset.
**Datatype:** List of timeframes (strings). diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 1e3a518d0..f1c1fa26d 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1357,22 +1357,11 @@ class FreqaiDataKitchen: for col in df.columns[1:]: col_type = df[col].dtype - if col_type != object: - c_min = df[col].min() - c_max = df[col].max() if str(col_type)[:3] == "int": - if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max: - df[col] = df[col].astype(np.int8) - elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max: - df[col] = df[col].astype(np.int16) - elif c_min > np.iinfo(np.int32).min: - df[col] = df[col].astype(np.int32) + df[col] = df[col].astype(np.int32) else: - if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max: - df[col] = df[col].astype(np.float16) - elif c_min > np.finfo(np.float32).min: - df[col] = df[col].astype(np.float32) + df[col] = df[col].astype(np.float32) end_mem = df.memory_usage().sum() / 1024**2 print("Memory usage after optimization is: {:.2f} MB".format(end_mem)) diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 2bc65d52e..a2eed92e3 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -27,13 +27,13 @@ def is_mac() -> bool: return "Darwin" in machine -@pytest.mark.parametrize('model, pca, dbscan', [ - ('LightGBMRegressor', True, False), - ('XGBoostRegressor', False, True), - ('XGBoostRFRegressor', False, False), - ('CatboostRegressor', False, False), +@pytest.mark.parametrize('model, pca, dbscan, float32', [ + ('LightGBMRegressor', True, False, True), + ('XGBoostRegressor', False, True, False), + ('XGBoostRFRegressor', False, False, False), + ('CatboostRegressor', False, False, False), ]) -def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, dbscan): +def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, dbscan, float32): if is_arm() and model == 'CatboostRegressor': pytest.skip("CatBoost is not supported on ARM") @@ -43,6 +43,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, freqai_conf.update({"strategy": "freqai_test_strat"}) freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca}) freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan}) + freqai_conf['freqai'].update({"convert_df_to_float32": float32}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) From 43bdd34964976578acbc99a6f0b7f70f1bef8bbc Mon Sep 17 00:00:00 2001 From: Emre Date: Sat, 5 Nov 2022 19:13:02 +0300 Subject: [PATCH 3/9] Optimize reduce_dataframe_footprint function --- freqtrade/freqai/data_kitchen.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index f1c1fa26d..6b4586adc 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1355,13 +1355,15 @@ class FreqaiDataKitchen: start_mem = df.memory_usage().sum() / 1024**2 print("Memory usage of dataframe is {:.2f} MB".format(start_mem)) - for col in df.columns[1:]: - col_type = df[col].dtype - if col_type != object: - if str(col_type)[:3] == "int": - df[col] = df[col].astype(np.int32) - else: - df[col] = df[col].astype(np.float32) + df_dtypes = df.dtypes + for column, dtype in df_dtypes.items(): + if column in ['open', 'high', 'low', 'close', 'volume']: + continue + if dtype == np.float64: + df_dtypes[column] = np.float32 + elif dtype == np.int64: + df_dtypes[column] = np.int32 + df = df.astype(df_dtypes) end_mem = df.memory_usage().sum() / 1024**2 print("Memory usage after optimization is: {:.2f} MB".format(end_mem)) From e46a57bbd0d7ea7dbd52a5723edb5313753decfc Mon Sep 17 00:00:00 2001 From: robcaulk Date: Fri, 11 Nov 2022 18:05:32 +0100 Subject: [PATCH 4/9] move mem logs to debug --- freqtrade/freqai/data_kitchen.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 6b4586adc..7c4138f6f 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1352,8 +1352,8 @@ class FreqaiDataKitchen: """ Ensure all values are float32 """ - start_mem = df.memory_usage().sum() / 1024**2 - print("Memory usage of dataframe is {:.2f} MB".format(start_mem)) + logger.debug(f"Memory usage of dataframe is " + f"{df.memory_usage().sum() / 1024**2:.2f} MB") df_dtypes = df.dtypes for column, dtype in df_dtypes.items(): @@ -1365,8 +1365,7 @@ class FreqaiDataKitchen: df_dtypes[column] = np.int32 df = df.astype(df_dtypes) - end_mem = df.memory_usage().sum() / 1024**2 - print("Memory usage after optimization is: {:.2f} MB".format(end_mem)) - print("Decreased by {:.1f}%".format(100 * (start_mem - end_mem) / start_mem)) + logger.debug(f"Memory usage after optimization is: " + f"{df.memory_usage().sum() / 1024**2:.2f} MB") return df From 214c6224755f6651938853c0b63581f160dd9060 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 12 Nov 2022 10:38:25 +0100 Subject: [PATCH 5/9] move dataframe converter to converter.py --- docs/configuration.md | 1 + docs/freqai-parameter-table.md | 2 +- freqtrade/constants.py | 1 + freqtrade/data/converter.py | 27 +++++++++++++++++++++++++++ freqtrade/freqai/data_kitchen.py | 27 +++------------------------ tests/freqai/test_freqai_interface.py | 2 +- 6 files changed, 34 insertions(+), 26 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index e773e1878..1fda27893 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -253,6 +253,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `add_config_files` | Additional config files. These files will be loaded and merged with the current config file. The files are resolved relative to the initial file.
*Defaults to `[]`*.
**Datatype:** List of strings | `dataformat_ohlcv` | Data format to use to store historical candle (OHLCV) data.
*Defaults to `json`*.
**Datatype:** String | `dataformat_trades` | Data format to use to store historical trades data.
*Defaults to `jsongz`*.
**Datatype:** String +| `convert_df_to_32bit` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing. (Currently only affects FreqAI use-cases)
**Datatype:** Boolean.
Default: `False`. ### Parameters in the strategy diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index 834960056..7ba54c1eb 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -18,7 +18,6 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)).
**Datatype:** Positive integer. | `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models.
**Datatype:** Boolean.
Default: `False`. | `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)).
**Datatype:** Boolean.
Default: `False`. -| `convert_df_to_float32` | Recast all numeric columns to float32, with the objective of reducing ram/disk usage and decreasing train/inference timing.
**Datatype:** Boolean.
Default: `False`. | `write_metrics_to_disk` | Collect train timings, inference timings and cpu usage in json file.
**Datatype:** Boolean.
Default: `False` | | **Feature parameters** | `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](freqai-feature-engineering.md).
**Datatype:** Dictionary. @@ -51,3 +50,4 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | | **Extraneous parameters** | `keras` | If the selected model makes use of Keras (typical for Tensorflow-based prediction models), this flag needs to be activated so that the model save/loading follows Keras standards.
**Datatype:** Boolean.
Default: `False`. | `conv_width` | The width of a convolutional neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction.
**Datatype:** Integer.
Default: `2`. +| `convert_df_to_32bit` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing. This parameter is set in the main level of the Freqtrade configuration file (not inside FreqAI).
**Datatype:** Boolean.
Default: `False`. diff --git a/freqtrade/constants.py b/freqtrade/constants.py index 022cbd400..a9a38fa89 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -159,6 +159,7 @@ CONF_SCHEMA = { 'ignore_buying_expired_candle_after': {'type': 'number'}, 'trading_mode': {'type': 'string', 'enum': TRADING_MODES}, 'margin_mode': {'type': 'string', 'enum': MARGIN_MODES}, + 'convert_df_to_32bit': {'type': 'number', 'default': False}, 'liquidation_buffer': {'type': 'number', 'minimum': 0.0, 'maximum': 0.99}, 'backtest_breakdown': { 'type': 'array', diff --git a/freqtrade/data/converter.py b/freqtrade/data/converter.py index 98ed15489..6a49a4799 100644 --- a/freqtrade/data/converter.py +++ b/freqtrade/data/converter.py @@ -7,6 +7,7 @@ from datetime import datetime, timezone from operator import itemgetter from typing import Dict, List +import numpy as np import pandas as pd from pandas import DataFrame, to_datetime @@ -313,3 +314,29 @@ def convert_ohlcv_format( if erase and convert_from != convert_to: logger.info(f"Deleting source data for {pair} / {timeframe}") src.ohlcv_purge(pair=pair, timeframe=timeframe, candle_type=candle_type) + + +def reduce_dataframe_footprint(df: DataFrame) -> DataFrame: + """ + Ensure all values are float32 in the incoming dataframe. + :param df: Dataframe to be converted to float/int 32s + :return: Dataframe converted to float/int 32s + """ + + logger.debug(f"Memory usage of dataframe is " + f"{df.memory_usage().sum() / 1024**2:.2f} MB") + + df_dtypes = df.dtypes + for column, dtype in df_dtypes.items(): + if column in ['open', 'high', 'low', 'close', 'volume']: + continue + if dtype == np.float64: + df_dtypes[column] = np.float32 + elif dtype == np.int64: + df_dtypes[column] = np.int32 + df = df.astype(df_dtypes) + + logger.debug(f"Memory usage after optimization is: " + f"{df.memory_usage().sum() / 1024**2:.2f} MB") + + return df diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index e6d50377b..5b2457eb8 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -19,6 +19,7 @@ from sklearn.neighbors import NearestNeighbors from freqtrade.configuration import TimeRange from freqtrade.constants import Config +from freqtrade.data.converter import reduce_dataframe_footprint from freqtrade.exceptions import OperationalException from freqtrade.exchange import timeframe_to_seconds from freqtrade.strategy.interface import IStrategy @@ -1275,8 +1276,8 @@ class FreqaiDataKitchen: dataframe = self.remove_special_chars_from_feature_names(dataframe) - if self.freqai_config.get('convert_df_to_float32', False): - dataframe = self.reduce_dataframe_footprint(dataframe) + if self.config.get('convert_df_to_32bit', False): + dataframe = reduce_dataframe_footprint(dataframe) return dataframe @@ -1492,25 +1493,3 @@ class FreqaiDataKitchen: dataframe.columns = dataframe.columns.str.replace(c, "") return dataframe - - def reduce_dataframe_footprint(self, df: DataFrame) -> DataFrame: - """ - Ensure all values are float32 - """ - logger.debug(f"Memory usage of dataframe is " - f"{df.memory_usage().sum() / 1024**2:.2f} MB") - - df_dtypes = df.dtypes - for column, dtype in df_dtypes.items(): - if column in ['open', 'high', 'low', 'close', 'volume']: - continue - if dtype == np.float64: - df_dtypes[column] = np.float32 - elif dtype == np.int64: - df_dtypes[column] = np.int32 - df = df.astype(df_dtypes) - - logger.debug(f"Memory usage after optimization is: " - f"{df.memory_usage().sum() / 1024**2:.2f} MB") - - return df diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index eab92c7f1..3a040181d 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -43,7 +43,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, freqai_conf.update({"strategy": "freqai_test_strat"}) freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca}) freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan}) - freqai_conf['freqai'].update({"convert_df_to_float32": float32}) + freqai_conf.update({"convert_df_to_float32": float32}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) From 942840da2d0c902fcbb8a9a43cf53dfc3f0feeb0 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 13 Nov 2022 15:22:44 +0100 Subject: [PATCH 6/9] Improve setting wording to keep future possibilities open --- config_examples/config_full.example.json | 1 + docs/configuration.md | 2 +- docs/freqai-parameter-table.md | 2 +- freqtrade/freqai/data_kitchen.py | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/config_examples/config_full.example.json b/config_examples/config_full.example.json index 5a5096f81..b60957b58 100644 --- a/config_examples/config_full.example.json +++ b/config_examples/config_full.example.json @@ -204,6 +204,7 @@ "strategy_path": "user_data/strategies/", "recursive_strategy_search": false, "add_config_files": [], + "reduce_df_footprint": false, "dataformat_ohlcv": "json", "dataformat_trades": "jsongz" } diff --git a/docs/configuration.md b/docs/configuration.md index 1fda27893..d50b798f8 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -253,7 +253,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `add_config_files` | Additional config files. These files will be loaded and merged with the current config file. The files are resolved relative to the initial file.
*Defaults to `[]`*.
**Datatype:** List of strings | `dataformat_ohlcv` | Data format to use to store historical candle (OHLCV) data.
*Defaults to `json`*.
**Datatype:** String | `dataformat_trades` | Data format to use to store historical trades data.
*Defaults to `jsongz`*.
**Datatype:** String -| `convert_df_to_32bit` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing. (Currently only affects FreqAI use-cases)
**Datatype:** Boolean.
Default: `False`. +| `reduce_df_footprint` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing, as well as memory usage. (Currently only affects FreqAI use-cases)
**Datatype:** Boolean.
Default: `False`. ### Parameters in the strategy diff --git a/docs/freqai-parameter-table.md b/docs/freqai-parameter-table.md index 7ba54c1eb..c027a12b1 100644 --- a/docs/freqai-parameter-table.md +++ b/docs/freqai-parameter-table.md @@ -50,4 +50,4 @@ Mandatory parameters are marked as **Required** and have to be set in one of the | | **Extraneous parameters** | `keras` | If the selected model makes use of Keras (typical for Tensorflow-based prediction models), this flag needs to be activated so that the model save/loading follows Keras standards.
**Datatype:** Boolean.
Default: `False`. | `conv_width` | The width of a convolutional neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction.
**Datatype:** Integer.
Default: `2`. -| `convert_df_to_32bit` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing. This parameter is set in the main level of the Freqtrade configuration file (not inside FreqAI).
**Datatype:** Boolean.
Default: `False`. +| `reduce_df_footprint` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing. This parameter is set in the main level of the Freqtrade configuration file (not inside FreqAI).
**Datatype:** Boolean.
Default: `False`. diff --git a/freqtrade/freqai/data_kitchen.py b/freqtrade/freqai/data_kitchen.py index 5b2457eb8..d717858d2 100644 --- a/freqtrade/freqai/data_kitchen.py +++ b/freqtrade/freqai/data_kitchen.py @@ -1276,7 +1276,7 @@ class FreqaiDataKitchen: dataframe = self.remove_special_chars_from_feature_names(dataframe) - if self.config.get('convert_df_to_32bit', False): + if self.config.get('reduce_df_footprint', False): dataframe = reduce_dataframe_footprint(dataframe) return dataframe From a59d61472bc559b94afb1a7b256da7753e5ce3f2 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 13 Nov 2022 15:29:37 +0100 Subject: [PATCH 7/9] Add test for dataframe footprint reduction --- tests/data/test_converter.py | 37 +++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/tests/data/test_converter.py b/tests/data/test_converter.py index f74383d15..760ad8b76 100644 --- a/tests/data/test_converter.py +++ b/tests/data/test_converter.py @@ -3,18 +3,19 @@ import logging from pathlib import Path from shutil import copyfile +import numpy as np import pytest from freqtrade.configuration.timerange import TimeRange from freqtrade.data.converter import (convert_ohlcv_format, convert_trades_format, ohlcv_fill_up_missing_data, ohlcv_to_dataframe, - trades_dict_to_list, trades_remove_duplicates, - trades_to_ohlcv, trim_dataframe) + reduce_dataframe_footprint, trades_dict_to_list, + trades_remove_duplicates, trades_to_ohlcv, trim_dataframe) from freqtrade.data.history import (get_timerange, load_data, load_pair_history, validate_backtest_data) from freqtrade.data.history.idatahandler import IDataHandler from freqtrade.enums import CandleType -from tests.conftest import log_has, log_has_re +from tests.conftest import generate_test_data, log_has, log_has_re from tests.data.test_history import _clean_test_file @@ -344,3 +345,33 @@ def test_convert_ohlcv_format(default_conf, testdatadir, tmpdir, file_base, cand assert file.exists() for file in (files_new): assert not file.exists() + + +def test_reduce_dataframe_footprint(): + data = generate_test_data('15m', 40) + + data['open_copy'] = data['open'] + data['close_copy'] = data['close'] + data['close_copy'] = data['close'] + + assert data['open'].dtype == np.float64 + assert data['open_copy'].dtype == np.float64 + assert data['close_copy'].dtype == np.float64 + + df2 = reduce_dataframe_footprint(data) + + # Does not modify original dataframe + assert data['open'].dtype == np.float64 + assert data['open_copy'].dtype == np.float64 + assert data['close_copy'].dtype == np.float64 + + # skips ohlcv columns + assert df2['open'].dtype == np.float64 + assert df2['high'].dtype == np.float64 + assert df2['low'].dtype == np.float64 + assert df2['close'].dtype == np.float64 + assert df2['volume'].dtype == np.float64 + + # Changes dtype of returned dataframe + assert df2['open_copy'].dtype == np.float32 + assert df2['close_copy'].dtype == np.float32 From 1e9e7887aadb2d39b1c8a1c336076b74337b10c7 Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sun, 13 Nov 2022 15:38:35 +0100 Subject: [PATCH 8/9] fix constants.py, fix freqai test --- docs/configuration.md | 2 +- freqtrade/constants.py | 2 +- tests/freqai/test_freqai_interface.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/configuration.md b/docs/configuration.md index d50b798f8..9dbfe7932 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -253,7 +253,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi | `add_config_files` | Additional config files. These files will be loaded and merged with the current config file. The files are resolved relative to the initial file.
*Defaults to `[]`*.
**Datatype:** List of strings | `dataformat_ohlcv` | Data format to use to store historical candle (OHLCV) data.
*Defaults to `json`*.
**Datatype:** String | `dataformat_trades` | Data format to use to store historical trades data.
*Defaults to `jsongz`*.
**Datatype:** String -| `reduce_df_footprint` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage and decreasing train/inference timing, as well as memory usage. (Currently only affects FreqAI use-cases)
**Datatype:** Boolean.
Default: `False`. +| `reduce_df_footprint` | Recast all numeric columns to float32/int32, with the objective of reducing ram/disk usage (and decreasing train/inference timing in FreqAI). (Currently only affects FreqAI use-cases)
**Datatype:** Boolean.
Default: `False`. ### Parameters in the strategy diff --git a/freqtrade/constants.py b/freqtrade/constants.py index a9a38fa89..0c15883a4 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -159,7 +159,7 @@ CONF_SCHEMA = { 'ignore_buying_expired_candle_after': {'type': 'number'}, 'trading_mode': {'type': 'string', 'enum': TRADING_MODES}, 'margin_mode': {'type': 'string', 'enum': MARGIN_MODES}, - 'convert_df_to_32bit': {'type': 'number', 'default': False}, + 'reduce_df_footprint': {'type': 'number', 'default': False}, 'liquidation_buffer': {'type': 'number', 'minimum': 0.0, 'maximum': 0.99}, 'backtest_breakdown': { 'type': 'array', diff --git a/tests/freqai/test_freqai_interface.py b/tests/freqai/test_freqai_interface.py index 3a040181d..25bc99580 100644 --- a/tests/freqai/test_freqai_interface.py +++ b/tests/freqai/test_freqai_interface.py @@ -43,7 +43,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca, freqai_conf.update({"strategy": "freqai_test_strat"}) freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca}) freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan}) - freqai_conf.update({"convert_df_to_float32": float32}) + freqai_conf.update({"reduce_df_footprint": float32}) strategy = get_patched_freqai_strategy(mocker, freqai_conf) exchange = get_patched_exchange(mocker, freqai_conf) From 42b29cd307a4dbd1f2a477545dcbe353532710aa Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 13 Nov 2022 19:31:49 +0100 Subject: [PATCH 9/9] Fix constants type --- freqtrade/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freqtrade/constants.py b/freqtrade/constants.py index 0c15883a4..534d06fd4 100644 --- a/freqtrade/constants.py +++ b/freqtrade/constants.py @@ -159,7 +159,7 @@ CONF_SCHEMA = { 'ignore_buying_expired_candle_after': {'type': 'number'}, 'trading_mode': {'type': 'string', 'enum': TRADING_MODES}, 'margin_mode': {'type': 'string', 'enum': MARGIN_MODES}, - 'reduce_df_footprint': {'type': 'number', 'default': False}, + 'reduce_df_footprint': {'type': 'boolean', 'default': False}, 'liquidation_buffer': {'type': 'number', 'minimum': 0.0, 'maximum': 0.99}, 'backtest_breakdown': { 'type': 'array',