move dataframe converter to converter.py
This commit is contained in:
parent
9617d8143d
commit
214c622475
@ -253,6 +253,7 @@ Mandatory parameters are marked as **Required**, which means that they are requi
|
|||||||
| `add_config_files` | Additional config files. These files will be loaded and merged with the current config file. The files are resolved relative to the initial file.<br> *Defaults to `[]`*. <br> **Datatype:** List of strings
|
| `add_config_files` | Additional config files. These files will be loaded and merged with the current config file. The files are resolved relative to the initial file.<br> *Defaults to `[]`*. <br> **Datatype:** List of strings
|
||||||
| `dataformat_ohlcv` | Data format to use to store historical candle (OHLCV) data. <br> *Defaults to `json`*. <br> **Datatype:** String
|
| `dataformat_ohlcv` | Data format to use to store historical candle (OHLCV) data. <br> *Defaults to `json`*. <br> **Datatype:** String
|
||||||
| `dataformat_trades` | Data format to use to store historical trades data. <br> *Defaults to `jsongz`*. <br> **Datatype:** String
|
| `dataformat_trades` | Data format to use to store historical trades data. <br> *Defaults to `jsongz`*. <br> **Datatype:** String
|
||||||
|
| `convert_df_to_32bit` | Recast numeric columns (float64/int64, excluding the `open`, `high`, `low`, `close` and `volume` columns) to float32/int32, with the objective of reducing RAM/disk usage and decreasing training/inference time. (Currently only affects FreqAI use-cases.) <br> *Defaults to `False`*. <br> **Datatype:** Boolean
|
||||||
|
|
||||||
### Parameters in the strategy
|
### Parameters in the strategy
|
||||||
|
|
||||||
|
@ -18,7 +18,6 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
|
|||||||
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
|
| `fit_live_predictions_candles` | Number of historical candles to use for computing target (label) statistics from prediction data, instead of from the training dataset (more information can be found [here](freqai-configuration.md#creating-a-dynamic-target-threshold)). <br> **Datatype:** Positive integer.
|
||||||
| `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. <br> Default: `False`.
|
| `follow_mode` | Use a `follower` that will look for models associated with a specific `identifier` and load those for inferencing. A `follower` will **not** train new models. <br> **Datatype:** Boolean. <br> Default: `False`.
|
||||||
| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.
|
| `continual_learning` | Use the final state of the most recently trained model as starting point for the new model, allowing for incremental learning (more information can be found [here](freqai-running.md#continual-learning)). <br> **Datatype:** Boolean. <br> Default: `False`.
|
||||||
| `convert_df_to_float32` | Recast all numeric columns to float32, with the objective of reducing ram/disk usage and decreasing train/inference timing. <br> **Datatype:** Boolean. <br> Default: `False`.
|
|
||||||
| `write_metrics_to_disk` | Collect train timings, inference timings and cpu usage in json file. <br> **Datatype:** Boolean. <br> Default: `False`
|
| `write_metrics_to_disk` | Collect train timings, inference timings and cpu usage in json file. <br> **Datatype:** Boolean. <br> Default: `False`
|
||||||
| | **Feature parameters**
|
| | **Feature parameters**
|
||||||
| `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](freqai-feature-engineering.md). <br> **Datatype:** Dictionary.
|
| `feature_parameters` | A dictionary containing the parameters used to engineer the feature set. Details and examples are shown [here](freqai-feature-engineering.md). <br> **Datatype:** Dictionary.
|
||||||
@ -51,3 +50,4 @@ Mandatory parameters are marked as **Required** and have to be set in one of the
|
|||||||
| | **Extraneous parameters**
|
| | **Extraneous parameters**
|
||||||
| `keras` | If the selected model makes use of Keras (typical for Tensorflow-based prediction models), this flag needs to be activated so that the model save/loading follows Keras standards. <br> **Datatype:** Boolean. <br> Default: `False`.
|
| `keras` | If the selected model makes use of Keras (typical for Tensorflow-based prediction models), this flag needs to be activated so that the model save/loading follows Keras standards. <br> **Datatype:** Boolean. <br> Default: `False`.
|
||||||
| `conv_width` | The width of a convolutional neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction. <br> **Datatype:** Integer. <br> Default: `2`.
|
| `conv_width` | The width of a convolutional neural network input tensor. This replaces the need for shifting candles (`include_shifted_candles`) by feeding in historical data points as the second dimension of the tensor. Technically, this parameter can also be used for regressors, but it only adds computational overhead and does not change the model training/prediction. <br> **Datatype:** Integer. <br> Default: `2`.
|
||||||
|
| `convert_df_to_32bit` | Recast numeric columns (float64/int64, excluding the `open`, `high`, `low`, `close` and `volume` columns) to float32/int32, with the objective of reducing RAM/disk usage and decreasing training/inference time. This parameter is set at the top level of the Freqtrade configuration file (not inside the `freqai` section). <br> **Datatype:** Boolean. <br> Default: `False`.
|
||||||
|
@ -159,6 +159,7 @@ CONF_SCHEMA = {
|
|||||||
'ignore_buying_expired_candle_after': {'type': 'number'},
|
'ignore_buying_expired_candle_after': {'type': 'number'},
|
||||||
'trading_mode': {'type': 'string', 'enum': TRADING_MODES},
|
'trading_mode': {'type': 'string', 'enum': TRADING_MODES},
|
||||||
'margin_mode': {'type': 'string', 'enum': MARGIN_MODES},
|
'margin_mode': {'type': 'string', 'enum': MARGIN_MODES},
|
||||||
|
'convert_df_to_32bit': {'type': 'number', 'default': False},
|
||||||
'liquidation_buffer': {'type': 'number', 'minimum': 0.0, 'maximum': 0.99},
|
'liquidation_buffer': {'type': 'number', 'minimum': 0.0, 'maximum': 0.99},
|
||||||
'backtest_breakdown': {
|
'backtest_breakdown': {
|
||||||
'type': 'array',
|
'type': 'array',
|
||||||
|
@ -7,6 +7,7 @@ from datetime import datetime, timezone
|
|||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas import DataFrame, to_datetime
|
from pandas import DataFrame, to_datetime
|
||||||
|
|
||||||
@ -313,3 +314,29 @@ def convert_ohlcv_format(
|
|||||||
if erase and convert_from != convert_to:
|
if erase and convert_from != convert_to:
|
||||||
logger.info(f"Deleting source data for {pair} / {timeframe}")
|
logger.info(f"Deleting source data for {pair} / {timeframe}")
|
||||||
src.ohlcv_purge(pair=pair, timeframe=timeframe, candle_type=candle_type)
|
src.ohlcv_purge(pair=pair, timeframe=timeframe, candle_type=candle_type)
|
||||||
|
|
||||||
|
|
||||||
|
def reduce_dataframe_footprint(df: DataFrame) -> DataFrame:
|
||||||
|
"""
|
||||||
|
Downcast float64/int64 columns to float32/int32 (open/high/low/close/volume are left untouched).
|
||||||
|
:param df: Dataframe to be converted to float/int 32s
|
||||||
|
:return: Dataframe converted to float/int 32s
|
||||||
|
"""
|
||||||
|
|
||||||
|
logger.debug(f"Memory usage of dataframe is "
|
||||||
|
f"{df.memory_usage().sum() / 1024**2:.2f} MB")
|
||||||
|
|
||||||
|
df_dtypes = df.dtypes
|
||||||
|
for column, dtype in df_dtypes.items():
|
||||||
|
if column in ['open', 'high', 'low', 'close', 'volume']:
|
||||||
|
continue
|
||||||
|
if dtype == np.float64:
|
||||||
|
df_dtypes[column] = np.float32
|
||||||
|
elif dtype == np.int64:
|
||||||
|
df_dtypes[column] = np.int32
|
||||||
|
df = df.astype(df_dtypes)
|
||||||
|
|
||||||
|
logger.debug(f"Memory usage after optimization is: "
|
||||||
|
f"{df.memory_usage().sum() / 1024**2:.2f} MB")
|
||||||
|
|
||||||
|
return df
|
||||||
|
@ -19,6 +19,7 @@ from sklearn.neighbors import NearestNeighbors
|
|||||||
|
|
||||||
from freqtrade.configuration import TimeRange
|
from freqtrade.configuration import TimeRange
|
||||||
from freqtrade.constants import Config
|
from freqtrade.constants import Config
|
||||||
|
from freqtrade.data.converter import reduce_dataframe_footprint
|
||||||
from freqtrade.exceptions import OperationalException
|
from freqtrade.exceptions import OperationalException
|
||||||
from freqtrade.exchange import timeframe_to_seconds
|
from freqtrade.exchange import timeframe_to_seconds
|
||||||
from freqtrade.strategy.interface import IStrategy
|
from freqtrade.strategy.interface import IStrategy
|
||||||
@ -1275,8 +1276,8 @@ class FreqaiDataKitchen:
|
|||||||
|
|
||||||
dataframe = self.remove_special_chars_from_feature_names(dataframe)
|
dataframe = self.remove_special_chars_from_feature_names(dataframe)
|
||||||
|
|
||||||
if self.freqai_config.get('convert_df_to_float32', False):
|
if self.config.get('convert_df_to_32bit', False):
|
||||||
dataframe = self.reduce_dataframe_footprint(dataframe)
|
dataframe = reduce_dataframe_footprint(dataframe)
|
||||||
|
|
||||||
return dataframe
|
return dataframe
|
||||||
|
|
||||||
@ -1492,25 +1493,3 @@ class FreqaiDataKitchen:
|
|||||||
dataframe.columns = dataframe.columns.str.replace(c, "")
|
dataframe.columns = dataframe.columns.str.replace(c, "")
|
||||||
|
|
||||||
return dataframe
|
return dataframe
|
||||||
|
|
||||||
def reduce_dataframe_footprint(self, df: DataFrame) -> DataFrame:
|
|
||||||
"""
|
|
||||||
Ensure all values are float32
|
|
||||||
"""
|
|
||||||
logger.debug(f"Memory usage of dataframe is "
|
|
||||||
f"{df.memory_usage().sum() / 1024**2:.2f} MB")
|
|
||||||
|
|
||||||
df_dtypes = df.dtypes
|
|
||||||
for column, dtype in df_dtypes.items():
|
|
||||||
if column in ['open', 'high', 'low', 'close', 'volume']:
|
|
||||||
continue
|
|
||||||
if dtype == np.float64:
|
|
||||||
df_dtypes[column] = np.float32
|
|
||||||
elif dtype == np.int64:
|
|
||||||
df_dtypes[column] = np.int32
|
|
||||||
df = df.astype(df_dtypes)
|
|
||||||
|
|
||||||
logger.debug(f"Memory usage after optimization is: "
|
|
||||||
f"{df.memory_usage().sum() / 1024**2:.2f} MB")
|
|
||||||
|
|
||||||
return df
|
|
||||||
|
@ -43,7 +43,7 @@ def test_extract_data_and_train_model_Standard(mocker, freqai_conf, model, pca,
|
|||||||
freqai_conf.update({"strategy": "freqai_test_strat"})
|
freqai_conf.update({"strategy": "freqai_test_strat"})
|
||||||
freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca})
|
freqai_conf['freqai']['feature_parameters'].update({"principal_component_analysis": pca})
|
||||||
freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan})
|
freqai_conf['freqai']['feature_parameters'].update({"use_DBSCAN_to_remove_outliers": dbscan})
|
||||||
freqai_conf['freqai'].update({"convert_df_to_float32": float32})
|
# Top-level key must match what the data kitchen reads: config.get('convert_df_to_32bit').
freqai_conf.update({"convert_df_to_32bit": float32})
|
||||||
|
|
||||||
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
|
strategy = get_patched_freqai_strategy(mocker, freqai_conf)
|
||||||
exchange = get_patched_exchange(mocker, freqai_conf)
|
exchange = get_patched_exchange(mocker, freqai_conf)
|
||||||
|
Loading…
Reference in New Issue
Block a user