add option to force single precision

This commit is contained in:
robcaulk 2022-11-04 17:42:10 +01:00
parent c2130ed3dd
commit 3ccc120f92

View File

@ -1246,6 +1246,9 @@ class FreqaiDataKitchen:
self.get_unique_classes_from_labels(dataframe)
if self.freqai_config.get('convert_df_to_float32', False):
dataframe = self.reduce_dataframe_footprint(dataframe)
return dataframe
def fit_labels(self) -> None:
@ -1344,3 +1347,35 @@ class FreqaiDataKitchen:
f"Could not find backtesting prediction file at {path_to_predictionfile}"
)
return False
def reduce_dataframe_footprint(self, df: DataFrame) -> DataFrame:
    """
    Reduce the memory footprint of `df` by downcasting each non-object
    column (in place) to the smallest integer/float dtype that can hold
    its observed value range.

    The first column is deliberately skipped (`df.columns[1:]`) —
    presumably the date/index column; TODO confirm against callers.
    Note: float16 carries ~3 decimal digits of precision, so downcast
    columns may lose precision relative to the original float64 values.

    :param df: DataFrame to shrink (mutated in place).
    :return: the same DataFrame with downcast column dtypes.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print("Memory usage of dataframe is {:.2f} MB".format(start_mem))
    for col in df.columns[1:]:
        col_type = df[col].dtype
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == "int":
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                # Fix: also bound c_max, otherwise int64 values above
                # int32.max were silently wrapped by the int32 cast.
                # Columns that fit nothing smaller keep their dtype.
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                # Fix: check the upper bound before the float32 cast so
                # out-of-range values are not turned into inf.
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
    end_mem = df.memory_usage().sum() / 1024**2
    print("Memory usage after optimization is: {:.2f} MB".format(end_mem))
    print("Decreased by {:.1f}%".format(100 * (start_mem - end_mem) / start_mem))
    return df