From 0883198f40d17d7c1a60d6a0f35a167ce9ae8f9e Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 25 Mar 2023 11:38:16 +0100 Subject: [PATCH 1/5] use parquet in favor of pickle to improve performance for historic_prediction saving, loading, and active post processing --- freqtrade/freqai/data_drawer.py | 35 ++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 14986d854..c51f21647 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -74,8 +74,10 @@ class FreqaiDataDrawer: self.historic_predictions: Dict[str, DataFrame] = {} self.full_path = full_path self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl") - self.historic_predictions_bkp_path = Path( - self.full_path / "historic_predictions.backup.pkl") + self.historic_predictions_path_parquet = Path( + self.full_path / "historic_predictions.parquet") + self.historic_predictions_bkp_path_parquet = Path( + self.full_path / "historic_predictions.backup.parquet") self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json") self.global_metadata_path = Path(self.full_path / "global_metadata.json") self.metric_tracker_path = Path(self.full_path / "metric_tracker.json") @@ -163,11 +165,12 @@ class FreqaiDataDrawer: Locate and load a previously saved historic predictions. :return: bool - whether or not the drawer was located """ - exists = self.historic_predictions_path.is_file() + exists = self.historic_predictions_path_parquet.is_file() + convert = self.historic_predictions_path.is_file() + if exists: try: - with self.historic_predictions_path.open("rb") as fp: - self.historic_predictions = cloudpickle.load(fp) + self.historic_predictions = pd.read_parquet(self.historic_predictions_path_parquet) logger.info( f"Found existing historic predictions at {self.full_path}, but beware " "that statistics may be inaccurate if the bot has been offline for " @@ -176,12 +179,23 @@ class FreqaiDataDrawer: except EOFError: logger.warning( 'Historical prediction file was corrupted. Trying to load backup file.') - with self.historic_predictions_bkp_path.open("rb") as fp: - self.historic_predictions = cloudpickle.load(fp) + self.historic_predictions = pd.read_parquet( + self.historic_predictions_bkp_path_parquet) logger.warning('FreqAI successfully loaded the backup historical predictions file.') + elif not exists and convert: + logger.info("Converting your historic predictions pkl to parquet" + "to improve performance.") + with Path.open(self.historic_predictions_path, "rb") as fp: + self.historic_predictions = cloudpickle.load(fp) + self.historic_predictions.to_parquet(self.historic_predictions_path_parquet) + exists = True + else: - logger.info("Could not find existing historic_predictions, starting from scratch") + logger.warning( + f"Follower could not find historic predictions at {self.full_path} " + "sending null values back to strategy" + ) return exists @@ -189,11 +203,10 @@ class FreqaiDataDrawer: """ Save historic predictions pickle to disk """ - with self.historic_predictions_path.open("wb") as fp: - cloudpickle.dump(self.historic_predictions, fp, protocol=cloudpickle.DEFAULT_PROTOCOL) + self.historic_predictions.to_parquet(self.historic_predictions_path_parquet) # create a backup - shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path) + shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path_parquet) def save_metric_tracker_to_disk(self): """ From b383654378439eb1efc5287d7e89e457cfe6eb2a Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 25 Mar 2023 13:19:53 +0100 Subject: [PATCH 2/5] create a separate folder for the historic_predictions parquet files. Modify save/load functions to accommodate new structure --- freqtrade/freqai/data_drawer.py | 40 +++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index c51f21647..397e8912f 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -74,10 +74,9 @@ class FreqaiDataDrawer: self.historic_predictions: Dict[str, DataFrame] = {} self.full_path = full_path self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl") - self.historic_predictions_path_parquet = Path( - self.full_path / "historic_predictions.parquet") - self.historic_predictions_bkp_path_parquet = Path( - self.full_path / "historic_predictions.backup.parquet") + self.historic_predictions_folder = Path(self.full_path / "historic_predictions") + self.historic_predictions_bkp_folder = Path( + self.full_path / "historic_predictions_backup") self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json") self.global_metadata_path = Path(self.full_path / "global_metadata.json") self.metric_tracker_path = Path(self.full_path / "metric_tracker.json") @@ -165,12 +164,16 @@ class FreqaiDataDrawer: Locate and load a previously saved historic predictions. :return: bool - whether or not the drawer was located """ - exists = self.historic_predictions_path_parquet.is_file() + exists = self.historic_predictions_folder.exists() convert = self.historic_predictions_path.is_file() if exists: try: - self.historic_predictions = pd.read_parquet(self.historic_predictions_path_parquet) + for file_path in self.historic_predictions_folder.glob("*.parquet"): + key = file_path.stem + key.replace("_", "/") + self.historic_predictions[key] = pd.read_parquet(file_path) + logger.info( f"Found existing historic predictions at {self.full_path}, but beware " "that statistics may be inaccurate if the bot has been offline for " @@ -178,17 +181,20 @@ class FreqaiDataDrawer: ) except EOFError: logger.warning( - 'Historical prediction file was corrupted. Trying to load backup file.') - self.historic_predictions = pd.read_parquet( - self.historic_predictions_bkp_path_parquet) - logger.warning('FreqAI successfully loaded the backup historical predictions file.') + 'Historical prediction files were corrupted. Trying to load backup files.') + for file_path in self.historic_predictions_folder.glob("*.parquet"): + key = file_path.stem + key.replace("_", "/") + self.historic_predictions[key] = pd.read_parquet(file_path) + logger.warning('FreqAI successfully loaded the backup ' + 'historical predictions files.') elif not exists and convert: logger.info("Converting your historic predictions pkl to parquet" "to improve performance.") with Path.open(self.historic_predictions_path, "rb") as fp: self.historic_predictions = cloudpickle.load(fp) - self.historic_predictions.to_parquet(self.historic_predictions_path_parquet) + self.save_historic_predictions_to_disk() exists = True else: @@ -203,10 +209,16 @@ class FreqaiDataDrawer: """ Save historic predictions pickle to disk """ - self.historic_predictions.to_parquet(self.historic_predictions_path_parquet) + + self.historic_predictions_folder.mkdir(parents=True, exist_ok=True) + for key, value in self.historic_predictions.items(): + key = key.replace("/", "_") + # pytest.set_trace() + filename = Path(self.historic_predictions_folder / f"{key}.parquet") + value.to_parquet(filename) # create a backup - shutil.copy(self.historic_predictions_path, self.historic_predictions_bkp_path_parquet) + shutil.copytree(self.historic_predictions_folder, self.historic_predictions_bkp_folder) def save_metric_tracker_to_disk(self): """ @@ -688,7 +700,7 @@ class FreqaiDataDrawer: Returns timerange information based on historic predictions file :return: timerange calculated from saved live data """ - if not self.historic_predictions_path.is_file(): + if not self.historic_predictions_folder.exists(): raise OperationalException( 'Historic predictions not found. Historic predictions data is required ' 'to run backtest with the freqai-backtest-live-models option ' From 630cdcb99fe9f678a136fa60acd2c4432c8879df Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 25 Mar 2023 13:27:41 +0100 Subject: [PATCH 3/5] reduce duplicated code --- freqtrade/freqai/data_drawer.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 397e8912f..cfed65771 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -169,11 +169,7 @@ class FreqaiDataDrawer: if exists: try: - for file_path in self.historic_predictions_folder.glob("*.parquet"): - key = file_path.stem - key.replace("_", "/") - self.historic_predictions[key] = pd.read_parquet(file_path) - + self.load_historic_predictions_from_folder() logger.info( f"Found existing historic predictions at {self.full_path}, but beware " "that statistics may be inaccurate if the bot has been offline for " @@ -182,10 +178,7 @@ class FreqaiDataDrawer: except EOFError: logger.warning( 'Historical prediction files were corrupted. Trying to load backup files.') - for file_path in self.historic_predictions_folder.glob("*.parquet"): - key = file_path.stem - key.replace("_", "/") - self.historic_predictions[key] = pd.read_parquet(file_path) + self.load_historic_predictions_from_folder() logger.warning('FreqAI successfully loaded the backup ' 'historical predictions files.') @@ -205,6 +198,18 @@ class FreqaiDataDrawer: return exists + def load_historic_predictions_from_folder(self): + """ + Try to build the historic_predictions dictionary from parquet + files in the historic_predictions_folder + """ + for file_path in self.historic_predictions_folder.glob("*.parquet"): + key = file_path.stem + key.replace("_", "/") + self.historic_predictions[key] = pd.read_parquet(file_path) + + return + def save_historic_predictions_to_disk(self): """ Save historic predictions pickle to disk From 1fe58c393802d3f837363438276695987ba68b0f Mon Sep 17 00:00:00 2001 From: robcaulk Date: Sat, 25 Mar 2023 14:12:06 +0100 Subject: [PATCH 4/5] ensure the backup folder will overwrite existing folders --- freqtrade/freqai/data_drawer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index cfed65771..79ceab3d4 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -223,7 +223,8 @@ class FreqaiDataDrawer: value.to_parquet(filename) # create a backup - shutil.copytree(self.historic_predictions_folder, self.historic_predictions_bkp_folder) + shutil.copytree(self.historic_predictions_folder, + self.historic_predictions_bkp_folder, dirs_exist_ok=True) def save_metric_tracker_to_disk(self): """ From cae8c3a7a8a2a4124b995294640fa475937cfc3b Mon Sep 17 00:00:00 2001 From: Robert Caulk Date: Mon, 27 Mar 2023 07:00:20 -0400 Subject: [PATCH 5/5] Update freqtrade/freqai/data_drawer.py Co-authored-by: Matthias --- freqtrade/freqai/data_drawer.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py index 79ceab3d4..0e41655fe 100644 --- a/freqtrade/freqai/data_drawer.py +++ b/freqtrade/freqai/data_drawer.py @@ -75,8 +75,7 @@ class FreqaiDataDrawer: self.full_path = full_path self.historic_predictions_path = Path(self.full_path / "historic_predictions.pkl") self.historic_predictions_folder = Path(self.full_path / "historic_predictions") - self.historic_predictions_bkp_folder = Path( - self.full_path / "historic_predictions_backup") + self.historic_predictions_bkp_folder = Path(self.full_path / "historic_predictions_backup") self.pair_dictionary_path = Path(self.full_path / "pair_dictionary.json") self.global_metadata_path = Path(self.full_path / "global_metadata.json") self.metric_tracker_path = Path(self.full_path / "metric_tracker.json")