Add follow_mode feature so that secondary bots can be launched with the same identifier and load models trained by the leader

2022-05-30 21:35:48 +02:00 · 2022-05-30 21:35:48 +02:00 · 606f18e5c1
commit 606f18e5c1
parent 5b4c649d43
5 changed files with 99 additions and 16 deletions
--- a/config_examples/config_freqai_futures.example.json
+++ b/config_examples/config_freqai_futures.example.json
@ -66,7 +66,7 @@
        ],
        "train_period": 20,
        "backtest_period": 2,
-        "identifier": "example",
+        "identifier": "example2",
        "live_trained_timestamp": 0,
        "corr_pairlist": [
            "BTC/USDT:USDT",
@ -86,8 +86,14 @@
            "random_state": 1
        },
        "model_training_parameters": {
-            "n_estimators": 1000,
+            "n_estimators": 200,
            "task_type": "CPU"
        }
+    },
+    "bot_name": "",
+    "force_entry_enable": true,
+    "initial_state": "running",
+    "internals": {
+        "process_throttle_secs": 5
    }
 }
--- a/config_examples/config_freqai_spot.example.json
+++ b/config_examples/config_freqai_spot.example.json
@ -56,7 +56,7 @@
            "15m",
            "4h"
        ],
-        "train_period": 60,
+        "train_period": 30,
        "backtest_period": 7,
        "identifier": "example",
        "live_trained_timestamp": 0,
--- a/docs/freqai.md
+++ b/docs/freqai.md
@ -391,7 +391,7 @@ Freqai will train an SVM on the training data (or components if the user activat
 `principal_component_analysis`) and remove any data point that it deems to be sit beyond the 
 feature space.

-## Stratifying the data
+### Stratifying the data

 The user can stratify the training/testing data using:

@ -403,10 +403,26 @@ The user can stratify the training/testing data using:
    }
 ```

-which will split the data chronologically so that every X data points is a testing data point. In the
+which will split the data chronologically so that every Xth data point is a testing data point. In the
 present example, the user is asking for every third data point in the dataframe to be used for 
 testing, the other points are used for training. 

+### Setting up a follower
+
+The user can define:
+
+```json
+    "freqai": {
+        "follow_mode": true,
+        "identifier": "example"
+    }
+```
+
+to indicate to the bot that it should not train models, but instead should look for models trained 
+by a leader with the same `identifier`. In this example, the user has a leader bot with the 
+`identifier: "example"` already running or launching at the same time as the present follower.
+The follower will load models created by the leader and run inference with them to obtain predictions.
+
 <!-- ## Dynamic target expectation

 The labels used for model training have a unique statistical distribution for each separate model training. 
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@ -7,6 +7,10 @@ from typing import Any, Dict, Tuple

 # import pickle as pk
 import numpy as np
+from pandas import DataFrame
+
+
+# from freqtrade.freqai.data_kitchen import FreqaiDataKitchen


 logger = logging.getLogger(__name__)
@ -19,7 +23,7 @@ class FreqaiDataDrawer:
    This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is
    reinstantiated for each coin.
    """
-    def __init__(self, full_path: Path, pair_whitelist):
+    def __init__(self, full_path: Path, pair_whitelist, follow_mode: bool = False):

        # dictionary holding all pair metadata necessary to load in from disk
        self.pair_dict: Dict[str, Any] = {}
@ -28,6 +32,7 @@ class FreqaiDataDrawer:
        self.model_return_values: Dict[str, Any] = {}
        self.pair_data_dict: Dict[str, Any] = {}
        self.full_path = full_path
+        self.follow_mode = follow_mode
        self.load_drawer_from_disk()
        self.training_queue: Dict[str, int] = {}
        # self.create_training_queue(pair_whitelist)
@ -37,8 +42,12 @@ class FreqaiDataDrawer:
        if exists:
            with open(self.full_path / str('pair_dictionary.json'), "r") as fp:
                self.pair_dict = json.load(fp)
-        else:
+        elif not self.follow_mode:
            logger.info("Could not find existing datadrawer, starting from scratch")
+        else:
+            logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
+                           'sending null values back to strategy')
+
        return exists

    def save_drawer_to_disk(self):
@ -49,19 +58,25 @@ class FreqaiDataDrawer:
        if isinstance(object, np.generic):
            return object.item()

-    def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool]:
+    def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]:
        pair_in_dict = self.pair_dict.get(metadata['pair'])
+        return_null_array = False  # flipped only when a follower has no entry for this pair
        if pair_in_dict:
            model_filename = self.pair_dict[metadata['pair']]['model_filename']
            trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp']
            coin_first = self.pair_dict[metadata['pair']]['first']
-        else:
+        elif not self.follow_mode:
            self.pair_dict[metadata['pair']] = {}
            model_filename = self.pair_dict[metadata['pair']]['model_filename'] = ''
            coin_first = self.pair_dict[metadata['pair']]['first'] = True
            trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp'] = 0
+        else:  # follower without an entry for this pair; placeholders keep the return valid
+            logger.warning(f'Follow mode could not find current pair {metadata["pair"]} in '
+                           f'pair_dictionary at path {self.full_path}, sending null values '
+                           'back to strategy.')
+            model_filename, trained_timestamp, coin_first, return_null_array = '', 0, True, True

-        return model_filename, trained_timestamp, coin_first
+        return model_filename, trained_timestamp, coin_first, return_null_array

    def set_pair_dict_info(self, metadata: dict) -> None:
        pair_in_dict = self.pair_dict.get(metadata['pair'])
@ -94,6 +109,9 @@ class FreqaiDataDrawer:
        self.model_return_values[pair]['target_mean'] = dh.full_target_mean
        self.model_return_values[pair]['target_std'] = dh.full_target_std

+        # if not self.follow_mode:
+        #     self.save_model_return_values_to_disk()
+
    def append_model_predictions(self, pair: str, predictions, do_preds,
                                 target_mean, target_std, dh, len_df) -> None:

@ -132,3 +150,33 @@ class FreqaiDataDrawer:
        dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds'])
        dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean'])
        dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std'])
+
+        # if not self.follow_mode:
+        #     self.save_model_return_values_to_disk()
+
+    def return_null_values_to_strategy(self, dataframe: DataFrame, dh) -> None:
+        """Give every prediction array on dh one zero per row of dataframe."""
+        n_rows = len(dataframe)
+        # a separate zero array per attribute, assigned in the original order
+        for attr in ('full_predictions', 'full_do_predict',
+                     'full_target_mean', 'full_target_std'):
+            setattr(dh, attr, np.zeros(n_rows))
+
+    # to be used if we want to send predictions directly to the follower instead of forcing
+    # follower to load models and inference
+    # def save_model_return_values_to_disk(self) -> None:
+    #     with open(self.full_path / str('model_return_values.json'), "w") as fp:
+    #         json.dump(self.model_return_values, fp, default=self.np_encoder)
+
+    # def load_model_return_values_from_disk(self, dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
+    #     exists = Path(self.full_path / str('model_return_values.json')).resolve().exists()
+    #     if exists:
+    #         with open(self.full_path / str('model_return_values.json'), "r") as fp:
+    #             self.model_return_values = json.load(fp)
+    #     elif not self.follow_mode:
+    #         logger.info("Could not find existing datadrawer, starting from scratch")
+    #     else:
+    #         logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
+    #                        'sending null values back to strategy')
+
+    #     return exists, dh
--- a/freqtrade/freqai/freqai_interface.py
+++ b/freqtrade/freqai/freqai_interface.py
@ -54,9 +54,13 @@ class IFreqaiModel(ABC):
        self.retrain = False
        self.first = True
        self.set_full_path()
+        self.follow_mode = self.freqai_info.get('follow_mode', False)
        self.data_drawer = FreqaiDataDrawer(Path(self.full_path),
-                                            self.config['exchange']['pair_whitelist'])
+                                            self.config['exchange']['pair_whitelist'],
+                                            self.follow_mode)
        self.lock = threading.Lock()
+        self.follow_mode = self.freqai_info.get('follow_mode', False)
+        self.identifier = self.freqai_info.get('identifier', 'no_id_provided')

    def assert_config(self, config: Dict[str, Any]) -> None:

@ -105,7 +109,7 @@ class IFreqaiModel(ABC):
        # (backtest window, i.e. window immediately following the training window).
        # FreqAI slides the window and sequentially builds the backtesting results before returning
        # the concatenated results for the full backtesting period back to the strategy.
-        else:
+        elif not self.follow_mode:
            self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"])
            logger.info(f'Training {len(self.dh.training_timeranges)} timeranges')
            dh = self.start_backtesting(dataframe, metadata, self.dh)
@ -138,7 +142,7 @@ class IFreqaiModel(ABC):
        for tr_train, tr_backtest in zip(
            dh.training_timeranges, dh.backtesting_timeranges
        ):
-            (_, _, _) = self.data_drawer.get_pair_dict_info(metadata)
+            (_, _, _, _) = self.data_drawer.get_pair_dict_info(metadata)
            gc.collect()
            dh.data = {}  # clean the pair specific data between training window sliding
            self.training_timerange = tr_train
@ -188,9 +192,15 @@ class IFreqaiModel(ABC):

        (model_filename,
         trained_timestamp,
-         coin_first) = self.data_drawer.get_pair_dict_info(metadata)
+         coin_first,
+         return_null_array) = self.data_drawer.get_pair_dict_info(metadata)

-        if (not self.training_on_separate_thread):
+        # if the files do not yet exist, the follower returns null arrays to strategy
+        if self.follow_mode and return_null_array:
+            self.data_drawer.return_null_values_to_strategy(dataframe, dh)
+            return dh
+
+        if (not self.training_on_separate_thread and not self.follow_mode):
            file_exists = False

            if trained_timestamp != 0:  # historical model available
@ -212,8 +222,11 @@ class IFreqaiModel(ABC):
                    self.retrain_model_on_separate_thread(new_trained_timerange,
                                                          metadata, strategy, dh)

-        else:
+        elif self.training_on_separate_thread and not self.follow_mode:
            logger.info("FreqAI training a new model on background thread.")
+        elif self.follow_mode:
+            logger.info('FreqAI instance set to follow_mode, finding existing pair'
+                        f'using { self.identifier }')

        self.model = dh.load_data(coin=metadata['pair'])