Add follow_mode feature so that secondary bots can be launched with the same identifier and load models trained by the leader

This commit is contained in:
robcaulk 2022-05-30 21:35:48 +02:00
parent 5b4c649d43
commit 606f18e5c1
5 changed files with 99 additions and 16 deletions

View File

@ -66,7 +66,7 @@
], ],
"train_period": 20, "train_period": 20,
"backtest_period": 2, "backtest_period": 2,
"identifier": "example", "identifier": "example2",
"live_trained_timestamp": 0, "live_trained_timestamp": 0,
"corr_pairlist": [ "corr_pairlist": [
"BTC/USDT:USDT", "BTC/USDT:USDT",
@ -86,8 +86,14 @@
"random_state": 1 "random_state": 1
}, },
"model_training_parameters": { "model_training_parameters": {
"n_estimators": 1000, "n_estimators": 200,
"task_type": "CPU" "task_type": "CPU"
} }
},
"bot_name": "",
"force_entry_enable": true,
"initial_state": "running",
"internals": {
"process_throttle_secs": 5
} }
} }

View File

@ -56,7 +56,7 @@
"15m", "15m",
"4h" "4h"
], ],
"train_period": 60, "train_period": 30,
"backtest_period": 7, "backtest_period": 7,
"identifier": "example", "identifier": "example",
"live_trained_timestamp": 0, "live_trained_timestamp": 0,

View File

@ -391,7 +391,7 @@ Freqai will train an SVM on the training data (or components if the user activat
`principal_component_analysis`) and remove any data point that it deems to sit beyond the `principal_component_analysis`) and remove any data point that it deems to sit beyond the
feature space. feature space.
## Stratifying the data ### Stratifying the data
The user can stratify the training/testing data using: The user can stratify the training/testing data using:
@ -403,10 +403,26 @@ The user can stratify the training/testing data using:
} }
``` ```
which will split the data chronologically so that every X data points is a testing data point. In the which will split the data chronologically so that every Xth data point is a testing data point. In the
present example, the user is asking for every third data point in the dataframe to be used for present example, the user is asking for every third data point in the dataframe to be used for
testing, the other points are used for training. testing, the other points are used for training.
### Setting up a follower
The user can define:
```json
"freqai": {
"follow_mode": true,
"identifier": "example"
}
```
to indicate to the bot that it should not train models, but instead should look for models trained
by a leader with the same `identifier`. In this example, the user has a leader bot with the
`identifier: "example"` already running or launching simultaneously with the present follower.
The follower will load models created by the leader and run inference on them to obtain predictions.
<!-- ## Dynamic target expectation <!-- ## Dynamic target expectation
The labels used for model training have a unique statistical distribution for each separate model training. The labels used for model training have a unique statistical distribution for each separate model training.

View File

@ -7,6 +7,10 @@ from typing import Any, Dict, Tuple
# import pickle as pk # import pickle as pk
import numpy as np import numpy as np
from pandas import DataFrame
# from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -19,7 +23,7 @@ class FreqaiDataDrawer:
This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is
reinstantiated for each coin. reinstantiated for each coin.
""" """
def __init__(self, full_path: Path, pair_whitelist): def __init__(self, full_path: Path, pair_whitelist, follow_mode: bool = False):
# dictionary holding all pair metadata necessary to load in from disk # dictionary holding all pair metadata necessary to load in from disk
self.pair_dict: Dict[str, Any] = {} self.pair_dict: Dict[str, Any] = {}
@ -28,6 +32,7 @@ class FreqaiDataDrawer:
self.model_return_values: Dict[str, Any] = {} self.model_return_values: Dict[str, Any] = {}
self.pair_data_dict: Dict[str, Any] = {} self.pair_data_dict: Dict[str, Any] = {}
self.full_path = full_path self.full_path = full_path
self.follow_mode = follow_mode
self.load_drawer_from_disk() self.load_drawer_from_disk()
self.training_queue: Dict[str, int] = {} self.training_queue: Dict[str, int] = {}
# self.create_training_queue(pair_whitelist) # self.create_training_queue(pair_whitelist)
@ -37,8 +42,12 @@ class FreqaiDataDrawer:
if exists: if exists:
with open(self.full_path / str('pair_dictionary.json'), "r") as fp: with open(self.full_path / str('pair_dictionary.json'), "r") as fp:
self.pair_dict = json.load(fp) self.pair_dict = json.load(fp)
else: elif not self.follow_mode:
logger.info("Could not find existing datadrawer, starting from scratch") logger.info("Could not find existing datadrawer, starting from scratch")
else:
logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
'sending null values back to strategy')
return exists return exists
def save_drawer_to_disk(self): def save_drawer_to_disk(self):
@ -49,19 +58,25 @@ class FreqaiDataDrawer:
if isinstance(object, np.generic): if isinstance(object, np.generic):
return object.item() return object.item()
def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool]: def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]:
pair_in_dict = self.pair_dict.get(metadata['pair']) pair_in_dict = self.pair_dict.get(metadata['pair'])
return_null_array = False
if pair_in_dict: if pair_in_dict:
model_filename = self.pair_dict[metadata['pair']]['model_filename'] model_filename = self.pair_dict[metadata['pair']]['model_filename']
trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp'] trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp']
coin_first = self.pair_dict[metadata['pair']]['first'] coin_first = self.pair_dict[metadata['pair']]['first']
else: elif not self.follow_mode:
self.pair_dict[metadata['pair']] = {} self.pair_dict[metadata['pair']] = {}
model_filename = self.pair_dict[metadata['pair']]['model_filename'] = '' model_filename = self.pair_dict[metadata['pair']]['model_filename'] = ''
coin_first = self.pair_dict[metadata['pair']]['first'] = True coin_first = self.pair_dict[metadata['pair']]['first'] = True
trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp'] = 0 trained_timestamp = self.pair_dict[metadata['pair']]['trained_timestamp'] = 0
else:
logger.warning(f'Follow mode could not find current pair {metadata["pair"]} in'
f'pair_dictionary at path {self.full_path}, sending null values'
'back to strategy.')
return_null_array = True
return model_filename, trained_timestamp, coin_first return model_filename, trained_timestamp, coin_first, return_null_array
def set_pair_dict_info(self, metadata: dict) -> None: def set_pair_dict_info(self, metadata: dict) -> None:
pair_in_dict = self.pair_dict.get(metadata['pair']) pair_in_dict = self.pair_dict.get(metadata['pair'])
@ -94,6 +109,9 @@ class FreqaiDataDrawer:
self.model_return_values[pair]['target_mean'] = dh.full_target_mean self.model_return_values[pair]['target_mean'] = dh.full_target_mean
self.model_return_values[pair]['target_std'] = dh.full_target_std self.model_return_values[pair]['target_std'] = dh.full_target_std
# if not self.follow_mode:
# self.save_model_return_values_to_disk()
def append_model_predictions(self, pair: str, predictions, do_preds, def append_model_predictions(self, pair: str, predictions, do_preds,
target_mean, target_std, dh, len_df) -> None: target_mean, target_std, dh, len_df) -> None:
@ -132,3 +150,33 @@ class FreqaiDataDrawer:
dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds']) dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds'])
dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean']) dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean'])
dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std']) dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std'])
# if not self.follow_mode:
# self.save_model_return_values_to_disk()
def return_null_values_to_strategy(self, dataframe: DataFrame, dh) -> None:
len_df = len(dataframe)
dh.full_predictions = np.zeros(len_df)
dh.full_do_predict = np.zeros(len_df)
dh.full_target_mean = np.zeros(len_df)
dh.full_target_std = np.zeros(len_df)
# to be used if we want to send predictions directly to the follower instead of forcing
# the follower to load models and run inference itself
# def save_model_return_values_to_disk(self) -> None:
# with open(self.full_path / str('model_return_values.json'), "w") as fp:
# json.dump(self.model_return_values, fp, default=self.np_encoder)
# def load_model_return_values_from_disk(self, dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
# exists = Path(self.full_path / str('model_return_values.json')).resolve().exists()
# if exists:
# with open(self.full_path / str('model_return_values.json'), "r") as fp:
# self.model_return_values = json.load(fp)
# elif not self.follow_mode:
# logger.info("Could not find existing datadrawer, starting from scratch")
# else:
# logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
# 'sending null values back to strategy')
# return exists, dh

View File

@ -54,9 +54,13 @@ class IFreqaiModel(ABC):
self.retrain = False self.retrain = False
self.first = True self.first = True
self.set_full_path() self.set_full_path()
self.follow_mode = self.freqai_info.get('follow_mode', False)
self.data_drawer = FreqaiDataDrawer(Path(self.full_path), self.data_drawer = FreqaiDataDrawer(Path(self.full_path),
self.config['exchange']['pair_whitelist']) self.config['exchange']['pair_whitelist'],
self.follow_mode)
self.lock = threading.Lock() self.lock = threading.Lock()
self.follow_mode = self.freqai_info.get('follow_mode', False)
self.identifier = self.freqai_info.get('identifier', 'no_id_provided')
def assert_config(self, config: Dict[str, Any]) -> None: def assert_config(self, config: Dict[str, Any]) -> None:
@ -105,7 +109,7 @@ class IFreqaiModel(ABC):
# (backtest window, i.e. window immediately following the training window). # (backtest window, i.e. window immediately following the training window).
# FreqAI slides the window and sequentially builds the backtesting results before returning # FreqAI slides the window and sequentially builds the backtesting results before returning
# the concatenated results for the full backtesting period back to the strategy. # the concatenated results for the full backtesting period back to the strategy.
else: elif not self.follow_mode:
self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"]) self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"])
logger.info(f'Training {len(self.dh.training_timeranges)} timeranges') logger.info(f'Training {len(self.dh.training_timeranges)} timeranges')
dh = self.start_backtesting(dataframe, metadata, self.dh) dh = self.start_backtesting(dataframe, metadata, self.dh)
@ -138,7 +142,7 @@ class IFreqaiModel(ABC):
for tr_train, tr_backtest in zip( for tr_train, tr_backtest in zip(
dh.training_timeranges, dh.backtesting_timeranges dh.training_timeranges, dh.backtesting_timeranges
): ):
(_, _, _) = self.data_drawer.get_pair_dict_info(metadata) (_, _, _, _) = self.data_drawer.get_pair_dict_info(metadata)
gc.collect() gc.collect()
dh.data = {} # clean the pair specific data between training window sliding dh.data = {} # clean the pair specific data between training window sliding
self.training_timerange = tr_train self.training_timerange = tr_train
@ -188,9 +192,15 @@ class IFreqaiModel(ABC):
(model_filename, (model_filename,
trained_timestamp, trained_timestamp,
coin_first) = self.data_drawer.get_pair_dict_info(metadata) coin_first,
return_null_array) = self.data_drawer.get_pair_dict_info(metadata)
if (not self.training_on_separate_thread): # if the files do not yet exist, the follower returns null arrays to strategy
if self.follow_mode and return_null_array:
self.data_drawer.return_null_values_to_strategy(dataframe, dh)
return dh
if (not self.training_on_separate_thread and not self.follow_mode):
file_exists = False file_exists = False
if trained_timestamp != 0: # historical model available if trained_timestamp != 0: # historical model available
@ -212,8 +222,11 @@ class IFreqaiModel(ABC):
self.retrain_model_on_separate_thread(new_trained_timerange, self.retrain_model_on_separate_thread(new_trained_timerange,
metadata, strategy, dh) metadata, strategy, dh)
else: elif self.training_on_separate_thread and not self.follow_mode:
logger.info("FreqAI training a new model on background thread.") logger.info("FreqAI training a new model on background thread.")
elif self.follow_mode:
logger.info('FreqAI instance set to follow_mode, finding existing pair'
f'using { self.identifier }')
self.model = dh.load_data(coin=metadata['pair']) self.model = dh.load_data(coin=metadata['pair'])