Add follow_mode feature so that secondary bots can be launched with the same identifier and load models trained by the leader

This commit is contained in:
robcaulk 2022-05-30 21:35:48 +02:00
parent 5b4c649d43
commit 606f18e5c1
5 changed files with 99 additions and 16 deletions

View File

@ -66,7 +66,7 @@
],
"train_period": 20,
"backtest_period": 2,
"identifier": "example",
"identifier": "example2",
"live_trained_timestamp": 0,
"corr_pairlist": [
"BTC/USDT:USDT",
@ -86,8 +86,14 @@
"random_state": 1
},
"model_training_parameters": {
"n_estimators": 1000,
"n_estimators": 200,
"task_type": "CPU"
}
},
"bot_name": "",
"force_entry_enable": true,
"initial_state": "running",
"internals": {
"process_throttle_secs": 5
}
}

View File

@ -56,7 +56,7 @@
"15m",
"4h"
],
"train_period": 60,
"train_period": 30,
"backtest_period": 7,
"identifier": "example",
"live_trained_timestamp": 0,

View File

@ -391,7 +391,7 @@ Freqai will train an SVM on the training data (or components if the user activat
`principal_component_analysis`) and remove any data point that it deems to sit beyond the
feature space.
## Stratifying the data
### Stratifying the data
The user can stratify the training/testing data using:
@ -403,10 +403,26 @@ The user can stratify the training/testing data using:
}
```
which will split the data chronologically so that every X data points is a testing data point. In the
which will split the data chronologically so that every Xth data point is a testing data point. In the
present example, the user is asking for every third data point in the dataframe to be used for
testing, the other points are used for training.
### Setting up a follower
The user can define:
```json
"freqai": {
"follow_mode": true,
"identifier": "example"
}
```
to indicate to the bot that it should not train models, but instead should look for models trained
by a leader with the same `identifier`. In this example, the user has a leader bot with the
`identifier: "example"` already running or launching at the same time as the present follower.
The follower will load models created by the leader and run inference on them to obtain predictions.
<!-- ## Dynamic target expectation
The labels used for model training have a unique statistical distribution for each separate model training.

View File

@ -7,6 +7,10 @@ from typing import Any, Dict, Tuple
# import pickle as pk
import numpy as np
from pandas import DataFrame
# from freqtrade.freqai.data_kitchen import FreqaiDataKitchen
logger = logging.getLogger(__name__)
@ -19,7 +23,7 @@ class FreqaiDataDrawer:
This object remains persistent throughout live/dry, unlike FreqaiDataKitchen, which is
reinstantiated for each coin.
"""
def __init__(self, full_path: Path, pair_whitelist):
def __init__(self, full_path: Path, pair_whitelist, follow_mode: bool = False):
# dictionary holding all pair metadata necessary to load in from disk
self.pair_dict: Dict[str, Any] = {}
@ -28,6 +32,7 @@ class FreqaiDataDrawer:
self.model_return_values: Dict[str, Any] = {}
self.pair_data_dict: Dict[str, Any] = {}
self.full_path = full_path
self.follow_mode = follow_mode
self.load_drawer_from_disk()
self.training_queue: Dict[str, int] = {}
# self.create_training_queue(pair_whitelist)
@ -37,8 +42,12 @@ class FreqaiDataDrawer:
if exists:
with open(self.full_path / str('pair_dictionary.json'), "r") as fp:
self.pair_dict = json.load(fp)
else:
elif not self.follow_mode:
logger.info("Could not find existing datadrawer, starting from scratch")
else:
logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
'sending null values back to strategy')
return exists
def save_drawer_to_disk(self):
@ -49,19 +58,25 @@ class FreqaiDataDrawer:
if isinstance(object, np.generic):
return object.item()
def get_pair_dict_info(self, metadata: dict) -> Tuple[str, int, bool, bool]:
    """
    Look up the stored model metadata for the pair named in `metadata`.

    If the pair has no entry yet, an instance that is not in follow_mode registers a
    fresh entry for it. A follower leaves the dictionary untouched and instead flags
    that null values should be sent back to the strategy.
    :param metadata: dict holding the current pair under the 'pair' key
    :return: tuple of (model_filename, trained_timestamp, coin_first, return_null_array)
    """
    pair = metadata['pair']
    pair_in_dict = self.pair_dict.get(pair)

    # Defaults mirror a freshly registered pair. They guarantee that every branch,
    # including the follower branch below, has bound values to return (previously
    # the follower branch raised UnboundLocalError at the return statement).
    model_filename = ''
    trained_timestamp = 0
    coin_first = True
    return_null_array = False

    if pair_in_dict:
        model_filename = pair_in_dict['model_filename']
        trained_timestamp = pair_in_dict['trained_timestamp']
        coin_first = pair_in_dict['first']
    elif not self.follow_mode:
        # first time this pair is seen: register it with the default values
        self.pair_dict[pair] = {
            'model_filename': model_filename,
            'first': coin_first,
            'trained_timestamp': trained_timestamp,
        }
    else:
        # a follower never creates entries, it only reports that none is available yet
        logger.warning(f'Follow mode could not find current pair {pair} in '
                       f'pair_dictionary at path {self.full_path}, sending null values '
                       'back to strategy.')
        return_null_array = True

    return model_filename, trained_timestamp, coin_first, return_null_array
def set_pair_dict_info(self, metadata: dict) -> None:
pair_in_dict = self.pair_dict.get(metadata['pair'])
@ -94,6 +109,9 @@ class FreqaiDataDrawer:
self.model_return_values[pair]['target_mean'] = dh.full_target_mean
self.model_return_values[pair]['target_std'] = dh.full_target_std
# if not self.follow_mode:
# self.save_model_return_values_to_disk()
def append_model_predictions(self, pair: str, predictions, do_preds,
target_mean, target_std, dh, len_df) -> None:
@ -132,3 +150,33 @@ class FreqaiDataDrawer:
dh.full_do_predict = copy.deepcopy(self.model_return_values[pair]['do_preds'])
dh.full_target_mean = copy.deepcopy(self.model_return_values[pair]['target_mean'])
dh.full_target_std = copy.deepcopy(self.model_return_values[pair]['target_std'])
# if not self.follow_mode:
# self.save_model_return_values_to_disk()
def return_null_values_to_strategy(self, dataframe: DataFrame, dh) -> None:
    """
    Fill the `full_*` result attributes of `dh` with zeros.

    Each attribute receives its own zero array with one entry per row of `dataframe`.
    :param dataframe: object whose length sets the size of the zero arrays
    :param dh: object that receives the four `full_*` attributes
    """
    n_rows = len(dataframe)
    null_fields = (
        'full_predictions',
        'full_do_predict',
        'full_target_mean',
        'full_target_std',
    )
    for field_name in null_fields:
        # a separate array per field, so later in-place edits cannot leak between them
        setattr(dh, field_name, np.zeros(n_rows))
# to be used if we want to send predictions directly to the follower instead of forcing
# follower to load models and inference
# def save_model_return_values_to_disk(self) -> None:
# with open(self.full_path / str('model_return_values.json'), "w") as fp:
# json.dump(self.model_return_values, fp, default=self.np_encoder)
# def load_model_return_values_from_disk(self, dh: FreqaiDataKitchen) -> FreqaiDataKitchen:
# exists = Path(self.full_path / str('model_return_values.json')).resolve().exists()
# if exists:
# with open(self.full_path / str('model_return_values.json'), "r") as fp:
# self.model_return_values = json.load(fp)
# elif not self.follow_mode:
# logger.info("Could not find existing datadrawer, starting from scratch")
# else:
# logger.warning(f'Follower could not find pair_dictionary at {self.full_path} '
# 'sending null values back to strategy')
# return exists, dh

View File

@ -54,9 +54,13 @@ class IFreqaiModel(ABC):
self.retrain = False
self.first = True
self.set_full_path()
self.follow_mode = self.freqai_info.get('follow_mode', False)
self.data_drawer = FreqaiDataDrawer(Path(self.full_path),
self.config['exchange']['pair_whitelist'])
self.config['exchange']['pair_whitelist'],
self.follow_mode)
self.lock = threading.Lock()
self.follow_mode = self.freqai_info.get('follow_mode', False)
self.identifier = self.freqai_info.get('identifier', 'no_id_provided')
def assert_config(self, config: Dict[str, Any]) -> None:
@ -105,7 +109,7 @@ class IFreqaiModel(ABC):
# (backtest window, i.e. window immediately following the training window).
# FreqAI slides the window and sequentially builds the backtesting results before returning
# the concatenated results for the full backtesting period back to the strategy.
else:
elif not self.follow_mode:
self.dh = FreqaiDataKitchen(self.config, self.data_drawer, self.live, metadata["pair"])
logger.info(f'Training {len(self.dh.training_timeranges)} timeranges')
dh = self.start_backtesting(dataframe, metadata, self.dh)
@ -138,7 +142,7 @@ class IFreqaiModel(ABC):
for tr_train, tr_backtest in zip(
dh.training_timeranges, dh.backtesting_timeranges
):
(_, _, _) = self.data_drawer.get_pair_dict_info(metadata)
(_, _, _, _) = self.data_drawer.get_pair_dict_info(metadata)
gc.collect()
dh.data = {} # clean the pair specific data between training window sliding
self.training_timerange = tr_train
@ -188,9 +192,15 @@ class IFreqaiModel(ABC):
(model_filename,
trained_timestamp,
coin_first) = self.data_drawer.get_pair_dict_info(metadata)
coin_first,
return_null_array) = self.data_drawer.get_pair_dict_info(metadata)
if (not self.training_on_separate_thread):
# if the files do not yet exist, the follower returns null arrays to strategy
if self.follow_mode and return_null_array:
self.data_drawer.return_null_values_to_strategy(dataframe, dh)
return dh
if (not self.training_on_separate_thread and not self.follow_mode):
file_exists = False
if trained_timestamp != 0: # historical model available
@ -212,8 +222,11 @@ class IFreqaiModel(ABC):
self.retrain_model_on_separate_thread(new_trained_timerange,
metadata, strategy, dh)
else:
elif self.training_on_separate_thread and not self.follow_mode:
logger.info("FreqAI training a new model on background thread.")
elif self.follow_mode:
logger.info('FreqAI instance set to follow_mode, finding existing pair'
f'using { self.identifier }')
self.model = dh.load_data(coin=metadata['pair'])