From bf7ceba95857ab6880929c8387d32a84155d92fd Mon Sep 17 00:00:00 2001
From: robcaulk
Date: Mon, 15 Aug 2022 18:01:15 +0200
Subject: [PATCH] set cpu threads in config

---
 .gitignore                                     |   1 +
 config_examples/config_freqai-rl.example.json  | 110 ++++++++++++++++++
 .../RL/BaseReinforcementLearningModel.py       |  19 ++-
 freqtrade/freqai/data_drawer.py                |   7 +-
 .../prediction_models/CatboostClassifier.py    |   2 +-
 .../prediction_models/CatboostRegressor.py     |   2 +-
 .../ReinforcementLearningPPO.py                |  13 ++-
 .../ReinforcementLearningPPO_multiproc.py      |  15 ++-
 .../ReinforcementLearningTDQN.py               |  11 +-
 9 files changed, 159 insertions(+), 21 deletions(-)
 create mode 100644 config_examples/config_freqai-rl.example.json

diff --git a/.gitignore b/.gitignore
index e400c01f5..2d2d526d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -113,3 +113,4 @@ target/
 !config_examples/config_full.example.json
 !config_examples/config_kraken.example.json
 !config_examples/config_freqai.example.json
+!config_examples/config_freqai-rl.example.json
diff --git a/config_examples/config_freqai-rl.example.json b/config_examples/config_freqai-rl.example.json
new file mode 100644
index 000000000..826fe7187
--- /dev/null
+++ b/config_examples/config_freqai-rl.example.json
@@ -0,0 +1,110 @@
+{
+    "trading_mode": "futures",
+    "new_pairs_days": 30,
+    "margin_mode": "isolated",
+    "max_open_trades": 8,
+    "stake_currency": "USDT",
+    "stake_amount": 1000,
+    "tradable_balance_ratio": 1,
+    "fiat_display_currency": "USD",
+    "dry_run": true,
+    "timeframe": "5m",
+    "dataformat_ohlcv": "json",
+    "dry_run_wallet": 12000,
+    "cancel_open_orders_on_exit": true,
+    "unfilledtimeout": {
+        "entry": 10,
+        "exit": 30
+    },
+    "exchange": {
+        "name": "binance",
+        "key": "",
+        "secret": "",
+        "ccxt_config": {
+            "enableRateLimit": true
+        },
+        "ccxt_async_config": {
+            "enableRateLimit": true,
+            "rateLimit": 200
+        },
+        "pair_whitelist": [
+            "1INCH/USDT",
+            "AAVE/USDT"
+        ],
+        "pair_blacklist": []
+    },
+    "entry_pricing": {
+        "price_side": "same",
+        "purge_old_models": true,
+        "use_order_book": true,
+        "order_book_top": 1,
+        "price_last_balance": 0.0,
+        "check_depth_of_market": {
+            "enabled": false,
+            "bids_to_ask_delta": 1
+        }
+    },
+    "exit_pricing": {
+        "price_side": "other",
+        "use_order_book": true,
+        "order_book_top": 1
+    },
+    "pairlists": [
+        {
+            "method": "StaticPairList"
+        }
+    ],
+    "freqai": {
+        "model_save_type": "stable_baselines_ppo",
+        "conv_width": 10,
+        "follow_mode": false,
+        "purge_old_models": true,
+        "train_period_days": 10,
+        "backtest_period_days": 2,
+        "identifier": "unique-id",
+        "data_kitchen_thread_count": 4,
+        "feature_parameters": {
+            "include_corr_pairlist": [
+                "BTC/USDT",
+                "ETH/USDT"
+            ],
+            "include_timeframes": [
+                "5m",
+                "30m"
+            ],
+            "label_period_candles": 80,
+            "include_shifted_candles": 0,
+            "DI_threshold": 0,
+            "weight_factor": 0.9,
+            "principal_component_analysis": false,
+            "use_SVM_to_remove_outliers": false,
+            "svm_params": {"shuffle": true, "nu": 0.1},
+            "stratify_training_data": 0,
+            "indicator_max_period_candles": 10,
+            "indicator_periods_candles": [5]
+        },
+        "data_split_parameters": {
+            "test_size": 0.5,
+            "random_state": 1,
+            "shuffle": false
+        },
+        "model_training_parameters": {
+            "n_steps": 2048,
+            "ent_coef": 0.005,
+            "learning_rate": 0.000025,
+            "batch_size": 256,
+            "eval_cycles": 5,
+            "train_cycles": 15
+        },
+        "model_reward_parameters": {
+            "rr": 1,
+            "profit_aim": 0.01
+        }
+    },
+    "bot_name": "RL_test",
+    "force_entry_enable": true,
+    "initial_state": "running",
+    "internals": {
+        "process_throttle_secs": 5
+    }
+}
\ No newline at end of file
diff --git a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
index a28b88c42..8fa784f12 100644
--- a/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
+++ b/freqtrade/freqai/RL/BaseReinforcementLearningModel.py
@@ -56,7 +56,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         )
 
         logger.info(f'Training model on {len(data_dictionary["train_features"])} data points')
-        model = self.fit(data_dictionary, pair)
+        model = self.fit_rl(data_dictionary, pair, dk)
 
         if pair not in self.dd.historic_predictions:
             self.set_initial_historic_predictions(
@@ -69,7 +69,7 @@ class BaseReinforcementLearningModel(IFreqaiModel):
         return model
 
     @abstractmethod
-    def fit(self, data_dictionary: Dict[str, Any], pair: str = ''):
+    def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen):
         """
         Agent customizations and abstract Reinforcement Learning customizations go in here.
         Abstract method, so this function must be overridden by
@@ -164,6 +164,21 @@ class BaseReinforcementLearningModel(IFreqaiModel):
             for return_str in dk.data['extra_returns_per_train']:
                 hist_preds_df[return_str] = 0
 
+    # TODO take care of this appendage. Right now it needs to be called because
+    # FreqAI enforces it. But FreqaiRL needs more objects passed to fit() (like DK)
+    # and we don't want to refactor all the other existing fit() functions to
+    # include a dk argument. For now we instantiate it and leave it.
+    def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any:
+        """
+        Most regressors use the same function names and arguments e.g. user
+        can drop in LGBMRegressor in place of CatBoostRegressor and all data
+        management will be properly handled by Freqai.
+        :param data_dictionary: Dict = the dictionary constructed by DataHandler to hold
+        all the training and test data/labels.
+        """
+
+        return
+
 
 
 class MyRLEnv(Base3ActionRLEnv):
diff --git a/freqtrade/freqai/data_drawer.py b/freqtrade/freqai/data_drawer.py
index f9d56c4b4..68f688ed4 100644
--- a/freqtrade/freqai/data_drawer.py
+++ b/freqtrade/freqai/data_drawer.py
@@ -471,11 +471,12 @@ class FreqaiDataDrawer:
         elif model_type == 'keras':
             from tensorflow import keras
             model = keras.models.load_model(dk.data_path / f"{dk.model_filename}_model.h5")
-        elif model_type == 'stable_baselines':
+        elif model_type == 'stable_baselines_ppo':
             from stable_baselines3.ppo.ppo import PPO
+            model = PPO.load(dk.data_path / f"{dk.model_filename}_model.zip")
+        elif model_type == 'stable_baselines_dqn':
             from stable_baselines3 import DQN
-            #model = PPO.load(dk.data_path / f"{dk.model_filename}_model.zip")
-            model = DQN.load(dk.data_path / f"best_model.zip")
+            model = DQN.load(dk.data_path / f"{dk.model_filename}_model.zip")
 
         if Path(dk.data_path / f"{dk.model_filename}_svm_model.joblib").is_file():
             dk.svm_model = load(dk.data_path / f"{dk.model_filename}_svm_model.joblib")
diff --git a/freqtrade/freqai/prediction_models/CatboostClassifier.py b/freqtrade/freqai/prediction_models/CatboostClassifier.py
index b88b28b25..fad74d7a8 100644
--- a/freqtrade/freqai/prediction_models/CatboostClassifier.py
+++ b/freqtrade/freqai/prediction_models/CatboostClassifier.py
@@ -16,7 +16,7 @@ class CatboostClassifier(BaseClassifierModel):
     has its own DataHandler where data is held, saved, loaded, and managed.
""" - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any: """ User sets up the training and test data to fit their desired model here :params: diff --git a/freqtrade/freqai/prediction_models/CatboostRegressor.py b/freqtrade/freqai/prediction_models/CatboostRegressor.py index d93569c91..018f55879 100644 --- a/freqtrade/freqai/prediction_models/CatboostRegressor.py +++ b/freqtrade/freqai/prediction_models/CatboostRegressor.py @@ -17,7 +17,7 @@ class CatboostRegressor(BaseRegressionModel): has its own DataHandler where data is held, saved, loaded, and managed. """ - def fit(self, data_dictionary: Dict) -> Any: + def fit(self, data_dictionary: Dict[str, Any], pair: str = '') -> Any: """ User sets up the training and test data to fit their desired model here :param data_dictionary: the dictionary constructed by DataHandler to hold diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py index 5bc33bff1..d1cd2293e 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO.py @@ -9,9 +9,9 @@ import torch as th from stable_baselines3 import PPO from stable_baselines3.common.callbacks import EvalCallback from stable_baselines3.common.monitor import Monitor -# from stable_baselines3.common.vec_env import SubprocVecEnv from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen logger = logging.getLogger(__name__) @@ -22,7 +22,7 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel): User created Reinforcement Learning Model prediction model. 
""" - def fit(self, data_dictionary: Dict[str, Any], pair: str = ''): + def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen): agent_params = self.freqai_info['model_training_parameters'] reward_params = self.freqai_info['model_reward_parameters'] @@ -44,7 +44,7 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel): eval_env = Monitor(eval, ".") eval_env.reset() - path = self.dk.data_path + path = dk.data_path eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/", log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq), deterministic=True, render=False) @@ -54,7 +54,8 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel): net_arch=[256, 256, 128]) model = PPO('MlpPolicy', train_env, policy_kwargs=policy_kwargs, - tensorboard_log=f"{path}/ppo/tensorboard/", learning_rate=0.00025, gamma=0.9, verbose=1 + tensorboard_log=f"{path}/ppo/tensorboard/", learning_rate=0.00025, + gamma=0.9, verbose=1 ) model.learn( @@ -62,9 +63,11 @@ class ReinforcementLearningPPO(BaseReinforcementLearningModel): callback=eval_callback ) + best_model = PPO.load(dk.data_path / "best_model.zip") + print('Training finished!') - return model + return best_model class MyRLEnv(Base3ActionRLEnv): diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py index c00784d7a..743caf8c6 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearningPPO_multiproc.py @@ -13,7 +13,9 @@ from stable_baselines3.common.vec_env import SubprocVecEnv from stable_baselines3.common.utils import set_random_seed from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen import gym + logger = logging.getLogger(__name__) @@ -42,7 +44,7 @@ class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel): User created Reinforcement Learning Model prediction model. """ - def fit(self, data_dictionary: Dict[str, Any], pair: str = ''): + def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen): agent_params = self.freqai_info['model_training_parameters'] reward_params = self.freqai_info['model_reward_parameters'] @@ -58,16 +60,15 @@ class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel): len(test_df.index)) env_id = "train_env" - train_num_cpu = 6 + num_cpu = int(dk.thread_count / 2) train_env = SubprocVecEnv([make_env(env_id, i, 1, train_df, price, reward_params, self.CONV_WIDTH) for i in range(train_num_cpu)]) - eval_num_cpu = 6 eval_env_id = 'eval_env' eval_env = SubprocVecEnv([make_env(eval_env_id, i, 1, test_df, price_test, reward_params, - self.CONV_WIDTH) for i in range(eval_num_cpu)]) + self.CONV_WIDTH) for i in range(num_cpu)]) - path = self.dk.data_path + path = dk.data_path eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/", log_path=f"{path}/ppo/logs/", eval_freq=int(eval_freq), deterministic=True, render=False) @@ -85,10 +86,12 @@ class ReinforcementLearningPPO_multiproc(BaseReinforcementLearningModel): callback=eval_callback ) + # TODO get callback working so the best model is saved. 
For now we save last model + # best_model = PPO.load(dk.data_path / "best_model.zip") print('Training finished!') eval_env.close() - return model + return model # best_model class MyRLEnv(Base3ActionRLEnv): diff --git a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py index 2a8570d3e..8bc5f9152 100644 --- a/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py +++ b/freqtrade/freqai/prediction_models/ReinforcementLearningTDQN.py @@ -7,9 +7,12 @@ from stable_baselines3.common.monitor import Monitor from freqtrade.freqai.RL.Base3ActionRLEnv import Base3ActionRLEnv, Actions, Positions from freqtrade.freqai.RL.BaseReinforcementLearningModel import BaseReinforcementLearningModel from freqtrade.freqai.RL.TDQNagent import TDQN +from stable_baselines3 import DQN from stable_baselines3.common.buffers import ReplayBuffer import numpy as np +from freqtrade.freqai.data_kitchen import FreqaiDataKitchen + logger = logging.getLogger(__name__) @@ -18,7 +21,7 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel): User created Reinforcement Learning Model prediction model. """ - def fit(self, data_dictionary: Dict[str, Any], pair: str = ''): + def fit_rl(self, data_dictionary: Dict[str, Any], pair: str, dk: FreqaiDataKitchen): agent_params = self.freqai_info['model_training_parameters'] reward_params = self.freqai_info['model_reward_parameters'] @@ -40,7 +43,7 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel): eval_env = Monitor(eval, ".") eval_env.reset() - path = self.dk.data_path + path = dk.data_path eval_callback = EvalCallback(eval_env, best_model_save_path=f"{path}/", log_path=f"{path}/tdqn/logs/", eval_freq=int(eval_freq), deterministic=True, render=False) @@ -63,9 +66,11 @@ class ReinforcementLearningTDQN(BaseReinforcementLearningModel): callback=eval_callback ) + best_model = DQN.load(dk.data_path / "best_model.zip") + print('Training finished!') - return model + return best_model class MyRLEnv(Base3ActionRLEnv):