From 5ba255f635a0af422be55678f4061cd9cf09c113 Mon Sep 17 00:00:00 2001 From: Anton Ermak Date: Thu, 28 Dec 2017 12:14:34 +0700 Subject: [PATCH] Watchdog for bot process --- freqtrade/main.py | 9 +++++ freqtrade/misc.py | 6 ++++ freqtrade/watchdog.py | 79 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 94 insertions(+) create mode 100644 freqtrade/watchdog.py diff --git a/freqtrade/main.py b/freqtrade/main.py index 5e8680b85..3f929b0a7 100755 --- a/freqtrade/main.py +++ b/freqtrade/main.py @@ -19,6 +19,7 @@ from freqtrade.misc import State, get_state, update_state, parse_args, throttle, load_config from freqtrade.persistence import Trade from freqtrade.fiat_convert import CryptoToFiatConverter +from freqtrade.watchdog import Watchdog logger = logging.getLogger('freqtrade') @@ -441,6 +442,13 @@ def main(sysargv=sys.argv[1:]) -> None: else: logger.info('Dry run is disabled. (--dry_run_db ignored)') + watchdog = Watchdog() + + if args.watchdog_enable: + logger.info('Using watchdog to monitor process (--watchdog)') + if not watchdog.start(): + return + try: init(_CONF) old_state = None @@ -460,6 +468,7 @@ def main(sysargv=sys.argv[1:]) -> None: nb_assets=args.dynamic_whitelist, ) old_state = new_state + watchdog.heartbeat() except KeyboardInterrupt: logger.info('Got SIGINT, aborting ...') except BaseException: diff --git a/freqtrade/misc.py b/freqtrade/misc.py index afc4334e8..f2d4433ae 100644 --- a/freqtrade/misc.py +++ b/freqtrade/misc.py @@ -142,6 +142,12 @@ def parse_args(args: List[str], description: str): metavar='INT', nargs='?', ) + parser.add_argument( + '-w', '--watchdog', + help='Run under watchdog (restart process if main loop is stalled)', # noqa + action='store_true', + dest='watchdog_enable', + ) build_subcommands(parser) return parser.parse_args(args) diff --git a/freqtrade/watchdog.py b/freqtrade/watchdog.py new file mode 100644 index 000000000..b0fde770e --- /dev/null +++ b/freqtrade/watchdog.py @@ -0,0 +1,79 @@ +import os +import signal 
import time
import logging
from multiprocessing import Value

logger = logging.getLogger('freqtrade.watchdog')

# Seconds without a heartbeat after which the monitored process is restarted.
WATCHDOG_TIMEOUT = 300


class Watchdog:
    """
    Fork-based supervisor that restarts the bot when its main loop stalls.

    ``start()`` forks the current process: the parent becomes the watchdog
    and the child continues as the bot. The bot calls ``heartbeat()`` on every
    loop iteration; when no heartbeat has been seen for more than
    ``WATCHDOG_TIMEOUT`` seconds the watchdog terminates the child and forks
    a fresh one.
    """

    # time.time() of the last heartbeat. multiprocessing.Value allocates it
    # in shared memory, so writes made by the forked child are visible to the
    # watchdog process.
    shared_heartbeat = Value('d', 0.0)
    # Number of the SIGINT/SIGTERM received by the watchdog, None until then.
    kill_signal = None

    def heartbeat(self) -> None:
        """Record that the monitored process is alive right now."""
        logger.debug("Heartbeat")
        self.shared_heartbeat.value = time.time()

    def exit_gracefully(self, signum, frame) -> None:
        """
        Signal handler: remember the signal so the ``run()`` loop can stop.

        :param signum: number of the received signal
        :param frame: interrupted stack frame (unused)
        """
        logger.warning("Kill signal: %s", signum)
        self.kill_signal = signum

    def kill(self, pid) -> None:
        """
        Send SIGTERM to the child ``pid`` and reap it.

        A child that is already gone is not an error: the goal (the process
        is not running) has been reached, so the call simply returns.

        :param pid: process id of the child to stop
        """
        logger.info("Stopping pid %s", pid)
        try:
            os.kill(pid, signal.SIGTERM)  # Better use sigint and then sigterm?
        except ProcessLookupError:
            logger.debug("Pid %s does not exist anymore", pid)
            return
        try:
            # Wait for exactly this child; a bare os.wait() could reap an
            # unrelated child and leave this one behind as a zombie.
            os.waitpid(pid, 0)
        except ChildProcessError:
            logger.debug("Pid %s is not a child of this process", pid)

    def start(self) -> bool:
        """
        Fork into a watchdog (parent) and a monitored process (child).

        :return: True in a process that must continue into the main loop
                 (the first child or any re-forked child), False in the
                 watchdog process once it has been told to stop.
        """
        self.heartbeat()
        pid = os.fork()
        if pid == 0:
            # In the child process, continue to main
            return True

        # In the watchdog process, run it
        if not self.run(pid):
            # Got exit signal
            return False

        # run() only returns True inside a newly forked child: continue to main
        self.heartbeat()
        return True

    def run(self, pid) -> bool:
        """
        Watchdog loop: monitor the heartbeat of ``pid`` and restart on stall.

        :param pid: process id of the child being monitored
        :return: True inside a freshly forked replacement child, False in the
                 watchdog process after a stop signal or an unexpected error.
        """
        logger.info("Watchdog started")
        self.orig_SIGINT = signal.signal(signal.SIGINT, self.exit_gracefully)
        self.orig_SIGTERM = signal.signal(signal.SIGTERM, self.exit_gracefully)
        try:
            while True:
                if self.kill_signal:
                    raise KeyboardInterrupt()

                timeout = time.time() - self.shared_heartbeat.value

                if timeout > WATCHDOG_TIMEOUT:
                    logger.warning("Kill process due to timeout: %s", timeout)
                    self.kill(pid)
                    # Reset the stale timestamp before forking so that the
                    # replacement gets a full timeout window even if it has
                    # not sent its first heartbeat by the next check.
                    self.heartbeat()
                    new_pid = os.fork()
                    if new_pid == 0:
                        logger.info("New children forked")
                        return True
                    pid = new_pid

                time.sleep(1)

        except Exception as ex:
            logger.exception(ex)
            self.kill(pid)
            return False

        except KeyboardInterrupt:
            logger.info("Watchdog stopped")
            self.kill(pid)
            return False

        finally:
            # Runs on every exit path (new child and stopping watchdog), after
            # the handlers above have finished killing the child.
            self._restore_handler(signal.SIGINT, self.orig_SIGINT)
            self._restore_handler(signal.SIGTERM, self.orig_SIGTERM)

    @staticmethod
    def _restore_handler(signum, handler) -> None:
        """Re-install ``handler`` for ``signum`` unless it cannot be restored."""
        # signal.signal() returns None when the previous handler was not
        # installed from Python; passing None back would raise TypeError.
        if handler is not None:
            signal.signal(signum, handler)