Watchdog for bot process

This commit is contained in:
Anton Ermak 2017-12-28 12:14:34 +07:00
parent e46fcf0e02
commit 5ba255f635
3 changed files with 94 additions and 0 deletions

View File

@ -19,6 +19,7 @@ from freqtrade.misc import State, get_state, update_state, parse_args, throttle,
load_config
from freqtrade.persistence import Trade
from freqtrade.fiat_convert import CryptoToFiatConverter
from freqtrade.watchdog import Watchdog
logger = logging.getLogger('freqtrade')
@ -441,6 +442,13 @@ def main(sysargv=sys.argv[1:]) -> None:
else:
logger.info('Dry run is disabled. (--dry_run_db ignored)')
watchdog = Watchdog()
if args.watchdog_enable:
logger.info('Using watchdog to monitor process (--watchdog)')
if not watchdog.start():
return
try:
init(_CONF)
old_state = None
@ -460,6 +468,7 @@ def main(sysargv=sys.argv[1:]) -> None:
nb_assets=args.dynamic_whitelist,
)
old_state = new_state
watchdog.heartbeat()
except KeyboardInterrupt:
logger.info('Got SIGINT, aborting ...')
except BaseException:

View File

@ -142,6 +142,12 @@ def parse_args(args: List[str], description: str):
metavar='INT',
nargs='?',
)
parser.add_argument(
'-w', '--watchdog',
help='Run under watchdog (restart process if main loop is stalled)', # noqa
action='store_true',
dest='watchdog_enable',
)
build_subcommands(parser)
return parser.parse_args(args)

79
freqtrade/watchdog.py Normal file
View File

@ -0,0 +1,79 @@
import os
import signal
import time
import logging
from multiprocessing import Value
logger = logging.getLogger('freqtrade.watchdog')

# Seconds without a heartbeat after which the watched process is treated as stalled.
WATCHDOG_TIMEOUT = 300


class Watchdog:
    """
    Fork-based supervisor that restarts the bot when its main loop stalls.

    ``start()`` forks the current process: the child carries on as the bot and
    is expected to call ``heartbeat()`` regularly, while the parent stays in
    ``run()`` and replaces the child whenever the last heartbeat is older than
    ``WATCHDOG_TIMEOUT`` seconds. Relies on ``os.fork``.
    """

    # time.time() of the most recent heartbeat. Stored in a multiprocessing
    # Value so that a write made by the forked child is seen by the parent.
    shared_heartbeat = Value('d', 0.0)
    # Number of the SIGINT/SIGTERM received by the watchdog process, if any.
    kill_signal = None

    def heartbeat(self) -> None:
        """Record the current time as the moment of the last heartbeat."""
        logger.debug("Heartbeat")
        self.shared_heartbeat.value = time.time()

    def exit_gracefully(self, signum, frame) -> None:
        """
        Signal handler: remember the signal so that ``run()`` can shut down.

        :param signum: number of the received signal
        :param frame: current stack frame (unused)
        """
        logger.warning("Kill signal: {}".format(signum))
        self.kill_signal = signum

    def kill(self, pid) -> None:
        """
        Send SIGTERM to ``pid`` and wait until it has been reaped.

        A process that no longer exists, or that is not a child of the caller,
        is not treated as an error: the goal (process gone) is already met.

        :param pid: process id of the watched child
        """
        logger.info("Stopping pid {}".format(pid))
        try:
            os.kill(pid, signal.SIGTERM)  # Better use sigint and then sigterm?
        except ProcessLookupError:
            logger.debug("Process already gone")
            return
        try:
            # Wait for this specific child rather than for any child.
            os.waitpid(pid, 0)
        except ChildProcessError:
            logger.debug("Process is not a child or was already reaped")

    def start(self) -> bool:
        """
        Fork and run the watchdog loop in the parent process.

        :return: True in a (possibly re-forked) child process, which should
                 continue into the main loop; False in the watchdog process
                 once it has been asked to stop.
        """
        self.heartbeat()
        pid = os.fork()
        if pid == 0:
            # In the child process, continue to main
            return True

        # In the watchdog process, run it
        if not self.run(pid):
            # Got exit signal
            return False

        # Newly forked child, continue to main
        self.heartbeat()
        return True

    def run(self, pid) -> bool:
        """
        Supervise the child ``pid`` until the watchdog is told to stop.

        Once per second the heartbeat age is checked; a child whose heartbeat
        is older than ``WATCHDOG_TIMEOUT`` is killed and replaced by a fresh
        fork of the watchdog process.

        :param pid: process id of the child to supervise
        :return: True inside a freshly forked replacement child; False in the
                 watchdog process after a stop signal or an unexpected error.
        """
        logger.info("Watchdog started")
        self.orig_SIGINT = signal.signal(signal.SIGINT, self.exit_gracefully)
        self.orig_SIGTERM = signal.signal(signal.SIGTERM, self.exit_gracefully)

        try:
            while True:
                if self.kill_signal is not None:
                    raise KeyboardInterrupt()

                timeout = time.time() - self.shared_heartbeat.value
                if timeout > WATCHDOG_TIMEOUT:
                    logger.warning("Kill process due to timeout: {}".format(timeout))
                    self.kill(pid)
                    # The old child has been reaped; forget its pid so that a
                    # failure below cannot signal an unrelated, recycled pid.
                    pid = None
                    # Give the replacement a full timeout window, as start()
                    # does before the first fork.
                    self.heartbeat()

                    new_pid = os.fork()
                    if new_pid == 0:
                        logger.info("New children forked")
                        # The child must not inherit the watchdog's handlers.
                        signal.signal(signal.SIGINT, self.orig_SIGINT)
                        signal.signal(signal.SIGTERM, self.orig_SIGTERM)
                        return True
                    pid = new_pid

                time.sleep(1)
        except Exception as ex:
            logger.exception(ex)
            if pid is not None:
                self.kill(pid)
            return False
        except KeyboardInterrupt:
            logger.info("Watchdog stopped")
            if pid is not None:
                self.kill(pid)
            return False