Watchdog for bot process
This commit is contained in:
parent
e46fcf0e02
commit
5ba255f635
@ -19,6 +19,7 @@ from freqtrade.misc import State, get_state, update_state, parse_args, throttle,
|
|||||||
load_config
|
load_config
|
||||||
from freqtrade.persistence import Trade
|
from freqtrade.persistence import Trade
|
||||||
from freqtrade.fiat_convert import CryptoToFiatConverter
|
from freqtrade.fiat_convert import CryptoToFiatConverter
|
||||||
|
from freqtrade.watchdog import Watchdog
|
||||||
|
|
||||||
logger = logging.getLogger('freqtrade')
|
logger = logging.getLogger('freqtrade')
|
||||||
|
|
||||||
@ -441,6 +442,13 @@ def main(sysargv=sys.argv[1:]) -> None:
|
|||||||
else:
|
else:
|
||||||
logger.info('Dry run is disabled. (--dry_run_db ignored)')
|
logger.info('Dry run is disabled. (--dry_run_db ignored)')
|
||||||
|
|
||||||
|
watchdog = Watchdog()
|
||||||
|
|
||||||
|
if args.watchdog_enable:
|
||||||
|
logger.info('Using watchdog to monitor process (--watchdog)')
|
||||||
|
if not watchdog.start():
|
||||||
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
init(_CONF)
|
init(_CONF)
|
||||||
old_state = None
|
old_state = None
|
||||||
@ -460,6 +468,7 @@ def main(sysargv=sys.argv[1:]) -> None:
|
|||||||
nb_assets=args.dynamic_whitelist,
|
nb_assets=args.dynamic_whitelist,
|
||||||
)
|
)
|
||||||
old_state = new_state
|
old_state = new_state
|
||||||
|
watchdog.heartbeat()
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
logger.info('Got SIGINT, aborting ...')
|
logger.info('Got SIGINT, aborting ...')
|
||||||
except BaseException:
|
except BaseException:
|
||||||
|
@ -142,6 +142,12 @@ def parse_args(args: List[str], description: str):
|
|||||||
metavar='INT',
|
metavar='INT',
|
||||||
nargs='?',
|
nargs='?',
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'-w', '--watchdog',
|
||||||
|
help='Run under watchdog (restart process if main loop is stalled)', # noqa
|
||||||
|
action='store_true',
|
||||||
|
dest='watchdog_enable',
|
||||||
|
)
|
||||||
|
|
||||||
build_subcommands(parser)
|
build_subcommands(parser)
|
||||||
return parser.parse_args(args)
|
return parser.parse_args(args)
|
||||||
|
79
freqtrade/watchdog.py
Normal file
79
freqtrade/watchdog.py
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
import os
|
||||||
|
import signal
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from multiprocessing import Value
|
||||||
|
|
||||||
|
logger = logging.getLogger('freqtrade.watchdog')

# Maximum age of the last heartbeat, in seconds, before the watchdog
# treats the monitored process as stalled and restarts it.
WATCHDOG_TIMEOUT = 300


class Watchdog:
    """
    Fork-based watchdog that restarts a worker process when it stops
    sending heartbeats.

    ``start()`` forks: the forked child returns and carries on as the
    worker, while the parent stays inside ``run()`` and supervises it.
    The worker is expected to call ``heartbeat()`` regularly; when the
    last heartbeat is older than ``WATCHDOG_TIMEOUT`` seconds the worker
    is terminated and a fresh one is forked.
    """

    # Shared-memory double holding the time.time() value of the latest
    # heartbeat. It is created at class-definition time, i.e. before any
    # fork, so the watchdog and the workers all see the same value.
    shared_heartbeat = Value('d', 0.0)
    # Number of the signal that asked the watchdog to stop, or None.
    kill_signal = None

    def heartbeat(self) -> None:
        """Record "now" as the time of the latest sign of life."""
        logger.debug("Heartbeat")
        self.shared_heartbeat.value = time.time()

    def exit_gracefully(self, signum, frame) -> None:
        """
        Signal handler: remember the signal so that the loop in ``run()``
        can shut down on its next iteration.
        :param signum: number of the received signal
        :param frame: current stack frame (unused)
        """
        logger.warning("Kill signal: %s", signum)
        self.kill_signal = signum

    def kill(self, pid: int) -> None:
        """
        Send SIGTERM to ``pid`` and wait until that process has been reaped.
        A process that no longer exists, or that has already been reaped,
        is not treated as an error.
        :param pid: process id of the child to stop
        """
        logger.info("Stopping pid %s", pid)
        try:
            # TODO: consider sending SIGINT first and SIGTERM afterwards.
            os.kill(pid, signal.SIGTERM)
        except ProcessLookupError:
            logger.debug("Process %s is already gone", pid)
            return
        try:
            # Wait for this specific child only, never for an unrelated one.
            os.waitpid(pid, 0)
        except ChildProcessError:
            logger.debug("Process %s was already reaped", pid)

    def start(self) -> bool:
        """
        Fork the process and run the watchdog loop in the parent.
        :return: True in a worker process (the caller should carry on with
                 its normal work), False in the watchdog process once it
                 has stopped (the caller should exit).
        """
        self.heartbeat()
        pid = os.fork()
        if pid == 0:
            # In the child process, continue to main
            return True

        # In the watchdog process, run it
        if not self.run(pid):
            # Got exit signal (or the watchdog loop failed)
            return False

        # run() returned True: this is a freshly forked worker
        self.heartbeat()
        return True

    def run(self, pid: int) -> bool:
        """
        Supervise the worker ``pid`` until the watchdog is asked to stop.
        :param pid: process id of the worker forked by ``start()``
        :return: True in a newly forked replacement worker, False in the
                 watchdog process after a stop signal or an error.
        """
        logger.info("Watchdog started")
        self.orig_SIGINT = signal.signal(signal.SIGINT, self.exit_gracefully)
        self.orig_SIGTERM = signal.signal(signal.SIGTERM, self.exit_gracefully)
        try:
            while True:
                if self.kill_signal is not None:
                    raise KeyboardInterrupt()

                timeout = time.time() - self.shared_heartbeat.value

                if timeout > WATCHDOG_TIMEOUT:
                    logger.warning("Kill process due to timeout: %s", timeout)
                    self.kill(pid)
                    # Refresh the heartbeat before forking: otherwise the
                    # stale timestamp would make the next check kill the
                    # replacement before it had a chance to report in.
                    self.heartbeat()
                    new_pid = os.fork()
                    if new_pid == 0:
                        logger.info("New children forked")
                        # The worker must not keep the watchdog's handlers
                        signal.signal(signal.SIGINT, self.orig_SIGINT)
                        signal.signal(signal.SIGTERM, self.orig_SIGTERM)
                        return True
                    pid = new_pid

                time.sleep(1)

        except Exception as ex:
            logger.exception(ex)
            self.kill(pid)
            return False

        except KeyboardInterrupt:
            logger.info("Watchdog stopped")
            self.kill(pid)
            return False
|
Loading…
Reference in New Issue
Block a user