Add watchdog to prevent stator freezing

Refs #424
This commit is contained in:
Andrew Godwin 2023-01-16 11:15:28 -07:00
parent cfe18932b8
commit 0a848aa884
2 changed files with 19 additions and 1 deletions

View File

@ -83,4 +83,7 @@ class Command(BaseCommand):
schedule_interval=schedule_interval, schedule_interval=schedule_interval,
run_for=run_for, run_for=run_for,
) )
try:
async_to_sync(runner.run)() async_to_sync(runner.run)()
except KeyboardInterrupt:
print("Ctrl-C received")

View File

@ -1,5 +1,7 @@
import asyncio import asyncio
import datetime import datetime
import os
import signal
import time import time
import traceback import traceback
import uuid import uuid
@ -41,6 +43,8 @@ class StatorRunner:
self.run_for = run_for self.run_for = run_for
self.minimum_loop_delay = 0.5 self.minimum_loop_delay = 0.5
self.maximum_loop_delay = 5 self.maximum_loop_delay = 5
# Set up SIGALRM handler
signal.signal(signal.SIGALRM, self.alarm_handler)
async def run(self): async def run(self):
sentry.set_takahe_app("stator") sentry.set_takahe_app("stator")
@ -56,6 +60,9 @@ class StatorRunner:
while True: while True:
# Do we need to do cleaning? # Do we need to do cleaning?
if (time.monotonic() - self.last_clean) >= self.schedule_interval: if (time.monotonic() - self.last_clean) >= self.schedule_interval:
# Set up the watchdog timer (each time we do this the
# previous one is cancelled)
signal.alarm(self.schedule_interval * 2)
# Refresh the config # Refresh the config
Config.system = await Config.aload_system() Config.system = await Config.aload_system()
print("Tasks processed this loop:") print("Tasks processed this loop:")
@ -107,6 +114,14 @@ class StatorRunner:
print("Complete") print("Complete")
return self.handled return self.handled
def alarm_handler(self, signum, frame):
    """
    Called when SIGALRM fires, which means we missed a schedule loop.
    Just exit as we're likely deadlocked.

    The signal number and the interrupted stack frame are passed in by
    the signal machinery and are not used. This handler never returns:
    the process is terminated with exit status 2.
    """
    try:
        # os._exit() skips the normal interpreter shutdown and does not
        # flush stdio buffers, so flush explicitly or the message is lost
        # whenever stdout is a pipe or a file rather than a terminal.
        print("Watchdog timeout exceeded", flush=True)
    finally:
        # Exit even if the print itself fails (e.g. closed or broken
        # stdout); otherwise the watchdog would leave the process hung.
        os._exit(2)
async def run_scheduling(self): async def run_scheduling(self):
""" """
Do any transition cleanup tasks Do any transition cleanup tasks