# HG changeset patch # User Tim Deegan # Date 1275473726 -3600 # Node ID f192bc28870d1789cb5865f8e795e1a47a56200c # Parent ba22297b00514325fef3673f3184eca466bd6990 Watchdog timers for domains. Each domain is allowed to set, reset and disable its timers; when any timer runs out the domain is killed. Patch from Christian Limpach Signed-off-by: Tim Deegan diff -r ba22297b0051 -r f192bc28870d .hgignore --- a/.hgignore Wed Jun 02 11:15:21 2010 +0100 +++ b/.hgignore Wed Jun 02 11:15:26 2010 +0100 @@ -237,6 +237,7 @@ ^tools/xcutils/xc_restore$ ^tools/xcutils/xc_save$ ^tools/xcutils/readnotes$ +^tools/misc/xenwatchdogd$ ^tools/xenfb/sdlfb$ ^tools/xenfb/vncfb$ ^tools/xenmon/xentrace_setmask$ diff -r ba22297b0051 -r f192bc28870d tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Wed Jun 02 11:15:21 2010 +0100 +++ b/tools/libxc/xc_domain.c Wed Jun 02 11:15:26 2010 +0100 @@ -364,6 +364,34 @@ unlock_pages(ctxt, sz); return rc; +} + +int xc_watchdog(xc_interface *xch, + uint32_t id, + uint32_t timeout) +{ + int ret = -1; + sched_watchdog_t arg; + DECLARE_HYPERCALL; + + hypercall.op = __HYPERVISOR_sched_op; + hypercall.arg[0] = (unsigned long)SCHEDOP_watchdog; + hypercall.arg[1] = (unsigned long)&arg; + arg.id = id; + arg.timeout = timeout; + + if ( lock_pages(&arg, sizeof(arg)) != 0 ) + { + PERROR("Could not lock memory for Xen hypercall"); + goto out1; + } + + ret = do_xen_hypercall(xch, &hypercall); + + unlock_pages(&arg, sizeof(arg)); + + out1: + return ret; } diff -r ba22297b0051 -r f192bc28870d tools/libxc/xenctrl.h --- a/tools/libxc/xenctrl.h Wed Jun 02 11:15:21 2010 +0100 +++ b/tools/libxc/xenctrl.h Wed Jun 02 11:15:26 2010 +0100 @@ -341,6 +341,10 @@ int xc_domain_shutdown(xc_interface *xch, uint32_t domid, int reason); + +int xc_watchdog(xc_interface *xch, + uint32_t id, + uint32_t timeout); int xc_vcpu_setaffinity(xc_interface *xch, uint32_t domid, diff -r ba22297b0051 -r f192bc28870d tools/misc/Makefile --- a/tools/misc/Makefile Wed Jun 02 11:15:21 2010 +0100 +++ 
b/tools/misc/Makefile Wed Jun 02 11:15:26 2010 +0100 @@ -10,7 +10,7 @@ HDRS = $(wildcard *.h) -TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool +TARGETS-y := xenperf xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool xenwatchdogd TARGETS-$(CONFIG_X86) += xen-detect xen-hvmctx TARGETS := $(TARGETS-y) @@ -22,7 +22,7 @@ INSTALL_BIN-$(CONFIG_X86) += xen-detect INSTALL_BIN := $(INSTALL_BIN-y) -INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool +INSTALL_SBIN-y := xm xen-bugtool xen-python-path xend xenperf xsview xenpm xen-tmem-list-parse gtraceview gtracestat xenlockprof xen-hptool xenwatchdogd INSTALL_SBIN-$(CONFIG_X86) += xen-hvmctx INSTALL_SBIN := $(INSTALL_SBIN-y) @@ -37,8 +37,10 @@ install: build $(INSTALL_DIR) $(DESTDIR)$(BINDIR) $(INSTALL_DIR) $(DESTDIR)$(SBINDIR) + $(INSTALL_DIR) $(DESTDIR)$(CONFIG_DIR)/init.d $(INSTALL_PYTHON_PROG) $(INSTALL_BIN) $(DESTDIR)$(BINDIR) $(INSTALL_PYTHON_PROG) $(INSTALL_SBIN) $(DESTDIR)$(SBINDIR) + $(INSTALL_PROG) xen-watchdog $(DESTDIR)$(CONFIG_DIR)/init.d set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d install-recurse; done .PHONY: clean @@ -49,7 +51,7 @@ %.o: %.c $(HDRS) Makefile $(CC) -c $(CFLAGS) -o $@ $< -xen-hvmctx xenperf xenpm gtracestat xenlockprof xen-hptool: %: %.o Makefile +xen-hvmctx xenperf xenpm gtracestat xenlockprof xen-hptool xenwatchdogd: %: %.o Makefile $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest) $(LDFLAGS_libxenstore) gtraceview: %: %.o Makefile diff -r ba22297b0051 -r f192bc28870d tools/misc/xen-watchdog --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/misc/xen-watchdog Wed Jun 02 11:15:26 2010 +0100 @@ -0,0 +1,59 @@ +#! /bin/bash +# +# xen-watchdog +# +# chkconfig: 2345 21 79 +# description: Run domain watchdog daemon +# + +# Source function library. +. 
/etc/init.d/functions + +start() { + local r + base="watchdogd" + echo -n $"Starting domain watchdog daemon: " + + /usr/sbin/xenwatchdogd 30 15 + r=$? + [ "$r" -eq 0 ] && success $"$base startup" || failure $"$base startup" + echo + + return $r +} + +stop() { + local r + base="watchdogd" + echo -n $"Stopping domain watchdog daemon: " + + killall -USR1 watchdogd 2>/dev/null + r=$? + [ "$r" -eq 0 ] && success $"$base stop" || failure $"$base stop" + echo + + return $r +} + +case "$1" in + start) + start + ;; + stop) + stop + ;; + restart) + stop + start + ;; + status) + ;; + condrestart) + stop + start + ;; + *) + echo $"Usage: $0 {start|stop|status|restart|condrestart}" + exit 1 +esac + diff -r ba22297b0051 -r f192bc28870d tools/misc/xenwatchdogd.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/misc/xenwatchdogd.c Wed Jun 02 11:15:26 2010 +0100 @@ -0,0 +1,96 @@ + +#include <err.h> +#include <limits.h> +#include "xenctrl.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <stdlib.h> +#include <unistd.h> +#include <signal.h> +#include <stdio.h> + +xc_interface *h; +int id = 0; + +void daemonize(void) +{ + switch (fork()) { + case -1: + err(1, "fork"); + case 0: + break; + default: + exit(0); + } + umask(0); + if (setsid() < 0) + err(1, "setsid"); + if (chdir("/") < 0) + err(1, "chdir /"); + freopen("/dev/null", "r", stdin); + freopen("/dev/null", "w", stdout); + freopen("/dev/null", "w", stderr); +} + +void catch_exit(int sig) +{ + if (id) + xc_watchdog(h, id, 300); + exit(0); +} + +void catch_usr1(int sig) +{ + if (id) + xc_watchdog(h, id, 0); + exit(0); +} + +int main(int argc, char **argv) +{ + int t, s; + int ret; + + if (argc < 2) + errx(1, "usage: %s <timeout> <sleep>", argv[0]); + + daemonize(); + + h = xc_interface_open(NULL, NULL, 0); + if (h == NULL) + err(1, "xc_interface_open"); + + t = strtoul(argv[1], NULL, 0); + if (t == ULONG_MAX) + err(1, "strtoul"); + + s = t / 2; + if (argc == 3) { + s = strtoul(argv[2], NULL, 0); + if (s == ULONG_MAX) + err(1, "strtoul"); + } + + if (signal(SIGHUP, &catch_exit) == SIG_ERR) + err(1, 
"signal"); + if (signal(SIGINT, &catch_exit) == SIG_ERR) + err(1, "signal"); + if (signal(SIGQUIT, &catch_exit) == SIG_ERR) + err(1, "signal"); + if (signal(SIGTERM, &catch_exit) == SIG_ERR) + err(1, "signal"); + if (signal(SIGUSR1, &catch_usr1) == SIG_ERR) + err(1, "signal"); + + id = xc_watchdog(h, 0, t); + if (id <= 0) + err(1, "xc_watchdog setup"); + + for (;;) { + sleep(s); + ret = xc_watchdog(h, id, t); + if (ret != 0) + err(1, "xc_watchdog"); + } +} diff -r ba22297b0051 -r f192bc28870d xen/common/domain.c --- a/xen/common/domain.c Wed Jun 02 11:15:21 2010 +0100 +++ b/xen/common/domain.c Wed Jun 02 11:15:26 2010 +0100 @@ -229,6 +229,7 @@ spin_lock_init_prof(d, domain_lock); spin_lock_init_prof(d, page_alloc_lock); spin_lock_init(&d->shutdown_lock); + spin_lock_init(&d->watchdog_lock); spin_lock_init(&d->hypercall_deadlock_mutex); INIT_PAGE_LIST_HEAD(&d->page_list); INIT_PAGE_LIST_HEAD(&d->xenpage_list); @@ -608,6 +609,8 @@ grant_table_destroy(d); arch_domain_destroy(d); + + watchdog_domain_destroy(d); rangeset_domain_destroy(d); diff -r ba22297b0051 -r f192bc28870d xen/common/keyhandler.c --- a/xen/common/keyhandler.c Wed Jun 02 11:15:21 2010 +0100 +++ b/xen/common/keyhandler.c Wed Jun 02 11:15:26 2010 +0100 @@ -241,6 +241,7 @@ for_each_domain ( d ) { + unsigned int i; printk("General information for domain %u:\n", d->domain_id); cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask); printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d " @@ -254,6 +255,12 @@ d->handle[ 8], d->handle[ 9], d->handle[10], d->handle[11], d->handle[12], d->handle[13], d->handle[14], d->handle[15], d->vm_assist); + for (i = 0 ; i < NR_WATCHDOG_TIMERS; i++) + { + if ( active_timer(&d->watchdog_timer[i]) ) + printk(" watchdog %d expires in %d seconds\n", + i, (u32)((d->watchdog_timer[i].expires - NOW()) >> 30)); + } arch_dump_domain_info(d); diff -r ba22297b0051 -r f192bc28870d xen/common/schedule.c --- a/xen/common/schedule.c Wed Jun 02 11:15:21 2010 +0100 +++ 
b/xen/common/schedule.c Wed Jun 02 11:15:26 2010 +0100 @@ -644,6 +644,55 @@ return 0; } +static void watchdog_timeout(void *data) +{ + struct domain *d = data; + + printk("Watchdog timer fired for domain %u\n", d->domain_id); + domain_shutdown(d, SHUTDOWN_watchdog); +} + +static long do_watchdog(struct domain *d, uint32_t id, uint32_t timeout) +{ + if ( id == 0 ) + { + /* Find an unused watchdog timer and start it */ + unsigned int i; + for ( i = 0; i < NR_WATCHDOG_TIMERS; i++ ) + { + if ( !active_timer(&d->watchdog_timer[i]) ) + { + init_timer(&d->watchdog_timer[i], watchdog_timeout, + (void *)d, 0); + set_timer(&d->watchdog_timer[i], NOW() + SECONDS(timeout)); + /* Timer IDs count from 1, not 0 */ + return i + 1; + } + } + return -EEXIST; + } + + if ( id > NR_WATCHDOG_TIMERS ) + return -EINVAL; + + if ( !active_timer(&d->watchdog_timer[id - 1]) ) + return -EEXIST; + + if ( timeout == 0 ) + stop_timer(&d->watchdog_timer[id - 1]); + else + set_timer(&d->watchdog_timer[id - 1], NOW() + SECONDS(timeout)); + + return 0; +} + +void watchdog_domain_destroy(struct domain *d) +{ + unsigned int i; + for ( i = 0; i < NR_WATCHDOG_TIMERS; i++ ) + kill_timer(&d->watchdog_timer[i]); +} + long do_sched_op_compat(int cmd, unsigned long arg) { long ret = 0; @@ -783,6 +832,22 @@ rcu_unlock_domain(d); ret = 0; + + break; + } + + case SCHEDOP_watchdog: + { + struct sched_watchdog sched_watchdog; + + ret = -EFAULT; + if ( copy_from_guest(&sched_watchdog, arg, 1) ) + break; + + spin_lock(&current->domain->watchdog_lock); + ret = do_watchdog(current->domain, sched_watchdog.id, + sched_watchdog.timeout); + spin_unlock(&current->domain->watchdog_lock); break; } diff -r ba22297b0051 -r f192bc28870d xen/common/shutdown.c --- a/xen/common/shutdown.c Wed Jun 02 11:15:21 2010 +0100 +++ b/xen/common/shutdown.c Wed Jun 02 11:15:26 2010 +0100 @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -53,6 +54,15 @@ break; /* not reached */ } + case SHUTDOWN_watchdog: + { + 
console_force_unlock(); + printk("Domain 0 shutdown: watchdog rebooting machine.\n"); + kexec_crash(); + machine_restart(0); + break; /* not reached */ + } + default: { printk("Domain 0 shutdown (unknown reason %u): ", reason); diff -r ba22297b0051 -r f192bc28870d xen/include/public/sched.h --- a/xen/include/public/sched.h Wed Jun 02 11:15:21 2010 +0100 +++ b/xen/include/public/sched.h Wed Jun 02 11:15:26 2010 +0100 @@ -106,6 +106,22 @@ #define SCHEDOP_shutdown_code 5 /* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer. + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 +struct sched_watchdog { + uint32_t id; /* watchdog ID */ + uint32_t timeout; /* timeout */ +}; +typedef struct sched_watchdog sched_watchdog_t; +DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); + +/* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control * software to determine the appropriate action. For the most part, Xen does * not care about the shutdown code. @@ -114,6 +130,7 @@ #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ #endif /* __XEN_PUBLIC_SCHED_H__ */ diff -r ba22297b0051 -r f192bc28870d xen/include/xen/sched.h --- a/xen/include/xen/sched.h Wed Jun 02 11:15:21 2010 +0100 +++ b/xen/include/xen/sched.h Wed Jun 02 11:15:26 2010 +0100 @@ -193,7 +193,10 @@ /* tasklet */ struct tasklet tasklet; }; - + +/* Number of supported watchdog timers per domain */ +#define NR_WATCHDOG_TIMERS 2 + struct domain { domid_t domain_id; @@ -297,6 +300,10 @@ /* OProfile support. 
*/ struct xenoprof *xenoprof; int32_t time_offset_seconds; + + /* Domain watchdog. */ + spinlock_t watchdog_lock; + struct timer watchdog_timer[NR_WATCHDOG_TIMERS]; struct rcu_head rcu; @@ -601,6 +608,8 @@ cpu_online(cpu) && \ !per_cpu(tasklet_work_to_do, cpu)) +void watchdog_domain_destroy(struct domain *d); + #define IS_PRIV(_d) ((_d)->is_privileged) #define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))