@@ -21,8 +21,10 @@
#include <sys/types.h>
#include <fcntl.h>
#include <sys/select.h>
+#include <sys/wait.h>
#include <signal.h>
#include <unistd.h> /* for write, unlink and close */
+#include <assert.h>
#include "libxl.h"
#include "libxl_utils.h"
#include "libxl_internal.h"
@@ -38,6 +40,8 @@
ctx->xch = xc_interface_open();
ctx->xsh = xs_daemon_open();
+
+ ctx->waitpid_instead= libxl_waitpid_instead_default;
return 0;
}
@@ -496,16 +500,24 @@
return (char **) flexarray_contents(dm_args);
}
+struct libxl_device_model_starting {
+ int domid;
+ pid_t intermediate;
+};
+
int libxl_create_device_model(struct libxl_ctx *ctx,
libxl_device_model_info *info,
- libxl_device_nic *vifs, int num_vifs)
+ libxl_device_nic *vifs, int num_vifs,
+ libxl_device_model_starting **starting_r)
{
char *dom_path, *path, *logfile, *logfile_new;
- char *kvs[3];
struct stat stat_buf;
- int logfile_w, null, pid;
+ int logfile_w, null;
int i;
char **args;
+ pid_t intermediate;
+
+ *starting_r= 0;
args = libxl_build_device_model_args(ctx, info, vifs, num_vifs);
if (!args)
@@ -533,17 +545,121 @@
logfile = libxl_sprintf(ctx, "/var/log/xen/qemu-dm-%s.log", info->dom_name);
logfile_w = open(logfile, O_WRONLY|O_CREAT, 0644);
null = open("/dev/null", O_RDONLY);
- pid = libxl_exec(ctx, null, logfile_w, logfile_w, info->device_model, args);
+
+ if (starting_r) {
+ *starting_r= libxl_calloc(ctx, sizeof(**starting_r), 1);
+ if (!*starting_r) return ERROR_NOMEM;
+ }
+
+ intermediate = libxl_fork(ctx);
+ if (intermediate==-1) return ERROR_FAIL;
+
+ if (!intermediate) {
+ struct libxl_ctx clone;
+ char *kvs[3];
+ pid_t child, got;
+ int status;
+
+ child = libxl_fork(ctx);
+ if (!child) {
+ libxl_exec(ctx, null, logfile_w, logfile_w,
+ info->device_model, args);
+ }
+
+ if (!starting_r) _exit(0); /* just detach then */
+
+ clone = *ctx;
+ clone.xsh = xs_daemon_open();
+ /* we mustn't use the parent's handle in the child */
+
+ kvs[0] = libxl_sprintf(ctx, "image/device-model-pid");
+ kvs[1] = libxl_sprintf(ctx, "%d", child);
+ kvs[2] = NULL;
+ libxl_xs_writev(ctx, XBT_NULL, dom_path, kvs);
+
+ got = ctx->waitpid_instead(child, &status, 0);
+ assert(got == child);
+
+ libxl_report_child_exitstatus(ctx, "device model", child, status);
+ _exit(WIFEXITED(status) ? WEXITSTATUS(status) :
+ WIFSIGNALED(status) && WTERMSIG(status)<127
+ ? WTERMSIG(status)+128 : -1);
+ }
+
+ if (starting_r) {
+ (*starting_r)->domid= info->domid;
+ (*starting_r)->intermediate= intermediate;
+ }
+
close(null);
close(logfile_w);
- kvs[0] = libxl_sprintf(ctx, "image/device-model-pid");
- kvs[1] = libxl_sprintf(ctx, "%d", pid);
- kvs[2] = NULL;
- libxl_xs_writev(ctx, XBT_NULL, dom_path, kvs);
-
return 0;
}
+
+static void report_dm_intermediate_status(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting,
+ pid_t got, int status) {
+ if (!WIFEXITED(status))
+ /* intermediate process did the logging itself if it exited */
+ libxl_report_child_exitstatus(ctx,
+ "device model intermediate process"
+ " (startup monitor)", starting->intermediate,
+ status);
+}
+
+int libxl_detach_device_model(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting) {
+ int r, status;
+ int rc = 0;
+ pid_t got;
+
+ if (starting->intermediate) {
+ r = kill(starting->intermediate, SIGKILL);
+ if (r) {
+ XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "could not kill device model"
+ "intermediate process [%ld]",
+ (unsigned long)starting->intermediate);
+ abort(); /* things are very wrong */
+ }
+ got = ctx->waitpid_instead(starting->intermediate, &status, 0);
+ assert(got == starting->intermediate);
+ if (!(WIFSIGNALED(status) && WTERMSIG(status)==SIGKILL)) {
+ report_dm_intermediate_status(ctx, starting, got, status);
+ rc = ERROR_FAIL;
+ }
+ }
+
+ libxl_free(ctx, starting);
+
+ return rc;
+}
+
+static int check_dm_failure(struct libxl_ctx *ctx,
+ void *starting_void) {
+ libxl_device_model_starting *starting = starting_void;
+ pid_t got;
+ int status;
+
+ got = ctx->waitpid_instead(starting->intermediate, &status, WNOHANG);
+ if (!got) return 0;
+
+ assert(got == starting->intermediate);
+ report_dm_intermediate_status(ctx, starting, got, status);
+ starting->intermediate= 0;
+ return ERROR_FAIL;
+}
+
+int libxl_confirm_device_model_startup(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting) {
+ int problem = libxl_wait_for_device_model(ctx, starting->domid, "running",
+ check_dm_failure,
+ starting);
+ int detach = libxl_detach_device_model(ctx, starting);
+ return problem ? problem : detach;
+ return ERROR_FAIL;
+}
+
/******************************************************************************/
int libxl_device_disk_add(struct libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk)
@@ -941,7 +1057,7 @@
hvm = is_hvm(ctx, domid);
if (hvm) {
- if (libxl_wait_for_device_model(ctx, domid, "running") < 0) {
+ if (libxl_wait_for_device_model(ctx, domid, "running", 0,0) < 0) {
return -1;
}
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state", domid);
@@ -955,7 +1071,7 @@
pcidev->bus, pcidev->dev, pcidev->func);
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/command", domid);
xs_write(ctx->xsh, XBT_NULL, path, "pci-ins", strlen("pci-ins"));
- if (libxl_wait_for_device_model(ctx, domid, "pci-inserted") < 0)
+ if (libxl_wait_for_device_model(ctx, domid, "pci-inserted", 0,0) < 0)
XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn't respond in time\n");
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/parameter", domid);
vdevfn = libxl_xs_read(ctx, XBT_NULL, path);
@@ -1029,7 +1145,7 @@
hvm = is_hvm(ctx, domid);
if (hvm) {
- if (libxl_wait_for_device_model(ctx, domid, "running") < 0) {
+ if (libxl_wait_for_device_model(ctx, domid, "running", 0,0) < 0) {
return -1;
}
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state", domid);
@@ -1039,7 +1155,7 @@
pcidev->bus, pcidev->dev, pcidev->func);
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/command", domid);
xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem"));
- if (libxl_wait_for_device_model(ctx, domid, "pci-removed") < 0) {
+ if (libxl_wait_for_device_model(ctx, domid, "pci-removed", 0,0) < 0) {
XL_LOG(ctx, XL_LOG_ERROR, "Device Model didn't respond in time\n");
return -1;
}
@@ -42,6 +42,11 @@
/* mini-GC */
int alloc_maxsize;
void **alloc_ptrs;
+
+ /* for callers who reap children willy-nilly; caller must only
+ * set this after libxl_init and before any other call - or
+ * may leave them untouched */
+ int (*waitpid_instead)(pid_t pid, int *status, int flags);
};
typedef struct {
@@ -193,9 +198,19 @@
struct libxl_dominfo * libxl_domain_list(struct libxl_ctx *ctx, int *nb_domain);
xc_dominfo_t * libxl_domain_infolist(struct libxl_ctx *ctx, int *nb_domain);
+typedef struct libxl_device_model_starting libxl_device_model_starting;
int libxl_create_device_model(struct libxl_ctx *ctx,
libxl_device_model_info *info,
- libxl_device_nic *vifs, int num_vifs);
+ libxl_device_nic *vifs, int num_vifs,
+ libxl_device_model_starting **starting_r);
+ /* Caller must either: pass starting_r==0, or on successful
+ * return pass *starting_r to libxl_confirm_device_model
+ * or libxl_detach_device_model */
+int libxl_confirm_device_model_startup(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting);
+int libxl_detach_device_model(struct libxl_ctx *ctx,
+ libxl_device_model_starting *starting);
+ /* DM is detached even if error is returned */
int libxl_device_disk_add(struct libxl_ctx *ctx, uint32_t domid, libxl_device_disk *disk);
int libxl_device_disk_clean_shutdown(struct libxl_ctx *ctx, uint32_t domid);
@@ -260,12 +260,17 @@
return -1;
}
-int libxl_wait_for_device_model(struct libxl_ctx *ctx, uint32_t domid, char *state)
+int libxl_wait_for_device_model(struct libxl_ctx *ctx,
+ uint32_t domid, char *state,
+ int (*check_callback)(struct libxl_ctx *ctx,
+ void *userdata),
+ void *check_callback_userdata)
{
char path[50];
char *p;
int watchdog = 100;
unsigned int len;
+ int rc;
snprintf(path, sizeof(path), "/local/domain/0/device-model/%d/state", domid);
while (watchdog > 0) {
@@ -283,6 +288,10 @@
watchdog--;
}
}
+ if (check_callback) {
+ rc = check_callback(ctx, check_callback_userdata);
+ if (rc) return rc;
+ }
}
XL_LOG(ctx, XL_LOG_ERROR, "Device Model not ready\n");
return -1;
@@ -15,34 +15,81 @@
* GNU Lesser General Public License for more details.
*/
+#define _GNU_SOURCE /* fixme should be somewhere else */
+
#include <stdio.h>
+#include <string.h>
#include <unistd.h>
#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include "libxl.h"
#include "libxl_internal.h"
-int libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd,
- char *arg0, char **args)
+pid_t libxl_fork(struct libxl_ctx *ctx)
{
- int pid, i;
+ pid_t pid;
pid = fork();
if (pid == -1) {
- XL_LOG(ctx, XL_LOG_ERROR, "fork failed");
+ XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "fork failed");
return -1;
}
- if (pid == 0) {
- /* child */
- if (stdinfd != -1)
- dup2(stdinfd, STDIN_FILENO);
- if (stdoutfd != -1)
- dup2(stdoutfd, STDOUT_FILENO);
- if (stderrfd != -1)
- dup2(stderrfd, STDERR_FILENO);
- for (i = 4; i < 256; i++)
- close(i);
- execv(arg0, args);
- exit(256);
- }
+
return pid;
}
+
+void libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd,
+ char *arg0, char **args)
+ /* call this in the child */
+{
+ int i;
+
+ if (stdinfd != -1)
+ dup2(stdinfd, STDIN_FILENO);
+ if (stdoutfd != -1)
+ dup2(stdoutfd, STDOUT_FILENO);
+ if (stderrfd != -1)
+ dup2(stderrfd, STDERR_FILENO);
+ for (i = 4; i < 256; i++)
+ close(i);
+ execv(arg0, args);
+ XL_LOG(ctx, XL_LOG_ERROR_ERRNO, "exec %s failed", arg0);
+ _exit(-1);
+}
+
+void libxl_report_child_exitstatus(struct libxl_ctx *ctx,
+ const char *what, pid_t pid, int status) {
+ /* treats all exit statuses as errors; if that's not what you want,
+ * check status yourself first */
+
+ if (WIFEXITED(status)) {
+ int st= WEXITSTATUS(status);
+ if (st)
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] exited"
+ " with error status %d", what, (unsigned long)pid, st);
+ else
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] unexpectedly"
+ " exited status zero", what, (unsigned long)pid);
+ } else if (WIFSIGNALED(status)) {
+ int sig= WTERMSIG(status);
+ const char *str= strsignal(sig);
+ const char *coredump= WCOREDUMP(status) ? " (core dumped)" : "";
+ if (str)
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] died due to"
+ " fatal signal %s%s", what, (unsigned long)pid,
+ str, coredump);
+ else
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] died due to unknown"
+ " fatal signal number %d%s", what, (unsigned long)pid,
+ sig, coredump);
+ } else {
+ XL_LOG(ctx, XL_LOG_ERROR, "%s [%ld] gave unknown"
+ " wait status 0x%x", what, (unsigned long)pid, status);
+ }
+}
+
+pid_t libxl_waitpid_instead_default(pid_t pid, int *status, int flags) {
+ return waitpid(pid,status,flags);
+}
@@ -40,11 +40,13 @@
#define XL_LOG(ctx, loglevel, _f, _a...)
#endif
+#define XL_LOG_ERROR_ERRNO 4
#define XL_LOG_DEBUG 3
#define XL_LOG_INFO 2
#define XL_LOG_WARNING 1
#define XL_LOG_ERROR 0
+void xl_logv(struct libxl_ctx *ctx, int loglevel, const char *file, int line, const char *func, char *fmt, va_list al);
void xl_log(struct libxl_ctx *ctx, int loglevel, const char *file, int line, const char *func, char *fmt, ...);
typedef struct {
@@ -126,7 +128,11 @@
char **bents, char **fents);
int libxl_device_destroy(struct libxl_ctx *ctx, char *be_path, int force);
int libxl_devices_destroy(struct libxl_ctx *ctx, uint32_t domid, int force);
-int libxl_wait_for_device_model(struct libxl_ctx *ctx, uint32_t domid, char *state);
+int libxl_wait_for_device_model(struct libxl_ctx *ctx,
+ uint32_t domid, char *state,
+ int (*check_callback)(struct libxl_ctx *ctx,
+ void *userdata),
+ void *check_callback_userdata);
int libxl_wait_for_backend(struct libxl_ctx *ctx, char *be_path, char *state);
int libxl_device_pci_flr(struct libxl_ctx *ctx, unsigned int domain, unsigned int bus,
unsigned int dev, unsigned int func);
@@ -137,8 +143,12 @@
int vcpus, int store_evtchn, unsigned long *store_mfn);
/* xl_exec */
-int libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd,
- char *arg0, char **args);
+pid_t libxl_fork(struct libxl_ctx *ctx);
+void libxl_exec(struct libxl_ctx *ctx, int stdinfd, int stdoutfd, int stderrfd,
+ char *arg0, char **args);
+void libxl_report_child_exitstatus(struct libxl_ctx *ctx,
+ const char *what, pid_t pid, int status);
+pid_t libxl_waitpid_instead_default(pid_t pid, int *status, int flags);
#endif
@@ -19,8 +19,10 @@
#include <stdarg.h>
#if defined(__linux__)
+# ifndef _GNU_SOURCE /* fixme WTF! */
int asprintf(char **buffer, char *fmt, ...);
int vasprintf(char **buffer, const char *fmt, va_list ap);
+# endif
#endif
#endif
@@ -572,6 +572,15 @@
config_destroy(&config);
}
+#define MUST( call ) ({ \
+ int must_rc = (call); \
+ if (must_rc) { \
+ fprintf(stderr,"xl: fatal error: %s:%d, rc=%d: %s\n", \
+ __FILE__,__LINE__, must_rc, #call); \
+ exit(-must_rc); \
+ } \
+ })
+
static void create_domain(int debug, const char *filename)
{
struct libxl_ctx ctx;
@@ -584,30 +593,35 @@
libxl_device_pci *pcidevs = NULL;
int num_disks = 0, num_vifs = 0, num_pcidevs = 0;
int i;
+ libxl_device_model_starting *dm_starting;
printf("Parsing config file %s\n", filename);
parse_config_file(filename, &info1, &info2, &disks, &num_disks, &vifs, &num_vifs, &pcidevs, &num_pcidevs, &dm_info);
if (debug)
printf_info(&info1, &info2, disks, num_disks, vifs, num_vifs, pcidevs, num_pcidevs, &dm_info);
- libxl_ctx_init(&ctx);
- libxl_ctx_set_log(&ctx, log_callback, NULL);
- libxl_domain_make(&ctx, &info1, &domid);
- libxl_domain_build(&ctx, &info2, domid);
+ MUST( libxl_ctx_init(&ctx) );
+ MUST( libxl_ctx_set_log(&ctx, log_callback, NULL) );
+ MUST( libxl_domain_make(&ctx, &info1, &domid) );
+ MUST( libxl_domain_build(&ctx, &info2, domid) );
device_model_info_domid_fixup(&dm_info, domid);
for (i = 0; i < num_disks; i++) {
disk_info_domid_fixup(disks + i, domid);
- libxl_device_disk_add(&ctx, domid, &disks[i]);
+ MUST( libxl_device_disk_add(&ctx, domid, &disks[i]) );
}
for (i = 0; i < num_vifs; i++) {
nic_info_domid_fixup(vifs + i, domid);
- libxl_device_nic_add(&ctx, domid, &vifs[i]);
+ MUST( libxl_device_nic_add(&ctx, domid, &vifs[i]) );
}
- libxl_create_device_model(&ctx, &dm_info, vifs, num_vifs);
+
+ MUST( libxl_create_device_model(&ctx, &dm_info, vifs, num_vifs,
+ &dm_starting) );
for (i = 0; i < num_pcidevs; i++)
libxl_device_pci_add(&ctx, domid, &pcidevs[i]);
+ MUST( libxl_confirm_device_model_startup(&ctx, dm_starting) );
+
libxl_domain_unpause(&ctx, domid);
}
This patch makes xl create check whether qemu-dm has started correctly, and causes it to fail immediately with appropriate errors if not. There are other bugfixes too. More specifically: * libxl_create_device_model forks twice rather than once so that the process which calls libxl does not end up being the actual parent of qemu. That avoids the need for the qemu-dm process to be reaped at some indefinite time in the future. * The first fork generates an intermediate process which is responsible for writing the qemu-dm pid to xenstore and then merely waits to collect and report on qemu-dm's exit status during startup. New arguments to libxl_create_device_model allow the preservation of its pid so that a later call can check whether the startup is successful. * xl.c's create_domain checks for errors in its libxl calls. Consequential changes: * libxl_wait_for_device_model now takes a callback function parameter which is called repeatedly in the loop iteration and allows the caller to abort the wait. * libxl_exec no longer calls fork; there is a new libxl_fork. * libxl_exec.c says #define _GNU_SOURCE (for strsignal). The provided asprintf declarations then need to be suppressed. * There is a hook to override waitpid, which will be necessary for some callers. Remaining problems and other issues I noticed or we found: * The error handling is rather inconsistent still and lacking in places. * xl_logv is declared but not defined. * _GNU_SOURCE should be used throughout. The asprintf implementation should be disabled in favour of the system one. * XL_LOG_ERROR_ERRNO needs to actually print the errno value. * destroy_device_model can kill random dom0 processes (!) * struct libxl_ctx should be defined in libxl_internal.h. Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>