Message ID | 20190118173103.4903-4-berrange@redhat.com |
---|---|
State | New |
Headers | show |
Series | trace: make systemtap easier to use for simple logging | expand |
On 1/18/19 11:31 AM, Daniel P. Berrangé wrote: > The '%m' format specifier instructs glibc's printf() implementation to > insert the contents of strerror(errno). That is a glibc-only extension in printf(), but mandatory (and supported in ALL platforms) in syslog(). However, you are correct that: > This is not something that > should ever be used in trace-events files because several of the > backends do not use the format string and so this error information is > invisible to them. The errno value should be given as an explicit > trace argument instead. Use of '%m' should also be avoided as it is not > portable to all QEMU build targets. If all of our traces are compiled to syslog(), it would be portable, but that's not the case. What's more, using %m is subject to race conditions - the more code you have between when the format string containing %m is given, and the point where the actual error message is emitted, the higher the probability that some of the intermediate code could have stomped on errno in the meantime, rendering the logged message incorrect. And forcing logging wrappers to save/restore the current state of errno, just in case they might encounter %m, can get wasteful. Should checkpatch be taught to flag %m as suspicious? However, tracing the errno value is NOT what %m did; I'd rather see... > > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> > --- > hw/vfio/pci.c | 2 +- > hw/vfio/trace-events | 2 +- > scripts/tracetool/__init__.py | 4 ++++ > 3 files changed, 6 insertions(+), 2 deletions(-) > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > index c0cb1ec289..85f1908cfe 100644 > --- a/hw/vfio/pci.c > +++ b/hw/vfio/pci.c > @@ -2581,7 +2581,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) > ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > if (ret) { > /* This can fail for an old kernel or legacy PCI dev */ > - trace_vfio_populate_device_get_irq_info_failure(); > + trace_vfio_populate_device_get_irq_info_failure(errno); trace_vfio_populate_device_get_irq_info_failure(strerror(errno)) > } else if (irq_info.count == 1) { > vdev->pci_aer = true; > } else { > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > index a85e8662ea..6d412afc83 100644 > --- a/hw/vfio/trace-events > +++ b/hw/vfio/trace-events > @@ -37,7 +37,7 @@ vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent de > vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" > vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" > vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" > -vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" > +vfio_populate_device_get_irq_info_failure(int err) "VFIO_DEVICE_GET_IRQ_INFO failure: %d" vfio_populate_device_get_irq_info_failur(const char *err) "VFIO_DEVICE_GET_IRQ_INFO failure: %s" ...if we are trying to match the original intent. > vfio_realize(const char *name, int group_id) " (%s) group %d" > vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" > vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" > diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py > index 3478ac93ab..6fca674936 100644 > --- a/scripts/tracetool/__init__.py > +++ b/scripts/tracetool/__init__.py > @@ -274,6 +274,10 @@ class Event(object): > props = groups["props"].split() > fmt = groups["fmt"] > fmt_trans = groups["fmt_trans"] > + if fmt.find("%m") != -1 or fmt_trans.find("%m") != -1: > + raise ValueError("Event format '%m' is forbidden, pass the error " > + "as an explicit trace argument") Whether or not checkpatch decides to flag %m as suspicious in the overall code base, I like that you are forbidding it in trace files.
On Fri, 18 Jan 2019 11:50:39 -0600 Eric Blake <eblake@redhat.com> wrote: > On 1/18/19 11:31 AM, Daniel P. Berrangé wrote: > > The '%m' format specifier instructs glibc's printf() implementation to > > insert the contents of strerror(errno). > > That is a glibc-only extension in printf(), but mandatory (and supported > in ALL platforms) in syslog(). However, you are correct that: > > > This is not something that > > should ever be used in trace-events files because several of the > > backends do not use the format string and so this error information is > > invisible to them. The errno value should be given as an explicit > > trace argument instead. Use of '%m' should also be avoided as it is not > > portable to all QEMU build targets. It's portable to all QEMU build targets that support vfio, but we can remove it if we want it out of the trace system, which of course does support other targets. > If all of our traces are compiled to syslog(), it would be portable, but > that's not the case. > > What's more, using %m is subject to race conditions - the more code you > have between when the format string containing %m is given, and the > point where the actual error message is emitted, the higher the > probability that some of the intermediate code could have stomped on > errno in the meantime, rendering the logged message incorrect. And > forcing logging wrappers to save/restore the current state of errno, > just in case they might encounter %m, can get wasteful. > > Should checkpatch be taught to flag %m as suspicious? > > However, tracing the errno value is NOT what %m did; I'd rather see... > > > > > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> > > --- > > hw/vfio/pci.c | 2 +- > > hw/vfio/trace-events | 2 +- > > scripts/tracetool/__init__.py | 4 ++++ > > 3 files changed, 6 insertions(+), 2 deletions(-) > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > index c0cb1ec289..85f1908cfe 100644 > > --- a/hw/vfio/pci.c > > +++ b/hw/vfio/pci.c > > @@ -2581,7 +2581,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) > > ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > > if (ret) { > > /* This can fail for an old kernel or legacy PCI dev */ > > - trace_vfio_populate_device_get_irq_info_failure(); > > + trace_vfio_populate_device_get_irq_info_failure(errno); > > trace_vfio_populate_device_get_irq_info_failure(strerror(errno)) Yep, I agree, the intent of the original code is not maintained. Thanks, Alex > > } else if (irq_info.count == 1) { > > vdev->pci_aer = true; > > } else { > > diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events > > index a85e8662ea..6d412afc83 100644 > > --- a/hw/vfio/trace-events > > +++ b/hw/vfio/trace-events > > @@ -37,7 +37,7 @@ vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent de > > vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" > > vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" > > vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" > > -vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" > > +vfio_populate_device_get_irq_info_failure(int err) "VFIO_DEVICE_GET_IRQ_INFO failure: %d" > > vfio_populate_device_get_irq_info_failur(const char *err) > "VFIO_DEVICE_GET_IRQ_INFO failure: %s" > > ...if we are trying to match the original intent. > > > vfio_realize(const char *name, int group_id) " (%s) group %d" > > vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" > > vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" > > diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py > > index 3478ac93ab..6fca674936 100644 > > --- a/scripts/tracetool/__init__.py > > +++ b/scripts/tracetool/__init__.py > > @@ -274,6 +274,10 @@ class Event(object): > > props = groups["props"].split() > > fmt = groups["fmt"] > > fmt_trans = groups["fmt_trans"] > > + if fmt.find("%m") != -1 or fmt_trans.find("%m") != -1: > > + raise ValueError("Event format '%m' is forbidden, pass the error " > > + "as an explicit trace argument") > > Whether or not checkpatch decides to flag %m as suspicious in the > overall code base, I like that you are forbidding it in trace files. >
On Fri, Jan 18, 2019 at 11:50:39AM -0600, Eric Blake wrote: > On 1/18/19 11:31 AM, Daniel P. Berrangé wrote: > > The '%m' format specifier instructs glibc's printf() implementation to > > insert the contents of strerror(errno). > > That is a glibc-only extension in printf(), but mandatory (and supported > in ALL platforms) in syslog(). However, you are correct that: > > > This is not something that > > should ever be used in trace-events files because several of the > > backends do not use the format string and so this error information is > > invisible to them. The errno value should be given as an explicit > > trace argument instead. Use of '%m' should also be avoided as it is not > > portable to all QEMU build targets. > > If all of our traces are compiled to syslog(), it would be portable, but > that's not the case. > > What's more, using %m is subject to race conditions - the more code you > have between when the format string containing %m is given, and the > point where the actual error message is emitted, the higher the > probability that some of the intermediate code could have stomped on > errno in the meantime, rendering the logged message incorrect. And > forcing logging wrappers to save/restore the current state of errno, > just in case they might encounter %m, can get wasteful. > > Should checkpatch be taught to flag %m as suspicious? > > However, tracing the errno value is NOT what %m did; I'd rather see... > > > > > Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> > > --- > > hw/vfio/pci.c | 2 +- > > hw/vfio/trace-events | 2 +- > > scripts/tracetool/__init__.py | 4 ++++ > > 3 files changed, 6 insertions(+), 2 deletions(-) > > > > diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c > > index c0cb1ec289..85f1908cfe 100644 > > --- a/hw/vfio/pci.c > > +++ b/hw/vfio/pci.c > > @@ -2581,7 +2581,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) > > ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > > if (ret) { > > /* This can fail for an old kernel or legacy PCI dev */ > > - trace_vfio_populate_device_get_irq_info_failure(); > > + trace_vfio_populate_device_get_irq_info_failure(errno); > > trace_vfio_populate_device_get_irq_info_failure(strerror(errno)) The caveat is that 'strerror' is not required to be thread safe, however, given that this is Linux only code I guess we can assume the glibc impl which fortunately is thread safe. On this point though, does anyone know of any platforms we support[1], or are likely to support in future, where 'strerror' is *not* thread safe ? I see rumours that Windows strerror() is not thread safe but not found a concrete source for that. [1] https://qemu.weilnetz.de/doc/qemu-doc.html#Supported-build-platforms > > diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py > > index 3478ac93ab..6fca674936 100644 > > --- a/scripts/tracetool/__init__.py > > +++ b/scripts/tracetool/__init__.py > > @@ -274,6 +274,10 @@ class Event(object): > > props = groups["props"].split() > > fmt = groups["fmt"] > > fmt_trans = groups["fmt_trans"] > > + if fmt.find("%m") != -1 or fmt_trans.find("%m") != -1: > > + raise ValueError("Event format '%m' is forbidden, pass the error " > > + "as an explicit trace argument") > > Whether or not checkpatch decides to flag %m as suspicious in the > overall code base, I like that you are forbidding it in trace files. Regards, Daniel
On 1/22/19 8:32 AM, Daniel P. Berrangé wrote: >>> +++ b/hw/vfio/pci.c >>> @@ -2581,7 +2581,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) >>> ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); >>> if (ret) { >>> /* This can fail for an old kernel or legacy PCI dev */ >>> - trace_vfio_populate_device_get_irq_info_failure(); >>> + trace_vfio_populate_device_get_irq_info_failure(errno); >> >> trace_vfio_populate_device_get_irq_info_failure(strerror(errno)) > > The caveat is that 'strerror' is not required to be thread safe, > however, given that this is Linux only code I guess we can assume > the glibc impl which fortunately is thread safe. If we are going to worry about thread-safety of strerror(), we have a LOT of code to scrub (we are using it rather liberally throughout the code base). > > On this point though, does anyone know of any platforms we support[1], > or are likely to support in future, where 'strerror' is *not* thread > safe ? I'm not coming up with one, and I think the problem is independent of this series (if we DO have a problem, it's a series all its own to eradicate the use of strerror() in favor of something safer, either picking strerror_l() or dealing with the glibc vs. BSD differences in strerror_r()).
On Tue, Jan 22, 2019 at 11:19:42AM -0600, Eric Blake wrote: > On 1/22/19 8:32 AM, Daniel P. Berrangé wrote: > > >>> +++ b/hw/vfio/pci.c > >>> @@ -2581,7 +2581,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) > >>> ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > >>> if (ret) { > >>> /* This can fail for an old kernel or legacy PCI dev */ > >>> - trace_vfio_populate_device_get_irq_info_failure(); > >>> + trace_vfio_populate_device_get_irq_info_failure(errno); > >> > >> trace_vfio_populate_device_get_irq_info_failure(strerror(errno)) > > > > The caveat is that 'strerror' is not required to be thread safe, > > however, given that this is Linux only code I guess we can assume > > the glibc impl which fortunately is thread safe. > > If we are going to worry about thread-safety of strerror(), we have a > LOT of code to scrub (we are using it rather liberally throughout the > code base). Yes, indeed we do and it was something that always worried me a little (as well as a few other non-reentrant APIs we used previously). > > On this point though, does anyone know of any platforms we support[1], > > or are likely to support in future, where 'strerror' is *not* thread > > safe ? > > I'm not coming up with one, and I think the problem is independent of > this series (if we DO have a problem, it's a series all its own to > eradicate the use of strerror() in favor of something safer, either > picking strerror_l() or dealing with the glibc vs. BSD differences in > strerror_r()). Agree that its not really something for this series - this just made me think of it again. We went through the scrubbing in libvirt to use the sane, but still tedious to call, variant of strerror_r() many years ago. With luck though it is a worry that can be confined the dustbin of ancient UNIX history....unless someone can point to evidence to the contrary ? Regards, Daniel
On 1/22/19 11:23 AM, Daniel P. Berrangé wrote: > >>> On this point though, does anyone know of any platforms we support[1], >>> or are likely to support in future, where 'strerror' is *not* thread >>> safe ? >> >> I'm not coming up with one, and I think the problem is independent of >> this series (if we DO have a problem, it's a series all its own to >> eradicate the use of strerror() in favor of something safer, either >> picking strerror_l() or dealing with the glibc vs. BSD differences in >> strerror_r()). > > Agree that its not really something for this series - this just > made me think of it again. Shoot - FreeBSD strerror() is not threadsafe: https://github.com/freebsd/freebsd/blob/master/lib/libc/string/strerror.c#L119 char * strerror(int num) { static char ebuf[NL_TEXTMAX]; if (strerror_r(num, ebuf, sizeof(ebuf)) != 0) errno = EINVAL; return (ebuf); } > > We went through the scrubbing in libvirt to use the sane, but still > tedious to call, variant of strerror_r() many years ago. With luck > though it is a worry that can be confined the dustbin of ancient > UNIX history....unless someone can point to evidence to the contrary ? libvirt has it easy - they let gnulib do all the work of futzing around with getting a working strerror() despite platform bugs and despite glibc's insistence on a non-POSIX signature if _GNU_SOURCE is defined. We'll have to do a bit more legwork. That said, I've added it to: https://wiki.qemu.org/Contribute/BiteSizedTasks#Error_checking if someone wants to do the grunt work.
On 22/01/2019 19.10, Eric Blake wrote: > On 1/22/19 11:23 AM, Daniel P. Berrangé wrote: > >> >>>> On this point though, does anyone know of any platforms we support[1], >>>> or are likely to support in future, where 'strerror' is *not* thread >>>> safe ? >>> >>> I'm not coming up with one, and I think the problem is independent of >>> this series (if we DO have a problem, it's a series all its own to >>> eradicate the use of strerror() in favor of something safer, either >>> picking strerror_l() or dealing with the glibc vs. BSD differences in >>> strerror_r()). >> >> Agree that its not really something for this series - this just >> made me think of it again. > > Shoot - FreeBSD strerror() is not threadsafe: > https://github.com/freebsd/freebsd/blob/master/lib/libc/string/strerror.c#L119 > > char * > strerror(int num) > { > static char ebuf[NL_TEXTMAX]; > > if (strerror_r(num, ebuf, sizeof(ebuf)) != 0) > errno = EINVAL; > return (ebuf); > } > >> >> We went through the scrubbing in libvirt to use the sane, but still >> tedious to call, variant of strerror_r() many years ago. With luck >> though it is a worry that can be confined the dustbin of ancient >> UNIX history....unless someone can point to evidence to the contrary ? > > libvirt has it easy - they let gnulib do all the work of futzing around > with getting a working strerror() despite platform bugs and despite > glibc's insistence on a non-POSIX signature if _GNU_SOURCE is defined. > We'll have to do a bit more legwork. > > That said, I've added it to: > https://wiki.qemu.org/Contribute/BiteSizedTasks#Error_checking > > if someone wants to do the grunt work. I think we should change that task to switch to g_strerror() from glib instead ... as far as I can see, this is a proper wrapper around strerror_r(), so we don't have to deal with the implementation oddities of strerror_r() in QEMU. Thomas
On Fri, Oct 18, 2019 at 11:31:15AM +0200, Thomas Huth wrote: > On 22/01/2019 19.10, Eric Blake wrote: > > On 1/22/19 11:23 AM, Daniel P. Berrangé wrote: > > > >> > >>>> On this point though, does anyone know of any platforms we support[1], > >>>> or are likely to support in future, where 'strerror' is *not* thread > >>>> safe ? > >>> > >>> I'm not coming up with one, and I think the problem is independent of > >>> this series (if we DO have a problem, it's a series all its own to > >>> eradicate the use of strerror() in favor of something safer, either > >>> picking strerror_l() or dealing with the glibc vs. BSD differences in > >>> strerror_r()). > >> > >> Agree that its not really something for this series - this just > >> made me think of it again. > > > > Shoot - FreeBSD strerror() is not threadsafe: > > https://github.com/freebsd/freebsd/blob/master/lib/libc/string/strerror.c#L119 > > > > char * > > strerror(int num) > > { > > static char ebuf[NL_TEXTMAX]; > > > > if (strerror_r(num, ebuf, sizeof(ebuf)) != 0) > > errno = EINVAL; > > return (ebuf); > > } > > > >> > >> We went through the scrubbing in libvirt to use the sane, but still > >> tedious to call, variant of strerror_r() many years ago. With luck > >> though it is a worry that can be confined the dustbin of ancient > >> UNIX history....unless someone can point to evidence to the contrary ? > > > > libvirt has it easy - they let gnulib do all the work of futzing around > > with getting a working strerror() despite platform bugs and despite > > glibc's insistence on a non-POSIX signature if _GNU_SOURCE is defined. > > We'll have to do a bit more legwork. > > > > That said, I've added it to: > > https://wiki.qemu.org/Contribute/BiteSizedTasks#Error_checking > > > > if someone wants to do the grunt work. > > I think we should change that task to switch to g_strerror() from glib > instead ... as far as I can see, this is a proper wrapper around > strerror_r(), so we don't have to deal with the implementation oddities > of strerror_r() in QEMU. Yeah, I think using g_strerror() makes sense. We've just adopted that in libvirt precisely to avoid these platform portability oddities and the really unpleasant API calling convention of strerror_r(). Regards, Daniel
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index c0cb1ec289..85f1908cfe 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -2581,7 +2581,7 @@ static void vfio_populate_device(VFIOPCIDevice *vdev, Error **errp) ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); if (ret) { /* This can fail for an old kernel or legacy PCI dev */ - trace_vfio_populate_device_get_irq_info_failure(); + trace_vfio_populate_device_get_irq_info_failure(errno); } else if (irq_info.count == 1) { vdev->pci_aer = true; } else { diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events index a85e8662ea..6d412afc83 100644 --- a/hw/vfio/trace-events +++ b/hw/vfio/trace-events @@ -37,7 +37,7 @@ vfio_pci_hot_reset_has_dep_devices(const char *name) "%s: hot reset dependent de vfio_pci_hot_reset_dep_devices(int domain, int bus, int slot, int function, int group_id) "\t%04x:%02x:%02x.%x group %d" vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %s" vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx" -vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m" +vfio_populate_device_get_irq_info_failure(int err) "VFIO_DEVICE_GET_IRQ_INFO failure: %d" vfio_realize(const char *name, int group_id) " (%s) group %d" vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d" vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x" diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py index 3478ac93ab..6fca674936 100644 --- a/scripts/tracetool/__init__.py +++ b/scripts/tracetool/__init__.py @@ -274,6 +274,10 @@ class Event(object): props = groups["props"].split() fmt = groups["fmt"] fmt_trans = groups["fmt_trans"] + if fmt.find("%m") != -1 or fmt_trans.find("%m") != -1: + raise ValueError("Event format '%m' is forbidden, pass the error " + "as an explicit trace argument") + if len(fmt_trans) > 0: fmt = [fmt_trans, fmt] args = Arguments.build(groups["args"])
The '%m' format specifier instructs glibc's printf() implementation to insert the contents of strerror(errno). This is not something that should ever be used in trace-events files because several of the backends do not use the format string and so this error information is invisible to them. The errno value should be given as an explicit trace argument instead. Use of '%m' should also be avoided as it is not portable to all QEMU build targets. Signed-off-by: Daniel P. Berrangé <berrange@redhat.com> --- hw/vfio/pci.c | 2 +- hw/vfio/trace-events | 2 +- scripts/tracetool/__init__.py | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-)