| Field | Value |
|---|---|
| Message ID | 20210223132300.2651-2-tim.gardner@canonical.com |
| State | New |
| Headers | show |
| Series | xen/events: avoid removing an event channel while handling it (expand) |
Acked-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
On 23.02.21 14:23, Tim Gardner wrote: > From: Juergen Gross <jgross@suse.com> > > Today it can happen that an event channel is being removed from the > system while the event handling loop is active. This can lead to a > race resulting in crashes or WARN() splats when trying to access the > irq_info structure related to the event channel. > > Fix this problem by using a rwlock taken as reader in the event > handling loop and as writer when deallocating the irq_info structure. > > As the observed problem was a NULL dereference in evtchn_from_irq() > make this function more robust against races by testing the irq_info > pointer to be not NULL before dereferencing it. > > And finally make all accesses to evtchn_to_irq[row][col] atomic ones > in order to avoid seeing partial updates of an array element in irq > handling. Note that irq handling can be entered only for event channels > which have been valid before, so any not populated row isn't a problem > in this regard, as rows are only ever added and never removed. > > This is XSA-331. > > Cc: stable@vger.kernel.org > Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> > Reported-by: Jinoh Kang <luke1337@theori.io> > Signed-off-by: Juergen Gross <jgross@suse.com> > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> > Reviewed-by: Wei Liu <wl@xen.org> > (backported from commit 073d0552ead5bfc7a3a9c01de590e924f11b5dd2) [rtg: Minor context adjustment in xen_free_irq()] > Signed-off-by: Tim Gardner <tim.gardner@canonical.com> Acked-by: Stefan Bader <stefan.bader@canonical.com> > > Conflicts: > drivers/xen/events/events_base.c > --- I give up. Why above is not possible, I cannot understand. 
-Stefan > drivers/xen/events/events_base.c | 41 ++++++++++++++++++++++++++++---- > 1 file changed, 36 insertions(+), 5 deletions(-) > > diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c > index 499eff7d3f65..c56fc81a409f 100644 > --- a/drivers/xen/events/events_base.c > +++ b/drivers/xen/events/events_base.c > @@ -33,6 +33,7 @@ > #include <linux/slab.h> > #include <linux/irqnr.h> > #include <linux/pci.h> > +#include <linux/spinlock.h> > > #ifdef CONFIG_X86 > #include <asm/desc.h> > @@ -70,6 +71,23 @@ const struct evtchn_ops *evtchn_ops; > */ > static DEFINE_MUTEX(irq_mapping_update_lock); > > +/* > + * Lock protecting event handling loop against removing event channels. > + * Adding of event channels is no issue as the associated IRQ becomes active > + * only after everything is setup (before request_[threaded_]irq() the handler > + * can't be entered for an event, as the event channel will be unmasked only > + * then). > + */ > +static DEFINE_RWLOCK(evtchn_rwlock); > + > +/* > + * Lock hierarchy: > + * > + * irq_mapping_update_lock > + * evtchn_rwlock > + * IRQ-desc lock > + */ > + > static LIST_HEAD(xen_irq_list_head); > > /* IRQ <-> VIRQ mapping. 
*/ > @@ -102,7 +120,7 @@ static void clear_evtchn_to_irq_row(unsigned row) > unsigned col; > > for (col = 0; col < EVTCHN_PER_ROW; col++) > - evtchn_to_irq[row][col] = -1; > + WRITE_ONCE(evtchn_to_irq[row][col], -1); > } > > static void clear_evtchn_to_irq_all(void) > @@ -139,7 +157,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) > clear_evtchn_to_irq_row(row); > } > > - evtchn_to_irq[row][col] = irq; > + WRITE_ONCE(evtchn_to_irq[row][col], irq); > return 0; > } > > @@ -149,7 +167,7 @@ int get_evtchn_to_irq(unsigned evtchn) > return -1; > if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) > return -1; > - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; > + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); > } > > /* Get info for IRQ */ > @@ -247,10 +265,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) > */ > unsigned int evtchn_from_irq(unsigned irq) > { > - if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) > + const struct irq_info *info = NULL; > + > + if (likely(irq < nr_irqs)) > + info = info_for_irq(irq); > + if (!info) > return 0; > > - return info_for_irq(irq)->evtchn; > + return info->evtchn; > } > > unsigned irq_from_evtchn(unsigned int evtchn) > @@ -426,16 +448,21 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) > static void xen_free_irq(unsigned irq) > { > struct irq_info *info = irq_get_handler_data(irq); > + unsigned long flags; > > if (WARN_ON(!info)) > return; > > + write_lock_irqsave(&evtchn_rwlock, flags); > + > list_del(&info->list); > > irq_set_handler_data(irq, NULL); > > WARN_ON(info->refcnt > 0); > > + write_unlock_irqrestore(&evtchn_rwlock, flags); > + > kfree(info); > > /* Legacy IRQ descriptors are managed by the arch. 
*/ > @@ -1218,6 +1245,8 @@ static void __xen_evtchn_do_upcall(void) > struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); > int cpu = smp_processor_id(); > > + read_lock(&evtchn_rwlock); > + > do { > vcpu_info->evtchn_upcall_pending = 0; > > @@ -1228,6 +1257,8 @@ static void __xen_evtchn_do_upcall(void) > virt_rmb(); /* Hypervisor can set upcall pending. */ > > } while (vcpu_info->evtchn_upcall_pending); > + > + read_unlock(&evtchn_rwlock); > } > > void xen_evtchn_do_upcall(struct pt_regs *regs) >
On 2/23/21 7:55 AM, Stefan Bader wrote: > On 23.02.21 14:23, Tim Gardner wrote: >> From: Juergen Gross <jgross@suse.com> >> >> Today it can happen that an event channel is being removed from the >> system while the event handling loop is active. This can lead to a >> race resulting in crashes or WARN() splats when trying to access the >> irq_info structure related to the event channel. >> >> Fix this problem by using a rwlock taken as reader in the event >> handling loop and as writer when deallocating the irq_info structure. >> >> As the observed problem was a NULL dereference in evtchn_from_irq() >> make this function more robust against races by testing the irq_info >> pointer to be not NULL before dereferencing it. >> >> And finally make all accesses to evtchn_to_irq[row][col] atomic ones >> in order to avoid seeing partial updates of an array element in irq >> handling. Note that irq handling can be entered only for event channels >> which have been valid before, so any not populated row isn't a problem >> in this regard, as rows are only ever added and never removed. >> >> This is XSA-331. >> >> Cc: stable@vger.kernel.org >> Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> >> Reported-by: Jinoh Kang <luke1337@theori.io> >> Signed-off-by: Juergen Gross <jgross@suse.com> >> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> >> Reviewed-by: Wei Liu <wl@xen.org> >> (backported from commit 073d0552ead5bfc7a3a9c01de590e924f11b5dd2) > [rtg: Minor context adjustment in xen_free_irq()] >> Signed-off-by: Tim Gardner <tim.gardner@canonical.com> > Acked-by: Stefan Bader <stefan.bader@canonical.com> >> >> Conflicts: >> drivers/xen/events/events_base.c >> --- > > I give up. Why above is not possible, I cannot understand. > I have no idea what you want here. As I pointed out above, the conflict was a minor context adjustment in xen_free_irq(). How can I better describe what I did ? 
> -Stefan > >> drivers/xen/events/events_base.c | 41 ++++++++++++++++++++++++++++---- >> 1 file changed, 36 insertions(+), 5 deletions(-) >> >> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c >> index 499eff7d3f65..c56fc81a409f 100644 >> --- a/drivers/xen/events/events_base.c >> +++ b/drivers/xen/events/events_base.c >> @@ -33,6 +33,7 @@ >> #include <linux/slab.h> >> #include <linux/irqnr.h> >> #include <linux/pci.h> >> +#include <linux/spinlock.h> >> >> #ifdef CONFIG_X86 >> #include <asm/desc.h> >> @@ -70,6 +71,23 @@ const struct evtchn_ops *evtchn_ops; >> */ >> static DEFINE_MUTEX(irq_mapping_update_lock); >> >> +/* >> + * Lock protecting event handling loop against removing event channels. >> + * Adding of event channels is no issue as the associated IRQ becomes active >> + * only after everything is setup (before request_[threaded_]irq() the handler >> + * can't be entered for an event, as the event channel will be unmasked only >> + * then). >> + */ >> +static DEFINE_RWLOCK(evtchn_rwlock); >> + >> +/* >> + * Lock hierarchy: >> + * >> + * irq_mapping_update_lock >> + * evtchn_rwlock >> + * IRQ-desc lock >> + */ >> + >> static LIST_HEAD(xen_irq_list_head); >> >> /* IRQ <-> VIRQ mapping. 
*/ >> @@ -102,7 +120,7 @@ static void clear_evtchn_to_irq_row(unsigned row) >> unsigned col; >> >> for (col = 0; col < EVTCHN_PER_ROW; col++) >> - evtchn_to_irq[row][col] = -1; >> + WRITE_ONCE(evtchn_to_irq[row][col], -1); >> } >> >> static void clear_evtchn_to_irq_all(void) >> @@ -139,7 +157,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) >> clear_evtchn_to_irq_row(row); >> } >> >> - evtchn_to_irq[row][col] = irq; >> + WRITE_ONCE(evtchn_to_irq[row][col], irq); >> return 0; >> } >> >> @@ -149,7 +167,7 @@ int get_evtchn_to_irq(unsigned evtchn) >> return -1; >> if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) >> return -1; >> - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; >> + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); >> } >> >> /* Get info for IRQ */ >> @@ -247,10 +265,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) >> */ >> unsigned int evtchn_from_irq(unsigned irq) >> { >> - if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) >> + const struct irq_info *info = NULL; >> + >> + if (likely(irq < nr_irqs)) >> + info = info_for_irq(irq); >> + if (!info) >> return 0; >> >> - return info_for_irq(irq)->evtchn; >> + return info->evtchn; >> } >> >> unsigned irq_from_evtchn(unsigned int evtchn) >> @@ -426,16 +448,21 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) >> static void xen_free_irq(unsigned irq) >> { >> struct irq_info *info = irq_get_handler_data(irq); >> + unsigned long flags; >> >> if (WARN_ON(!info)) >> return; >> >> + write_lock_irqsave(&evtchn_rwlock, flags); >> + >> list_del(&info->list); >> >> irq_set_handler_data(irq, NULL); >> >> WARN_ON(info->refcnt > 0); >> >> + write_unlock_irqrestore(&evtchn_rwlock, flags); >> + >> kfree(info); >> >> /* Legacy IRQ descriptors are managed by the arch. 
*/ >> @@ -1218,6 +1245,8 @@ static void __xen_evtchn_do_upcall(void) >> struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); >> int cpu = smp_processor_id(); >> >> + read_lock(&evtchn_rwlock); >> + >> do { >> vcpu_info->evtchn_upcall_pending = 0; >> >> @@ -1228,6 +1257,8 @@ static void __xen_evtchn_do_upcall(void) >> virt_rmb(); /* Hypervisor can set upcall pending. */ >> >> } while (vcpu_info->evtchn_upcall_pending); >> + >> + read_unlock(&evtchn_rwlock); >> } >> >> void xen_evtchn_do_upcall(struct pt_regs *regs) >> > >
On 23.02.21 15:58, Tim Gardner wrote: > > > On 2/23/21 7:55 AM, Stefan Bader wrote: >> On 23.02.21 14:23, Tim Gardner wrote: >>> From: Juergen Gross <jgross@suse.com> >>> >>> Today it can happen that an event channel is being removed from the >>> system while the event handling loop is active. This can lead to a >>> race resulting in crashes or WARN() splats when trying to access the >>> irq_info structure related to the event channel. >>> >>> Fix this problem by using a rwlock taken as reader in the event >>> handling loop and as writer when deallocating the irq_info structure. >>> >>> As the observed problem was a NULL dereference in evtchn_from_irq() >>> make this function more robust against races by testing the irq_info >>> pointer to be not NULL before dereferencing it. >>> >>> And finally make all accesses to evtchn_to_irq[row][col] atomic ones >>> in order to avoid seeing partial updates of an array element in irq >>> handling. Note that irq handling can be entered only for event channels >>> which have been valid before, so any not populated row isn't a problem >>> in this regard, as rows are only ever added and never removed. >>> >>> This is XSA-331. >>> >>> Cc: stable@vger.kernel.org >>> Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> >>> Reported-by: Jinoh Kang <luke1337@theori.io> >>> Signed-off-by: Juergen Gross <jgross@suse.com> >>> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> >>> Reviewed-by: Wei Liu <wl@xen.org> >>> (backported from commit 073d0552ead5bfc7a3a9c01de590e924f11b5dd2) >> [rtg: Minor context adjustment in xen_free_irq()] ^ Format, just like upstream does it... >>> Signed-off-by: Tim Gardner <tim.gardner@canonical.com> >> Acked-by: Stefan Bader <stefan.bader@canonical.com> >>> >>> Conflicts: >>> drivers/xen/events/events_base.c >>> --- >> >> I give up. Why above is not possible, I cannot understand. >> > > I have no idea what you want here. 
As I pointed out above, the conflict was a > minor context adjustment in xen_free_irq(). How can I better describe what I did ? > >> -Stefan >> >>> drivers/xen/events/events_base.c | 41 ++++++++++++++++++++++++++++---- >>> 1 file changed, 36 insertions(+), 5 deletions(-) >>> >>> diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c >>> index 499eff7d3f65..c56fc81a409f 100644 >>> --- a/drivers/xen/events/events_base.c >>> +++ b/drivers/xen/events/events_base.c >>> @@ -33,6 +33,7 @@ >>> #include <linux/slab.h> >>> #include <linux/irqnr.h> >>> #include <linux/pci.h> >>> +#include <linux/spinlock.h> >>> #ifdef CONFIG_X86 >>> #include <asm/desc.h> >>> @@ -70,6 +71,23 @@ const struct evtchn_ops *evtchn_ops; >>> */ >>> static DEFINE_MUTEX(irq_mapping_update_lock); >>> +/* >>> + * Lock protecting event handling loop against removing event channels. >>> + * Adding of event channels is no issue as the associated IRQ becomes active >>> + * only after everything is setup (before request_[threaded_]irq() the handler >>> + * can't be entered for an event, as the event channel will be unmasked only >>> + * then). >>> + */ >>> +static DEFINE_RWLOCK(evtchn_rwlock); >>> + >>> +/* >>> + * Lock hierarchy: >>> + * >>> + * irq_mapping_update_lock >>> + * evtchn_rwlock >>> + * IRQ-desc lock >>> + */ >>> + >>> static LIST_HEAD(xen_irq_list_head); >>> /* IRQ <-> VIRQ mapping. 
*/ >>> @@ -102,7 +120,7 @@ static void clear_evtchn_to_irq_row(unsigned row) >>> unsigned col; >>> for (col = 0; col < EVTCHN_PER_ROW; col++) >>> - evtchn_to_irq[row][col] = -1; >>> + WRITE_ONCE(evtchn_to_irq[row][col], -1); >>> } >>> static void clear_evtchn_to_irq_all(void) >>> @@ -139,7 +157,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) >>> clear_evtchn_to_irq_row(row); >>> } >>> - evtchn_to_irq[row][col] = irq; >>> + WRITE_ONCE(evtchn_to_irq[row][col], irq); >>> return 0; >>> } >>> @@ -149,7 +167,7 @@ int get_evtchn_to_irq(unsigned evtchn) >>> return -1; >>> if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) >>> return -1; >>> - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; >>> + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); >>> } >>> /* Get info for IRQ */ >>> @@ -247,10 +265,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) >>> */ >>> unsigned int evtchn_from_irq(unsigned irq) >>> { >>> - if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) >>> + const struct irq_info *info = NULL; >>> + >>> + if (likely(irq < nr_irqs)) >>> + info = info_for_irq(irq); >>> + if (!info) >>> return 0; >>> - return info_for_irq(irq)->evtchn; >>> + return info->evtchn; >>> } >>> unsigned irq_from_evtchn(unsigned int evtchn) >>> @@ -426,16 +448,21 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) >>> static void xen_free_irq(unsigned irq) >>> { >>> struct irq_info *info = irq_get_handler_data(irq); >>> + unsigned long flags; >>> if (WARN_ON(!info)) >>> return; >>> + write_lock_irqsave(&evtchn_rwlock, flags); >>> + >>> list_del(&info->list); >>> irq_set_handler_data(irq, NULL); >>> WARN_ON(info->refcnt > 0); >>> + write_unlock_irqrestore(&evtchn_rwlock, flags); >>> + >>> kfree(info); >>> /* Legacy IRQ descriptors are managed by the arch. 
*/ >>> @@ -1218,6 +1245,8 @@ static void __xen_evtchn_do_upcall(void) >>> struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); >>> int cpu = smp_processor_id(); >>> + read_lock(&evtchn_rwlock); >>> + >>> do { >>> vcpu_info->evtchn_upcall_pending = 0; >>> @@ -1228,6 +1257,8 @@ static void __xen_evtchn_do_upcall(void) >>> virt_rmb(); /* Hypervisor can set upcall pending. */ >>> } while (vcpu_info->evtchn_upcall_pending); >>> + >>> + read_unlock(&evtchn_rwlock); >>> } >>> void xen_evtchn_do_upcall(struct pt_regs *regs) >>> >> >> >
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 499eff7d3f65..c56fc81a409f 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/irqnr.h> #include <linux/pci.h> +#include <linux/spinlock.h> #ifdef CONFIG_X86 #include <asm/desc.h> @@ -70,6 +71,23 @@ const struct evtchn_ops *evtchn_ops; */ static DEFINE_MUTEX(irq_mapping_update_lock); +/* + * Lock protecting event handling loop against removing event channels. + * Adding of event channels is no issue as the associated IRQ becomes active + * only after everything is setup (before request_[threaded_]irq() the handler + * can't be entered for an event, as the event channel will be unmasked only + * then). + */ +static DEFINE_RWLOCK(evtchn_rwlock); + +/* + * Lock hierarchy: + * + * irq_mapping_update_lock + * evtchn_rwlock + * IRQ-desc lock + */ + static LIST_HEAD(xen_irq_list_head); /* IRQ <-> VIRQ mapping. */ @@ -102,7 +120,7 @@ static void clear_evtchn_to_irq_row(unsigned row) unsigned col; for (col = 0; col < EVTCHN_PER_ROW; col++) - evtchn_to_irq[row][col] = -1; + WRITE_ONCE(evtchn_to_irq[row][col], -1); } static void clear_evtchn_to_irq_all(void) @@ -139,7 +157,7 @@ static int set_evtchn_to_irq(unsigned evtchn, unsigned irq) clear_evtchn_to_irq_row(row); } - evtchn_to_irq[row][col] = irq; + WRITE_ONCE(evtchn_to_irq[row][col], irq); return 0; } @@ -149,7 +167,7 @@ int get_evtchn_to_irq(unsigned evtchn) return -1; if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL) return -1; - return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]; + return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]); } /* Get info for IRQ */ @@ -247,10 +265,14 @@ static void xen_irq_info_cleanup(struct irq_info *info) */ unsigned int evtchn_from_irq(unsigned irq) { - if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq)) + const struct irq_info *info = NULL; + + if (likely(irq < nr_irqs)) + info = 
info_for_irq(irq); + if (!info) return 0; - return info_for_irq(irq)->evtchn; + return info->evtchn; } unsigned irq_from_evtchn(unsigned int evtchn) @@ -426,16 +448,21 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) static void xen_free_irq(unsigned irq) { struct irq_info *info = irq_get_handler_data(irq); + unsigned long flags; if (WARN_ON(!info)) return; + write_lock_irqsave(&evtchn_rwlock, flags); + list_del(&info->list); irq_set_handler_data(irq, NULL); WARN_ON(info->refcnt > 0); + write_unlock_irqrestore(&evtchn_rwlock, flags); + kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ @@ -1218,6 +1245,8 @@ static void __xen_evtchn_do_upcall(void) struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); int cpu = smp_processor_id(); + read_lock(&evtchn_rwlock); + do { vcpu_info->evtchn_upcall_pending = 0; @@ -1228,6 +1257,8 @@ static void __xen_evtchn_do_upcall(void) virt_rmb(); /* Hypervisor can set upcall pending. */ } while (vcpu_info->evtchn_upcall_pending); + + read_unlock(&evtchn_rwlock); } void xen_evtchn_do_upcall(struct pt_regs *regs)