| Field | Value |
|---|---|
| Message ID | 1534797417-18710-1-git-send-email-kamal@canonical.com |
| State | New |
| Headers | show |
| Series | [Xenial,SRU] nvme: avoid cqe corruption when update at the same time as read \| expand |
Hi, FWIW, I also backported this patch to Xenial independently, and it looks like we resolved the one conflict in the same way. Regards, Daniel On Tue, Aug 21, 2018 at 6:37 AM Kamal Mostafa <kamal@canonical.com> wrote: > > From: Marta Rybczynska <mrybczyn@kalray.eu> > > BugLink: http://bugs.launchpad.net/bugs/1788035 > > Make sure the CQE phase (validity) is read before the rest of the > structure. The phase bit is the highest address and the CQE > read will happen on most platforms from lower to upper addresses > and will be done by multiple non-atomic loads. If the structure > is updated by PCI during the reads from the processor, the > processor may get a corrupted copy. > > The addition of the new nvme_cqe_valid function that verifies > the validity bit also allows refactoring of the other CQE read > sequences. > > Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu> > Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Reviewed-by: Keith Busch <keith.busch@intel.com> > Signed-off-by: Jens Axboe <axboe@fb.com> > (backported from commit d783e0bd02e700e7a893ef4fa71c69438ac1c276) > Signed-off-by: Kamal Mostafa <kamal@canonical.com> > --- > drivers/nvme/host/pci.c | 24 +++++++++++++----------- > 1 file changed, 13 insertions(+), 11 deletions(-) > > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c > index a755d0e..5e5f065 100644 > --- a/drivers/nvme/host/pci.c > +++ b/drivers/nvme/host/pci.c > @@ -815,6 +815,13 @@ static void nvme_complete_rq(struct request *req) > blk_mq_end_request(req, error); > } > > +/* We read the CQE phase first to check if the rest of the entry is valid */ > +static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head, > + u16 phase) > +{ > + return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; > +} > + > static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > { > u16 head, phase; > @@ -822,13 +829,10 @@ static void 
__nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > head = nvmeq->cq_head; > phase = nvmeq->cq_phase; > > - for (;;) { > + while (nvme_cqe_valid(nvmeq, head, phase)) { > struct nvme_completion cqe = nvmeq->cqes[head]; > - u16 status = le16_to_cpu(cqe.status); > struct request *req; > > - if ((status & 1) != phase) > - break; > nvmeq->sq_head = le16_to_cpu(cqe.sq_head); > > #ifdef CONFIG_NVME_VENDOR_EXT_GOOGLE > @@ -866,7 +870,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); > if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) > memcpy(req->special, &cqe, sizeof(cqe)); > - blk_mq_complete_request(req, status >> 1); > + blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); > > } > > @@ -914,18 +918,16 @@ static irqreturn_t nvme_irq(int irq, void *data) > static irqreturn_t nvme_irq_check(int irq, void *data) > { > struct nvme_queue *nvmeq = data; > - struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; > - if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) > - return IRQ_NONE; > - return IRQ_WAKE_THREAD; > + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > } > > static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) > { > struct nvme_queue *nvmeq = hctx->driver_data; > > - if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == > - nvmeq->cq_phase) { > + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { > spin_lock_irq(&nvmeq->q_lock); > __nvme_process_cq(nvmeq, &tag); > spin_unlock_irq(&nvmeq->q_lock); > -- > 2.7.4 > > > -- > kernel-team mailing list > kernel-team@lists.ubuntu.com > https://lists.ubuntu.com/mailman/listinfo/kernel-team
On 2018-08-20 13:36:57 , Kamal Mostafa wrote: > From: Marta Rybczynska <mrybczyn@kalray.eu> > > BugLink: http://bugs.launchpad.net/bugs/1788035 > > Make sure the CQE phase (validity) is read before the rest of the > structure. The phase bit is the highest address and the CQE > read will happen on most platforms from lower to upper addresses > and will be done by multiple non-atomic loads. If the structure > is updated by PCI during the reads from the processor, the > processor may get a corrupted copy. > > The addition of the new nvme_cqe_valid function that verifies > the validity bit also allows refactoring of the other CQE read > sequences. > > Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu> > Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Reviewed-by: Keith Busch <keith.busch@intel.com> > Signed-off-by: Jens Axboe <axboe@fb.com> > (backported from commit d783e0bd02e700e7a893ef4fa71c69438ac1c276) > Signed-off-by: Kamal Mostafa <kamal@canonical.com> > --- > drivers/nvme/host/pci.c | 24 +++++++++++++----------- > 1 file changed, 13 insertions(+), 11 deletions(-) > > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c > index a755d0e..5e5f065 100644 > --- a/drivers/nvme/host/pci.c > +++ b/drivers/nvme/host/pci.c > @@ -815,6 +815,13 @@ static void nvme_complete_rq(struct request *req) > blk_mq_end_request(req, error); > } > > +/* We read the CQE phase first to check if the rest of the entry is valid */ > +static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head, > + u16 phase) > +{ > + return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; > +} > + > static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > { > u16 head, phase; > @@ -822,13 +829,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > head = nvmeq->cq_head; > phase = nvmeq->cq_phase; > > - for (;;) { > + while (nvme_cqe_valid(nvmeq, head, phase)) { > struct 
nvme_completion cqe = nvmeq->cqes[head]; > - u16 status = le16_to_cpu(cqe.status); > struct request *req; > > - if ((status & 1) != phase) > - break; > nvmeq->sq_head = le16_to_cpu(cqe.sq_head); > > #ifdef CONFIG_NVME_VENDOR_EXT_GOOGLE > @@ -866,7 +870,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); > if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) > memcpy(req->special, &cqe, sizeof(cqe)); > - blk_mq_complete_request(req, status >> 1); > + blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); > > } > > @@ -914,18 +918,16 @@ static irqreturn_t nvme_irq(int irq, void *data) > static irqreturn_t nvme_irq_check(int irq, void *data) > { > struct nvme_queue *nvmeq = data; > - struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; > - if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) > - return IRQ_NONE; > - return IRQ_WAKE_THREAD; > + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > } > > static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) > { > struct nvme_queue *nvmeq = hctx->driver_data; > > - if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == > - nvmeq->cq_phase) { > + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { > spin_lock_irq(&nvmeq->q_lock); > __nvme_process_cq(nvmeq, &tag); > spin_unlock_irq(&nvmeq->q_lock); Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
On 08/20/18 22:36, Kamal Mostafa wrote: > From: Marta Rybczynska <mrybczyn@kalray.eu> > > BugLink: http://bugs.launchpad.net/bugs/1788035 > > Make sure the CQE phase (validity) is read before the rest of the > structure. The phase bit is the highest address and the CQE > read will happen on most platforms from lower to upper addresses > and will be done by multiple non-atomic loads. If the structure > is updated by PCI during the reads from the processor, the > processor may get a corrupted copy. > > The addition of the new nvme_cqe_valid function that verifies > the validity bit also allows refactoring of the other CQE read > sequences. > > Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu> > Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Reviewed-by: Keith Busch <keith.busch@intel.com> > Signed-off-by: Jens Axboe <axboe@fb.com> > (backported from commit d783e0bd02e700e7a893ef4fa71c69438ac1c276) > Signed-off-by: Kamal Mostafa <kamal@canonical.com> Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> > --- > drivers/nvme/host/pci.c | 24 +++++++++++++----------- > 1 file changed, 13 insertions(+), 11 deletions(-) > > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c > index a755d0e..5e5f065 100644 > --- a/drivers/nvme/host/pci.c > +++ b/drivers/nvme/host/pci.c > @@ -815,6 +815,13 @@ static void nvme_complete_rq(struct request *req) > blk_mq_end_request(req, error); > } > > +/* We read the CQE phase first to check if the rest of the entry is valid */ > +static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head, > + u16 phase) > +{ > + return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; > +} > + > static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > { > u16 head, phase; > @@ -822,13 +829,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > head = nvmeq->cq_head; > phase = nvmeq->cq_phase; > > - for (;;) { > 
+ while (nvme_cqe_valid(nvmeq, head, phase)) { > struct nvme_completion cqe = nvmeq->cqes[head]; > - u16 status = le16_to_cpu(cqe.status); > struct request *req; > > - if ((status & 1) != phase) > - break; > nvmeq->sq_head = le16_to_cpu(cqe.sq_head); > > #ifdef CONFIG_NVME_VENDOR_EXT_GOOGLE > @@ -866,7 +870,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); > if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) > memcpy(req->special, &cqe, sizeof(cqe)); > - blk_mq_complete_request(req, status >> 1); > + blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); > > } > > @@ -914,18 +918,16 @@ static irqreturn_t nvme_irq(int irq, void *data) > static irqreturn_t nvme_irq_check(int irq, void *data) > { > struct nvme_queue *nvmeq = data; > - struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; > - if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) > - return IRQ_NONE; > - return IRQ_WAKE_THREAD; > + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > } > > static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) > { > struct nvme_queue *nvmeq = hctx->driver_data; > > - if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == > - nvmeq->cq_phase) { > + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { > spin_lock_irq(&nvmeq->q_lock); > __nvme_process_cq(nvmeq, &tag); > spin_unlock_irq(&nvmeq->q_lock); >
..to xenial/master-next On 2018-08-20 13:36:57 , Kamal Mostafa wrote: > From: Marta Rybczynska <mrybczyn@kalray.eu> > > BugLink: http://bugs.launchpad.net/bugs/1788035 > > Make sure the CQE phase (validity) is read before the rest of the > structure. The phase bit is the highest address and the CQE > read will happen on most platforms from lower to upper addresses > and will be done by multiple non-atomic loads. If the structure > is updated by PCI during the reads from the processor, the > processor may get a corrupted copy. > > The addition of the new nvme_cqe_valid function that verifies > the validity bit also allows refactoring of the other CQE read > sequences. > > Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu> > Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de> > Reviewed-by: Christoph Hellwig <hch@lst.de> > Reviewed-by: Keith Busch <keith.busch@intel.com> > Signed-off-by: Jens Axboe <axboe@fb.com> > (backported from commit d783e0bd02e700e7a893ef4fa71c69438ac1c276) > Signed-off-by: Kamal Mostafa <kamal@canonical.com> > --- > drivers/nvme/host/pci.c | 24 +++++++++++++----------- > 1 file changed, 13 insertions(+), 11 deletions(-) > > diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c > index a755d0e..5e5f065 100644 > --- a/drivers/nvme/host/pci.c > +++ b/drivers/nvme/host/pci.c > @@ -815,6 +815,13 @@ static void nvme_complete_rq(struct request *req) > blk_mq_end_request(req, error); > } > > +/* We read the CQE phase first to check if the rest of the entry is valid */ > +static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head, > + u16 phase) > +{ > + return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; > +} > + > static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > { > u16 head, phase; > @@ -822,13 +829,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > head = nvmeq->cq_head; > phase = nvmeq->cq_phase; > > - for (;;) { > + while (nvme_cqe_valid(nvmeq, 
head, phase)) { > struct nvme_completion cqe = nvmeq->cqes[head]; > - u16 status = le16_to_cpu(cqe.status); > struct request *req; > > - if ((status & 1) != phase) > - break; > nvmeq->sq_head = le16_to_cpu(cqe.sq_head); > > #ifdef CONFIG_NVME_VENDOR_EXT_GOOGLE > @@ -866,7 +870,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) > req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); > if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) > memcpy(req->special, &cqe, sizeof(cqe)); > - blk_mq_complete_request(req, status >> 1); > + blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); > > } > > @@ -914,18 +918,16 @@ static irqreturn_t nvme_irq(int irq, void *data) > static irqreturn_t nvme_irq_check(int irq, void *data) > { > struct nvme_queue *nvmeq = data; > - struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; > - if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) > - return IRQ_NONE; > - return IRQ_WAKE_THREAD; > + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) > + return IRQ_WAKE_THREAD; > + return IRQ_NONE; > } > > static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) > { > struct nvme_queue *nvmeq = hctx->driver_data; > > - if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == > - nvmeq->cq_phase) { > + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { > spin_lock_irq(&nvmeq->q_lock); > __nvme_process_cq(nvmeq, &tag); > spin_unlock_irq(&nvmeq->q_lock); > -- > 2.7.4 > > > -- > kernel-team mailing list > kernel-team@lists.ubuntu.com > https://lists.ubuntu.com/mailman/listinfo/kernel-team
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a755d0e..5e5f065 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -815,6 +815,13 @@ static void nvme_complete_rq(struct request *req) blk_mq_end_request(req, error); } +/* We read the CQE phase first to check if the rest of the entry is valid */ +static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head, + u16 phase) +{ + return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase; +} + static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) { u16 head, phase; @@ -822,13 +829,10 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) head = nvmeq->cq_head; phase = nvmeq->cq_phase; - for (;;) { + while (nvme_cqe_valid(nvmeq, head, phase)) { struct nvme_completion cqe = nvmeq->cqes[head]; - u16 status = le16_to_cpu(cqe.status); struct request *req; - if ((status & 1) != phase) - break; nvmeq->sq_head = le16_to_cpu(cqe.sq_head); #ifdef CONFIG_NVME_VENDOR_EXT_GOOGLE @@ -866,7 +870,7 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id); if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special) memcpy(req->special, &cqe, sizeof(cqe)); - blk_mq_complete_request(req, status >> 1); + blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1); } @@ -914,18 +918,16 @@ static irqreturn_t nvme_irq(int irq, void *data) static irqreturn_t nvme_irq_check(int irq, void *data) { struct nvme_queue *nvmeq = data; - struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head]; - if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase) - return IRQ_NONE; - return IRQ_WAKE_THREAD; + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) + return IRQ_WAKE_THREAD; + return IRQ_NONE; } static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) { struct nvme_queue *nvmeq = hctx->driver_data; - if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == - nvmeq->cq_phase) 
{ + if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { spin_lock_irq(&nvmeq->q_lock); __nvme_process_cq(nvmeq, &tag); spin_unlock_irq(&nvmeq->q_lock);