[Xenial,SRU] nvme: avoid cqe corruption when update at the same time as read

Message ID 1534797417-18710-1-git-send-email-kamal@canonical.com
State New
Series [Xenial,SRU] nvme: avoid cqe corruption when update at the same time as read

Commit Message

Kamal Mostafa Aug. 20, 2018, 8:36 p.m. UTC
From: Marta Rybczynska <mrybczyn@kalray.eu>

BugLink: http://bugs.launchpad.net/bugs/1788035

Make sure the CQE phase (validity) bit is read before the rest of the
structure. The phase bit sits at the highest address of the CQE, and on
most platforms the CQE is read from lower to upper addresses by
multiple non-atomic loads. If the device updates the structure over
PCI while the processor is reading it, the processor may see a
corrupted (torn) copy.

The new nvme_cqe_valid function, which checks the validity bit, also
allows the other CQE read sequences to be refactored.

Signed-off-by: Marta Rybczynska <marta.rybczynska@kalray.eu>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
(backported from commit d783e0bd02e700e7a893ef4fa71c69438ac1c276)
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
---
 drivers/nvme/host/pci.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)
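
For readers who want the pattern outside the kernel tree, below is a
minimal user-space sketch of the torn-read hazard and the phase-first
check this patch adopts. All names are illustrative (this is not the
driver code), endian conversion (le16_to_cpu) is omitted, and on weakly
ordered CPUs a real implementation would also need a read barrier;
volatile here only constrains the compiler.

#include <stdbool.h>
#include <stdint.h>

/* Layout mirrors a 16-byte NVMe completion queue entry. */
struct cqe {
	uint32_t result;
	uint32_t rsvd;
	uint16_t sq_head;
	uint16_t sq_id;
	uint16_t command_id;
	uint16_t status;	/* bit 0 is the phase (validity) bit */
};

/* Read only the status word; leave the rest of the entry untouched. */
static inline bool cqe_valid(const volatile struct cqe *cqes, uint16_t head,
			     uint16_t phase)
{
	return (cqes[head].status & 1) == phase;
}

static void process_cq(volatile struct cqe *cqes, uint16_t depth,
		       uint16_t *head, uint16_t *phase)
{
	while (cqe_valid(cqes, *head, *phase)) {
		/*
		 * The phase bit says the device has finished writing this
		 * entry, so copying the whole multi-word structure is now
		 * safe.  Copying first and checking the phase bit in the
		 * copy (the old code) risks acting on a torn snapshot if
		 * the device is updating the entry concurrently.
		 */
		struct cqe entry = cqes[*head];

		/* ... complete the request named by entry.command_id ... */
		(void)entry;

		if (++*head == depth) {
			*head = 0;
			*phase = !*phase;	/* phase flips on wrap */
		}
	}
}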

Comments

Daniel Axtens Aug. 21, 2018, 1:54 a.m. UTC | #1
Hi,

FWIW, I also backported this patch to Xenial independently, and it
looks like we resolved the one conflict in the same way.

Regards,
Daniel
Khalid Elmously Aug. 22, 2018, 12:46 a.m. UTC | #2
Acked-by: Khalid Elmously <khalid.elmously@canonical.com>
Kleber Sacilotto de Souza Aug. 22, 2018, 6:46 a.m. UTC | #3
Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

Khalid Elmously Aug. 23, 2018, 5:27 a.m. UTC | #4
..to xenial/master-next


Patch

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a755d0e..5e5f065 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -815,6 +815,13 @@  static void nvme_complete_rq(struct request *req)
 	blk_mq_end_request(req, error);
 }
 
+/* We read the CQE phase first to check if the rest of the entry is valid */
+static inline bool nvme_cqe_valid(struct nvme_queue *nvmeq, u16 head,
+		u16 phase)
+{
+	return (le16_to_cpu(nvmeq->cqes[head].status) & 1) == phase;
+}
+
 static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 {
 	u16 head, phase;
@@ -822,13 +829,10 @@  static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 	head = nvmeq->cq_head;
 	phase = nvmeq->cq_phase;
 
-	for (;;) {
+	while (nvme_cqe_valid(nvmeq, head, phase)) {
 		struct nvme_completion cqe = nvmeq->cqes[head];
-		u16 status = le16_to_cpu(cqe.status);
 		struct request *req;
 
-		if ((status & 1) != phase)
-			break;
 		nvmeq->sq_head = le16_to_cpu(cqe.sq_head);
 
 #ifdef CONFIG_NVME_VENDOR_EXT_GOOGLE
@@ -866,7 +870,7 @@  static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag)
 		req = blk_mq_tag_to_rq(*nvmeq->tags, cqe.command_id);
 		if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
 			memcpy(req->special, &cqe, sizeof(cqe));
-		blk_mq_complete_request(req, status >> 1);
+		blk_mq_complete_request(req, le16_to_cpu(cqe.status) >> 1);
 
 	}
 
@@ -914,18 +918,16 @@  static irqreturn_t nvme_irq(int irq, void *data)
 static irqreturn_t nvme_irq_check(int irq, void *data)
 {
 	struct nvme_queue *nvmeq = data;
-	struct nvme_completion cqe = nvmeq->cqes[nvmeq->cq_head];
-	if ((le16_to_cpu(cqe.status) & 1) != nvmeq->cq_phase)
-		return IRQ_NONE;
-	return IRQ_WAKE_THREAD;
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase))
+		return IRQ_WAKE_THREAD;
+	return IRQ_NONE;
 }
 
 static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 {
 	struct nvme_queue *nvmeq = hctx->driver_data;
 
-	if ((le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
-	    nvmeq->cq_phase) {
+	if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) {
 		spin_lock_irq(&nvmeq->q_lock);
 		__nvme_process_cq(nvmeq, &tag);
 		spin_unlock_irq(&nvmeq->q_lock);