diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 896548b..442cf36 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -95,6 +95,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
 	struct axon_msic *msic = get_irq_data(irq);
 	u32 write_offset, msi;
 	int idx;
+	int retry = 0;
 
 	write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG);
 	pr_debug("axon_msi: original write_offset 0x%x\n", write_offset);
@@ -102,7 +103,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
 	/* write_offset doesn't wrap properly, so we have to mask it */
 	write_offset &= MSIC_FIFO_SIZE_MASK;
 
-	while (msic->read_offset != write_offset) {
+	while (msic->read_offset != write_offset && retry < 100) {
 		idx  = msic->read_offset / sizeof(__le32);
 		msi  = le32_to_cpu(msic->fifo_virt[idx]);
 		msi &= 0xFFFF;
@@ -110,13 +111,37 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc)
 		pr_debug("axon_msi: woff %x roff %x msi %x\n",
 			  write_offset, msic->read_offset, msi);
 
+		if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host) {
+			generic_handle_irq(msi);
+			msic->fifo_virt[idx] = cpu_to_le32(0xffffffff);
+		} else {
+			/*
+			 * Reading the MSIC_WRITE_OFFSET_REG does not
+			 * reliably flush the outstanding DMA to the
+			 * FIFO buffer. Here we were reading stale
+			 * data, so we need to retry.
+			 */
+			udelay(1);
+			retry++;
+			pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
+			continue;
+		}
+
+		if (retry) {
+			pr_debug("axon_msi: late irq 0x%x, retry %d\n",
+				 msi, retry);
+			retry = 0;
+		}
+
 		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
 		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
+	}
 
-		if (msi < NR_IRQS && irq_map[msi].host == msic->irq_host)
-			generic_handle_irq(msi);
-		else
-			pr_debug("axon_msi: invalid irq 0x%x!\n", msi);
+	if (retry) {
+		printk(KERN_WARNING "axon_msi: irq timed out\n");
+
+		msic->read_offset += MSIC_FIFO_ENTRY_SIZE;
+		msic->read_offset &= MSIC_FIFO_SIZE_MASK;
 	}
 
 	desc->chip->eoi(irq);
@@ -364,6 +389,7 @@ static int axon_msi_probe(struct of_device *device,
 		       dn->full_name);
 		goto out_free_fifo;
 	}
+	memset(msic->fifo_virt, 0xff, MSIC_FIFO_SIZE_BYTES);
 
 	msic->irq_host = irq_alloc_host(dn, IRQ_HOST_MAP_NOMAP,
 					NR_IRQS, &msic_host_ops, 0);
