Message ID | 20181009160442.19894-1-colin.king@canonical.com |
---|---|
Headers | show |
Series | block: fix silent corruption in Linux kernel 4.15 | expand |
On 09.10.2018 18:04, Colin King wrote: > From: Colin Ian King <colin.king@canonical.com> > > Buglink: https://bugs.launchpad.net/bugs/1796542 > > == SRU Justification == > > A silent data corruption was introduced in v4.10-rc1 with commit > 72ecad22d9f198aafee64218512e02ffa7818671 and was fixed in v4.18-rc7 > with commit 17d51b10d7773e4618bcac64648f30f12d4078fb. It affects > users of O_DIRECT, in our case a KVM virtual machine with drives > which use qemu's "cache=none" option. > > == Fix == > > Upstream commits: > > 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b > block: add a lower-level bio_add_page interface > > b403ea2404889e1227812fa9657667a1deb9c694 > block: bio_iov_iter_get_pages: fix size of last iovec > > 9362dd1109f87a9d0a798fbc890cb339c171ed35 > blkdev: __blkdev_direct_IO_simple: fix leak in error case > > 17d51b10d7773e4618bcac64648f30f12d4078fb > block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs > > The first 3 patches are required for a clean application of the final > patch that actually addresses the problem with a fix to this known > issue. > > == Regression Potential == > > This touches the block layer, so there is risk potential in data > corruption. The fixes have several weeks in the upstream kernel and > so far, I see no subsequent fixes required. > > == Test Case == > > Build the program listed below [1] > kudos to Jan Kara, and run with: > > dd if=/dev/zero if=loop.img bs=1M count=2048 > sudo losetup /dev/loop0 loop.img > > ./blkdev-dio-test /dev/loop0 0 & > ./blkdev-dio-test /dev/loop0 2048 & > > Without the fix, ones lost writes fairly soon. Without the fix, this > runs without any losy write messages. > > blkdev-dio-test.c: > > #define _GNU_SOURCE > #include <stdio.h> > #include <unistd.h> > #include <fcntl.h> > #include <string.h> > #include <stdlib.h> > #include <sys/uio.h> > > #define PAGE_SIZE 4096 > #define SECT_SIZE 512 > #define BUF_OFF (2*SECT_SIZE) > > int main(int argc, char **argv) > { > int fd = open(argv[1], O_RDWR | O_DIRECT); > int ret; > char *buf; > loff_t off; > struct iovec iov[2]; > unsigned int seq; > > if (fd < 0) { > perror("open"); > return 1; > } > > off = strtol(argv[2], NULL, 10); > > buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE); > > iov[0].iov_base = buf; > iov[0].iov_len = SECT_SIZE; > iov[1].iov_base = buf + BUF_OFF; > iov[1].iov_len = SECT_SIZE; > > seq = 0; > memset(buf, 0, PAGE_SIZE); > while (1) { > *(unsigned int *)buf = seq; > *(unsigned int *)(buf + BUF_OFF) = seq; > ret = pwritev(fd, iov, 2, off); > if (ret < 0) { > perror("pwritev"); > return 1; > } > if (ret != 2*SECT_SIZE) { > fprintf(stderr, "Short pwritev: %d\n", ret); > return 1; > } > ret = pread(fd, buf, PAGE_SIZE, off); > if (ret < 0) { > perror("pread"); > return 1; > } > if (ret != PAGE_SIZE) { > fprintf(stderr, "Short read: %d\n", ret); > return 1; > } > if (*(unsigned int *)buf != seq || > *(unsigned int *)(buf + SECT_SIZE) != seq) { > printf("Lost write %u: %u %u\n", seq, *(unsigned int *)buf, *(unsigned int *)(buf + SECT_SIZE)); > return 1; > } > seq++; > } > > return 0; > } > > References: > [1] https://www.spinics.net/lists/linux-block/msg28507.html > > --- > > Christoph Hellwig (1): > block: add a lower-level bio_add_page interface > > Martin Wilck (3): > block: bio_iov_iter_get_pages: fix size of last iovec > blkdev: __blkdev_direct_IO_simple: fix leak in error case > block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs > > block/bio.c | 149 ++++++++++++++++++++++++++++++-------------- > fs/block_dev.c | 9 +-- > include/linux/bio.h | 9 +++ > 3 files changed, 117 insertions(+), 50 deletions(-) > Successful testing and so far changes had no further follow-up changes. Acked-by: Stefan Bader <stefan.bader@canonical.com>
On 10/09/18 18:04, Colin King wrote: > From: Colin Ian King <colin.king@canonical.com> > > Buglink: https://bugs.launchpad.net/bugs/1796542 > > == SRU Justification == > > A silent data corruption was introduced in v4.10-rc1 with commit > 72ecad22d9f198aafee64218512e02ffa7818671 and was fixed in v4.18-rc7 > with commit 17d51b10d7773e4618bcac64648f30f12d4078fb. It affects > users of O_DIRECT, in our case a KVM virtual machine with drives > which use qemu's "cache=none" option. > > == Fix == > > Upstream commits: > > 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b > block: add a lower-level bio_add_page interface > > b403ea2404889e1227812fa9657667a1deb9c694 > block: bio_iov_iter_get_pages: fix size of last iovec > > 9362dd1109f87a9d0a798fbc890cb339c171ed35 > blkdev: __blkdev_direct_IO_simple: fix leak in error case > > 17d51b10d7773e4618bcac64648f30f12d4078fb > block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs > > The first 3 patches are required for a clean application of the final > patch that actually addresses the problem with a fix to this known > issue. > > == Regression Potential == > > This touches the block layer, so there is risk potential in data > corruption. The fixes have several weeks in the upstream kernel and > so far, I see no subsequent fixes required. > > == Test Case == > > Build the program listed below [1] > kudos to Jan Kara, and run with: > > dd if=/dev/zero if=loop.img bs=1M count=2048 > sudo losetup /dev/loop0 loop.img > > ./blkdev-dio-test /dev/loop0 0 & > ./blkdev-dio-test /dev/loop0 2048 & > > Without the fix, ones lost writes fairly soon. Without the fix, this > runs without any losy write messages. > > blkdev-dio-test.c: > > #define _GNU_SOURCE > #include <stdio.h> > #include <unistd.h> > #include <fcntl.h> > #include <string.h> > #include <stdlib.h> > #include <sys/uio.h> > > #define PAGE_SIZE 4096 > #define SECT_SIZE 512 > #define BUF_OFF (2*SECT_SIZE) > > int main(int argc, char **argv) > { > int fd = open(argv[1], O_RDWR | O_DIRECT); > int ret; > char *buf; > loff_t off; > struct iovec iov[2]; > unsigned int seq; > > if (fd < 0) { > perror("open"); > return 1; > } > > off = strtol(argv[2], NULL, 10); > > buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE); > > iov[0].iov_base = buf; > iov[0].iov_len = SECT_SIZE; > iov[1].iov_base = buf + BUF_OFF; > iov[1].iov_len = SECT_SIZE; > > seq = 0; > memset(buf, 0, PAGE_SIZE); > while (1) { > *(unsigned int *)buf = seq; > *(unsigned int *)(buf + BUF_OFF) = seq; > ret = pwritev(fd, iov, 2, off); > if (ret < 0) { > perror("pwritev"); > return 1; > } > if (ret != 2*SECT_SIZE) { > fprintf(stderr, "Short pwritev: %d\n", ret); > return 1; > } > ret = pread(fd, buf, PAGE_SIZE, off); > if (ret < 0) { > perror("pread"); > return 1; > } > if (ret != PAGE_SIZE) { > fprintf(stderr, "Short read: %d\n", ret); > return 1; > } > if (*(unsigned int *)buf != seq || > *(unsigned int *)(buf + SECT_SIZE) != seq) { > printf("Lost write %u: %u %u\n", seq, *(unsigned int *)buf, *(unsigned int *)(buf + SECT_SIZE)); > return 1; > } > seq++; > } > > return 0; > } > > References: > [1] https://www.spinics.net/lists/linux-block/msg28507.html > > --- > > Christoph Hellwig (1): > block: add a lower-level bio_add_page interface > > Martin Wilck (3): > block: bio_iov_iter_get_pages: fix size of last iovec > blkdev: __blkdev_direct_IO_simple: fix leak in error case > block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs > > block/bio.c | 149 ++++++++++++++++++++++++++++++-------------- > fs/block_dev.c | 9 +-- > include/linux/bio.h | 9 +++ > 3 files changed, 117 insertions(+), 50 deletions(-) > Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>
On 09.10.2018 18:04, Colin King wrote: > From: Colin Ian King <colin.king@canonical.com> > > Buglink: https://bugs.launchpad.net/bugs/1796542 > > == SRU Justification == > > A silent data corruption was introduced in v4.10-rc1 with commit > 72ecad22d9f198aafee64218512e02ffa7818671 and was fixed in v4.18-rc7 > with commit 17d51b10d7773e4618bcac64648f30f12d4078fb. It affects > users of O_DIRECT, in our case a KVM virtual machine with drives > which use qemu's "cache=none" option. > > == Fix == > > Upstream commits: > > 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b > block: add a lower-level bio_add_page interface > > b403ea2404889e1227812fa9657667a1deb9c694 > block: bio_iov_iter_get_pages: fix size of last iovec > > 9362dd1109f87a9d0a798fbc890cb339c171ed35 > blkdev: __blkdev_direct_IO_simple: fix leak in error case > > 17d51b10d7773e4618bcac64648f30f12d4078fb > block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs > > The first 3 patches are required for a clean application of the final > patch that actually addresses the problem with a fix to this known > issue. > > == Regression Potential == > > This touches the block layer, so there is risk potential in data > corruption. The fixes have several weeks in the upstream kernel and > so far, I see no subsequent fixes required. > > == Test Case == > > Build the program listed below [1] > kudos to Jan Kara, and run with: > > dd if=/dev/zero if=loop.img bs=1M count=2048 > sudo losetup /dev/loop0 loop.img > > ./blkdev-dio-test /dev/loop0 0 & > ./blkdev-dio-test /dev/loop0 2048 & > > Without the fix, ones lost writes fairly soon. Without the fix, this > runs without any losy write messages. > > blkdev-dio-test.c: > > #define _GNU_SOURCE > #include <stdio.h> > #include <unistd.h> > #include <fcntl.h> > #include <string.h> > #include <stdlib.h> > #include <sys/uio.h> > > #define PAGE_SIZE 4096 > #define SECT_SIZE 512 > #define BUF_OFF (2*SECT_SIZE) > > int main(int argc, char **argv) > { > int fd = open(argv[1], O_RDWR | O_DIRECT); > int ret; > char *buf; > loff_t off; > struct iovec iov[2]; > unsigned int seq; > > if (fd < 0) { > perror("open"); > return 1; > } > > off = strtol(argv[2], NULL, 10); > > buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE); > > iov[0].iov_base = buf; > iov[0].iov_len = SECT_SIZE; > iov[1].iov_base = buf + BUF_OFF; > iov[1].iov_len = SECT_SIZE; > > seq = 0; > memset(buf, 0, PAGE_SIZE); > while (1) { > *(unsigned int *)buf = seq; > *(unsigned int *)(buf + BUF_OFF) = seq; > ret = pwritev(fd, iov, 2, off); > if (ret < 0) { > perror("pwritev"); > return 1; > } > if (ret != 2*SECT_SIZE) { > fprintf(stderr, "Short pwritev: %d\n", ret); > return 1; > } > ret = pread(fd, buf, PAGE_SIZE, off); > if (ret < 0) { > perror("pread"); > return 1; > } > if (ret != PAGE_SIZE) { > fprintf(stderr, "Short read: %d\n", ret); > return 1; > } > if (*(unsigned int *)buf != seq || > *(unsigned int *)(buf + SECT_SIZE) != seq) { > printf("Lost write %u: %u %u\n", seq, *(unsigned int *)buf, *(unsigned int *)(buf + SECT_SIZE)); > return 1; > } > seq++; > } > > return 0; > } > > References: > [1] https://www.spinics.net/lists/linux-block/msg28507.html > > --- > > Christoph Hellwig (1): > block: add a lower-level bio_add_page interface > > Martin Wilck (3): > block: bio_iov_iter_get_pages: fix size of last iovec > blkdev: __blkdev_direct_IO_simple: fix leak in error case > block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs > > block/bio.c | 149 ++++++++++++++++++++++++++++++-------------- > fs/block_dev.c | 9 +-- > include/linux/bio.h | 9 +++ > 3 files changed, 117 insertions(+), 50 deletions(-) > Applied to bionic/master-next for re-spin. Thanks. -Stefan
From: Colin Ian King <colin.king@canonical.com> Buglink: https://bugs.launchpad.net/bugs/1796542 == SRU Justification == A silent data corruption was introduced in v4.10-rc1 with commit 72ecad22d9f198aafee64218512e02ffa7818671 and was fixed in v4.18-rc7 with commit 17d51b10d7773e4618bcac64648f30f12d4078fb. It affects users of O_DIRECT, in our case a KVM virtual machine with drives which use qemu's "cache=none" option. == Fix == Upstream commits: 0aa69fd32a5f766e997ca8ab4723c5a1146efa8b block: add a lower-level bio_add_page interface b403ea2404889e1227812fa9657667a1deb9c694 block: bio_iov_iter_get_pages: fix size of last iovec 9362dd1109f87a9d0a798fbc890cb339c171ed35 blkdev: __blkdev_direct_IO_simple: fix leak in error case 17d51b10d7773e4618bcac64648f30f12d4078fb block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs The first 3 patches are required for a clean application of the final patch that actually addresses the problem with a fix to this known issue. == Regression Potential == This touches the block layer, so there is risk potential in data corruption. The fixes have several weeks in the upstream kernel and so far, I see no subsequent fixes required. == Test Case == Build the program listed below [1] kudos to Jan Kara, and run with: dd if=/dev/zero if=loop.img bs=1M count=2048 sudo losetup /dev/loop0 loop.img ./blkdev-dio-test /dev/loop0 0 & ./blkdev-dio-test /dev/loop0 2048 & Without the fix, ones lost writes fairly soon. Without the fix, this runs without any losy write messages. blkdev-dio-test.c: #define _GNU_SOURCE #include <stdio.h> #include <unistd.h> #include <fcntl.h> #include <string.h> #include <stdlib.h> #include <sys/uio.h> #define PAGE_SIZE 4096 #define SECT_SIZE 512 #define BUF_OFF (2*SECT_SIZE) int main(int argc, char **argv) { int fd = open(argv[1], O_RDWR | O_DIRECT); int ret; char *buf; loff_t off; struct iovec iov[2]; unsigned int seq; if (fd < 0) { perror("open"); return 1; } off = strtol(argv[2], NULL, 10); buf = aligned_alloc(PAGE_SIZE, PAGE_SIZE); iov[0].iov_base = buf; iov[0].iov_len = SECT_SIZE; iov[1].iov_base = buf + BUF_OFF; iov[1].iov_len = SECT_SIZE; seq = 0; memset(buf, 0, PAGE_SIZE); while (1) { *(unsigned int *)buf = seq; *(unsigned int *)(buf + BUF_OFF) = seq; ret = pwritev(fd, iov, 2, off); if (ret < 0) { perror("pwritev"); return 1; } if (ret != 2*SECT_SIZE) { fprintf(stderr, "Short pwritev: %d\n", ret); return 1; } ret = pread(fd, buf, PAGE_SIZE, off); if (ret < 0) { perror("pread"); return 1; } if (ret != PAGE_SIZE) { fprintf(stderr, "Short read: %d\n", ret); return 1; } if (*(unsigned int *)buf != seq || *(unsigned int *)(buf + SECT_SIZE) != seq) { printf("Lost write %u: %u %u\n", seq, *(unsigned int *)buf, *(unsigned int *)(buf + SECT_SIZE)); return 1; } seq++; } return 0; } References: [1] https://www.spinics.net/lists/linux-block/msg28507.html --- Christoph Hellwig (1): block: add a lower-level bio_add_page interface Martin Wilck (3): block: bio_iov_iter_get_pages: fix size of last iovec blkdev: __blkdev_direct_IO_simple: fix leak in error case block: bio_iov_iter_get_pages: pin more pages for multi-segment IOs block/bio.c | 149 ++++++++++++++++++++++++++++++-------------- fs/block_dev.c | 9 +-- include/linux/bio.h | 9 +++ 3 files changed, 117 insertions(+), 50 deletions(-)