diff mbox

[SRU,WILY,XENIAL,YAKKETY] UBUNTU: SAUCE: mm/mmap: fix oopsing on remap_file_pages

Message ID 1462555552-24174-1-git-send-email-colin.king@canonical.com
State New
Headers show

Commit Message

Colin Ian King May 6, 2016, 5:25 p.m. UTC
From: Colin Ian King <colin.king@canonical.com>

BugLink: http://bugs.launchpad.net/bugs/1558120

AUFS introduced a subtle bug into remap_file_pages; calls to
do_mmap_pgoff can lead to a change of the vma->vm_file and so
the vma_fput(vma) on the file is incorrect; we should instead
fput on the original file.

Constant remapping the same page with multiple processes can
create the following oops:

[    6.606988] BUG: unable to handle kernel NULL pointer dereference at 0000000000000228
[    6.607010] IP: [<ffffffff811aad98>] shmem_fault+0x38/0x1e0
[    6.607032] PGD 78d0d067 PUD 795c0067 PMD 0
[    6.607044] Oops: 0000 [#1] SMP
[    6.607055] Modules linked in: ppdev snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm joydev input_leds serio_raw snd_timer parport_pc snd parport mac_hid i2c_piix4 8250_fintek soundcore ib_iser rdma_cm iw_cm ib_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear qxl crct10dif_pclmul ttm crc32_pclmul drm_kms_helper aesni_intel syscopyarea aes_x86_64 sysfillrect sysimgblt lrw gf128mul glue_helper ablk_helper cryptd psmouse fb_sys_fops floppy drm pata_acpi
[    6.607240] CPU: 1 PID: 1033 Comm: a.out Not tainted 4.4.0-21-generic #37-Ubuntu
[    6.607257] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[    6.607278] task: ffff8800760b5280 ti: ffff88007af44000 task.ti: ffff88007af44000
[    6.607294] RIP: 0010:[<ffffffff811aad98>]  [<ffffffff811aad98>] shmem_fault+0x38/0x1e0
[    6.607314] RSP: 0018:ffff88007af47c08  EFLAGS: 00010246
[    6.607326] RAX: ffff88007c25cf00 RBX: 0000000000000000 RCX: 0000000000000004
[    6.607341] RDX: 0000000000000000 RSI: ffff88007af47c78 RDI: ffff88007add44b0
[    6.607357] RBP: ffff88007af47c68 R08: 0000000000000000 R09: ffff88007af47d38
[    6.607373] R10: 0000000000000000 R11: 00003ffffffff000 R12: ffff88007add44b0
[    6.607388] R13: ffff88007af47d38 R14: ffff8800795fdd00 R15: 00007fdb34174000
[    6.607404] FS:  00007fdb3416c700(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
[    6.607422] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    6.607435] CR2: 0000000000000228 CR3: 000000007af83000 CR4: 00000000001406e0
[    6.607453] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[    6.607469] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[    6.607484] Stack:
[    6.607490]  ffff88007af47c28 00000200811cb011 8000000072af6027 00007fdb34174000
[    6.607510]  ffff88007af47c60 ffffea0001cabd80 0000000000000000 000000004a6f65dc
[    6.607530]  0000000000000000 ffff88007add44b0 ffff88007af47d38 ffff8800795fdd00
[    6.607737] Call Trace:
[    6.607899]  [<ffffffff811bc120>] __do_fault+0x50/0xe0
[    6.608062]  [<ffffffff811bfb5b>] handle_mm_fault+0xf8b/0x1820
[    6.608225]  [<ffffffff811bac05>] __get_user_pages+0x135/0x620
[    6.608388]  [<ffffffff811c5d8d>] ? vma_set_page_prot+0x3d/0x60
[    6.608533]  [<ffffffff811bb63f>] populate_vma_page_range+0x7f/0x90
[    6.608678]  [<ffffffff811bb6f3>] __mm_populate+0xa3/0x130
[    6.608820]  [<ffffffff811c79b5>] SyS_remap_file_pages+0xe5/0x2e0
[    6.608964]  [<ffffffff818244f2>] entry_SYSCALL_64_fastpath+0x16/0x71
[    6.609107] Code: 41 54 53 49 89 fc 48 83 ec 40 c7 45 ac 00 02 00 00 65 48 8b 04 25 28 00 00 00 48 89 45 d8 31 c0 48 8b 87 a0 00 00 00 48 8b 58 20 <48> 83 bb 28 02 00 00 00 0f 85 98 00 00 00 48 8b 43 30 48 8d 56
[    6.609480] RIP  [<ffffffff811aad98>] shmem_fault+0x38/0x1e0
[    6.609638]  RSP <ffff88007af47c08>
[    6.609793] CR2: 0000000000000228
[    6.609949] ---[ end trace f58b20cac46c4135 ]---

The reproducer is below:

void do_remap(void)
{
        const size_t page_size = sysconf(_SC_PAGESIZE);
        void *data;

        data = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (data == MAP_FAILED) {
                fprintf(stderr, "mmap failed\n");
                exit(1);
        }
        for (;;) {
                if (remap_file_pages(data, page_size, 0, 0, 0) < 0) {
                        fprintf(stderr, "remap_file_pages failed\n");
                        exit(1);
                }
        }
        munmap(data, page_size);
}

int main(int argc, char **argv)
{
        int i;
        pid_t pids[4];

        for (i = 0; i < 4; i++) {
                pids[i] = fork();
                if (pids[i] == 0) {
                        printf("FORK child\n");
                        do_remap();
                        exit(0);
                }
                if (pids[i] < 0) {
                        fprintf(stderr, "fork failed\n");

                }
        }

        for (i = 0; i < 4; i++) {
                int status;
                waitpid(pids[i], &status, 0);
        }
}

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 mm/mmap.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

Comments

Kamal Mostafa May 6, 2016, 6:04 p.m. UTC | #1
ACK for Wily+.  Is this patch also suitable for upstream (and cc: stable)?

 -Kamal

On Fri, May 06, 2016 at 06:25:52PM +0100, Colin King wrote:
> From: Colin Ian King <colin.king@canonical.com>
> 
> BugLink: http://bugs.launchpad.net/bugs/1558120
> 
> AUFS introduced a subtle bug into remap_file_pages; calls to
> do_mmap_pgoff can lead to a change of the vma->vm_file and so
> the vma_fput(vma) on the file is incorrect; we should instead
> fput on the original file.
> 
> Constant remapping the same page with multiple processes can
> create the following oops:
> 
> [    6.606988] BUG: unable to handle kernel NULL pointer dereference at 0000000000000228
> [    6.607010] IP: [<ffffffff811aad98>] shmem_fault+0x38/0x1e0
> [    6.607032] PGD 78d0d067 PUD 795c0067 PMD 0
> [    6.607044] Oops: 0000 [#1] SMP
> [    6.607055] Modules linked in: ppdev snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm joydev input_leds serio_raw snd_timer parport_pc snd parport mac_hid i2c_piix4 8250_fintek soundcore ib_iser rdma_cm iw_cm ib_cm ib_sa ib_mad ib_core ib_addr iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi autofs4 btrfs raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 multipath linear qxl crct10dif_pclmul ttm crc32_pclmul drm_kms_helper aesni_intel syscopyarea aes_x86_64 sysfillrect sysimgblt lrw gf128mul glue_helper ablk_helper cryptd psmouse fb_sys_fops floppy drm pata_acpi
> [    6.607240] CPU: 1 PID: 1033 Comm: a.out Not tainted 4.4.0-21-generic #37-Ubuntu
> [    6.607257] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
> [    6.607278] task: ffff8800760b5280 ti: ffff88007af44000 task.ti: ffff88007af44000
> [    6.607294] RIP: 0010:[<ffffffff811aad98>]  [<ffffffff811aad98>] shmem_fault+0x38/0x1e0
> [    6.607314] RSP: 0018:ffff88007af47c08  EFLAGS: 00010246
> [    6.607326] RAX: ffff88007c25cf00 RBX: 0000000000000000 RCX: 0000000000000004
> [    6.607341] RDX: 0000000000000000 RSI: ffff88007af47c78 RDI: ffff88007add44b0
> [    6.607357] RBP: ffff88007af47c68 R08: 0000000000000000 R09: ffff88007af47d38
> [    6.607373] R10: 0000000000000000 R11: 00003ffffffff000 R12: ffff88007add44b0
> [    6.607388] R13: ffff88007af47d38 R14: ffff8800795fdd00 R15: 00007fdb34174000
> [    6.607404] FS:  00007fdb3416c700(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
> [    6.607422] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    6.607435] CR2: 0000000000000228 CR3: 000000007af83000 CR4: 00000000001406e0
> [    6.607453] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> [    6.607469] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
> [    6.607484] Stack:
> [    6.607490]  ffff88007af47c28 00000200811cb011 8000000072af6027 00007fdb34174000
> [    6.607510]  ffff88007af47c60 ffffea0001cabd80 0000000000000000 000000004a6f65dc
> [    6.607530]  0000000000000000 ffff88007add44b0 ffff88007af47d38 ffff8800795fdd00
> [    6.607737] Call Trace:
> [    6.607899]  [<ffffffff811bc120>] __do_fault+0x50/0xe0
> [    6.608062]  [<ffffffff811bfb5b>] handle_mm_fault+0xf8b/0x1820
> [    6.608225]  [<ffffffff811bac05>] __get_user_pages+0x135/0x620
> [    6.608388]  [<ffffffff811c5d8d>] ? vma_set_page_prot+0x3d/0x60
> [    6.608533]  [<ffffffff811bb63f>] populate_vma_page_range+0x7f/0x90
> [    6.608678]  [<ffffffff811bb6f3>] __mm_populate+0xa3/0x130
> [    6.608820]  [<ffffffff811c79b5>] SyS_remap_file_pages+0xe5/0x2e0
> [    6.608964]  [<ffffffff818244f2>] entry_SYSCALL_64_fastpath+0x16/0x71
> [    6.609107] Code: 41 54 53 49 89 fc 48 83 ec 40 c7 45 ac 00 02 00 00 65 48 8b 04 25 28 00 00 00 48 89 45 d8 31 c0 48 8b 87 a0 00 00 00 48 8b 58 20 <48> 83 bb 28 02 00 00 00 0f 85 98 00 00 00 48 8b 43 30 48 8d 56
> [    6.609480] RIP  [<ffffffff811aad98>] shmem_fault+0x38/0x1e0
> [    6.609638]  RSP <ffff88007af47c08>
> [    6.609793] CR2: 0000000000000228
> [    6.609949] ---[ end trace f58b20cac46c4135 ]---
> 
> The reproducer is below:
> 
> void do_remap(void)
> {
>         const size_t page_size = sysconf(_SC_PAGESIZE);
>         void *data;
> 
>         data = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
>                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);
>         if (data == MAP_FAILED) {
>                 fprintf(stderr, "mmap failed\n");
>                 exit(1);
>         }
>         for (;;) {
>                 if (remap_file_pages(data, page_size, 0, 0, 0) < 0) {
>                         fprintf(stderr, "remap_file_pages failed\n");
>                         exit(1);
>                 }
>         }
>         munmap(data, page_size);
> }
> 
> int main(int argc, char **argv)
> {
>         int i;
>         pid_t pids[4];
> 
>         for (i = 0; i < 4; i++) {
>                 pids[i] = fork();
>                 if (pids[i] == 0) {
>                         printf("FORK child\n");
>                         do_remap();
>                         exit(0);
>                 }
>                 if (pids[i] < 0) {
>                         fprintf(stderr, "fork failed\n");
> 
>                 }
>         }
> 
>         for (i = 0; i < 4; i++) {
>                 int status;
>                 waitpid(pids[i], &status, 0);
>         }
> }
> 
> Signed-off-by: Colin Ian King <colin.king@canonical.com>
> ---
>  mm/mmap.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
> 
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 550cd93..d06ac4f 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2644,6 +2644,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
>  	struct vm_area_struct *vma;
>  	unsigned long populate = 0;
>  	unsigned long ret = -EINVAL;
> +	struct file *file;
>  
>  	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
>  			"See Documentation/vm/remap_file_pages.txt.\n",
> @@ -2711,10 +2712,10 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
>  		}
>  	}
>  
> -	vma_get_file(vma);
> +	file = get_file(vma->vm_file);
>  	ret = do_mmap_pgoff(vma->vm_file, start, size,
>  			prot, flags, pgoff, &populate);
> -	vma_fput(vma);
> +	fput(file);
>  out:
>  	up_write(&mm->mmap_sem);
>  	if (populate)
> -- 
> 2.7.4
> 
> 
> -- 
> kernel-team mailing list
> kernel-team@lists.ubuntu.com
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
Tim Gardner May 6, 2016, 6:08 p.m. UTC | #2
On 05/06/2016 12:04 PM, Kamal Mostafa wrote:
> ACK for Wily+.  Is this patch also suitable for upstream (and cc: stable)?
>
>   -Kamal
>

Suitable for upstream AUFS only.
Tim Gardner May 6, 2016, 6:08 p.m. UTC | #3

Colin Ian King May 6, 2016, 6:31 p.m. UTC | #4
On 06/05/16 19:08, Tim Gardner wrote:
> On 05/06/2016 12:04 PM, Kamal Mostafa wrote:
>> ACK for Wily+.  Is this patch also suitable for upstream (and cc:
>> stable)?
>>
>>   -Kamal
>>
> 
> Suitable for upstream AUFS only.

I'll submit it upstream shortly.

Colin
Kamal Mostafa May 6, 2016, 8:23 p.m. UTC | #5

diff mbox

Patch

diff --git a/mm/mmap.c b/mm/mmap.c
index 550cd93..d06ac4f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2644,6 +2644,7 @@  SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 	struct vm_area_struct *vma;
 	unsigned long populate = 0;
 	unsigned long ret = -EINVAL;
+	struct file *file;
 
 	pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
 			"See Documentation/vm/remap_file_pages.txt.\n",
@@ -2711,10 +2712,10 @@  SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 		}
 	}
 
-	vma_get_file(vma);
+	file = get_file(vma->vm_file);
 	ret = do_mmap_pgoff(vma->vm_file, start, size,
 			prot, flags, pgoff, &populate);
-	vma_fput(vma);
+	fput(file);
 out:
 	up_write(&mm->mmap_sem);
 	if (populate)