diff mbox

[RFC,12/18] limits: track RLIMIT_MEMLOCK actual max

Message ID 1465847065-3577-13-git-send-email-toiwoton@gmail.com (mailing list archive)
State Not Applicable
Headers show

Commit Message

Topi Miettinen June 13, 2016, 7:44 p.m. UTC
Track maximum size of locked memory, presented in /proc/self/limits.

Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
---
 arch/ia64/kernel/perfmon.c                 |  1 +
 arch/powerpc/kvm/book3s_64_vio.c           |  1 +
 arch/powerpc/mm/mmu_context_iommu.c        |  1 +
 drivers/infiniband/core/umem.c             |  1 +
 drivers/infiniband/hw/hfi1/user_pages.c    |  1 +
 drivers/infiniband/hw/qib/qib_user_pages.c |  1 +
 drivers/infiniband/hw/usnic/usnic_uiom.c   |  2 ++
 drivers/misc/mic/scif/scif_rma.c           |  1 +
 drivers/vfio/vfio_iommu_spapr_tce.c        |  2 ++
 drivers/vfio/vfio_iommu_type1.c            |  2 ++
 include/linux/sched.h                      | 10 ++++++++--
 kernel/bpf/syscall.c                       |  6 ++++++
 kernel/events/core.c                       |  1 +
 mm/mlock.c                                 |  7 +++++++
 mm/mmap.c                                  |  3 +++
 mm/mremap.c                                |  3 +++
 16 files changed, 41 insertions(+), 2 deletions(-)

Comments

Topi Miettinen June 13, 2016, 9:17 p.m. UTC | #1
On 06/13/16 20:43, Alex Williamson wrote:
> On Mon, 13 Jun 2016 22:44:19 +0300
> Topi Miettinen <toiwoton@gmail.com> wrote:
> 
>> Track maximum size of locked memory, presented in /proc/self/limits.
>>
>> Signed-off-by: Topi Miettinen <toiwoton@gmail.com>
>> ---
>>  arch/ia64/kernel/perfmon.c                 |  1 +
>>  arch/powerpc/kvm/book3s_64_vio.c           |  1 +
>>  arch/powerpc/mm/mmu_context_iommu.c        |  1 +
>>  drivers/infiniband/core/umem.c             |  1 +
>>  drivers/infiniband/hw/hfi1/user_pages.c    |  1 +
>>  drivers/infiniband/hw/qib/qib_user_pages.c |  1 +
>>  drivers/infiniband/hw/usnic/usnic_uiom.c   |  2 ++
>>  drivers/misc/mic/scif/scif_rma.c           |  1 +
>>  drivers/vfio/vfio_iommu_spapr_tce.c        |  2 ++
>>  drivers/vfio/vfio_iommu_type1.c            |  2 ++
>>  include/linux/sched.h                      | 10 ++++++++--
>>  kernel/bpf/syscall.c                       |  6 ++++++
>>  kernel/events/core.c                       |  1 +
>>  mm/mlock.c                                 |  7 +++++++
>>  mm/mmap.c                                  |  3 +++
>>  mm/mremap.c                                |  3 +++
>>  16 files changed, 41 insertions(+), 2 deletions(-)
> ...  
>>
>> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
>> index 2ba1942..4c6e7a3 100644
>> --- a/drivers/vfio/vfio_iommu_type1.c
>> +++ b/drivers/vfio/vfio_iommu_type1.c
>> @@ -312,6 +312,8 @@ static long vfio_pin_pages(unsigned long vaddr, long npage,
>>  		}
>>  	}
>>  
>> +	bump_rlimit(RLIMIT_MEMLOCK, (current->mm->locked_vm + i) << PAGE_SHIFT);
>> +
>>  	if (!rsvd)
>>  		vfio_lock_acct(i);
>>  
> 
> 
> Not all cases passing through here bump rlimit (see: rsvd), there's an
> entire case above the other end of this closing bracket that does bump
> rlimit but returns before here, and I wonder why we wouldn't just do
> this in our vfio_lock_acct() accounting function anyway.  Thanks,

Yes, just make disable_hugepages case go to end of function.

-Topi

> 
> Alex
>
Doug Ledford June 18, 2016, 12:59 a.m. UTC | #2
On 6/13/2016 3:44 PM, Topi Miettinen wrote:
> Track maximum size of locked memory, presented in /proc/self/limits.

You should have probably Cc:ed everyone on the cover letter and probably
patch 1 of this series.  This patch is hard to decipher without the
additional context of those items.  However, that said, I think I see
what you are doing.  But your wording of your comments below is bad:

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index feb9bb7..d3f3c9f 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -3378,10 +3378,16 @@ static inline unsigned long rlimit_max(unsigned int limit)
>  	return task_rlimit_max(current, limit);
>  }
>  
> +static inline void task_bump_rlimit(struct task_struct *tsk,
> +				    unsigned int limit, unsigned long r)
> +{
> +	if (READ_ONCE(tsk->signal->rlim_curmax[limit]) < r)
> +		tsk->signal->rlim_curmax[limit] = r;
> +}
> +
>  static inline void bump_rlimit(unsigned int limit, unsigned long r)
>  {
> -	if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
> -		current->signal->rlim_curmax[limit] = r;
> +	return task_bump_rlimit(current, limit, r);
>  }
>  
>  #ifdef CONFIG_CPU_FREQ
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 46ecce4..192001e 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -76,6 +76,9 @@ static int bpf_map_charge_memlock(struct bpf_map *map)
>  		return -EPERM;
>  	}
>  	map->user = user;
> +	/* XXX resource limits apply per task, not per user */
> +	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
> +		    PAGE_SHIFT);

No, these resource limits do not apply per task.  They are per user.
However, you are doing maximum usage accounting on a per-task basis by
adding a new counter to the signal struct of the task.  Fine, but your
comments need to reflect that instead of the confusing comment above.
In addition, your function name is horrible for what you are doing.  A
person reading this function will think that you are bumping the actual
rlimit on the task, which is not what you are doing.  You are performing
per-task accounting of MEMLOCK memory.  The actual permission checks are
per-user, and the primary accounting is per-user.  So, really, this is
just a nice little feature that provides a more granular per-task usage
(but not control) so a user can see where their overall memlock memory
is being used.  Fine.  I would reword the comment something like this:

/* XXX resource is tracked and limit enforced on a per user basis,
   but we track it on a per-task basis as well so users can identify
   hogs of this resource, stats can be found in /proc/<pid>/limits */

And I would rename bump_rlimit and task_bump_rlimit to something like
account_rlimit and task_account_rlimit.  Calling it bump just gives the
wrong idea entirely on first read.

>  	return 0;
>  }
>  
> @@ -601,6 +604,9 @@ static int bpf_prog_charge_memlock(struct bpf_prog *prog)
>  		return -EPERM;
>  	}
>  	prog->aux->user = user;
> +	/* XXX resource limits apply per task, not per user */
> +	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
> +		    PAGE_SHIFT);
>  	return 0;
>  }

> @@ -798,6 +802,9 @@ int user_shm_lock(size_t size, struct user_struct *user)
>  	get_uid(user);
>  	user->locked_shm += locked;
>  	allowed = 1;
> +
> +	/* XXX resource limits apply per task, not per user */
> +	bump_rlimit(RLIMIT_MEMLOCK, user->locked_shm << PAGE_SHIFT);
>  out:
>  	spin_unlock(&shmlock_user_lock);
>  	return allowed;
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 0963e7f..4e683dd 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2020,6 +2020,9 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
>  		return -ENOMEM;
>  
>  	bump_rlimit(RLIMIT_STACK, actual_size);
> +	if (vma->vm_flags & VM_LOCKED)
> +		bump_rlimit(RLIMIT_MEMLOCK,
> +			    (mm->locked_vm + grow) << PAGE_SHIFT);
>  
>  	return 0;
>  }
> diff --git a/mm/mremap.c b/mm/mremap.c
> index 1f157ad..ade3e13 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -394,6 +394,9 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
>  		*p = charged;
>  	}
>  
> +	if (vma->vm_flags & VM_LOCKED)
> +		bump_rlimit(RLIMIT_MEMLOCK, (mm->locked_vm << PAGE_SHIFT) +
> +			    new_len - old_len);
>  	return vma;
>  }
>  
>
Topi Miettinen June 18, 2016, 7 a.m. UTC | #3
On 06/18/16 00:59, Doug Ledford wrote:
> On 6/13/2016 3:44 PM, Topi Miettinen wrote:
>> Track maximum size of locked memory, presented in /proc/self/limits.
> 
> You should have probably Cc:ed everyone on the cover letter and probably
> patch 1 of this series.  This patch is hard to decipher without the
> additional context of those items.  However, that said, I think I see

Yes, I didn't know to CC everybody involved, sorry about that.

> what you are doing.  But your wording of your comments below is bad:
> 
>> diff --git a/include/linux/sched.h b/include/linux/sched.h
>> index feb9bb7..d3f3c9f 100644
>> --- a/include/linux/sched.h
>> +++ b/include/linux/sched.h
>> @@ -3378,10 +3378,16 @@ static inline unsigned long rlimit_max(unsigned int limit)
>>  	return task_rlimit_max(current, limit);
>>  }
>>  
>> +static inline void task_bump_rlimit(struct task_struct *tsk,
>> +				    unsigned int limit, unsigned long r)
>> +{
>> +	if (READ_ONCE(tsk->signal->rlim_curmax[limit]) < r)
>> +		tsk->signal->rlim_curmax[limit] = r;
>> +}
>> +
>>  static inline void bump_rlimit(unsigned int limit, unsigned long r)
>>  {
>> -	if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
>> -		current->signal->rlim_curmax[limit] = r;
>> +	return task_bump_rlimit(current, limit, r);
>>  }
>>  
>>  #ifdef CONFIG_CPU_FREQ
>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
>> index 46ecce4..192001e 100644
>> --- a/kernel/bpf/syscall.c
>> +++ b/kernel/bpf/syscall.c
>> @@ -76,6 +76,9 @@ static int bpf_map_charge_memlock(struct bpf_map *map)
>>  		return -EPERM;
>>  	}
>>  	map->user = user;
>> +	/* XXX resource limits apply per task, not per user */
>> +	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
>> +		    PAGE_SHIFT);
> 
> No, these resource limits do not apply per task.  They are per user.

The problem could be that the manual pages do not say that but more to
the opposite direction. For example, setrlimit(2) says that some limits
(RLIMIT_MEMLOCK only for SHM_LOCK and others like RLIMIT_MSGQUEUE)
apply indeed per user but others are per process. This note in mlock(2)
could be also easily read as specifying a per process limit:

"Since Linux 2.6.9, no limits are placed on the amount of memory that
       a privileged process can lock and the RLIMIT_MEMLOCK soft resource
       limit instead defines a limit on how much memory an unprivileged
       process may lock."

It's also confusing (to me, at least) that the limit values are stored
in per task structures, so the actual limits can be different for each
process for the same user.

The limits are also sometimes compared to per task
current->mm->pinned_vm, in other places to current->mm->locked_vm and in
still other places to per user user->locked_vm. How can the same limit
apply to all of them at the same time? I'd think the user can actually
lock many times the limit because of this.

Anyway, assuming that the actual implementation is always correct and
unchangeable due to ABI stability reasons, it's useless to add XXX
comments like I did.

> However, you are doing maximum usage accounting on a per-task basis by
> adding a new counter to the signal struct of the task.  Fine, but your
> comments need to reflect that instead of the confusing comment above.
> In addition, your function name is horrible for what you are doing.  A
> person reading this function will think that you are bumping the actual
> rlimit on the task, which is not what you are doing.  You are performing
> per-task accounting of MEMLOCK memory.  The actual permission checks are
> per-user, and the primary accounting is per-user.  So, really, this is
> just a nice little feature that provides a more granular per-task usage
> (but not control) so a user can see where their overall memlock memory
> is being used.  Fine.  I would reword the comment something like this:
> 
> /* XXX resource is tracked and limit enforced on a per user basis,
>    but we track it on a per-task basis as well so users can identify
>    hogs of this resource, stats can be found in /proc/<pid>/limits */
> 
> And I would rename bump_rlimit and task_bump_rlimit to something like
> account_rlimit and task_account_rlimit.  Calling it bump just gives the
> wrong idea entirely on first read.

Right, others have also proposed better names.

-Topi

> 
>>  	return 0;
>>  }
>>  
>> @@ -601,6 +604,9 @@ static int bpf_prog_charge_memlock(struct bpf_prog *prog)
>>  		return -EPERM;
>>  	}
>>  	prog->aux->user = user;
>> +	/* XXX resource limits apply per task, not per user */
>> +	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
>> +		    PAGE_SHIFT);
>>  	return 0;
>>  }
> 
>> @@ -798,6 +802,9 @@ int user_shm_lock(size_t size, struct user_struct *user)
>>  	get_uid(user);
>>  	user->locked_shm += locked;
>>  	allowed = 1;
>> +
>> +	/* XXX resource limits apply per task, not per user */
>> +	bump_rlimit(RLIMIT_MEMLOCK, user->locked_shm << PAGE_SHIFT);
>>  out:
>>  	spin_unlock(&shmlock_user_lock);
>>  	return allowed;
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index 0963e7f..4e683dd 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -2020,6 +2020,9 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
>>  		return -ENOMEM;
>>  
>>  	bump_rlimit(RLIMIT_STACK, actual_size);
>> +	if (vma->vm_flags & VM_LOCKED)
>> +		bump_rlimit(RLIMIT_MEMLOCK,
>> +			    (mm->locked_vm + grow) << PAGE_SHIFT);
>>  
>>  	return 0;
>>  }
>> diff --git a/mm/mremap.c b/mm/mremap.c
>> index 1f157ad..ade3e13 100644
>> --- a/mm/mremap.c
>> +++ b/mm/mremap.c
>> @@ -394,6 +394,9 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr,
>>  		*p = charged;
>>  	}
>>  
>> +	if (vma->vm_flags & VM_LOCKED)
>> +		bump_rlimit(RLIMIT_MEMLOCK, (mm->locked_vm << PAGE_SHIFT) +
>> +			    new_len - old_len);
>>  	return vma;
>>  }
>>  
>>
> 
>
diff mbox

Patch

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 2436ad5..d05ff3b 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2341,6 +2341,7 @@  pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
 	*(unsigned long *)user_vaddr = vma->vm_start;
 
+	task_bump_rlimit(task, RLIMIT_MEMLOCK, size);
 	return 0;
 
 error:
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 18cf6d1..2714bbf 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -71,6 +71,7 @@  static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
 			ret = -ENOMEM;
 		else
 			current->mm->locked_vm += stt_pages;
+		bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 	} else {
 		if (WARN_ON_ONCE(stt_pages > current->mm->locked_vm))
 			stt_pages = current->mm->locked_vm;
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index da6a216..ace8b9d 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -46,6 +46,7 @@  static long mm_iommu_adjust_locked_vm(struct mm_struct *mm,
 			ret = -ENOMEM;
 		else
 			mm->locked_vm += npages;
+		bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 	} else {
 		if (WARN_ON_ONCE(npages > mm->locked_vm))
 			npages = mm->locked_vm;
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index fe4d2e1..9bd9638 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -224,6 +224,7 @@  struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 
 	ret = 0;
 
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 out:
 	if (ret < 0) {
 		if (need_release)
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 88e10b5f..096910d7 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -111,6 +111,7 @@  int hfi1_acquire_user_pages(unsigned long vaddr, size_t npages, bool writable,
 
 	down_write(&current->mm->mmap_sem);
 	current->mm->pinned_vm += ret;
+	bump_rlimit(RLIMIT_MEMLOCK, current->mm->pinned_vm << PAGE_SHIFT);
 	up_write(&current->mm->mmap_sem);
 
 	return ret;
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 2d2b94f..06f93de 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -74,6 +74,7 @@  static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
 	}
 
 	current->mm->pinned_vm += num_pages;
+	bump_rlimit(RLIMIT_MEMLOCK, current->mm->pinned_vm << PAGE_SHIFT);
 
 	ret = 0;
 	goto bail;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index a0b6ebe..83409dc 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -178,6 +178,8 @@  static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 		ret = 0;
 	}
 
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
+
 out:
 	if (ret < 0)
 		usnic_uiom_put_pages(chunk_list, 0);
diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c
index e0203b1..1d6315a 100644
--- a/drivers/misc/mic/scif/scif_rma.c
+++ b/drivers/misc/mic/scif/scif_rma.c
@@ -306,6 +306,7 @@  static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
 		return -ENOMEM;
 	}
 	mm->pinned_vm = locked;
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 	return 0;
 }
 
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 80378dd..769a5b8 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -55,6 +55,8 @@  static long try_increment_locked_vm(long npages)
 			rlimit(RLIMIT_MEMLOCK),
 			ret ? " - exceeded" : "");
 
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
+
 	up_write(&current->mm->mmap_sem);
 
 	return ret;
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 2ba1942..4c6e7a3 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -312,6 +312,8 @@  static long vfio_pin_pages(unsigned long vaddr, long npage,
 		}
 	}
 
+	bump_rlimit(RLIMIT_MEMLOCK, (current->mm->locked_vm + i) << PAGE_SHIFT);
+
 	if (!rsvd)
 		vfio_lock_acct(i);
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index feb9bb7..d3f3c9f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3378,10 +3378,16 @@  static inline unsigned long rlimit_max(unsigned int limit)
 	return task_rlimit_max(current, limit);
 }
 
+static inline void task_bump_rlimit(struct task_struct *tsk,
+				    unsigned int limit, unsigned long r)
+{
+	if (READ_ONCE(tsk->signal->rlim_curmax[limit]) < r)
+		tsk->signal->rlim_curmax[limit] = r;
+}
+
 static inline void bump_rlimit(unsigned int limit, unsigned long r)
 {
-	if (READ_ONCE(current->signal->rlim_curmax[limit]) < r)
-		current->signal->rlim_curmax[limit] = r;
+	return task_bump_rlimit(current, limit, r);
 }
 
 #ifdef CONFIG_CPU_FREQ
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 46ecce4..192001e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -76,6 +76,9 @@  static int bpf_map_charge_memlock(struct bpf_map *map)
 		return -EPERM;
 	}
 	map->user = user;
+	/* XXX resource limits apply per task, not per user */
+	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
+		    PAGE_SHIFT);
 	return 0;
 }
 
@@ -601,6 +604,9 @@  static int bpf_prog_charge_memlock(struct bpf_prog *prog)
 		return -EPERM;
 	}
 	prog->aux->user = user;
+	/* XXX resource limits apply per task, not per user */
+	bump_rlimit(RLIMIT_MEMLOCK, atomic_long_read(&user->locked_vm) <<
+		    PAGE_SHIFT);
 	return 0;
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9c51ec3..92467e8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5075,6 +5075,7 @@  accounting:
 		if (!ret)
 			rb->aux_mmap_locked = extra;
 	}
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
 
 unlock:
 	if (!ret) {
diff --git a/mm/mlock.c b/mm/mlock.c
index ef8dc9f..554bee9 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -648,6 +648,8 @@  static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t fla
 	if (error)
 		return error;
 
+	bump_rlimit(RLIMIT_MEMLOCK, locked << PAGE_SHIFT);
+
 	error = __mm_populate(start, len, 0);
 	if (error)
 		return __mlock_posix_error_return(error);
@@ -761,6 +763,8 @@  SYSCALL_DEFINE1(mlockall, int, flags)
 	if (!ret && (flags & MCL_CURRENT))
 		mm_populate(0, TASK_SIZE);
 
+	bump_rlimit(RLIMIT_MEMLOCK, current->mm->total_vm << PAGE_SHIFT);
+
 	return ret;
 }
 
@@ -798,6 +802,9 @@  int user_shm_lock(size_t size, struct user_struct *user)
 	get_uid(user);
 	user->locked_shm += locked;
 	allowed = 1;
+
+	/* XXX resource limits apply per task, not per user */
+	bump_rlimit(RLIMIT_MEMLOCK, user->locked_shm << PAGE_SHIFT);
 out:
 	spin_unlock(&shmlock_user_lock);
 	return allowed;
diff --git a/mm/mmap.c b/mm/mmap.c
index 0963e7f..4e683dd 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2020,6 +2020,9 @@  static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		return -ENOMEM;
 
 	bump_rlimit(RLIMIT_STACK, actual_size);
+	if (vma->vm_flags & VM_LOCKED)
+		bump_rlimit(RLIMIT_MEMLOCK,
+			    (mm->locked_vm + grow) << PAGE_SHIFT);
 
 	return 0;
 }
diff --git a/mm/mremap.c b/mm/mremap.c
index 1f157ad..ade3e13 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -394,6 +394,9 @@  static struct vm_area_struct *vma_to_resize(unsigned long addr,
 		*p = charged;
 	}
 
+	if (vma->vm_flags & VM_LOCKED)
+		bump_rlimit(RLIMIT_MEMLOCK, (mm->locked_vm << PAGE_SHIFT) +
+			    new_len - old_len);
 	return vma;
 }