From patchwork Thu Jun 6 01:45:34 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110854 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7mD5YWdz9sNT for ; Thu, 6 Jun 2019 11:46:04 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726721AbfFFBpK (ORCPT ); Wed, 5 Jun 2019 21:45:10 -0400 Received: from mga03.intel.com ([134.134.136.65]:36140 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726589AbfFFBpK (ORCPT ); Wed, 5 Jun 2019 21:45:10 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:09 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:09 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 01/10] fs/locks: Add trace_leases_conflict Date: Wed, 5 Jun 2019 18:45:34 -0700 Message-Id: <20190606014544.8339-2-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny Signed-off-by: Ira Weiny Reviewed-by: Jeff Layton --- fs/locks.c | 20 ++++++++++++++----- include/trace/events/filelock.h | 35 +++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index ec1e4a5df629..0cc2b9f30e22 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1534,11 +1534,21 @@ static void time_out_leases(struct inode *inode, struct list_head *dispose) static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { - if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) - return false; - if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) - return false; - return locks_conflict(breaker, lease); + bool rc; + + if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { + rc = false; + goto trace; + } + if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) { + rc = false; + goto trace; + } + + rc = locks_conflict(breaker, lease); +trace: + trace_leases_conflict(rc, lease, breaker); + return rc; } static bool diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index fad7befa612d..4b735923f2ff 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -203,6 +203,41 @@ TRACE_EVENT(generic_add_lease, show_fl_type(__entry->fl_type)) ); +TRACE_EVENT(leases_conflict, + TP_PROTO(bool conflict, struct file_lock *lease, struct file_lock *breaker), + + TP_ARGS(conflict, lease, breaker), + + TP_STRUCT__entry( + __field(void *, lease) + __field(void *, breaker) + __field(unsigned int, l_fl_flags) + __field(unsigned int, b_fl_flags) + __field(unsigned char, l_fl_type) + __field(unsigned char, b_fl_type) + __field(bool, conflict) + ), + + TP_fast_assign( + __entry->lease = lease; + __entry->l_fl_flags = lease->fl_flags; + __entry->l_fl_type = lease->fl_type; + __entry->breaker = breaker; + __entry->b_fl_flags = breaker->fl_flags; + __entry->b_fl_type = breaker->fl_type; + __entry->conflict = conflict; + ), + + TP_printk("conflict %d: lease=0x%p fl_flags=%s fl_type=%s; breaker=0x%p fl_flags=%s fl_type=%s", + __entry->conflict, + __entry->lease, + show_fl_flags(__entry->l_fl_flags), + show_fl_type(__entry->l_fl_type), + __entry->breaker, + show_fl_flags(__entry->b_fl_flags), + show_fl_type(__entry->b_fl_type)) +); + #endif /* _TRACE_FILELOCK_H */ /* This part must be outside protection */ From patchwork Thu Jun 6 01:45:35 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110853 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7m95X7rz9sNT for ; Thu, 6 Jun 2019 11:46:01 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726757AbfFFBpO (ORCPT ); Wed, 5 Jun 2019 21:45:14 -0400 Received: from mga03.intel.com ([134.134.136.65]:36140 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726589AbfFFBpM (ORCPT ); Wed, 5 Jun 2019 21:45:12 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:11 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:11 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 02/10] fs/locks: Export F_LAYOUT lease to user space Date: Wed, 5 Jun 2019 18:45:35 -0700 Message-Id: <20190606014544.8339-3-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny GUP longterm pins of non-pagecache file system pages (eg FS DAX) are currently disallowed because they are unsafe. The danger for pinning these pages comes from the fact that hole punch and/or truncate of those files results in the pages being mapped and pinned by a user space process while DAX has potentially allocated those pages to other processes. Most (All) users who are mapping FS DAX pages for long term pin purposes (such as RDMA) are not going to want to deallocate these pages while those pages are in use. To do so would mean the application would lose data. So the use case for allowing truncate operations of such pages is limited. However, the kernel must protect itself and users from potential mistakes and/or malicious user space code. Rather than disabling long term pins as is done now. Allow for users who know they are going to be pinning this memory to alert the file system of this intention. Furthermore, allow users to be alerted such that they can react if a truncate operation occurs for some reason. Example user space pseudocode for a user using RDMA and wanting to allow a truncate would look like this: lease_break_sigio_handler() { ... if (sigio.fd == rdma_fd) { complete_rdma_operations(...); ibv_dereg_mr(mr); close(rdma_fd); fcntl(rdma_fd, F_SETLEASE, F_UNLCK); } } setup_rdma_to_dax_file() { ... rdma_fd = open(...) fcntl(rdma_fd, F_SETLEASE, F_LAYOUT); sigaction(SIGIO, ... lease_break ...); ptr = mmap(rdma_fd, ...); mr = ibv_reg_mr(ptr, ...); do_rdma_stuff(...); } Follow on patches implement the notification of the lease holder on truncate as well as failing the truncate if the GUP pin is not released. This first patch exports the F_LAYOUT lease type and allows the user to set and get it. After the complete series: 1) Failure to obtain a F_LAYOUT lease on an open FS DAX file will result in a failure to GUP pin any pages in that file. An example of a call which results in GUP pin is ibv_reg_mr(). 2) While the GUP pin is in place (eg MR is in use) truncates of the affected pages will fail. 3) If the user registers a sigaction they will be notified of the truncate so they can react. Failure to react will result in the lease being revoked after /lease-break-time seconds. After this time new GUP pins will fail without a new lease being taken. 4) A truncate will work if the pages being truncated are not actively pinned at the time of truncate. Attempts to pin these pages after will result in a failure. Signed-off-by: Ira Weiny --- fs/locks.c | 36 +++++++++++++++++++++++++++----- include/linux/fs.h | 2 +- include/uapi/asm-generic/fcntl.h | 3 +++ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 0cc2b9f30e22..de9761c068de 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -191,6 +191,8 @@ static int target_leasetype(struct file_lock *fl) return F_UNLCK; if (fl->fl_flags & FL_DOWNGRADE_PENDING) return F_RDLCK; + if (fl->fl_flags & FL_LAYOUT) + return F_LAYOUT; return fl->fl_type; } @@ -611,7 +613,8 @@ static const struct lock_manager_operations lease_manager_ops = { /* * Initialize a lease, use the default lock manager operations */ -static int lease_init(struct file *filp, long type, struct file_lock *fl) +static int lease_init(struct file *filp, long type, unsigned int flags, + struct file_lock *fl) { if (assign_type(fl, type) != 0) return -EINVAL; @@ -621,6 +624,8 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl) fl->fl_file = filp; fl->fl_flags = FL_LEASE; + if (flags & FL_LAYOUT) + fl->fl_flags |= FL_LAYOUT; fl->fl_start = 0; fl->fl_end = OFFSET_MAX; fl->fl_ops = NULL; @@ -629,7 +634,8 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl) } /* Allocate a file_lock initialised to this type of lease */ -static struct file_lock *lease_alloc(struct file *filp, long type) +static struct file_lock *lease_alloc(struct file *filp, long type, + unsigned int flags) { struct file_lock *fl = locks_alloc_lock(); int error = -ENOMEM; @@ -637,7 +643,7 @@ static struct file_lock *lease_alloc(struct file *filp, long type) if (fl == NULL) return ERR_PTR(error); - error = lease_init(filp, type, fl); + error = lease_init(filp, type, flags, fl); if (error) { locks_free_lock(fl); return ERR_PTR(error); @@ -1588,7 +1594,7 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) int want_write = (mode & O_ACCMODE) != O_RDONLY; LIST_HEAD(dispose); - new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); + new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK, 0); if (IS_ERR(new_fl)) return PTR_ERR(new_fl); new_fl->fl_flags = type; @@ -1725,6 +1731,8 @@ EXPORT_SYMBOL(lease_get_mtime); * * %F_UNLCK to indicate no lease is held. * + * %F_LAYOUT to indicate a layout lease is held. + * * (if a lease break is pending): * * %F_RDLCK to indicate an exclusive lease needs to be @@ -2015,8 +2023,26 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) struct file_lock *fl; struct fasync_struct *new; int error; + unsigned int flags = 0; + + /* + * NOTE on F_LAYOUT lease + * + * LAYOUT lease types are taken on files which the user knows that + * they will be pinning in memory for some indeterminate amount of + * time. Such as for use with RDMA. While we don't know what user + * space is going to do with the file we still use a F_RDLOCK level of + * lease. This ensures that there are no conflicts between + * 2 users. The conflict should only come from the File system wanting + * to revoke the lease in break_layout() And this is done by using + * F_WRLCK in the break code. + */ + if (arg == F_LAYOUT) { + arg = F_RDLCK; + flags = FL_LAYOUT; + } - fl = lease_alloc(filp, arg); + fl = lease_alloc(filp, arg, flags); if (IS_ERR(fl)) return PTR_ERR(fl); diff --git a/include/linux/fs.h b/include/linux/fs.h index f7fdfe93e25d..9e9d8d35ee93 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -998,7 +998,7 @@ static inline struct file *get_file(struct file *f) #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ -#define FL_LAYOUT 2048 /* outstanding pNFS layout */ +#define FL_LAYOUT 2048 /* outstanding pNFS layout or user held pin */ #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 9dc0bf0c5a6e..baddd54f3031 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -174,6 +174,9 @@ struct f_owner_ex { #define F_SHLCK 8 /* or 4 */ #endif +#define F_LAYOUT 16 /* layout lease to allow longterm pins such as + RDMA */ + /* operations for bsd flock(), also used by the kernel implementation */ #define LOCK_SH 1 /* shared lock */ #define LOCK_EX 2 /* exclusive lock */ From patchwork Thu Jun 6 01:45:36 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110852 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7m73FX1z9sND for ; Thu, 6 Jun 2019 11:45:59 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726784AbfFFBpO (ORCPT ); Wed, 5 Jun 2019 21:45:14 -0400 Received: from mga03.intel.com ([134.134.136.65]:36143 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726741AbfFFBpO (ORCPT ); Wed, 5 Jun 2019 21:45:14 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:13 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:12 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 03/10] mm/gup: Pass flags down to __gup_device_huge* calls Date: Wed, 5 Jun 2019 18:45:36 -0700 Message-Id: <20190606014544.8339-4-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny In order to support checking for a layout lease on a FS DAX inode these calls need to know if FOLL_LONGTERM was specified. Prepare for this with this patch. Signed-off-by: Ira Weiny --- mm/gup.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index a3fb48605836..26a7a3a3a657 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1939,7 +1939,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, #if defined(__HAVE_ARCH_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE) static int __gup_device_huge(unsigned long pfn, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { int nr_start = *nr; struct dev_pagemap *pgmap = NULL; @@ -1969,30 +1970,33 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, } static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { unsigned long fault_pfn; int nr_start = *nr; fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - if (!__gup_device_huge(fault_pfn, addr, end, pages, nr)) + if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags)) return 0; if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { undo_dev_pagemap(nr, nr_start, pages); return 0; } + return 1; } static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { unsigned long fault_pfn; int nr_start = *nr; fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); - if (!__gup_device_huge(fault_pfn, addr, end, pages, nr)) + if (!__gup_device_huge(fault_pfn, addr, end, pages, nr, flags)) return 0; if (unlikely(pud_val(orig) != pud_val(*pudp))) { @@ -2003,14 +2007,16 @@ static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, } #else static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { BUILD_BUG(); return 0; } static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr, - unsigned long end, struct page **pages, int *nr) + unsigned long end, struct page **pages, int *nr, + unsigned int flags) { BUILD_BUG(); return 0; @@ -2029,7 +2035,8 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, if (pmd_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; - return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr); + return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr, + flags); } refs = 0; @@ -2072,7 +2079,8 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, if (pud_devmap(orig)) { if (unlikely(flags & FOLL_LONGTERM)) return 0; - return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr); + return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr, + flags); } refs = 0; From patchwork Thu Jun 6 01:45:37 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110851 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7m25rwwz9sND for ; Thu, 6 Jun 2019 11:45:54 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726817AbfFFBpQ (ORCPT ); Wed, 5 Jun 2019 21:45:16 -0400 Received: from mga03.intel.com ([134.134.136.65]:36145 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726805AbfFFBpQ (ORCPT ); Wed, 5 Jun 2019 21:45:16 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:15 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:14 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 04/10] mm/gup: Ensure F_LAYOUT lease is held prior to GUP'ing pages Date: Wed, 5 Jun 2019 18:45:37 -0700 Message-Id: <20190606014544.8339-5-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny On FS DAX files users must inform the file system they intend to take long term GUP pins on the file pages. Failure to do so should result in an error. Ensure that a F_LAYOUT lease exists at the time the GUP call is made. If not return EPERM. Signed-off-by: Ira Weiny --- fs/locks.c | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 2 ++ mm/gup.c | 25 +++++++++++++++++++++++++ mm/huge_memory.c | 12 ++++++++++++ 4 files changed, 80 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index de9761c068de..43f5dc97652c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2945,3 +2945,44 @@ static int __init filelock_init(void) return 0; } core_initcall(filelock_init); + +/** + * mapping_inode_has_layout() + * @page page we are trying to GUP + * + * This should only be called on DAX pages. DAX pages which are mapped through + * FS DAX do not use the page cache. As a result they require the user to take + * a LAYOUT lease on them prior to be able to pin them for longterm use. + * This allows the user to opt-into the fact that truncation operations will + * fail for the duration of the pin. + * + * @Return true if the page has a LAYOUT lease associated with it's file. + */ +bool mapping_inode_has_layout(struct page *page) +{ + bool ret = false; + struct inode *inode; + struct file_lock *fl; + struct file_lock_context *ctx; + + if (WARN_ON(PageAnon(page)) || + WARN_ON(!page) || + WARN_ON(!page->mapping) || + WARN_ON(!page->mapping->host)) + return false; + + inode = page->mapping->host; + + ctx = locks_get_lock_context(inode, F_RDLCK); + spin_lock(&ctx->flc_lock); + list_for_each_entry(fl, &ctx->flc_lease, fl_list) { + if (fl->fl_flags & FL_LAYOUT) { + ret = true; + break; + } + } + spin_unlock(&ctx->flc_lock); + + return ret; +} +EXPORT_SYMBOL_GPL(mapping_inode_has_layout); diff --git a/include/linux/mm.h b/include/linux/mm.h index bc373a9b69fc..432b004b920c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1630,6 +1630,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int get_user_pages_fast(unsigned long start, int nr_pages, unsigned int gup_flags, struct page **pages); +bool mapping_inode_has_layout(struct page *page); + /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ diff --git a/mm/gup.c b/mm/gup.c index 26a7a3a3a657..d06cc5b14c0b 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -361,6 +361,13 @@ static struct page *follow_page_pte(struct vm_area_struct *vma, page = pte_page(pte); else goto no_page; + + if (unlikely(flags & FOLL_LONGTERM) && + (*pgmap)->type == MEMORY_DEVICE_FS_DAX && + !mapping_inode_has_layout(page)) { + page = ERR_PTR(-EPERM); + goto out; + } } else if (unlikely(!page)) { if (flags & FOLL_DUMP) { /* Avoid special (like zero) pages in core dumps */ @@ -1905,6 +1912,16 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, VM_BUG_ON_PAGE(compound_head(page) != head, page); + if (pte_devmap(pte) && + unlikely(flags & FOLL_LONGTERM) && + pgmap->type == MEMORY_DEVICE_FS_DAX && + !mapping_inode_has_layout(head)) { + mod_node_page_state(page_pgdat(head), + NR_GUP_FAST_PAGE_BACKOFFS, 1); + put_user_page(head); + goto pte_unmap; + } + SetPageReferenced(page); pages[*nr] = page; (*nr)++; @@ -1955,6 +1972,14 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr, } SetPageReferenced(page); pages[*nr] = page; + + if (unlikely(flags & FOLL_LONGTERM) && + pgmap->type == MEMORY_DEVICE_FS_DAX && + !mapping_inode_has_layout(page)) { + undo_dev_pagemap(nr, nr_start, pages); + return 0; + } + if (try_get_gup_pin_page(page, NR_GUP_FAST_PAGES_REQUESTED)) { undo_dev_pagemap(nr, nr_start, pages); return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index bb7fd7fa6f77..cdc213e50902 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -950,6 +950,12 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); + + if (unlikely(flags & FOLL_LONGTERM) && + (*pgmap)->type == MEMORY_DEVICE_FS_DAX && + !mapping_inode_has_layout(page)) + return ERR_PTR(-EPERM); + if (unlikely(!try_get_gup_pin_page(page, NR_GUP_SLOW_PAGES_REQUESTED))) page = ERR_PTR(-ENOMEM); @@ -1092,6 +1098,12 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, if (!*pgmap) return ERR_PTR(-EFAULT); page = pfn_to_page(pfn); + + if (unlikely(flags & FOLL_LONGTERM) && + (*pgmap)->type == MEMORY_DEVICE_FS_DAX && + !mapping_inode_has_layout(page)) + return ERR_PTR(-EPERM); + if (unlikely(!try_get_gup_pin_page(page, NR_GUP_SLOW_PAGES_REQUESTED))) page = ERR_PTR(-ENOMEM); From patchwork Thu Jun 6 01:45:38 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110849 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7ly4sbZz9sNp for ; Thu, 6 Jun 2019 11:45:50 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726843AbfFFBpV (ORCPT ); Wed, 5 Jun 2019 21:45:21 -0400 Received: from mga03.intel.com ([134.134.136.65]:36145 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726823AbfFFBpR (ORCPT ); Wed, 5 Jun 2019 21:45:17 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:17 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:16 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 05/10] fs/ext4: Teach ext4 to break layout leases Date: Wed, 5 Jun 2019 18:45:38 -0700 Message-Id: <20190606014544.8339-6-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny ext4 needs to break a layout lease if it is held to inform a user holding a layout lease that a truncate is about to happen. This allows the user knowledge of, and choice in how to handle, some other thread attempting to modify a file they are actively using. Split out the logic to determine if a mapping is DAX, export it, and then break layout leases if a mapping is DAX. Signed-off-by: Ira Weiny --- fs/dax.c | 23 ++++++++++++++++------- fs/ext4/inode.c | 4 ++++ include/linux/dax.h | 6 ++++++ 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index f74386293632..29ff3b683657 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -552,6 +552,21 @@ static void *grab_mapping_entry(struct xa_state *xas, return xa_mk_internal(VM_FAULT_FALLBACK); } +bool dax_mapping_is_dax(struct address_space *mapping) +{ + /* + * In the 'limited' case get_user_pages() for dax is disabled. + */ + if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) + return false; + + if (!dax_mapping(mapping) || !mapping_mapped(mapping)) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(dax_mapping_is_dax); + /** * dax_layout_busy_page - find first pinned page in @mapping * @mapping: address space to scan for a page with ref count > 1 @@ -574,13 +589,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) unsigned int scanned = 0; struct page *page = NULL; - /* - * In the 'limited' case get_user_pages() for dax is disabled. - */ - if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) - return NULL; - - if (!dax_mapping(mapping) || !mapping_mapped(mapping)) + if (!dax_mapping_is_dax(mapping)) return NULL; /* diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c16071547c9c..c7c99f51961f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4241,6 +4241,10 @@ int ext4_break_layouts(struct inode *inode) if (WARN_ON_ONCE(!rwsem_is_locked(&ei->i_mmap_sem))) return -EINVAL; + /* Break layout leases if active */ + if (dax_mapping_is_dax(inode->i_mapping)) + break_layout(inode, true); + do { page = dax_layout_busy_page(inode->i_mapping); if (!page) diff --git a/include/linux/dax.h b/include/linux/dax.h index becaea5f4488..ee6cbd56ddc4 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -106,6 +106,7 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); int dax_writeback_mapping_range(struct address_space *mapping, struct block_device *bdev, struct writeback_control *wbc); +bool dax_mapping_is_dax(struct address_space *mapping); struct page *dax_layout_busy_page(struct address_space *mapping); dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); @@ -137,6 +138,11 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev) return NULL; } +bool dax_mapping_is_dax(struct address_space *mapping) +{ + return false; +} + static inline struct page *dax_layout_busy_page(struct address_space *mapping) { return NULL; From patchwork Thu Jun 6 01:45:39 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110850 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7m04m76z9sND for ; Thu, 6 Jun 2019 11:45:52 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726830AbfFFBpU (ORCPT ); Wed, 5 Jun 2019 21:45:20 -0400 Received: from mga03.intel.com ([134.134.136.65]:36145 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726658AbfFFBpU (ORCPT ); Wed, 5 Jun 2019 21:45:20 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:19 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:18 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 06/10] fs/ext4: Teach dax_layout_busy_page() to operate on a sub-range Date: Wed, 5 Jun 2019 18:45:39 -0700 Message-Id: <20190606014544.8339-7-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny Callers of dax_layout_busy_page() are only rarely operating on the entire file of concern. Teach dax_layout_busy_page() to operate on a sub-range of the address_space provided. Specifying 0 - ULONG_MAX however, will continue to operate on the "entire file" and XFS is split out to a separate patch by this method. This could potentially speed up dax_layout_busy_page() as well. Signed-off-by: Ira Weiny --- fs/dax.c | 15 +++++++++++---- fs/ext4/ext4.h | 2 +- fs/ext4/extents.c | 6 +++--- fs/ext4/inode.c | 19 ++++++++++++------- fs/xfs/xfs_file.c | 3 ++- include/linux/dax.h | 3 ++- 6 files changed, 31 insertions(+), 17 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 29ff3b683657..abd77b184879 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -568,8 +568,11 @@ bool dax_mapping_is_dax(struct address_space *mapping) EXPORT_SYMBOL_GPL(dax_mapping_is_dax); /** - * dax_layout_busy_page - find first pinned page in @mapping + * dax_layout_busy_page - find first pinned page in @mapping within + * the range @off - @off + @len * @mapping: address space to scan for a page with ref count > 1 + * @off: offset to start at + * @len: length to scan through * * DAX requires ZONE_DEVICE mapped pages. These pages are never * 'onlined' to the page allocator so they are considered idle when @@ -582,9 +585,13 @@ EXPORT_SYMBOL_GPL(dax_mapping_is_dax); * to be able to run unmap_mapping_range() and subsequently not race * mapping_mapped() becoming true. */ -struct page *dax_layout_busy_page(struct address_space *mapping) +struct page *dax_layout_busy_page(struct address_space *mapping, + loff_t off, loff_t len) { - XA_STATE(xas, &mapping->i_pages, 0); + unsigned long start_idx = off >> PAGE_SHIFT; + unsigned long end_idx = (len == ULONG_MAX) ? ULONG_MAX + : start_idx + (len >> PAGE_SHIFT); + XA_STATE(xas, &mapping->i_pages, start_idx); void *entry; unsigned int scanned = 0; struct page *page = NULL; @@ -607,7 +614,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping) unmap_mapping_range(mapping, 0, 0, 1); xas_lock_irq(&xas); - xas_for_each(&xas, entry, ULONG_MAX) { + xas_for_each(&xas, entry, end_idx) { if (WARN_ON_ONCE(!xa_is_value(entry))) continue; if (unlikely(dax_is_locked(entry))) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1cb67859e051..ba5920c21023 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2530,7 +2530,7 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_inode_attach_jinode(struct inode *inode); extern int ext4_can_truncate(struct inode *inode); extern int ext4_truncate(struct inode *); -extern int ext4_break_layouts(struct inode *); +extern int ext4_break_layouts(struct inode *inode, loff_t offset, loff_t len); extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d40ed940001e..9ddb117d8beb 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4736,7 +4736,7 @@ static long ext4_zero_range(struct file *file, loff_t offset, */ down_write(&EXT4_I(inode)->i_mmap_sem); - ret = ext4_break_layouts(inode); + ret = ext4_break_layouts(inode, offset, len); if (ret) { up_write(&EXT4_I(inode)->i_mmap_sem); goto out_mutex; @@ -5419,7 +5419,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) */ down_write(&EXT4_I(inode)->i_mmap_sem); - ret = ext4_break_layouts(inode); + ret = ext4_break_layouts(inode, offset, len); if (ret) goto out_mmap; @@ -5572,7 +5572,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len) */ down_write(&EXT4_I(inode)->i_mmap_sem); - ret = ext4_break_layouts(inode); + ret = ext4_break_layouts(inode, offset, len); if (ret) goto out_mmap; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c7c99f51961f..75f543f384e4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4232,7 +4232,7 @@ static void ext4_wait_dax_page(struct ext4_inode_info *ei) down_write(&ei->i_mmap_sem); } -int ext4_break_layouts(struct inode *inode) +int ext4_break_layouts(struct inode *inode, loff_t offset, loff_t len) { struct ext4_inode_info *ei = EXT4_I(inode); struct page *page; @@ -4246,7 +4246,7 @@ int ext4_break_layouts(struct inode *inode) break_layout(inode, true); do { - page = dax_layout_busy_page(inode->i_mapping); + page = dax_layout_busy_page(inode->i_mapping, offset, len); if (!page) return 0; @@ -4333,7 +4333,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) */ down_write(&EXT4_I(inode)->i_mmap_sem); - ret = ext4_break_layouts(inode); + ret = ext4_break_layouts(inode, offset, length); if (ret) goto out_dio; @@ -5605,10 +5605,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) down_write(&EXT4_I(inode)->i_mmap_sem); - rc = ext4_break_layouts(inode); - if (rc) { - up_write(&EXT4_I(inode)->i_mmap_sem); - return rc; + if (shrink) { + loff_t off = attr->ia_size; + loff_t len = inode->i_size - attr->ia_size; + + rc = ext4_break_layouts(inode, off, len); + if (rc) { + up_write(&EXT4_I(inode)->i_mmap_sem); + return rc; + } } if (attr->ia_size != inode->i_size) { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 76748255f843..ebddf911644c 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -746,7 +746,8 @@ xfs_break_dax_layouts( ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL)); - page = dax_layout_busy_page(inode->i_mapping); + /* We default to the "whole file" */ + page = dax_layout_busy_page(inode->i_mapping, 0, ULONG_MAX); if (!page) return 0; diff --git a/include/linux/dax.h b/include/linux/dax.h index ee6cbd56ddc4..3c3ab8dd76c6 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -107,7 +107,8 @@ int dax_writeback_mapping_range(struct address_space *mapping, struct block_device *bdev, struct writeback_control *wbc); bool dax_mapping_is_dax(struct address_space *mapping); -struct page *dax_layout_busy_page(struct address_space *mapping); +struct page *dax_layout_busy_page(struct address_space *mapping, + loff_t off, loff_t len); dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); #else From patchwork Thu Jun 6 01:45:40 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110848 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7lq1mhjz9sND for ; Thu, 6 Jun 2019 11:45:43 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726984AbfFFBpl (ORCPT ); Wed, 5 Jun 2019 21:45:41 -0400 Received: from mga03.intel.com ([134.134.136.65]:36145 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726863AbfFFBpV (ORCPT ); Wed, 5 Jun 2019 21:45:21 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:21 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:20 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 07/10] fs/ext4: Fail truncate if pages are GUP pinned Date: Wed, 5 Jun 2019 18:45:40 -0700 Message-Id: <20190606014544.8339-8-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny If pages are actively gup pinned fail the truncate operation. Signed-off-by: Ira Weiny --- fs/ext4/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 75f543f384e4..1ded83ec08c0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4250,6 +4250,9 @@ int ext4_break_layouts(struct inode *inode, loff_t offset, loff_t len) if (!page) return 0; + if (page_gup_pinned(page)) + return -ETXTBSY; + error = ___wait_var_event(&page->_refcount, atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, 0, 0, From patchwork Thu Jun 6 01:45:41 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110847 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7lm43TLz9sNm for ; Thu, 6 Jun 2019 11:45:40 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726887AbfFFBpY (ORCPT ); Wed, 5 Jun 2019 21:45:24 -0400 Received: from mga03.intel.com ([134.134.136.65]:36145 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726823AbfFFBpY (ORCPT ); Wed, 5 Jun 2019 21:45:24 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:23 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:22 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 08/10] fs/xfs: Teach xfs to use new dax_layout_busy_page() Date: Wed, 5 Jun 2019 18:45:41 -0700 Message-Id: <20190606014544.8339-9-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny dax_layout_busy_page() can now operate on a sub-range of the address_space provided. Have xfs specify the sub range to dax_layout_busy_page() Signed-off-by: Ira Weiny --- fs/xfs/xfs_file.c | 19 +++++++++++++------ fs/xfs/xfs_inode.h | 5 +++-- fs/xfs/xfs_ioctl.c | 15 ++++++++++++--- fs/xfs/xfs_iops.c | 14 ++++++++++---- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ebddf911644c..350eb5546d36 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -300,7 +300,11 @@ xfs_file_aio_write_checks( if (error <= 0) return error; - error = xfs_break_layouts(inode, iolock, BREAK_WRITE); + /* + * BREAK_WRITE ignores offset/len tuple just specify the whole file + * (0 - ULONG_MAX to be safe. + */ + error = xfs_break_layouts(inode, iolock, 0, ULONG_MAX, BREAK_WRITE); if (error) return error; @@ -740,14 +744,15 @@ xfs_wait_dax_page( static int xfs_break_dax_layouts( struct inode *inode, - bool *retry) + bool *retry, + loff_t off, + loff_t len) { struct page *page; ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL)); - /* We default to the "whole file" */ - page = dax_layout_busy_page(inode->i_mapping, 0, ULONG_MAX); + page = dax_layout_busy_page(inode->i_mapping, off, len); if (!page) return 0; @@ -761,6 +766,8 @@ int xfs_break_layouts( struct inode *inode, uint *iolock, + loff_t off, + loff_t len, enum layout_break_reason reason) { bool retry; @@ -772,7 +779,7 @@ xfs_break_layouts( retry = false; switch (reason) { case BREAK_UNMAP: - error = xfs_break_dax_layouts(inode, &retry); + error = xfs_break_dax_layouts(inode, &retry, off, len); if (error || retry) break; /* fall through */ @@ -814,7 +821,7 @@ xfs_file_fallocate( return -EOPNOTSUPP; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); + error = xfs_break_layouts(inode, &iolock, offset, len, BREAK_UNMAP); if (error) goto out_unlock; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 558173f95a03..1b0948f5267c 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -475,8 +475,9 @@ enum xfs_prealloc_flags { int xfs_update_prealloc_flags(struct xfs_inode *ip, enum xfs_prealloc_flags flags); -int xfs_break_layouts(struct inode *inode, uint *iolock, - enum layout_break_reason reason); +int xfs_break_layouts(struct inode *inode, uint *iolock, + loff_t off, loff_t len, + enum layout_break_reason reason); /* from xfs_iops.c */ extern void xfs_setup_inode(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index d7dfc13f30f5..a702e44a63b8 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -605,6 +605,7 @@ xfs_ioc_space( enum xfs_prealloc_flags flags = 0; uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; int error; + loff_t break_length; if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) return -EPERM; @@ -625,9 +626,6 @@ xfs_ioc_space( return error; xfs_ilock(ip, iolock); - error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); - if (error) - goto out_unlock; switch (bf->l_whence) { case 0: /*SEEK_SET*/ @@ -673,6 +671,17 @@ xfs_ioc_space( goto out_unlock; } + /* break layout for the whole file if len ends up 0 */ + if (bf->l_len == 0) + break_length = ULONG_MAX; + else + break_length = bf->l_len; + + error = xfs_break_layouts(inode, &iolock, bf->l_start, break_length, + BREAK_UNMAP); + if (error) + goto out_unlock; + switch (cmd) { case XFS_IOC_ZERO_RANGE: flags |= XFS_PREALLOC_SET; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 74047bd0c1ae..5529bc7a516b 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1052,10 +1052,16 @@ xfs_vn_setattr( xfs_ilock(ip, XFS_MMAPLOCK_EXCL); iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; - error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP); - if (error) { - xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); - return error; + if (iattr->ia_size < inode->i_size) { + loff_t off = iattr->ia_size; + loff_t len = inode->i_size - iattr->ia_size; + + error = xfs_break_layouts(inode, &iolock, off, len, + BREAK_UNMAP); + if (error) { + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + return error; + } } error = xfs_vn_setattr_size(dentry, iattr); From patchwork Thu Jun 6 01:45:42 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110846 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7lj4CFvz9s4Y for ; Thu, 6 Jun 2019 11:45:37 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726937AbfFFBp0 (ORCPT ); Wed, 5 Jun 2019 21:45:26 -0400 Received: from mga03.intel.com ([134.134.136.65]:36145 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726922AbfFFBp0 (ORCPT ); Wed, 5 Jun 2019 21:45:26 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:25 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:24 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 09/10] fs/xfs: Fail truncate if pages are GUP pinned Date: Wed, 5 Jun 2019 18:45:42 -0700 Message-Id: <20190606014544.8339-10-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny If pages are actively gup pinned fail the truncate operation. To support an application who wishes to removing a pin upon SIGIO reception we must change the order of breaking layout leases with respect to DAX layout leases. Check for a GUP pin on the page being truncated and return ETXTBSY if it is GUP pinned. Change the order of XFS break leased layouts and break DAX layouts. Select EXPORT_BLOCK_OPS for FS_DAX to ensure that xfs_break_lease_layouts() is defined for FS_DAX as well as pNFS. Update comment for xfs_break_lease_layouts() Signed-off-by: Ira Weiny --- fs/Kconfig | 1 + fs/xfs/xfs_file.c | 8 ++++++-- fs/xfs/xfs_pnfs.c | 14 +++++++------- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/Kconfig b/fs/Kconfig index f1046cf6ad85..c54b0b88abbf 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -49,6 +49,7 @@ config FS_DAX select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED) select FS_IOMAP select DAX + select EXPORTFS_BLOCK_OPS help Direct Access (DAX) can be used on memory-backed block devices. If the block device supports DAX and the filesystem supports DAX, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 350eb5546d36..1dc61c98f7cd 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -756,6 +756,9 @@ xfs_break_dax_layouts( if (!page) return 0; + if (page_gup_pinned(page)) + return -ETXTBSY; + *retry = true; return ___wait_var_event(&page->_refcount, atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, @@ -779,10 +782,11 @@ xfs_break_layouts( retry = false; switch (reason) { case BREAK_UNMAP: - error = xfs_break_dax_layouts(inode, &retry, off, len); + error = xfs_break_leased_layouts(inode, iolock, &retry); if (error || retry) break; - /* fall through */ + error = xfs_break_dax_layouts(inode, &retry, off, len); + break; case BREAK_WRITE: error = xfs_break_leased_layouts(inode, iolock, &retry); break; diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index bde2c9f56a46..e70d24d12cbf 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -21,14 +21,14 @@ #include "xfs_pnfs.h" /* - * Ensure that we do not have any outstanding pNFS layouts that can be used by - * clients to directly read from or write to this inode. This must be called - * before every operation that can remove blocks from the extent map. - * Additionally we call it during the write operation, where aren't concerned - * about exposing unallocated blocks but just want to provide basic + * Ensure that we do not have any outstanding pNFS or longterm GUP layouts that + * can be used by clients to directly read from or write to this inode. This + * must be called before every operation that can remove blocks from the extent + * map. Additionally we call it during the write operation, where aren't + * concerned about exposing unallocated blocks but just want to provide basic * synchronization between a local writer and pNFS clients. mmap writes would - * also benefit from this sort of synchronization, but due to the tricky locking - * rules in the page fault path we don't bother. + * also benefit from this sort of synchronization, but due to the tricky + * locking rules in the page fault path we don't bother. */ int xfs_break_leased_layouts( From patchwork Thu Jun 6 01:45:43 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ira Weiny X-Patchwork-Id: 1110845 Return-Path: X-Original-To: patchwork-incoming@ozlabs.org Delivered-To: patchwork-incoming@ozlabs.org Authentication-Results: ozlabs.org; spf=none (mailfrom) smtp.mailfrom=vger.kernel.org (client-ip=209.132.180.67; helo=vger.kernel.org; envelope-from=linux-ext4-owner@vger.kernel.org; receiver=) Authentication-Results: ozlabs.org; dmarc=fail (p=none dis=none) header.from=intel.com Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by ozlabs.org (Postfix) with ESMTP id 45K7lb3PYLz9sNf for ; Thu, 6 Jun 2019 11:45:31 +1000 (AEST) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1726965AbfFFBp2 (ORCPT ); Wed, 5 Jun 2019 21:45:28 -0400 Received: from mga03.intel.com ([134.134.136.65]:36145 "EHLO mga03.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1726950AbfFFBp2 (ORCPT ); Wed, 5 Jun 2019 21:45:28 -0400 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 05 Jun 2019 18:45:27 -0700 X-ExtLoop1: 1 Received: from iweiny-desk2.sc.intel.com ([10.3.52.157]) by orsmga002.jf.intel.com with ESMTP; 05 Jun 2019 18:45:26 -0700 From: ira.weiny@intel.com To: Dan Williams , Jan Kara , "Theodore Ts'o" , Jeff Layton , Dave Chinner Cc: Ira Weiny , Matthew Wilcox , linux-xfs@vger.kernel.org, Andrew Morton , John Hubbard , =?utf-8?b?SsOpcsO0bWUgR2xpc3Nl?= , linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org, linux-nvdimm@lists.01.org, linux-ext4@vger.kernel.org, linux-mm@kvack.org Subject: [PATCH RFC 10/10] mm/gup: Remove FOLL_LONGTERM DAX exclusion Date: Wed, 5 Jun 2019 18:45:43 -0700 Message-Id: <20190606014544.8339-11-ira.weiny@intel.com> X-Mailer: git-send-email 2.20.1 In-Reply-To: <20190606014544.8339-1-ira.weiny@intel.com> References: <20190606014544.8339-1-ira.weiny@intel.com> MIME-Version: 1.0 Sender: linux-ext4-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-ext4@vger.kernel.org From: Ira Weiny Now that there is a mechanism for users to safely take LONGTERM pins on FS DAX pages, remove the FS DAX exclusion from GUP with FOLL_LONGTERM. Special processing remains in effect for CONFIG_CMA Signed-off-by: Ira Weiny --- mm/gup.c | 78 ++++++-------------------------------------------------- 1 file changed, 8 insertions(+), 70 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index d06cc5b14c0b..4f6e5606b81e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1392,26 +1392,6 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, } EXPORT_SYMBOL(get_user_pages_remote); -#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA) -static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) -{ - long i; - struct vm_area_struct *vma_prev = NULL; - - for (i = 0; i < nr_pages; i++) { - struct vm_area_struct *vma = vmas[i]; - - if (vma == vma_prev) - continue; - - vma_prev = vma; - - if (vma_is_fsdax(vma)) - return true; - } - return false; -} - #ifdef CONFIG_CMA static struct page *new_non_cma_page(struct page *page, unsigned long private) { @@ -1542,18 +1522,6 @@ static long check_and_migrate_cma_pages(struct task_struct *tsk, return nr_pages; } -#else -static long check_and_migrate_cma_pages(struct task_struct *tsk, - struct mm_struct *mm, - unsigned long start, - unsigned long nr_pages, - struct page **pages, - struct vm_area_struct **vmas, - unsigned int gup_flags) -{ - return nr_pages; -} -#endif /* * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which @@ -1567,49 +1535,28 @@ static long __gup_longterm_locked(struct task_struct *tsk, struct vm_area_struct **vmas, unsigned int gup_flags) { - struct vm_area_struct **vmas_tmp = vmas; unsigned long flags = 0; - long rc, i; + long rc; - if (gup_flags & FOLL_LONGTERM) { - if (!pages) - return -EINVAL; - - if (!vmas_tmp) { - vmas_tmp = kcalloc(nr_pages, - sizeof(struct vm_area_struct *), - GFP_KERNEL); - if (!vmas_tmp) - return -ENOMEM; - } + if (flags & FOLL_LONGTERM) flags = memalloc_nocma_save(); - } rc = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, - vmas_tmp, NULL, gup_flags); + vmas, NULL, gup_flags); if (gup_flags & FOLL_LONGTERM) { memalloc_nocma_restore(flags); if (rc < 0) goto out; - if (check_dax_vmas(vmas_tmp, rc)) { - for (i = 0; i < rc; i++) - put_page(pages[i]); - rc = -EOPNOTSUPP; - goto out; - } - rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages, - vmas_tmp, gup_flags); + vmas, gup_flags); } out: - if (vmas_tmp != vmas) - kfree(vmas_tmp); return rc; } -#else /* !CONFIG_FS_DAX && !CONFIG_CMA */ +#else /* !CONFIG_CMA */ static __always_inline long __gup_longterm_locked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, @@ -1621,7 +1568,7 @@ static __always_inline long __gup_longterm_locked(struct task_struct *tsk, return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas, NULL, flags); } -#endif /* CONFIG_FS_DAX || CONFIG_CMA */ +#endif /* CONFIG_CMA */ /* * This is the same as get_user_pages_remote(), just with a @@ -1882,9 +1829,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, goto pte_unmap; if (pte_devmap(pte)) { - if (unlikely(flags & FOLL_LONGTERM)) - goto pte_unmap; - pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, pages); @@ -2057,12 +2001,9 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr, if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) return 0; - if (pmd_devmap(orig)) { - if (unlikely(flags & FOLL_LONGTERM)) - return 0; + if (pmd_devmap(orig)) return __gup_device_huge_pmd(orig, pmdp, addr, end, pages, nr, flags); - } refs = 0; page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); @@ -2101,12 +2042,9 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr, if (!pud_access_permitted(orig, flags & FOLL_WRITE)) return 0; - if (pud_devmap(orig)) { - if (unlikely(flags & FOLL_LONGTERM)) - return 0; + if (pud_devmap(orig)) return __gup_device_huge_pud(orig, pudp, addr, end, pages, nr, flags); - } refs = 0; page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);