[v2,09/15] CIFS: SMBD: Support page offset in RDMA recv

Message ID 20180530194807.31657-10-longli@linuxonhyperv.com
State New
Headers show
Series
  • CIFS: Add direct I/O support
Related show

Commit Message

Long Li May 30, 2018, 7:48 p.m.
From: Long Li <longli@microsoft.com>

The RDMA recv function needs to place data at the correct location,
starting at the page offset.

Signed-off-by: Long Li <longli@microsoft.com>
---
 fs/cifs/smbdirect.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

Comments

Tom Talpey June 24, 2018, 2:16 a.m. | #1
On 5/30/2018 3:48 PM, Long Li wrote:
> From: Long Li <longli@microsoft.com>
> 
> RDMA recv function needs to place data to the correct place starting at
> page offset.
> 
> Signed-off-by: Long Li <longli@microsoft.com>
> ---
>   fs/cifs/smbdirect.c | 18 +++++++++++-------
>   1 file changed, 11 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
> index 6141e3c..ba53c52 100644
> --- a/fs/cifs/smbdirect.c
> +++ b/fs/cifs/smbdirect.c
> @@ -2004,10 +2004,12 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf,
>    * return value: actual data read
>    */
>   static int smbd_recv_page(struct smbd_connection *info,
> -		struct page *page, unsigned int to_read)
> +		struct page *page, unsigned int page_offset,
> +		unsigned int to_read)
>   {
>   	int ret;
>   	char *to_address;
> +	void *page_address;
>   
>   	/* make sure we have the page ready for read */
>   	ret = wait_event_interruptible(
> @@ -2015,16 +2017,17 @@ static int smbd_recv_page(struct smbd_connection *info,
>   		info->reassembly_data_length >= to_read ||
>   			info->transport_status != SMBD_CONNECTED);
>   	if (ret)
> -		return 0;
> +		return ret;
>   
>   	/* now we can read from reassembly queue and not sleep */
> -	to_address = kmap_atomic(page);
> +	page_address = kmap_atomic(page);
> +	to_address = (char *) page_address + page_offset;
>   
>   	log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
>   		page, to_address, to_read);
>   
>   	ret = smbd_recv_buf(info, to_address, to_read);
> -	kunmap_atomic(to_address);
> +	kunmap_atomic(page_address);

Is "page" truly not mapped? This kmap/kunmap for each received 4KB is
very expensive. Is there not a way to keep a kva for the reassembly
queue segments?

>   
>   	return ret;
>   }
> @@ -2038,7 +2041,7 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
>   {
>   	char *buf;
>   	struct page *page;
> -	unsigned int to_read;
> +	unsigned int to_read, page_offset;
>   	int rc;
>   
>   	info->smbd_recv_pending++;
> @@ -2052,15 +2055,16 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
>   
>   	case READ | ITER_BVEC:
>   		page = msg->msg_iter.bvec->bv_page;
> +		page_offset = msg->msg_iter.bvec->bv_offset;
>   		to_read = msg->msg_iter.bvec->bv_len;
> -		rc = smbd_recv_page(info, page, to_read);
> +		rc = smbd_recv_page(info, page, page_offset, to_read);
>   		break;
>   
>   	default:
>   		/* It's a bug in upper layer to get there */
>   		cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
>   			msg->msg_iter.type);
> -		rc = -EIO;
> +		rc = -EINVAL;
>   	}
>   
>   	info->smbd_recv_pending--;
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Long Li June 25, 2018, 9:29 p.m. | #2
> Subject: Re: [Patch v2 09/15] CIFS: SMBD: Support page offset in RDMA recv
> 
> On 5/30/2018 3:48 PM, Long Li wrote:
> > From: Long Li <longli@microsoft.com>
> >
> > RDMA recv function needs to place data to the correct place starting
> > at page offset.
> >
> > Signed-off-by: Long Li <longli@microsoft.com>
> > ---
> >   fs/cifs/smbdirect.c | 18 +++++++++++-------
> >   1 file changed, 11 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index
> > 6141e3c..ba53c52 100644
> > --- a/fs/cifs/smbdirect.c
> > +++ b/fs/cifs/smbdirect.c
> > @@ -2004,10 +2004,12 @@ static int smbd_recv_buf(struct
> smbd_connection *info, char *buf,
> >    * return value: actual data read
> >    */
> >   static int smbd_recv_page(struct smbd_connection *info,
> > -		struct page *page, unsigned int to_read)
> > +		struct page *page, unsigned int page_offset,
> > +		unsigned int to_read)
> >   {
> >   	int ret;
> >   	char *to_address;
> > +	void *page_address;
> >
> >   	/* make sure we have the page ready for read */
> >   	ret = wait_event_interruptible(
> > @@ -2015,16 +2017,17 @@ static int smbd_recv_page(struct
> smbd_connection *info,
> >   		info->reassembly_data_length >= to_read ||
> >   			info->transport_status != SMBD_CONNECTED);
> >   	if (ret)
> > -		return 0;
> > +		return ret;
> >
> >   	/* now we can read from reassembly queue and not sleep */
> > -	to_address = kmap_atomic(page);
> > +	page_address = kmap_atomic(page);
> > +	to_address = (char *) page_address + page_offset;
> >
> >   	log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
> >   		page, to_address, to_read);
> >
> >   	ret = smbd_recv_buf(info, to_address, to_read);
> > -	kunmap_atomic(to_address);
> > +	kunmap_atomic(page_address);
> 
> Is "page" truly not mapped? This kmap/kunmap for each received 4KB is very
> expensive. Is there not a way to keep a kva for the reassembly queue
> segments?

I will find a way to avoid mapping those upper layer pages when doing RDMA receive. The reason for doing this is to use a common layer for transport (which greatly simplifies the code). It's probably a waste of time when only pages are involved and they do not need to be mapped.

I will address those in a separate patch.

> 
> >
> >   	return ret;
> >   }
> > @@ -2038,7 +2041,7 @@ int smbd_recv(struct smbd_connection *info,
> struct msghdr *msg)
> >   {
> >   	char *buf;
> >   	struct page *page;
> > -	unsigned int to_read;
> > +	unsigned int to_read, page_offset;
> >   	int rc;
> >
> >   	info->smbd_recv_pending++;
> > @@ -2052,15 +2055,16 @@ int smbd_recv(struct smbd_connection *info,
> > struct msghdr *msg)
> >
> >   	case READ | ITER_BVEC:
> >   		page = msg->msg_iter.bvec->bv_page;
> > +		page_offset = msg->msg_iter.bvec->bv_offset;
> >   		to_read = msg->msg_iter.bvec->bv_len;
> > -		rc = smbd_recv_page(info, page, to_read);
> > +		rc = smbd_recv_page(info, page, page_offset, to_read);
> >   		break;
> >
> >   	default:
> >   		/* It's a bug in upper layer to get there */
> >   		cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
> >   			msg->msg_iter.type);
> > -		rc = -EIO;
> > +		rc = -EINVAL;
> >   	}
> >
> >   	info->smbd_recv_pending--;
> >

Patch

diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index 6141e3c..ba53c52 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -2004,10 +2004,12 @@  static int smbd_recv_buf(struct smbd_connection *info, char *buf,
  * return value: actual data read
  */
 static int smbd_recv_page(struct smbd_connection *info,
-		struct page *page, unsigned int to_read)
+		struct page *page, unsigned int page_offset,
+		unsigned int to_read)
 {
 	int ret;
 	char *to_address;
+	void *page_address;
 
 	/* make sure we have the page ready for read */
 	ret = wait_event_interruptible(
@@ -2015,16 +2017,17 @@  static int smbd_recv_page(struct smbd_connection *info,
 		info->reassembly_data_length >= to_read ||
 			info->transport_status != SMBD_CONNECTED);
 	if (ret)
-		return 0;
+		return ret;
 
 	/* now we can read from reassembly queue and not sleep */
-	to_address = kmap_atomic(page);
+	page_address = kmap_atomic(page);
+	to_address = (char *) page_address + page_offset;
 
 	log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
 		page, to_address, to_read);
 
 	ret = smbd_recv_buf(info, to_address, to_read);
-	kunmap_atomic(to_address);
+	kunmap_atomic(page_address);
 
 	return ret;
 }
@@ -2038,7 +2041,7 @@  int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 {
 	char *buf;
 	struct page *page;
-	unsigned int to_read;
+	unsigned int to_read, page_offset;
 	int rc;
 
 	info->smbd_recv_pending++;
@@ -2052,15 +2055,16 @@  int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
 
 	case READ | ITER_BVEC:
 		page = msg->msg_iter.bvec->bv_page;
+		page_offset = msg->msg_iter.bvec->bv_offset;
 		to_read = msg->msg_iter.bvec->bv_len;
-		rc = smbd_recv_page(info, page, to_read);
+		rc = smbd_recv_page(info, page, page_offset, to_read);
 		break;
 
 	default:
 		/* It's a bug in upper layer to get there */
 		cifs_dbg(VFS, "CIFS: invalid msg type %d\n",
 			msg->msg_iter.type);
-		rc = -EIO;
+		rc = -EINVAL;
 	}
 
 	info->smbd_recv_pending--;