diff mbox

[linux-next,v2] SUNRPC: rpcrdma_register_default_external: Dynamically allocate ib_phys_buf

Message ID 1363023447-22453-1-git-send-email-tim.gardner@canonical.com
State Not Applicable, archived
Delegated to: David Miller
Headers show

Commit Message

Tim Gardner March 11, 2013, 5:37 p.m. UTC
rpcrdma_register_default_external() is several frames into the call stack which
goes deeper yet. You run the risk of stack corruption by declaring such a large
automatic variable, so move the array of 'struct ib_phys_buf' objects into the
requestor structure 'struct rpcrdma_req' (which is dynamically allocated) in
order to silence the frame-larger-than warning.

net/sunrpc/xprtrdma/verbs.c: In function 'rpcrdma_register_default_external':
net/sunrpc/xprtrdma/verbs.c:1774:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=]

gcc version 4.6.3

Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Tom Tucker <tom@ogc.us>
Cc: Haggai Eran <haggaie@mellanox.com>
Cc: Or Gerlitz <ogerlitz@mellanox.com>
Cc: Shani Michaeli <shanim@mellanox.com>
Cc: linux-nfs@vger.kernel.org
Cc: netdev@vger.kernel.org

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
---

v1 - Use kmalloc() to dynamically allocate and free the array of 'struct
ib_phys_buf' objects

v2 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_req
and pass this request down through rpcrdma_register_external() and
rpcrdma_register_default_external(). This is less overhead then using
kmalloc() and requires no extra error checking as the allocation burden is
shifted to the transport client.

 net/sunrpc/xprtrdma/rpc_rdma.c  |    2 +-
 net/sunrpc/xprtrdma/verbs.c     |   11 ++++++-----
 net/sunrpc/xprtrdma/xprt_rdma.h |    3 ++-
 3 files changed, 9 insertions(+), 7 deletions(-)

Comments

J. Bruce Fields March 11, 2013, 6:14 p.m. UTC | #1
On Mon, Mar 11, 2013 at 11:37:27AM -0600, Tim Gardner wrote:
> rpcrdma_register_default_external() is several frames into the call stack which
> goes deeper yet. You run the risk of stack corruption by declaring such a large
> automatic variable, so move the array of 'struct ib_phys_buf' objects into the
> requestor structure 'struct rpcrdma_req' (which is dynamically allocated) in
> order to silence the frame-larger-than warning.
> 
> net/sunrpc/xprtrdma/verbs.c: In function 'rpcrdma_register_default_external':
> net/sunrpc/xprtrdma/verbs.c:1774:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=]
> 
> gcc version 4.6.3
> 
> Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
> Cc: "J. Bruce Fields" <bfields@fieldses.org>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Tom Tucker <tom@ogc.us>
> Cc: Haggai Eran <haggaie@mellanox.com>
> Cc: Or Gerlitz <ogerlitz@mellanox.com>
> Cc: Shani Michaeli <shanim@mellanox.com>
> Cc: linux-nfs@vger.kernel.org
> Cc: netdev@vger.kernel.org
> 
> Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
> ---
> 
> v1 - Use kmalloc() to dynamically allocate and free the array of 'struct
> ib_phys_buf' objects
> 
> v2 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_req
> and pass this request down through rpcrdma_register_external() and
> rpcrdma_register_default_external(). This is less overhead then using
> kmalloc() and requires no extra error checking as the allocation burden is
> shifted to the transport client.

Oh good--so that works, and the req is the right place to put this?  How
are you testing this?

(Just want to make it clear: I'm *not* an expert on the rdma code, so my
suggestion to put this in the rpcrdma_req was a suggestion for something
to look into, not a claim that it's correct.)

--b.

> 
>  net/sunrpc/xprtrdma/rpc_rdma.c  |    2 +-
>  net/sunrpc/xprtrdma/verbs.c     |   11 ++++++-----
>  net/sunrpc/xprtrdma/xprt_rdma.h |    3 ++-
>  3 files changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index e03725b..c89448b 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -203,7 +203,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>  
>  	do {
>  		/* bind/register the memory, then build chunk from result. */
> -		int n = rpcrdma_register_external(seg, nsegs,
> +		int n = rpcrdma_register_external(req, seg, nsegs,
>  						cur_wchunk != NULL, r_xprt);
>  		if (n <= 0)
>  			goto out;
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 93726560..5b439ed 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1730,13 +1730,14 @@ rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
>  }
>  
>  static int
> -rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
> -			int *nsegs, int writing, struct rpcrdma_ia *ia)
> +rpcrdma_register_default_external(struct rpcrdma_req *req,
> +			struct rpcrdma_mr_seg *seg, int *nsegs, int writing,
> +			struct rpcrdma_ia *ia)
>  {
>  	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
>  				  IB_ACCESS_REMOTE_READ);
>  	struct rpcrdma_mr_seg *seg1 = seg;
> -	struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
> +	struct ib_phys_buf *ipb = req->rl_ipb;
>  	int len, i, rc = 0;
>  
>  	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
> @@ -1791,7 +1792,7 @@ rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
>  }
>  
>  int
> -rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
> +rpcrdma_register_external(struct rpcrdma_req *req, struct rpcrdma_mr_seg *seg,
>  			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
>  {
>  	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
> @@ -1827,7 +1828,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
>  
>  	/* Default registration each time */
>  	default:
> -		rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
> +		rc = rpcrdma_register_default_external(req, seg, &nsegs, writing, ia);
>  		break;
>  	}
>  	if (rc)
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index cc1445d..b10ed34 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -192,6 +192,7 @@ struct rpcrdma_req {
>  	struct ib_sge	rl_send_iov[4];	/* for active requests */
>  	struct ib_sge	rl_iov;		/* for posting */
>  	struct ib_mr	*rl_handle;	/* handle for mem in rl_iov */
> +	struct ib_phys_buf rl_ipb[RPCRDMA_MAX_DATA_SEGS]; /* temp work array */
>  	char		rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
>  	__u32 		rl_xdr_buf[0];	/* start of returned rpc rq_buffer */
>  };
> @@ -327,7 +328,7 @@ int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
>  int rpcrdma_deregister_internal(struct rpcrdma_ia *,
>  				struct ib_mr *, struct ib_sge *);
>  
> -int rpcrdma_register_external(struct rpcrdma_mr_seg *,
> +int rpcrdma_register_external(struct rpcrdma_req *, struct rpcrdma_mr_seg *,
>  				int, int, struct rpcrdma_xprt *);
>  int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
>  				struct rpcrdma_xprt *, void *);
> -- 
> 1.7.9.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tim Gardner March 11, 2013, 6:51 p.m. UTC | #2
On 03/11/2013 12:14 PM, J. Bruce Fields wrote:
<snip>
>>
>> v2 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_req
>> and pass this request down through rpcrdma_register_external() and
>> rpcrdma_register_default_external(). This is less overhead then using
>> kmalloc() and requires no extra error checking as the allocation burden is
>> shifted to the transport client.
> 
> Oh good--so that works, and the req is the right place to put this?  How
> are you testing this?
> 
> (Just want to make it clear: I'm *not* an expert on the rdma code, so my
> suggestion to put this in the rpcrdma_req was a suggestion for something
> to look into, not a claim that it's correct.)
> 

Just compile tested so far. Incidentally, I've been through the call stack:

call_transmit
 xprt_transmit
  xprt->ops->send_request(task)
   xprt_rdma_send_request
    rpcrdma_marshal_req
     rpcrdma_create_chunks
      rpcrdma_register_external
       rpcrdma_register_default_external

It appears that the context for kmalloc() should be fine unless there is
a spinlock held around call_transmit() (which seems unlikely).

rtg
J. Bruce Fields March 11, 2013, 7:15 p.m. UTC | #3
On Mon, Mar 11, 2013 at 12:51:44PM -0600, Tim Gardner wrote:
> On 03/11/2013 12:14 PM, J. Bruce Fields wrote:
> <snip>
> >>
> >> v2 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_req
> >> and pass this request down through rpcrdma_register_external() and
> >> rpcrdma_register_default_external(). This is less overhead then using
> >> kmalloc() and requires no extra error checking as the allocation burden is
> >> shifted to the transport client.
> > 
> > Oh good--so that works, and the req is the right place to put this?  How
> > are you testing this?
> > 
> > (Just want to make it clear: I'm *not* an expert on the rdma code, so my
> > suggestion to put this in the rpcrdma_req was a suggestion for something
> > to look into, not a claim that it's correct.)
> > 
> 
> Just compile tested so far. Incidentally, I've been through the call stack:
> 
> call_transmit
>  xprt_transmit
>   xprt->ops->send_request(task)
>    xprt_rdma_send_request
>     rpcrdma_marshal_req
>      rpcrdma_create_chunks
>       rpcrdma_register_external
>        rpcrdma_register_default_external
> 
> It appears that the context for kmalloc() should be fine unless there is
> a spinlock held around call_transmit() (which seems unlikely).

Right, though I think it shouldn't be GFP_KERNEL--looks like writes
could wait on it.

In any case, the embedding-in-rpcrdma_req solution does look cleaner if
that's correct (e.g. if we can be sure there won't be two simultaneous
users of that array).

--b.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Trond Myklebust March 11, 2013, 7:48 p.m. UTC | #4
On Mon, 2013-03-11 at 15:15 -0400, J. Bruce Fields wrote:
> On Mon, Mar 11, 2013 at 12:51:44PM -0600, Tim Gardner wrote:
> > On 03/11/2013 12:14 PM, J. Bruce Fields wrote:
> > <snip>
> > >>
> > >> v2 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_req
> > >> and pass this request down through rpcrdma_register_external() and
> > >> rpcrdma_register_default_external(). This is less overhead then using
> > >> kmalloc() and requires no extra error checking as the allocation burden is
> > >> shifted to the transport client.
> > > 
> > > Oh good--so that works, and the req is the right place to put this?  How
> > > are you testing this?
> > > 
> > > (Just want to make it clear: I'm *not* an expert on the rdma code, so my
> > > suggestion to put this in the rpcrdma_req was a suggestion for something
> > > to look into, not a claim that it's correct.)
> > > 
> > 
> > Just compile tested so far. Incidentally, I've been through the call stack:
> > 
> > call_transmit
> >  xprt_transmit
> >   xprt->ops->send_request(task)
> >    xprt_rdma_send_request
> >     rpcrdma_marshal_req
> >      rpcrdma_create_chunks
> >       rpcrdma_register_external
> >        rpcrdma_register_default_external
> > 
> > It appears that the context for kmalloc() should be fine unless there is
> > a spinlock held around call_transmit() (which seems unlikely).
> 
> Right, though I think it shouldn't be GFP_KERNEL--looks like writes
> could wait on it.

Nothing inside the RPC client should be using anything heavier than
GFP_NOWAIT (unless done at setup).

> In any case, the embedding-in-rpcrdma_req solution does look cleaner if
> that's correct (e.g. if we can be sure there won't be two simultaneous
> users of that array).

Putting it in the rpcrdma_req means that you have one copy per transport
slot. Why not rather put it in the rpcrdma_xprt?
AFAICS you only need this array at transmit time for registering memory
for RDMA, at which time the transport XPRT_LOCK guarantees that nobody
else is competing for these resources.
J. Bruce Fields March 11, 2013, 8 p.m. UTC | #5
On Mon, Mar 11, 2013 at 07:48:51PM +0000, Myklebust, Trond wrote:
> On Mon, 2013-03-11 at 15:15 -0400, J. Bruce Fields wrote:
> > On Mon, Mar 11, 2013 at 12:51:44PM -0600, Tim Gardner wrote:
> > > On 03/11/2013 12:14 PM, J. Bruce Fields wrote:
> > > <snip>
> > > >>
> > > >> v2 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_req
> > > >> and pass this request down through rpcrdma_register_external() and
> > > >> rpcrdma_register_default_external(). This is less overhead then using
> > > >> kmalloc() and requires no extra error checking as the allocation burden is
> > > >> shifted to the transport client.
> > > > 
> > > > Oh good--so that works, and the req is the right place to put this?  How
> > > > are you testing this?
> > > > 
> > > > (Just want to make it clear: I'm *not* an expert on the rdma code, so my
> > > > suggestion to put this in the rpcrdma_req was a suggestion for something
> > > > to look into, not a claim that it's correct.)
> > > > 
> > > 
> > > Just compile tested so far. Incidentally, I've been through the call stack:
> > > 
> > > call_transmit
> > >  xprt_transmit
> > >   xprt->ops->send_request(task)
> > >    xprt_rdma_send_request
> > >     rpcrdma_marshal_req
> > >      rpcrdma_create_chunks
> > >       rpcrdma_register_external
> > >        rpcrdma_register_default_external
> > > 
> > > It appears that the context for kmalloc() should be fine unless there is
> > > a spinlock held around call_transmit() (which seems unlikely).
> > 
> > Right, though I think it shouldn't be GFP_KERNEL--looks like writes
> > could wait on it.
> 
> Nothing inside the RPC client should be using anything heavier than
> GFP_NOWAIT (unless done at setup).
> 
> > In any case, the embedding-in-rpcrdma_req solution does look cleaner if
> > that's correct (e.g. if we can be sure there won't be two simultaneous
> > users of that array).
> 
> Putting it in the rpcrdma_req means that you have one copy per transport
> slot. Why not rather put it in the rpcrdma_xprt?
> AFAICS you only need this array at transmit time for registering memory
> for RDMA, at which time the transport XPRT_LOCK guarantees that nobody
> else is competing for these resources.

Oh, good.  If that works, Steve might want to look back at how that
array size was chosen?  I seem to recall there being some compromise due
to this array being on the stack, and that there might have been some
performance advantage to increasing it further, but I can't find the bug
right now....  (And I might be misremembering.)

--b.
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
J. Bruce Fields March 11, 2013, 9:25 p.m. UTC | #6
On Mon, Mar 11, 2013 at 03:15:08PM -0600, Tim Gardner wrote:
> rpcrdma_register_default_external() is several frames into the call stack which
> goes deeper yet. You run the risk of stack corruption by declaring such a large
> automatic variable, so move the array of 'struct ib_phys_buf' objects into the
> transport structure 'struct rpcrdma_xprt' (which is dynamically allocated) in
> order to silence the frame-larger-than warning. Access to each struct
> rpcrdma_xprt is serialized by XPRT_LOCKED in xprt_reserve_xprt(), so there is
> no danger of multiple accessors to the array of struct ib_phys_buf objects.
> 
> net/sunrpc/xprtrdma/verbs.c: In function 'rpcrdma_register_default_external':
> net/sunrpc/xprtrdma/verbs.c:1774:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=]
> 
> gcc version 4.6.3
> 
> Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
> Cc: "J. Bruce Fields" <bfields@fieldses.org>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Tom Tucker <tom@ogc.us>
> Cc: Haggai Eran <haggaie@mellanox.com>
> Cc: Or Gerlitz <ogerlitz@mellanox.com>
> Cc: Shani Michaeli <shanim@mellanox.com>
> Cc: linux-nfs@vger.kernel.org
> Cc: netdev@vger.kernel.org
> Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
> ---
> v1 - Use kmalloc() to dynamically allocate and free the array of 'struct
> ib_phys_buf' objects
> 
> v2 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_req
> and pass this request down through rpcrdma_register_external() and
> rpcrdma_register_default_external(). This is less overhead then using
> kmalloc() and requires no extra error checking as the allocation burden is
> shifted to the transport client.
> 
> v3 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_xprt.
> Pass a pointer to this transport structure into rpcrdma_register_default_external().
> This is less overhead then using kmalloc() and requires no extra error checking
> as the allocation burden is shifted to the transport client.

Looks good to me; wish we could get it tested....

In future if we do decide to also increase the size of that array we may
need to allocate it separately from struct rpcrdma_xprt itself, which
looks already fairly large without it; on x86_64:

	$ gdb net/sunrpc/xprtrdma/xprtrdma.ko
	...
	(gdb) p sizeof(struct rpcrdma_xprt)
	$1 = 2912

But that shouldn't be a big deal to do.

--b.

> 
>  net/sunrpc/xprtrdma/verbs.c     |   10 ++++++----
>  net/sunrpc/xprtrdma/xprt_rdma.h |    5 ++++-
>  2 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 93726560..c7aa3da 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -1730,13 +1730,14 @@ rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
>  }
>  
>  static int
> -rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
> -			int *nsegs, int writing, struct rpcrdma_ia *ia)
> +rpcrdma_register_default_external(struct rpcrdma_xprt *r_xprt,
> +			struct rpcrdma_mr_seg *seg, int *nsegs, int writing,
> +			struct rpcrdma_ia *ia)
>  {
>  	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
>  				  IB_ACCESS_REMOTE_READ);
>  	struct rpcrdma_mr_seg *seg1 = seg;
> -	struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
> +	struct ib_phys_buf *ipb = r_xprt->ipb;
>  	int len, i, rc = 0;
>  
>  	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
> @@ -1827,7 +1828,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
>  
>  	/* Default registration each time */
>  	default:
> -		rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
> +		rc = rpcrdma_register_default_external(r_xprt, seg, &nsegs,
> +			writing, ia);
>  		break;
>  	}
>  	if (rc)
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index cc1445d..d7b440f 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -269,7 +269,8 @@ struct rpcrdma_stats {
>   * for convenience. This structure need not be visible externally.
>   *
>   * It is allocated and initialized during mount, and released
> - * during unmount.
> + * during unmount. Access to this structure is serialized by XPRT_LOCKED
> + * in xprt_reserve_xprt().
>   */
>  struct rpcrdma_xprt {
>  	struct rpc_xprt		xprt;
> @@ -279,6 +280,8 @@ struct rpcrdma_xprt {
>  	struct rpcrdma_create_data_internal rx_data;
>  	struct delayed_work	rdma_connect;
>  	struct rpcrdma_stats	rx_stats;
> +	/* temp work array */
> +	struct ib_phys_buf	ipb[RPCRDMA_MAX_DATA_SEGS];
>  };
>  
>  #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
> -- 
> 1.7.9.5
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tom Tucker March 11, 2013, 11:02 p.m. UTC | #7
On 3/11/13 4:25 PM, J. Bruce Fields wrote:
> On Mon, Mar 11, 2013 at 03:15:08PM -0600, Tim Gardner wrote:
>> rpcrdma_register_default_external() is several frames into the call stack which
>> goes deeper yet. You run the risk of stack corruption by declaring such a large
>> automatic variable, so move the array of 'struct ib_phys_buf' objects into the
>> transport structure 'struct rpcrdma_xprt' (which is dynamically allocated) in
>> order to silence the frame-larger-than warning. Access to each struct
>> rpcrdma_xprt is serialized by XPRT_LOCKED in xprt_reserve_xprt(), so there is
>> no danger of multiple accessors to the array of struct ib_phys_buf objects.
>>
>> net/sunrpc/xprtrdma/verbs.c: In function 'rpcrdma_register_default_external':
>> net/sunrpc/xprtrdma/verbs.c:1774:1: warning: the frame size of 1056 bytes is larger than 1024 bytes [-Wframe-larger-than=]
>>
>> gcc version 4.6.3
>>
>> Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
>> Cc: "J. Bruce Fields" <bfields@fieldses.org>
>> Cc: "David S. Miller" <davem@davemloft.net>
>> Cc: Tom Tucker <tom@ogc.us>
>> Cc: Haggai Eran <haggaie@mellanox.com>
>> Cc: Or Gerlitz <ogerlitz@mellanox.com>
>> Cc: Shani Michaeli <shanim@mellanox.com>
>> Cc: linux-nfs@vger.kernel.org
>> Cc: netdev@vger.kernel.org
>> Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
>> ---
>> v1 - Use kmalloc() to dynamically allocate and free the array of 'struct
>> ib_phys_buf' objects
>>
>> v2 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_req
>> and pass this request down through rpcrdma_register_external() and
>> rpcrdma_register_default_external(). This is less overhead then using
>> kmalloc() and requires no extra error checking as the allocation burden is
>> shifted to the transport client.
>>
>> v3 - Move the array of 'struct ib_phys_buf' objects into struct rpcrdma_xprt.
>> Pass a pointer to this transport structure into rpcrdma_register_default_external().
>> This is less overhead then using kmalloc() and requires no extra error checking
>> as the allocation burden is shifted to the transport client.
> Looks good to me; wish we could get it tested....

I will test it. Tim could you please send me a final version that you'd 
like tested as a single message?

Would someone (like Tim maybe ... hint hint) look at tearing out all those 
dead registration strategies? I don't think we need or will ever use 
bounce-buffers, memory windows, or mlnx fmr.  The only two that are used 
and tested are all-phys and FRMR (the default).

Tom

> In future if we do decide to also increase the size of that array we may
> need to allocate it separately from struct rpcrdma_xprt itself, which
> looks already fairly large without it; on x86_64:
>
> 	$ gdb net/sunrpc/xprtrdma/xprtrdma.ko
> 	...
> 	(gdb) p sizeof(struct rpcrdma_xprt)
> 	$1 = 2912
>
> But that shouldn't be a big deal to do.
>
> --b.
>
>>   net/sunrpc/xprtrdma/verbs.c     |   10 ++++++----
>>   net/sunrpc/xprtrdma/xprt_rdma.h |    5 ++++-
>>   2 files changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index 93726560..c7aa3da 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -1730,13 +1730,14 @@ rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
>>   }
>>   
>>   static int
>> -rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
>> -			int *nsegs, int writing, struct rpcrdma_ia *ia)
>> +rpcrdma_register_default_external(struct rpcrdma_xprt *r_xprt,
>> +			struct rpcrdma_mr_seg *seg, int *nsegs, int writing,
>> +			struct rpcrdma_ia *ia)
>>   {
>>   	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
>>   				  IB_ACCESS_REMOTE_READ);
>>   	struct rpcrdma_mr_seg *seg1 = seg;
>> -	struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
>> +	struct ib_phys_buf *ipb = r_xprt->ipb;
>>   	int len, i, rc = 0;
>>   
>>   	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
>> @@ -1827,7 +1828,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
>>   
>>   	/* Default registration each time */
>>   	default:
>> -		rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
>> +		rc = rpcrdma_register_default_external(r_xprt, seg, &nsegs,
>> +			writing, ia);
>>   		break;
>>   	}
>>   	if (rc)
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index cc1445d..d7b440f 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -269,7 +269,8 @@ struct rpcrdma_stats {
>>    * for convenience. This structure need not be visible externally.
>>    *
>>    * It is allocated and initialized during mount, and released
>> - * during unmount.
>> + * during unmount. Access to this structure is serialized by XPRT_LOCKED
>> + * in xprt_reserve_xprt().
>>    */
>>   struct rpcrdma_xprt {
>>   	struct rpc_xprt		xprt;
>> @@ -279,6 +280,8 @@ struct rpcrdma_xprt {
>>   	struct rpcrdma_create_data_internal rx_data;
>>   	struct delayed_work	rdma_connect;
>>   	struct rpcrdma_stats	rx_stats;
>> +	/* temp work array */
>> +	struct ib_phys_buf	ipb[RPCRDMA_MAX_DATA_SEGS];
>>   };
>>   
>>   #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt)
>> -- 
>> 1.7.9.5
>>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tim Gardner March 12, 2013, 2:53 a.m. UTC | #8
On 03/11/2013 05:02 PM, Tom Tucker wrote:
> On 3/11/13 4:25 PM, J. Bruce Fields wrote:

<snip>

>> Looks good to me; wish we could get it tested....
>
> I will test it. Tim could you please send me a final version that you'd
> like tested as a single message?
>

I'm a little confused about what you are asking. I think v3 of the patch 
is the final version (unless you find bugs with it).

> Would someone (like Tim maybe ... hint hint) look at tearing out all
> those dead registration strategies? I don't think we need or will ever
> use bounce-buffers, memory windows, or mlnx fmr.  The only two that are
> used and tested are all-phys and FRMR (the default).
>

Dunno if I'll get time for that. I had a one day window where I could 
hack out some simple patches. Now I'm back to the usual grindstone.

rtg
Tom Tucker March 12, 2013, 3:40 a.m. UTC | #9
Ah....Ok

On 3/11/13 9:53 PM, Tim Gardner wrote:
> On 03/11/2013 05:02 PM, Tom Tucker wrote:
>> On 3/11/13 4:25 PM, J. Bruce Fields wrote:
>
> <snip>
>
>>> Looks good to me; wish we could get it tested....
>>
>> I will test it. Tim could you please send me a final version that you'd
>> like tested as a single message?
>>
>
> I'm a little confused about what you are asking. I think v3 of the patch 
> is the final version (unless you find bugs with it).
>
>> Would someone (like Tim maybe ... hint hint) look at tearing out all
>> those dead registration strategies? I don't think we need or will ever
>> use bounce-buffers, memory windows, or mlnx fmr.  The only two that are
>> used and tested are all-phys and FRMR (the default).
>>
>
> Dunno if I'll get time for that. I had a one day window where I could 
> hack out some simple patches. Now I'm back to the usual grindstone.
>
> rtg

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e03725b..c89448b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -203,7 +203,7 @@  rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 
 	do {
 		/* bind/register the memory, then build chunk from result. */
-		int n = rpcrdma_register_external(seg, nsegs,
+		int n = rpcrdma_register_external(req, seg, nsegs,
 						cur_wchunk != NULL, r_xprt);
 		if (n <= 0)
 			goto out;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560..5b439ed 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1730,13 +1730,14 @@  rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
 }
 
 static int
-rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
-			int *nsegs, int writing, struct rpcrdma_ia *ia)
+rpcrdma_register_default_external(struct rpcrdma_req *req,
+			struct rpcrdma_mr_seg *seg, int *nsegs, int writing,
+			struct rpcrdma_ia *ia)
 {
 	int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
 				  IB_ACCESS_REMOTE_READ);
 	struct rpcrdma_mr_seg *seg1 = seg;
-	struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
+	struct ib_phys_buf *ipb = req->rl_ipb;
 	int len, i, rc = 0;
 
 	if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
@@ -1791,7 +1792,7 @@  rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
 }
 
 int
-rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
+rpcrdma_register_external(struct rpcrdma_req *req, struct rpcrdma_mr_seg *seg,
 			int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -1827,7 +1828,7 @@  rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
 
 	/* Default registration each time */
 	default:
-		rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
+		rc = rpcrdma_register_default_external(req, seg, &nsegs, writing, ia);
 		break;
 	}
 	if (rc)
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445d..b10ed34 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -192,6 +192,7 @@  struct rpcrdma_req {
 	struct ib_sge	rl_send_iov[4];	/* for active requests */
 	struct ib_sge	rl_iov;		/* for posting */
 	struct ib_mr	*rl_handle;	/* handle for mem in rl_iov */
+	struct ib_phys_buf rl_ipb[RPCRDMA_MAX_DATA_SEGS]; /* temp work array */
 	char		rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */
 	__u32 		rl_xdr_buf[0];	/* start of returned rpc rq_buffer */
 };
@@ -327,7 +328,7 @@  int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int,
 int rpcrdma_deregister_internal(struct rpcrdma_ia *,
 				struct ib_mr *, struct ib_sge *);
 
-int rpcrdma_register_external(struct rpcrdma_mr_seg *,
+int rpcrdma_register_external(struct rpcrdma_req *, struct rpcrdma_mr_seg *,
 				int, int, struct rpcrdma_xprt *);
 int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
 				struct rpcrdma_xprt *, void *);