diff mbox series

[v1,iproute2-next,1/4] rdma: add helper rd_sendrecv_msg()

Message ID aa883bc5ffd63047cf57e2f9925002e7573fd7ac.1550773362.git.swise@opengridcomputing.com
State Changes Requested
Delegated to: David Ahern
Headers show
Series Dynamic rdma link creation | expand

Commit Message

Steve Wise Feb. 21, 2019, 4:19 p.m. UTC
This function sends the constructed netlink message and then
receives the response, displaying any error text.

Change 'rdma dev set' to use it.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
---
 rdma/dev.c   |  2 +-
 rdma/rdma.h  |  1 +
 rdma/utils.c | 21 +++++++++++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

Comments

Leon Romanovsky Feb. 23, 2019, 9:26 a.m. UTC | #1
On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
> This function sends the constructed netlink message and then
> receives the response, displaying any error text.
>
> Change 'rdma dev set' to use it.
>
> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> ---
>  rdma/dev.c   |  2 +-
>  rdma/rdma.h  |  1 +
>  rdma/utils.c | 21 +++++++++++++++++++++
>  3 files changed, 23 insertions(+), 1 deletion(-)
>
> diff --git a/rdma/dev.c b/rdma/dev.c
> index 60ff4b31e320..d2949c378f08 100644
> --- a/rdma/dev.c
> +++ b/rdma/dev.c
> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
>
> -	return rd_send_msg(rd);
> +	return rd_sendrecv_msg(rd, seq);
>  }
>
>  static int dev_one_set(struct rd *rd)
> diff --git a/rdma/rdma.h b/rdma/rdma.h
> index 547bb5749a39..20be2f12c4f8 100644
> --- a/rdma/rdma.h
> +++ b/rdma/rdma.h
> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
>   */
>  int rd_send_msg(struct rd *rd);
>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
>  int rd_attr_cb(const struct nlattr *attr, void *data);
> diff --git a/rdma/utils.c b/rdma/utils.c
> index 069d44fece10..a6f2826c9605 100644
> --- a/rdma/utils.c
> +++ b/rdma/utils.c
> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
>  	return ret;
>  }
>
> +static int null_cb(const struct nlmsghdr *nlh, void *data)
> +{
> +	return MNL_CB_OK;
> +}
> +
> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
> +{
> +	int ret;
> +
> +	ret = rd_send_msg(rd);
> +	if (ret) {
> +		perror(NULL);

This is more or less already done in rd_send_msg() and that function
prints something in case of execution error. So the missing piece
is to update rd_recv_msg(), so all places will "magically" print errors
and not only dev_set_name().

> +		goto out;
> +	}
> +	ret = rd_recv_msg(rd, null_cb, rd, seq);
> +	if (ret)
> +		perror(NULL);
> +out:
> +	return ret;
> +}
> +
>  static struct dev_map *_dev_map_lookup(struct rd *rd, const char *dev_name)
>  {
>  	struct dev_map *dev_map;
> --
> 1.8.3.1
>
Leon Romanovsky Feb. 23, 2019, 9:31 a.m. UTC | #2
On Sat, Feb 23, 2019 at 11:26:15AM +0200, Leon Romanovsky wrote:
> On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
> > This function sends the constructed netlink message and then
> > receives the response, displaying any error text.
> >
> > Change 'rdma dev set' to use it.
> >
> > Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> > ---
> >  rdma/dev.c   |  2 +-
> >  rdma/rdma.h  |  1 +
> >  rdma/utils.c | 21 +++++++++++++++++++++
> >  3 files changed, 23 insertions(+), 1 deletion(-)
> >
> > diff --git a/rdma/dev.c b/rdma/dev.c
> > index 60ff4b31e320..d2949c378f08 100644
> > --- a/rdma/dev.c
> > +++ b/rdma/dev.c
> > @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
> >  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
> >  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
> >
> > -	return rd_send_msg(rd);
> > +	return rd_sendrecv_msg(rd, seq);
> >  }
> >
> >  static int dev_one_set(struct rd *rd)
> > diff --git a/rdma/rdma.h b/rdma/rdma.h
> > index 547bb5749a39..20be2f12c4f8 100644
> > --- a/rdma/rdma.h
> > +++ b/rdma/rdma.h
> > @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
> >   */
> >  int rd_send_msg(struct rd *rd);
> >  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
> > +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
> >  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
> >  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
> >  int rd_attr_cb(const struct nlattr *attr, void *data);
> > diff --git a/rdma/utils.c b/rdma/utils.c
> > index 069d44fece10..a6f2826c9605 100644
> > --- a/rdma/utils.c
> > +++ b/rdma/utils.c
> > @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
> >  	return ret;
> >  }
> >
> > +static int null_cb(const struct nlmsghdr *nlh, void *data)
> > +{
> > +	return MNL_CB_OK;
> > +}
> > +
> > +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
> > +{
> > +	int ret;
> > +
> > +	ret = rd_send_msg(rd);
> > +	if (ret) {
> > +		perror(NULL);
>
> This is more or less already done in rd_send_msg() and that function
> prints something in case of execution error. So the missing piece
> is to update rd_recv_msg(), so all places will "magically" print errors
> and not only dev_set_name().
>
> > +		goto out;
> > +	}
> > +	ret = rd_recv_msg(rd, null_cb, rd, seq);

Will this "null_cb" work for all send/recv flows or only in flows where
response can be error only? Will we need this recv_msg if we implement
extack support?

> > +	if (ret)
> > +		perror(NULL);
> > +out:
> > +	return ret;
> > +}
> > +
> >  static struct dev_map *_dev_map_lookup(struct rd *rd, const char *dev_name)
> >  {
> >  	struct dev_map *dev_map;
> > --
> > 1.8.3.1
> >
Steve Wise Feb. 26, 2019, 5:13 p.m. UTC | #3
On 2/23/2019 3:26 AM, Leon Romanovsky wrote:
> On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
>> This function sends the constructed netlink message and then
>> receives the response, displaying any error text.
>>
>> Change 'rdma dev set' to use it.
>>
>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>> ---
>>  rdma/dev.c   |  2 +-
>>  rdma/rdma.h  |  1 +
>>  rdma/utils.c | 21 +++++++++++++++++++++
>>  3 files changed, 23 insertions(+), 1 deletion(-)
>>
>> diff --git a/rdma/dev.c b/rdma/dev.c
>> index 60ff4b31e320..d2949c378f08 100644
>> --- a/rdma/dev.c
>> +++ b/rdma/dev.c
>> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
>>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
>>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
>>
>> -	return rd_send_msg(rd);
>> +	return rd_sendrecv_msg(rd, seq);
>>  }
>>
>>  static int dev_one_set(struct rd *rd)
>> diff --git a/rdma/rdma.h b/rdma/rdma.h
>> index 547bb5749a39..20be2f12c4f8 100644
>> --- a/rdma/rdma.h
>> +++ b/rdma/rdma.h
>> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
>>   */
>>  int rd_send_msg(struct rd *rd);
>>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
>>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
>>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
>>  int rd_attr_cb(const struct nlattr *attr, void *data);
>> diff --git a/rdma/utils.c b/rdma/utils.c
>> index 069d44fece10..a6f2826c9605 100644
>> --- a/rdma/utils.c
>> +++ b/rdma/utils.c
>> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
>>  	return ret;
>>  }
>>
>> +static int null_cb(const struct nlmsghdr *nlh, void *data)
>> +{
>> +	return MNL_CB_OK;
>> +}
>> +
>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
>> +{
>> +	int ret;
>> +
>> +	ret = rd_send_msg(rd);
>> +	if (ret) {
>> +		perror(NULL);
> This is more or less already done in rd_send_msg() and that function
> prints something in case of execution error. So the missing piece
> is to update rd_recv_msg(), so all places will "magically" print errors
> and not only dev_set_name().

Yea ok.
Steve Wise Feb. 26, 2019, 5:19 p.m. UTC | #4
On 2/23/2019 3:31 AM, Leon Romanovsky wrote:
> On Sat, Feb 23, 2019 at 11:26:15AM +0200, Leon Romanovsky wrote:
>> On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
>>> This function sends the constructed netlink message and then
>>> receives the response, displaying any error text.
>>>
>>> Change 'rdma dev set' to use it.
>>>
>>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>>> ---
>>>  rdma/dev.c   |  2 +-
>>>  rdma/rdma.h  |  1 +
>>>  rdma/utils.c | 21 +++++++++++++++++++++
>>>  3 files changed, 23 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/rdma/dev.c b/rdma/dev.c
>>> index 60ff4b31e320..d2949c378f08 100644
>>> --- a/rdma/dev.c
>>> +++ b/rdma/dev.c
>>> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
>>>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
>>>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
>>>
>>> -	return rd_send_msg(rd);
>>> +	return rd_sendrecv_msg(rd, seq);
>>>  }
>>>
>>>  static int dev_one_set(struct rd *rd)
>>> diff --git a/rdma/rdma.h b/rdma/rdma.h
>>> index 547bb5749a39..20be2f12c4f8 100644
>>> --- a/rdma/rdma.h
>>> +++ b/rdma/rdma.h
>>> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
>>>   */
>>>  int rd_send_msg(struct rd *rd);
>>>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
>>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
>>>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
>>>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
>>>  int rd_attr_cb(const struct nlattr *attr, void *data);
>>> diff --git a/rdma/utils.c b/rdma/utils.c
>>> index 069d44fece10..a6f2826c9605 100644
>>> --- a/rdma/utils.c
>>> +++ b/rdma/utils.c
>>> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
>>>  	return ret;
>>>  }
>>>
>>> +static int null_cb(const struct nlmsghdr *nlh, void *data)
>>> +{
>>> +	return MNL_CB_OK;
>>> +}
>>> +
>>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
>>> +{
>>> +	int ret;
>>> +
>>> +	ret = rd_send_msg(rd);
>>> +	if (ret) {
>>> +		perror(NULL);
>> This is more or less already done in rd_send_msg() and that function
>> prints something in case of execution error. So the missing piece
>> is to update rd_recv_msg(), so all places will "magically" print errors
>> and not only dev_set_name().
>>
>>> +		goto out;
>>> +	}
>>> +	ret = rd_recv_msg(rd, null_cb, rd, seq);
> Will this "null_cb" work for all send/recv flows or only in flows where
> response can be error only? 


Only those flows where no nl attributes are expected to be returned.


> Will we need this recv_msg if we implement
> extack support?


I'm not sure how extack works.  Do you know?

Thanks!

Steve.
Leon Romanovsky Feb. 26, 2019, 7:16 p.m. UTC | #5
On Tue, Feb 26, 2019 at 11:19:12AM -0600, Steve Wise wrote:
>
> On 2/23/2019 3:31 AM, Leon Romanovsky wrote:
> > On Sat, Feb 23, 2019 at 11:26:15AM +0200, Leon Romanovsky wrote:
> >> On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
> >>> This function sends the constructed netlink message and then
> >>> receives the response, displaying any error text.
> >>>
> >>> Change 'rdma dev set' to use it.
> >>>
> >>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> >>> ---
> >>>  rdma/dev.c   |  2 +-
> >>>  rdma/rdma.h  |  1 +
> >>>  rdma/utils.c | 21 +++++++++++++++++++++
> >>>  3 files changed, 23 insertions(+), 1 deletion(-)
> >>>
> >>> diff --git a/rdma/dev.c b/rdma/dev.c
> >>> index 60ff4b31e320..d2949c378f08 100644
> >>> --- a/rdma/dev.c
> >>> +++ b/rdma/dev.c
> >>> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
> >>>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
> >>>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
> >>>
> >>> -	return rd_send_msg(rd);
> >>> +	return rd_sendrecv_msg(rd, seq);
> >>>  }
> >>>
> >>>  static int dev_one_set(struct rd *rd)
> >>> diff --git a/rdma/rdma.h b/rdma/rdma.h
> >>> index 547bb5749a39..20be2f12c4f8 100644
> >>> --- a/rdma/rdma.h
> >>> +++ b/rdma/rdma.h
> >>> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
> >>>   */
> >>>  int rd_send_msg(struct rd *rd);
> >>>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
> >>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
> >>>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
> >>>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
> >>>  int rd_attr_cb(const struct nlattr *attr, void *data);
> >>> diff --git a/rdma/utils.c b/rdma/utils.c
> >>> index 069d44fece10..a6f2826c9605 100644
> >>> --- a/rdma/utils.c
> >>> +++ b/rdma/utils.c
> >>> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
> >>>  	return ret;
> >>>  }
> >>>
> >>> +static int null_cb(const struct nlmsghdr *nlh, void *data)
> >>> +{
> >>> +	return MNL_CB_OK;
> >>> +}
> >>> +
> >>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
> >>> +{
> >>> +	int ret;
> >>> +
> >>> +	ret = rd_send_msg(rd);
> >>> +	if (ret) {
> >>> +		perror(NULL);
> >> This is more or less already done in rd_send_msg() and that function
> >> prints something in case of execution error. So the missing piece
> >> is to update rd_recv_msg(), so all places will "magically" print errors
> >> and not only dev_set_name().
> >>
> >>> +		goto out;
> >>> +	}
> >>> +	ret = rd_recv_msg(rd, null_cb, rd, seq);
> > Will this "null_cb" work for all send/recv flows or only in flows where
> > response can be error only?
>
>
> Only those flows where no nl attributes are expected to be returned.
>
>
> > Will we need this recv_msg if we implement
> > extack support?
>
>
> I'm not sure how extack works.  Do you know?

I can't say that :)

>
> Thanks!
>
> Steve.
>
>
Steve Wise Feb. 26, 2019, 7:55 p.m. UTC | #6
On 2/26/2019 1:16 PM, Leon Romanovsky wrote:
> On Tue, Feb 26, 2019 at 11:19:12AM -0600, Steve Wise wrote:
>> On 2/23/2019 3:31 AM, Leon Romanovsky wrote:
>>> On Sat, Feb 23, 2019 at 11:26:15AM +0200, Leon Romanovsky wrote:
>>>> On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
>>>>> This function sends the constructed netlink message and then
>>>>> receives the response, displaying any error text.
>>>>>
>>>>> Change 'rdma dev set' to use it.
>>>>>
>>>>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>>>>> ---
>>>>>  rdma/dev.c   |  2 +-
>>>>>  rdma/rdma.h  |  1 +
>>>>>  rdma/utils.c | 21 +++++++++++++++++++++
>>>>>  3 files changed, 23 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/rdma/dev.c b/rdma/dev.c
>>>>> index 60ff4b31e320..d2949c378f08 100644
>>>>> --- a/rdma/dev.c
>>>>> +++ b/rdma/dev.c
>>>>> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
>>>>>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
>>>>>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
>>>>>
>>>>> -	return rd_send_msg(rd);
>>>>> +	return rd_sendrecv_msg(rd, seq);
>>>>>  }
>>>>>
>>>>>  static int dev_one_set(struct rd *rd)
>>>>> diff --git a/rdma/rdma.h b/rdma/rdma.h
>>>>> index 547bb5749a39..20be2f12c4f8 100644
>>>>> --- a/rdma/rdma.h
>>>>> +++ b/rdma/rdma.h
>>>>> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
>>>>>   */
>>>>>  int rd_send_msg(struct rd *rd);
>>>>>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
>>>>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
>>>>>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
>>>>>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
>>>>>  int rd_attr_cb(const struct nlattr *attr, void *data);
>>>>> diff --git a/rdma/utils.c b/rdma/utils.c
>>>>> index 069d44fece10..a6f2826c9605 100644
>>>>> --- a/rdma/utils.c
>>>>> +++ b/rdma/utils.c
>>>>> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
>>>>>  	return ret;
>>>>>  }
>>>>>
>>>>> +static int null_cb(const struct nlmsghdr *nlh, void *data)
>>>>> +{
>>>>> +	return MNL_CB_OK;
>>>>> +}
>>>>> +
>>>>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
>>>>> +{
>>>>> +	int ret;
>>>>> +
>>>>> +	ret = rd_send_msg(rd);
>>>>> +	if (ret) {
>>>>> +		perror(NULL);
>>>> This is more or less already done in rd_send_msg() and that function
>>>> prints something in case of execution error. So the missing piece
>>>> is to update rd_recv_msg(), so all places will "magically" print errors
>>>> and not only dev_set_name().
>>>>
>>>>> +		goto out;
>>>>> +	}
>>>>> +	ret = rd_recv_msg(rd, null_cb, rd, seq);
>>> Will this "null_cb" work for all send/recv flows or only in flows where
>>> response can be error only?
>>
>> Only those flows where no nl attributes are expected to be returned.
>>
>>
>>> Will we need this recv_msg if we implement
>>> extack support?
>>
>> I'm not sure how extack works.  Do you know?
> I can't say that :)


We can change things if/when we support extack.

Stevo.
David Ahern Feb. 26, 2019, 7:55 p.m. UTC | #7
On 2/26/19 10:19 AM, Steve Wise wrote:
> 
>> Will we need this recv_msg if we implement
>> extack support?
> 
> I'm not sure how extack works.  Do you know?

see devlink/mnlg.c

mnlg_socket_open()
{
	...
	mnl_socket_setsockopt(nlg->nl, NETLINK_EXT_ACK, &one, sizeof(one));
	...
}

and

mnlg_cb_error()

That code under devlink needs to be generic for both tools.
Steve Wise Feb. 27, 2019, 8:23 p.m. UTC | #8
> -----Original Message-----
> From: Steve Wise <swise@opengridcomputing.com>
> Sent: Tuesday, February 26, 2019 11:14 AM
> To: Leon Romanovsky <leon@kernel.org>
> Cc: dsahern@gmail.com; stephen@networkplumber.org;
> netdev@vger.kernel.org; linux-rdma@vger.kernel.org
> Subject: Re: [PATCH v1 iproute2-next 1/4] rdma: add helper
> rd_sendrecv_msg()
> 
> 
> On 2/23/2019 3:26 AM, Leon Romanovsky wrote:
> > On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
> >> This function sends the constructed netlink message and then
> >> receives the response, displaying any error text.
> >>
> >> Change 'rdma dev set' to use it.
> >>
> >> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> >> ---
> >>  rdma/dev.c   |  2 +-
> >>  rdma/rdma.h  |  1 +
> >>  rdma/utils.c | 21 +++++++++++++++++++++
> >>  3 files changed, 23 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/rdma/dev.c b/rdma/dev.c
> >> index 60ff4b31e320..d2949c378f08 100644
> >> --- a/rdma/dev.c
> >> +++ b/rdma/dev.c
> >> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
> >>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd-
> >dev_idx);
> >>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME,
> rd_argv(rd));
> >>
> >> -	return rd_send_msg(rd);
> >> +	return rd_sendrecv_msg(rd, seq);
> >>  }
> >>
> >>  static int dev_one_set(struct rd *rd)
> >> diff --git a/rdma/rdma.h b/rdma/rdma.h
> >> index 547bb5749a39..20be2f12c4f8 100644
> >> --- a/rdma/rdma.h
> >> +++ b/rdma/rdma.h
> >> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const
> char *key);
> >>   */
> >>  int rd_send_msg(struct rd *rd);
> >>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t
> seq);
> >> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
> >>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq,
uint16_t
> flags);
> >>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
> >>  int rd_attr_cb(const struct nlattr *attr, void *data);
> >> diff --git a/rdma/utils.c b/rdma/utils.c
> >> index 069d44fece10..a6f2826c9605 100644
> >> --- a/rdma/utils.c
> >> +++ b/rdma/utils.c
> >> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback,
> void *data, unsigned int seq)
> >>  	return ret;
> >>  }
> >>
> >> +static int null_cb(const struct nlmsghdr *nlh, void *data)
> >> +{
> >> +	return MNL_CB_OK;
> >> +}
> >> +
> >> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
> >> +{
> >> +	int ret;
> >> +
> >> +	ret = rd_send_msg(rd);
> >> +	if (ret) {
> >> +		perror(NULL);
> > This is more or less already done in rd_send_msg() and that function
> > prints something in case of execution error. So the missing piece
> > is to update rd_recv_msg(), so all places will "magically" print errors
> > and not only dev_set_name().
> 
> Yea ok.
> 

dev_set_name() doesn't call rd_recv_msg().  So you're suggesting I fix up
rd_recv_msg() to display errors and make dev_set_name() call rd_recv_msg()
with the null_cb function?  You sure that's the way to go?
Steve Wise Feb. 28, 2019, 7:41 p.m. UTC | #9
On 2/23/2019 3:26 AM, Leon Romanovsky wrote:
> On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
>> This function sends the constructed netlink message and then
>> receives the response, displaying any error text.
>>
>> Change 'rdma dev set' to use it.
>>
>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>> ---
>>  rdma/dev.c   |  2 +-
>>  rdma/rdma.h  |  1 +
>>  rdma/utils.c | 21 +++++++++++++++++++++
>>  3 files changed, 23 insertions(+), 1 deletion(-)
>>
>> diff --git a/rdma/dev.c b/rdma/dev.c
>> index 60ff4b31e320..d2949c378f08 100644
>> --- a/rdma/dev.c
>> +++ b/rdma/dev.c
>> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
>>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
>>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
>>
>> -	return rd_send_msg(rd);
>> +	return rd_sendrecv_msg(rd, seq);
>>  }
>>
>>  static int dev_one_set(struct rd *rd)
>> diff --git a/rdma/rdma.h b/rdma/rdma.h
>> index 547bb5749a39..20be2f12c4f8 100644
>> --- a/rdma/rdma.h
>> +++ b/rdma/rdma.h
>> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
>>   */
>>  int rd_send_msg(struct rd *rd);
>>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
>>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
>>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
>>  int rd_attr_cb(const struct nlattr *attr, void *data);
>> diff --git a/rdma/utils.c b/rdma/utils.c
>> index 069d44fece10..a6f2826c9605 100644
>> --- a/rdma/utils.c
>> +++ b/rdma/utils.c
>> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
>>  	return ret;
>>  }
>>
>> +static int null_cb(const struct nlmsghdr *nlh, void *data)
>> +{
>> +	return MNL_CB_OK;
>> +}
>> +
>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
>> +{
>> +	int ret;
>> +
>> +	ret = rd_send_msg(rd);
>> +	if (ret) {
>> +		perror(NULL);
> This is more or less already done in rd_send_msg() and that function
> prints something in case of execution error. So the missing piece
> is to update rd_recv_msg(), so all places will "magically" print errors
> and not only dev_set_name().

Hey Leon,

Displaying errors in rd_recv_msg() as you suggested:

@@ -693,10 +693,28 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback,
void *data, unsigned int seq)
                ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
        } while (ret > 0);

+       if (ret < 0)
+               perror(NULL);
+
        mnl_socket_close(rd->nl);
        return ret;
 }

Causes problems when doing filtered dumps:

[root@stevo1 iproute2]# ./rdma/rdma res show qp
link mlx5_0/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
link mlx5_0/1 lqpn 176 type UD state RTS sq-psn 0 comm [ib_core]
link mlx5_1/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
link mlx5_1/1 lqpn 2224 type UD state RTS sq-psn 0 comm [ib_core]
link rxe_foo0/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
[root@stevo1 iproute2]# ./rdma/rdma res show qp  lqpn 2224
Invalid argument
No such file or directory
Invalid argument
link mlx5_1/1 lqpn 2224 type UD state RTS sq-psn 0 comm [ib_core]
Invalid argument
No such file or directory
[root@stevo1 iproute2]#


Steve.
Leon Romanovsky Feb. 28, 2019, 7:56 p.m. UTC | #10
On Thu, Feb 28, 2019 at 01:41:20PM -0600, Steve Wise wrote:
>
> On 2/23/2019 3:26 AM, Leon Romanovsky wrote:
> > On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
> >> This function sends the constructed netlink message and then
> >> receives the response, displaying any error text.
> >>
> >> Change 'rdma dev set' to use it.
> >>
> >> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> >> ---
> >>  rdma/dev.c   |  2 +-
> >>  rdma/rdma.h  |  1 +
> >>  rdma/utils.c | 21 +++++++++++++++++++++
> >>  3 files changed, 23 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/rdma/dev.c b/rdma/dev.c
> >> index 60ff4b31e320..d2949c378f08 100644
> >> --- a/rdma/dev.c
> >> +++ b/rdma/dev.c
> >> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
> >>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
> >>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
> >>
> >> -	return rd_send_msg(rd);
> >> +	return rd_sendrecv_msg(rd, seq);
> >>  }
> >>
> >>  static int dev_one_set(struct rd *rd)
> >> diff --git a/rdma/rdma.h b/rdma/rdma.h
> >> index 547bb5749a39..20be2f12c4f8 100644
> >> --- a/rdma/rdma.h
> >> +++ b/rdma/rdma.h
> >> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
> >>   */
> >>  int rd_send_msg(struct rd *rd);
> >>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
> >> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
> >>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
> >>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
> >>  int rd_attr_cb(const struct nlattr *attr, void *data);
> >> diff --git a/rdma/utils.c b/rdma/utils.c
> >> index 069d44fece10..a6f2826c9605 100644
> >> --- a/rdma/utils.c
> >> +++ b/rdma/utils.c
> >> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
> >>  	return ret;
> >>  }
> >>
> >> +static int null_cb(const struct nlmsghdr *nlh, void *data)
> >> +{
> >> +	return MNL_CB_OK;
> >> +}
> >> +
> >> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
> >> +{
> >> +	int ret;
> >> +
> >> +	ret = rd_send_msg(rd);
> >> +	if (ret) {
> >> +		perror(NULL);
> > This is more or less already done in rd_send_msg() and that function
> > prints something in case of execution error. So the missing piece
> > is to update rd_recv_msg(), so all places will "magically" print errors
> > and not only dev_set_name().
>
> Hey Leon,
>
> Displaying errors in rd_recv_msg() as you suggested:
>
> @@ -693,10 +693,28 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback,
> void *data, unsigned int seq)
>                 ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
>         } while (ret > 0);
>
> +       if (ret < 0)
> +               perror(NULL);
> +
>         mnl_socket_close(rd->nl);
>         return ret;
>  }
>
> Causes problems when doing filtered dumps:
>
> [root@stevo1 iproute2]# ./rdma/rdma res show qp
> link mlx5_0/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
> link mlx5_0/1 lqpn 176 type UD state RTS sq-psn 0 comm [ib_core]
> link mlx5_1/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
> link mlx5_1/1 lqpn 2224 type UD state RTS sq-psn 0 comm [ib_core]
> link rxe_foo0/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
> [root@stevo1 iproute2]# ./rdma/rdma res show qp  lqpn 2224
> Invalid argument
> No such file or directory

Why do we it? We are supposed to check "invalid argument" before sending
message and are not supposed to see perror(). I'm not near code right
now, so most probably wrong in my assumption.

> Invalid argument
> link mlx5_1/1 lqpn 2224 type UD state RTS sq-psn 0 comm [ib_core]
> Invalid argument
> No such file or directory
> [root@stevo1 iproute2]#
>
>
> Steve.
>
Steve Wise Feb. 28, 2019, 8:10 p.m. UTC | #11
On 2/28/2019 1:56 PM, Leon Romanovsky wrote:
> On Thu, Feb 28, 2019 at 01:41:20PM -0600, Steve Wise wrote:
>> On 2/23/2019 3:26 AM, Leon Romanovsky wrote:
>>> On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
>>>> This function sends the constructed netlink message and then
>>>> receives the response, displaying any error text.
>>>>
>>>> Change 'rdma dev set' to use it.
>>>>
>>>> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
>>>> ---
>>>>  rdma/dev.c   |  2 +-
>>>>  rdma/rdma.h  |  1 +
>>>>  rdma/utils.c | 21 +++++++++++++++++++++
>>>>  3 files changed, 23 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/rdma/dev.c b/rdma/dev.c
>>>> index 60ff4b31e320..d2949c378f08 100644
>>>> --- a/rdma/dev.c
>>>> +++ b/rdma/dev.c
>>>> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
>>>>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
>>>>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
>>>>
>>>> -	return rd_send_msg(rd);
>>>> +	return rd_sendrecv_msg(rd, seq);
>>>>  }
>>>>
>>>>  static int dev_one_set(struct rd *rd)
>>>> diff --git a/rdma/rdma.h b/rdma/rdma.h
>>>> index 547bb5749a39..20be2f12c4f8 100644
>>>> --- a/rdma/rdma.h
>>>> +++ b/rdma/rdma.h
>>>> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const char *key);
>>>>   */
>>>>  int rd_send_msg(struct rd *rd);
>>>>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
>>>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
>>>>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
>>>>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
>>>>  int rd_attr_cb(const struct nlattr *attr, void *data);
>>>> diff --git a/rdma/utils.c b/rdma/utils.c
>>>> index 069d44fece10..a6f2826c9605 100644
>>>> --- a/rdma/utils.c
>>>> +++ b/rdma/utils.c
>>>> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
>>>>  	return ret;
>>>>  }
>>>>
>>>> +static int null_cb(const struct nlmsghdr *nlh, void *data)
>>>> +{
>>>> +	return MNL_CB_OK;
>>>> +}
>>>> +
>>>> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
>>>> +{
>>>> +	int ret;
>>>> +
>>>> +	ret = rd_send_msg(rd);
>>>> +	if (ret) {
>>>> +		perror(NULL);
>>> This is more or less already done in rd_send_msg() and that function
>>> prints something in case of execution error. So the missing piece
>>> is to update rd_recv_msg(), so all places will "magically" print errors
>>> and not only dev_set_name().
>> Hey Leon,
>>
>> Displaying errors in rd_recv_msg() as you suggested:
>>
>> @@ -693,10 +693,28 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback,
>> void *data, unsigned int seq)
>>                 ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
>>         } while (ret > 0);
>>
>> +       if (ret < 0)
>> +               perror(NULL);
>> +
>>         mnl_socket_close(rd->nl);
>>         return ret;
>>  }
>>
>> Causes problems when doing filtered dumps:
>>
>> [root@stevo1 iproute2]# ./rdma/rdma res show qp
>> link mlx5_0/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
>> link mlx5_0/1 lqpn 176 type UD state RTS sq-psn 0 comm [ib_core]
>> link mlx5_1/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
>> link mlx5_1/1 lqpn 2224 type UD state RTS sq-psn 0 comm [ib_core]
>> link rxe_foo0/1 lqpn 1 type GSI state RTS sq-psn 0 comm [ib_core]
>> [root@stevo1 iproute2]# ./rdma/rdma res show qp  lqpn 2224
>> Invalid argument
>> No such file or directory
> Why do we it? We are supposed to check "invalid argument" before sending
> message and are not supposed to see perror(). I'm not near code right
> now, so most probably wrong in my assumption.

I'm still investigating, but I _think_ it is because mnl_run_cb(),
called by rd_recv_msg() returns the return code from the callback
function.  So the resource callback functions must be returning an error
when a returned resource doesn't match the filter.  Maybe.  It is
related to doing a rdma res dump where you specify a filter...
Leon Romanovsky March 3, 2019, 1:50 p.m. UTC | #12
On Wed, Feb 27, 2019 at 02:23:45PM -0600, Steve Wise wrote:
>
>
> > -----Original Message-----
> > From: Steve Wise <swise@opengridcomputing.com>
> > Sent: Tuesday, February 26, 2019 11:14 AM
> > To: Leon Romanovsky <leon@kernel.org>
> > Cc: dsahern@gmail.com; stephen@networkplumber.org;
> > netdev@vger.kernel.org; linux-rdma@vger.kernel.org
> > Subject: Re: [PATCH v1 iproute2-next 1/4] rdma: add helper
> > rd_sendrecv_msg()
> >
> >
> > On 2/23/2019 3:26 AM, Leon Romanovsky wrote:
> > > On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
> > >> This function sends the constructed netlink message and then
> > >> receives the response, displaying any error text.
> > >>
> > >> Change 'rdma dev set' to use it.
> > >>
> > >> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> > >> ---
> > >>  rdma/dev.c   |  2 +-
> > >>  rdma/rdma.h  |  1 +
> > >>  rdma/utils.c | 21 +++++++++++++++++++++
> > >>  3 files changed, 23 insertions(+), 1 deletion(-)
> > >>
> > >> diff --git a/rdma/dev.c b/rdma/dev.c
> > >> index 60ff4b31e320..d2949c378f08 100644
> > >> --- a/rdma/dev.c
> > >> +++ b/rdma/dev.c
> > >> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
> > >>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd-
> > >dev_idx);
> > >>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME,
> > rd_argv(rd));
> > >>
> > >> -	return rd_send_msg(rd);
> > >> +	return rd_sendrecv_msg(rd, seq);
> > >>  }
> > >>
> > >>  static int dev_one_set(struct rd *rd)
> > >> diff --git a/rdma/rdma.h b/rdma/rdma.h
> > >> index 547bb5749a39..20be2f12c4f8 100644
> > >> --- a/rdma/rdma.h
> > >> +++ b/rdma/rdma.h
> > >> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const
> > char *key);
> > >>   */
> > >>  int rd_send_msg(struct rd *rd);
> > >>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t
> > seq);
> > >> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
> > >>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq,
> uint16_t
> > flags);
> > >>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
> > >>  int rd_attr_cb(const struct nlattr *attr, void *data);
> > >> diff --git a/rdma/utils.c b/rdma/utils.c
> > >> index 069d44fece10..a6f2826c9605 100644
> > >> --- a/rdma/utils.c
> > >> +++ b/rdma/utils.c
> > >> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback,
> > void *data, unsigned int seq)
> > >>  	return ret;
> > >>  }
> > >>
> > >> +static int null_cb(const struct nlmsghdr *nlh, void *data)
> > >> +{
> > >> +	return MNL_CB_OK;
> > >> +}
> > >> +
> > >> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
> > >> +{
> > >> +	int ret;
> > >> +
> > >> +	ret = rd_send_msg(rd);
> > >> +	if (ret) {
> > >> +		perror(NULL);
> > > This is more or less already done in rd_send_msg() and that function
> > > prints something in case of execution error. So the missing piece
> > > is to update rd_recv_msg(), so all places will "magically" print errors
> > > and not only dev_set_name().
> >
> > Yea ok.
> >
>
> dev_set_name() doesn't call rd_recv_msg().  So you're suggesting I fix up
> rd_recv_msg() to display errors and make dev_set_name() call rd_recv_msg()
> with the null_cb function?  You sure that's the way to go?

I'm sure that we need to fix dev_set_name(), everything else I'm not sure.

Thanks

>
>
>
>
Steve Wise March 4, 2019, 2:13 p.m. UTC | #13
> > >
> > > On 2/23/2019 3:26 AM, Leon Romanovsky wrote:
> > > > On Thu, Feb 21, 2019 at 08:19:03AM -0800, Steve Wise wrote:
> > > >> This function sends the constructed netlink message and then
> > > >> receives the response, displaying any error text.
> > > >>
> > > >> Change 'rdma dev set' to use it.
> > > >>
> > > >> Signed-off-by: Steve Wise <swise@opengridcomputing.com>
> > > >> ---
> > > >>  rdma/dev.c   |  2 +-
> > > >>  rdma/rdma.h  |  1 +
> > > >>  rdma/utils.c | 21 +++++++++++++++++++++
> > > >>  3 files changed, 23 insertions(+), 1 deletion(-)
> > > >>
> > > >> diff --git a/rdma/dev.c b/rdma/dev.c
> > > >> index 60ff4b31e320..d2949c378f08 100644
> > > >> --- a/rdma/dev.c
> > > >> +++ b/rdma/dev.c
> > > >> @@ -273,7 +273,7 @@ static int dev_set_name(struct rd *rd)
> > > >>  	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd-
> > > >dev_idx);
> > > >>  	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME,
> > > rd_argv(rd));
> > > >>
> > > >> -	return rd_send_msg(rd);
> > > >> +	return rd_sendrecv_msg(rd, seq);
> > > >>  }
> > > >>
> > > >>  static int dev_one_set(struct rd *rd)
> > > >> diff --git a/rdma/rdma.h b/rdma/rdma.h
> > > >> index 547bb5749a39..20be2f12c4f8 100644
> > > >> --- a/rdma/rdma.h
> > > >> +++ b/rdma/rdma.h
> > > >> @@ -115,6 +115,7 @@ bool rd_check_is_key_exist(struct rd *rd, const
> > > char *key);
> > > >>   */
> > > >>  int rd_send_msg(struct rd *rd);
> > > >>  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data,
uint32_t
> > > seq);
> > > >> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
> > > >>  void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq,
> > uint16_t
> > > flags);
> > > >>  int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
> > > >>  int rd_attr_cb(const struct nlattr *attr, void *data);
> > > >> diff --git a/rdma/utils.c b/rdma/utils.c
> > > >> index 069d44fece10..a6f2826c9605 100644
> > > >> --- a/rdma/utils.c
> > > >> +++ b/rdma/utils.c
> > > >> @@ -664,6 +664,27 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t
> callback,
> > > void *data, unsigned int seq)
> > > >>  	return ret;
> > > >>  }
> > > >>
> > > >> +static int null_cb(const struct nlmsghdr *nlh, void *data)
> > > >> +{
> > > >> +	return MNL_CB_OK;
> > > >> +}
> > > >> +
> > > >> +int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
> > > >> +{
> > > >> +	int ret;
> > > >> +
> > > >> +	ret = rd_send_msg(rd);
> > > >> +	if (ret) {
> > > >> +		perror(NULL);
> > > > This is more or less already done in rd_send_msg() and that function
> > > > prints something in case of execution error. So the missing piece
> > > > is to update rd_recv_msg(), so all places will "magically" print
errors
> > > > and not only dev_set_name().
> > >
> > > Yea ok.
> > >
> >
> > dev_set_name() doesn't call rd_recv_msg().  So you're suggesting I fix
up
> > rd_recv_msg() to display errors and make dev_set_name() call
> rd_recv_msg()
> > with the null_cb function?  You sure that's the way to go?
> 
> I'm sure that we need to fix dev_set_name(), everything else I'm not sure.
> 
> Thanks

Hey Leon, adding this to rd_recv_msg():

@@ -693,10 +693,28 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void
*data, unsigned int seq)
                ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
        } while (ret > 0);

+       if (ret < 0)
+               perror(NULL);
+
        mnl_socket_close(rd->nl);
        return ret;
 }

Results in unexpected errors being logged when doing a query such as:

[root@stevo1 iproute2]# ./rdma/rdma res show qp lqpn 176
error: Invalid argument
link mlx5_0/1 lqpn 176 type UD state RTS sq-psn 0 comm [ib_core]
error: Invalid argument
error: No such file or directory
error: Invalid argument
error: No such file or directory

It appears the "invalid argument" errors are due to rdmatool sending a
RDMA_NLDEV_CMD_RES_QP_GET command using the doit kernel method to allow
querying for just a QP with lqpn = 176.  However, rdmatool isn't passing a
port index in the messages that generate the "invalid argument" error from
the kernel.  IE you must provide a device index and port index when issuing
a doit command vs a dumpit command.  I think. 

This error was not found because rd_recv_msg() never displayed any errors
previously.  Further, the RES_FUNC() massive macro has code that will retry
a failed doit call with a dumpit call.  I think _##name() should distinguish
between failures reported by the kernel doit function vs failures because no
doit function exists.  Not sure how to support that.


        static inline int _##name(struct rd *rd)
\
        {
\
                uint32_t idx;
\
                int ret;
\
                if (id) {
\
                        ret = rd_doit_index(rd, &idx);
\
                        if (ret) {
\
                                ret = _res_send_idx_msg(rd, command,
\
                                                        name##_idx_parse_cb,
\
                                                        idx, id);
\
                                if (!ret)
\
                                        return ret;
\
                                /* Fallback for old systems without .doit
callbacks */ \
                        }
\
                }
\
                return _res_send_msg(rd, command, name##_parse_cb);
\
        }
\



The "no such file or dir" errors are being returned because, in my setup,
there are 2 other links that do not have lqpn 176.   So there are 2 issues
uncovered by adding generic printing of errors in rd_recv_msg()

1) the doit code in rdmatool is generating requests for a doit method in the
kernel w/o providing a port index.
2) some paths in rdmatool should not print "benign" errors like filtering on
a GET command causing a "does not exist" error returned by the kernel doit
func.

#1 is a bug, IMO.  Can you propose a fix?
#2 could be solved by adding an error callback func passed to rd_recv_msg().
Then the RES_FUNC() functions could parse errors like "no such file or dir"
when doing a filtered query and silently drop them.  And functions like
dev_set_name() would display all errors returned because there are no
expected errors other than "success".

Steve.
Steve Wise March 6, 2019, 9:50 p.m. UTC | #14
On 3/4/2019 8:13 AM, Steve Wise wrote:
> Hey Leon, adding this to rd_recv_msg():
>
> @@ -693,10 +693,28 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void
> *data, unsigned int seq)
>                 ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
>         } while (ret > 0);
>
> +       if (ret < 0)
> +               perror(NULL);
> +
>         mnl_socket_close(rd->nl);
>         return ret;
>  }
>
> Results in unexpected errors being logged when doing a query such as:
>
> [root@stevo1 iproute2]# ./rdma/rdma res show qp lqpn 176
> error: Invalid argument
> link mlx5_0/1 lqpn 176 type UD state RTS sq-psn 0 comm [ib_core]
> error: Invalid argument
> error: No such file or directory
> error: Invalid argument
> error: No such file or directory
>
> It appears the "invalid argument" errors are due to rdmatool sending a
> RDMA_NLDEV_CMD_RES_QP_GET command using the doit kernel method to allow
> querying for just a QP with lqpn = 176.  However, rdmatool isn't passing a
> port index in the messages that generate the "invalid argument" error from
> the kernel.  IE you must provide a device index and port index when issuing
> a doit command vs a dumpit command.  I think. 
>
> This error was not found because rd_recv_msg() never displayed any errors
> previously.  Further, the RES_FUNC() massive macro has code that will retry
> a failed doit call with a dumpit call.  I think _##name() should distinguish
> between failures reported by the kernel doit function vs failures because no
> doit function exists.  Not sure how to support that.
>
>
>         static inline int _##name(struct rd *rd)
> \
>         {
> \
>                 uint32_t idx;
> \
>                 int ret;
> \
>                 if (id) {
> \
>                         ret = rd_doit_index(rd, &idx);
> \
>                         if (ret) {
> \
>                                 ret = _res_send_idx_msg(rd, command,
> \
>                                                         name##_idx_parse_cb,
> \
>                                                         idx, id);
> \
>                                 if (!ret)
> \
>                                         return ret;
> \
>                                 /* Fallback for old systems without .doit
> callbacks */ \
>                         }
> \
>                 }
> \
>                 return _res_send_msg(rd, command, name##_parse_cb);
> \
>         }
> \
>
>
>
> The "no such file or dir" errors are being returned because, in my setup,
> there are 2 other links that do not have lqpn 176.   So there are 2 issues
> uncovered by adding generic printing of errors in rd_recv_msg()
>
> 1) the doit code in rdmatool is generating requests for a doit method in the
> kernel w/o providing a port index.
> 2) some paths in rdmatool should not print "benign" errors like filtering on
> a GET command causing a "does not exist" error returned by the kernel doit
> func.
>
> #1 is a bug, IMO.  Can you propose a fix?
> #2 could be solved by adding an error callback func passed to rd_recv_msg().
> Then the RES_FUNC() functions could parse errors like "no such file or dir"
> when doing a filtered query and silently drop them.  And functions like
> dev_set_name() would display all errors returned because there are no
> expected errors other than "success".
>
> Steve.
>

Hey Leon, you've been quiet. :)   Thoughts?

Thanks,

Steve.
Leon Romanovsky March 7, 2019, 8:33 a.m. UTC | #15
On Wed, Mar 06, 2019 at 03:50:13PM -0600, Steve Wise wrote:
>
> On 3/4/2019 8:13 AM, Steve Wise wrote:
> > Hey Leon, adding this to rd_recv_msg():
> >
> > @@ -693,10 +693,28 @@ int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void
> > *data, unsigned int seq)
> >                 ret = mnl_cb_run(buf, ret, seq, portid, callback, data);
> >         } while (ret > 0);
> >
> > +       if (ret < 0)
> > +               perror(NULL);
> > +
> >         mnl_socket_close(rd->nl);
> >         return ret;
> >  }
> >
> > Results in unexpected errors being logged when doing a query such as:
> >
> > [root@stevo1 iproute2]# ./rdma/rdma res show qp lqpn 176
> > error: Invalid argument
> > link mlx5_0/1 lqpn 176 type UD state RTS sq-psn 0 comm [ib_core]
> > error: Invalid argument
> > error: No such file or directory
> > error: Invalid argument
> > error: No such file or directory
> >
> > It appears the "invalid argument" errors are due to rdmatool sending a
> > RDMA_NLDEV_CMD_RES_QP_GET command using the doit kernel method to allow
> > querying for just a QP with lqpn = 176.  However, rdmatool isn't passing a
> > port index in the messages that generate the "invalid argument" error from
> > the kernel.  IE you must provide a device index and port index when issuing
> > a doit command vs a dumpit command.  I think.

QPs are per-device and not per-port, so I can't issue "real" port on
multi-port devices.

> >
> > This error was not found because rd_recv_msg() never displayed any errors
> > previously.  Further, the RES_FUNC() massive macro has code that will retry
> > a failed doit call with a dumpit call.  I think _##name() should distinguish
> > between failures reported by the kernel doit function vs failures because no
> > doit function exists.  Not sure how to support that.

We can suppress prints from failures to find .doit, by adding extra
parameter to _res_send_idx_msg(): for example _res_send_idx_msg(..., no_error_print);
and provide this "no_error_print" to rd_recv_msg(), through "void
*data".

> >
> >
> >         static inline int _##name(struct rd *rd)
> > \
> >         {
> > \
> >                 uint32_t idx;
> > \
> >                 int ret;
> > \
> >                 if (id) {
> > \
> >                         ret = rd_doit_index(rd, &idx);
> > \
> >                         if (ret) {
> > \
> >                                 ret = _res_send_idx_msg(rd, command,
> > \
> >                                                         name##_idx_parse_cb,
> > \
> >                                                         idx, id);
> > \
> >                                 if (!ret)
> > \
> >                                         return ret;
> > \
> >                                 /* Fallback for old systems without .doit
> > callbacks */ \
> >                         }
> > \
> >                 }
> > \
> >                 return _res_send_msg(rd, command, name##_parse_cb);
> > \
> >         }
> > \
> >
> >
> >
> > The "no such file or dir" errors are being returned because, in my setup,
> > there are 2 other links that do not have lqpn 176.   So there are 2 issues
> > uncovered by adding generic printing of errors in rd_recv_msg()
> >
> > 1) the doit code in rdmatool is generating requests for a doit method in the
> > kernel w/o providing a port index.
> > 2) some paths in rdmatool should not print "benign" errors like filtering on
> > a GET command causing a "does not exist" error returned by the kernel doit
> > func.
> >
> > #1 is a bug, IMO.  Can you propose a fix?
> > #2 could be solved by adding an error callback func passed to rd_recv_msg().
> > Then the RES_FUNC() functions could parse errors like "no such file or dir"
> > when doing a filtered query and silently drop them.  And functions like
> > dev_set_name() would display all errors returned because there are no
> > expected errors other than "success".
> >
> > Steve.
> >
>
> Hey Leon, you've been quiet. :)   Thoughts?

I missed your email, sorry.

>
> Thanks,
>
> Steve.
>
>
diff mbox series

Patch

diff --git a/rdma/dev.c b/rdma/dev.c
index 60ff4b31e320..d2949c378f08 100644
--- a/rdma/dev.c
+++ b/rdma/dev.c
@@ -273,7 +273,7 @@  static int dev_set_name(struct rd *rd)
 	mnl_attr_put_u32(rd->nlh, RDMA_NLDEV_ATTR_DEV_INDEX, rd->dev_idx);
 	mnl_attr_put_strz(rd->nlh, RDMA_NLDEV_ATTR_DEV_NAME, rd_argv(rd));
 
-	return rd_send_msg(rd);
+	return rd_sendrecv_msg(rd, seq);
 }
 
 static int dev_one_set(struct rd *rd)
diff --git a/rdma/rdma.h b/rdma/rdma.h
index 547bb5749a39..20be2f12c4f8 100644
--- a/rdma/rdma.h
+++ b/rdma/rdma.h
@@ -115,6 +115,7 @@  bool rd_check_is_key_exist(struct rd *rd, const char *key);
  */
 int rd_send_msg(struct rd *rd);
 int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, uint32_t seq);
+int rd_sendrecv_msg(struct rd *rd, unsigned int seq);
 void rd_prepare_msg(struct rd *rd, uint32_t cmd, uint32_t *seq, uint16_t flags);
 int rd_dev_init_cb(const struct nlmsghdr *nlh, void *data);
 int rd_attr_cb(const struct nlattr *attr, void *data);
diff --git a/rdma/utils.c b/rdma/utils.c
index 069d44fece10..a6f2826c9605 100644
--- a/rdma/utils.c
+++ b/rdma/utils.c
@@ -664,6 +664,27 @@  int rd_recv_msg(struct rd *rd, mnl_cb_t callback, void *data, unsigned int seq)
 	return ret;
 }
 
+static int null_cb(const struct nlmsghdr *nlh, void *data)
+{
+	return MNL_CB_OK;
+}
+
+int rd_sendrecv_msg(struct rd *rd, unsigned int seq)
+{
+	int ret;
+
+	ret = rd_send_msg(rd);
+	if (ret) {
+		perror(NULL);
+		goto out;
+	}
+	ret = rd_recv_msg(rd, null_cb, rd, seq);
+	if (ret)
+		perror(NULL);
+out:
+	return ret;
+}
+
 static struct dev_map *_dev_map_lookup(struct rd *rd, const char *dev_name)
 {
 	struct dev_map *dev_map;