diff mbox

[RFC,06/11] IB/Verbs: Use management helper has_sa() and cap_sa(), for sa-check

Message ID 55157B43.6060507@profitbricks.com
State RFC, archived
Delegated to: David Miller
Headers show

Commit Message

Michael Wang March 27, 2015, 3:46 p.m. UTC
Introduce helper has_sa() and cap_sa() to help us check if an IB device
or it's port support Subnet Administrator.

Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Cc: Doug Ledford <dledford@redhat.com>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Sean Hefty <sean.hefty@intel.com>
Signed-off-by: Michael Wang <yun.wang@profitbricks.com>
---
 drivers/infiniband/core/sa_query.c | 12 ++++++------
 include/rdma/ib_verbs.h            | 28 ++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 6 deletions(-)

Comments

Ira Weiny March 27, 2015, 4:47 p.m. UTC | #1
On Fri, Mar 27, 2015 at 04:46:11PM +0100, Michael Wang wrote:
> 
> Introduce helper has_sa() and cap_sa() to help us check if an IB device
> or it's port support Subnet Administrator.

I think these 2 should be combined.  The question is if a port requires the use
of the SA depending on the network it is connected to.

Aren't some dual port Mellanox cards capable of doing IB on 1 port and Eth on
the other?  Do they show up as 2 devices?

Regardless I think we should define the SA access on a per port basis.

> 
> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
> Cc: Doug Ledford <dledford@redhat.com>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Cc: Sean Hefty <sean.hefty@intel.com>
> Signed-off-by: Michael Wang <yun.wang@profitbricks.com>
> ---
>  drivers/infiniband/core/sa_query.c | 12 ++++++------
>  include/rdma/ib_verbs.h            | 28 ++++++++++++++++++++++++++++
>  2 files changed, 34 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
> index d95d25f..89c27da 100644
> --- a/drivers/infiniband/core/sa_query.c
> +++ b/drivers/infiniband/core/sa_query.c
> @@ -450,7 +450,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
>          struct ib_sa_port *port =
>              &sa_dev->port[event->element.port_num - sa_dev->start_port];
>  
> -        if (!rdma_port_ll_is_ib(handler->device, port->port_num))
> +        if (!cap_sa(handler->device, port->port_num))
>              return;
>  
>          spin_lock_irqsave(&port->ah_lock, flags);
> @@ -1154,7 +1154,7 @@ static void ib_sa_add_one(struct ib_device *device)
>      struct ib_sa_device *sa_dev;
>      int s, e, i;
>  
> -    if (!rdma_transport_is_ib(device))
> +    if (!has_sa(device))

The logic here should be:

if (no ports of this device need sa access)
	return;

So why not eliminate this check and allow the cap_sa(s) to handle the support?

-- Ira

>          return;
>  
>      if (device->node_type == RDMA_NODE_IB_SWITCH)
> @@ -1175,7 +1175,7 @@ static void ib_sa_add_one(struct ib_device *device)
>  
>      for (i = 0; i <= e - s; ++i) {
>          spin_lock_init(&sa_dev->port[i].ah_lock);
> -        if (!rdma_port_ll_is_ib(device, i + 1))
> +        if (!cap_sa(device, i + 1))
>              continue;
>  
>          sa_dev->port[i].sm_ah    = NULL;
> @@ -1205,14 +1205,14 @@ static void ib_sa_add_one(struct ib_device *device)
>          goto err;
>  
>      for (i = 0; i <= e - s; ++i)
> -        if (rdma_port_ll_is_ib(device, i + 1))
> +        if (cap_sa(device, i + 1))
>              update_sm_ah(&sa_dev->port[i].update_task);
>  
>      return;
>  
>  err:
>      while (--i >= 0)
> -        if (rdma_port_ll_is_ib(device, i + 1))
> +        if (cap_sa(device, i + 1))
>              ib_unregister_mad_agent(sa_dev->port[i].agent);
>  
>      kfree(sa_dev);
> @@ -1233,7 +1233,7 @@ static void ib_sa_remove_one(struct ib_device *device)
>      flush_workqueue(ib_wq);
>  
>      for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
> -        if (rdma_port_ll_is_ib(device, i + 1)) {
> +        if (cap_sa(device, i + 1)) {
>              ib_unregister_mad_agent(sa_dev->port[i].agent);
>              if (sa_dev->port[i].sm_ah)
>                  kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index c0a63f8..fa8ffa3 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -1810,6 +1810,19 @@ static inline int has_cm(struct ib_device *device)
>  }
>  
>  /**
> + * has_sa - Check if a device support Subnet Administrator.
> + *
> + * @device: Device to be checked
> + *
> + * Return 0 when a device has none port to support
> + * Subnet Administrator.
> + */
> +static inline int has_sa(struct ib_device *device)
> +{
> +    return rdma_transport_is_ib(device);
> +}
> +
> +/**
>   * cap_smi - Check if the port of device has the capability
>   * Subnet Management Interface.
>   *
> @@ -1824,6 +1837,21 @@ static inline int cap_smi(struct ib_device *device, u8 port_num)
>      return rdma_port_ll_is_ib(device, port_num);
>  }
>  
> +/**
> + * cap_sa - Check if the port of device has the capability
> + * Subnet Administrator.
> + *
> + * @device: Device to be checked
> + * @port_num: Port number of the device
> + *
> + * Return 0 when port of the device don't support
> + * Subnet Administrator.
> + */
> +static inline int cap_sa(struct ib_device *device, u8 port_num)
> +{
> +    return rdma_port_ll_is_ib(device, port_num);
> +}
> +
>  int ib_query_gid(struct ib_device *device,
>           u8 port_num, int index, union ib_gid *gid);
>  
> -- 
> 2.1.0
> 
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael Wang March 27, 2015, 5:13 p.m. UTC | #2
On Fri, Mar 27, 2015 at 5:47 PM, ira.weiny <ira.weiny@intel.com> wrote:
> On Fri, Mar 27, 2015 at 04:46:11PM +0100, Michael Wang wrote:
>>
>> Introduce helper has_sa() and cap_sa() to help us check if an IB device
>> or it's port support Subnet Administrator.
>
> I think these 2 should be combined.  The question is if a port requires the use
> of the SA depending on the network it is connected to.
>
> Aren't some dual port Mellanox cards capable of doing IB on 1 port and Eth on
> the other?  Do they show up as 2 devices?

I'm not sure about this... but sounds like your opinion is same to
Jason on ipoib stuff, I'll do more investigation on that part, if it's
just for optimization, then we should be able to eliminate has_xx()
stuff :-) after all, init/exit is not a hot path.

>
> Regardless I think we should define the SA access on a per port basis.
>
>>
>> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
>> Cc: Doug Ledford <dledford@redhat.com>
>> Cc: Ira Weiny <ira.weiny@intel.com>
>> Cc: Sean Hefty <sean.hefty@intel.com>
>> Signed-off-by: Michael Wang <yun.wang@profitbricks.com>
>> ---
>>  drivers/infiniband/core/sa_query.c | 12 ++++++------
>>  include/rdma/ib_verbs.h            | 28 ++++++++++++++++++++++++++++
>>  2 files changed, 34 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
>> index d95d25f..89c27da 100644
>> --- a/drivers/infiniband/core/sa_query.c
>> +++ b/drivers/infiniband/core/sa_query.c
>> @@ -450,7 +450,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
>>          struct ib_sa_port *port =
>>              &sa_dev->port[event->element.port_num - sa_dev->start_port];
>>
>> -        if (!rdma_port_ll_is_ib(handler->device, port->port_num))
>> +        if (!cap_sa(handler->device, port->port_num))
>>              return;
>>
>>          spin_lock_irqsave(&port->ah_lock, flags);
>> @@ -1154,7 +1154,7 @@ static void ib_sa_add_one(struct ib_device *device)
>>      struct ib_sa_device *sa_dev;
>>      int s, e, i;
>>
>> -    if (!rdma_transport_is_ib(device))
>> +    if (!has_sa(device))
>
> The logic here should be:
>
> if (no ports of this device need sa access)
>         return;
>
> So why not eliminate this check and allow the cap_sa(s) to handle the support?

I'll check if we could merge this part to the following iteration :-)

Regards,
Michael Wang

>
> -- Ira
>
>>          return;
>>
>>      if (device->node_type == RDMA_NODE_IB_SWITCH)
>> @@ -1175,7 +1175,7 @@ static void ib_sa_add_one(struct ib_device *device)
>>
>>      for (i = 0; i <= e - s; ++i) {
>>          spin_lock_init(&sa_dev->port[i].ah_lock);
>> -        if (!rdma_port_ll_is_ib(device, i + 1))
>> +        if (!cap_sa(device, i + 1))
>>              continue;
>>
>>          sa_dev->port[i].sm_ah    = NULL;
>> @@ -1205,14 +1205,14 @@ static void ib_sa_add_one(struct ib_device *device)
>>          goto err;
>>
>>      for (i = 0; i <= e - s; ++i)
>> -        if (rdma_port_ll_is_ib(device, i + 1))
>> +        if (cap_sa(device, i + 1))
>>              update_sm_ah(&sa_dev->port[i].update_task);
>>
>>      return;
>>
>>  err:
>>      while (--i >= 0)
>> -        if (rdma_port_ll_is_ib(device, i + 1))
>> +        if (cap_sa(device, i + 1))
>>              ib_unregister_mad_agent(sa_dev->port[i].agent);
>>
>>      kfree(sa_dev);
>> @@ -1233,7 +1233,7 @@ static void ib_sa_remove_one(struct ib_device *device)
>>      flush_workqueue(ib_wq);
>>
>>      for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
>> -        if (rdma_port_ll_is_ib(device, i + 1)) {
>> +        if (cap_sa(device, i + 1)) {
>>              ib_unregister_mad_agent(sa_dev->port[i].agent);
>>              if (sa_dev->port[i].sm_ah)
>>                  kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index c0a63f8..fa8ffa3 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -1810,6 +1810,19 @@ static inline int has_cm(struct ib_device *device)
>>  }
>>
>>  /**
>> + * has_sa - Check if a device support Subnet Administrator.
>> + *
>> + * @device: Device to be checked
>> + *
>> + * Return 0 when a device has none port to support
>> + * Subnet Administrator.
>> + */
>> +static inline int has_sa(struct ib_device *device)
>> +{
>> +    return rdma_transport_is_ib(device);
>> +}
>> +
>> +/**
>>   * cap_smi - Check if the port of device has the capability
>>   * Subnet Management Interface.
>>   *
>> @@ -1824,6 +1837,21 @@ static inline int cap_smi(struct ib_device *device, u8 port_num)
>>      return rdma_port_ll_is_ib(device, port_num);
>>  }
>>
>> +/**
>> + * cap_sa - Check if the port of device has the capability
>> + * Subnet Administrator.
>> + *
>> + * @device: Device to be checked
>> + * @port_num: Port number of the device
>> + *
>> + * Return 0 when port of the device don't support
>> + * Subnet Administrator.
>> + */
>> +static inline int cap_sa(struct ib_device *device, u8 port_num)
>> +{
>> +    return rdma_port_ll_is_ib(device, port_num);
>> +}
>> +
>>  int ib_query_gid(struct ib_device *device,
>>           u8 port_num, int index, union ib_gid *gid);
>>
>> --
>> 2.1.0
>>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Doug Ledford March 27, 2015, 7:49 p.m. UTC | #3
On Fri, 2015-03-27 at 12:47 -0400, ira.weiny wrote:
> On Fri, Mar 27, 2015 at 04:46:11PM +0100, Michael Wang wrote:
> > 
> > Introduce helper has_sa() and cap_sa() to help us check if an IB device
> > or it's port support Subnet Administrator.
> 
> I think these 2 should be combined.  The question is if a port requires the use
> of the SA depending on the network it is connected to.
> 
> Aren't some dual port Mellanox cards capable of doing IB on 1 port and Eth on
> the other?

Yes.

>   Do they show up as 2 devices?

No.  They are two ports of the same device with different link layers.
See here:

hca_id:	mlx4_1
	transport:			InfiniBand (0)
	fw_ver:				2.31.5050
	node_guid:			f452:1403:007b:cba0
	sys_image_guid:			f452:1403:007b:cba3
	vendor_id:			0x02c9
	vendor_part_id:			4099
	hw_ver:				0x0
	board_id:			MT_1090120019
	phys_port_cnt:			2
		port:	1
			state:			PORT_ACTIVE (4)
			max_mtu:		2048 (4)
			active_mtu:		2048 (4)
			sm_lid:			2
			port_lid:		2
			port_lmc:		0x01
			link_layer:		InfiniBand

		port:	2
			state:			PORT_ACTIVE (4)
			max_mtu:		4096 (5)
			active_mtu:		4096 (5)
			sm_lid:			0
			port_lid:		0
			port_lmc:		0x00
			link_layer:		Ethernet



> Regardless I think we should define the SA access on a per port basis.

Yes.

> > 
> > Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
> > Cc: Doug Ledford <dledford@redhat.com>
> > Cc: Ira Weiny <ira.weiny@intel.com>
> > Cc: Sean Hefty <sean.hefty@intel.com>
> > Signed-off-by: Michael Wang <yun.wang@profitbricks.com>
> > ---
> >  drivers/infiniband/core/sa_query.c | 12 ++++++------
> >  include/rdma/ib_verbs.h            | 28 ++++++++++++++++++++++++++++
> >  2 files changed, 34 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
> > index d95d25f..89c27da 100644
> > --- a/drivers/infiniband/core/sa_query.c
> > +++ b/drivers/infiniband/core/sa_query.c
> > @@ -450,7 +450,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
> >          struct ib_sa_port *port =
> >              &sa_dev->port[event->element.port_num - sa_dev->start_port];
> >  
> > -        if (!rdma_port_ll_is_ib(handler->device, port->port_num))
> > +        if (!cap_sa(handler->device, port->port_num))
> >              return;
> >  
> >          spin_lock_irqsave(&port->ah_lock, flags);
> > @@ -1154,7 +1154,7 @@ static void ib_sa_add_one(struct ib_device *device)
> >      struct ib_sa_device *sa_dev;
> >      int s, e, i;
> >  
> > -    if (!rdma_transport_is_ib(device))
> > +    if (!has_sa(device))
> 
> The logic here should be:
> 
> if (no ports of this device need sa access)
> 	return;
> 
> So why not eliminate this check and allow the cap_sa(s) to handle the support?
> 
> -- Ira
> 
> >          return;
> >  
> >      if (device->node_type == RDMA_NODE_IB_SWITCH)
> > @@ -1175,7 +1175,7 @@ static void ib_sa_add_one(struct ib_device *device)
> >  
> >      for (i = 0; i <= e - s; ++i) {
> >          spin_lock_init(&sa_dev->port[i].ah_lock);
> > -        if (!rdma_port_ll_is_ib(device, i + 1))
> > +        if (!cap_sa(device, i + 1))
> >              continue;
> >  
> >          sa_dev->port[i].sm_ah    = NULL;
> > @@ -1205,14 +1205,14 @@ static void ib_sa_add_one(struct ib_device *device)
> >          goto err;
> >  
> >      for (i = 0; i <= e - s; ++i)
> > -        if (rdma_port_ll_is_ib(device, i + 1))
> > +        if (cap_sa(device, i + 1))
> >              update_sm_ah(&sa_dev->port[i].update_task);
> >  
> >      return;
> >  
> >  err:
> >      while (--i >= 0)
> > -        if (rdma_port_ll_is_ib(device, i + 1))
> > +        if (cap_sa(device, i + 1))
> >              ib_unregister_mad_agent(sa_dev->port[i].agent);
> >  
> >      kfree(sa_dev);
> > @@ -1233,7 +1233,7 @@ static void ib_sa_remove_one(struct ib_device *device)
> >      flush_workqueue(ib_wq);
> >  
> >      for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
> > -        if (rdma_port_ll_is_ib(device, i + 1)) {
> > +        if (cap_sa(device, i + 1)) {
> >              ib_unregister_mad_agent(sa_dev->port[i].agent);
> >              if (sa_dev->port[i].sm_ah)
> >                  kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
> > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> > index c0a63f8..fa8ffa3 100644
> > --- a/include/rdma/ib_verbs.h
> > +++ b/include/rdma/ib_verbs.h
> > @@ -1810,6 +1810,19 @@ static inline int has_cm(struct ib_device *device)
> >  }
> >  
> >  /**
> > + * has_sa - Check if a device support Subnet Administrator.
> > + *
> > + * @device: Device to be checked
> > + *
> > + * Return 0 when a device has none port to support
> > + * Subnet Administrator.
> > + */
> > +static inline int has_sa(struct ib_device *device)
> > +{
> > +    return rdma_transport_is_ib(device);
> > +}
> > +
> > +/**
> >   * cap_smi - Check if the port of device has the capability
> >   * Subnet Management Interface.
> >   *
> > @@ -1824,6 +1837,21 @@ static inline int cap_smi(struct ib_device *device, u8 port_num)
> >      return rdma_port_ll_is_ib(device, port_num);
> >  }
> >  
> > +/**
> > + * cap_sa - Check if the port of device has the capability
> > + * Subnet Administrator.
> > + *
> > + * @device: Device to be checked
> > + * @port_num: Port number of the device
> > + *
> > + * Return 0 when port of the device don't support
> > + * Subnet Administrator.
> > + */
> > +static inline int cap_sa(struct ib_device *device, u8 port_num)
> > +{
> > +    return rdma_port_ll_is_ib(device, port_num);
> > +}
> > +
> >  int ib_query_gid(struct ib_device *device,
> >           u8 port_num, int index, union ib_gid *gid);
> >  
> > -- 
> > 2.1.0
> >
Doug Ledford March 30, 2015, 4:16 p.m. UTC | #4
On Fri, 2015-03-27 at 16:46 +0100, Michael Wang wrote:
> Introduce helper has_sa() and cap_sa() to help us check if an IB device
> or it's port support Subnet Administrator.

There's no functional reason to have both rdma_transport_is_ib and
rdma_port_ll_is_ib, just use one.  Then there is also no reason for both
has_sa and cap_sa.  Just use one.

> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
> Cc: Doug Ledford <dledford@redhat.com>
> Cc: Ira Weiny <ira.weiny@intel.com>
> Cc: Sean Hefty <sean.hefty@intel.com>
> Signed-off-by: Michael Wang <yun.wang@profitbricks.com>
> ---
>  drivers/infiniband/core/sa_query.c | 12 ++++++------
>  include/rdma/ib_verbs.h            | 28 ++++++++++++++++++++++++++++
>  2 files changed, 34 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
> index d95d25f..89c27da 100644
> --- a/drivers/infiniband/core/sa_query.c
> +++ b/drivers/infiniband/core/sa_query.c
> @@ -450,7 +450,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
>          struct ib_sa_port *port =
>              &sa_dev->port[event->element.port_num - sa_dev->start_port];
>  
> -        if (!rdma_port_ll_is_ib(handler->device, port->port_num))
> +        if (!cap_sa(handler->device, port->port_num))
>              return;
>  
>          spin_lock_irqsave(&port->ah_lock, flags);
> @@ -1154,7 +1154,7 @@ static void ib_sa_add_one(struct ib_device *device)
>      struct ib_sa_device *sa_dev;
>      int s, e, i;
>  
> -    if (!rdma_transport_is_ib(device))
> +    if (!has_sa(device))
>          return;
>  
>      if (device->node_type == RDMA_NODE_IB_SWITCH)
> @@ -1175,7 +1175,7 @@ static void ib_sa_add_one(struct ib_device *device)
>  
>      for (i = 0; i <= e - s; ++i) {
>          spin_lock_init(&sa_dev->port[i].ah_lock);
> -        if (!rdma_port_ll_is_ib(device, i + 1))
> +        if (!cap_sa(device, i + 1))
>              continue;
>  
>          sa_dev->port[i].sm_ah    = NULL;
> @@ -1205,14 +1205,14 @@ static void ib_sa_add_one(struct ib_device *device)
>          goto err;
>  
>      for (i = 0; i <= e - s; ++i)
> -        if (rdma_port_ll_is_ib(device, i + 1))
> +        if (cap_sa(device, i + 1))
>              update_sm_ah(&sa_dev->port[i].update_task);
>  
>      return;
>  
>  err:
>      while (--i >= 0)
> -        if (rdma_port_ll_is_ib(device, i + 1))
> +        if (cap_sa(device, i + 1))
>              ib_unregister_mad_agent(sa_dev->port[i].agent);
>  
>      kfree(sa_dev);
> @@ -1233,7 +1233,7 @@ static void ib_sa_remove_one(struct ib_device *device)
>      flush_workqueue(ib_wq);
>  
>      for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
> -        if (rdma_port_ll_is_ib(device, i + 1)) {
> +        if (cap_sa(device, i + 1)) {
>              ib_unregister_mad_agent(sa_dev->port[i].agent);
>              if (sa_dev->port[i].sm_ah)
>                  kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index c0a63f8..fa8ffa3 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -1810,6 +1810,19 @@ static inline int has_cm(struct ib_device *device)
>  }
>  
>  /**
> + * has_sa - Check if a device support Subnet Administrator.
> + *
> + * @device: Device to be checked
> + *
> + * Return 0 when a device has none port to support
> + * Subnet Administrator.
> + */
> +static inline int has_sa(struct ib_device *device)
> +{
> +    return rdma_transport_is_ib(device);
> +}
> +
> +/**
>   * cap_smi - Check if the port of device has the capability
>   * Subnet Management Interface.
>   *
> @@ -1824,6 +1837,21 @@ static inline int cap_smi(struct ib_device *device, u8 port_num)
>      return rdma_port_ll_is_ib(device, port_num);
>  }
>  
> +/**
> + * cap_sa - Check if the port of device has the capability
> + * Subnet Administrator.
> + *
> + * @device: Device to be checked
> + * @port_num: Port number of the device
> + *
> + * Return 0 when port of the device don't support
> + * Subnet Administrator.
> + */
> +static inline int cap_sa(struct ib_device *device, u8 port_num)
> +{
> +    return rdma_port_ll_is_ib(device, port_num);
> +}
> +
>  int ib_query_gid(struct ib_device *device,
>           u8 port_num, int index, union ib_gid *gid);
>
Michael Wang March 30, 2015, 4:42 p.m. UTC | #5
On 03/30/2015 06:16 PM, Doug Ledford wrote:
> On Fri, 2015-03-27 at 16:46 +0100, Michael Wang wrote:
>> Introduce helper has_sa() and cap_sa() to help us check if an IB device
>> or it's port support Subnet Administrator.
> There's no functional reason to have both rdma_transport_is_ib and
> rdma_port_ll_is_ib, just use one.  Then there is also no reason for both
> has_sa and cap_sa.  Just use one.
The has_sa() will be eliminated :-)

rdma_transport_is_ib and rdma_port_ll_is_ib is actually just rough helper
to save some code, we can get rid of them when we no longer need them, but
currently device driver still using them a lot, I'm not sure if the new
mechanism could take cover all these cases...

Regards,
Michael Wang 


>> Cc: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
>> Cc: Doug Ledford <dledford@redhat.com>
>> Cc: Ira Weiny <ira.weiny@intel.com>
>> Cc: Sean Hefty <sean.hefty@intel.com>
>> Signed-off-by: Michael Wang <yun.wang@profitbricks.com>
>> ---
>>  drivers/infiniband/core/sa_query.c | 12 ++++++------
>>  include/rdma/ib_verbs.h            | 28 ++++++++++++++++++++++++++++
>>  2 files changed, 34 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
>> index d95d25f..89c27da 100644
>> --- a/drivers/infiniband/core/sa_query.c
>> +++ b/drivers/infiniband/core/sa_query.c
>> @@ -450,7 +450,7 @@ static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
>>          struct ib_sa_port *port =
>>              &sa_dev->port[event->element.port_num - sa_dev->start_port];
>>  
>> -        if (!rdma_port_ll_is_ib(handler->device, port->port_num))
>> +        if (!cap_sa(handler->device, port->port_num))
>>              return;
>>  
>>          spin_lock_irqsave(&port->ah_lock, flags);
>> @@ -1154,7 +1154,7 @@ static void ib_sa_add_one(struct ib_device *device)
>>      struct ib_sa_device *sa_dev;
>>      int s, e, i;
>>  
>> -    if (!rdma_transport_is_ib(device))
>> +    if (!has_sa(device))
>>          return;
>>  
>>      if (device->node_type == RDMA_NODE_IB_SWITCH)
>> @@ -1175,7 +1175,7 @@ static void ib_sa_add_one(struct ib_device *device)
>>  
>>      for (i = 0; i <= e - s; ++i) {
>>          spin_lock_init(&sa_dev->port[i].ah_lock);
>> -        if (!rdma_port_ll_is_ib(device, i + 1))
>> +        if (!cap_sa(device, i + 1))
>>              continue;
>>  
>>          sa_dev->port[i].sm_ah    = NULL;
>> @@ -1205,14 +1205,14 @@ static void ib_sa_add_one(struct ib_device *device)
>>          goto err;
>>  
>>      for (i = 0; i <= e - s; ++i)
>> -        if (rdma_port_ll_is_ib(device, i + 1))
>> +        if (cap_sa(device, i + 1))
>>              update_sm_ah(&sa_dev->port[i].update_task);
>>  
>>      return;
>>  
>>  err:
>>      while (--i >= 0)
>> -        if (rdma_port_ll_is_ib(device, i + 1))
>> +        if (cap_sa(device, i + 1))
>>              ib_unregister_mad_agent(sa_dev->port[i].agent);
>>  
>>      kfree(sa_dev);
>> @@ -1233,7 +1233,7 @@ static void ib_sa_remove_one(struct ib_device *device)
>>      flush_workqueue(ib_wq);
>>  
>>      for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
>> -        if (rdma_port_ll_is_ib(device, i + 1)) {
>> +        if (cap_sa(device, i + 1)) {
>>              ib_unregister_mad_agent(sa_dev->port[i].agent);
>>              if (sa_dev->port[i].sm_ah)
>>                  kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
>> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
>> index c0a63f8..fa8ffa3 100644
>> --- a/include/rdma/ib_verbs.h
>> +++ b/include/rdma/ib_verbs.h
>> @@ -1810,6 +1810,19 @@ static inline int has_cm(struct ib_device *device)
>>  }
>>  
>>  /**
>> + * has_sa - Check if a device support Subnet Administrator.
>> + *
>> + * @device: Device to be checked
>> + *
>> + * Return 0 when a device has none port to support
>> + * Subnet Administrator.
>> + */
>> +static inline int has_sa(struct ib_device *device)
>> +{
>> +    return rdma_transport_is_ib(device);
>> +}
>> +
>> +/**
>>   * cap_smi - Check if the port of device has the capability
>>   * Subnet Management Interface.
>>   *
>> @@ -1824,6 +1837,21 @@ static inline int cap_smi(struct ib_device *device, u8 port_num)
>>      return rdma_port_ll_is_ib(device, port_num);
>>  }
>>  
>> +/**
>> + * cap_sa - Check if the port of device has the capability
>> + * Subnet Administrator.
>> + *
>> + * @device: Device to be checked
>> + * @port_num: Port number of the device
>> + *
>> + * Return 0 when port of the device don't support
>> + * Subnet Administrator.
>> + */
>> +static inline int cap_sa(struct ib_device *device, u8 port_num)
>> +{
>> +    return rdma_port_ll_is_ib(device, port_num);
>> +}
>> +
>>  int ib_query_gid(struct ib_device *device,
>>           u8 port_num, int index, union ib_gid *gid);
>>  
>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Doug Ledford March 30, 2015, 5:02 p.m. UTC | #6
On Mon, 2015-03-30 at 18:42 +0200, Michael Wang wrote:
> On 03/30/2015 06:16 PM, Doug Ledford wrote:
> > On Fri, 2015-03-27 at 16:46 +0100, Michael Wang wrote:
> >> Introduce helper has_sa() and cap_sa() to help us check if an IB device
> >> or it's port support Subnet Administrator.
> > There's no functional reason to have both rdma_transport_is_ib and
> > rdma_port_ll_is_ib, just use one.  Then there is also no reason for both
> > has_sa and cap_sa.  Just use one.
> The has_sa() will be eliminated :-)
> 
> rdma_transport_is_ib and rdma_port_ll_is_ib is actually just rough helper
> to save some code, we can get rid of them when we no longer need them, but
> currently device driver still using them a lot, I'm not sure if the new
> mechanism could take cover all these cases...

Sure it would.  This is what I had suggested (well, close to this, I
rearranged the order this time around):

enum rdma_transport {
	RDMA_TRANSPORT_IB =      0x01,
	RDMA_TRANSPORT_OPA =     0x02,
	RDMA_TRANSPORT_IWARP =   0x04,
	RDMA_TRANSPORT_ROCE_V1 = 0x08,
	RDMA_TRANSPORT_ROCE_V2 = 0x10,
};

struct ib_port {
	...
	enum rdma_transport;
	...
};

static inline bool rdma_transport_is_ib(struct ib_port *port)
{
	return port->transport & (RDMA_TRANSPORT_IB | RDMA_TRANSPORT_OPA);
}

static inline bool rdma_transport_is_opa(struct ib_port *port)
{
	return port->transport & RDMA_TRANSPORT_OPA;
}

static inline bool rdma_transport_is_iwarp(struct ib_port *port)
{
	return port->transport & RDMA_TRANSPORT_IWARP;
}

static inline bool rdma_transport_is_roce(struct ib_port *port)
{
	return port->transport & (RDMA_TRANSPORT_ROCE_V1 | RDMA_TRANSPORT_ROCE_V2);
}

static inline bool rdma_ib_mgmt(struct ib_port *port)
{
	return port->transport & (RDMA_TRANSPORT_IB | RDMA_TRANSPORT_OPA);
}

static inline bool rdma_opa_mgmt(struct ib_port *port)
{
	return port->transport & RDMA_TRANSPORT_OPA;
}


If we use something like this, then the above is all you need.  Then
every place in the code that checks for something like has_sa or cap_sa
can be replaced with rdma_ib_mgmt.  When Ira updates his patches for
this, he can check for rdma_opa_mgmt to enable jumbo MAD packets and
whatever else he needs.  Every place that does transport == IB and ll ==
Ethernet can become rdma_transport_is_roce.  Every place that does
transport == IB and ll == INFINIBAND becomes rdma_transport_is_ib.  The
code in multicast.c just needs to check rdma_ib_mgmt() (which happens to
make perfect sense anyway as the code in multicast.c that is checking
that we are on an IB interface is doing so because IB requires extra
management of the multicast group joins/leaves).  But, like I said, this
is an all or nothing change, it isn't something we can ease into.
Michael Wang March 31, 2015, 1:22 p.m. UTC | #7
On 03/30/2015 07:02 PM, Doug Ledford wrote:
> On Mon, 2015-03-30 at 18:42 +0200, Michael Wang wrote:
>> [snip]
>>
>> rdma_transport_is_ib and rdma_port_ll_is_ib is actually just rough helper
>> to save some code, we can get rid of them when we no longer need them, but
>> currently device driver still using them a lot, I'm not sure if the new
>> mechanism could take cover all these cases...
> [snip]
>
> If we use something like this, then the above is all you need.  Then
> every place in the code that checks for something like has_sa or cap_sa
> can be replaced with rdma_ib_mgmt.  When Ira updates his patches for
> this, he can check for rdma_opa_mgmt to enable jumbo MAD packets and
> whatever else he needs. Every place that does transport == IB and ll ==
> Ethernet can become rdma_transport_is_roce.  Every place that does
> transport == IB and ll == INFINIBAND becomes rdma_transport_is_ib.

Get your point :-) I need to investigate all those cases see if it
works, in case if things unclear, I'll reform it according to my
understanding to adapt to this new mechanism, if there are any
misunderstanding, we can address them case by case during
the review.

>  The
> code in multicast.c just needs to check rdma_ib_mgmt() (which happens to
> make perfect sense anyway as the code in multicast.c that is checking
> that we are on an IB interface is doing so because IB requires extra
> management of the multicast group joins/leaves).

I'll adopt those helpers already been discussed, and they will be
implemented with the helpers from new mechanism
(eg, rdma_ib_mgmt) in next version.

> But, like I said, this
> is an all or nothing change, it isn't something we can ease into.

It's will be great if we can make it, let's see ;-)

Regards,
Michael Wang

>

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe March 31, 2015, 11:12 p.m. UTC | #8
On Mon, Mar 30, 2015 at 01:02:03PM -0400, Doug Ledford wrote:

> If we use something like this, then the above is all you need.  Then
> every place in the code that checks for something like has_sa or cap_sa
> can be replaced with rdma_ib_mgmt. 

I don't want to see this slide back into some ill defined feature
test.

We need to clearly define exactly what these tests are checking, and I
think, since we have so much code sharing, the checks should be narrow
in scope, not just an entire standard.

I think the basic family of checks Michael identified seemed like
a reasonable start.

Going forward we want to NAK stuff like this:

  if (rdma_ib_mgmt() || rdma_opa_mgmt())
  if (has_sa() || has_opa_foobar())

That indicates we need a new micro feature test.

A big problem here is people working on the core may not know the
intricate details of all the families. This will only get worse when
proprietary tech like OPA gets added. Documenting requirements via a
narrow feature test gives a much higher chance that core stuff will be
right via review.

> But, like I said, this is an all or nothing change, it isn't
> something we can ease into.

Well, we ease into it by introducing the micro tests and then wiping
the legacy ones, followed by changing how the driver/core communicates
the port and device feature set, ideally to a bitset like you've
described.

Michael has tackled the core code, another series could work on the
drivers..

Jason
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ira Weiny April 1, 2015, 12:51 a.m. UTC | #9
On Tue, Mar 31, 2015 at 05:12:02PM -0600, Jason Gunthorpe wrote:
> On Mon, Mar 30, 2015 at 01:02:03PM -0400, Doug Ledford wrote:
> 
> > If we use something like this, then the above is all you need.  Then
> > every place in the code that checks for something like has_sa or cap_sa
> > can be replaced with rdma_ib_mgmt. 
> 
> I don't want to see this slide back into some ill defined feature
> test.
> 
> We need to clearly define exactly what these tests are checking, and I
> think, since we have so much code sharing, the checks should be narrow
> in scope, not just an entire standard.

Somewhat agree.

> 
> I think the basic family of checks Michael identified seemed like
> a reasonable start.
> 
> Going forward we want to NAK stuff like this:
> 
>   if (rdma_ib_mgmt() || rdma_opa_mgmt())
>   if (has_sa() || has_opa_foobar())

I'm confused.

Is the idea that you would NAK has_sa but be in favor of has_ib_sa?

I think cap_opa_mad is reasonable.  And this confuses me why has_opa_foobar
would be NAKed.

I believe it is reasonable to flag OPA MAD support as a feature set through a
single bit.  From this the MAD stack can know if OPA MAD base/class versions
are allowed (or treated differently from future IB versions) and if it should
processing OPA SM Class DR MADs differently.  I don't want devices to be
required to supply a list of MAD Base/Class Versions they support.

> 
> That indicates we need a new micro feature test.

The million dollar question is how micro we should go.

More specifically which feature sets can be appropriately communicated with a
single bit vs not.

For example, the MAD size is more generally (and perhaps better) communicated
via an actual mad_size attribute rather than as part of the OPA MAD feature
set.

Another example is the question of is it appropriate to wrap up the single READ
SGL entry support within the "is iwarp" flag?

https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg23611.html

Right now there is implicit information being gleaned from the "is iwarp" flag.
That is that an iWarp device is limited to only 1 READ SGL entry.  As this is
the only device type which has this limitation _and_ that number is "well
known" it works.  Even if not the best solution.

The task at hand is to clean up implicit information passing like this while
not getting mired down in our own refactoring.

> 
> A big problem here is people working on the core may not know the
> intricate details of all the families. This will only get worse when
> proprietary tech like OPA gets added. Documenting requirements via a
> narrow feature test gives a much higher chance that core stuff will be
> right via review.

Agreed as long as we get the width right.

> 
> > But, like I said, this is an all or nothing change, it isn't
> > something we can ease into.
> 
> Well, we ease into it by introducing the micro tests and then wiping
> the legacy ones, followed by changing how the driver/core communicates
> the port and device feature set, ideally to a bitset like you've
> described.

I think this does need to be eased into.  We seem to agree that the feature
flags we have now are not granular and/or explicit enough.  So lets start
cleaning up some of these tests and see how it goes.

Ira

> 
> Michael has tackled the core code, another series could work on the
> drivers..
> 
> Jason
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jason Gunthorpe April 1, 2015, 1:31 a.m. UTC | #10
On Tue, Mar 31, 2015 at 08:51:13PM -0400, ira.weiny wrote:

> > Going forward we want to NAK stuff like this:
> > 
> >   if (rdma_ib_mgmt() || rdma_opa_mgmt())
> >   if (has_sa() || has_opa_foobar())
> 
> I'm confused.
> 
> Is the idea that you would NAK has_sa but be in favor of has_ib_sa?

It is the || I don't want to see. Feature tests should be one thing.

> I believe it is reasonable to flag OPA MAD support as a feature set through a
> single bit.  From this the MAD stack can know if OPA MAD base/class versions
> are allowed (or treated differently from future IB versions) and if it should
> processing OPA SM Class DR MADs differently.  I don't want devices to be
> required to supply a list of MAD Base/Class Versions they support.

That seems reasonable, but we'd gain a is_ib_smp and is_opa_smp test
to do that.

> For example, the MAD size is more generally (and perhaps better) communicated
> via an actual mad_size attribute rather than as part of the OPA MAD feature
> set.

Start with a bit and change when that makes no sense, MTUs as ints
make sense.

> Another example is the question of is it appropriate to wrap up the single READ
> SGL entry support within the "is iwarp" flag?

Again, stick with a single bit test until a HW vendors needs more,
then someone can clean it. At least they now know what to clean and
why.

Remember, this is in the kernel, we can change it when it outlives its
life.

Jason
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index d95d25f..89c27da 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -450,7 +450,7 @@  static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event
         struct ib_sa_port *port =
             &sa_dev->port[event->element.port_num - sa_dev->start_port];
 
-        if (!rdma_port_ll_is_ib(handler->device, port->port_num))
+        if (!cap_sa(handler->device, port->port_num))
             return;
 
         spin_lock_irqsave(&port->ah_lock, flags);
@@ -1154,7 +1154,7 @@  static void ib_sa_add_one(struct ib_device *device)
     struct ib_sa_device *sa_dev;
     int s, e, i;
 
-    if (!rdma_transport_is_ib(device))
+    if (!has_sa(device))
         return;
 
     if (device->node_type == RDMA_NODE_IB_SWITCH)
@@ -1175,7 +1175,7 @@  static void ib_sa_add_one(struct ib_device *device)
 
     for (i = 0; i <= e - s; ++i) {
         spin_lock_init(&sa_dev->port[i].ah_lock);
-        if (!rdma_port_ll_is_ib(device, i + 1))
+        if (!cap_sa(device, i + 1))
             continue;
 
         sa_dev->port[i].sm_ah    = NULL;
@@ -1205,14 +1205,14 @@  static void ib_sa_add_one(struct ib_device *device)
         goto err;
 
     for (i = 0; i <= e - s; ++i)
-        if (rdma_port_ll_is_ib(device, i + 1))
+        if (cap_sa(device, i + 1))
             update_sm_ah(&sa_dev->port[i].update_task);
 
     return;
 
 err:
     while (--i >= 0)
-        if (rdma_port_ll_is_ib(device, i + 1))
+        if (cap_sa(device, i + 1))
             ib_unregister_mad_agent(sa_dev->port[i].agent);
 
     kfree(sa_dev);
@@ -1233,7 +1233,7 @@  static void ib_sa_remove_one(struct ib_device *device)
     flush_workqueue(ib_wq);
 
     for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
-        if (rdma_port_ll_is_ib(device, i + 1)) {
+        if (cap_sa(device, i + 1)) {
             ib_unregister_mad_agent(sa_dev->port[i].agent);
             if (sa_dev->port[i].sm_ah)
                 kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index c0a63f8..fa8ffa3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1810,6 +1810,19 @@  static inline int has_cm(struct ib_device *device)
 }
 
 /**
+ * has_sa - Check if a device support Subnet Administrator.
+ *
+ * @device: Device to be checked
+ *
+ * Return 0 when a device has none port to support
+ * Subnet Administrator.
+ */
+static inline int has_sa(struct ib_device *device)
+{
+    return rdma_transport_is_ib(device);
+}
+
+/**
  * cap_smi - Check if the port of device has the capability
  * Subnet Management Interface.
  *
@@ -1824,6 +1837,21 @@  static inline int cap_smi(struct ib_device *device, u8 port_num)
     return rdma_port_ll_is_ib(device, port_num);
 }
 
+/**
+ * cap_sa - Check if the port of device has the capability
+ * Subnet Administrator.
+ *
+ * @device: Device to be checked
+ * @port_num: Port number of the device
+ *
+ * Return 0 when port of the device don't support
+ * Subnet Administrator.
+ */
+static inline int cap_sa(struct ib_device *device, u8 port_num)
+{
+    return rdma_port_ll_is_ib(device, port_num);
+}
+
 int ib_query_gid(struct ib_device *device,
          u8 port_num, int index, union ib_gid *gid);