diff mbox series

[rdma-next,4/8] IB/mlx5: Add steering SW ICM device memory type

Message ID 20190331164450.23618-5-leon@kernel.org
State Not Applicable
Delegated to: David Miller
Headers show
Series User space steering | expand

Commit Message

Leon Romanovsky March 31, 2019, 4:44 p.m. UTC
From: Ariel Levkovich <lariel@mellanox.com>

This patch adds support for allocating, deallocating and registering
a new device memory type, STEERING_SW_ICM.
This memory can be allocated and used by a privileged user for direct
rule insertion and management of the device's steering tables.

The type is provided by the user via the dedicated attribute in
the alloc_dm ioctl command.

Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Reviewed-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
 drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
 drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
 drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
 drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
 drivers/infiniband/hw/mlx5/mr.c           |   7 ++
 include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
 7 files changed, 291 insertions(+), 6 deletions(-)

Comments

Jason Gunthorpe April 24, 2019, 1:04 p.m. UTC | #1
On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> From: Ariel Levkovich <lariel@mellanox.com>
> 
> This patch adds support for allocating, deallocating and registering
> a new device memory type, STEERING_SW_ICM.
> This memory can be allocated and used by a privileged user for direct
> rule insertion and management of the device's steering tables.
> 
> The type is provided by the user via the dedicated attribute in
> the alloc_dm ioctl command.
> 
> Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> Reviewed-by: Eli Cohen <eli@mellanox.com>
> Reviewed-by: Mark Bloch <markb@mellanox.com>
> Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
>  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
>  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
>  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
>  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
>  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
>  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
>  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
>  7 files changed, 291 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> index 8d651c05de62..347d457fcb2f 100644
> +++ b/drivers/infiniband/hw/mlx5/Kconfig
> @@ -1,6 +1,6 @@
>  config MLX5_INFINIBAND
>  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT

Why do we need this?

Jason
Leon Romanovsky April 24, 2019, 1:12 p.m. UTC | #2
On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > From: Ariel Levkovich <lariel@mellanox.com>
> >
> > This patch adds support for allocating, deallocating and registering
> > a new device memory type, STEERING_SW_ICM.
> > This memory can be allocated and used by a privileged user for direct
> > rule insertion and management of the device's steering tables.
> >
> > The type is provided by the user via the dedicated attribute in
> > the alloc_dm ioctl command.
> >
> > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> >  7 files changed, 291 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > index 8d651c05de62..347d457fcb2f 100644
> > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > @@ -1,6 +1,6 @@
> >  config MLX5_INFINIBAND
> >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
>
> Why do we need this?

We are using phys_addr_t type and not u64 as an input to FW to emphasize
the caller interface, but it is different between 32 and 64 bits systems.
Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.

Thanks

>
> Jason
Jason Gunthorpe April 24, 2019, 1:14 p.m. UTC | #3
On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > From: Ariel Levkovich <lariel@mellanox.com>
> > >
> > > This patch adds support for allocating, deallocating and registering
> > > a new device memory type, STEERING_SW_ICM.
> > > This memory can be allocated and used by a privileged user for direct
> > > rule insertion and management of the device's steering tables.
> > >
> > > The type is provided by the user via the dedicated attribute in
> > > the alloc_dm ioctl command.
> > >
> > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > index 8d651c05de62..347d457fcb2f 100644
> > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > @@ -1,6 +1,6 @@
> > >  config MLX5_INFINIBAND
> > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> >
> > Why do we need this?
> 
> We are using phys_addr_t type and not u64 as an input to FW to emphasize
> the caller interface, but it is different between 32 and 64 bits systems.
> Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.

???

PHYS_ADDR_T should only ever be used to store a bus address - and if
you are passing a bus address to/from FW then you 0 extend it on 32
bit, and range check on 64 bit.

This is not a reason to force PHYS_ADDR_T_64BIT from a driver

Jason
Leon Romanovsky April 24, 2019, 1:25 p.m. UTC | #4
On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > >
> > > > This patch adds support for allocating, deallocating and registering
> > > > a new device memory type, STEERING_SW_ICM.
> > > > This memory can be allocated and used by a privileged user for direct
> > > > rule insertion and management of the device's steering tables.
> > > >
> > > > The type is provided by the user via the dedicated attribute in
> > > > the alloc_dm ioctl command.
> > > >
> > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > index 8d651c05de62..347d457fcb2f 100644
> > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > @@ -1,6 +1,6 @@
> > > >  config MLX5_INFINIBAND
> > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > >
> > > Why do we need this?
> >
> > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > the caller interface, but it is different between 32 and 64 bits systems.
> > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
>
> ???
>
> PHYS_ADDR_T should only ever be used to store a bus address - and if
> you are passing a bus address to/from FW then you 0 extend it on 32
> bit, and range check on 64 bit.
>
> This is not a reason to force PHYS_ADDR_T_64BIT from a driver

So how will I mark that we are forwarding physical address to FW?

Thanks

>
> Jason
Jason Gunthorpe April 24, 2019, 1:35 p.m. UTC | #5
On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > >
> > > > > This patch adds support for allocating, deallocating and registering
> > > > > a new device memory type, STEERING_SW_ICM.
> > > > > This memory can be allocated and used by a privileged user for direct
> > > > > rule insertion and management of the device's steering tables.
> > > > >
> > > > > The type is provided by the user via the dedicated attribute in
> > > > > the alloc_dm ioctl command.
> > > > >
> > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > @@ -1,6 +1,6 @@
> > > > >  config MLX5_INFINIBAND
> > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > >
> > > > Why do we need this?
> > >
> > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > the caller interface, but it is different between 32 and 64 bits systems.
> > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> >
> > ???
> >
> > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > you are passing a bus address to/from FW then you 0 extend it on 32
> > bit, and range check on 64 bit.
> >
> > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> 
> So how will I mark that we are forwarding physical address to FW?

I don't know what this means.. phys_addr_t will transparently 0 extend
to 64 bit which is the correct thing to do when passing a phys_addr_t
to HW.

Jason
Leon Romanovsky April 24, 2019, 1:46 p.m. UTC | #6
On Wed, Apr 24, 2019 at 10:35:53AM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> > On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > > >
> > > > > > This patch adds support for allocating, deallocating and registering
> > > > > > a new device memory type, STEERING_SW_ICM.
> > > > > > This memory can be allocated and used by a privileged user for direct
> > > > > > rule insertion and management of the device's steering tables.
> > > > > >
> > > > > > The type is provided by the user via the dedicated attribute in
> > > > > > the alloc_dm ioctl command.
> > > > > >
> > > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > > >
> > > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > @@ -1,6 +1,6 @@
> > > > > >  config MLX5_INFINIBAND
> > > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > > >
> > > > > Why do we need this?
> > > >
> > > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > > the caller interface, but it is different between 32 and 64 bits systems.
> > > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> > >
> > > ???
> > >
> > > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > > you are passing a bus address to/from FW then you 0 extend it on 32
> > > bit, and range check on 64 bit.
> > >
> > > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> >
> > So how will I mark that we are forwarding physical address to FW?
>
> I don't know what this means.. phys_addr_t will transparently 0 extend
> to 64 bit which is the correct thing to do when passing a phys_addr_t
> to HW.

It looks to me very similar to what I wanted: mark input as an address
and ensure that it is always u64 as our FW expects.

Thanks

>
> Jason
Jason Gunthorpe April 24, 2019, 1:47 p.m. UTC | #7
On Wed, Apr 24, 2019 at 04:46:04PM +0300, Leon Romanovsky wrote:
> On Wed, Apr 24, 2019 at 10:35:53AM -0300, Jason Gunthorpe wrote:
> > On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> > > On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > > > >
> > > > > > > This patch adds support for allocating, deallocating and registering
> > > > > > > a new device memory type, STEERING_SW_ICM.
> > > > > > > This memory can be allocated and used by a privileged user for direct
> > > > > > > rule insertion and management of the device's steering tables.
> > > > > > >
> > > > > > > The type is provided by the user via the dedicated attribute in
> > > > > > > the alloc_dm ioctl command.
> > > > > > >
> > > > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > > > >
> > > > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > @@ -1,6 +1,6 @@
> > > > > > >  config MLX5_INFINIBAND
> > > > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > > > >
> > > > > > Why do we need this?
> > > > >
> > > > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > > > the caller interface, but it is different between 32 and 64 bits systems.
> > > > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> > > >
> > > > ???
> > > >
> > > > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > > > you are passing a bus address to/from FW then you 0 extend it on 32
> > > > bit, and range check on 64 bit.
> > > >
> > > > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> > >
> > > So how will I mark that we are forwarding physical address to FW?
> >
> > I don't know what this means.. phys_addr_t will transparently 0 extend
> > to 64 bit which is the correct thing to do when passing a phys_addr_t
> > to HW.
> 
> It looks to me very similar to what I wanted: mark input as an address
> and ensure that it is always u64 as our FW expects.

So there is no reason for the kconfig hunk then

Jason
Leon Romanovsky April 24, 2019, 3:49 p.m. UTC | #8
On Wed, Apr 24, 2019 at 10:47:03AM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 24, 2019 at 04:46:04PM +0300, Leon Romanovsky wrote:
> > On Wed, Apr 24, 2019 at 10:35:53AM -0300, Jason Gunthorpe wrote:
> > > On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> > > > On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > > > > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > > > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > >
> > > > > > > > This patch adds support for allocating, deallocating and registering
> > > > > > > > a new device memory type, STEERING_SW_ICM.
> > > > > > > > This memory can be allocated and used by a privileged user for direct
> > > > > > > > rule insertion and management of the device's steering tables.
> > > > > > > >
> > > > > > > > The type is provided by the user via the dedicated attribute in
> > > > > > > > the alloc_dm ioctl command.
> > > > > > > >
> > > > > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > @@ -1,6 +1,6 @@
> > > > > > > >  config MLX5_INFINIBAND
> > > > > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > > > > >
> > > > > > > Why do we need this?
> > > > > >
> > > > > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > > > > the caller interface, but it is different between 32 and 64 bits systems.
> > > > > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> > > > >
> > > > > ???
> > > > >
> > > > > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > > > > you are passing a bus address to/from FW then you 0 extend it on 32
> > > > > bit, and range check on 64 bit.
> > > > >
> > > > > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> > > >
> > > > So how will I mark that we are forwarding physical address to FW?
> > >
> > > I don't know what this means.. phys_addr_t will transparently 0 extend
> > > to 64 bit which is the correct thing to do when passing a phys_addr_t
> > > to HW.
> >
> > It looks to me very similar to what I wanted: mark input as an address
> > and ensure that it is always u64 as our FW expects.
>
> So there is no reason for the kconfig hunk then

The kconfig is needed to ensure that u64 value is truncated to be u32 on
32bits systems.

Thanks

>
> Jason
Jason Gunthorpe April 24, 2019, 3:53 p.m. UTC | #9
On Wed, Apr 24, 2019 at 06:49:37PM +0300, Leon Romanovsky wrote:
> On Wed, Apr 24, 2019 at 10:47:03AM -0300, Jason Gunthorpe wrote:
> > On Wed, Apr 24, 2019 at 04:46:04PM +0300, Leon Romanovsky wrote:
> > > On Wed, Apr 24, 2019 at 10:35:53AM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> > > > > On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > > > > > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > > > > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > > > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > >
> > > > > > > > > This patch adds support for allocating, deallocating and registering
> > > > > > > > > a new device memory type, STEERING_SW_ICM.
> > > > > > > > > This memory can be allocated and used by a privileged user for direct
> > > > > > > > > rule insertion and management of the device's steering tables.
> > > > > > > > >
> > > > > > > > > The type is provided by the user via the dedicated attribute in
> > > > > > > > > the alloc_dm ioctl command.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > > > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > > > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > > > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > > > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > > > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > @@ -1,6 +1,6 @@
> > > > > > > > >  config MLX5_INFINIBAND
> > > > > > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > > > > > >
> > > > > > > > Why do we need this?
> > > > > > >
> > > > > > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > > > > > the caller interface, but it is different between 32 and 64 bits systems.
> > > > > > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> > > > > >
> > > > > > ???
> > > > > >
> > > > > > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > > > > > you are passing a bus address to/from FW then you 0 extend it on 32
> > > > > > bit, and range check on 64 bit.
> > > > > >
> > > > > > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> > > > >
> > > > > So how will I mark that we are forwarding physical address to FW?
> > > >
> > > > I don't know what this means.. phys_addr_t will transparently 0 extend
> > > > to 64 bit which is the correct thing to do when passing a phys_addr_t
> > > > to HW.
> > >
> > > It looks to me very similar to what I wanted: mark input as an address
> > > and ensure that it is always u64 as our FW expects.
> >
> > So there is no reason for the kconfig hunk then
> 
> The kconfig is needed to ensure that u64 value is truncated to be u32 on
> 32bits systems.

What?

Jason
Leon Romanovsky April 24, 2019, 4:27 p.m. UTC | #10
On Wed, Apr 24, 2019 at 12:53:13PM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 24, 2019 at 06:49:37PM +0300, Leon Romanovsky wrote:
> > On Wed, Apr 24, 2019 at 10:47:03AM -0300, Jason Gunthorpe wrote:
> > > On Wed, Apr 24, 2019 at 04:46:04PM +0300, Leon Romanovsky wrote:
> > > > On Wed, Apr 24, 2019 at 10:35:53AM -0300, Jason Gunthorpe wrote:
> > > > > On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> > > > > > On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > > > > > > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > > > > > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > > > > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > > > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > >
> > > > > > > > > > This patch adds support for allocating, deallocating and registering
> > > > > > > > > > a new device memory type, STEERING_SW_ICM.
> > > > > > > > > > This memory can be allocated and used by a privileged user for direct
> > > > > > > > > > rule insertion and management of the device's steering tables.
> > > > > > > > > >
> > > > > > > > > > The type is provided by the user via the dedicated attribute in
> > > > > > > > > > the alloc_dm ioctl command.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > > > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > > > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > > > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > > > > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > > > > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > > > > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > > > > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > > > > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > > > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > > @@ -1,6 +1,6 @@
> > > > > > > > > >  config MLX5_INFINIBAND
> > > > > > > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > > > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > > > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > > > > > > >
> > > > > > > > > Why do we need this?
> > > > > > > >
> > > > > > > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > > > > > > the caller interface, but it is different between 32 and 64 bits systems.
> > > > > > > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> > > > > > >
> > > > > > > ???
> > > > > > >
> > > > > > > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > > > > > > you are passing a bus address to/from FW then you 0 extend it on 32
> > > > > > > bit, and range check on 64 bit.
> > > > > > >
> > > > > > > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> > > > > >
> > > > > > So how will I mark that we are forwarding physical address to FW?
> > > > >
> > > > > I don't know what this means.. phys_addr_t will transparently 0 extend
> > > > > to 64 bit which is the correct thing to do when passing a phys_addr_t
> > > > > to HW.
> > > >
> > > > It looks to me very similar to what I wanted: mark input as an address
> > > > and ensure that it is always u64 as our FW expects.
> > >
> > > So there is no reason for the kconfig hunk then
> >
> > The kconfig is needed to ensure that u64 value is truncated to be u32 on
> > 32bits systems.
>
> What?

I wanted to say "not truncated", sorry about that.

>
> Jason
Jason Gunthorpe April 24, 2019, 4:39 p.m. UTC | #11
On Wed, Apr 24, 2019 at 07:27:36PM +0300, Leon Romanovsky wrote:
> On Wed, Apr 24, 2019 at 12:53:13PM -0300, Jason Gunthorpe wrote:
> > On Wed, Apr 24, 2019 at 06:49:37PM +0300, Leon Romanovsky wrote:
> > > On Wed, Apr 24, 2019 at 10:47:03AM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Apr 24, 2019 at 04:46:04PM +0300, Leon Romanovsky wrote:
> > > > > On Wed, Apr 24, 2019 at 10:35:53AM -0300, Jason Gunthorpe wrote:
> > > > > > On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> > > > > > > On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > > > > > > > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > > > > > > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > > > > > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > > > > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > > >
> > > > > > > > > > > This patch adds support for allocating, deallocating and registering
> > > > > > > > > > > a new device memory type, STEERING_SW_ICM.
> > > > > > > > > > > This memory can be allocated and used by a privileged user for direct
> > > > > > > > > > > rule insertion and management of the device's steering tables.
> > > > > > > > > > >
> > > > > > > > > > > The type is provided by the user via the dedicated attribute in
> > > > > > > > > > > the alloc_dm ioctl command.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > > > > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > > > > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > > > > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > > > > > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > > > > > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > > > > > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > > > > > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > > > > > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > > > > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > > > @@ -1,6 +1,6 @@
> > > > > > > > > > >  config MLX5_INFINIBAND
> > > > > > > > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > > > > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > > > > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > > > > > > > >
> > > > > > > > > > Why do we need this?
> > > > > > > > >
> > > > > > > > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > > > > > > > the caller interface, but it is different between 32 and 64 bits systems.
> > > > > > > > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> > > > > > > >
> > > > > > > > ???
> > > > > > > >
> > > > > > > > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > > > > > > > you are passing a bus address to/from FW then you 0 extend it on 32
> > > > > > > > bit, and range check on 64 bit.
> > > > > > > >
> > > > > > > > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> > > > > > >
> > > > > > > So how will I mark that we are forwarding physical address to FW?
> > > > > >
> > > > > > I don't know what this means.. phys_addr_t will transparently 0 extend
> > > > > > to 64 bit which is the correct thing to do when passing a phys_addr_t
> > > > > > to HW.
> > > > >
> > > > > It looks to me very similar to what I wanted: mark input as an address
> > > > > and ensure that it is always u64 as our FW expects.
> > > >
> > > > So there is no reason for the kconfig hunk then
> > >
> > > The kconfig is needed to ensure that u64 value is truncated to be u32 on
> > > 32bits systems.
> >
> > What?
> 
> I wanted to say "not truncated", sorry about that.

Are we reading back a u64 physical value from the HW? Where?

Jason
Leon Romanovsky April 28, 2019, 11:38 a.m. UTC | #12
On Wed, Apr 24, 2019 at 01:39:26PM -0300, Jason Gunthorpe wrote:
> On Wed, Apr 24, 2019 at 07:27:36PM +0300, Leon Romanovsky wrote:
> > On Wed, Apr 24, 2019 at 12:53:13PM -0300, Jason Gunthorpe wrote:
> > > On Wed, Apr 24, 2019 at 06:49:37PM +0300, Leon Romanovsky wrote:
> > > > On Wed, Apr 24, 2019 at 10:47:03AM -0300, Jason Gunthorpe wrote:
> > > > > On Wed, Apr 24, 2019 at 04:46:04PM +0300, Leon Romanovsky wrote:
> > > > > > On Wed, Apr 24, 2019 at 10:35:53AM -0300, Jason Gunthorpe wrote:
> > > > > > > On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> > > > > > > > On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > > > > > > > > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > > > > > > > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > > > > > > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > > > > > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > > > >
> > > > > > > > > > > > This patch adds support for allocating, deallocating and registering
> > > > > > > > > > > > a new device memory type, STEERING_SW_ICM.
> > > > > > > > > > > > This memory can be allocated and used by a privileged user for direct
> > > > > > > > > > > > rule insertion and management of the device's steering tables.
> > > > > > > > > > > >
> > > > > > > > > > > > The type is provided by the user via the dedicated attribute in
> > > > > > > > > > > > the alloc_dm ioctl command.
> > > > > > > > > > > >
> > > > > > > > > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > > > > > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > > > > > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > > > > > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > > > > > > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > > > > > > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > > > > > > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > > > > > > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > > > > > > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > > > > > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > > > > @@ -1,6 +1,6 @@
> > > > > > > > > > > >  config MLX5_INFINIBAND
> > > > > > > > > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > > > > > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > > > > > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > > > > > > > > >
> > > > > > > > > > > Why do we need this?
> > > > > > > > > >
> > > > > > > > > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > > > > > > > > the caller interface, but it is different between 32 and 64 bits systems.
> > > > > > > > > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> > > > > > > > >
> > > > > > > > > ???
> > > > > > > > >
> > > > > > > > > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > > > > > > > > you are passing a bus address to/from FW then you 0 extend it on 32
> > > > > > > > > bit, and range check on 64 bit.
> > > > > > > > >
> > > > > > > > > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> > > > > > > >
> > > > > > > > So how will I mark that we are forwarding physical address to FW?
> > > > > > >
> > > > > > > I don't know what this means.. phys_addr_t will transparently 0 extend
> > > > > > > to 64 bit which is the correct thing to do when passing a phys_addr_t
> > > > > > > to HW.
> > > > > >
> > > > > > It looks to me very similar to what I wanted: mark input as an address
> > > > > > and ensure that it is always u64 as our FW expects.
> > > > >
> > > > > So there is no reason for the kconfig hunk then
> > > >
> > > > The kconfig is needed to ensure that u64 value is truncated to be u32 on
> > > > 32bits systems.
> > >
> > > What?
> >
> > I wanted to say "not truncated", sorry about that.
>
> Are we reading back a u64 physical value from the HW? Where?

No, we are setting address only.

>
> Jason
Jason Gunthorpe April 28, 2019, 11:56 a.m. UTC | #13
On Sun, Apr 28, 2019 at 02:38:48PM +0300, Leon Romanovsky wrote:
> On Wed, Apr 24, 2019 at 01:39:26PM -0300, Jason Gunthorpe wrote:
> > On Wed, Apr 24, 2019 at 07:27:36PM +0300, Leon Romanovsky wrote:
> > > On Wed, Apr 24, 2019 at 12:53:13PM -0300, Jason Gunthorpe wrote:
> > > > On Wed, Apr 24, 2019 at 06:49:37PM +0300, Leon Romanovsky wrote:
> > > > > On Wed, Apr 24, 2019 at 10:47:03AM -0300, Jason Gunthorpe wrote:
> > > > > > On Wed, Apr 24, 2019 at 04:46:04PM +0300, Leon Romanovsky wrote:
> > > > > > > On Wed, Apr 24, 2019 at 10:35:53AM -0300, Jason Gunthorpe wrote:
> > > > > > > > On Wed, Apr 24, 2019 at 04:25:16PM +0300, Leon Romanovsky wrote:
> > > > > > > > > On Wed, Apr 24, 2019 at 10:14:05AM -0300, Jason Gunthorpe wrote:
> > > > > > > > > > On Wed, Apr 24, 2019 at 04:12:22PM +0300, Leon Romanovsky wrote:
> > > > > > > > > > > On Wed, Apr 24, 2019 at 10:04:40AM -0300, Jason Gunthorpe wrote:
> > > > > > > > > > > > On Sun, Mar 31, 2019 at 07:44:46PM +0300, Leon Romanovsky wrote:
> > > > > > > > > > > > > From: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > > > > >
> > > > > > > > > > > > > This patch adds support for allocating, deallocating and registering
> > > > > > > > > > > > > a new device memory type, STEERING_SW_ICM.
> > > > > > > > > > > > > This memory can be allocated and used by a privileged user for direct
> > > > > > > > > > > > > rule insertion and management of the device's steering tables.
> > > > > > > > > > > > >
> > > > > > > > > > > > > The type is provided by the user via the dedicated attribute in
> > > > > > > > > > > > > the alloc_dm ioctl command.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
> > > > > > > > > > > > > Reviewed-by: Eli Cohen <eli@mellanox.com>
> > > > > > > > > > > > > Reviewed-by: Mark Bloch <markb@mellanox.com>
> > > > > > > > > > > > > Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
> > > > > > > > > > > > >  drivers/infiniband/hw/mlx5/Kconfig        |   2 +-
> > > > > > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.c          | 127 +++++++++++++++++++-
> > > > > > > > > > > > >  drivers/infiniband/hw/mlx5/cmd.h          |   6 +-
> > > > > > > > > > > > >  drivers/infiniband/hw/mlx5/main.c         | 136 +++++++++++++++++++++-
> > > > > > > > > > > > >  drivers/infiniband/hw/mlx5/mlx5_ib.h      |  17 +++
> > > > > > > > > > > > >  drivers/infiniband/hw/mlx5/mr.c           |   7 ++
> > > > > > > > > > > > >  include/uapi/rdma/mlx5_user_ioctl_verbs.h |   2 +
> > > > > > > > > > > > >  7 files changed, 291 insertions(+), 6 deletions(-)
> > > > > > > > > > > > >
> > > > > > > > > > > > > diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > > > > > index 8d651c05de62..347d457fcb2f 100644
> > > > > > > > > > > > > +++ b/drivers/infiniband/hw/mlx5/Kconfig
> > > > > > > > > > > > > @@ -1,6 +1,6 @@
> > > > > > > > > > > > >  config MLX5_INFINIBAND
> > > > > > > > > > > > >  	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
> > > > > > > > > > > > > -	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
> > > > > > > > > > > > > +	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
> > > > > > > > > > > >
> > > > > > > > > > > > Why do we need this?
> > > > > > > > > > >
> > > > > > > > > > > We are using phys_addr_t type and not u64 as an input to FW to emphasize
> > > > > > > > > > > the caller interface, but it is different between 32 and 64 bits systems.
> > > > > > > > > > > Such PHYS_ADDR_T_64BIT will ensure that phys_addr_t will be always 64 bits.
> > > > > > > > > >
> > > > > > > > > > ???
> > > > > > > > > >
> > > > > > > > > > PHYS_ADDR_T should only ever be used to store a bus address - and if
> > > > > > > > > > you are passing a bus address to/from FW then you 0 extend it on 32
> > > > > > > > > > bit, and range check on 64 bit.
> > > > > > > > > >
> > > > > > > > > > This is not a reason to force PHYS_ADDR_T_64BIT from a driver
> > > > > > > > >
> > > > > > > > > So how will I mark that we are forwarding physical address to FW?
> > > > > > > >
> > > > > > > > I don't know what this means.. phys_addr_t will transparently 0 extend
> > > > > > > > to 64 bit which is the correct thing to do when passing a phys_addr_t
> > > > > > > > to HW.
> > > > > > >
> > > > > > > It looks to me very similar to what I wanted: mark input as an address
> > > > > > > and ensure that it is always u64 as our FW expects.
> > > > > >
> > > > > > So there is no reason for the kconfig hunk then
> > > > >
> > > > > The kconfig is needed to ensure that u64 value is truncated to be u32 on
> > > > > 32bits systems.
> > > >
> > > > What?
> > >
> > > I wanted to say "not truncated", sorry about that.
> >
> > Are we reading back a u64 physical value from the HW? Where?
> 
> No, we are setting address only.

There there is no need for the kconfig, the phys_addr_t transparently
0 extends to 64 bits which is the right thing to do.

Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/mlx5/Kconfig b/drivers/infiniband/hw/mlx5/Kconfig
index 8d651c05de62..347d457fcb2f 100644
--- a/drivers/infiniband/hw/mlx5/Kconfig
+++ b/drivers/infiniband/hw/mlx5/Kconfig
@@ -1,6 +1,6 @@ 
 config MLX5_INFINIBAND
 	tristate "Mellanox 5th generation network adapters (ConnectX series) support"
-	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE && PHYS_ADDR_T_64BIT
 	---help---
 	  This driver provides low-level InfiniBand support for
 	  Mellanox Connect-IB PCI Express host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index a405d81ba01a..d3d8cd670493 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -157,7 +157,7 @@  int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
 	return -ENOMEM;
 }
 
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length)
+int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
 {
 	struct mlx5_core_dev *dev = dm->dev;
 	u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
@@ -186,6 +186,131 @@  int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length)
 	return err;
 }
 
+int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
+			  u16 uid, phys_addr_t *addr, u32 *obj_id)
+{
+	struct mlx5_core_dev *dev = dm->dev;
+	u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_sw_icm_in)] = {};
+	unsigned long *block_map;
+	u64 icm_start_addr;
+	u32 log_icm_size;
+	u32 max_blocks;
+	u64 block_idx;
+	void *sw_icm;
+	int ret;
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+	switch (type) {
+	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+						steering_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev, log_steering_sw_icm_size);
+		block_map = dm->steering_sw_icm_alloc_blocks;
+		break;
+	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+		icm_start_addr = MLX5_CAP64_DEV_MEM(dev,
+					header_modify_sw_icm_start_address);
+		log_icm_size = MLX5_CAP_DEV_MEM(dev,
+						log_header_modify_sw_icm_size);
+		block_map = dm->header_modify_sw_icm_alloc_blocks;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+	spin_lock(&dm->lock);
+	block_idx = bitmap_find_next_zero_area(block_map,
+					       max_blocks,
+					       0,
+					       num_blocks, 0);
+
+	if (block_idx < max_blocks)
+		bitmap_set(block_map,
+			   block_idx, num_blocks);
+
+	spin_unlock(&dm->lock);
+
+	if (block_idx >= max_blocks)
+		return -ENOMEM;
+
+	sw_icm = MLX5_ADDR_OF(create_sw_icm_in, in, sw_icm);
+	icm_start_addr += block_idx << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	MLX5_SET64(sw_icm, sw_icm, sw_icm_start_addr,
+		   icm_start_addr);
+	MLX5_SET(sw_icm, sw_icm, log_sw_icm_size, ilog2(length));
+
+	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (ret) {
+		spin_lock(&dm->lock);
+		bitmap_clear(block_map,
+			     block_idx, num_blocks);
+		spin_unlock(&dm->lock);
+
+		return ret;
+	}
+
+	*addr = icm_start_addr;
+	*obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+
+	return 0;
+}
+
+int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
+			    u16 uid, phys_addr_t addr, u32 obj_id)
+{
+	struct mlx5_core_dev *dev = dm->dev;
+	u32 num_blocks = DIV_ROUND_UP(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
+	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+	u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+	unsigned long *block_map;
+	u64 start_idx;
+	int err;
+
+	switch (type) {
+	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+		start_idx =
+			(addr - MLX5_CAP64_DEV_MEM(
+					dev, steering_sw_icm_start_address)) >>
+			MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+		block_map = dm->steering_sw_icm_alloc_blocks;
+		break;
+	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+		start_idx =
+			(addr -
+			 MLX5_CAP64_DEV_MEM(
+				 dev, header_modify_sw_icm_start_address)) >>
+			MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+		block_map = dm->header_modify_sw_icm_alloc_blocks;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	MLX5_SET(general_obj_in_cmd_hdr, in, opcode,
+		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_SW_ICM);
+	MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, obj_id);
+	MLX5_SET(general_obj_in_cmd_hdr, in, uid, uid);
+
+	err =  mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (err)
+		return err;
+
+	spin_lock(&dm->lock);
+	bitmap_clear(block_map,
+		     start_idx, num_blocks);
+	spin_unlock(&dm->lock);
+
+	return 0;
+}
+
 int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
 {
 	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 80a644bea6c7..0572dcba6eae 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -46,7 +46,7 @@  int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
 				void *in, int in_size);
 int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
 			 u64 length, u32 alignment);
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, u64 addr, u64 length);
+int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
 void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
 void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
 void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
@@ -65,4 +65,8 @@  int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
 			     u16 uid);
 int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
 		     u16 opmod, u8 port);
+int mlx5_cmd_alloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
+			  u16 uid, phys_addr_t *addr, u32 *obj_id);
+int mlx5_cmd_dealloc_sw_icm(struct mlx5_dm *dm, int type, u64 length,
+			    u16 uid, phys_addr_t addr, u32 obj_id);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 23436feaa22f..3806a3e7caf5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2291,6 +2291,28 @@  static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
 	return 0;
 }
 
+static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
+					u32 type)
+{
+	switch (type) {
+	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
+		if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
+			return -EOPNOTSUPP;
+		break;
+	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+		if (!capable(CAP_SYS_RAWIO) ||
+		    !capable(CAP_NET_RAW))
+			return -EPERM;
+
+		if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
+		      MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
+			return -EOPNOTSUPP;
+		break;
+	}
+
+	return 0;
+}
+
 static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
 				 struct mlx5_ib_dm *dm,
 				 struct ib_dm_alloc_attr *attr,
@@ -2336,6 +2358,40 @@  static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
 	return err;
 }
 
+static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
+				  struct mlx5_ib_dm *dm,
+				  struct ib_dm_alloc_attr *attr,
+				  struct uverbs_attr_bundle *attrs,
+				  int type)
+{
+	struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
+	u64 act_size;
+	int err;
+
+	/* Allocation size must a multiple of the basic block size
+	 * and a power of 2.
+	 */
+	act_size = roundup(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dm_db->dev));
+	act_size = roundup_pow_of_two(act_size);
+
+	dm->size = act_size;
+	err = mlx5_cmd_alloc_sw_icm(dm_db, type, act_size,
+				    to_mucontext(ctx)->devx_uid, &dm->dev_addr,
+				    &dm->icm_dm.obj_id);
+	if (err)
+		return err;
+
+	err = uverbs_copy_to(attrs,
+			     MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+			     &dm->dev_addr, sizeof(dm->dev_addr));
+	if (err)
+		mlx5_cmd_dealloc_sw_icm(dm_db, type, dm->size,
+					to_mucontext(ctx)->devx_uid,
+					dm->dev_addr, dm->icm_dm.obj_id);
+
+	return err;
+}
+
 struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
 			       struct ib_ucontext *context,
 			       struct ib_dm_alloc_attr *attr,
@@ -2354,6 +2410,10 @@  struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
 	mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
 		    type, attr->length, attr->alignment);
 
+	err = check_dm_type_support(to_mdev(ibdev), type);
+	if (err)
+		return ERR_PTR(err);
+
 	dm = kzalloc(sizeof(*dm), GFP_KERNEL);
 	if (!dm)
 		return ERR_PTR(-ENOMEM);
@@ -2366,6 +2426,10 @@  struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
 					    attr,
 					    attrs);
 		break;
+	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+		err = handle_alloc_dm_sw_icm(context, dm, attr, attrs, type);
+		break;
 	default:
 		err = -EOPNOTSUPP;
 	}
@@ -2382,6 +2446,7 @@  struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
 
 int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
 {
+	struct mlx5_ib_ucontext *ctx = to_mucontext(ibdm->uobject->context);
 	struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
 	struct mlx5_ib_dm *dm = to_mdm(ibdm);
 	u32 page_idx;
@@ -2398,10 +2463,18 @@  int mlx5_ib_dealloc_dm(struct ib_dm *ibdm)
 			    MLX5_CAP64_DEV_MEM(dm_db->dev,
 					       memic_bar_start_addr)) >>
 			   PAGE_SHIFT;
-		bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
+		bitmap_clear(ctx->dm_pages,
 			     page_idx,
 			     DIV_ROUND_UP(dm->size, PAGE_SIZE));
 		break;
+	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+		ret = mlx5_cmd_dealloc_sw_icm(dm_db, dm->type, dm->size,
+					      ctx->devx_uid, dm->dev_addr,
+					      dm->icm_dm.obj_id);
+		if (ret)
+			return ret;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -6000,6 +6073,8 @@  static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
 
 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
+	struct mlx5_core_dev *mdev = dev->mdev;
+
 	mlx5_ib_cleanup_multiport_master(dev);
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
 		srcu_barrier(&dev->mr_srcu);
@@ -6007,11 +6082,29 @@  static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 	}
 
 	WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES));
+
+	WARN_ON(dev->dm.steering_sw_icm_alloc_blocks &&
+		!bitmap_empty(
+			dev->dm.steering_sw_icm_alloc_blocks,
+			BIT(MLX5_CAP_DEV_MEM(mdev, log_steering_sw_icm_size) -
+			    MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
+
+	kfree(dev->dm.steering_sw_icm_alloc_blocks);
+
+	WARN_ON(dev->dm.header_modify_sw_icm_alloc_blocks &&
+		!bitmap_empty(dev->dm.header_modify_sw_icm_alloc_blocks,
+			      BIT(MLX5_CAP_DEV_MEM(
+					  mdev, log_header_modify_sw_icm_size) -
+				  MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev))));
+
+	kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
 }
 
 static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
+	u64 header_modify_icm_blocks = 0;
+	u64 steering_icm_blocks = 0;
 	int err;
 	int i;
 
@@ -6057,16 +6150,51 @@  static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
 	INIT_LIST_HEAD(&dev->qp_list);
 	spin_lock_init(&dev->reset_flow_resource_lock);
 
+	if (MLX5_CAP_GEN_64(mdev, general_obj_types) &
+	    MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM) {
+		if (MLX5_CAP64_DEV_MEM(mdev, steering_sw_icm_start_address)) {
+			steering_icm_blocks =
+				BIT(MLX5_CAP_DEV_MEM(mdev,
+						     log_steering_sw_icm_size) -
+				    MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
+
+			dev->dm.steering_sw_icm_alloc_blocks =
+				kcalloc(BITS_TO_LONGS(steering_icm_blocks),
+					sizeof(unsigned long), GFP_KERNEL);
+			if (!dev->dm.steering_sw_icm_alloc_blocks)
+				goto err_mp;
+		}
+
+		if (MLX5_CAP64_DEV_MEM(mdev,
+				       header_modify_sw_icm_start_address)) {
+			header_modify_icm_blocks = BIT(
+				MLX5_CAP_DEV_MEM(
+					mdev, log_header_modify_sw_icm_size) -
+				MLX5_LOG_SW_ICM_BLOCK_SIZE(mdev));
+
+			dev->dm.header_modify_sw_icm_alloc_blocks =
+				kcalloc(BITS_TO_LONGS(header_modify_icm_blocks),
+					sizeof(unsigned long), GFP_KERNEL);
+			if (!dev->dm.header_modify_sw_icm_alloc_blocks)
+				goto err_dm;
+		}
+	}
+
 	spin_lock_init(&dev->dm.lock);
 	dev->dm.dev = mdev;
 
 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
 		err = init_srcu_struct(&dev->mr_srcu);
 		if (err)
-			goto err_mp;
+			goto err_dm;
 	}
 
 	return 0;
+
+err_dm:
+	kfree(dev->dm.steering_sw_icm_alloc_blocks);
+	kfree(dev->dm.header_modify_sw_icm_alloc_blocks);
+
 err_mp:
 	mlx5_ib_cleanup_multiport_master(dev);
 
@@ -6249,7 +6377,9 @@  static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 		ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_xrc_ops);
 	}
 
-	if (MLX5_CAP_DEV_MEM(mdev, memic))
+	if (MLX5_CAP_DEV_MEM(mdev, memic) ||
+	    MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+	    MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
 		ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
 
 	if (mlx5_accel_ipsec_device_caps(dev->mdev) &
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 714ff948bea7..ac298f2c87a7 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -118,6 +118,10 @@  enum {
 	MLX5_MEMIC_BASE_SIZE	= 1 << MLX5_MEMIC_BASE_ALIGN,
 };
 
+#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)                                        \
+	(MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
+#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+
 struct mlx5_ib_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct list_head	db_page_list;
@@ -561,6 +565,12 @@  struct mlx5_ib_dm {
 	phys_addr_t		dev_addr;
 	u32			type;
 	size_t			size;
+	union {
+		struct {
+			u32	obj_id;
+		} icm_dm;
+		/* other dm types specific params should be added here */
+	};
 };
 
 #define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -571,6 +581,11 @@  struct mlx5_ib_dm {
 					 IB_ACCESS_REMOTE_ATOMIC |\
 					 IB_ZERO_BASED)
 
+#define MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS (IB_ACCESS_LOCAL_WRITE   |\
+					  IB_ACCESS_REMOTE_WRITE  |\
+					  IB_ACCESS_REMOTE_READ   |\
+					  IB_ZERO_BASED)
+
 struct mlx5_ib_mr {
 	struct ib_mr		ibmr;
 	void			*descs;
@@ -858,6 +873,8 @@  struct mlx5_dm {
 	 */
 	spinlock_t lock;
 	DECLARE_BITMAP(memic_alloc_pages, MLX5_MAX_MEMIC_PAGES);
+	unsigned long *steering_sw_icm_alloc_blocks;
+	unsigned long *header_modify_sw_icm_alloc_blocks;
 };
 
 struct mlx5_read_counters_attr {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 7c54eacd9574..f6ff6d6968db 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1247,6 +1247,13 @@  struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
 		mode = MLX5_MKC_ACCESS_MODE_MEMIC;
 		start_addr -= pci_resource_start(dev->pdev, 0);
 		break;
+	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
+	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
+		if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
+			return ERR_PTR(-EINVAL);
+
+		mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
+		break;
 	default:
 		return ERR_PTR(-EINVAL);
 	}
diff --git a/include/uapi/rdma/mlx5_user_ioctl_verbs.h b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
index c291fb2f8446..a8f34c237458 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_verbs.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_verbs.h
@@ -59,6 +59,8 @@  struct mlx5_ib_uapi_devx_async_cmd_hdr {
 
 enum mlx5_ib_uapi_dm_type {
 	MLX5_IB_UAPI_DM_TYPE_MEMIC,
+	MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM,
+	MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM,
 };
 
 #endif