diff mbox series

[net-next,04/15] net/mlx5e: Receive flow steering framework for accelerated TCP flows

Message ID 20200627211727.259569-5-saeedm@mellanox.com
State Accepted
Delegated to: David Miller
Headers show
Series [net-next,01/15] net/mlx5e: Turn XSK ICOSQ into a general asynchronous one | expand

Commit Message

Saeed Mahameed June 27, 2020, 9:17 p.m. UTC
From: Boris Pismenny <borisp@mellanox.com>

The framework allows creating flow tables to steer incoming traffic of
TCP sockets to the acceleration TIRs.
This is used in downstream patches for TLS, and will be used in the
future for other offloads.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en/fs.h   |  10 +
 .../mellanox/mlx5/core/en_accel/fs_tcp.c      | 280 ++++++++++++++++++
 .../mellanox/mlx5/core/en_accel/fs_tcp.h      |  18 ++
 .../net/ethernet/mellanox/mlx5/core/fs_core.c |   4 +-
 5 files changed, 311 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h

Comments

Tom Herbert June 27, 2020, 10:34 p.m. UTC | #1
On Sat, Jun 27, 2020 at 2:19 PM Saeed Mahameed <saeedm@mellanox.com> wrote:
>
> From: Boris Pismenny <borisp@mellanox.com>
>
> The framework allows creating flow tables to steer incoming traffic of
> TCP sockets to the acceleration TIRs.
> This is used in downstream patches for TLS, and will be used in the
> future for other offloads.
>
> Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> ---
>  .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
>  .../net/ethernet/mellanox/mlx5/core/en/fs.h   |  10 +
>  .../mellanox/mlx5/core/en_accel/fs_tcp.c      | 280 ++++++++++++++++++
>  .../mellanox/mlx5/core/en_accel/fs_tcp.h      |  18 ++
>  .../net/ethernet/mellanox/mlx5/core/fs_core.c |   4 +-

Saeed,

What is the relationship between this and RFS, accelerated RFS, and
now PTQ? Is this something that we can generalize in the stack and
support in the driver/device with a simple interface like we do with
aRFS and ndo_rx_flow_steer?

Tom

>  5 files changed, 311 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index b61e47bc16e8..8ffa1325a18f 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -74,7 +74,7 @@ mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
>                                      en_accel/ipsec_stats.o
>
>  mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
> -                                  en_accel/ktls.o en_accel/ktls_tx.o
> +                                  en_accel/ktls.o en_accel/ktls_tx.o en_accel/fs_tcp.o
>
>  mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
>                                         steering/dr_matcher.o steering/dr_rule.o \
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
> index c633579474c3..385cbff1caf1 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
> @@ -123,6 +123,9 @@ enum {
>         MLX5E_L2_FT_LEVEL,
>         MLX5E_TTC_FT_LEVEL,
>         MLX5E_INNER_TTC_FT_LEVEL,
> +#ifdef CONFIG_MLX5_EN_TLS
> +       MLX5E_ACCEL_FS_TCP_FT_LEVEL,
> +#endif
>  #ifdef CONFIG_MLX5_EN_ARFS
>         MLX5E_ARFS_FT_LEVEL
>  #endif
> @@ -216,6 +219,10 @@ static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUP
>  static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) {        return -EOPNOTSUPP; }
>  #endif
>
> +#ifdef CONFIG_MLX5_EN_TLS
> +struct mlx5e_accel_fs_tcp;
> +#endif
> +
>  struct mlx5e_flow_steering {
>         struct mlx5_flow_namespace      *ns;
>  #ifdef CONFIG_MLX5_EN_RXNFC
> @@ -229,6 +236,9 @@ struct mlx5e_flow_steering {
>  #ifdef CONFIG_MLX5_EN_ARFS
>         struct mlx5e_arfs_tables        arfs;
>  #endif
> +#ifdef CONFIG_MLX5_EN_TLS
> +       struct mlx5e_accel_fs_tcp      *accel_tcp;
> +#endif
>  };
>
>  struct ttc_params {
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
> new file mode 100644
> index 000000000000..a0e9082e15b0
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
> @@ -0,0 +1,280 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
> +
> +#include <linux/netdevice.h>
> +#include "en_accel/fs_tcp.h"
> +#include "fs_core.h"
> +
> +enum accel_fs_tcp_type {
> +       ACCEL_FS_IPV4_TCP,
> +       ACCEL_FS_IPV6_TCP,
> +       ACCEL_FS_TCP_NUM_TYPES,
> +};
> +
> +struct mlx5e_accel_fs_tcp {
> +       struct mlx5e_flow_table tables[ACCEL_FS_TCP_NUM_TYPES];
> +       struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
> +};
> +
> +static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
> +{
> +       switch (i) {
> +       case ACCEL_FS_IPV4_TCP:
> +               return MLX5E_TT_IPV4_TCP;
> +       default: /* ACCEL_FS_IPV6_TCP */
> +               return MLX5E_TT_IPV6_TCP;
> +       }
> +}
> +
> +static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv,
> +                                        enum accel_fs_tcp_type type)
> +{
> +       struct mlx5e_flow_table *accel_fs_t;
> +       struct mlx5_flow_destination dest;
> +       struct mlx5e_accel_fs_tcp *fs_tcp;
> +       MLX5_DECLARE_FLOW_ACT(flow_act);
> +       struct mlx5_flow_handle *rule;
> +       int err = 0;
> +
> +       fs_tcp = priv->fs.accel_tcp;
> +       accel_fs_t = &fs_tcp->tables[type];
> +
> +       dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type));
> +       rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
> +       if (IS_ERR(rule)) {
> +               err = PTR_ERR(rule);
> +               netdev_err(priv->netdev,
> +                          "%s: add default rule failed, accel_fs type=%d, err %d\n",
> +                          __func__, type, err);
> +               return err;
> +       }
> +
> +       fs_tcp->default_rules[type] = rule;
> +       return 0;
> +}
> +
> +#define MLX5E_ACCEL_FS_TCP_NUM_GROUPS  (2)
> +#define MLX5E_ACCEL_FS_TCP_GROUP1_SIZE (BIT(16) - 1)
> +#define MLX5E_ACCEL_FS_TCP_GROUP2_SIZE (BIT(0))
> +#define MLX5E_ACCEL_FS_TCP_TABLE_SIZE  (MLX5E_ACCEL_FS_TCP_GROUP1_SIZE +\
> +                                        MLX5E_ACCEL_FS_TCP_GROUP2_SIZE)
> +static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft,
> +                                     enum accel_fs_tcp_type type)
> +{
> +       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
> +       void *outer_headers_c;
> +       int ix = 0;
> +       u32 *in;
> +       int err;
> +       u8 *mc;
> +
> +       ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
> +       in = kvzalloc(inlen, GFP_KERNEL);
> +       if  (!in || !ft->g) {
> +               kvfree(ft->g);
> +               kvfree(in);
> +               return -ENOMEM;
> +       }
> +
> +       mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
> +       outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
> +       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
> +       MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version);
> +
> +       switch (type) {
> +       case ACCEL_FS_IPV4_TCP:
> +       case ACCEL_FS_IPV6_TCP:
> +               MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
> +               MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
> +               break;
> +       default:
> +               err = -EINVAL;
> +               goto out;
> +       }
> +
> +       switch (type) {
> +       case ACCEL_FS_IPV4_TCP:
> +               MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
> +                                src_ipv4_src_ipv6.ipv4_layout.ipv4);
> +               MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
> +                                dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
> +               break;
> +       case ACCEL_FS_IPV6_TCP:
> +               memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
> +                                   src_ipv4_src_ipv6.ipv6_layout.ipv6),
> +                      0xff, 16);
> +               memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
> +                                   dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
> +                      0xff, 16);
> +               break;
> +       default:
> +               err = -EINVAL;
> +               goto out;
> +       }
> +
> +       MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
> +       MLX5_SET_CFG(in, start_flow_index, ix);
> +       ix += MLX5E_ACCEL_FS_TCP_GROUP1_SIZE;
> +       MLX5_SET_CFG(in, end_flow_index, ix - 1);
> +       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
> +       if (IS_ERR(ft->g[ft->num_groups]))
> +               goto err;
> +       ft->num_groups++;
> +
> +       /* Default Flow Group */
> +       memset(in, 0, inlen);
> +       MLX5_SET_CFG(in, start_flow_index, ix);
> +       ix += MLX5E_ACCEL_FS_TCP_GROUP2_SIZE;
> +       MLX5_SET_CFG(in, end_flow_index, ix - 1);
> +       ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
> +       if (IS_ERR(ft->g[ft->num_groups]))
> +               goto err;
> +       ft->num_groups++;
> +
> +       kvfree(in);
> +       return 0;
> +
> +err:
> +       err = PTR_ERR(ft->g[ft->num_groups]);
> +       ft->g[ft->num_groups] = NULL;
> +out:
> +       kvfree(in);
> +
> +       return err;
> +}
> +
> +static int accel_fs_tcp_create_table(struct mlx5e_priv *priv, enum accel_fs_tcp_type type)
> +{
> +       struct mlx5e_flow_table *ft = &priv->fs.accel_tcp->tables[type];
> +       struct mlx5_flow_table_attr ft_attr = {};
> +       int err;
> +
> +       ft->num_groups = 0;
> +
> +       ft_attr.max_fte = MLX5E_ACCEL_FS_TCP_TABLE_SIZE;
> +       ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL;
> +       ft_attr.prio = MLX5E_NIC_PRIO;
> +
> +       ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
> +       if (IS_ERR(ft->t)) {
> +               err = PTR_ERR(ft->t);
> +               ft->t = NULL;
> +               return err;
> +       }
> +
> +       netdev_dbg(priv->netdev, "Created fs accel table id %u level %u\n",
> +                  ft->t->id, ft->t->level);
> +
> +       err = accel_fs_tcp_create_groups(ft, type);
> +       if (err)
> +               goto err;
> +
> +       err = accel_fs_tcp_add_default_rule(priv, type);
> +       if (err)
> +               goto err;
> +
> +       return 0;
> +err:
> +       mlx5e_destroy_flow_table(ft);
> +       return err;
> +}
> +
> +static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
> +{
> +       int err, i;
> +
> +       for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
> +               /* Modify ttc rules destination to point back to the indir TIRs */
> +               err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i));
> +               if (err) {
> +                       netdev_err(priv->netdev,
> +                                  "%s: modify ttc[%d] default destination failed, err(%d)\n",
> +                                  __func__, fs_accel2tt(i), err);
> +                       return err;
> +               }
> +       }
> +
> +       return 0;
> +}
> +
> +static int accel_fs_tcp_enable(struct mlx5e_priv *priv)
> +{
> +       struct mlx5_flow_destination dest = {};
> +       int err, i;
> +
> +       dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
> +       for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
> +               dest.ft = priv->fs.accel_tcp->tables[i].t;
> +
> +               /* Modify ttc rules destination to point on the accel_fs FTs */
> +               err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest);
> +               if (err) {
> +                       netdev_err(priv->netdev,
> +                                  "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
> +                                  __func__, fs_accel2tt(i), err);
> +                       return err;
> +               }
> +       }
> +       return 0;
> +}
> +
> +static void accel_fs_tcp_destroy_table(struct mlx5e_priv *priv, int i)
> +{
> +       struct mlx5e_accel_fs_tcp *fs_tcp;
> +
> +       fs_tcp = priv->fs.accel_tcp;
> +       if (IS_ERR_OR_NULL(fs_tcp->tables[i].t))
> +               return;
> +
> +       mlx5_del_flow_rules(fs_tcp->default_rules[i]);
> +       mlx5e_destroy_flow_table(&fs_tcp->tables[i]);
> +       fs_tcp->tables[i].t = NULL;
> +}
> +
> +void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv)
> +{
> +       int i;
> +
> +       if (!priv->fs.accel_tcp)
> +               return;
> +
> +       accel_fs_tcp_disable(priv);
> +
> +       for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++)
> +               accel_fs_tcp_destroy_table(priv, i);
> +
> +       kfree(priv->fs.accel_tcp);
> +       priv->fs.accel_tcp = NULL;
> +}
> +
> +int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv)
> +{
> +       int i, err;
> +
> +       if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version))
> +               return -EOPNOTSUPP;
> +
> +       priv->fs.accel_tcp = kzalloc(sizeof(*priv->fs.accel_tcp), GFP_KERNEL);
> +       if (!priv->fs.accel_tcp)
> +               return -ENOMEM;
> +
> +       for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
> +               err = accel_fs_tcp_create_table(priv, i);
> +               if (err)
> +                       goto err_destroy_tables;
> +       }
> +
> +       err = accel_fs_tcp_enable(priv);
> +       if (err)
> +               goto err_destroy_tables;
> +
> +       return 0;
> +
> +err_destroy_tables:
> +       while (--i >= 0)
> +               accel_fs_tcp_destroy_table(priv, i);
> +
> +       kfree(priv->fs.accel_tcp);
> +       priv->fs.accel_tcp = NULL;
> +       return err;
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
> new file mode 100644
> index 000000000000..0df53473550a
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
> +
> +#ifndef __MLX5E_ACCEL_FS_TCP_H__
> +#define __MLX5E_ACCEL_FS_TCP_H__
> +
> +#include "en.h"
> +
> +#ifdef CONFIG_MLX5_EN_TLS
> +int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv);
> +void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv);
> +#else
> +static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv) { return 0; }
> +static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv) {}
> +#endif
> +
> +#endif /* __MLX5E_ACCEL_FS_TCP_H__ */
> +
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
> index e47a66983935..785b2960d6b5 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
> @@ -105,8 +105,8 @@
>  #define ETHTOOL_PRIO_NUM_LEVELS 1
>  #define ETHTOOL_NUM_PRIOS 11
>  #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
> -/* Vlan, mac, ttc, inner ttc, aRFS */
> -#define KERNEL_NIC_PRIO_NUM_LEVELS 5
> +/* Vlan, mac, ttc, inner ttc, {aRFS/accel} */
> +#define KERNEL_NIC_PRIO_NUM_LEVELS 6
>  #define KERNEL_NIC_NUM_PRIOS 1
>  /* One more level for tc */
>  #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)
> --
> 2.26.2
>
Saeed Mahameed June 29, 2020, 6:57 a.m. UTC | #2
On Sat, 2020-06-27 at 15:34 -0700, Tom Herbert wrote:
> On Sat, Jun 27, 2020 at 2:19 PM Saeed Mahameed <saeedm@mellanox.com>
> wrote:
> > From: Boris Pismenny <borisp@mellanox.com>
> > 
> > The framework allows creating flow tables to steer incoming traffic
> > of
> > TCP sockets to the acceleration TIRs.
> > This is used in downstream patches for TLS, and will be used in the
> > future for other offloads.
> > 
> > Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> > Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> > Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> > ---
> >  .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
> >  .../net/ethernet/mellanox/mlx5/core/en/fs.h   |  10 +
> >  .../mellanox/mlx5/core/en_accel/fs_tcp.c      | 280
> > ++++++++++++++++++
> >  .../mellanox/mlx5/core/en_accel/fs_tcp.h      |  18 ++
> >  .../net/ethernet/mellanox/mlx5/core/fs_core.c |   4 +-
> 
> Saeed,
> 
> What is the relationship between this and RFS, accelerated RFS, and
> now PTQ? Is this something that we can generalize in the stack and

Hi Tom,

This is very similar to our internal aRFS HW tables implementation, but
it is only meant for stateful TCP acceleration filtering and processing,
mainly for TLS encrypt/decrypt in downstream patches and NVMe accel in a
future submission.

What this mlx5 framework does for now is add a TCP steering filter in
the HW and attach an action to it (for now RX TLS decrypt), and then
forward to a regular RSS RX queue. This is similar to aRFS, where we add
a 5-tuple filter in the HW and the action is to forward to a specific
CPU RX queue instead of the default RSS table.

For PTQ I am not really sure, since I felt a bit confused when I read
the doc and I couldn't really see how PTQ creates/asks for dedicated
hardware queues/filters; I will try to go through the patches
tomorrow.

> support in the driver/device with a simple interface like we do with
> aRFS and ndo_rx_flow_steer?
> 

Currently, just like the aRFS HW tables which are programmed via
ndo_rx_flow_steer, this TCP flow table is programmed via
netdev->tlsdev_ops->tls_dev_add/del(), for TLS sockets to be offloaded
to HW.

as implemented in:
[net-next 08/15] net/mlx5e: kTLS, Add kTLS RX HW offload support

But yes, the HW filter is always similar; only the actions are
different (encrypt, or forward to a specific CPU).

So maybe a unified generic ndo can work for TLS, aRFS, PTQ, XSK,
Intel's ADQ, and maybe more. It would also make it easier to introduce
more flow-based offloads (flows that do not belong to the TC layer),
such as NVMe zero copy.

There were lots of talks and discussions by Magnus, Jesper, Bjorn,
Maxim and many others to improve netdev queue management and make
networking queues a "first class kernel citizen". I believe flow-based
filters should be part of that effort, and I think you already address
some of this in your PTQ series.

- Saeed.
Tom Herbert June 29, 2020, 2:25 p.m. UTC | #3
On Sun, Jun 28, 2020 at 11:57 PM Saeed Mahameed <saeedm@mellanox.com> wrote:
>
> On Sat, 2020-06-27 at 15:34 -0700, Tom Herbert wrote:
> > On Sat, Jun 27, 2020 at 2:19 PM Saeed Mahameed <saeedm@mellanox.com>
> > wrote:
> > > From: Boris Pismenny <borisp@mellanox.com>
> > >
> > > The framework allows creating flow tables to steer incoming traffic
> > > of
> > > TCP sockets to the acceleration TIRs.
> > > This is used in downstream patches for TLS, and will be used in the
> > > future for other offloads.
> > >
> > > Signed-off-by: Boris Pismenny <borisp@mellanox.com>
> > > Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
> > > Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
> > > ---
> > >  .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
> > >  .../net/ethernet/mellanox/mlx5/core/en/fs.h   |  10 +
> > >  .../mellanox/mlx5/core/en_accel/fs_tcp.c      | 280
> > > ++++++++++++++++++
> > >  .../mellanox/mlx5/core/en_accel/fs_tcp.h      |  18 ++
> > >  .../net/ethernet/mellanox/mlx5/core/fs_core.c |   4 +-
> >
> > Saeed,
> >
> > What is the relationship between this and RFS, accelerated RFS, and
> > now PTQ? Is this something that we can generalize in the stack and
>
> Hi Tom,
>
> This is very similar to our internal aRFS HW tables implementation but
> is only meant for TCP state-full acceleration filtering and processing,
> mainly for TLS ecrypt/decrypt in downstream patches and nvme accel in a
> future submission.
>

Saeed,

Receive Flow Steering is a specific kernel stack functionality that
has been in the kernel for over ten years, and accelerated Receive Flow
Steering is the hardware acceleration variant that has been in the
kernel almost as long (see scaling.txt). If these patches don't
leverage or extend RFS, then please call this something else to avoid
confusion.

> what this mlx5 framework does for now is add a TCP steering filter in
> the HW and attach an action to it  (for now RX TLS decrypt) and then
> forward to regular RSS rx queue. similar to aRFS where we add 5 tuple
> filter in the HW and the action will be forward to specific CPU RX
> queue instead of the default RSS table.
>
> For PTQ i am not really sure, since i felt a bit confused when I read
> the doc and i couldn't really see how PTQ creates/asks for dedicated
> hwardware queues/filters, i will try to go through the patches
> tomorrow.
>
> > support in the driver/device with a simple interface like we do with
> > aRFS and ndo_rx_flow_steer?
> >
>
> Currently just like the aRFS HW tables which are programmed via
> ndo_rx_flow_steer this TCP Flow table is programmed via
> netdev->tlsdev_ops->tls_dev_add/del(), for TLS sockets to be offloaded
> to HW.
>
> as implemented in:
> [net-next 08/15] net/mlx5e: kTLS, Add kTLS RX HW offload support
>
> But yes the HW filter is is always similar, only the actions are
> different (encrypt or Forward to specific CPU),
>
> So maybe a unified generic ndo can work for TLS, aRFS, PTQ, XSK,
> intel's ADQ, and maybe more. Also make it easier to introduce more flow
> based offloads (flows that do not belong to the TC layer) such as nvme
> zero copy.
>
That's an admirable goal, but I don't see how these patches steer
towards that. The patch set is over 1600 LOC, nearly all of which is
in MLNX driver code. Can some proportion of this code be generalized
and moved into the stack to become common code that other drivers can
use, instead of having to recreate this code for each driver that might
want to support advanced offloads?

Tom

> There were lots of talks and discussions by Magnus, Jesper, Bjorn,
> Maxim and many others to improve netdev queue management and make
> networking queues a "first class kernel citizen" I believe flow based
> filters should be part of that effort, and i think you already address
> some of this in your PTQ series.
>
> - Saeed.
>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index b61e47bc16e8..8ffa1325a18f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -74,7 +74,7 @@  mlx5_core-$(CONFIG_MLX5_EN_IPSEC) += en_accel/ipsec.o en_accel/ipsec_rxtx.o \
 				     en_accel/ipsec_stats.o
 
 mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/tls.o en_accel/tls_rxtx.o en_accel/tls_stats.o \
-				   en_accel/ktls.o en_accel/ktls_tx.o
+				   en_accel/ktls.o en_accel/ktls_tx.o en_accel/fs_tcp.o
 
 mlx5_core-$(CONFIG_MLX5_SW_STEERING) += steering/dr_domain.o steering/dr_table.o \
 					steering/dr_matcher.o steering/dr_rule.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index c633579474c3..385cbff1caf1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -123,6 +123,9 @@  enum {
 	MLX5E_L2_FT_LEVEL,
 	MLX5E_TTC_FT_LEVEL,
 	MLX5E_INNER_TTC_FT_LEVEL,
+#ifdef CONFIG_MLX5_EN_TLS
+	MLX5E_ACCEL_FS_TCP_FT_LEVEL,
+#endif
 #ifdef CONFIG_MLX5_EN_ARFS
 	MLX5E_ARFS_FT_LEVEL
 #endif
@@ -216,6 +219,10 @@  static inline int mlx5e_arfs_enable(struct mlx5e_priv *priv) { return -EOPNOTSUP
 static inline int mlx5e_arfs_disable(struct mlx5e_priv *priv) {	return -EOPNOTSUPP; }
 #endif
 
+#ifdef CONFIG_MLX5_EN_TLS
+struct mlx5e_accel_fs_tcp;
+#endif
+
 struct mlx5e_flow_steering {
 	struct mlx5_flow_namespace      *ns;
 #ifdef CONFIG_MLX5_EN_RXNFC
@@ -229,6 +236,9 @@  struct mlx5e_flow_steering {
 #ifdef CONFIG_MLX5_EN_ARFS
 	struct mlx5e_arfs_tables        arfs;
 #endif
+#ifdef CONFIG_MLX5_EN_TLS
+	struct mlx5e_accel_fs_tcp      *accel_tcp;
+#endif
 };
 
 struct ttc_params {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
new file mode 100644
index 000000000000..a0e9082e15b0
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c
@@ -0,0 +1,280 @@ 
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#include <linux/netdevice.h>
+#include "en_accel/fs_tcp.h"
+#include "fs_core.h"
+
+enum accel_fs_tcp_type {
+	ACCEL_FS_IPV4_TCP,
+	ACCEL_FS_IPV6_TCP,
+	ACCEL_FS_TCP_NUM_TYPES,
+};
+
+struct mlx5e_accel_fs_tcp {
+	struct mlx5e_flow_table tables[ACCEL_FS_TCP_NUM_TYPES];
+	struct mlx5_flow_handle *default_rules[ACCEL_FS_TCP_NUM_TYPES];
+};
+
+static enum mlx5e_traffic_types fs_accel2tt(enum accel_fs_tcp_type i)
+{
+	switch (i) {
+	case ACCEL_FS_IPV4_TCP:
+		return MLX5E_TT_IPV4_TCP;
+	default: /* ACCEL_FS_IPV6_TCP */
+		return MLX5E_TT_IPV6_TCP;
+	}
+}
+
+static int accel_fs_tcp_add_default_rule(struct mlx5e_priv *priv,
+					 enum accel_fs_tcp_type type)
+{
+	struct mlx5e_flow_table *accel_fs_t;
+	struct mlx5_flow_destination dest;
+	struct mlx5e_accel_fs_tcp *fs_tcp;
+	MLX5_DECLARE_FLOW_ACT(flow_act);
+	struct mlx5_flow_handle *rule;
+	int err = 0;
+
+	fs_tcp = priv->fs.accel_tcp;
+	accel_fs_t = &fs_tcp->tables[type];
+
+	dest = mlx5e_ttc_get_default_dest(priv, fs_accel2tt(type));
+	rule = mlx5_add_flow_rules(accel_fs_t->t, NULL, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		netdev_err(priv->netdev,
+			   "%s: add default rule failed, accel_fs type=%d, err %d\n",
+			   __func__, type, err);
+		return err;
+	}
+
+	fs_tcp->default_rules[type] = rule;
+	return 0;
+}
+
+#define MLX5E_ACCEL_FS_TCP_NUM_GROUPS	(2)
+#define MLX5E_ACCEL_FS_TCP_GROUP1_SIZE	(BIT(16) - 1)
+#define MLX5E_ACCEL_FS_TCP_GROUP2_SIZE	(BIT(0))
+#define MLX5E_ACCEL_FS_TCP_TABLE_SIZE	(MLX5E_ACCEL_FS_TCP_GROUP1_SIZE +\
+					 MLX5E_ACCEL_FS_TCP_GROUP2_SIZE)
+static int accel_fs_tcp_create_groups(struct mlx5e_flow_table *ft,
+				      enum accel_fs_tcp_type type)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	void *outer_headers_c;
+	int ix = 0;
+	u32 *in;
+	int err;
+	u8 *mc;
+
+	ft->g = kcalloc(MLX5E_ACCEL_FS_TCP_NUM_GROUPS, sizeof(*ft->g), GFP_KERNEL);
+	in = kvzalloc(inlen, GFP_KERNEL);
+	if  (!in || !ft->g) {
+		kvfree(ft->g);
+		kvfree(in);
+		return -ENOMEM;
+	}
+
+	mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+	outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_version);
+
+	switch (type) {
+	case ACCEL_FS_IPV4_TCP:
+	case ACCEL_FS_IPV6_TCP:
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport);
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport);
+		break;
+	default:
+		err = -EINVAL;
+		goto out;
+	}
+
+	switch (type) {
+	case ACCEL_FS_IPV4_TCP:
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+				 src_ipv4_src_ipv6.ipv4_layout.ipv4);
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c,
+				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+		break;
+	case ACCEL_FS_IPV6_TCP:
+		memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       0xff, 16);
+		memset(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
+				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       0xff, 16);
+		break;
+	default:
+		err = -EINVAL;
+		goto out;
+	}
+
+	MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_ACCEL_FS_TCP_GROUP1_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	/* Default Flow Group */
+	memset(in, 0, inlen);
+	MLX5_SET_CFG(in, start_flow_index, ix);
+	ix += MLX5E_ACCEL_FS_TCP_GROUP2_SIZE;
+	MLX5_SET_CFG(in, end_flow_index, ix - 1);
+	ft->g[ft->num_groups] = mlx5_create_flow_group(ft->t, in);
+	if (IS_ERR(ft->g[ft->num_groups]))
+		goto err;
+	ft->num_groups++;
+
+	kvfree(in);
+	return 0;
+
+err:
+	err = PTR_ERR(ft->g[ft->num_groups]);
+	ft->g[ft->num_groups] = NULL;
+out:
+	kvfree(in);
+
+	return err;
+}
+
+static int accel_fs_tcp_create_table(struct mlx5e_priv *priv, enum accel_fs_tcp_type type)
+{
+	struct mlx5e_flow_table *ft = &priv->fs.accel_tcp->tables[type];
+	struct mlx5_flow_table_attr ft_attr = {};
+	int err;
+
+	ft->num_groups = 0;
+
+	ft_attr.max_fte = MLX5E_ACCEL_FS_TCP_TABLE_SIZE;
+	ft_attr.level = MLX5E_ACCEL_FS_TCP_FT_LEVEL;
+	ft_attr.prio = MLX5E_NIC_PRIO;
+
+	ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr);
+	if (IS_ERR(ft->t)) {
+		err = PTR_ERR(ft->t);
+		ft->t = NULL;
+		return err;
+	}
+
+	netdev_dbg(priv->netdev, "Created fs accel table id %u level %u\n",
+		   ft->t->id, ft->t->level);
+
+	err = accel_fs_tcp_create_groups(ft, type);
+	if (err)
+		goto err;
+
+	err = accel_fs_tcp_add_default_rule(priv, type);
+	if (err)
+		goto err;
+
+	return 0;
+err:
+	mlx5e_destroy_flow_table(ft);
+	return err;
+}
+
+static int accel_fs_tcp_disable(struct mlx5e_priv *priv)
+{
+	int err, i;
+
+	for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+		/* Modify ttc rules destination to point back to the indir TIRs */
+		err = mlx5e_ttc_fwd_default_dest(priv, fs_accel2tt(i));
+		if (err) {
+			netdev_err(priv->netdev,
+				   "%s: modify ttc[%d] default destination failed, err(%d)\n",
+				   __func__, fs_accel2tt(i), err);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int accel_fs_tcp_enable(struct mlx5e_priv *priv)
+{
+	struct mlx5_flow_destination dest = {};
+	int err, i;
+
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+		dest.ft = priv->fs.accel_tcp->tables[i].t;
+
+		/* Modify ttc rules destination to point on the accel_fs FTs */
+		err = mlx5e_ttc_fwd_dest(priv, fs_accel2tt(i), &dest);
+		if (err) {
+			netdev_err(priv->netdev,
+				   "%s: modify ttc[%d] destination to accel failed, err(%d)\n",
+				   __func__, fs_accel2tt(i), err);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static void accel_fs_tcp_destroy_table(struct mlx5e_priv *priv, int i)
+{
+	struct mlx5e_accel_fs_tcp *fs_tcp;
+
+	fs_tcp = priv->fs.accel_tcp;
+	if (IS_ERR_OR_NULL(fs_tcp->tables[i].t))
+		return;
+
+	mlx5_del_flow_rules(fs_tcp->default_rules[i]);
+	mlx5e_destroy_flow_table(&fs_tcp->tables[i]);
+	fs_tcp->tables[i].t = NULL;
+}
+
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv)
+{
+	int i;
+
+	if (!priv->fs.accel_tcp)
+		return;
+
+	accel_fs_tcp_disable(priv);
+
+	for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++)
+		accel_fs_tcp_destroy_table(priv, i);
+
+	kfree(priv->fs.accel_tcp);
+	priv->fs.accel_tcp = NULL;
+}
+
+int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv)
+{
+	int i, err;
+
+	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ft_field_support.outer_ip_version))
+		return -EOPNOTSUPP;
+
+	priv->fs.accel_tcp = kzalloc(sizeof(*priv->fs.accel_tcp), GFP_KERNEL);
+	if (!priv->fs.accel_tcp)
+		return -ENOMEM;
+
+	for (i = 0; i < ACCEL_FS_TCP_NUM_TYPES; i++) {
+		err = accel_fs_tcp_create_table(priv, i);
+		if (err)
+			goto err_destroy_tables;
+	}
+
+	err = accel_fs_tcp_enable(priv);
+	if (err)
+		goto err_destroy_tables;
+
+	return 0;
+
+err_destroy_tables:
+	while (--i >= 0)
+		accel_fs_tcp_destroy_table(priv, i);
+
+	kfree(priv->fs.accel_tcp);
+	priv->fs.accel_tcp = NULL;
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
new file mode 100644
index 000000000000..0df53473550a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.h
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020, Mellanox Technologies inc. All rights reserved. */
+
+#ifndef __MLX5E_ACCEL_FS_TCP_H__
+#define __MLX5E_ACCEL_FS_TCP_H__
+
+#include "en.h"
+
+#ifdef CONFIG_MLX5_EN_TLS
+int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv);
+void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv);
+#else
+static inline int mlx5e_accel_fs_tcp_create(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_accel_fs_tcp_destroy(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5E_ACCEL_FS_TCP_H__ */
+
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index e47a66983935..785b2960d6b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -105,8 +105,8 @@ 
 #define ETHTOOL_PRIO_NUM_LEVELS 1
 #define ETHTOOL_NUM_PRIOS 11
 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS)
-/* Vlan, mac, ttc, inner ttc, aRFS */
-#define KERNEL_NIC_PRIO_NUM_LEVELS 5
+/* Vlan, mac, ttc, inner ttc, {aRFS/accel} */
+#define KERNEL_NIC_PRIO_NUM_LEVELS 6
 #define KERNEL_NIC_NUM_PRIOS 1
 /* One more level for tc */
 #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 1)