diff mbox series

[ovs-dev,v17,5/8] dpif-offload-netlink: Implement dpif-offload-provider API

Message ID 20211021080103.657675-6-cmi@nvidia.com
State Changes Requested
Headers show
Series Add offload support for sFlow | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test fail github build: failed

Commit Message

Chris Mi Oct. 21, 2021, 8:01 a.m. UTC
Implement dpif-offload API for netlink datapath.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Eli Britstein <elibr@nvidia.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
---
 lib/automake.mk             |   1 +
 lib/dpif-netdev.c           |   3 +-
 lib/dpif-netlink.c          |  11 +-
 lib/dpif-offload-netlink.c  | 207 ++++++++++++++++++++++++++++++++++++
 lib/dpif-offload-provider.h |  15 ++-
 lib/dpif-provider.h         |   3 +-
 lib/dpif.c                  |   3 +-
 7 files changed, 237 insertions(+), 6 deletions(-)
 create mode 100644 lib/dpif-offload-netlink.c

Comments

Eelco Chaudron Nov. 5, 2021, 1:24 p.m. UTC | #1
On 21 Oct 2021, at 10:01, Chris Mi wrote:

> Implement dpif-offload API for netlink datapath.
>
> Signed-off-by: Chris Mi <cmi@nvidia.com>
> Reviewed-by: Eli Britstein <elibr@nvidia.com>
> Acked-by: Eelco Chaudron <echaudro@redhat.com>

Chris, if you make changes to this patch after I have ACKed it, you need to remove the acked-by.
See some comments inline below.

> ---
>  lib/automake.mk             |   1 +
>  lib/dpif-netdev.c           |   3 +-
>  lib/dpif-netlink.c          |  11 +-
>  lib/dpif-offload-netlink.c  | 207 ++++++++++++++++++++++++++++++++++++
>  lib/dpif-offload-provider.h |  15 ++-
>  lib/dpif-provider.h         |   3 +-
>  lib/dpif.c                  |   3 +-
>  7 files changed, 237 insertions(+), 6 deletions(-)
>  create mode 100644 lib/dpif-offload-netlink.c
>
> diff --git a/lib/automake.mk b/lib/automake.mk
> index 9259f57de..18cffa827 100644
> --- a/lib/automake.mk
> +++ b/lib/automake.mk
> @@ -446,6 +446,7 @@ lib_libopenvswitch_la_SOURCES += \
>  	lib/dpif-netlink.h \
>  	lib/dpif-netlink-rtnl.c \
>  	lib/dpif-netlink-rtnl.h \
> +	lib/dpif-offload-netlink.c \
>  	lib/if-notifier.c \
>  	lib/netdev-linux.c \
>  	lib/netdev-linux.h \
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index b078c2da5..3f6d2ead7 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -1593,7 +1593,8 @@ create_dpif_netdev(struct dp_netdev *dp)
>      ovs_refcount_ref(&dp->ref_cnt);
>
>      dpif = xmalloc(sizeof *dpif);
> -    dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
> +    dpif_init(&dpif->dpif, dp->class, NULL, dp->name, netflow_id >> 8,
> +              netflow_id);
>      dpif->dp = dp;
>      dpif->last_port_seq = seq_read(dp->port_seq);
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 5f4b60c5a..4dea2f76c 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -370,6 +370,12 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
>
>      if (create) {
>          dp_request.cmd = OVS_DP_CMD_NEW;
> +        error = dpif_offload_netlink_init();

I think the init should still be a callback into the dpif_offload_class, and when it gets registered the init function should be called.
Not sure if it’s the correct place, but you could do something similar in dp_initialize() as done with  dp_register_provider().

> +        if (error) {
> +            VLOG_WARN("Failed to initialize netlink offload datapath: %s",
> +                      ovs_strerror(error));

VLOG_ERR()?

> +            return error;
> +        }
>      } else {
>          dp_request.cmd = OVS_DP_CMD_GET;
>
> @@ -460,8 +466,8 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
>      dpif->port_notifier = NULL;
>      fat_rwlock_init(&dpif->upcall_lock);
>
> -    dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
> -              dp->dp_ifindex, dp->dp_ifindex);
> +    dpif_init(&dpif->dpif, &dpif_netlink_class, &dpif_offload_netlink,

Guess naming should me more offload class related, so maybe rename it to “dpif_offload_netlink_class”.

> +              dp->name, dp->dp_ifindex, dp->dp_ifindex);
>
>      dpif->dp_ifindex = dp->dp_ifindex;
>      dpif->user_features = dp->user_features;
> @@ -732,6 +738,7 @@ dpif_netlink_destroy(struct dpif *dpif_)
>      struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
>      struct dpif_netlink_dp dp;
>
> +    dpif_offload_netlink_destroy();
>      dpif_netlink_dp_init(&dp);
>      dp.cmd = OVS_DP_CMD_DEL;
>      dp.dp_ifindex = dpif->dp_ifindex;
> diff --git a/lib/dpif-offload-netlink.c b/lib/dpif-offload-netlink.c
> new file mode 100644
> index 000000000..7f53ed582
> --- /dev/null
> +++ b/lib/dpif-offload-netlink.c
> @@ -0,0 +1,207 @@
> +/*
> + * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at:
> + *
> + *     http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#include <config.h>
> +#include <errno.h>
> +#include <linux/psample.h>
> +#include <sys/poll.h>
> +
> +#include "dpif-offload-provider.h"
> +#include "netdev-offload.h"
> +#include "netlink-protocol.h"
> +#include "netlink-socket.h"
> +#include "openvswitch/vlog.h"
> +
> +VLOG_DEFINE_THIS_MODULE(dpif_offload_netlink);
> +
> +static struct nl_sock *psample_sock;
> +static int psample_family;
> +
> +/* Receive psample netlink message and save the attributes. */
> +struct offload_psample {
> +    struct nlattr *packet;      /* Packet data. */
> +    int dp_group_id;            /* Mapping id for sFlow offload. */
> +    int iifindex;               /* Input ifindex. */
> +};
> +
> +/* In order to keep compatibility with kernels without psample module,
> + * return success even if psample is not initialized successfully. */
> +int
> +dpif_offload_netlink_init(void)
> +{
> +    unsigned int psample_mcgroup;
> +    int err;
> +
> +    if (!netdev_is_flow_api_enabled()) {
> +        VLOG_DBG("Flow API is not enabled.");
> +        return 0;
> +    }
> +
> +    if (psample_sock) {
> +        VLOG_DBG("Psample socket is already initialized.");
> +        return 0;
> +    }
> +
> +    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME,
> +                                &psample_family);
> +    if (err) {
> +        VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n"
> +                  "Please make sure the kernel module psample is loaded.",
> +                  PSAMPLE_GENL_NAME, ovs_strerror(err));
> +        return 0;
> +    }
> +
> +    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
> +                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
> +                                 &psample_mcgroup);
> +    if (err) {
> +        VLOG_WARN("Failed to join Netlink multicast group '%s': %s",
> +                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
> +        return 0;
> +    }
> +
> +    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
> +    if (err) {
> +        VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err));
> +        return 0;
> +    }
> +
> +    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
> +    if (err) {
> +        VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err));
> +        nl_sock_destroy(psample_sock);
> +        psample_sock = NULL;
> +        return 0;
> +    }
> +
> +    return 0;
> +}
> +
> +void
> +dpif_offload_netlink_destroy(void)
> +{
> +    if (!psample_sock) {
> +        return;
> +    }
> +
> +    nl_sock_destroy(psample_sock);
> +    psample_sock = NULL;
> +}
> +
> +static void
> +dpif_offload_netlink_sflow_recv_wait(void)
> +{
> +    if (psample_sock) {
> +        nl_sock_wait(psample_sock, POLLIN);
> +    }
> +}
> +
> +static int
> +psample_from_ofpbuf(struct offload_psample *psample,
> +                    const struct ofpbuf *buf)
> +{
> +    static const struct nl_policy ovs_psample_policy[] = {
> +        [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 },
> +        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
> +        [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 },
> +        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
> +    };
> +    struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)];
> +    struct genlmsghdr *genl;
> +    struct nlmsghdr *nlmsg;
> +    struct ofpbuf b;
> +
> +    b = ofpbuf_const_initializer(buf->data, buf->size);
> +    nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    genl = ofpbuf_try_pull(&b, sizeof *genl);
> +    if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family
> +        || !nl_policy_parse(&b, 0, ovs_psample_policy, a,
> +                            ARRAY_SIZE(ovs_psample_policy))) {
> +        return EINVAL;
> +    }
> +
> +    psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]);
> +    psample->dp_group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]);
> +    psample->packet = a[PSAMPLE_ATTR_DATA];
> +
> +    return 0;
> +}
> +
> +static int
> +psample_parse_packet(struct offload_psample *psample,
> +                     struct dpif_offload_sflow *sflow)
> +{
> +    dp_packet_use_stub(&sflow->packet,
> +                       CONST_CAST(struct nlattr *,
> +                                  nl_attr_get(psample->packet)) - 1,
> +                       nl_attr_get_size(psample->packet) +
> +                       sizeof(struct nlattr));
> +    dp_packet_set_data(&sflow->packet,
> +                       (char *) dp_packet_data(&sflow->packet) +
> +                       sizeof(struct nlattr));
> +    dp_packet_set_size(&sflow->packet, nl_attr_get_size(psample->packet));
> +
> +    sflow->attr = dpif_offload_sflow_attr_find(psample->dp_group_id);
> +    if (!sflow->attr) {
> +        return ENOENT;
> +    }
> +    sflow->iifindex = psample->iifindex;
> +
> +    return 0;
> +}
> +
> +static int
> +dpif_offload_netlink_sflow_recv(struct dpif_offload_sflow *sflow)
> +{
> +    if (!psample_sock) {
> +        return ENOENT;
> +    }
> +
> +    for (;;) {
> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> +        struct offload_psample psample;
> +        uint64_t buf_stub[4096 / 8];
> +        struct ofpbuf buf;
> +        int error;
> +
> +        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
> +        error = nl_sock_recv(psample_sock, &buf, NULL, false);
> +
> +        if (!error) {
> +            error = psample_from_ofpbuf(&psample, &buf);
> +            if (!error) {
> +                    ofpbuf_uninit(&buf);
> +                    error = psample_parse_packet(&psample, sflow);
> +                    return error;
> +            }
> +        } else if (error != EAGAIN) {
> +            VLOG_WARN_RL(&rl, "Error reading or parsing netlink (%s).",
> +                         ovs_strerror(error));
> +            nl_sock_drain(psample_sock);
> +            error = ENOBUFS;
> +        }
> +
> +        ofpbuf_uninit(&buf);
> +        if (error) {
> +            return error;
> +        }
> +    }
> +}
> +
> +const struct dpif_offload_api dpif_offload_netlink = {

Think the structure definition for dpif_offload_api should change to dpif_offload_class.
And this structure to dpif_offload_netlink_class, see also previous comment.

> +    .sflow_recv_wait = dpif_offload_netlink_sflow_recv_wait,
> +    .sflow_recv = dpif_offload_netlink_sflow_recv,
> +};
> diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h
> index 5b4726ab6..5ac419516 100644
> --- a/lib/dpif-offload-provider.h
> +++ b/lib/dpif-offload-provider.h
> @@ -17,12 +17,12 @@
>  #ifndef DPIF_OFFLOAD_PROVIDER_H
>  #define DPIF_OFFLOAD_PROVIDER_H
>
> +#include "dp-packet.h"
>  #include "netlink-protocol.h"
>  #include "openvswitch/packets.h"
>  #include "openvswitch/types.h"
>
>  struct dpif;
> -struct dpif_offload_sflow;
>
>  /* When offloading sample action, userspace creates a unique ID to map
>   * sFlow action and tunnel info and passes this ID to datapath instead
> @@ -37,6 +37,13 @@ struct dpif_sflow_attr {
>      ovs_u128 ufid;                  /* Flow ufid. */
>  };
>
> +/* Parse the specific dpif message to sFlow. So OVS can process it. */
> +struct dpif_offload_sflow {
> +    struct dp_packet packet;            /* Packet data. */
> +    uint32_t iifindex;                  /* Input ifindex. */
> +    const struct dpif_sflow_attr *attr; /* SFlow attribute. */
> +};
> +
>  /* Datapath interface offload structure, to be defined by each implementation
>   * of a datapath interface.
>   */
> @@ -54,4 +61,10 @@ void dpif_offload_sflow_recv_wait(const struct dpif *dpif);
>  int dpif_offload_sflow_recv(const struct dpif *dpif,
>                              struct dpif_offload_sflow *sflow);
>
> +#ifdef __linux__
> +extern const struct dpif_offload_api dpif_offload_netlink;
> +int dpif_offload_netlink_init(void);
> +void dpif_offload_netlink_destroy(void);
> +#endif
> +
>  #endif /* DPIF_OFFLOAD_PROVIDER_H */
> diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
> index 15a0d2b63..86898d838 100644
> --- a/lib/dpif-provider.h
> +++ b/lib/dpif-provider.h
> @@ -47,7 +47,8 @@ struct dpif {
>  struct dpif_ipf_status;
>  struct ipf_dump_ctx;
>
> -void dpif_init(struct dpif *, const struct dpif_class *, const char *name,
> +void dpif_init(struct dpif *, const struct dpif_class *,
> +               const struct dpif_offload_api *offload_api, const char *name,
>                 uint8_t netflow_engine_type, uint8_t netflow_engine_id);
>  void dpif_uninit(struct dpif *dpif, bool close);
>
> diff --git a/lib/dpif.c b/lib/dpif.c
> index 8c4aed47b..09ffa18e3 100644
> --- a/lib/dpif.c
> +++ b/lib/dpif.c
> @@ -1680,10 +1680,11 @@ dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
>  
>  void
>  dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
> -          const char *name,
> +          const struct dpif_offload_api *offload_api, const char *name,
Change dpif_offload_api to dpif_offload_api *offload_class
>            uint8_t netflow_engine_type, uint8_t netflow_engine_id)
>  {
>      dpif->dpif_class = dpif_class;
> +    dpif->offload_api = offload_api;

I would change the entry in the dpif structure from offload_api top offload_class.

>      dpif->base_name = xstrdup(name);
>      dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
>      dpif->netflow_engine_type = netflow_engine_type;
> -- 
> 2.30.2
Chris Mi Nov. 15, 2021, 2:42 a.m. UTC | #2
On 11/5/2021 9:24 PM, Eelco Chaudron wrote:
> On 21 Oct 2021, at 10:01, Chris Mi wrote:
>
>> Implement dpif-offload API for netlink datapath.
>>
>> Signed-off-by: Chris Mi <cmi@nvidia.com>
>> Reviewed-by: Eli Britstein <elibr@nvidia.com>
>> Acked-by: Eelco Chaudron <echaudro@redhat.com>
> Chris, if you make changes to this patch after I have ACKed it, you need to remove the acked-by.
> See some comments inline below.
OK, got it. Done.
>
>> ---
>>   lib/automake.mk             |   1 +
>>   lib/dpif-netdev.c           |   3 +-
>>   lib/dpif-netlink.c          |  11 +-
>>   lib/dpif-offload-netlink.c  | 207 ++++++++++++++++++++++++++++++++++++
>>   lib/dpif-offload-provider.h |  15 ++-
>>   lib/dpif-provider.h         |   3 +-
>>   lib/dpif.c                  |   3 +-
>>   7 files changed, 237 insertions(+), 6 deletions(-)
>>   create mode 100644 lib/dpif-offload-netlink.c
>>
>> diff --git a/lib/automake.mk b/lib/automake.mk
>> index 9259f57de..18cffa827 100644
>> --- a/lib/automake.mk
>> +++ b/lib/automake.mk
>> @@ -446,6 +446,7 @@ lib_libopenvswitch_la_SOURCES += \
>>   	lib/dpif-netlink.h \
>>   	lib/dpif-netlink-rtnl.c \
>>   	lib/dpif-netlink-rtnl.h \
>> +	lib/dpif-offload-netlink.c \
>>   	lib/if-notifier.c \
>>   	lib/netdev-linux.c \
>>   	lib/netdev-linux.h \
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index b078c2da5..3f6d2ead7 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -1593,7 +1593,8 @@ create_dpif_netdev(struct dp_netdev *dp)
>>       ovs_refcount_ref(&dp->ref_cnt);
>>
>>       dpif = xmalloc(sizeof *dpif);
>> -    dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
>> +    dpif_init(&dpif->dpif, dp->class, NULL, dp->name, netflow_id >> 8,
>> +              netflow_id);
>>       dpif->dp = dp;
>>       dpif->last_port_seq = seq_read(dp->port_seq);
>>
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 5f4b60c5a..4dea2f76c 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -370,6 +370,12 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
>>
>>       if (create) {
>>           dp_request.cmd = OVS_DP_CMD_NEW;
>> +        error = dpif_offload_netlink_init();
> I think the init should still be a callback into the dpif_offload_class, and when it gets registered the init function should be called.
> Not sure if it’s the correct place, but you could do something similar in dp_initialize() as done with  dp_register_provider().
Done.
>
>> +        if (error) {
>> +            VLOG_WARN("Failed to initialize netlink offload datapath: %s",
>> +                      ovs_strerror(error));
> VLOG_ERR()?
Since the init is done in registration, this code is not needed now.
>
>> +            return error;
>> +        }
>>       } else {
>>           dp_request.cmd = OVS_DP_CMD_GET;
>>
>> @@ -460,8 +466,8 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
>>       dpif->port_notifier = NULL;
>>       fat_rwlock_init(&dpif->upcall_lock);
>>
>> -    dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
>> -              dp->dp_ifindex, dp->dp_ifindex);
>> +    dpif_init(&dpif->dpif, &dpif_netlink_class, &dpif_offload_netlink,
> Guess naming should me more offload class related, so maybe rename it to “dpif_offload_netlink_class”.
Done.
>
>> +              dp->name, dp->dp_ifindex, dp->dp_ifindex);
>>
>>       dpif->dp_ifindex = dp->dp_ifindex;
>>       dpif->user_features = dp->user_features;
>> @@ -732,6 +738,7 @@ dpif_netlink_destroy(struct dpif *dpif_)
>>       struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
>>       struct dpif_netlink_dp dp;
>>
>> +    dpif_offload_netlink_destroy();
>>       dpif_netlink_dp_init(&dp);
>>       dp.cmd = OVS_DP_CMD_DEL;
>>       dp.dp_ifindex = dpif->dp_ifindex;
>> diff --git a/lib/dpif-offload-netlink.c b/lib/dpif-offload-netlink.c
>> new file mode 100644
>> index 000000000..7f53ed582
>> --- /dev/null
>> +++ b/lib/dpif-offload-netlink.c
>> @@ -0,0 +1,207 @@
>> +/*
>> + * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
>> + *
>> + * Licensed under the Apache License, Version 2.0 (the "License");
>> + * you may not use this file except in compliance with the License.
>> + * You may obtain a copy of the License at:
>> + *
>> + *     http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +#include <config.h>
>> +#include <errno.h>
>> +#include <linux/psample.h>
>> +#include <sys/poll.h>
>> +
>> +#include "dpif-offload-provider.h"
>> +#include "netdev-offload.h"
>> +#include "netlink-protocol.h"
>> +#include "netlink-socket.h"
>> +#include "openvswitch/vlog.h"
>> +
>> +VLOG_DEFINE_THIS_MODULE(dpif_offload_netlink);
>> +
>> +static struct nl_sock *psample_sock;
>> +static int psample_family;
>> +
>> +/* Receive psample netlink message and save the attributes. */
>> +struct offload_psample {
>> +    struct nlattr *packet;      /* Packet data. */
>> +    int dp_group_id;            /* Mapping id for sFlow offload. */
>> +    int iifindex;               /* Input ifindex. */
>> +};
>> +
>> +/* In order to keep compatibility with kernels without psample module,
>> + * return success even if psample is not initialized successfully. */
>> +int
>> +dpif_offload_netlink_init(void)
>> +{
>> +    unsigned int psample_mcgroup;
>> +    int err;
>> +
>> +    if (!netdev_is_flow_api_enabled()) {
>> +        VLOG_DBG("Flow API is not enabled.");
>> +        return 0;
>> +    }
>> +
>> +    if (psample_sock) {
>> +        VLOG_DBG("Psample socket is already initialized.");
>> +        return 0;
>> +    }
>> +
>> +    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME,
>> +                                &psample_family);
>> +    if (err) {
>> +        VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n"
>> +                  "Please make sure the kernel module psample is loaded.",
>> +                  PSAMPLE_GENL_NAME, ovs_strerror(err));
>> +        return 0;
>> +    }
>> +
>> +    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
>> +                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
>> +                                 &psample_mcgroup);
>> +    if (err) {
>> +        VLOG_WARN("Failed to join Netlink multicast group '%s': %s",
>> +                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
>> +        return 0;
>> +    }
>> +
>> +    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
>> +    if (err) {
>> +        VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err));
>> +        return 0;
>> +    }
>> +
>> +    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
>> +    if (err) {
>> +        VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err));
>> +        nl_sock_destroy(psample_sock);
>> +        psample_sock = NULL;
>> +        return 0;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +void
>> +dpif_offload_netlink_destroy(void)
>> +{
>> +    if (!psample_sock) {
>> +        return;
>> +    }
>> +
>> +    nl_sock_destroy(psample_sock);
>> +    psample_sock = NULL;
>> +}
>> +
>> +static void
>> +dpif_offload_netlink_sflow_recv_wait(void)
>> +{
>> +    if (psample_sock) {
>> +        nl_sock_wait(psample_sock, POLLIN);
>> +    }
>> +}
>> +
>> +static int
>> +psample_from_ofpbuf(struct offload_psample *psample,
>> +                    const struct ofpbuf *buf)
>> +{
>> +    static const struct nl_policy ovs_psample_policy[] = {
>> +        [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 },
>> +        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
>> +        [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 },
>> +        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
>> +    };
>> +    struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)];
>> +    struct genlmsghdr *genl;
>> +    struct nlmsghdr *nlmsg;
>> +    struct ofpbuf b;
>> +
>> +    b = ofpbuf_const_initializer(buf->data, buf->size);
>> +    nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>> +    genl = ofpbuf_try_pull(&b, sizeof *genl);
>> +    if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family
>> +        || !nl_policy_parse(&b, 0, ovs_psample_policy, a,
>> +                            ARRAY_SIZE(ovs_psample_policy))) {
>> +        return EINVAL;
>> +    }
>> +
>> +    psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]);
>> +    psample->dp_group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]);
>> +    psample->packet = a[PSAMPLE_ATTR_DATA];
>> +
>> +    return 0;
>> +}
>> +
>> +static int
>> +psample_parse_packet(struct offload_psample *psample,
>> +                     struct dpif_offload_sflow *sflow)
>> +{
>> +    dp_packet_use_stub(&sflow->packet,
>> +                       CONST_CAST(struct nlattr *,
>> +                                  nl_attr_get(psample->packet)) - 1,
>> +                       nl_attr_get_size(psample->packet) +
>> +                       sizeof(struct nlattr));
>> +    dp_packet_set_data(&sflow->packet,
>> +                       (char *) dp_packet_data(&sflow->packet) +
>> +                       sizeof(struct nlattr));
>> +    dp_packet_set_size(&sflow->packet, nl_attr_get_size(psample->packet));
>> +
>> +    sflow->attr = dpif_offload_sflow_attr_find(psample->dp_group_id);
>> +    if (!sflow->attr) {
>> +        return ENOENT;
>> +    }
>> +    sflow->iifindex = psample->iifindex;
>> +
>> +    return 0;
>> +}
>> +
>> +static int
>> +dpif_offload_netlink_sflow_recv(struct dpif_offload_sflow *sflow)
>> +{
>> +    if (!psample_sock) {
>> +        return ENOENT;
>> +    }
>> +
>> +    for (;;) {
>> +        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>> +        struct offload_psample psample;
>> +        uint64_t buf_stub[4096 / 8];
>> +        struct ofpbuf buf;
>> +        int error;
>> +
>> +        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
>> +        error = nl_sock_recv(psample_sock, &buf, NULL, false);
>> +
>> +        if (!error) {
>> +            error = psample_from_ofpbuf(&psample, &buf);
>> +            if (!error) {
>> +                    ofpbuf_uninit(&buf);
>> +                    error = psample_parse_packet(&psample, sflow);
>> +                    return error;
>> +            }
>> +        } else if (error != EAGAIN) {
>> +            VLOG_WARN_RL(&rl, "Error reading or parsing netlink (%s).",
>> +                         ovs_strerror(error));
>> +            nl_sock_drain(psample_sock);
>> +            error = ENOBUFS;
>> +        }
>> +
>> +        ofpbuf_uninit(&buf);
>> +        if (error) {
>> +            return error;
>> +        }
>> +    }
>> +}
>> +
>> +const struct dpif_offload_api dpif_offload_netlink = {
> Think the structure definition for dpif_offload_api should change to dpif_offload_class.
> And this structure to dpif_offload_netlink_class, see also previous comment.
Done.
>
>> +    .sflow_recv_wait = dpif_offload_netlink_sflow_recv_wait,
>> +    .sflow_recv = dpif_offload_netlink_sflow_recv,
>> +};
>> diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h
>> index 5b4726ab6..5ac419516 100644
>> --- a/lib/dpif-offload-provider.h
>> +++ b/lib/dpif-offload-provider.h
>> @@ -17,12 +17,12 @@
>>   #ifndef DPIF_OFFLOAD_PROVIDER_H
>>   #define DPIF_OFFLOAD_PROVIDER_H
>>
>> +#include "dp-packet.h"
>>   #include "netlink-protocol.h"
>>   #include "openvswitch/packets.h"
>>   #include "openvswitch/types.h"
>>
>>   struct dpif;
>> -struct dpif_offload_sflow;
>>
>>   /* When offloading sample action, userspace creates a unique ID to map
>>    * sFlow action and tunnel info and passes this ID to datapath instead
>> @@ -37,6 +37,13 @@ struct dpif_sflow_attr {
>>       ovs_u128 ufid;                  /* Flow ufid. */
>>   };
>>
>> +/* Parse the specific dpif message to sFlow. So OVS can process it. */
>> +struct dpif_offload_sflow {
>> +    struct dp_packet packet;            /* Packet data. */
>> +    uint32_t iifindex;                  /* Input ifindex. */
>> +    const struct dpif_sflow_attr *attr; /* SFlow attribute. */
>> +};
>> +
>>   /* Datapath interface offload structure, to be defined by each implementation
>>    * of a datapath interface.
>>    */
>> @@ -54,4 +61,10 @@ void dpif_offload_sflow_recv_wait(const struct dpif *dpif);
>>   int dpif_offload_sflow_recv(const struct dpif *dpif,
>>                               struct dpif_offload_sflow *sflow);
>>
>> +#ifdef __linux__
>> +extern const struct dpif_offload_api dpif_offload_netlink;
>> +int dpif_offload_netlink_init(void);
>> +void dpif_offload_netlink_destroy(void);
>> +#endif
>> +
>>   #endif /* DPIF_OFFLOAD_PROVIDER_H */
>> diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
>> index 15a0d2b63..86898d838 100644
>> --- a/lib/dpif-provider.h
>> +++ b/lib/dpif-provider.h
>> @@ -47,7 +47,8 @@ struct dpif {
>>   struct dpif_ipf_status;
>>   struct ipf_dump_ctx;
>>
>> -void dpif_init(struct dpif *, const struct dpif_class *, const char *name,
>> +void dpif_init(struct dpif *, const struct dpif_class *,
>> +               const struct dpif_offload_api *offload_api, const char *name,
>>                  uint8_t netflow_engine_type, uint8_t netflow_engine_id);
>>   void dpif_uninit(struct dpif *dpif, bool close);
>>
>> diff --git a/lib/dpif.c b/lib/dpif.c
>> index 8c4aed47b..09ffa18e3 100644
>> --- a/lib/dpif.c
>> +++ b/lib/dpif.c
>> @@ -1680,10 +1680,11 @@ dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
>>   
>>   void
>>   dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
>> -          const char *name,
>> +          const struct dpif_offload_api *offload_api, const char *name,
> Change dpif_offload_api to dpif_offload_api *offload_class
Done.
>>             uint8_t netflow_engine_type, uint8_t netflow_engine_id)
>>   {
>>       dpif->dpif_class = dpif_class;
>> +    dpif->offload_api = offload_api;
> I would change the entry in the dpif structure from offload_api top offload_class.
Done.

Will send v18 for review.

Thanks,
Chris
>
>>       dpif->base_name = xstrdup(name);
>>       dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
>>       dpif->netflow_engine_type = netflow_engine_type;
>> -- 
>> 2.30.2
diff mbox series

Patch

diff --git a/lib/automake.mk b/lib/automake.mk
index 9259f57de..18cffa827 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -446,6 +446,7 @@  lib_libopenvswitch_la_SOURCES += \
 	lib/dpif-netlink.h \
 	lib/dpif-netlink-rtnl.c \
 	lib/dpif-netlink-rtnl.h \
+	lib/dpif-offload-netlink.c \
 	lib/if-notifier.c \
 	lib/netdev-linux.c \
 	lib/netdev-linux.h \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b078c2da5..3f6d2ead7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1593,7 +1593,8 @@  create_dpif_netdev(struct dp_netdev *dp)
     ovs_refcount_ref(&dp->ref_cnt);
 
     dpif = xmalloc(sizeof *dpif);
-    dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
+    dpif_init(&dpif->dpif, dp->class, NULL, dp->name, netflow_id >> 8,
+              netflow_id);
     dpif->dp = dp;
     dpif->last_port_seq = seq_read(dp->port_seq);
 
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 5f4b60c5a..4dea2f76c 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -370,6 +370,12 @@  dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
 
     if (create) {
         dp_request.cmd = OVS_DP_CMD_NEW;
+        error = dpif_offload_netlink_init();
+        if (error) {
+            VLOG_WARN("Failed to initialize netlink offload datapath: %s",
+                      ovs_strerror(error));
+            return error;
+        }
     } else {
         dp_request.cmd = OVS_DP_CMD_GET;
 
@@ -460,8 +466,8 @@  open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
     dpif->port_notifier = NULL;
     fat_rwlock_init(&dpif->upcall_lock);
 
-    dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
-              dp->dp_ifindex, dp->dp_ifindex);
+    dpif_init(&dpif->dpif, &dpif_netlink_class, &dpif_offload_netlink,
+              dp->name, dp->dp_ifindex, dp->dp_ifindex);
 
     dpif->dp_ifindex = dp->dp_ifindex;
     dpif->user_features = dp->user_features;
@@ -732,6 +738,7 @@  dpif_netlink_destroy(struct dpif *dpif_)
     struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
     struct dpif_netlink_dp dp;
 
+    dpif_offload_netlink_destroy();
     dpif_netlink_dp_init(&dp);
     dp.cmd = OVS_DP_CMD_DEL;
     dp.dp_ifindex = dpif->dp_ifindex;
diff --git a/lib/dpif-offload-netlink.c b/lib/dpif-offload-netlink.c
new file mode 100644
index 000000000..7f53ed582
--- /dev/null
+++ b/lib/dpif-offload-netlink.c
@@ -0,0 +1,207 @@ 
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <linux/psample.h>
+#include <sys/poll.h>
+
+#include "dpif-offload-provider.h"
+#include "netdev-offload.h"
+#include "netlink-protocol.h"
+#include "netlink-socket.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_offload_netlink);
+
+static struct nl_sock *psample_sock;
+static int psample_family;
+
+/* Receive psample netlink message and save the attributes. */
+struct offload_psample {
+    struct nlattr *packet;      /* Packet data. */
+    int dp_group_id;            /* Mapping id for sFlow offload. */
+    int iifindex;               /* Input ifindex. */
+};
+
+/* In order to keep compatibility with kernels without psample module,
+ * return success even if psample is not initialized successfully. */
+int
+dpif_offload_netlink_init(void)
+{
+    unsigned int psample_mcgroup;
+    int err;
+
+    if (!netdev_is_flow_api_enabled()) {
+        VLOG_DBG("Flow API is not enabled.");
+        return 0;
+    }
+
+    if (psample_sock) {
+        VLOG_DBG("Psample socket is already initialized.");
+        return 0;
+    }
+
+    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME,
+                                &psample_family);
+    if (err) {
+        VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n"
+                  "Please make sure the kernel module psample is loaded.",
+                  PSAMPLE_GENL_NAME, ovs_strerror(err));
+        return 0;
+    }
+
+    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
+                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+                                 &psample_mcgroup);
+    if (err) {
+        VLOG_WARN("Failed to join Netlink multicast group '%s': %s",
+                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
+        return 0;
+    }
+
+    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
+    if (err) {
+        VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err));
+        return 0;
+    }
+
+    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
+    if (err) {
+        VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err));
+        nl_sock_destroy(psample_sock);
+        psample_sock = NULL;
+        return 0;
+    }
+
+    return 0;
+}
+
+void
+dpif_offload_netlink_destroy(void)
+{
+    if (!psample_sock) {
+        return;
+    }
+
+    nl_sock_destroy(psample_sock);
+    psample_sock = NULL;
+}
+
+static void
+dpif_offload_netlink_sflow_recv_wait(void)
+{
+    if (psample_sock) {
+        nl_sock_wait(psample_sock, POLLIN);
+    }
+}
+
+static int
+psample_from_ofpbuf(struct offload_psample *psample,
+                    const struct ofpbuf *buf)
+{
+    static const struct nl_policy ovs_psample_policy[] = {
+        [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 },
+        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
+        [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 },
+        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
+    };
+    struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)];
+    struct genlmsghdr *genl;
+    struct nlmsghdr *nlmsg;
+    struct ofpbuf b;
+
+    b = ofpbuf_const_initializer(buf->data, buf->size);
+    nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
+    genl = ofpbuf_try_pull(&b, sizeof *genl);
+    if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family
+        || !nl_policy_parse(&b, 0, ovs_psample_policy, a,
+                            ARRAY_SIZE(ovs_psample_policy))) {
+        return EINVAL;
+    }
+
+    psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]);
+    psample->dp_group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]);
+    psample->packet = a[PSAMPLE_ATTR_DATA];
+
+    return 0;
+}
+
+static int
+psample_parse_packet(struct offload_psample *psample,
+                     struct dpif_offload_sflow *sflow)
+{
+    dp_packet_use_stub(&sflow->packet,
+                       CONST_CAST(struct nlattr *,
+                                  nl_attr_get(psample->packet)) - 1,
+                       nl_attr_get_size(psample->packet) +
+                       sizeof(struct nlattr));
+    dp_packet_set_data(&sflow->packet,
+                       (char *) dp_packet_data(&sflow->packet) +
+                       sizeof(struct nlattr));
+    dp_packet_set_size(&sflow->packet, nl_attr_get_size(psample->packet));
+
+    sflow->attr = dpif_offload_sflow_attr_find(psample->dp_group_id);
+    if (!sflow->attr) {
+        return ENOENT;
+    }
+    sflow->iifindex = psample->iifindex;
+
+    return 0;
+}
+
+static int
+dpif_offload_netlink_sflow_recv(struct dpif_offload_sflow *sflow)
+{
+    if (!psample_sock) {
+        return ENOENT;
+    }
+
+    for (;;) {
+        static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+        struct offload_psample psample;
+        uint64_t buf_stub[4096 / 8];
+        struct ofpbuf buf;
+        int error;
+
+        ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub);
+        error = nl_sock_recv(psample_sock, &buf, NULL, false);
+
+        if (!error) {
+            error = psample_from_ofpbuf(&psample, &buf);
+            if (!error) {
+                    ofpbuf_uninit(&buf);
+                    error = psample_parse_packet(&psample, sflow);
+                    return error;
+            }
+        } else if (error != EAGAIN) {
+            VLOG_WARN_RL(&rl, "Error reading or parsing netlink (%s).",
+                         ovs_strerror(error));
+            nl_sock_drain(psample_sock);
+            error = ENOBUFS;
+        }
+
+        ofpbuf_uninit(&buf);
+        if (error) {
+            return error;
+        }
+    }
+}
+
+const struct dpif_offload_api dpif_offload_netlink = {
+    .sflow_recv_wait = dpif_offload_netlink_sflow_recv_wait,
+    .sflow_recv = dpif_offload_netlink_sflow_recv,
+};
diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h
index 5b4726ab6..5ac419516 100644
--- a/lib/dpif-offload-provider.h
+++ b/lib/dpif-offload-provider.h
@@ -17,12 +17,12 @@ 
 #ifndef DPIF_OFFLOAD_PROVIDER_H
 #define DPIF_OFFLOAD_PROVIDER_H
 
+#include "dp-packet.h"
 #include "netlink-protocol.h"
 #include "openvswitch/packets.h"
 #include "openvswitch/types.h"
 
 struct dpif;
-struct dpif_offload_sflow;
 
 /* When offloading sample action, userspace creates a unique ID to map
  * sFlow action and tunnel info and passes this ID to datapath instead
@@ -37,6 +37,13 @@  struct dpif_sflow_attr {
     ovs_u128 ufid;                  /* Flow ufid. */
 };
 
+/* Parse the specific dpif message to sFlow. So OVS can process it. */
+struct dpif_offload_sflow {
+    struct dp_packet packet;            /* Packet data. */
+    uint32_t iifindex;                  /* Input ifindex. */
+    const struct dpif_sflow_attr *attr; /* SFlow attribute. */
+};
+
 /* Datapath interface offload structure, to be defined by each implementation
  * of a datapath interface.
  */
@@ -54,4 +61,10 @@  void dpif_offload_sflow_recv_wait(const struct dpif *dpif);
 int dpif_offload_sflow_recv(const struct dpif *dpif,
                             struct dpif_offload_sflow *sflow);
 
+#ifdef __linux__
+extern const struct dpif_offload_api dpif_offload_netlink;
+int dpif_offload_netlink_init(void);
+void dpif_offload_netlink_destroy(void);
+#endif
+
 #endif /* DPIF_OFFLOAD_PROVIDER_H */
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 15a0d2b63..86898d838 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -47,7 +47,8 @@  struct dpif {
 struct dpif_ipf_status;
 struct ipf_dump_ctx;
 
-void dpif_init(struct dpif *, const struct dpif_class *, const char *name,
+void dpif_init(struct dpif *, const struct dpif_class *,
+               const struct dpif_offload_api *offload_api, const char *name,
                uint8_t netflow_engine_type, uint8_t netflow_engine_id);
 void dpif_uninit(struct dpif *dpif, bool close);
 
diff --git a/lib/dpif.c b/lib/dpif.c
index 8c4aed47b..09ffa18e3 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1680,10 +1680,11 @@  dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
 
 void
 dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
-          const char *name,
+          const struct dpif_offload_api *offload_api, const char *name,
           uint8_t netflow_engine_type, uint8_t netflow_engine_id)
 {
     dpif->dpif_class = dpif_class;
+    dpif->offload_api = offload_api;
     dpif->base_name = xstrdup(name);
     dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
     dpif->netflow_engine_type = netflow_engine_type;