diff mbox series

[ovs-dev,v21,5/8] dpif-offload-netlink: Implement dpif-offload-provider API

Message ID 20220317011249.46787-6-cmi@nvidia.com
State Changes Requested
Headers show
Series Add offload support for sFlow | expand

Checks

Context Check Description
ovsrobot/apply-robot success apply and check: success
ovsrobot/github-robot-_Build_and_Test success github build: passed
ovsrobot/intel-ovs-compilation success test: success

Commit Message

Chris Mi March 17, 2022, 1:12 a.m. UTC
Implement dpif-offload API for netlink datapath.

Signed-off-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Eli Britstein <elibr@nvidia.com>
---
 lib/automake.mk             |   1 +
 lib/dpif-netdev.c           |   3 +-
 lib/dpif-netlink.c          |  20 +++-
 lib/dpif-offload-netlink.c  | 221 ++++++++++++++++++++++++++++++++++++
 lib/dpif-offload-provider.h |  25 +++-
 lib/dpif-offload.c          | 104 +++++++++++++++++
 lib/dpif-provider.h         |   6 +-
 lib/dpif.c                  |   3 +-
 8 files changed, 375 insertions(+), 8 deletions(-)
 create mode 100644 lib/dpif-offload-netlink.c
diff mbox series

Patch

diff --git a/lib/automake.mk b/lib/automake.mk
index 781fba47a..b4cff234a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -453,6 +453,7 @@  lib_libopenvswitch_la_SOURCES += \
 	lib/dpif-netlink.h \
 	lib/dpif-netlink-rtnl.c \
 	lib/dpif-netlink-rtnl.h \
+	lib/dpif-offload-netlink.c \
 	lib/if-notifier.c \
 	lib/netdev-linux.c \
 	lib/netdev-linux.h \
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 9f35713ef..b0ace7f1e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1684,7 +1684,8 @@  create_dpif_netdev(struct dp_netdev *dp)
     ovs_refcount_ref(&dp->ref_cnt);
 
     dpif = xmalloc(sizeof *dpif);
-    dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id);
+    dpif_init(&dpif->dpif, dp->class, NULL, dp->name, netflow_id >> 8,
+              netflow_id);
     dpif->dp = dp;
     dpif->last_port_seq = seq_read(dp->port_seq);
 
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 71e35ccdd..f00159063 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -455,20 +455,33 @@  dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name,
 static int
 open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp)
 {
+    struct registered_dpif_offload_class *offload;
+    const char *type = dpif_netlink_class.type;
     struct dpif_netlink *dpif;
+    int error = 0;
 
+    /* Look up the class before allocating, so failure has nothing to free. */
+    dp_offload_initialize();
+    offload = dp_offload_class_lookup(type);
+    if (!offload) {
+        VLOG_WARN("Could not find offload class for type %s", type);
+        error = EAFNOSUPPORT;
+        goto exit;
+    }
+
     dpif = xzalloc(sizeof *dpif);
     dpif->port_notifier = NULL;
     fat_rwlock_init(&dpif->upcall_lock);
 
-    dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name,
-              dp->dp_ifindex, dp->dp_ifindex);
+    dpif_init(&dpif->dpif, &dpif_netlink_class, offload->offload_class,
+              dp->name, dp->dp_ifindex, dp->dp_ifindex);
 
     dpif->dp_ifindex = dp->dp_ifindex;
     dpif->user_features = dp->user_features;
     *dpifp = &dpif->dpif;
 
-    return 0;
+exit:
+    return error;
 }
 
 #ifdef _WIN32
@@ -713,6 +726,7 @@  dpif_netlink_close(struct dpif *dpif_)
 {
     struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
 
+    dpif_offload_close(dpif_);
     nl_sock_destroy(dpif->port_notifier);
 
     fat_rwlock_wrlock(&dpif->upcall_lock);
diff --git a/lib/dpif-offload-netlink.c b/lib/dpif-offload-netlink.c
new file mode 100644
index 000000000..02aea7e2d
--- /dev/null
+++ b/lib/dpif-offload-netlink.c
@@ -0,0 +1,221 @@ 
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <errno.h>
+#include <linux/psample.h>
+#include <sys/poll.h>
+
+#include "dpif-offload-provider.h"
+#include "netdev-offload.h"
+#include "netlink-protocol.h"
+#include "netlink-socket.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_offload_netlink);
+
+static struct nl_sock *psample_sock;
+static int psample_family;
+
+/* Attributes extracted from one received psample netlink message. */
+struct offload_psample {
+    struct nlattr *packet;      /* PSAMPLE_ATTR_DATA: packet data. */
+    int dp_group_id;            /* PSAMPLE_ATTR_SAMPLE_GROUP: sFlow map id. */
+    int iifindex;               /* PSAMPLE_ATTR_IIFINDEX: input ifindex. */
+};
+
+/* Best-effort setup of the psample socket.  To stay compatible with kernels
+ * that lack the psample module, every failure is only logged; on any early
+ * return 'psample_sock' stays NULL. */
+static void
+psample_init(void)
+{
+    unsigned int psample_mcgroup;
+    int err;
+
+    if (!netdev_is_flow_api_enabled()) {
+        VLOG_DBG("Flow API is not enabled.");
+        return;
+    }
+
+    if (psample_sock) {
+        VLOG_DBG("Psample socket is already initialized.");
+        return;
+    }
+
+    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, &psample_family);
+    if (err) {
+        VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n"
+                  "Please make sure the kernel module psample is loaded.",
+                  PSAMPLE_GENL_NAME, ovs_strerror(err));
+        return;
+    }
+
+    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
+                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+                                 &psample_mcgroup);
+    if (err) {
+        VLOG_WARN("Failed to look up Netlink multicast group '%s': %s",
+                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
+        return;
+    }
+
+    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
+    if (err) {
+        VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err));
+        return;
+    }
+
+    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
+    if (err) {
+        VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err));
+        /* Reset to NULL so the receive and wait paths skip psample. */
+        nl_sock_destroy(psample_sock);
+        psample_sock = NULL;
+    }
+}
+
+/* Provider 'init' hook: psample_init() reports no error, so this returns 0. */
+static int
+dpif_offload_netlink_init(void)
+{
+    psample_init();
+    return 0;
+}
+
+/* Closes the psample socket, if one is open, and clears 'psample_sock' so
+ * that later receive and wait calls see no socket. */
+static void
+psample_destroy(void)
+{
+    if (psample_sock) {
+        nl_sock_destroy(psample_sock);
+        psample_sock = NULL;
+    }
+}
+
+static void
+dpif_offload_netlink_destroy(void)
+{
+    psample_destroy();          /* Closes the psample socket, if open. */
+}
+
+static void
+dpif_offload_netlink_sflow_recv_wait(void)
+{
+    if (psample_sock) {         /* NULL when no psample socket is open. */
+        nl_sock_wait(psample_sock, POLLIN);
+    }
+}
+
+/* Parses the psample Generic Netlink message in 'buf' into 'psample'.
+ * Returns 0 on success, or EINVAL if a header is missing, the message is
+ * not of the psample family, or the attributes fail the policy. */
+static int
+psample_from_ofpbuf(struct offload_psample *psample,
+                    const struct ofpbuf *buf)
+{
+    static const struct nl_policy ovs_psample_policy[] = {
+        [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 },
+        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
+        [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 },
+        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
+    };
+    struct ofpbuf msg = ofpbuf_const_initializer(buf->data, buf->size);
+    struct nlmsghdr *nl_hdr = ofpbuf_try_pull(&msg, sizeof *nl_hdr);
+    struct genlmsghdr *genl_hdr = ofpbuf_try_pull(&msg, sizeof *genl_hdr);
+    struct nlattr *attrs[ARRAY_SIZE(ovs_psample_policy)];
+
+    if (!nl_hdr || !genl_hdr || nl_hdr->nlmsg_type != psample_family
+        || !nl_policy_parse(&msg, 0, ovs_psample_policy, attrs,
+                            ARRAY_SIZE(ovs_psample_policy))) {
+        return EINVAL;
+    }
+
+    psample->iifindex = nl_attr_get_u16(attrs[PSAMPLE_ATTR_IIFINDEX]);
+    psample->dp_group_id = nl_attr_get_u32(attrs[PSAMPLE_ATTR_SAMPLE_GROUP]);
+    psample->packet = attrs[PSAMPLE_ATTR_DATA];
+
+    return 0;
+}
+
+/* Wraps the payload of 'psample->packet' in 'sflow->packet' and looks up the
+ * sFlow attribute for its group id.  Returns 0, or ENOENT if there is none. */
+static int
+psample_parse_packet(struct offload_psample *psample,
+                     struct dpif_offload_sflow *sflow)
+{
+    struct nlattr *payload = CONST_CAST(struct nlattr *,
+                                        nl_attr_get(psample->packet));
+    size_t payload_len = nl_attr_get_size(psample->packet);
+
+    dp_packet_use_stub(&sflow->packet, payload - 1,
+                       payload_len + sizeof(struct nlattr));
+    dp_packet_set_data(&sflow->packet,
+                       (char *) dp_packet_data(&sflow->packet)
+                       + sizeof(struct nlattr));
+    dp_packet_set_size(&sflow->packet, payload_len);
+    sflow->attr = dpif_offload_sflow_attr_find(psample->dp_group_id);
+    if (sflow->attr) {
+        sflow->iifindex = psample->iifindex;
+    }
+    return sflow->attr ? 0 : ENOENT;
+}
+
+/* Receives one psample message and fills 'sflow' from it.  Returns 0 on
+ * success, ENOENT if the psample socket is not open or the sample's group id
+ * has no sFlow attribute, EINVAL if the message fails to parse, EAGAIN as
+ * passed through from nl_sock_recv(), or ENOBUFS (after draining the socket)
+ * for any other receive error. */
+static int
+dpif_offload_netlink_sflow_recv(struct dpif_offload_sflow *sflow)
+{
+    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
+    struct offload_psample psample;
+    struct ofpbuf buf;
+    int error;
+
+    if (!psample_sock) {
+        return ENOENT;
+    }
+
+    ofpbuf_use_stub(&buf, sflow->buf_stub, sizeof sflow->buf_stub);
+    error = nl_sock_recv(psample_sock, &buf, NULL, false);
+    if (!error) {
+        error = psample_from_ofpbuf(&psample, &buf);
+        if (!error) {
+            /* 'psample' points into 'buf', so parse before releasing it. */
+            error = psample_parse_packet(&psample, sflow);
+        }
+    } else if (error != EAGAIN) {
+        VLOG_WARN_RL(&rl, "Error reading or parsing netlink (%s).",
+                     ovs_strerror(error));
+        nl_sock_drain(psample_sock);
+        error = ENOBUFS;
+    }
+
+    /* NOTE(review): 'sflow->packet' dangles if 'buf' outgrew its stub. */
+    ofpbuf_uninit(&buf);
+    return error;
+}
+
+const struct dpif_offload_class dpif_offload_netlink_class = {
+    .type = "system",           /* Key for registration and lookup. */
+    .init = dpif_offload_netlink_init,
+    .destroy = dpif_offload_netlink_destroy,
+    .sflow_recv_wait = dpif_offload_netlink_sflow_recv_wait,
+    .sflow_recv = dpif_offload_netlink_sflow_recv,
+};
diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h
index af49eedb9..51a374868 100644
--- a/lib/dpif-offload-provider.h
+++ b/lib/dpif-offload-provider.h
@@ -17,12 +17,20 @@ 
 #ifndef DPIF_OFFLOAD_PROVIDER_H
 #define DPIF_OFFLOAD_PROVIDER_H
 
+#include "dp-packet.h"
 #include "netlink-protocol.h"
 #include "openvswitch/packets.h"
 #include "openvswitch/types.h"
 
 struct dpif;
-struct dpif_offload_sflow;
+struct registered_dpif_offload_class {
+    const struct dpif_offload_class *offload_class; /* Registered class. */
+    int refcount;               /* Raised by lookup, dropped by unref. */
+};
+
+#ifdef __linux__
+extern const struct dpif_offload_class dpif_offload_netlink_class;
+#endif
 
 /* When offloading sample action, userspace creates a unique ID to map
  * sFlow action and tunnel info and passes this ID to datapath instead
@@ -37,6 +45,14 @@  struct dpif_sflow_attr {
     ovs_u128 ufid;                  /* Flow ufid. */
 };
 
+/* An sFlow sample parsed from a datapath-specific message for OVS to use. */
+struct dpif_offload_sflow {
+    struct dp_packet packet;            /* Packet data. */
+    uint64_t buf_stub[4096 / 8];        /* Stub for the received message. */
+    uint32_t iifindex;                  /* Input ifindex. */
+    const struct dpif_sflow_attr *attr; /* SFlow attribute. */
+};
+
 /* Datapath interface offload structure, to be defined by each implementation
  * of a datapath interface.
  */
@@ -62,6 +78,13 @@  struct dpif_offload_class {
     int (*sflow_recv)(struct dpif_offload_sflow *sflow);
 };
 
+void dp_offload_initialize(void);
+void dpif_offload_close(struct dpif *);
+
+int dp_offload_register_provider(const struct dpif_offload_class *);
+void dp_offload_class_unref(struct registered_dpif_offload_class *rc);
+struct registered_dpif_offload_class *dp_offload_class_lookup(const char *);
+
 void dpif_offload_sflow_recv_wait(const struct dpif *dpif);
 int dpif_offload_sflow_recv(const struct dpif *dpif,
                             struct dpif_offload_sflow *sflow);
diff --git a/lib/dpif-offload.c b/lib/dpif-offload.c
index f2bf3e634..bea6a9e95 100644
--- a/lib/dpif-offload.c
+++ b/lib/dpif-offload.c
@@ -18,6 +18,110 @@ 
 #include <errno.h>
 
 #include "dpif-provider.h"
+#include "openvswitch/shash.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(dpif_offload);
+
+static const struct dpif_offload_class *base_dpif_offload_classes[] = {
+#if defined(__linux__)
+    &dpif_offload_netlink_class,
+#endif /* NOTE(review): array is empty when __linux__ is undefined. */
+};
+
+static struct shash dpif_offload_classes =
+    SHASH_INITIALIZER(&dpif_offload_classes);
+
+/* Protects 'dpif_offload_classes', including the refcount. */
+static struct ovs_mutex dpif_offload_mutex = OVS_MUTEX_INITIALIZER;
+
+void
+dp_offload_initialize(void)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+    if (ovsthread_once_start(&once)) {   /* Register built-ins only once. */
+        for (int i = 0; i < ARRAY_SIZE(base_dpif_offload_classes); i++) {
+            dp_offload_register_provider(base_dpif_offload_classes[i]);
+        }
+        ovsthread_once_done(&once);
+    }
+}
+
+/* Runs 'new_class''s init hook, if any, and records the class by type.
+ * Returns 0, EEXIST if the type is already registered, or the init error. */
+static int
+dp_offload_register_provider__(const struct dpif_offload_class *new_class)
+    OVS_REQUIRES(dpif_offload_mutex)
+{
+    struct registered_dpif_offload_class *rc;
+    int error;
+
+    if (shash_find(&dpif_offload_classes, new_class->type)) {
+        VLOG_WARN("Attempted to register duplicate datapath offload "
+                  "provider: %s", new_class->type);
+        return EEXIST;
+    }
+
+    error = new_class->init ? new_class->init() : 0;
+    if (error) {
+        VLOG_WARN("Failed to initialize %s datapath offload class: %s",
+                  new_class->type, ovs_strerror(error));
+        return error;
+    }
+
+    rc = xmalloc(sizeof *rc);
+    rc->offload_class = new_class;
+    rc->refcount = 0;
+    shash_add(&dpif_offload_classes, new_class->type, rc);
+    return 0;
+}
+
+void
+dpif_offload_close(struct dpif *dpif)
+{
+    /* NOTE(review): this lookup runs without dpif_offload_mutex - confirm. */
+    if (dpif->offload_class) {
+        dp_offload_class_unref(shash_find_data(&dpif_offload_classes,
+                                               dpif->offload_class->type));
+    }
+}
+
+/* Calls dp_offload_register_provider__() with dpif_offload_mutex held. */
+int
+dp_offload_register_provider(const struct dpif_offload_class *new_class)
+{
+    int error;
+
+    ovs_mutex_lock(&dpif_offload_mutex);
+    error = dp_offload_register_provider__(new_class);
+    ovs_mutex_unlock(&dpif_offload_mutex);
+    return error;
+}
+
+void
+dp_offload_class_unref(struct registered_dpif_offload_class *rc)
+{
+    ovs_mutex_lock(&dpif_offload_mutex);
+    ovs_assert(rc->refcount);       /* Must pair with an earlier lookup. */
+    rc->refcount--;
+    ovs_mutex_unlock(&dpif_offload_mutex);
+}
+
+/* Finds 'type' and takes a reference on it; returns NULL if unregistered. */
+struct registered_dpif_offload_class *
+dp_offload_class_lookup(const char *type)
+{
+    struct registered_dpif_offload_class *rc;
+
+    ovs_mutex_lock(&dpif_offload_mutex);
+    rc = shash_find_data(&dpif_offload_classes, type);
+    if (rc) {
+        rc->refcount++;
+    }
+    ovs_mutex_unlock(&dpif_offload_mutex);
+    return rc;
+}
 
 void
 dpif_offload_sflow_recv_wait(const struct dpif *dpif)
diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h
index 99009722a..c8ed385a1 100644
--- a/lib/dpif-provider.h
+++ b/lib/dpif-provider.h
@@ -47,8 +47,10 @@  struct dpif {
 struct dpif_ipf_status;
 struct ipf_dump_ctx;
 
-void dpif_init(struct dpif *, const struct dpif_class *, const char *name,
-               uint8_t netflow_engine_type, uint8_t netflow_engine_id);
+void dpif_init(struct dpif *, const struct dpif_class *,
+               const struct dpif_offload_class *offload_class,
+               const char *name, uint8_t netflow_engine_type,
+               uint8_t netflow_engine_id);
 void dpif_uninit(struct dpif *dpif, bool close);
 
 static inline void dpif_assert_class(const struct dpif *dpif,
diff --git a/lib/dpif.c b/lib/dpif.c
index 40f5fe446..5fb8ee0a7 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -1699,10 +1699,11 @@  dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id,
 
 void
 dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class,
-          const char *name,
+          const struct dpif_offload_class *offload_class, const char *name,
           uint8_t netflow_engine_type, uint8_t netflow_engine_id)
 {
     dpif->dpif_class = dpif_class;
+    dpif->offload_class = offload_class;
     dpif->base_name = xstrdup(name);
     dpif->full_name = xasprintf("%s@%s", dpif_class->type, name);
     dpif->netflow_engine_type = netflow_engine_type;