Message ID | 20211021080103.657675-6-cmi@nvidia.com |
---|---|
State | Changes Requested |
Headers | show |
Series | Add offload support for sFlow | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | fail | github build: failed |
On 21 Oct 2021, at 10:01, Chris Mi wrote: > Implement dpif-offload API for netlink datapath. > > Signed-off-by: Chris Mi <cmi@nvidia.com> > Reviewed-by: Eli Britstein <elibr@nvidia.com> > Acked-by: Eelco Chaudron <echaudro@redhat.com> Chris, if you make changes to this patch after I have ACKed it, you need to remove the acked-by. See some comments inline below. > --- > lib/automake.mk | 1 + > lib/dpif-netdev.c | 3 +- > lib/dpif-netlink.c | 11 +- > lib/dpif-offload-netlink.c | 207 ++++++++++++++++++++++++++++++++++++ > lib/dpif-offload-provider.h | 15 ++- > lib/dpif-provider.h | 3 +- > lib/dpif.c | 3 +- > 7 files changed, 237 insertions(+), 6 deletions(-) > create mode 100644 lib/dpif-offload-netlink.c > > diff --git a/lib/automake.mk b/lib/automake.mk > index 9259f57de..18cffa827 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -446,6 +446,7 @@ lib_libopenvswitch_la_SOURCES += \ > lib/dpif-netlink.h \ > lib/dpif-netlink-rtnl.c \ > lib/dpif-netlink-rtnl.h \ > + lib/dpif-offload-netlink.c \ > lib/if-notifier.c \ > lib/netdev-linux.c \ > lib/netdev-linux.h \ > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index b078c2da5..3f6d2ead7 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -1593,7 +1593,8 @@ create_dpif_netdev(struct dp_netdev *dp) > ovs_refcount_ref(&dp->ref_cnt); > > dpif = xmalloc(sizeof *dpif); > - dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id); > + dpif_init(&dpif->dpif, dp->class, NULL, dp->name, netflow_id >> 8, > + netflow_id); > dpif->dp = dp; > dpif->last_port_seq = seq_read(dp->port_seq); > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c > index 5f4b60c5a..4dea2f76c 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -370,6 +370,12 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name, > > if (create) { > dp_request.cmd = OVS_DP_CMD_NEW; > + error = dpif_offload_netlink_init(); I think the init should still be a callback into the dpif_offload_class, and when it gets registered the init function should be called. Not sure if it’s the correct place, but you could do something similar in dp_initialize() as done with dp_register_provider(). > + if (error) { > + VLOG_WARN("Failed to initialize netlink offload datapath: %s", > + ovs_strerror(error)); VLOG_ERR()? > + return error; > + } > } else { > dp_request.cmd = OVS_DP_CMD_GET; > > @@ -460,8 +466,8 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp) > dpif->port_notifier = NULL; > fat_rwlock_init(&dpif->upcall_lock); > > - dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name, > - dp->dp_ifindex, dp->dp_ifindex); > + dpif_init(&dpif->dpif, &dpif_netlink_class, &dpif_offload_netlink, Guess naming should me more offload class related, so maybe rename it to “dpif_offload_netlink_class”. > + dp->name, dp->dp_ifindex, dp->dp_ifindex); > > dpif->dp_ifindex = dp->dp_ifindex; > dpif->user_features = dp->user_features; > @@ -732,6 +738,7 @@ dpif_netlink_destroy(struct dpif *dpif_) > struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); > struct dpif_netlink_dp dp; > > + dpif_offload_netlink_destroy(); > dpif_netlink_dp_init(&dp); > dp.cmd = OVS_DP_CMD_DEL; > dp.dp_ifindex = dpif->dp_ifindex; > diff --git a/lib/dpif-offload-netlink.c b/lib/dpif-offload-netlink.c > new file mode 100644 > index 000000000..7f53ed582 > --- /dev/null > +++ b/lib/dpif-offload-netlink.c > @@ -0,0 +1,207 @@ > +/* > + * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#include <config.h> > +#include <errno.h> > +#include <linux/psample.h> > +#include <sys/poll.h> > + > +#include "dpif-offload-provider.h" > +#include "netdev-offload.h" > +#include "netlink-protocol.h" > +#include "netlink-socket.h" > +#include "openvswitch/vlog.h" > + > +VLOG_DEFINE_THIS_MODULE(dpif_offload_netlink); > + > +static struct nl_sock *psample_sock; > +static int psample_family; > + > +/* Receive psample netlink message and save the attributes. */ > +struct offload_psample { > + struct nlattr *packet; /* Packet data. */ > + int dp_group_id; /* Mapping id for sFlow offload. */ > + int iifindex; /* Input ifindex. */ > +}; > + > +/* In order to keep compatibility with kernels without psample module, > + * return success even if psample is not initialized successfully. */ > +int > +dpif_offload_netlink_init(void) > +{ > + unsigned int psample_mcgroup; > + int err; > + > + if (!netdev_is_flow_api_enabled()) { > + VLOG_DBG("Flow API is not enabled."); > + return 0; > + } > + > + if (psample_sock) { > + VLOG_DBG("Psample socket is already initialized."); > + return 0; > + } > + > + err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, > + &psample_family); > + if (err) { > + VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n" > + "Please make sure the kernel module psample is loaded.", > + PSAMPLE_GENL_NAME, ovs_strerror(err)); > + return 0; > + } > + > + err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME, > + PSAMPLE_NL_MCGRP_SAMPLE_NAME, > + &psample_mcgroup); > + if (err) { > + VLOG_WARN("Failed to join Netlink multicast group '%s': %s", > + PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err)); > + return 0; > + } > + > + err = nl_sock_create(NETLINK_GENERIC, &psample_sock); > + if (err) { > + VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err)); > + return 0; > + } > + > + err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup); > + if (err) { > + VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err)); > + nl_sock_destroy(psample_sock); > + psample_sock = NULL; > + return 0; > + } > + > + return 0; > +} > + > +void > +dpif_offload_netlink_destroy(void) > +{ > + if (!psample_sock) { > + return; > + } > + > + nl_sock_destroy(psample_sock); > + psample_sock = NULL; > +} > + > +static void > +dpif_offload_netlink_sflow_recv_wait(void) > +{ > + if (psample_sock) { > + nl_sock_wait(psample_sock, POLLIN); > + } > +} > + > +static int > +psample_from_ofpbuf(struct offload_psample *psample, > + const struct ofpbuf *buf) > +{ > + static const struct nl_policy ovs_psample_policy[] = { > + [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 }, > + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 }, > + [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 }, > + [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC }, > + }; > + struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)]; > + struct genlmsghdr *genl; > + struct nlmsghdr *nlmsg; > + struct ofpbuf b; > + > + b = ofpbuf_const_initializer(buf->data, buf->size); > + nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); > + genl = ofpbuf_try_pull(&b, sizeof *genl); > + if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family > + || !nl_policy_parse(&b, 0, ovs_psample_policy, a, > + ARRAY_SIZE(ovs_psample_policy))) { > + return EINVAL; > + } > + > + psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]); > + psample->dp_group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]); > + psample->packet = a[PSAMPLE_ATTR_DATA]; > + > + return 0; > +} > + > +static int > +psample_parse_packet(struct offload_psample *psample, > + struct dpif_offload_sflow *sflow) > +{ > + dp_packet_use_stub(&sflow->packet, > + CONST_CAST(struct nlattr *, > + nl_attr_get(psample->packet)) - 1, > + nl_attr_get_size(psample->packet) + > + sizeof(struct nlattr)); > + dp_packet_set_data(&sflow->packet, > + (char *) dp_packet_data(&sflow->packet) + > + sizeof(struct nlattr)); > + dp_packet_set_size(&sflow->packet, nl_attr_get_size(psample->packet)); > + > + sflow->attr = dpif_offload_sflow_attr_find(psample->dp_group_id); > + if (!sflow->attr) { > + return ENOENT; > + } > + sflow->iifindex = psample->iifindex; > + > + return 0; > +} > + > +static int > +dpif_offload_netlink_sflow_recv(struct dpif_offload_sflow *sflow) > +{ > + if (!psample_sock) { > + return ENOENT; > + } > + > + for (;;) { > + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); > + struct offload_psample psample; > + uint64_t buf_stub[4096 / 8]; > + struct ofpbuf buf; > + int error; > + > + ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); > + error = nl_sock_recv(psample_sock, &buf, NULL, false); > + > + if (!error) { > + error = psample_from_ofpbuf(&psample, &buf); > + if (!error) { > + ofpbuf_uninit(&buf); > + error = psample_parse_packet(&psample, sflow); > + return error; > + } > + } else if (error != EAGAIN) { > + VLOG_WARN_RL(&rl, "Error reading or parsing netlink (%s).", > + ovs_strerror(error)); > + nl_sock_drain(psample_sock); > + error = ENOBUFS; > + } > + > + ofpbuf_uninit(&buf); > + if (error) { > + return error; > + } > + } > +} > + > +const struct dpif_offload_api dpif_offload_netlink = { Think the structure definition for dpif_offload_api should change to dpif_offload_class. And this structure to dpif_offload_netlink_class, see also previous comment. > + .sflow_recv_wait = dpif_offload_netlink_sflow_recv_wait, > + .sflow_recv = dpif_offload_netlink_sflow_recv, > +}; > diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h > index 5b4726ab6..5ac419516 100644 > --- a/lib/dpif-offload-provider.h > +++ b/lib/dpif-offload-provider.h > @@ -17,12 +17,12 @@ > #ifndef DPIF_OFFLOAD_PROVIDER_H > #define DPIF_OFFLOAD_PROVIDER_H > > +#include "dp-packet.h" > #include "netlink-protocol.h" > #include "openvswitch/packets.h" > #include "openvswitch/types.h" > > struct dpif; > -struct dpif_offload_sflow; > > /* When offloading sample action, userspace creates a unique ID to map > * sFlow action and tunnel info and passes this ID to datapath instead > @@ -37,6 +37,13 @@ struct dpif_sflow_attr { > ovs_u128 ufid; /* Flow ufid. */ > }; > > +/* Parse the specific dpif message to sFlow. So OVS can process it. */ > +struct dpif_offload_sflow { > + struct dp_packet packet; /* Packet data. */ > + uint32_t iifindex; /* Input ifindex. */ > + const struct dpif_sflow_attr *attr; /* SFlow attribute. */ > +}; > + > /* Datapath interface offload structure, to be defined by each implementation > * of a datapath interface. > */ > @@ -54,4 +61,10 @@ void dpif_offload_sflow_recv_wait(const struct dpif *dpif); > int dpif_offload_sflow_recv(const struct dpif *dpif, > struct dpif_offload_sflow *sflow); > > +#ifdef __linux__ > +extern const struct dpif_offload_api dpif_offload_netlink; > +int dpif_offload_netlink_init(void); > +void dpif_offload_netlink_destroy(void); > +#endif > + > #endif /* DPIF_OFFLOAD_PROVIDER_H */ > diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h > index 15a0d2b63..86898d838 100644 > --- a/lib/dpif-provider.h > +++ b/lib/dpif-provider.h > @@ -47,7 +47,8 @@ struct dpif { > struct dpif_ipf_status; > struct ipf_dump_ctx; > > -void dpif_init(struct dpif *, const struct dpif_class *, const char *name, > +void dpif_init(struct dpif *, const struct dpif_class *, > + const struct dpif_offload_api *offload_api, const char *name, > uint8_t netflow_engine_type, uint8_t netflow_engine_id); > void dpif_uninit(struct dpif *dpif, bool close); > > diff --git a/lib/dpif.c b/lib/dpif.c > index 8c4aed47b..09ffa18e3 100644 > --- a/lib/dpif.c > +++ b/lib/dpif.c > @@ -1680,10 +1680,11 @@ dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id, > > void > dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class, > - const char *name, > + const struct dpif_offload_api *offload_api, const char *name, Change dpif_offload_api to dpif_offload_api *offload_class > uint8_t netflow_engine_type, uint8_t netflow_engine_id) > { > dpif->dpif_class = dpif_class; > + dpif->offload_api = offload_api; I would change the entry in the dpif structure from offload_api top offload_class. > dpif->base_name = xstrdup(name); > dpif->full_name = xasprintf("%s@%s", dpif_class->type, name); > dpif->netflow_engine_type = netflow_engine_type; > -- > 2.30.2
On 11/5/2021 9:24 PM, Eelco Chaudron wrote: > On 21 Oct 2021, at 10:01, Chris Mi wrote: > >> Implement dpif-offload API for netlink datapath. >> >> Signed-off-by: Chris Mi <cmi@nvidia.com> >> Reviewed-by: Eli Britstein <elibr@nvidia.com> >> Acked-by: Eelco Chaudron <echaudro@redhat.com> > Chris, if you make changes to this patch after I have ACKed it, you need to remove the acked-by. > See some comments inline below. OK, got it. Done. > >> --- >> lib/automake.mk | 1 + >> lib/dpif-netdev.c | 3 +- >> lib/dpif-netlink.c | 11 +- >> lib/dpif-offload-netlink.c | 207 ++++++++++++++++++++++++++++++++++++ >> lib/dpif-offload-provider.h | 15 ++- >> lib/dpif-provider.h | 3 +- >> lib/dpif.c | 3 +- >> 7 files changed, 237 insertions(+), 6 deletions(-) >> create mode 100644 lib/dpif-offload-netlink.c >> >> diff --git a/lib/automake.mk b/lib/automake.mk >> index 9259f57de..18cffa827 100644 >> --- a/lib/automake.mk >> +++ b/lib/automake.mk >> @@ -446,6 +446,7 @@ lib_libopenvswitch_la_SOURCES += \ >> lib/dpif-netlink.h \ >> lib/dpif-netlink-rtnl.c \ >> lib/dpif-netlink-rtnl.h \ >> + lib/dpif-offload-netlink.c \ >> lib/if-notifier.c \ >> lib/netdev-linux.c \ >> lib/netdev-linux.h \ >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index b078c2da5..3f6d2ead7 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -1593,7 +1593,8 @@ create_dpif_netdev(struct dp_netdev *dp) >> ovs_refcount_ref(&dp->ref_cnt); >> >> dpif = xmalloc(sizeof *dpif); >> - dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id); >> + dpif_init(&dpif->dpif, dp->class, NULL, dp->name, netflow_id >> 8, >> + netflow_id); >> dpif->dp = dp; >> dpif->last_port_seq = seq_read(dp->port_seq); >> >> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c >> index 5f4b60c5a..4dea2f76c 100644 >> --- a/lib/dpif-netlink.c >> +++ b/lib/dpif-netlink.c >> @@ -370,6 +370,12 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name, >> >> if (create) { >> dp_request.cmd = OVS_DP_CMD_NEW; >> + error = dpif_offload_netlink_init(); > I think the init should still be a callback into the dpif_offload_class, and when it gets registered the init function should be called. > Not sure if it’s the correct place, but you could do something similar in dp_initialize() as done with dp_register_provider(). Done. > >> + if (error) { >> + VLOG_WARN("Failed to initialize netlink offload datapath: %s", >> + ovs_strerror(error)); > VLOG_ERR()? Since the init is done in registration, this code is not needed now. > >> + return error; >> + } >> } else { >> dp_request.cmd = OVS_DP_CMD_GET; >> >> @@ -460,8 +466,8 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp) >> dpif->port_notifier = NULL; >> fat_rwlock_init(&dpif->upcall_lock); >> >> - dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name, >> - dp->dp_ifindex, dp->dp_ifindex); >> + dpif_init(&dpif->dpif, &dpif_netlink_class, &dpif_offload_netlink, > Guess naming should me more offload class related, so maybe rename it to “dpif_offload_netlink_class”. Done. > >> + dp->name, dp->dp_ifindex, dp->dp_ifindex); >> >> dpif->dp_ifindex = dp->dp_ifindex; >> dpif->user_features = dp->user_features; >> @@ -732,6 +738,7 @@ dpif_netlink_destroy(struct dpif *dpif_) >> struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); >> struct dpif_netlink_dp dp; >> >> + dpif_offload_netlink_destroy(); >> dpif_netlink_dp_init(&dp); >> dp.cmd = OVS_DP_CMD_DEL; >> dp.dp_ifindex = dpif->dp_ifindex; >> diff --git a/lib/dpif-offload-netlink.c b/lib/dpif-offload-netlink.c >> new file mode 100644 >> index 000000000..7f53ed582 >> --- /dev/null >> +++ b/lib/dpif-offload-netlink.c >> @@ -0,0 +1,207 @@ >> +/* >> + * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. >> + * >> + * Licensed under the Apache License, Version 2.0 (the "License"); >> + * you may not use this file except in compliance with the License. >> + * You may obtain a copy of the License at: >> + * >> + * http://www.apache.org/licenses/LICENSE-2.0 >> + * >> + * Unless required by applicable law or agreed to in writing, software >> + * distributed under the License is distributed on an "AS IS" BASIS, >> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. >> + * See the License for the specific language governing permissions and >> + * limitations under the License. >> + */ >> + >> +#include <config.h> >> +#include <errno.h> >> +#include <linux/psample.h> >> +#include <sys/poll.h> >> + >> +#include "dpif-offload-provider.h" >> +#include "netdev-offload.h" >> +#include "netlink-protocol.h" >> +#include "netlink-socket.h" >> +#include "openvswitch/vlog.h" >> + >> +VLOG_DEFINE_THIS_MODULE(dpif_offload_netlink); >> + >> +static struct nl_sock *psample_sock; >> +static int psample_family; >> + >> +/* Receive psample netlink message and save the attributes. */ >> +struct offload_psample { >> + struct nlattr *packet; /* Packet data. */ >> + int dp_group_id; /* Mapping id for sFlow offload. */ >> + int iifindex; /* Input ifindex. */ >> +}; >> + >> +/* In order to keep compatibility with kernels without psample module, >> + * return success even if psample is not initialized successfully. */ >> +int >> +dpif_offload_netlink_init(void) >> +{ >> + unsigned int psample_mcgroup; >> + int err; >> + >> + if (!netdev_is_flow_api_enabled()) { >> + VLOG_DBG("Flow API is not enabled."); >> + return 0; >> + } >> + >> + if (psample_sock) { >> + VLOG_DBG("Psample socket is already initialized."); >> + return 0; >> + } >> + >> + err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, >> + &psample_family); >> + if (err) { >> + VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n" >> + "Please make sure the kernel module psample is loaded.", >> + PSAMPLE_GENL_NAME, ovs_strerror(err)); >> + return 0; >> + } >> + >> + err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME, >> + PSAMPLE_NL_MCGRP_SAMPLE_NAME, >> + &psample_mcgroup); >> + if (err) { >> + VLOG_WARN("Failed to join Netlink multicast group '%s': %s", >> + PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err)); >> + return 0; >> + } >> + >> + err = nl_sock_create(NETLINK_GENERIC, &psample_sock); >> + if (err) { >> + VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err)); >> + return 0; >> + } >> + >> + err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup); >> + if (err) { >> + VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err)); >> + nl_sock_destroy(psample_sock); >> + psample_sock = NULL; >> + return 0; >> + } >> + >> + return 0; >> +} >> + >> +void >> +dpif_offload_netlink_destroy(void) >> +{ >> + if (!psample_sock) { >> + return; >> + } >> + >> + nl_sock_destroy(psample_sock); >> + psample_sock = NULL; >> +} >> + >> +static void >> +dpif_offload_netlink_sflow_recv_wait(void) >> +{ >> + if (psample_sock) { >> + nl_sock_wait(psample_sock, POLLIN); >> + } >> +} >> + >> +static int >> +psample_from_ofpbuf(struct offload_psample *psample, >> + const struct ofpbuf *buf) >> +{ >> + static const struct nl_policy ovs_psample_policy[] = { >> + [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 }, >> + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 }, >> + [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 }, >> + [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC }, >> + }; >> + struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)]; >> + struct genlmsghdr *genl; >> + struct nlmsghdr *nlmsg; >> + struct ofpbuf b; >> + >> + b = ofpbuf_const_initializer(buf->data, buf->size); >> + nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); >> + genl = ofpbuf_try_pull(&b, sizeof *genl); >> + if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family >> + || !nl_policy_parse(&b, 0, ovs_psample_policy, a, >> + ARRAY_SIZE(ovs_psample_policy))) { >> + return EINVAL; >> + } >> + >> + psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]); >> + psample->dp_group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]); >> + psample->packet = a[PSAMPLE_ATTR_DATA]; >> + >> + return 0; >> +} >> + >> +static int >> +psample_parse_packet(struct offload_psample *psample, >> + struct dpif_offload_sflow *sflow) >> +{ >> + dp_packet_use_stub(&sflow->packet, >> + CONST_CAST(struct nlattr *, >> + nl_attr_get(psample->packet)) - 1, >> + nl_attr_get_size(psample->packet) + >> + sizeof(struct nlattr)); >> + dp_packet_set_data(&sflow->packet, >> + (char *) dp_packet_data(&sflow->packet) + >> + sizeof(struct nlattr)); >> + dp_packet_set_size(&sflow->packet, nl_attr_get_size(psample->packet)); >> + >> + sflow->attr = dpif_offload_sflow_attr_find(psample->dp_group_id); >> + if (!sflow->attr) { >> + return ENOENT; >> + } >> + sflow->iifindex = psample->iifindex; >> + >> + return 0; >> +} >> + >> +static int >> +dpif_offload_netlink_sflow_recv(struct dpif_offload_sflow *sflow) >> +{ >> + if (!psample_sock) { >> + return ENOENT; >> + } >> + >> + for (;;) { >> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); >> + struct offload_psample psample; >> + uint64_t buf_stub[4096 / 8]; >> + struct ofpbuf buf; >> + int error; >> + >> + ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); >> + error = nl_sock_recv(psample_sock, &buf, NULL, false); >> + >> + if (!error) { >> + error = psample_from_ofpbuf(&psample, &buf); >> + if (!error) { >> + ofpbuf_uninit(&buf); >> + error = psample_parse_packet(&psample, sflow); >> + return error; >> + } >> + } else if (error != EAGAIN) { >> + VLOG_WARN_RL(&rl, "Error reading or parsing netlink (%s).", >> + ovs_strerror(error)); >> + nl_sock_drain(psample_sock); >> + error = ENOBUFS; >> + } >> + >> + ofpbuf_uninit(&buf); >> + if (error) { >> + return error; >> + } >> + } >> +} >> + >> +const struct dpif_offload_api dpif_offload_netlink = { > Think the structure definition for dpif_offload_api should change to dpif_offload_class. > And this structure to dpif_offload_netlink_class, see also previous comment. Done. > >> + .sflow_recv_wait = dpif_offload_netlink_sflow_recv_wait, >> + .sflow_recv = dpif_offload_netlink_sflow_recv, >> +}; >> diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h >> index 5b4726ab6..5ac419516 100644 >> --- a/lib/dpif-offload-provider.h >> +++ b/lib/dpif-offload-provider.h >> @@ -17,12 +17,12 @@ >> #ifndef DPIF_OFFLOAD_PROVIDER_H >> #define DPIF_OFFLOAD_PROVIDER_H >> >> +#include "dp-packet.h" >> #include "netlink-protocol.h" >> #include "openvswitch/packets.h" >> #include "openvswitch/types.h" >> >> struct dpif; >> -struct dpif_offload_sflow; >> >> /* When offloading sample action, userspace creates a unique ID to map >> * sFlow action and tunnel info and passes this ID to datapath instead >> @@ -37,6 +37,13 @@ struct dpif_sflow_attr { >> ovs_u128 ufid; /* Flow ufid. */ >> }; >> >> +/* Parse the specific dpif message to sFlow. So OVS can process it. */ >> +struct dpif_offload_sflow { >> + struct dp_packet packet; /* Packet data. */ >> + uint32_t iifindex; /* Input ifindex. */ >> + const struct dpif_sflow_attr *attr; /* SFlow attribute. */ >> +}; >> + >> /* Datapath interface offload structure, to be defined by each implementation >> * of a datapath interface. >> */ >> @@ -54,4 +61,10 @@ void dpif_offload_sflow_recv_wait(const struct dpif *dpif); >> int dpif_offload_sflow_recv(const struct dpif *dpif, >> struct dpif_offload_sflow *sflow); >> >> +#ifdef __linux__ >> +extern const struct dpif_offload_api dpif_offload_netlink; >> +int dpif_offload_netlink_init(void); >> +void dpif_offload_netlink_destroy(void); >> +#endif >> + >> #endif /* DPIF_OFFLOAD_PROVIDER_H */ >> diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h >> index 15a0d2b63..86898d838 100644 >> --- a/lib/dpif-provider.h >> +++ b/lib/dpif-provider.h >> @@ -47,7 +47,8 @@ struct dpif { >> struct dpif_ipf_status; >> struct ipf_dump_ctx; >> >> -void dpif_init(struct dpif *, const struct dpif_class *, const char *name, >> +void dpif_init(struct dpif *, const struct dpif_class *, >> + const struct dpif_offload_api *offload_api, const char *name, >> uint8_t netflow_engine_type, uint8_t netflow_engine_id); >> void dpif_uninit(struct dpif *dpif, bool close); >> >> diff --git a/lib/dpif.c b/lib/dpif.c >> index 8c4aed47b..09ffa18e3 100644 >> --- a/lib/dpif.c >> +++ b/lib/dpif.c >> @@ -1680,10 +1680,11 @@ dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id, >> >> void >> dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class, >> - const char *name, >> + const struct dpif_offload_api *offload_api, const char *name, > Change dpif_offload_api to dpif_offload_api *offload_class Done. >> uint8_t netflow_engine_type, uint8_t netflow_engine_id) >> { >> dpif->dpif_class = dpif_class; >> + dpif->offload_api = offload_api; > I would change the entry in the dpif structure from offload_api top offload_class. Done. Will send v18 for review. Thanks, Chris > >> dpif->base_name = xstrdup(name); >> dpif->full_name = xasprintf("%s@%s", dpif_class->type, name); >> dpif->netflow_engine_type = netflow_engine_type; >> -- >> 2.30.2
diff --git a/lib/automake.mk b/lib/automake.mk index 9259f57de..18cffa827 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -446,6 +446,7 @@ lib_libopenvswitch_la_SOURCES += \ lib/dpif-netlink.h \ lib/dpif-netlink-rtnl.c \ lib/dpif-netlink-rtnl.h \ + lib/dpif-offload-netlink.c \ lib/if-notifier.c \ lib/netdev-linux.c \ lib/netdev-linux.h \ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index b078c2da5..3f6d2ead7 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1593,7 +1593,8 @@ create_dpif_netdev(struct dp_netdev *dp) ovs_refcount_ref(&dp->ref_cnt); dpif = xmalloc(sizeof *dpif); - dpif_init(&dpif->dpif, dp->class, dp->name, netflow_id >> 8, netflow_id); + dpif_init(&dpif->dpif, dp->class, NULL, dp->name, netflow_id >> 8, + netflow_id); dpif->dp = dp; dpif->last_port_seq = seq_read(dp->port_seq); diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 5f4b60c5a..4dea2f76c 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -370,6 +370,12 @@ dpif_netlink_open(const struct dpif_class *class OVS_UNUSED, const char *name, if (create) { dp_request.cmd = OVS_DP_CMD_NEW; + error = dpif_offload_netlink_init(); + if (error) { + VLOG_WARN("Failed to initialize netlink offload datapath: %s", + ovs_strerror(error)); + return error; + } } else { dp_request.cmd = OVS_DP_CMD_GET; @@ -460,8 +466,8 @@ open_dpif(const struct dpif_netlink_dp *dp, struct dpif **dpifp) dpif->port_notifier = NULL; fat_rwlock_init(&dpif->upcall_lock); - dpif_init(&dpif->dpif, &dpif_netlink_class, dp->name, - dp->dp_ifindex, dp->dp_ifindex); + dpif_init(&dpif->dpif, &dpif_netlink_class, &dpif_offload_netlink, + dp->name, dp->dp_ifindex, dp->dp_ifindex); dpif->dp_ifindex = dp->dp_ifindex; dpif->user_features = dp->user_features; @@ -732,6 +738,7 @@ dpif_netlink_destroy(struct dpif *dpif_) struct dpif_netlink *dpif = dpif_netlink_cast(dpif_); struct dpif_netlink_dp dp; + dpif_offload_netlink_destroy(); dpif_netlink_dp_init(&dp); dp.cmd = OVS_DP_CMD_DEL; dp.dp_ifindex = dpif->dp_ifindex; diff --git a/lib/dpif-offload-netlink.c b/lib/dpif-offload-netlink.c new file mode 100644 index 000000000..7f53ed582 --- /dev/null +++ b/lib/dpif-offload-netlink.c @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> +#include <errno.h> +#include <linux/psample.h> +#include <sys/poll.h> + +#include "dpif-offload-provider.h" +#include "netdev-offload.h" +#include "netlink-protocol.h" +#include "netlink-socket.h" +#include "openvswitch/vlog.h" + +VLOG_DEFINE_THIS_MODULE(dpif_offload_netlink); + +static struct nl_sock *psample_sock; +static int psample_family; + +/* Receive psample netlink message and save the attributes. */ +struct offload_psample { + struct nlattr *packet; /* Packet data. */ + int dp_group_id; /* Mapping id for sFlow offload. */ + int iifindex; /* Input ifindex. */ +}; + +/* In order to keep compatibility with kernels without psample module, + * return success even if psample is not initialized successfully. */ +int +dpif_offload_netlink_init(void) +{ + unsigned int psample_mcgroup; + int err; + + if (!netdev_is_flow_api_enabled()) { + VLOG_DBG("Flow API is not enabled."); + return 0; + } + + if (psample_sock) { + VLOG_DBG("Psample socket is already initialized."); + return 0; + } + + err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, + &psample_family); + if (err) { + VLOG_WARN("Generic Netlink family '%s' does not exist: %s\n" + "Please make sure the kernel module psample is loaded.", + PSAMPLE_GENL_NAME, ovs_strerror(err)); + return 0; + } + + err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME, + PSAMPLE_NL_MCGRP_SAMPLE_NAME, + &psample_mcgroup); + if (err) { + VLOG_WARN("Failed to join Netlink multicast group '%s': %s", + PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err)); + return 0; + } + + err = nl_sock_create(NETLINK_GENERIC, &psample_sock); + if (err) { + VLOG_WARN("Failed to create psample socket: %s", ovs_strerror(err)); + return 0; + } + + err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup); + if (err) { + VLOG_WARN("Failed to join psample mcgroup: %s", ovs_strerror(err)); + nl_sock_destroy(psample_sock); + psample_sock = NULL; + return 0; + } + + return 0; +} + +void +dpif_offload_netlink_destroy(void) +{ + if (!psample_sock) { + return; + } + + nl_sock_destroy(psample_sock); + psample_sock = NULL; +} + +static void +dpif_offload_netlink_sflow_recv_wait(void) +{ + if (psample_sock) { + nl_sock_wait(psample_sock, POLLIN); + } +} + +static int +psample_from_ofpbuf(struct offload_psample *psample, + const struct ofpbuf *buf) +{ + static const struct nl_policy ovs_psample_policy[] = { + [PSAMPLE_ATTR_IIFINDEX] = { .type = NL_A_U16 }, + [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 }, + [PSAMPLE_ATTR_GROUP_SEQ] = { .type = NL_A_U32 }, + [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC }, + }; + struct nlattr *a[ARRAY_SIZE(ovs_psample_policy)]; + struct genlmsghdr *genl; + struct nlmsghdr *nlmsg; + struct ofpbuf b; + + b = ofpbuf_const_initializer(buf->data, buf->size); + nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg); + genl = ofpbuf_try_pull(&b, sizeof *genl); + if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family + || !nl_policy_parse(&b, 0, ovs_psample_policy, a, + ARRAY_SIZE(ovs_psample_policy))) { + return EINVAL; + } + + psample->iifindex = nl_attr_get_u16(a[PSAMPLE_ATTR_IIFINDEX]); + psample->dp_group_id = nl_attr_get_u32(a[PSAMPLE_ATTR_SAMPLE_GROUP]); + psample->packet = a[PSAMPLE_ATTR_DATA]; + + return 0; +} + +static int +psample_parse_packet(struct offload_psample *psample, + struct dpif_offload_sflow *sflow) +{ + dp_packet_use_stub(&sflow->packet, + CONST_CAST(struct nlattr *, + nl_attr_get(psample->packet)) - 1, + nl_attr_get_size(psample->packet) + + sizeof(struct nlattr)); + dp_packet_set_data(&sflow->packet, + (char *) dp_packet_data(&sflow->packet) + + sizeof(struct nlattr)); + dp_packet_set_size(&sflow->packet, nl_attr_get_size(psample->packet)); + + sflow->attr = dpif_offload_sflow_attr_find(psample->dp_group_id); + if (!sflow->attr) { + return ENOENT; + } + sflow->iifindex = psample->iifindex; + + return 0; +} + +static int +dpif_offload_netlink_sflow_recv(struct dpif_offload_sflow *sflow) +{ + if (!psample_sock) { + return ENOENT; + } + + for (;;) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + struct offload_psample psample; + uint64_t buf_stub[4096 / 8]; + struct ofpbuf buf; + int error; + + ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); + error = nl_sock_recv(psample_sock, &buf, NULL, false); + + if (!error) { + error = psample_from_ofpbuf(&psample, &buf); + if (!error) { + ofpbuf_uninit(&buf); + error = psample_parse_packet(&psample, sflow); + return error; + } + } else if (error != EAGAIN) { + VLOG_WARN_RL(&rl, "Error reading or parsing netlink (%s).", + ovs_strerror(error)); + nl_sock_drain(psample_sock); + error = ENOBUFS; + } + + ofpbuf_uninit(&buf); + if (error) { + return error; + } + } +} + +const struct dpif_offload_api dpif_offload_netlink = { + .sflow_recv_wait = dpif_offload_netlink_sflow_recv_wait, + .sflow_recv = dpif_offload_netlink_sflow_recv, +}; diff --git a/lib/dpif-offload-provider.h b/lib/dpif-offload-provider.h index 5b4726ab6..5ac419516 100644 --- a/lib/dpif-offload-provider.h +++ b/lib/dpif-offload-provider.h @@ -17,12 +17,12 @@ #ifndef DPIF_OFFLOAD_PROVIDER_H #define DPIF_OFFLOAD_PROVIDER_H +#include "dp-packet.h" #include "netlink-protocol.h" #include "openvswitch/packets.h" #include "openvswitch/types.h" struct dpif; -struct dpif_offload_sflow; /* When offloading sample action, userspace creates a unique ID to map * sFlow action and tunnel info and passes this ID to datapath instead @@ -37,6 +37,13 @@ struct dpif_sflow_attr { ovs_u128 ufid; /* Flow ufid. */ }; +/* Parse the specific dpif message to sFlow. So OVS can process it. */ +struct dpif_offload_sflow { + struct dp_packet packet; /* Packet data. */ + uint32_t iifindex; /* Input ifindex. */ + const struct dpif_sflow_attr *attr; /* SFlow attribute. */ +}; + /* Datapath interface offload structure, to be defined by each implementation * of a datapath interface. */ @@ -54,4 +61,10 @@ void dpif_offload_sflow_recv_wait(const struct dpif *dpif); int dpif_offload_sflow_recv(const struct dpif *dpif, struct dpif_offload_sflow *sflow); +#ifdef __linux__ +extern const struct dpif_offload_api dpif_offload_netlink; +int dpif_offload_netlink_init(void); +void dpif_offload_netlink_destroy(void); +#endif + #endif /* DPIF_OFFLOAD_PROVIDER_H */ diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 15a0d2b63..86898d838 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -47,7 +47,8 @@ struct dpif { struct dpif_ipf_status; struct ipf_dump_ctx; -void dpif_init(struct dpif *, const struct dpif_class *, const char *name, +void dpif_init(struct dpif *, const struct dpif_class *, + const struct dpif_offload_api *offload_api, const char *name, uint8_t netflow_engine_type, uint8_t netflow_engine_id); void dpif_uninit(struct dpif *dpif, bool close); diff --git a/lib/dpif.c b/lib/dpif.c index 8c4aed47b..09ffa18e3 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1680,10 +1680,11 @@ dpif_queue_to_priority(const struct dpif *dpif, uint32_t queue_id, void dpif_init(struct dpif *dpif, const struct dpif_class *dpif_class, - const char *name, + const struct dpif_offload_api *offload_api, const char *name, uint8_t netflow_engine_type, uint8_t netflow_engine_id) { dpif->dpif_class = dpif_class; + dpif->offload_api = offload_api; dpif->base_name = xstrdup(name); dpif->full_name = xasprintf("%s@%s", dpif_class->type, name); dpif->netflow_engine_type = netflow_engine_type;