Message ID | 1550750870-25004-3-git-send-email-ophirmu@mellanox.com |
---|---|
State | Superseded |
Headers | show |
Series | Move offloading-code into a new file | expand |
Bleep bloop. Greetings Ophir Munk, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Unexpected sign-offs from developers who are not authors or co-authors or committers: Ophir Munk <ophirmu@mellanox.com> Lines checked: 1626, Warnings: 1, Errors: 0 Please check this out. If you feel there has been an error, please email aconole@bytheb.org Thanks, 0-day Robot
At first, 'git am' fails to apply the patch: Applying: netdev-dpdk: Move offloading-code to a new file error: patch failed: lib/netdev-dpdk.c:4240 error: lib/netdev-dpdk.c: patch does not apply .git/rebase-apply/patch:1564: new blank line at EOF. + Patch failed at 0002 netdev-dpdk: Move offloading-code to a new file I'm not sure why it complains about 'lib/netdev-dpdk.c' because the issue is the blanlk line at the end of netdev-rte-offloads.c. On 21.02.2019 15:07, Ophir Munk wrote: > From: Roni Bar Yanai <roniba@mellanox.com> > > Hardware offloading-code is moved to a new file called Just curious, why you writing "offloading-code" as a single dash separated word? > netdev-rte-offloads.c. The original offloading-code is copied as is IMHO, it's a good time to make style changes in the code, because current functions doesn't follow a lot of OVS coding style guidelines and we're touching them anyway. > from the netdev-dpdk.c file to the new file, where future > offloading-code should be added as well. The netdev-dpdk.c file will > remain unchanged as new offloading-code is added. > > Reviewed-by: Asaf Penso <asafp@mellanox.com> > Signed-off-by: Roni Bar Yanai <roniba@mellanox.com> > Signed-off-by: Ophir Munk <ophirmu@mellanox.com> > --- > lib/automake.mk | 4 +- > lib/netdev-dpdk.c | 727 +------------------------------------------ > lib/netdev-rte-offloads.c | 775 ++++++++++++++++++++++++++++++++++++++++++++++ > lib/netdev-rte-offloads.h | 38 +++ > 4 files changed, 817 insertions(+), 727 deletions(-) > create mode 100644 lib/netdev-rte-offloads.c > create mode 100644 lib/netdev-rte-offloads.h > > diff --git a/lib/automake.mk b/lib/automake.mk > index bae032b..cc5dccf 100644 > --- a/lib/automake.mk > +++ b/lib/automake.mk > @@ -138,6 +138,7 @@ lib_libopenvswitch_la_SOURCES = \ > lib/netdev-dpdk.h \ > lib/netdev-dummy.c \ > lib/netdev-provider.h \ > + lib/netdev-rte-offloads.h \ > lib/netdev-vport.c \ > lib/netdev-vport.h \ > lib/netdev-vport-private.h \ > @@ -411,7 +412,8 @@ endif > if DPDK_NETDEV > lib_libopenvswitch_la_SOURCES += \ > lib/dpdk.c \ > - lib/netdev-dpdk.c > + lib/netdev-dpdk.c \ > + lib/netdev-rte-offloads.c > else > lib_libopenvswitch_la_SOURCES += \ > lib/dpdk-stub.c > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 163d4ec..c4228d2 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -47,6 +47,7 @@ > #include "dpif-netdev.h" > #include "fatal-signal.h" > #include "netdev-provider.h" > +#include "netdev-rte-offloads.h" > #include "netdev-vport.h" > #include "odp-util.h" > #include "openvswitch/dynamic-string.h" > @@ -179,17 +180,6 @@ static const struct rte_eth_conf port_conf = { > }; > > /* > - * A mapping from ufid to dpdk rte_flow. > - */ > -static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; > - > -struct ufid_to_rte_flow_data { > - struct cmap_node node; > - ovs_u128 ufid; > - struct rte_flow *rte_flow; > -}; > - > -/* > * These callbacks allow virtio-net devices to be added to vhost ports when > * configuration has been fully completed. > */ > @@ -4240,721 +4230,6 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev, > return flow; > } > > -/* Find rte_flow with @ufid */ > -static struct rte_flow * > -ufid_to_rte_flow_find(const ovs_u128 *ufid) { > - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > - struct ufid_to_rte_flow_data *data; > - > - CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { > - if (ovs_u128_equals(*ufid, data->ufid)) { > - return data->rte_flow; > - } > - } > - > - return NULL; > -} > - > -static inline void > -ufid_to_rte_flow_associate(const ovs_u128 *ufid, > - struct rte_flow *rte_flow) { > - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > - struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); > - > - /* > - * We should not simply overwrite an existing rte flow. > - * We should have deleted it first before re-adding it. > - * Thus, if following assert triggers, something is wrong: > - * the rte_flow is not destroyed. > - */ > - ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); > - > - data->ufid = *ufid; > - data->rte_flow = rte_flow; > - > - cmap_insert(&ufid_to_rte_flow, > - CONST_CAST(struct cmap_node *, &data->node), hash); > -} > - > -static inline void > -ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { > - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > - struct ufid_to_rte_flow_data *data; > - > - CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { > - if (ovs_u128_equals(*ufid, data->ufid)) { > - cmap_remove(&ufid_to_rte_flow, > - CONST_CAST(struct cmap_node *, &data->node), hash); > - ovsrcu_postpone(free, data); > - return; > - } > - } > - > - VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", > - UUID_ARGS((struct uuid *)ufid)); > -} > - > -/* > - * To avoid individual xrealloc calls for each new element, a 'curent_max' > - * is used to keep track of current allocated number of elements. Starts > - * by 8 and doubles on each xrealloc call > - */ > -struct flow_patterns { > - struct rte_flow_item *items; > - int cnt; > - int current_max; > -}; > - > -struct flow_actions { > - struct rte_flow_action *actions; > - int cnt; > - int current_max; > -}; > - > -static void > -dump_flow_pattern(struct rte_flow_item *item) > -{ > - struct ds s; > - > - if (!VLOG_IS_DBG_ENABLED() || item->type == RTE_FLOW_ITEM_TYPE_END) { > - return; > - } > - > - ds_init(&s); > - > - if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { > - const struct rte_flow_item_eth *eth_spec = item->spec; > - const struct rte_flow_item_eth *eth_mask = item->mask; > - > - ds_put_cstr(&s, "rte flow eth pattern:\n"); > - if (eth_spec) { > - ds_put_format(&s, > - " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > - "type=0x%04" PRIx16"\n", > - ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), > - ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), > - ntohs(eth_spec->type)); > - } else { > - ds_put_cstr(&s, " Spec = null\n"); > - } > - if (eth_mask) { > - ds_put_format(&s, > - " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > - "type=0x%04"PRIx16"\n", > - ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), > - ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), > - eth_mask->type); > - } else { > - ds_put_cstr(&s, " Mask = null\n"); > - } > - } > - > - if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { > - const struct rte_flow_item_vlan *vlan_spec = item->spec; > - const struct rte_flow_item_vlan *vlan_mask = item->mask; > - > - ds_put_cstr(&s, "rte flow vlan pattern:\n"); > - if (vlan_spec) { > - ds_put_format(&s, > - " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > - ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); > - } else { > - ds_put_cstr(&s, " Spec = null\n"); > - } > - > - if (vlan_mask) { > - ds_put_format(&s, > - " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > - ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); > - } else { > - ds_put_cstr(&s, " Mask = null\n"); > - } > - } > - > - if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { > - const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; > - const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; > - > - ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); > - if (ipv4_spec) { > - ds_put_format(&s, > - " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > - ", src="IP_FMT", dst="IP_FMT"\n", > - ipv4_spec->hdr.type_of_service, > - ipv4_spec->hdr.time_to_live, > - ipv4_spec->hdr.next_proto_id, > - IP_ARGS(ipv4_spec->hdr.src_addr), > - IP_ARGS(ipv4_spec->hdr.dst_addr)); > - } else { > - ds_put_cstr(&s, " Spec = null\n"); > - } > - if (ipv4_mask) { > - ds_put_format(&s, > - " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > - ", src="IP_FMT", dst="IP_FMT"\n", > - ipv4_mask->hdr.type_of_service, > - ipv4_mask->hdr.time_to_live, > - ipv4_mask->hdr.next_proto_id, > - IP_ARGS(ipv4_mask->hdr.src_addr), > - IP_ARGS(ipv4_mask->hdr.dst_addr)); > - } else { > - ds_put_cstr(&s, " Mask = null\n"); > - } > - } > - > - if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { > - const struct rte_flow_item_udp *udp_spec = item->spec; > - const struct rte_flow_item_udp *udp_mask = item->mask; > - > - ds_put_cstr(&s, "rte flow udp pattern:\n"); > - if (udp_spec) { > - ds_put_format(&s, > - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > - ntohs(udp_spec->hdr.src_port), > - ntohs(udp_spec->hdr.dst_port)); > - } else { > - ds_put_cstr(&s, " Spec = null\n"); > - } > - if (udp_mask) { > - ds_put_format(&s, > - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > - udp_mask->hdr.src_port, > - udp_mask->hdr.dst_port); > - } else { > - ds_put_cstr(&s, " Mask = null\n"); > - } > - } > - > - if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { > - const struct rte_flow_item_sctp *sctp_spec = item->spec; > - const struct rte_flow_item_sctp *sctp_mask = item->mask; > - > - ds_put_cstr(&s, "rte flow sctp pattern:\n"); > - if (sctp_spec) { > - ds_put_format(&s, > - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > - ntohs(sctp_spec->hdr.src_port), > - ntohs(sctp_spec->hdr.dst_port)); > - } else { > - ds_put_cstr(&s, " Spec = null\n"); > - } > - if (sctp_mask) { > - ds_put_format(&s, > - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > - sctp_mask->hdr.src_port, > - sctp_mask->hdr.dst_port); > - } else { > - ds_put_cstr(&s, " Mask = null\n"); > - } > - } > - > - if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { > - const struct rte_flow_item_icmp *icmp_spec = item->spec; > - const struct rte_flow_item_icmp *icmp_mask = item->mask; > - > - ds_put_cstr(&s, "rte flow icmp pattern:\n"); > - if (icmp_spec) { > - ds_put_format(&s, > - " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", > - icmp_spec->hdr.icmp_type, > - icmp_spec->hdr.icmp_code); > - } else { > - ds_put_cstr(&s, " Spec = null\n"); > - } > - if (icmp_mask) { > - ds_put_format(&s, > - " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", > - icmp_spec->hdr.icmp_type, > - icmp_spec->hdr.icmp_code); > - } else { > - ds_put_cstr(&s, " Mask = null\n"); > - } > - } > - > - if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { > - const struct rte_flow_item_tcp *tcp_spec = item->spec; > - const struct rte_flow_item_tcp *tcp_mask = item->mask; > - > - ds_put_cstr(&s, "rte flow tcp pattern:\n"); > - if (tcp_spec) { > - ds_put_format(&s, > - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 > - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > - ntohs(tcp_spec->hdr.src_port), > - ntohs(tcp_spec->hdr.dst_port), > - tcp_spec->hdr.data_off, > - tcp_spec->hdr.tcp_flags); > - } else { > - ds_put_cstr(&s, " Spec = null\n"); > - } > - if (tcp_mask) { > - ds_put_format(&s, > - " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 > - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > - tcp_mask->hdr.src_port, > - tcp_mask->hdr.dst_port, > - tcp_mask->hdr.data_off, > - tcp_mask->hdr.tcp_flags); > - } else { > - ds_put_cstr(&s, " Mask = null\n"); > - } > - } > - > - VLOG_DBG("%s", ds_cstr(&s)); > - ds_destroy(&s); > -} > - > -static void > -add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type, > - const void *spec, const void *mask) { > - int cnt = patterns->cnt; > - > - if (cnt == 0) { > - patterns->current_max = 8; > - patterns->items = xcalloc(patterns->current_max, > - sizeof(struct rte_flow_item)); > - } else if (cnt == patterns->current_max) { > - patterns->current_max *= 2; > - patterns->items = xrealloc(patterns->items, patterns->current_max * > - sizeof(struct rte_flow_item)); > - } > - > - patterns->items[cnt].type = type; > - patterns->items[cnt].spec = spec; > - patterns->items[cnt].mask = mask; > - patterns->items[cnt].last = NULL; > - dump_flow_pattern(&patterns->items[cnt]); > - patterns->cnt++; > -} > - > -static void > -add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type, > - const void *conf) > -{ > - int cnt = actions->cnt; > - > - if (cnt == 0) { > - actions->current_max = 8; > - actions->actions = xcalloc(actions->current_max, > - sizeof(struct rte_flow_action)); > - } else if (cnt == actions->current_max) { > - actions->current_max *= 2; > - actions->actions = xrealloc(actions->actions, actions->current_max * > - sizeof(struct rte_flow_action)); > - } > - > - actions->actions[cnt].type = type; > - actions->actions[cnt].conf = conf; > - actions->cnt++; > -} > - > -struct action_rss_data { > - struct rte_flow_action_rss conf; > - uint16_t queue[0]; > -}; > - > -static struct action_rss_data * > -add_flow_rss_action(struct flow_actions *actions, > - struct netdev *netdev) { > - int i; > - struct action_rss_data *rss_data; > - > - rss_data = xmalloc(sizeof(struct action_rss_data) + > - sizeof(uint16_t) * netdev->n_rxq); > - *rss_data = (struct action_rss_data) { > - .conf = (struct rte_flow_action_rss) { > - .func = RTE_ETH_HASH_FUNCTION_DEFAULT, > - .level = 0, > - .types = 0, > - .queue_num = netdev->n_rxq, > - .queue = rss_data->queue, > - .key_len = 0, > - .key = NULL > - }, > - }; > - > - /* Override queue array with default */ > - for (i = 0; i < netdev->n_rxq; i++) { > - rss_data->queue[i] = i; > - } > - > - add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf); > - > - return rss_data; > -} > - > -static int > -netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, > - const struct match *match, > - struct nlattr *nl_actions OVS_UNUSED, > - size_t actions_len OVS_UNUSED, > - const ovs_u128 *ufid, > - struct offload_info *info) { > - const struct rte_flow_attr flow_attr = { > - .group = 0, > - .priority = 0, > - .ingress = 1, > - .egress = 0 > - }; > - struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; > - struct flow_actions actions = { .actions = NULL, .cnt = 0 }; > - struct rte_flow *flow; > - struct rte_flow_error error; > - uint8_t *ipv4_next_proto_mask = NULL; > - int ret = 0; > - > - /* Eth */ > - struct rte_flow_item_eth eth_spec; > - struct rte_flow_item_eth eth_mask; > - if (!eth_addr_is_zero(match->wc.masks.dl_src) || > - !eth_addr_is_zero(match->wc.masks.dl_dst)) { > - memset(ð_spec, 0, sizeof eth_spec); > - memset(ð_mask, 0, sizeof eth_mask); > - rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof eth_spec.dst); > - rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof eth_spec.src); > - eth_spec.type = match->flow.dl_type; > - > - rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, > - sizeof eth_mask.dst); > - rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, > - sizeof eth_mask.src); > - eth_mask.type = match->wc.masks.dl_type; > - > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, > - ð_spec, ð_mask); > - } else { > - /* > - * If user specifies a flow (like UDP flow) without L2 patterns, > - * OVS will at least set the dl_type. Normally, it's enough to > - * create an eth pattern just with it. Unluckily, some Intel's > - * NIC (such as XL710) doesn't support that. Below is a workaround, > - * which simply matches any L2 pkts. > - */ > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); > - } > - > - /* VLAN */ > - struct rte_flow_item_vlan vlan_spec; > - struct rte_flow_item_vlan vlan_mask; > - if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { > - memset(&vlan_spec, 0, sizeof vlan_spec); > - memset(&vlan_mask, 0, sizeof vlan_mask); > - vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); > - vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); > - > - /* match any protocols */ > - vlan_mask.inner_type = 0; > - > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, > - &vlan_spec, &vlan_mask); > - } > - > - /* IP v4 */ > - uint8_t proto = 0; > - struct rte_flow_item_ipv4 ipv4_spec; > - struct rte_flow_item_ipv4 ipv4_mask; > - if (match->flow.dl_type == htons(ETH_TYPE_IP)) { > - memset(&ipv4_spec, 0, sizeof ipv4_spec); > - memset(&ipv4_mask, 0, sizeof ipv4_mask); > - > - ipv4_spec.hdr.type_of_service = match->flow.nw_tos; > - ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; > - ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; > - ipv4_spec.hdr.src_addr = match->flow.nw_src; > - ipv4_spec.hdr.dst_addr = match->flow.nw_dst; > - > - ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; > - ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; > - ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; > - ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; > - ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; > - > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, > - &ipv4_spec, &ipv4_mask); > - > - /* Save proto for L4 protocol setup */ > - proto = ipv4_spec.hdr.next_proto_id & > - ipv4_mask.hdr.next_proto_id; > - > - /* Remember proto mask address for later modification */ > - ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; > - } > - > - if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && > - proto != IPPROTO_SCTP && proto != IPPROTO_TCP && > - (match->wc.masks.tp_src || > - match->wc.masks.tp_dst || > - match->wc.masks.tcp_flags)) { > - VLOG_DBG("L4 Protocol (%u) not supported", proto); > - ret = -1; > - goto out; > - } > - > - if ((match->wc.masks.tp_src && match->wc.masks.tp_src != OVS_BE16_MAX) || > - (match->wc.masks.tp_dst && match->wc.masks.tp_dst != OVS_BE16_MAX)) { > - ret = -1; > - goto out; > - } > - > - struct rte_flow_item_tcp tcp_spec; > - struct rte_flow_item_tcp tcp_mask; > - if (proto == IPPROTO_TCP) { > - memset(&tcp_spec, 0, sizeof tcp_spec); > - memset(&tcp_mask, 0, sizeof tcp_mask); > - tcp_spec.hdr.src_port = match->flow.tp_src; > - tcp_spec.hdr.dst_port = match->flow.tp_dst; > - tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; > - tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; > - > - tcp_mask.hdr.src_port = match->wc.masks.tp_src; > - tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; > - tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; > - tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; > - > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, > - &tcp_spec, &tcp_mask); > - > - /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match */ > - if (ipv4_next_proto_mask) { > - *ipv4_next_proto_mask = 0; > - } > - goto end_proto_check; > - } > - > - struct rte_flow_item_udp udp_spec; > - struct rte_flow_item_udp udp_mask; > - if (proto == IPPROTO_UDP) { > - memset(&udp_spec, 0, sizeof udp_spec); > - memset(&udp_mask, 0, sizeof udp_mask); > - udp_spec.hdr.src_port = match->flow.tp_src; > - udp_spec.hdr.dst_port = match->flow.tp_dst; > - > - udp_mask.hdr.src_port = match->wc.masks.tp_src; > - udp_mask.hdr.dst_port = match->wc.masks.tp_dst; > - > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, > - &udp_spec, &udp_mask); > - > - /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match */ > - if (ipv4_next_proto_mask) { > - *ipv4_next_proto_mask = 0; > - } > - goto end_proto_check; > - } > - > - struct rte_flow_item_sctp sctp_spec; > - struct rte_flow_item_sctp sctp_mask; > - if (proto == IPPROTO_SCTP) { > - memset(&sctp_spec, 0, sizeof sctp_spec); > - memset(&sctp_mask, 0, sizeof sctp_mask); > - sctp_spec.hdr.src_port = match->flow.tp_src; > - sctp_spec.hdr.dst_port = match->flow.tp_dst; > - > - sctp_mask.hdr.src_port = match->wc.masks.tp_src; > - sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; > - > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, > - &sctp_spec, &sctp_mask); > - > - /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match */ > - if (ipv4_next_proto_mask) { > - *ipv4_next_proto_mask = 0; > - } > - goto end_proto_check; > - } > - > - struct rte_flow_item_icmp icmp_spec; > - struct rte_flow_item_icmp icmp_mask; > - if (proto == IPPROTO_ICMP) { > - memset(&icmp_spec, 0, sizeof icmp_spec); > - memset(&icmp_mask, 0, sizeof icmp_mask); > - icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); > - icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); > - > - icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match->wc.masks.tp_src); > - icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match->wc.masks.tp_dst); > - > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, > - &icmp_spec, &icmp_mask); > - > - /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match */ > - if (ipv4_next_proto_mask) { > - *ipv4_next_proto_mask = 0; > - } > - goto end_proto_check; > - } > - > -end_proto_check: > - > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL); > - > - struct rte_flow_action_mark mark; > - struct action_rss_data *rss; > - > - mark.id = info->flow_mark; > - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark); > - > - > - rss = add_flow_rss_action(&actions, netdev); > - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); > - > - flow = netdev_dpdk_rte_flow_create(netdev, &flow_attr,patterns.items, > - actions.actions, &error); > - > - free(rss); > - if (!flow) { > - VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", > - netdev_get_name(netdev), error.type, error.message); > - ret = -1; > - goto out; > - } > - ufid_to_rte_flow_associate(ufid, flow); > - VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", > - netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); > - > -out: > - free(patterns.items); > - free(actions.actions); > - return ret; > -} > - > -/* > - * Check if any unsupported flow patterns are specified. > - */ > -static int > -netdev_dpdk_validate_flow(const struct match *match) { > - struct match match_zero_wc; > - const struct flow *masks = &match->wc.masks; > - > - /* Create a wc-zeroed version of flow */ > - match_init(&match_zero_wc, &match->flow, &match->wc); > - > - if (!is_all_zeros(&match_zero_wc.flow.tunnel, > - sizeof match_zero_wc.flow.tunnel)) { > - goto err; > - } > - > - if (masks->metadata || masks->skb_priority || > - masks->pkt_mark || masks->dp_hash) { > - goto err; > - } > - > - /* recirc id must be zero */ > - if (match_zero_wc.flow.recirc_id) { > - goto err; > - } > - > - if (masks->ct_state || masks->ct_nw_proto || > - masks->ct_zone || masks->ct_mark || > - !ovs_u128_is_zero(masks->ct_label)) { > - goto err; > - } > - > - if (masks->conj_id || masks->actset_output) { > - goto err; > - } > - > - /* unsupported L2 */ > - if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) { > - goto err; > - } > - > - /* unsupported L3 */ > - if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst || > - !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) || > - !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) || > - !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) || > - !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) || > - !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) || > - !is_all_zeros(&masks->nsh, sizeof masks->nsh) || > - !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) || > - !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) { > - goto err; > - } > - > - /* If fragmented, then don't HW accelerate - for now */ > - if (match_zero_wc.flow.nw_frag) { > - goto err; > - } > - > - /* unsupported L4 */ > - if (masks->igmp_group_ip4 || masks->ct_tp_src || masks->ct_tp_dst) { > - goto err; > - } > - > - return 0; > - > -err: > - VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols"); > - return -1; > -} > - > -static int > -netdev_dpdk_destroy_rte_flow(struct netdev *netdev, > - const ovs_u128 *ufid, > - struct rte_flow *rte_flow) { > - struct rte_flow_error error; > - int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); > - > - if (ret == 0) { > - ufid_to_rte_flow_disassociate(ufid); > - VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n", > - netdev_get_name(netdev), rte_flow, > - UUID_ARGS((struct uuid *)ufid)); > - } else { > - VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", > - netdev_get_name(netdev), error.type, error.message); > - } > - > - return ret; > -} > - > -static int > -netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > - struct nlattr *actions, size_t actions_len, > - const ovs_u128 *ufid, struct offload_info *info, > - struct dpif_flow_stats *stats OVS_UNUSED) { > - struct rte_flow *rte_flow; > - int ret; > - > - /* > - * If an old rte_flow exists, it means it's a flow modification. > - * Here destroy the old rte flow first before adding a new one. > - */ > - rte_flow = ufid_to_rte_flow_find(ufid); > - if (rte_flow) { > - ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > - if (ret < 0) { > - return ret; > - } > - } > - > - ret = netdev_dpdk_validate_flow(match); > - if (ret < 0) { > - return ret; > - } > - > - return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, > - actions_len, ufid, info); > -} > - > -static int > -netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > - struct dpif_flow_stats *stats OVS_UNUSED) { > - > - struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); > - > - if (!rte_flow) { > - return -1; > - } > - > - return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > -} > - > -#define DPDK_FLOW_OFFLOAD_API \ > - .flow_put = netdev_dpdk_flow_put, \ > - .flow_del = netdev_dpdk_flow_del > - > #define NETDEV_DPDK_CLASS_COMMON \ > .is_pmd = true, \ > .alloc = netdev_dpdk_alloc, \ > diff --git a/lib/netdev-rte-offloads.c b/lib/netdev-rte-offloads.c > new file mode 100644 > index 0000000..0212156 > --- /dev/null > +++ b/lib/netdev-rte-offloads.c > @@ -0,0 +1,775 @@ > +/* > + * Copyright (c) 2019 Mellanox Technologies, Ltd. I'm not a layer but it seems strange to have above copyright for the file with copy-pasted code written by other people. However, I'm not sure how this should look like. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > +#include <config.h> Include of "netdev-rte-offloads.h" should be here. Blank line. > +#include <rte_flow.h> Blank line. > +#include "netdev-rte-offloads.h" > +#include "netdev-provider.h" > +#include "dpif-netdev.h" Do you really need above header ? > +#include "cmap.h" > +#include "openvswitch/vlog.h" > +#include "openvswitch/match.h" > +#include "packets.h" > +#include "uuid.h" Above headers should go in alphabetical order. For the style guidelines please refer: Documentation/internals/contributing/coding-style.rst > + > +VLOG_DEFINE_THIS_MODULE(netdev_rte_offloads); > + > +/* > + * A mapping from ufid to dpdk rte_flow. > + */ > +static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; > + > +struct ufid_to_rte_flow_data { > + struct cmap_node node; > + ovs_u128 ufid; > + struct rte_flow *rte_flow; > +}; > + > + > +/* Find rte_flow with @ufid */ > +static struct rte_flow * > +ufid_to_rte_flow_find(const ovs_u128 *ufid) { The '{' should go on a next line in function definition. > + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > + struct ufid_to_rte_flow_data *data; > + > + CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { > + if (ovs_u128_equals(*ufid, data->ufid)) { > + return data->rte_flow; > + } > + } > + > + return NULL; > +} > + > +static inline void > +ufid_to_rte_flow_associate(const ovs_u128 *ufid, > + struct rte_flow *rte_flow) { Same. > + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > + struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); No need to parenthesize the operand of sizeof. > + > + /* > + * We should not simply overwrite an existing rte flow. > + * We should have deleted it first before re-adding it. > + * Thus, if following assert triggers, something is wrong: > + * the rte_flow is not destroyed. > + */ > + ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); > + > + data->ufid = *ufid; > + data->rte_flow = rte_flow; > + > + cmap_insert(&ufid_to_rte_flow, > + CONST_CAST(struct cmap_node *, &data->node), hash); > +} > + > +static inline void > +ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { Same. > + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); sizeof *ufid > + struct ufid_to_rte_flow_data *data; > + > + CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { > + if (ovs_u128_equals(*ufid, data->ufid)) { > + cmap_remove(&ufid_to_rte_flow, > + CONST_CAST(struct cmap_node *, &data->node), hash); > + ovsrcu_postpone(free, data); > + return; > + } > + } > + > + VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", > + UUID_ARGS((struct uuid *)ufid)); Space needed between the cast and variable: '(struct uuid *) ufid'. > +} > + > +/* > + * To avoid individual xrealloc calls for each new element, a 'curent_max' > + * is used to keep track of current allocated number of elements. Starts > + * by 8 and doubles on each xrealloc call > + */ > +struct flow_patterns { > + struct rte_flow_item *items; > + int cnt; > + int current_max; > +}; > + > +struct flow_actions { > + struct rte_flow_action *actions; > + int cnt; > + int current_max; > +}; > + > +static void > +dump_flow_pattern(struct rte_flow_item *item) > +{ > + struct ds s; > + > + if (!VLOG_IS_DBG_ENABLED() || item->type == RTE_FLOW_ITEM_TYPE_END) { > + return; > + } > + > + ds_init(&s); > + > + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { > + const struct rte_flow_item_eth *eth_spec = item->spec; > + const struct rte_flow_item_eth *eth_mask = item->mask; > + > + ds_put_cstr(&s, "rte flow eth pattern:\n"); > + if (eth_spec) { > + ds_put_format(&s, > + " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > + "type=0x%04" PRIx16"\n", > + ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), > + ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), > + ntohs(eth_spec->type)); > + } else { > + ds_put_cstr(&s, " Spec = null\n"); > + } > + if (eth_mask) { > + ds_put_format(&s, > + " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > + "type=0x%04"PRIx16"\n", > + ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), > + ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), > + eth_mask->type); > + } else { > + ds_put_cstr(&s, " Mask = null\n"); > + } > + } > + > + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { > + const struct rte_flow_item_vlan *vlan_spec = item->spec; > + const struct rte_flow_item_vlan *vlan_mask = item->mask; > + > + ds_put_cstr(&s, "rte flow vlan pattern:\n"); > + if (vlan_spec) { > + ds_put_format(&s, > + " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > + ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); > + } else { > + ds_put_cstr(&s, " Spec = null\n"); > + } > + > + if (vlan_mask) { > + ds_put_format(&s, > + " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > + ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); > + } else { > + ds_put_cstr(&s, " Mask = null\n"); > + } > + } > + > + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { > + const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; > + const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; > + > + ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); > + if (ipv4_spec) { > + ds_put_format(&s, > + " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > + ", src="IP_FMT", dst="IP_FMT"\n", > + ipv4_spec->hdr.type_of_service, > + ipv4_spec->hdr.time_to_live, > + ipv4_spec->hdr.next_proto_id, > + IP_ARGS(ipv4_spec->hdr.src_addr), > + IP_ARGS(ipv4_spec->hdr.dst_addr)); > + } else { > + ds_put_cstr(&s, " Spec = null\n"); > + } > + if (ipv4_mask) { > + ds_put_format(&s, > + " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > + ", src="IP_FMT", dst="IP_FMT"\n", > + ipv4_mask->hdr.type_of_service, > + ipv4_mask->hdr.time_to_live, > + ipv4_mask->hdr.next_proto_id, > + IP_ARGS(ipv4_mask->hdr.src_addr), > + IP_ARGS(ipv4_mask->hdr.dst_addr)); > + } else { > + ds_put_cstr(&s, " Mask = null\n"); > + } > + } > + > + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { > + const struct rte_flow_item_udp *udp_spec = item->spec; > + const struct rte_flow_item_udp *udp_mask = item->mask; > + > + ds_put_cstr(&s, "rte flow udp pattern:\n"); > + if (udp_spec) { > + ds_put_format(&s, > + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > + ntohs(udp_spec->hdr.src_port), > + ntohs(udp_spec->hdr.dst_port)); > + } else { > + ds_put_cstr(&s, " Spec = null\n"); > + } > + if (udp_mask) { > + ds_put_format(&s, > + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > + udp_mask->hdr.src_port, > + udp_mask->hdr.dst_port); > + } else { > + ds_put_cstr(&s, " Mask = null\n"); > + } > + } > + > + if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { > + const struct rte_flow_item_sctp *sctp_spec = item->spec; > + const struct rte_flow_item_sctp *sctp_mask = item->mask; > + > + ds_put_cstr(&s, "rte flow sctp pattern:\n"); > + if (sctp_spec) { > + ds_put_format(&s, > + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > + ntohs(sctp_spec->hdr.src_port), > + ntohs(sctp_spec->hdr.dst_port)); > + } else { > + ds_put_cstr(&s, " Spec = null\n"); > + } > + if (sctp_mask) { > + ds_put_format(&s, > + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > + sctp_mask->hdr.src_port, > + sctp_mask->hdr.dst_port); > + } else { > + ds_put_cstr(&s, " Mask = null\n"); > + } > + } > + > + if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { > + const struct rte_flow_item_icmp *icmp_spec = item->spec; > + const struct rte_flow_item_icmp *icmp_mask = item->mask; > + > + ds_put_cstr(&s, "rte flow icmp pattern:\n"); > + if (icmp_spec) { > + ds_put_format(&s, > + " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", > + icmp_spec->hdr.icmp_type, > + icmp_spec->hdr.icmp_code); > + } else { > + ds_put_cstr(&s, " Spec = null\n"); > + } > + if (icmp_mask) { > + ds_put_format(&s, > + " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", > + icmp_spec->hdr.icmp_type, > + icmp_spec->hdr.icmp_code); > + } else { > + ds_put_cstr(&s, " Mask = null\n"); > + } > + } > + > + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { > + const struct rte_flow_item_tcp *tcp_spec = item->spec; > + const struct rte_flow_item_tcp *tcp_mask = item->mask; > + > + ds_put_cstr(&s, "rte flow tcp pattern:\n"); > + if (tcp_spec) { > + ds_put_format(&s, > + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 > + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > + ntohs(tcp_spec->hdr.src_port), > + ntohs(tcp_spec->hdr.dst_port), > + tcp_spec->hdr.data_off, > + tcp_spec->hdr.tcp_flags); > + } else { > + ds_put_cstr(&s, " Spec = null\n"); > + } > + if (tcp_mask) { > + ds_put_format(&s, > + " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 > + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > + tcp_mask->hdr.src_port, > + tcp_mask->hdr.dst_port, > + tcp_mask->hdr.data_off, > + tcp_mask->hdr.tcp_flags); > + } else { > + ds_put_cstr(&s, " Mask = null\n"); > + } > + } > + > + VLOG_DBG("%s", ds_cstr(&s)); > + ds_destroy(&s); > +} > + > +static void > +add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type, > + const void *spec, const void *mask) { ditto > + int cnt = patterns->cnt; > + > + if (cnt == 0) { > + patterns->current_max = 8; > + patterns->items = xcalloc(patterns->current_max, > + sizeof(struct rte_flow_item)); sizeof *patterns->items > + } else if (cnt == patterns->current_max) { > + patterns->current_max *= 2; > + patterns->items = xrealloc(patterns->items, patterns->current_max * > + sizeof(struct rte_flow_item)); sizeof *patterns->items > + } > + > + patterns->items[cnt].type = type; > + patterns->items[cnt].spec = spec; > + patterns->items[cnt].mask = mask; > + patterns->items[cnt].last = NULL; > + dump_flow_pattern(&patterns->items[cnt]); > + patterns->cnt++; > +} > + > +static void > +add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type, > + const void *conf) > +{ > + int cnt = actions->cnt; > + > + if (cnt == 0) { > + actions->current_max = 8; > + actions->actions = xcalloc(actions->current_max, > + sizeof(struct rte_flow_action)); sizeof *actions->actions > + } else if (cnt == actions->current_max) { > + actions->current_max *= 2; > + actions->actions = xrealloc(actions->actions, actions->current_max * > + sizeof(struct rte_flow_action)); sizeof *actions->actions > + } > + > + actions->actions[cnt].type = type; > + actions->actions[cnt].conf = conf; > + actions->cnt++; > +} > + > +struct action_rss_data { > + struct rte_flow_action_rss conf; > + uint16_t queue[0]; > +}; > + > +static struct action_rss_data * > +add_flow_rss_action(struct flow_actions *actions, > + struct netdev *netdev) { ditto > + int i; > + struct action_rss_data *rss_data; > + > + rss_data = xmalloc(sizeof(struct action_rss_data) + > + sizeof(uint16_t) * netdev->n_rxq); sizeof *rss_data + netdev_n_rxq(netdev) * sizeof rss_data->queue[0] As we're not inside the netdev we need to use getter 'netdev_n_rxq'. > + *rss_data = (struct action_rss_data) { > + .conf = (struct rte_flow_action_rss) { > + .func = RTE_ETH_HASH_FUNCTION_DEFAULT, > + .level = 0, > + .types = 0, > + .queue_num = netdev->n_rxq, netdev_n_rxq(netdev) > + .queue = rss_data->queue, > + .key_len = 0, > + .key = NULL > + }, > + }; > + > + /* Override queue array with default */ > + for (i = 0; i < netdev->n_rxq; i++) { netdev_n_rxq(netdev) > + rss_data->queue[i] = i; > + } > + > + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf); > + > + return rss_data; > +} > + > +static int > +netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, > + const struct match *match, > + struct nlattr *nl_actions OVS_UNUSED, > + size_t actions_len OVS_UNUSED, > + const ovs_u128 *ufid, > + struct offload_info *info) { ditto I'll skip the rest of this function as I already have a path with refactoring: https://patchwork.ozlabs.org/patch/1037609/ > + const struct rte_flow_attr flow_attr = { > + .group = 0, > + .priority = 0, > + .ingress = 1, > + .egress = 0 > + }; > + struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; > + struct flow_actions actions = { .actions = NULL, .cnt = 0 }; > + struct rte_flow *flow; > + struct rte_flow_error error; > + uint8_t *ipv4_next_proto_mask = NULL; > + int ret = 0; > + > + /* Eth */ > + struct rte_flow_item_eth eth_spec; > + struct rte_flow_item_eth eth_mask; > + if (!eth_addr_is_zero(match->wc.masks.dl_src) || > + !eth_addr_is_zero(match->wc.masks.dl_dst)) { > + memset(ð_spec, 0, sizeof eth_spec); > + memset(ð_mask, 0, sizeof eth_mask); > + rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof eth_spec.dst); > + rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof eth_spec.src); > + eth_spec.type = match->flow.dl_type; > + > + rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, > + sizeof eth_mask.dst); > + rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, > + sizeof eth_mask.src); > + eth_mask.type = match->wc.masks.dl_type; > + > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, > + ð_spec, ð_mask); > + } else { > + /* > + * If user specifies a flow (like UDP flow) without L2 patterns, > + * OVS will at least set the dl_type. Normally, it's enough to > + * create an eth pattern just with it. Unluckily, some Intel's > + * NIC (such as XL710) doesn't support that. Below is a workaround, > + * which simply matches any L2 pkts. > + */ > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); > + } > + > + /* VLAN */ > + struct rte_flow_item_vlan vlan_spec; > + struct rte_flow_item_vlan vlan_mask; > + if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { > + memset(&vlan_spec, 0, sizeof vlan_spec); > + memset(&vlan_mask, 0, sizeof vlan_mask); > + vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); > + vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); > + > + /* match any protocols */ > + vlan_mask.inner_type = 0; > + > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, > + &vlan_spec, &vlan_mask); > + } > + > + /* IP v4 */ > + uint8_t proto = 0; > + struct rte_flow_item_ipv4 ipv4_spec; > + struct rte_flow_item_ipv4 ipv4_mask; > + if (match->flow.dl_type == htons(ETH_TYPE_IP)) { > + memset(&ipv4_spec, 0, sizeof ipv4_spec); > + memset(&ipv4_mask, 0, sizeof ipv4_mask); > + > + ipv4_spec.hdr.type_of_service = match->flow.nw_tos; > + ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; > + ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; > + ipv4_spec.hdr.src_addr = match->flow.nw_src; > + ipv4_spec.hdr.dst_addr = match->flow.nw_dst; > + > + ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; > + ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; > + ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; > + ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; > + ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; > + > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, > + &ipv4_spec, &ipv4_mask); > + > + /* Save proto for L4 protocol setup */ > + proto = ipv4_spec.hdr.next_proto_id & > + ipv4_mask.hdr.next_proto_id; > + > + /* Remember proto mask address for later modification */ > + ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; > + } > + > + if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && > + proto != IPPROTO_SCTP && proto != IPPROTO_TCP && > + (match->wc.masks.tp_src || > + match->wc.masks.tp_dst || > + match->wc.masks.tcp_flags)) { > + VLOG_DBG("L4 Protocol (%u) not supported", proto); > + ret = -1; > + goto out; > + } > + > + if ((match->wc.masks.tp_src && match->wc.masks.tp_src != OVS_BE16_MAX) || > + (match->wc.masks.tp_dst && match->wc.masks.tp_dst != OVS_BE16_MAX)) { > + ret = -1; > + goto out; > + } > + > + struct rte_flow_item_tcp tcp_spec; > + struct rte_flow_item_tcp tcp_mask; > + if (proto == IPPROTO_TCP) { > + memset(&tcp_spec, 0, sizeof tcp_spec); > + memset(&tcp_mask, 0, sizeof tcp_mask); > + tcp_spec.hdr.src_port = match->flow.tp_src; > + tcp_spec.hdr.dst_port = match->flow.tp_dst; > + tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; > + tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; > + > + tcp_mask.hdr.src_port = match->wc.masks.tp_src; > + tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; > + tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; > + tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; > + > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, > + &tcp_spec, &tcp_mask); > + > + /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match */ > + if (ipv4_next_proto_mask) { > + *ipv4_next_proto_mask = 0; > + } > + goto end_proto_check; > + } > + > + struct rte_flow_item_udp udp_spec; > + struct rte_flow_item_udp udp_mask; > + if (proto == IPPROTO_UDP) { > + memset(&udp_spec, 0, sizeof udp_spec); > + memset(&udp_mask, 0, sizeof udp_mask); > + udp_spec.hdr.src_port = match->flow.tp_src; > + udp_spec.hdr.dst_port = match->flow.tp_dst; > + > + udp_mask.hdr.src_port = match->wc.masks.tp_src; > + udp_mask.hdr.dst_port = match->wc.masks.tp_dst; > + > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, > + &udp_spec, &udp_mask); > + > + /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match */ > + if (ipv4_next_proto_mask) { > + *ipv4_next_proto_mask = 0; > + } > + goto end_proto_check; > + } > + > + struct rte_flow_item_sctp sctp_spec; > + struct rte_flow_item_sctp sctp_mask; > + if (proto == IPPROTO_SCTP) { > + memset(&sctp_spec, 0, sizeof sctp_spec); > + memset(&sctp_mask, 0, sizeof sctp_mask); > + sctp_spec.hdr.src_port = match->flow.tp_src; > + sctp_spec.hdr.dst_port = match->flow.tp_dst; > + > + sctp_mask.hdr.src_port = match->wc.masks.tp_src; > + sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; > + > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, > + &sctp_spec, &sctp_mask); > + > + /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match */ > + if (ipv4_next_proto_mask) { > + *ipv4_next_proto_mask = 0; > + } > + goto end_proto_check; > + } > + > + struct rte_flow_item_icmp icmp_spec; > + struct rte_flow_item_icmp icmp_mask; > + if (proto == IPPROTO_ICMP) { > + memset(&icmp_spec, 0, sizeof icmp_spec); > + memset(&icmp_mask, 0, sizeof icmp_mask); > + icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); > + icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); > + > + icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match->wc.masks.tp_src); > + icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match->wc.masks.tp_dst); > + > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, > + &icmp_spec, &icmp_mask); > + > + /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match */ > + if (ipv4_next_proto_mask) { > + *ipv4_next_proto_mask = 0; > + } > + goto end_proto_check; > + } > + > +end_proto_check: > + > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL); > + > + struct rte_flow_action_mark mark; > + struct action_rss_data *rss; > + > + mark.id = info->flow_mark; > + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark); > + > + > + rss = add_flow_rss_action(&actions, netdev); > + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); > + > + flow = netdev_dpdk_rte_flow_create(netdev, &flow_attr,patterns.items, > + actions.actions, &error); > + > + free(rss); > + if (!flow) { > + VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", > + netdev_get_name(netdev), error.type, error.message); > + ret = -1; > + goto out; > + } > + ufid_to_rte_flow_associate(ufid, flow); > + VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", > + netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); > + > +out: > + free(patterns.items); > + free(actions.actions); > + return ret; > +} Code below was refactored recently. You're removing the new version from netdev-dpdk.c, but adding the old one here. > +static bool > +is_all_zero(const void *addr, size_t n) { > + size_t i = 0; > + const uint8_t *p = (uint8_t *)addr; > + > + for (i = 0; i < n; i++) { > + if (p[i] != 0) { > + return false; > + } > + } > + > + return true; > +} > + > +/* > + * Check if any unsupported flow patterns are specified. > + */ > +static int > +netdev_dpdk_validate_flow(const struct match *match) { > + struct match match_zero_wc; > + > + /* Create a wc-zeroed version of flow */ > + match_init(&match_zero_wc, &match->flow, &match->wc); > + > + if (!is_all_zero(&match_zero_wc.flow.tunnel, > + sizeof(match_zero_wc.flow.tunnel))) { > + goto err; > + } > + > + if (match->wc.masks.metadata || > + match->wc.masks.skb_priority || > + match->wc.masks.pkt_mark || > + match->wc.masks.dp_hash) { > + goto err; > + } > + > + /* recirc id must be zero */ > + if (match_zero_wc.flow.recirc_id) { > + goto err; > + } > + > + if (match->wc.masks.ct_state || > + match->wc.masks.ct_nw_proto || > + match->wc.masks.ct_zone || > + match->wc.masks.ct_mark || > + match->wc.masks.ct_label.u64.hi || > + match->wc.masks.ct_label.u64.lo) { > + goto err; > + } > + > + if (match->wc.masks.conj_id || > + match->wc.masks.actset_output) { > + goto err; > + } > + > + /* unsupported L2 */ > + if (!is_all_zero(&match->wc.masks.mpls_lse, > + sizeof(match_zero_wc.flow.mpls_lse))) { > + goto err; > + } > + > + /* unsupported L3 */ > + if (match->wc.masks.ipv6_label || > + match->wc.masks.ct_nw_src || > + match->wc.masks.ct_nw_dst || > + !is_all_zero(&match->wc.masks.ipv6_src, sizeof(struct in6_addr)) || > + !is_all_zero(&match->wc.masks.ipv6_dst, sizeof(struct in6_addr)) || > + !is_all_zero(&match->wc.masks.ct_ipv6_src, sizeof(struct in6_addr)) || > + !is_all_zero(&match->wc.masks.ct_ipv6_dst, sizeof(struct in6_addr)) || > + !is_all_zero(&match->wc.masks.nd_target, sizeof(struct in6_addr)) || > + !is_all_zero(&match->wc.masks.nsh, sizeof(struct ovs_key_nsh)) || > + !is_all_zero(&match->wc.masks.arp_sha, sizeof(struct eth_addr)) || > + !is_all_zero(&match->wc.masks.arp_tha, sizeof(struct eth_addr))) { > + goto err; > + } > + > + /* If fragmented, then don't HW accelerate - for now */ > + if (match_zero_wc.flow.nw_frag) { > + goto err; > + } > + > + /* unsupported L4 */ > + if (match->wc.masks.igmp_group_ip4 || > + match->wc.masks.ct_tp_src || > + match->wc.masks.ct_tp_dst) { > + goto err; > + } > + > + return 0; > + > +err: > + VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols"); > + return -1; > +} > + > +static int > +netdev_dpdk_destroy_rte_flow(struct netdev *netdev, > + const ovs_u128 *ufid, > + struct rte_flow *rte_flow) { ditto > + struct rte_flow_error error; > + int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); > + > + if (ret == 0) { > + ufid_to_rte_flow_disassociate(ufid); > + VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n", > + netdev_get_name(netdev), rte_flow, > + UUID_ARGS((struct uuid *)ufid)); > + } else { > + VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", > + netdev_get_name(netdev), error.type, error.message); > + } > + > + return ret; > +} > + > +int > +netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > + struct nlattr *actions, size_t actions_len, > + const ovs_u128 *ufid, struct offload_info *info, > + struct dpif_flow_stats *stats OVS_UNUSED) { ditto > + struct rte_flow *rte_flow; > + int ret; > + > + /* > + * If an old rte_flow exists, it means it's a flow modification. > + * Here destroy the old rte flow first before adding a new one. > + */ > + rte_flow = ufid_to_rte_flow_find(ufid); > + if (rte_flow) { > + ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > + if (ret < 0) { > + return ret; > + } > + } > + > + ret = netdev_dpdk_validate_flow(match); > + if (ret < 0) { > + return ret; > + } > + > + return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, > + actions_len, ufid, info); > +} > + > +int > +netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > + struct dpif_flow_stats *stats OVS_UNUSED) { ditto > + > + struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); > + > + if (!rte_flow) { > + return -1; > + } > + > + return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > +} > + > diff --git a/lib/netdev-rte-offloads.h b/lib/netdev-rte-offloads.h > new file mode 100644 > index 0000000..573e214 > --- /dev/null > +++ b/lib/netdev-rte-offloads.h > @@ -0,0 +1,38 @@ > +/* > + * Copyright (c) 2019 Mellanox Technologies, Ltd. > + * > + * Licensed under the Apache License, Version 2.0 (the "License"); > + * you may not use this file except in compliance with the License. > + * You may obtain a copy of the License at: > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > + > +#ifndef NETDEV_VPORT_OFFLOADS_H > +#define NETDEV_VPORT_OFFLOADS_H 1 > + > +#include "openvswitch/types.h" It'll be good to have a blank line here. > +struct netdev; > +struct match; > +struct nlattr; > +struct offload_info; > +struct dpif_flow_stats; > + > +int netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > + struct nlattr *actions, size_t actions_len, > + const ovs_u128 *ufid, struct offload_info *info, > + struct dpif_flow_stats *stats OVS_UNUSED); > +int netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > + struct dpif_flow_stats *stats OVS_UNUSED); > + > +#define DPDK_FLOW_OFFLOAD_API \ > + .flow_put = netdev_dpdk_flow_put, \ > + .flow_del = netdev_dpdk_flow_del > + > +#endif /* netdev-rte-offloads.h */ >
> -----Original Message----- > From: Ilya Maximets <i.maximets@samsung.com> > Sent: Thursday, February 21, 2019 6:20 PM > To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org > Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern <olgas@mellanox.com>; > Kevin Traynor <ktraynor@redhat.com>; Asaf Penso <asafp@mellanox.com>; > Roni Bar Yanai <roniba@mellanox.com>; Flavio Leitner <fbl@sysclose.org> > Subject: Re: [PATCH v2 2/3] netdev-dpdk: Move offloading-code to a new > file > > At first, 'git am' fails to apply the patch: > > Applying: netdev-dpdk: Move offloading-code to a new file > error: patch failed: lib/netdev-dpdk.c:4240 > error: lib/netdev-dpdk.c: patch does not apply > .git/rebase-apply/patch:1564: new blank line at EOF. > + > Patch failed at 0002 netdev-dpdk: Move offloading-code to a new file > > I'm not sure why it complains about 'lib/netdev-dpdk.c' because the > issue is the blanlk line at the end of netdev-rte-offloads.c. > > On 21.02.2019 15:07, Ophir Munk wrote: > > From: Roni Bar Yanai <roniba@mellanox.com> > > > > Hardware offloading-code is moved to a new file called > > Just curious, why you writing "offloading-code" as a single dash > separated word? > > > netdev-rte-offloads.c. The original offloading-code is copied as is > > IMHO, it's a good time to make style changes in the code, because > current functions doesn't follow a lot of OVS coding style guidelines > and we're touching them anyway. > I agree it is a good time, but I think it should be done on a separate commit, because if something breaks (although all are style changes), it will be hard to find the changes on git log. The entire file Will be + . > > from the netdev-dpdk.c file to the new file, where future > > offloading-code should be added as well. The netdev-dpdk.c file will > > remain unchanged as new offloading-code is added. > > > > Reviewed-by: Asaf Penso <asafp@mellanox.com> > > Signed-off-by: Roni Bar Yanai <roniba@mellanox.com> > > Signed-off-by: Ophir Munk <ophirmu@mellanox.com> > > --- > > lib/automake.mk | 4 +- > > lib/netdev-dpdk.c | 727 +------------------------------------------ > > lib/netdev-rte-offloads.c | 775 > ++++++++++++++++++++++++++++++++++++++++++++++ > > lib/netdev-rte-offloads.h | 38 +++ > > 4 files changed, 817 insertions(+), 727 deletions(-) > > create mode 100644 lib/netdev-rte-offloads.c > > create mode 100644 lib/netdev-rte-offloads.h > > > > diff --git a/lib/automake.mk b/lib/automake.mk > > index bae032b..cc5dccf 100644 > > --- a/lib/automake.mk > > +++ b/lib/automake.mk > > @@ -138,6 +138,7 @@ lib_libopenvswitch_la_SOURCES = \ > > lib/netdev-dpdk.h \ > > lib/netdev-dummy.c \ > > lib/netdev-provider.h \ > > + lib/netdev-rte-offloads.h \ > > lib/netdev-vport.c \ > > lib/netdev-vport.h \ > > lib/netdev-vport-private.h \ > > @@ -411,7 +412,8 @@ endif > > if DPDK_NETDEV > > lib_libopenvswitch_la_SOURCES += \ > > lib/dpdk.c \ > > - lib/netdev-dpdk.c > > + lib/netdev-dpdk.c \ > > + lib/netdev-rte-offloads.c > > else > > lib_libopenvswitch_la_SOURCES += \ > > lib/dpdk-stub.c > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > > index 163d4ec..c4228d2 100644 > > --- a/lib/netdev-dpdk.c > > +++ b/lib/netdev-dpdk.c > > @@ -47,6 +47,7 @@ > > #include "dpif-netdev.h" > > #include "fatal-signal.h" > > #include "netdev-provider.h" > > +#include "netdev-rte-offloads.h" > > #include "netdev-vport.h" > > #include "odp-util.h" > > #include "openvswitch/dynamic-string.h" > > @@ -179,17 +180,6 @@ static const struct rte_eth_conf port_conf = { > > }; > > > > /* > > - * A mapping from ufid to dpdk rte_flow. > > - */ > > -static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; > > - > > -struct ufid_to_rte_flow_data { > > - struct cmap_node node; > > - ovs_u128 ufid; > > - struct rte_flow *rte_flow; > > -}; > > - > > -/* > > * These callbacks allow virtio-net devices to be added to vhost ports when > > * configuration has been fully completed. > > */ > > @@ -4240,721 +4230,6 @@ netdev_dpdk_rte_flow_create(struct netdev > *netdev, > > return flow; > > } > > > > -/* Find rte_flow with @ufid */ > > -static struct rte_flow * > > -ufid_to_rte_flow_find(const ovs_u128 *ufid) { > > - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > > - struct ufid_to_rte_flow_data *data; > > - > > - CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { > > - if (ovs_u128_equals(*ufid, data->ufid)) { > > - return data->rte_flow; > > - } > > - } > > - > > - return NULL; > > -} > > - > > -static inline void > > -ufid_to_rte_flow_associate(const ovs_u128 *ufid, > > - struct rte_flow *rte_flow) { > > - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > > - struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); > > - > > - /* > > - * We should not simply overwrite an existing rte flow. > > - * We should have deleted it first before re-adding it. > > - * Thus, if following assert triggers, something is wrong: > > - * the rte_flow is not destroyed. > > - */ > > - ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); > > - > > - data->ufid = *ufid; > > - data->rte_flow = rte_flow; > > - > > - cmap_insert(&ufid_to_rte_flow, > > - CONST_CAST(struct cmap_node *, &data->node), hash); > > -} > > - > > -static inline void > > -ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { > > - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > > - struct ufid_to_rte_flow_data *data; > > - > > - CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { > > - if (ovs_u128_equals(*ufid, data->ufid)) { > > - cmap_remove(&ufid_to_rte_flow, > > - CONST_CAST(struct cmap_node *, &data->node), hash); > > - ovsrcu_postpone(free, data); > > - return; > > - } > > - } > > - > > - VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", > > - UUID_ARGS((struct uuid *)ufid)); > > -} > > - > > -/* > > - * To avoid individual xrealloc calls for each new element, a 'curent_max' > > - * is used to keep track of current allocated number of elements. Starts > > - * by 8 and doubles on each xrealloc call > > - */ > > -struct flow_patterns { > > - struct rte_flow_item *items; > > - int cnt; > > - int current_max; > > -}; > > - > > -struct flow_actions { > > - struct rte_flow_action *actions; > > - int cnt; > > - int current_max; > > -}; > > - > > -static void > > -dump_flow_pattern(struct rte_flow_item *item) > > -{ > > - struct ds s; > > - > > - if (!VLOG_IS_DBG_ENABLED() || item->type == > RTE_FLOW_ITEM_TYPE_END) { > > - return; > > - } > > - > > - ds_init(&s); > > - > > - if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { > > - const struct rte_flow_item_eth *eth_spec = item->spec; > > - const struct rte_flow_item_eth *eth_mask = item->mask; > > - > > - ds_put_cstr(&s, "rte flow eth pattern:\n"); > > - if (eth_spec) { > > - ds_put_format(&s, > > - " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > > - "type=0x%04" PRIx16"\n", > > - ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), > > - ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), > > - ntohs(eth_spec->type)); > > - } else { > > - ds_put_cstr(&s, " Spec = null\n"); > > - } > > - if (eth_mask) { > > - ds_put_format(&s, > > - " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > > - "type=0x%04"PRIx16"\n", > > - ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), > > - ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), > > - eth_mask->type); > > - } else { > > - ds_put_cstr(&s, " Mask = null\n"); > > - } > > - } > > - > > - if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { > > - const struct rte_flow_item_vlan *vlan_spec = item->spec; > > - const struct rte_flow_item_vlan *vlan_mask = item->mask; > > - > > - ds_put_cstr(&s, "rte flow vlan pattern:\n"); > > - if (vlan_spec) { > > - ds_put_format(&s, > > - " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > > - ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); > > - } else { > > - ds_put_cstr(&s, " Spec = null\n"); > > - } > > - > > - if (vlan_mask) { > > - ds_put_format(&s, > > - " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > > - ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); > > - } else { > > - ds_put_cstr(&s, " Mask = null\n"); > > - } > > - } > > - > > - if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { > > - const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; > > - const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; > > - > > - ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); > > - if (ipv4_spec) { > > - ds_put_format(&s, > > - " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > > - ", src="IP_FMT", dst="IP_FMT"\n", > > - ipv4_spec->hdr.type_of_service, > > - ipv4_spec->hdr.time_to_live, > > - ipv4_spec->hdr.next_proto_id, > > - IP_ARGS(ipv4_spec->hdr.src_addr), > > - IP_ARGS(ipv4_spec->hdr.dst_addr)); > > - } else { > > - ds_put_cstr(&s, " Spec = null\n"); > > - } > > - if (ipv4_mask) { > > - ds_put_format(&s, > > - " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > > - ", src="IP_FMT", dst="IP_FMT"\n", > > - ipv4_mask->hdr.type_of_service, > > - ipv4_mask->hdr.time_to_live, > > - ipv4_mask->hdr.next_proto_id, > > - IP_ARGS(ipv4_mask->hdr.src_addr), > > - IP_ARGS(ipv4_mask->hdr.dst_addr)); > > - } else { > > - ds_put_cstr(&s, " Mask = null\n"); > > - } > > - } > > - > > - if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { > > - const struct rte_flow_item_udp *udp_spec = item->spec; > > - const struct rte_flow_item_udp *udp_mask = item->mask; > > - > > - ds_put_cstr(&s, "rte flow udp pattern:\n"); > > - if (udp_spec) { > > - ds_put_format(&s, > > - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > > - ntohs(udp_spec->hdr.src_port), > > - ntohs(udp_spec->hdr.dst_port)); > > - } else { > > - ds_put_cstr(&s, " Spec = null\n"); > > - } > > - if (udp_mask) { > > - ds_put_format(&s, > > - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > > - udp_mask->hdr.src_port, > > - udp_mask->hdr.dst_port); > > - } else { > > - ds_put_cstr(&s, " Mask = null\n"); > > - } > > - } > > - > > - if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { > > - const struct rte_flow_item_sctp *sctp_spec = item->spec; > > - const struct rte_flow_item_sctp *sctp_mask = item->mask; > > - > > - ds_put_cstr(&s, "rte flow sctp pattern:\n"); > > - if (sctp_spec) { > > - ds_put_format(&s, > > - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > > - ntohs(sctp_spec->hdr.src_port), > > - ntohs(sctp_spec->hdr.dst_port)); > > - } else { > > - ds_put_cstr(&s, " Spec = null\n"); > > - } > > - if (sctp_mask) { > > - ds_put_format(&s, > > - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > > - sctp_mask->hdr.src_port, > > - sctp_mask->hdr.dst_port); > > - } else { > > - ds_put_cstr(&s, " Mask = null\n"); > > - } > > - } > > - > > - if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { > > - const struct rte_flow_item_icmp *icmp_spec = item->spec; > > - const struct rte_flow_item_icmp *icmp_mask = item->mask; > > - > > - ds_put_cstr(&s, "rte flow icmp pattern:\n"); > > - if (icmp_spec) { > > - ds_put_format(&s, > > - " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", > > - icmp_spec->hdr.icmp_type, > > - icmp_spec->hdr.icmp_code); > > - } else { > > - ds_put_cstr(&s, " Spec = null\n"); > > - } > > - if (icmp_mask) { > > - ds_put_format(&s, > > - " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", > > - icmp_spec->hdr.icmp_type, > > - icmp_spec->hdr.icmp_code); > > - } else { > > - ds_put_cstr(&s, " Mask = null\n"); > > - } > > - } > > - > > - if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { > > - const struct rte_flow_item_tcp *tcp_spec = item->spec; > > - const struct rte_flow_item_tcp *tcp_mask = item->mask; > > - > > - ds_put_cstr(&s, "rte flow tcp pattern:\n"); > > - if (tcp_spec) { > > - ds_put_format(&s, > > - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 > > - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > > - ntohs(tcp_spec->hdr.src_port), > > - ntohs(tcp_spec->hdr.dst_port), > > - tcp_spec->hdr.data_off, > > - tcp_spec->hdr.tcp_flags); > > - } else { > > - ds_put_cstr(&s, " Spec = null\n"); > > - } > > - if (tcp_mask) { > > - ds_put_format(&s, > > - " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 > > - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > > - tcp_mask->hdr.src_port, > > - tcp_mask->hdr.dst_port, > > - tcp_mask->hdr.data_off, > > - tcp_mask->hdr.tcp_flags); > > - } else { > > - ds_put_cstr(&s, " Mask = null\n"); > > - } > > - } > > - > > - VLOG_DBG("%s", ds_cstr(&s)); > > - ds_destroy(&s); > > -} > > - > > -static void > > -add_flow_pattern(struct flow_patterns *patterns, enum > rte_flow_item_type type, > > - const void *spec, const void *mask) { > > - int cnt = patterns->cnt; > > - > > - if (cnt == 0) { > > - patterns->current_max = 8; > > - patterns->items = xcalloc(patterns->current_max, > > - sizeof(struct rte_flow_item)); > > - } else if (cnt == patterns->current_max) { > > - patterns->current_max *= 2; > > - patterns->items = xrealloc(patterns->items, patterns->current_max * > > - sizeof(struct rte_flow_item)); > > - } > > - > > - patterns->items[cnt].type = type; > > - patterns->items[cnt].spec = spec; > > - patterns->items[cnt].mask = mask; > > - patterns->items[cnt].last = NULL; > > - dump_flow_pattern(&patterns->items[cnt]); > > - patterns->cnt++; > > -} > > - > > -static void > > -add_flow_action(struct flow_actions *actions, enum > rte_flow_action_type type, > > - const void *conf) > > -{ > > - int cnt = actions->cnt; > > - > > - if (cnt == 0) { > > - actions->current_max = 8; > > - actions->actions = xcalloc(actions->current_max, > > - sizeof(struct rte_flow_action)); > > - } else if (cnt == actions->current_max) { > > - actions->current_max *= 2; > > - actions->actions = xrealloc(actions->actions, actions->current_max * > > - sizeof(struct rte_flow_action)); > > - } > > - > > - actions->actions[cnt].type = type; > > - actions->actions[cnt].conf = conf; > > - actions->cnt++; > > -} > > - > > -struct action_rss_data { > > - struct rte_flow_action_rss conf; > > - uint16_t queue[0]; > > -}; > > - > > -static struct action_rss_data * > > -add_flow_rss_action(struct flow_actions *actions, > > - struct netdev *netdev) { > > - int i; > > - struct action_rss_data *rss_data; > > - > > - rss_data = xmalloc(sizeof(struct action_rss_data) + > > - sizeof(uint16_t) * netdev->n_rxq); > > - *rss_data = (struct action_rss_data) { > > - .conf = (struct rte_flow_action_rss) { > > - .func = RTE_ETH_HASH_FUNCTION_DEFAULT, > > - .level = 0, > > - .types = 0, > > - .queue_num = netdev->n_rxq, > > - .queue = rss_data->queue, > > - .key_len = 0, > > - .key = NULL > > - }, > > - }; > > - > > - /* Override queue array with default */ > > - for (i = 0; i < netdev->n_rxq; i++) { > > - rss_data->queue[i] = i; > > - } > > - > > - add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data- > >conf); > > - > > - return rss_data; > > -} > > - > > -static int > > -netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, > > - const struct match *match, > > - struct nlattr *nl_actions OVS_UNUSED, > > - size_t actions_len OVS_UNUSED, > > - const ovs_u128 *ufid, > > - struct offload_info *info) { > > - const struct rte_flow_attr flow_attr = { > > - .group = 0, > > - .priority = 0, > > - .ingress = 1, > > - .egress = 0 > > - }; > > - struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; > > - struct flow_actions actions = { .actions = NULL, .cnt = 0 }; > > - struct rte_flow *flow; > > - struct rte_flow_error error; > > - uint8_t *ipv4_next_proto_mask = NULL; > > - int ret = 0; > > - > > - /* Eth */ > > - struct rte_flow_item_eth eth_spec; > > - struct rte_flow_item_eth eth_mask; > > - if (!eth_addr_is_zero(match->wc.masks.dl_src) || > > - !eth_addr_is_zero(match->wc.masks.dl_dst)) { > > - memset(ð_spec, 0, sizeof eth_spec); > > - memset(ð_mask, 0, sizeof eth_mask); > > - rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof > eth_spec.dst); > > - rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof > eth_spec.src); > > - eth_spec.type = match->flow.dl_type; > > - > > - rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, > > - sizeof eth_mask.dst); > > - rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, > > - sizeof eth_mask.src); > > - eth_mask.type = match->wc.masks.dl_type; > > - > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, > > - ð_spec, ð_mask); > > - } else { > > - /* > > - * If user specifies a flow (like UDP flow) without L2 patterns, > > - * OVS will at least set the dl_type. Normally, it's enough to > > - * create an eth pattern just with it. Unluckily, some Intel's > > - * NIC (such as XL710) doesn't support that. Below is a workaround, > > - * which simply matches any L2 pkts. > > - */ > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, > NULL); > > - } > > - > > - /* VLAN */ > > - struct rte_flow_item_vlan vlan_spec; > > - struct rte_flow_item_vlan vlan_mask; > > - if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { > > - memset(&vlan_spec, 0, sizeof vlan_spec); > > - memset(&vlan_mask, 0, sizeof vlan_mask); > > - vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); > > - vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); > > - > > - /* match any protocols */ > > - vlan_mask.inner_type = 0; > > - > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, > > - &vlan_spec, &vlan_mask); > > - } > > - > > - /* IP v4 */ > > - uint8_t proto = 0; > > - struct rte_flow_item_ipv4 ipv4_spec; > > - struct rte_flow_item_ipv4 ipv4_mask; > > - if (match->flow.dl_type == htons(ETH_TYPE_IP)) { > > - memset(&ipv4_spec, 0, sizeof ipv4_spec); > > - memset(&ipv4_mask, 0, sizeof ipv4_mask); > > - > > - ipv4_spec.hdr.type_of_service = match->flow.nw_tos; > > - ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; > > - ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; > > - ipv4_spec.hdr.src_addr = match->flow.nw_src; > > - ipv4_spec.hdr.dst_addr = match->flow.nw_dst; > > - > > - ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; > > - ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; > > - ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; > > - ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; > > - ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; > > - > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, > > - &ipv4_spec, &ipv4_mask); > > - > > - /* Save proto for L4 protocol setup */ > > - proto = ipv4_spec.hdr.next_proto_id & > > - ipv4_mask.hdr.next_proto_id; > > - > > - /* Remember proto mask address for later modification */ > > - ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; > > - } > > - > > - if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && > > - proto != IPPROTO_SCTP && proto != IPPROTO_TCP && > > - (match->wc.masks.tp_src || > > - match->wc.masks.tp_dst || > > - match->wc.masks.tcp_flags)) { > > - VLOG_DBG("L4 Protocol (%u) not supported", proto); > > - ret = -1; > > - goto out; > > - } > > - > > - if ((match->wc.masks.tp_src && match->wc.masks.tp_src != > OVS_BE16_MAX) || > > - (match->wc.masks.tp_dst && match->wc.masks.tp_dst != > OVS_BE16_MAX)) { > > - ret = -1; > > - goto out; > > - } > > - > > - struct rte_flow_item_tcp tcp_spec; > > - struct rte_flow_item_tcp tcp_mask; > > - if (proto == IPPROTO_TCP) { > > - memset(&tcp_spec, 0, sizeof tcp_spec); > > - memset(&tcp_mask, 0, sizeof tcp_mask); > > - tcp_spec.hdr.src_port = match->flow.tp_src; > > - tcp_spec.hdr.dst_port = match->flow.tp_dst; > > - tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; > > - tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; > > - > > - tcp_mask.hdr.src_port = match->wc.masks.tp_src; > > - tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; > > - tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; > > - tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; > > - > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, > > - &tcp_spec, &tcp_mask); > > - > > - /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match */ > > - if (ipv4_next_proto_mask) { > > - *ipv4_next_proto_mask = 0; > > - } > > - goto end_proto_check; > > - } > > - > > - struct rte_flow_item_udp udp_spec; > > - struct rte_flow_item_udp udp_mask; > > - if (proto == IPPROTO_UDP) { > > - memset(&udp_spec, 0, sizeof udp_spec); > > - memset(&udp_mask, 0, sizeof udp_mask); > > - udp_spec.hdr.src_port = match->flow.tp_src; > > - udp_spec.hdr.dst_port = match->flow.tp_dst; > > - > > - udp_mask.hdr.src_port = match->wc.masks.tp_src; > > - udp_mask.hdr.dst_port = match->wc.masks.tp_dst; > > - > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, > > - &udp_spec, &udp_mask); > > - > > - /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match > */ > > - if (ipv4_next_proto_mask) { > > - *ipv4_next_proto_mask = 0; > > - } > > - goto end_proto_check; > > - } > > - > > - struct rte_flow_item_sctp sctp_spec; > > - struct rte_flow_item_sctp sctp_mask; > > - if (proto == IPPROTO_SCTP) { > > - memset(&sctp_spec, 0, sizeof sctp_spec); > > - memset(&sctp_mask, 0, sizeof sctp_mask); > > - sctp_spec.hdr.src_port = match->flow.tp_src; > > - sctp_spec.hdr.dst_port = match->flow.tp_dst; > > - > > - sctp_mask.hdr.src_port = match->wc.masks.tp_src; > > - sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; > > - > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, > > - &sctp_spec, &sctp_mask); > > - > > - /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match > */ > > - if (ipv4_next_proto_mask) { > > - *ipv4_next_proto_mask = 0; > > - } > > - goto end_proto_check; > > - } > > - > > - struct rte_flow_item_icmp icmp_spec; > > - struct rte_flow_item_icmp icmp_mask; > > - if (proto == IPPROTO_ICMP) { > > - memset(&icmp_spec, 0, sizeof icmp_spec); > > - memset(&icmp_mask, 0, sizeof icmp_mask); > > - icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); > > - icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); > > - > > - icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match- > >wc.masks.tp_src); > > - icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match- > >wc.masks.tp_dst); > > - > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, > > - &icmp_spec, &icmp_mask); > > - > > - /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto > match */ > > - if (ipv4_next_proto_mask) { > > - *ipv4_next_proto_mask = 0; > > - } > > - goto end_proto_check; > > - } > > - > > -end_proto_check: > > - > > - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, > NULL); > > - > > - struct rte_flow_action_mark mark; > > - struct action_rss_data *rss; > > - > > - mark.id = info->flow_mark; > > - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark); > > - > > - > > - rss = add_flow_rss_action(&actions, netdev); > > - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); > > - > > - flow = netdev_dpdk_rte_flow_create(netdev, > &flow_attr,patterns.items, > > - actions.actions, &error); > > - > > - free(rss); > > - if (!flow) { > > - VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", > > - netdev_get_name(netdev), error.type, error.message); > > - ret = -1; > > - goto out; > > - } > > - ufid_to_rte_flow_associate(ufid, flow); > > - VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", > > - netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); > > - > > -out: > > - free(patterns.items); > > - free(actions.actions); > > - return ret; > > -} > > - > > -/* > > - * Check if any unsupported flow patterns are specified. > > - */ > > -static int > > -netdev_dpdk_validate_flow(const struct match *match) { > > - struct match match_zero_wc; > > - const struct flow *masks = &match->wc.masks; > > - > > - /* Create a wc-zeroed version of flow */ > > - match_init(&match_zero_wc, &match->flow, &match->wc); > > - > > - if (!is_all_zeros(&match_zero_wc.flow.tunnel, > > - sizeof match_zero_wc.flow.tunnel)) { > > - goto err; > > - } > > - > > - if (masks->metadata || masks->skb_priority || > > - masks->pkt_mark || masks->dp_hash) { > > - goto err; > > - } > > - > > - /* recirc id must be zero */ > > - if (match_zero_wc.flow.recirc_id) { > > - goto err; > > - } > > - > > - if (masks->ct_state || masks->ct_nw_proto || > > - masks->ct_zone || masks->ct_mark || > > - !ovs_u128_is_zero(masks->ct_label)) { > > - goto err; > > - } > > - > > - if (masks->conj_id || masks->actset_output) { > > - goto err; > > - } > > - > > - /* unsupported L2 */ > > - if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) { > > - goto err; > > - } > > - > > - /* unsupported L3 */ > > - if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst || > > - !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) || > > - !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) || > > - !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) || > > - !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) || > > - !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) || > > - !is_all_zeros(&masks->nsh, sizeof masks->nsh) || > > - !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) || > > - !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) { > > - goto err; > > - } > > - > > - /* If fragmented, then don't HW accelerate - for now */ > > - if (match_zero_wc.flow.nw_frag) { > > - goto err; > > - } > > - > > - /* unsupported L4 */ > > - if (masks->igmp_group_ip4 || masks->ct_tp_src || masks->ct_tp_dst) { > > - goto err; > > - } > > - > > - return 0; > > - > > -err: > > - VLOG_ERR("cannot HW accelerate this flow due to unsupported > protocols"); > > - return -1; > > -} > > - > > -static int > > -netdev_dpdk_destroy_rte_flow(struct netdev *netdev, > > - const ovs_u128 *ufid, > > - struct rte_flow *rte_flow) { > > - struct rte_flow_error error; > > - int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); > > - > > - if (ret == 0) { > > - ufid_to_rte_flow_disassociate(ufid); > > - VLOG_DBG("%s: removed rte flow %p associated with ufid " > UUID_FMT "\n", > > - netdev_get_name(netdev), rte_flow, > > - UUID_ARGS((struct uuid *)ufid)); > > - } else { > > - VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", > > - netdev_get_name(netdev), error.type, error.message); > > - } > > - > > - return ret; > > -} > > - > > -static int > > -netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > > - struct nlattr *actions, size_t actions_len, > > - const ovs_u128 *ufid, struct offload_info *info, > > - struct dpif_flow_stats *stats OVS_UNUSED) { > > - struct rte_flow *rte_flow; > > - int ret; > > - > > - /* > > - * If an old rte_flow exists, it means it's a flow modification. > > - * Here destroy the old rte flow first before adding a new one. > > - */ > > - rte_flow = ufid_to_rte_flow_find(ufid); > > - if (rte_flow) { > > - ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > > - if (ret < 0) { > > - return ret; > > - } > > - } > > - > > - ret = netdev_dpdk_validate_flow(match); > > - if (ret < 0) { > > - return ret; > > - } > > - > > - return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, > > - actions_len, ufid, info); > > -} > > - > > -static int > > -netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > > - struct dpif_flow_stats *stats OVS_UNUSED) { > > - > > - struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); > > - > > - if (!rte_flow) { > > - return -1; > > - } > > - > > - return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > > -} > > - > > -#define DPDK_FLOW_OFFLOAD_API \ > > - .flow_put = netdev_dpdk_flow_put, \ > > - .flow_del = netdev_dpdk_flow_del > > - > > #define NETDEV_DPDK_CLASS_COMMON \ > > .is_pmd = true, \ > > .alloc = netdev_dpdk_alloc, \ > > diff --git a/lib/netdev-rte-offloads.c b/lib/netdev-rte-offloads.c > > new file mode 100644 > > index 0000000..0212156 > > --- /dev/null > > +++ b/lib/netdev-rte-offloads.c > > @@ -0,0 +1,775 @@ > > +/* > > + * Copyright (c) 2019 Mellanox Technologies, Ltd. > > I'm not a layer but it seems strange to have above copyright for the > file with copy-pasted code written by other people. > However, I'm not sure how this should look like. > > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fww > w.apache.org%2Flicenses%2FLICENSE- > 2.0&data=02%7C01%7Croniba%40mellanox.com%7C1cbceab6b5394a30 > 6a9e08d69818689d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6 > 36863627943149395&sdata=GFoau2tQaeBk6KwZcleg4lZcbN%2B11WIBb > Xb%2FfE50eE0%3D&reserved=0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express > or implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > +#include <config.h> > > Include of "netdev-rte-offloads.h" should be here. > Blank line. > > > +#include <rte_flow.h> > > Blank line. > > > +#include "netdev-rte-offloads.h" > > +#include "netdev-provider.h" > > +#include "dpif-netdev.h" > > Do you really need above header ? > > > +#include "cmap.h" > > +#include "openvswitch/vlog.h" > > +#include "openvswitch/match.h" > > +#include "packets.h" > > +#include "uuid.h" > > Above headers should go in alphabetical order. > > For the style guidelines please refer: > Documentation/internals/contributing/coding-style.rst > > > + > > +VLOG_DEFINE_THIS_MODULE(netdev_rte_offloads); > > + > > +/* > > + * A mapping from ufid to dpdk rte_flow. > > + */ > > +static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; > > + > > +struct ufid_to_rte_flow_data { > > + struct cmap_node node; > > + ovs_u128 ufid; > > + struct rte_flow *rte_flow; > > +}; > > + > > + > > +/* Find rte_flow with @ufid */ > > +static struct rte_flow * > > +ufid_to_rte_flow_find(const ovs_u128 *ufid) { > > The '{' should go on a next line in function definition. > > > + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > > + struct ufid_to_rte_flow_data *data; > > + > > + CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) > { > > + if (ovs_u128_equals(*ufid, data->ufid)) { > > + return data->rte_flow; > > + } > > + } > > + > > + return NULL; > > +} > > + > > +static inline void > > +ufid_to_rte_flow_associate(const ovs_u128 *ufid, > > + struct rte_flow *rte_flow) { > > Same. > > > + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > > + struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); > > No need to parenthesize the operand of sizeof. > > > + > > + /* > > + * We should not simply overwrite an existing rte flow. > > + * We should have deleted it first before re-adding it. > > + * Thus, if following assert triggers, something is wrong: > > + * the rte_flow is not destroyed. > > + */ > > + ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); > > + > > + data->ufid = *ufid; > > + data->rte_flow = rte_flow; > > + > > + cmap_insert(&ufid_to_rte_flow, > > + CONST_CAST(struct cmap_node *, &data->node), hash); > > +} > > + > > +static inline void > > +ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { > > Same. > > > + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > > sizeof *ufid > > > + struct ufid_to_rte_flow_data *data; > > + > > + CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) > { > > + if (ovs_u128_equals(*ufid, data->ufid)) { > > + cmap_remove(&ufid_to_rte_flow, > > + CONST_CAST(struct cmap_node *, &data->node), hash); > > + ovsrcu_postpone(free, data); > > + return; > > + } > > + } > > + > > + VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", > > + UUID_ARGS((struct uuid *)ufid)); > > Space needed between the cast and variable: '(struct uuid *) ufid'. > > > +} > > + > > +/* > > + * To avoid individual xrealloc calls for each new element, a 'curent_max' > > + * is used to keep track of current allocated number of elements. Starts > > + * by 8 and doubles on each xrealloc call > > + */ > > +struct flow_patterns { > > + struct rte_flow_item *items; > > + int cnt; > > + int current_max; > > +}; > > + > > +struct flow_actions { > > + struct rte_flow_action *actions; > > + int cnt; > > + int current_max; > > +}; > > + > > +static void > > +dump_flow_pattern(struct rte_flow_item *item) > > +{ > > + struct ds s; > > + > > + if (!VLOG_IS_DBG_ENABLED() || item->type == > RTE_FLOW_ITEM_TYPE_END) { > > + return; > > + } > > + > > + ds_init(&s); > > + > > + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { > > + const struct rte_flow_item_eth *eth_spec = item->spec; > > + const struct rte_flow_item_eth *eth_mask = item->mask; > > + > > + ds_put_cstr(&s, "rte flow eth pattern:\n"); > > + if (eth_spec) { > > + ds_put_format(&s, > > + " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > > + "type=0x%04" PRIx16"\n", > > + ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), > > + ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), > > + ntohs(eth_spec->type)); > > + } else { > > + ds_put_cstr(&s, " Spec = null\n"); > > + } > > + if (eth_mask) { > > + ds_put_format(&s, > > + " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > > + "type=0x%04"PRIx16"\n", > > + ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), > > + ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), > > + eth_mask->type); > > + } else { > > + ds_put_cstr(&s, " Mask = null\n"); > > + } > > + } > > + > > + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { > > + const struct rte_flow_item_vlan *vlan_spec = item->spec; > > + const struct rte_flow_item_vlan *vlan_mask = item->mask; > > + > > + ds_put_cstr(&s, "rte flow vlan pattern:\n"); > > + if (vlan_spec) { > > + ds_put_format(&s, > > + " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > > + ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); > > + } else { > > + ds_put_cstr(&s, " Spec = null\n"); > > + } > > + > > + if (vlan_mask) { > > + ds_put_format(&s, > > + " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > > + ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); > > + } else { > > + ds_put_cstr(&s, " Mask = null\n"); > > + } > > + } > > + > > + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { > > + const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; > > + const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; > > + > > + ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); > > + if (ipv4_spec) { > > + ds_put_format(&s, > > + " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > > + ", src="IP_FMT", dst="IP_FMT"\n", > > + ipv4_spec->hdr.type_of_service, > > + ipv4_spec->hdr.time_to_live, > > + ipv4_spec->hdr.next_proto_id, > > + IP_ARGS(ipv4_spec->hdr.src_addr), > > + IP_ARGS(ipv4_spec->hdr.dst_addr)); > > + } else { > > + ds_put_cstr(&s, " Spec = null\n"); > > + } > > + if (ipv4_mask) { > > + ds_put_format(&s, > > + " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > > + ", src="IP_FMT", dst="IP_FMT"\n", > > + ipv4_mask->hdr.type_of_service, > > + ipv4_mask->hdr.time_to_live, > > + ipv4_mask->hdr.next_proto_id, > > + IP_ARGS(ipv4_mask->hdr.src_addr), > > + IP_ARGS(ipv4_mask->hdr.dst_addr)); > > + } else { > > + ds_put_cstr(&s, " Mask = null\n"); > > + } > > + } > > + > > + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { > > + const struct rte_flow_item_udp *udp_spec = item->spec; > > + const struct rte_flow_item_udp *udp_mask = item->mask; > > + > > + ds_put_cstr(&s, "rte flow udp pattern:\n"); > > + if (udp_spec) { > > + ds_put_format(&s, > > + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > > + ntohs(udp_spec->hdr.src_port), > > + ntohs(udp_spec->hdr.dst_port)); > > + } else { > > + ds_put_cstr(&s, " Spec = null\n"); > > + } > > + if (udp_mask) { > > + ds_put_format(&s, > > + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > > + udp_mask->hdr.src_port, > > + udp_mask->hdr.dst_port); > > + } else { > > + ds_put_cstr(&s, " Mask = null\n"); > > + } > > + } > > + > > + if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { > > + const struct rte_flow_item_sctp *sctp_spec = item->spec; > > + const struct rte_flow_item_sctp *sctp_mask = item->mask; > > + > > + ds_put_cstr(&s, "rte flow sctp pattern:\n"); > > + if (sctp_spec) { > > + ds_put_format(&s, > > + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > > + ntohs(sctp_spec->hdr.src_port), > > + ntohs(sctp_spec->hdr.dst_port)); > > + } else { > > + ds_put_cstr(&s, " Spec = null\n"); > > + } > > + if (sctp_mask) { > > + ds_put_format(&s, > > + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > > + sctp_mask->hdr.src_port, > > + sctp_mask->hdr.dst_port); > > + } else { > > + ds_put_cstr(&s, " Mask = null\n"); > > + } > > + } > > + > > + if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { > > + const struct rte_flow_item_icmp *icmp_spec = item->spec; > > + const struct rte_flow_item_icmp *icmp_mask = item->mask; > > + > > + ds_put_cstr(&s, "rte flow icmp pattern:\n"); > > + if (icmp_spec) { > > + ds_put_format(&s, > > + " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", > > + icmp_spec->hdr.icmp_type, > > + icmp_spec->hdr.icmp_code); > > + } else { > > + ds_put_cstr(&s, " Spec = null\n"); > > + } > > + if (icmp_mask) { > > + ds_put_format(&s, > > + " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", > > + icmp_spec->hdr.icmp_type, > > + icmp_spec->hdr.icmp_code); > > + } else { > > + ds_put_cstr(&s, " Mask = null\n"); > > + } > > + } > > + > > + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { > > + const struct rte_flow_item_tcp *tcp_spec = item->spec; > > + const struct rte_flow_item_tcp *tcp_mask = item->mask; > > + > > + ds_put_cstr(&s, "rte flow tcp pattern:\n"); > > + if (tcp_spec) { > > + ds_put_format(&s, > > + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 > > + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > > + ntohs(tcp_spec->hdr.src_port), > > + ntohs(tcp_spec->hdr.dst_port), > > + tcp_spec->hdr.data_off, > > + tcp_spec->hdr.tcp_flags); > > + } else { > > + ds_put_cstr(&s, " Spec = null\n"); > > + } > > + if (tcp_mask) { > > + ds_put_format(&s, > > + " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 > > + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > > + tcp_mask->hdr.src_port, > > + tcp_mask->hdr.dst_port, > > + tcp_mask->hdr.data_off, > > + tcp_mask->hdr.tcp_flags); > > + } else { > > + ds_put_cstr(&s, " Mask = null\n"); > > + } > > + } > > + > > + VLOG_DBG("%s", ds_cstr(&s)); > > + ds_destroy(&s); > > +} > > + > > +static void > > +add_flow_pattern(struct flow_patterns *patterns, enum > rte_flow_item_type type, > > + const void *spec, const void *mask) { > > ditto > > > + int cnt = patterns->cnt; > > + > > + if (cnt == 0) { > > + patterns->current_max = 8; > > + patterns->items = xcalloc(patterns->current_max, > > + sizeof(struct rte_flow_item)); > > sizeof *patterns->items > > > + } else if (cnt == patterns->current_max) { > > + patterns->current_max *= 2; > > + patterns->items = xrealloc(patterns->items, patterns->current_max * > > + sizeof(struct rte_flow_item)); > > sizeof *patterns->items > > > + } > > + > > + patterns->items[cnt].type = type; > > + patterns->items[cnt].spec = spec; > > + patterns->items[cnt].mask = mask; > > + patterns->items[cnt].last = NULL; > > + dump_flow_pattern(&patterns->items[cnt]); > > + patterns->cnt++; > > +} > > + > > +static void > > +add_flow_action(struct flow_actions *actions, enum > rte_flow_action_type type, > > + const void *conf) > > +{ > > + int cnt = actions->cnt; > > + > > + if (cnt == 0) { > > + actions->current_max = 8; > > + actions->actions = xcalloc(actions->current_max, > > + sizeof(struct rte_flow_action)); > > sizeof *actions->actions > > > + } else if (cnt == actions->current_max) { > > + actions->current_max *= 2; > > + actions->actions = xrealloc(actions->actions, actions->current_max * > > + sizeof(struct rte_flow_action)); > > sizeof *actions->actions > > > + } > > + > > + actions->actions[cnt].type = type; > > + actions->actions[cnt].conf = conf; > > + actions->cnt++; > > +} > > + > > +struct action_rss_data { > > + struct rte_flow_action_rss conf; > > + uint16_t queue[0]; > > +}; > > + > > +static struct action_rss_data * > > +add_flow_rss_action(struct flow_actions *actions, > > + struct netdev *netdev) { > > ditto > > > + int i; > > + struct action_rss_data *rss_data; > > + > > + rss_data = xmalloc(sizeof(struct action_rss_data) + > > + sizeof(uint16_t) * netdev->n_rxq); > > sizeof *rss_data + netdev_n_rxq(netdev) * sizeof rss_data->queue[0] > > As we're not inside the netdev we need to use getter 'netdev_n_rxq'. > > > + *rss_data = (struct action_rss_data) { > > + .conf = (struct rte_flow_action_rss) { > > + .func = RTE_ETH_HASH_FUNCTION_DEFAULT, > > + .level = 0, > > + .types = 0, > > + .queue_num = netdev->n_rxq, > > netdev_n_rxq(netdev) > > > + .queue = rss_data->queue, > > + .key_len = 0, > > + .key = NULL > > + }, > > + }; > > + > > + /* Override queue array with default */ > > + for (i = 0; i < netdev->n_rxq; i++) { > > netdev_n_rxq(netdev) > > > + rss_data->queue[i] = i; > > + } > > + > > + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data- > >conf); > > + > > + return rss_data; > > +} > > + > > +static int > > +netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, > > + const struct match *match, > > + struct nlattr *nl_actions OVS_UNUSED, > > + size_t actions_len OVS_UNUSED, > > + const ovs_u128 *ufid, > > + struct offload_info *info) { > > ditto > > > I'll skip the rest of this function as I already have a path with refactoring: > > https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpat > chwork.ozlabs.org%2Fpatch%2F1037609%2F&data=02%7C01%7Croniba > %40mellanox.com%7C1cbceab6b5394a306a9e08d69818689d%7Ca652971c7d2 > e4d9ba6a4d149256f461b%7C0%7C0%7C636863627943149395&sdata=Z7 > uBounMbAB6QF0LivTDkOjfyy%2B%2BrudgT96z77glGAE%3D&reserved= > 0 > > > + const struct rte_flow_attr flow_attr = { > > + .group = 0, > > + .priority = 0, > > + .ingress = 1, > > + .egress = 0 > > + }; > > + struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; > > + struct flow_actions actions = { .actions = NULL, .cnt = 0 }; > > + struct rte_flow *flow; > > + struct rte_flow_error error; > > + uint8_t *ipv4_next_proto_mask = NULL; > > + int ret = 0; > > + > > + /* Eth */ > > + struct rte_flow_item_eth eth_spec; > > + struct rte_flow_item_eth eth_mask; > > + if (!eth_addr_is_zero(match->wc.masks.dl_src) || > > + !eth_addr_is_zero(match->wc.masks.dl_dst)) { > > + memset(ð_spec, 0, sizeof eth_spec); > > + memset(ð_mask, 0, sizeof eth_mask); > > + rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof > eth_spec.dst); > > + rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof > eth_spec.src); > > + eth_spec.type = match->flow.dl_type; > > + > > + rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, > > + sizeof eth_mask.dst); > > + rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, > > + sizeof eth_mask.src); > > + eth_mask.type = match->wc.masks.dl_type; > > + > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, > > + ð_spec, ð_mask); > > + } else { > > + /* > > + * If user specifies a flow (like UDP flow) without L2 patterns, > > + * OVS will at least set the dl_type. Normally, it's enough to > > + * create an eth pattern just with it. Unluckily, some Intel's > > + * NIC (such as XL710) doesn't support that. Below is a workaround, > > + * which simply matches any L2 pkts. > > + */ > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, > NULL); > > + } > > + > > + /* VLAN */ > > + struct rte_flow_item_vlan vlan_spec; > > + struct rte_flow_item_vlan vlan_mask; > > + if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { > > + memset(&vlan_spec, 0, sizeof vlan_spec); > > + memset(&vlan_mask, 0, sizeof vlan_mask); > > + vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); > > + vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); > > + > > + /* match any protocols */ > > + vlan_mask.inner_type = 0; > > + > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, > > + &vlan_spec, &vlan_mask); > > + } > > + > > + /* IP v4 */ > > + uint8_t proto = 0; > > + struct rte_flow_item_ipv4 ipv4_spec; > > + struct rte_flow_item_ipv4 ipv4_mask; > > + if (match->flow.dl_type == htons(ETH_TYPE_IP)) { > > + memset(&ipv4_spec, 0, sizeof ipv4_spec); > > + memset(&ipv4_mask, 0, sizeof ipv4_mask); > > + > > + ipv4_spec.hdr.type_of_service = match->flow.nw_tos; > > + ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; > > + ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; > > + ipv4_spec.hdr.src_addr = match->flow.nw_src; > > + ipv4_spec.hdr.dst_addr = match->flow.nw_dst; > > + > > + ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; > > + ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; > > + ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; > > + ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; > > + ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; > > + > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, > > + &ipv4_spec, &ipv4_mask); > > + > > + /* Save proto for L4 protocol setup */ > > + proto = ipv4_spec.hdr.next_proto_id & > > + ipv4_mask.hdr.next_proto_id; > > + > > + /* Remember proto mask address for later modification */ > > + ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; > > + } > > + > > + if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && > > + proto != IPPROTO_SCTP && proto != IPPROTO_TCP && > > + (match->wc.masks.tp_src || > > + match->wc.masks.tp_dst || > > + match->wc.masks.tcp_flags)) { > > + VLOG_DBG("L4 Protocol (%u) not supported", proto); > > + ret = -1; > > + goto out; > > + } > > + > > + if ((match->wc.masks.tp_src && match->wc.masks.tp_src != > OVS_BE16_MAX) || > > + (match->wc.masks.tp_dst && match->wc.masks.tp_dst != > OVS_BE16_MAX)) { > > + ret = -1; > > + goto out; > > + } > > + > > + struct rte_flow_item_tcp tcp_spec; > > + struct rte_flow_item_tcp tcp_mask; > > + if (proto == IPPROTO_TCP) { > > + memset(&tcp_spec, 0, sizeof tcp_spec); > > + memset(&tcp_mask, 0, sizeof tcp_mask); > > + tcp_spec.hdr.src_port = match->flow.tp_src; > > + tcp_spec.hdr.dst_port = match->flow.tp_dst; > > + tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; > > + tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; > > + > > + tcp_mask.hdr.src_port = match->wc.masks.tp_src; > > + tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; > > + tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; > > + tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; > > + > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, > > + &tcp_spec, &tcp_mask); > > + > > + /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match > */ > > + if (ipv4_next_proto_mask) { > > + *ipv4_next_proto_mask = 0; > > + } > > + goto end_proto_check; > > + } > > + > > + struct rte_flow_item_udp udp_spec; > > + struct rte_flow_item_udp udp_mask; > > + if (proto == IPPROTO_UDP) { > > + memset(&udp_spec, 0, sizeof udp_spec); > > + memset(&udp_mask, 0, sizeof udp_mask); > > + udp_spec.hdr.src_port = match->flow.tp_src; > > + udp_spec.hdr.dst_port = match->flow.tp_dst; > > + > > + udp_mask.hdr.src_port = match->wc.masks.tp_src; > > + udp_mask.hdr.dst_port = match->wc.masks.tp_dst; > > + > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, > > + &udp_spec, &udp_mask); > > + > > + /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match > */ > > + if (ipv4_next_proto_mask) { > > + *ipv4_next_proto_mask = 0; > > + } > > + goto end_proto_check; > > + } > > + > > + struct rte_flow_item_sctp sctp_spec; > > + struct rte_flow_item_sctp sctp_mask; > > + if (proto == IPPROTO_SCTP) { > > + memset(&sctp_spec, 0, sizeof sctp_spec); > > + memset(&sctp_mask, 0, sizeof sctp_mask); > > + sctp_spec.hdr.src_port = match->flow.tp_src; > > + sctp_spec.hdr.dst_port = match->flow.tp_dst; > > + > > + sctp_mask.hdr.src_port = match->wc.masks.tp_src; > > + sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; > > + > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, > > + &sctp_spec, &sctp_mask); > > + > > + /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match > */ > > + if (ipv4_next_proto_mask) { > > + *ipv4_next_proto_mask = 0; > > + } > > + goto end_proto_check; > > + } > > + > > + struct rte_flow_item_icmp icmp_spec; > > + struct rte_flow_item_icmp icmp_mask; > > + if (proto == IPPROTO_ICMP) { > > + memset(&icmp_spec, 0, sizeof icmp_spec); > > + memset(&icmp_mask, 0, sizeof icmp_mask); > > + icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); > > + icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); > > + > > + icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match- > >wc.masks.tp_src); > > + icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match- > >wc.masks.tp_dst); > > + > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, > > + &icmp_spec, &icmp_mask); > > + > > + /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto > match */ > > + if (ipv4_next_proto_mask) { > > + *ipv4_next_proto_mask = 0; > > + } > > + goto end_proto_check; > > + } > > + > > +end_proto_check: > > + > > + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, > NULL); > > + > > + struct rte_flow_action_mark mark; > > + struct action_rss_data *rss; > > + > > + mark.id = info->flow_mark; > > + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark); > > + > > + > > + rss = add_flow_rss_action(&actions, netdev); > > + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); > > + > > + flow = netdev_dpdk_rte_flow_create(netdev, > &flow_attr,patterns.items, > > + actions.actions, &error); > > + > > + free(rss); > > + if (!flow) { > > + VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", > > + netdev_get_name(netdev), error.type, error.message); > > + ret = -1; > > + goto out; > > + } > > + ufid_to_rte_flow_associate(ufid, flow); > > + VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", > > + netdev_get_name(netdev), flow, UUID_ARGS((struct uuid > *)ufid)); > > + > > +out: > > + free(patterns.items); > > + free(actions.actions); > > + return ret; > > +} > > > Code below was refactored recently. You're removing the new version from > netdev-dpdk.c, but adding the old one here. > > > +static bool > > +is_all_zero(const void *addr, size_t n) { > > + size_t i = 0; > > + const uint8_t *p = (uint8_t *)addr; > > + > > + for (i = 0; i < n; i++) { > > + if (p[i] != 0) { > > + return false; > > + } > > + } > > + > > + return true; > > +} > > + > > +/* > > + * Check if any unsupported flow patterns are specified. > > + */ > > +static int > > +netdev_dpdk_validate_flow(const struct match *match) { > > + struct match match_zero_wc; > > + > > + /* Create a wc-zeroed version of flow */ > > + match_init(&match_zero_wc, &match->flow, &match->wc); > > + > > + if (!is_all_zero(&match_zero_wc.flow.tunnel, > > + sizeof(match_zero_wc.flow.tunnel))) { > > + goto err; > > + } > > + > > + if (match->wc.masks.metadata || > > + match->wc.masks.skb_priority || > > + match->wc.masks.pkt_mark || > > + match->wc.masks.dp_hash) { > > + goto err; > > + } > > + > > + /* recirc id must be zero */ > > + if (match_zero_wc.flow.recirc_id) { > > + goto err; > > + } > > + > > + if (match->wc.masks.ct_state || > > + match->wc.masks.ct_nw_proto || > > + match->wc.masks.ct_zone || > > + match->wc.masks.ct_mark || > > + match->wc.masks.ct_label.u64.hi || > > + match->wc.masks.ct_label.u64.lo) { > > + goto err; > > + } > > + > > + if (match->wc.masks.conj_id || > > + match->wc.masks.actset_output) { > > + goto err; > > + } > > + > > + /* unsupported L2 */ > > + if (!is_all_zero(&match->wc.masks.mpls_lse, > > + sizeof(match_zero_wc.flow.mpls_lse))) { > > + goto err; > > + } > > + > > + /* unsupported L3 */ > > + if (match->wc.masks.ipv6_label || > > + match->wc.masks.ct_nw_src || > > + match->wc.masks.ct_nw_dst || > > + !is_all_zero(&match->wc.masks.ipv6_src, sizeof(struct in6_addr)) || > > + !is_all_zero(&match->wc.masks.ipv6_dst, sizeof(struct in6_addr)) || > > + !is_all_zero(&match->wc.masks.ct_ipv6_src, sizeof(struct in6_addr)) > || > > + !is_all_zero(&match->wc.masks.ct_ipv6_dst, sizeof(struct in6_addr)) > || > > + !is_all_zero(&match->wc.masks.nd_target, sizeof(struct in6_addr)) || > > + !is_all_zero(&match->wc.masks.nsh, sizeof(struct ovs_key_nsh)) || > > + !is_all_zero(&match->wc.masks.arp_sha, sizeof(struct eth_addr)) || > > + !is_all_zero(&match->wc.masks.arp_tha, sizeof(struct eth_addr))) { > > + goto err; > > + } > > + > > + /* If fragmented, then don't HW accelerate - for now */ > > + if (match_zero_wc.flow.nw_frag) { > > + goto err; > > + } > > + > > + /* unsupported L4 */ > > + if (match->wc.masks.igmp_group_ip4 || > > + match->wc.masks.ct_tp_src || > > + match->wc.masks.ct_tp_dst) { > > + goto err; > > + } > > + > > + return 0; > > + > > +err: > > + VLOG_ERR("cannot HW accelerate this flow due to unsupported > protocols"); > > + return -1; > > +} > > + > > +static int > > +netdev_dpdk_destroy_rte_flow(struct netdev *netdev, > > + const ovs_u128 *ufid, > > + struct rte_flow *rte_flow) { > > ditto > > > + struct rte_flow_error error; > > + int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); > > + > > + if (ret == 0) { > > + ufid_to_rte_flow_disassociate(ufid); > > + VLOG_DBG("%s: removed rte flow %p associated with ufid " > UUID_FMT "\n", > > + netdev_get_name(netdev), rte_flow, > > + UUID_ARGS((struct uuid *)ufid)); > > + } else { > > + VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", > > + netdev_get_name(netdev), error.type, error.message); > > + } > > + > > + return ret; > > +} > > + > > +int > > +netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > > + struct nlattr *actions, size_t actions_len, > > + const ovs_u128 *ufid, struct offload_info *info, > > + struct dpif_flow_stats *stats OVS_UNUSED) { > > ditto > > > + struct rte_flow *rte_flow; > > + int ret; > > + > > + /* > > + * If an old rte_flow exists, it means it's a flow modification. > > + * Here destroy the old rte flow first before adding a new one. > > + */ > > + rte_flow = ufid_to_rte_flow_find(ufid); > > + if (rte_flow) { > > + ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > > + if (ret < 0) { > > + return ret; > > + } > > + } > > + > > + ret = netdev_dpdk_validate_flow(match); > > + if (ret < 0) { > > + return ret; > > + } > > + > > + return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, > > + actions_len, ufid, info); > > +} > > + > > +int > > +netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > > + struct dpif_flow_stats *stats OVS_UNUSED) { > > ditto > > > + > > + struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); > > + > > + if (!rte_flow) { > > + return -1; > > + } > > + > > + return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > > +} > > + > > diff --git a/lib/netdev-rte-offloads.h b/lib/netdev-rte-offloads.h > > new file mode 100644 > > index 0000000..573e214 > > --- /dev/null > > +++ b/lib/netdev-rte-offloads.h > > @@ -0,0 +1,38 @@ > > +/* > > + * Copyright (c) 2019 Mellanox Technologies, Ltd. > > + * > > + * Licensed under the Apache License, Version 2.0 (the "License"); > > + * you may not use this file except in compliance with the License. > > + * You may obtain a copy of the License at: > > + * > > + * > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fww > w.apache.org%2Flicenses%2FLICENSE- > 2.0&data=02%7C01%7Croniba%40mellanox.com%7C1cbceab6b5394a30 > 6a9e08d69818689d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6 > 36863627943159409&sdata=VmwZSLlGZaYByKuVjr9CEP9ECDX4Y1ClI7Uq > 1wdErx0%3D&reserved=0 > > + * > > + * Unless required by applicable law or agreed to in writing, software > > + * distributed under the License is distributed on an "AS IS" BASIS, > > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express > or implied. > > + * See the License for the specific language governing permissions and > > + * limitations under the License. > > + */ > > + > > +#ifndef NETDEV_VPORT_OFFLOADS_H > > +#define NETDEV_VPORT_OFFLOADS_H 1 > > + > > +#include "openvswitch/types.h" > > It'll be good to have a blank line here. > > > +struct netdev; > > +struct match; > > +struct nlattr; > > +struct offload_info; > > +struct dpif_flow_stats; > > + > > +int netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > > + struct nlattr *actions, size_t actions_len, > > + const ovs_u128 *ufid, struct offload_info *info, > > + struct dpif_flow_stats *stats OVS_UNUSED); > > +int netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > > + struct dpif_flow_stats *stats OVS_UNUSED); > > + > > +#define DPDK_FLOW_OFFLOAD_API \ > > + .flow_put = netdev_dpdk_flow_put, \ > > + .flow_del = netdev_dpdk_flow_del > > + > > +#endif /* netdev-rte-offloads.h */ > >
On 21.02.2019 19:31, Roni Bar Yanai wrote: > > >> -----Original Message----- >> From: Ilya Maximets <i.maximets@samsung.com> >> Sent: Thursday, February 21, 2019 6:20 PM >> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org >> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern <olgas@mellanox.com>; >> Kevin Traynor <ktraynor@redhat.com>; Asaf Penso <asafp@mellanox.com>; >> Roni Bar Yanai <roniba@mellanox.com>; Flavio Leitner <fbl@sysclose.org> >> Subject: Re: [PATCH v2 2/3] netdev-dpdk: Move offloading-code to a new >> file >> >> At first, 'git am' fails to apply the patch: >> >> Applying: netdev-dpdk: Move offloading-code to a new file >> error: patch failed: lib/netdev-dpdk.c:4240 >> error: lib/netdev-dpdk.c: patch does not apply >> .git/rebase-apply/patch:1564: new blank line at EOF. >> + >> Patch failed at 0002 netdev-dpdk: Move offloading-code to a new file >> >> I'm not sure why it complains about 'lib/netdev-dpdk.c' because the >> issue is the blanlk line at the end of netdev-rte-offloads.c. >> >> On 21.02.2019 15:07, Ophir Munk wrote: >>> From: Roni Bar Yanai <roniba@mellanox.com> >>> >>> Hardware offloading-code is moved to a new file called >> >> Just curious, why you writing "offloading-code" as a single dash >> separated word? >> >>> netdev-rte-offloads.c. The original offloading-code is copied as is >> >> IMHO, it's a good time to make style changes in the code, because >> current functions doesn't follow a lot of OVS coding style guidelines >> and we're touching them anyway. >> > > I agree it is a good time, but I think it should be done on a separate commit, because if > something breaks (although all are style changes), it will be hard to find the changes on git log. The entire file > Will be + . You already made mistakes while rebasing the copy-pasted code. Thorough review is required anyway. Moving of few braces will not change anything. 'sizeof' related changes are mostly obvious. If you wish, I could handle 'n_rxq' usage update in a separate patch as it's not really a style change. > > >>> from the netdev-dpdk.c file to the new file, where future >>> offloading-code should be added as well. The netdev-dpdk.c file will >>> remain unchanged as new offloading-code is added. >>> >>> Reviewed-by: Asaf Penso <asafp@mellanox.com> >>> Signed-off-by: Roni Bar Yanai <roniba@mellanox.com> >>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com> >>> --- >>> lib/automake.mk | 4 +- >>> lib/netdev-dpdk.c | 727 +------------------------------------------ >>> lib/netdev-rte-offloads.c | 775 >> ++++++++++++++++++++++++++++++++++++++++++++++ >>> lib/netdev-rte-offloads.h | 38 +++ >>> 4 files changed, 817 insertions(+), 727 deletions(-) >>> create mode 100644 lib/netdev-rte-offloads.c >>> create mode 100644 lib/netdev-rte-offloads.h >>> >>> diff --git a/lib/automake.mk b/lib/automake.mk >>> index bae032b..cc5dccf 100644 >>> --- a/lib/automake.mk >>> +++ b/lib/automake.mk >>> @@ -138,6 +138,7 @@ lib_libopenvswitch_la_SOURCES = \ >>> lib/netdev-dpdk.h \ >>> lib/netdev-dummy.c \ >>> lib/netdev-provider.h \ >>> + lib/netdev-rte-offloads.h \ >>> lib/netdev-vport.c \ >>> lib/netdev-vport.h \ >>> lib/netdev-vport-private.h \ >>> @@ -411,7 +412,8 @@ endif >>> if DPDK_NETDEV >>> lib_libopenvswitch_la_SOURCES += \ >>> lib/dpdk.c \ >>> - lib/netdev-dpdk.c >>> + lib/netdev-dpdk.c \ >>> + lib/netdev-rte-offloads.c >>> else >>> lib_libopenvswitch_la_SOURCES += \ >>> lib/dpdk-stub.c >>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >>> index 163d4ec..c4228d2 100644 >>> --- a/lib/netdev-dpdk.c >>> +++ b/lib/netdev-dpdk.c >>> @@ -47,6 +47,7 @@ >>> #include "dpif-netdev.h" >>> #include "fatal-signal.h" >>> #include "netdev-provider.h" >>> +#include "netdev-rte-offloads.h" >>> #include "netdev-vport.h" >>> #include "odp-util.h" >>> #include "openvswitch/dynamic-string.h" >>> @@ -179,17 +180,6 @@ static const struct rte_eth_conf port_conf = { >>> }; >>> >>> /* >>> - * A mapping from ufid to dpdk rte_flow. >>> - */ >>> -static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; >>> - >>> -struct ufid_to_rte_flow_data { >>> - struct cmap_node node; >>> - ovs_u128 ufid; >>> - struct rte_flow *rte_flow; >>> -}; >>> - >>> -/* >>> * These callbacks allow virtio-net devices to be added to vhost ports when >>> * configuration has been fully completed. >>> */ >>> @@ -4240,721 +4230,6 @@ netdev_dpdk_rte_flow_create(struct netdev >> *netdev, >>> return flow; >>> } >>> >>> -/* Find rte_flow with @ufid */ >>> -static struct rte_flow * >>> -ufid_to_rte_flow_find(const ovs_u128 *ufid) { >>> - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); >>> - struct ufid_to_rte_flow_data *data; >>> - >>> - CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { >>> - if (ovs_u128_equals(*ufid, data->ufid)) { >>> - return data->rte_flow; >>> - } >>> - } >>> - >>> - return NULL; >>> -} >>> - >>> -static inline void >>> -ufid_to_rte_flow_associate(const ovs_u128 *ufid, >>> - struct rte_flow *rte_flow) { >>> - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); >>> - struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); >>> - >>> - /* >>> - * We should not simply overwrite an existing rte flow. >>> - * We should have deleted it first before re-adding it. >>> - * Thus, if following assert triggers, something is wrong: >>> - * the rte_flow is not destroyed. >>> - */ >>> - ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); >>> - >>> - data->ufid = *ufid; >>> - data->rte_flow = rte_flow; >>> - >>> - cmap_insert(&ufid_to_rte_flow, >>> - CONST_CAST(struct cmap_node *, &data->node), hash); >>> -} >>> - >>> -static inline void >>> -ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { >>> - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); >>> - struct ufid_to_rte_flow_data *data; >>> - >>> - CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { >>> - if (ovs_u128_equals(*ufid, data->ufid)) { >>> - cmap_remove(&ufid_to_rte_flow, >>> - CONST_CAST(struct cmap_node *, &data->node), hash); >>> - ovsrcu_postpone(free, data); >>> - return; >>> - } >>> - } >>> - >>> - VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", >>> - UUID_ARGS((struct uuid *)ufid)); >>> -} >>> - >>> -/* >>> - * To avoid individual xrealloc calls for each new element, a 'curent_max' >>> - * is used to keep track of current allocated number of elements. Starts >>> - * by 8 and doubles on each xrealloc call >>> - */ >>> -struct flow_patterns { >>> - struct rte_flow_item *items; >>> - int cnt; >>> - int current_max; >>> -}; >>> - >>> -struct flow_actions { >>> - struct rte_flow_action *actions; >>> - int cnt; >>> - int current_max; >>> -}; >>> - >>> -static void >>> -dump_flow_pattern(struct rte_flow_item *item) >>> -{ >>> - struct ds s; >>> - >>> - if (!VLOG_IS_DBG_ENABLED() || item->type == >> RTE_FLOW_ITEM_TYPE_END) { >>> - return; >>> - } >>> - >>> - ds_init(&s); >>> - >>> - if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { >>> - const struct rte_flow_item_eth *eth_spec = item->spec; >>> - const struct rte_flow_item_eth *eth_mask = item->mask; >>> - >>> - ds_put_cstr(&s, "rte flow eth pattern:\n"); >>> - if (eth_spec) { >>> - ds_put_format(&s, >>> - " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " >>> - "type=0x%04" PRIx16"\n", >>> - ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), >>> - ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), >>> - ntohs(eth_spec->type)); >>> - } else { >>> - ds_put_cstr(&s, " Spec = null\n"); >>> - } >>> - if (eth_mask) { >>> - ds_put_format(&s, >>> - " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " >>> - "type=0x%04"PRIx16"\n", >>> - ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), >>> - ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), >>> - eth_mask->type); >>> - } else { >>> - ds_put_cstr(&s, " Mask = null\n"); >>> - } >>> - } >>> - >>> - if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { >>> - const struct rte_flow_item_vlan *vlan_spec = item->spec; >>> - const struct rte_flow_item_vlan *vlan_mask = item->mask; >>> - >>> - ds_put_cstr(&s, "rte flow vlan pattern:\n"); >>> - if (vlan_spec) { >>> - ds_put_format(&s, >>> - " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", >>> - ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); >>> - } else { >>> - ds_put_cstr(&s, " Spec = null\n"); >>> - } >>> - >>> - if (vlan_mask) { >>> - ds_put_format(&s, >>> - " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", >>> - ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); >>> - } else { >>> - ds_put_cstr(&s, " Mask = null\n"); >>> - } >>> - } >>> - >>> - if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { >>> - const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; >>> - const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; >>> - >>> - ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); >>> - if (ipv4_spec) { >>> - ds_put_format(&s, >>> - " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 >>> - ", src="IP_FMT", dst="IP_FMT"\n", >>> - ipv4_spec->hdr.type_of_service, >>> - ipv4_spec->hdr.time_to_live, >>> - ipv4_spec->hdr.next_proto_id, >>> - IP_ARGS(ipv4_spec->hdr.src_addr), >>> - IP_ARGS(ipv4_spec->hdr.dst_addr)); >>> - } else { >>> - ds_put_cstr(&s, " Spec = null\n"); >>> - } >>> - if (ipv4_mask) { >>> - ds_put_format(&s, >>> - " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 >>> - ", src="IP_FMT", dst="IP_FMT"\n", >>> - ipv4_mask->hdr.type_of_service, >>> - ipv4_mask->hdr.time_to_live, >>> - ipv4_mask->hdr.next_proto_id, >>> - IP_ARGS(ipv4_mask->hdr.src_addr), >>> - IP_ARGS(ipv4_mask->hdr.dst_addr)); >>> - } else { >>> - ds_put_cstr(&s, " Mask = null\n"); >>> - } >>> - } >>> - >>> - if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { >>> - const struct rte_flow_item_udp *udp_spec = item->spec; >>> - const struct rte_flow_item_udp *udp_mask = item->mask; >>> - >>> - ds_put_cstr(&s, "rte flow udp pattern:\n"); >>> - if (udp_spec) { >>> - ds_put_format(&s, >>> - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", >>> - ntohs(udp_spec->hdr.src_port), >>> - ntohs(udp_spec->hdr.dst_port)); >>> - } else { >>> - ds_put_cstr(&s, " Spec = null\n"); >>> - } >>> - if (udp_mask) { >>> - ds_put_format(&s, >>> - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", >>> - udp_mask->hdr.src_port, >>> - udp_mask->hdr.dst_port); >>> - } else { >>> - ds_put_cstr(&s, " Mask = null\n"); >>> - } >>> - } >>> - >>> - if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { >>> - const struct rte_flow_item_sctp *sctp_spec = item->spec; >>> - const struct rte_flow_item_sctp *sctp_mask = item->mask; >>> - >>> - ds_put_cstr(&s, "rte flow sctp pattern:\n"); >>> - if (sctp_spec) { >>> - ds_put_format(&s, >>> - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", >>> - ntohs(sctp_spec->hdr.src_port), >>> - ntohs(sctp_spec->hdr.dst_port)); >>> - } else { >>> - ds_put_cstr(&s, " Spec = null\n"); >>> - } >>> - if (sctp_mask) { >>> - ds_put_format(&s, >>> - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", >>> - sctp_mask->hdr.src_port, >>> - sctp_mask->hdr.dst_port); >>> - } else { >>> - ds_put_cstr(&s, " Mask = null\n"); >>> - } >>> - } >>> - >>> - if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { >>> - const struct rte_flow_item_icmp *icmp_spec = item->spec; >>> - const struct rte_flow_item_icmp *icmp_mask = item->mask; >>> - >>> - ds_put_cstr(&s, "rte flow icmp pattern:\n"); >>> - if (icmp_spec) { >>> - ds_put_format(&s, >>> - " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", >>> - icmp_spec->hdr.icmp_type, >>> - icmp_spec->hdr.icmp_code); >>> - } else { >>> - ds_put_cstr(&s, " Spec = null\n"); >>> - } >>> - if (icmp_mask) { >>> - ds_put_format(&s, >>> - " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", >>> - icmp_spec->hdr.icmp_type, >>> - icmp_spec->hdr.icmp_code); >>> - } else { >>> - ds_put_cstr(&s, " Mask = null\n"); >>> - } >>> - } >>> - >>> - if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { >>> - const struct rte_flow_item_tcp *tcp_spec = item->spec; >>> - const struct rte_flow_item_tcp *tcp_mask = item->mask; >>> - >>> - ds_put_cstr(&s, "rte flow tcp pattern:\n"); >>> - if (tcp_spec) { >>> - ds_put_format(&s, >>> - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 >>> - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", >>> - ntohs(tcp_spec->hdr.src_port), >>> - ntohs(tcp_spec->hdr.dst_port), >>> - tcp_spec->hdr.data_off, >>> - tcp_spec->hdr.tcp_flags); >>> - } else { >>> - ds_put_cstr(&s, " Spec = null\n"); >>> - } >>> - if (tcp_mask) { >>> - ds_put_format(&s, >>> - " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 >>> - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", >>> - tcp_mask->hdr.src_port, >>> - tcp_mask->hdr.dst_port, >>> - tcp_mask->hdr.data_off, >>> - tcp_mask->hdr.tcp_flags); >>> - } else { >>> - ds_put_cstr(&s, " Mask = null\n"); >>> - } >>> - } >>> - >>> - VLOG_DBG("%s", ds_cstr(&s)); >>> - ds_destroy(&s); >>> -} >>> - >>> -static void >>> -add_flow_pattern(struct flow_patterns *patterns, enum >> rte_flow_item_type type, >>> - const void *spec, const void *mask) { >>> - int cnt = patterns->cnt; >>> - >>> - if (cnt == 0) { >>> - patterns->current_max = 8; >>> - patterns->items = xcalloc(patterns->current_max, >>> - sizeof(struct rte_flow_item)); >>> - } else if (cnt == patterns->current_max) { >>> - patterns->current_max *= 2; >>> - patterns->items = xrealloc(patterns->items, patterns->current_max * >>> - sizeof(struct rte_flow_item)); >>> - } >>> - >>> - patterns->items[cnt].type = type; >>> - patterns->items[cnt].spec = spec; >>> - patterns->items[cnt].mask = mask; >>> - patterns->items[cnt].last = NULL; >>> - dump_flow_pattern(&patterns->items[cnt]); >>> - patterns->cnt++; >>> -} >>> - >>> -static void >>> -add_flow_action(struct flow_actions *actions, enum >> rte_flow_action_type type, >>> - const void *conf) >>> -{ >>> - int cnt = actions->cnt; >>> - >>> - if (cnt == 0) { >>> - actions->current_max = 8; >>> - actions->actions = xcalloc(actions->current_max, >>> - sizeof(struct rte_flow_action)); >>> - } else if (cnt == actions->current_max) { >>> - actions->current_max *= 2; >>> - actions->actions = xrealloc(actions->actions, actions->current_max * >>> - sizeof(struct rte_flow_action)); >>> - } >>> - >>> - actions->actions[cnt].type = type; >>> - actions->actions[cnt].conf = conf; >>> - actions->cnt++; >>> -} >>> - >>> -struct action_rss_data { >>> - struct rte_flow_action_rss conf; >>> - uint16_t queue[0]; >>> -}; >>> - >>> -static struct action_rss_data * >>> -add_flow_rss_action(struct flow_actions *actions, >>> - struct netdev *netdev) { >>> - int i; >>> - struct action_rss_data *rss_data; >>> - >>> - rss_data = xmalloc(sizeof(struct action_rss_data) + >>> - sizeof(uint16_t) * netdev->n_rxq); >>> - *rss_data = (struct action_rss_data) { >>> - .conf = (struct rte_flow_action_rss) { >>> - .func = RTE_ETH_HASH_FUNCTION_DEFAULT, >>> - .level = 0, >>> - .types = 0, >>> - .queue_num = netdev->n_rxq, >>> - .queue = rss_data->queue, >>> - .key_len = 0, >>> - .key = NULL >>> - }, >>> - }; >>> - >>> - /* Override queue array with default */ >>> - for (i = 0; i < netdev->n_rxq; i++) { >>> - rss_data->queue[i] = i; >>> - } >>> - >>> - add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data- >>> conf); >>> - >>> - return rss_data; >>> -} >>> - >>> -static int >>> -netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, >>> - const struct match *match, >>> - struct nlattr *nl_actions OVS_UNUSED, >>> - size_t actions_len OVS_UNUSED, >>> - const ovs_u128 *ufid, >>> - struct offload_info *info) { >>> - const struct rte_flow_attr flow_attr = { >>> - .group = 0, >>> - .priority = 0, >>> - .ingress = 1, >>> - .egress = 0 >>> - }; >>> - struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; >>> - struct flow_actions actions = { .actions = NULL, .cnt = 0 }; >>> - struct rte_flow *flow; >>> - struct rte_flow_error error; >>> - uint8_t *ipv4_next_proto_mask = NULL; >>> - int ret = 0; >>> - >>> - /* Eth */ >>> - struct rte_flow_item_eth eth_spec; >>> - struct rte_flow_item_eth eth_mask; >>> - if (!eth_addr_is_zero(match->wc.masks.dl_src) || >>> - !eth_addr_is_zero(match->wc.masks.dl_dst)) { >>> - memset(ð_spec, 0, sizeof eth_spec); >>> - memset(ð_mask, 0, sizeof eth_mask); >>> - rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof >> eth_spec.dst); >>> - rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof >> eth_spec.src); >>> - eth_spec.type = match->flow.dl_type; >>> - >>> - rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, >>> - sizeof eth_mask.dst); >>> - rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, >>> - sizeof eth_mask.src); >>> - eth_mask.type = match->wc.masks.dl_type; >>> - >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, >>> - ð_spec, ð_mask); >>> - } else { >>> - /* >>> - * If user specifies a flow (like UDP flow) without L2 patterns, >>> - * OVS will at least set the dl_type. Normally, it's enough to >>> - * create an eth pattern just with it. Unluckily, some Intel's >>> - * NIC (such as XL710) doesn't support that. Below is a workaround, >>> - * which simply matches any L2 pkts. >>> - */ >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, >> NULL); >>> - } >>> - >>> - /* VLAN */ >>> - struct rte_flow_item_vlan vlan_spec; >>> - struct rte_flow_item_vlan vlan_mask; >>> - if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { >>> - memset(&vlan_spec, 0, sizeof vlan_spec); >>> - memset(&vlan_mask, 0, sizeof vlan_mask); >>> - vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); >>> - vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); >>> - >>> - /* match any protocols */ >>> - vlan_mask.inner_type = 0; >>> - >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, >>> - &vlan_spec, &vlan_mask); >>> - } >>> - >>> - /* IP v4 */ >>> - uint8_t proto = 0; >>> - struct rte_flow_item_ipv4 ipv4_spec; >>> - struct rte_flow_item_ipv4 ipv4_mask; >>> - if (match->flow.dl_type == htons(ETH_TYPE_IP)) { >>> - memset(&ipv4_spec, 0, sizeof ipv4_spec); >>> - memset(&ipv4_mask, 0, sizeof ipv4_mask); >>> - >>> - ipv4_spec.hdr.type_of_service = match->flow.nw_tos; >>> - ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; >>> - ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; >>> - ipv4_spec.hdr.src_addr = match->flow.nw_src; >>> - ipv4_spec.hdr.dst_addr = match->flow.nw_dst; >>> - >>> - ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; >>> - ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; >>> - ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; >>> - ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; >>> - ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; >>> - >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, >>> - &ipv4_spec, &ipv4_mask); >>> - >>> - /* Save proto for L4 protocol setup */ >>> - proto = ipv4_spec.hdr.next_proto_id & >>> - ipv4_mask.hdr.next_proto_id; >>> - >>> - /* Remember proto mask address for later modification */ >>> - ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; >>> - } >>> - >>> - if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && >>> - proto != IPPROTO_SCTP && proto != IPPROTO_TCP && >>> - (match->wc.masks.tp_src || >>> - match->wc.masks.tp_dst || >>> - match->wc.masks.tcp_flags)) { >>> - VLOG_DBG("L4 Protocol (%u) not supported", proto); >>> - ret = -1; >>> - goto out; >>> - } >>> - >>> - if ((match->wc.masks.tp_src && match->wc.masks.tp_src != >> OVS_BE16_MAX) || >>> - (match->wc.masks.tp_dst && match->wc.masks.tp_dst != >> OVS_BE16_MAX)) { >>> - ret = -1; >>> - goto out; >>> - } >>> - >>> - struct rte_flow_item_tcp tcp_spec; >>> - struct rte_flow_item_tcp tcp_mask; >>> - if (proto == IPPROTO_TCP) { >>> - memset(&tcp_spec, 0, sizeof tcp_spec); >>> - memset(&tcp_mask, 0, sizeof tcp_mask); >>> - tcp_spec.hdr.src_port = match->flow.tp_src; >>> - tcp_spec.hdr.dst_port = match->flow.tp_dst; >>> - tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; >>> - tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; >>> - >>> - tcp_mask.hdr.src_port = match->wc.masks.tp_src; >>> - tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; >>> - tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; >>> - tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; >>> - >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, >>> - &tcp_spec, &tcp_mask); >>> - >>> - /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match */ >>> - if (ipv4_next_proto_mask) { >>> - *ipv4_next_proto_mask = 0; >>> - } >>> - goto end_proto_check; >>> - } >>> - >>> - struct rte_flow_item_udp udp_spec; >>> - struct rte_flow_item_udp udp_mask; >>> - if (proto == IPPROTO_UDP) { >>> - memset(&udp_spec, 0, sizeof udp_spec); >>> - memset(&udp_mask, 0, sizeof udp_mask); >>> - udp_spec.hdr.src_port = match->flow.tp_src; >>> - udp_spec.hdr.dst_port = match->flow.tp_dst; >>> - >>> - udp_mask.hdr.src_port = match->wc.masks.tp_src; >>> - udp_mask.hdr.dst_port = match->wc.masks.tp_dst; >>> - >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, >>> - &udp_spec, &udp_mask); >>> - >>> - /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match >> */ >>> - if (ipv4_next_proto_mask) { >>> - *ipv4_next_proto_mask = 0; >>> - } >>> - goto end_proto_check; >>> - } >>> - >>> - struct rte_flow_item_sctp sctp_spec; >>> - struct rte_flow_item_sctp sctp_mask; >>> - if (proto == IPPROTO_SCTP) { >>> - memset(&sctp_spec, 0, sizeof sctp_spec); >>> - memset(&sctp_mask, 0, sizeof sctp_mask); >>> - sctp_spec.hdr.src_port = match->flow.tp_src; >>> - sctp_spec.hdr.dst_port = match->flow.tp_dst; >>> - >>> - sctp_mask.hdr.src_port = match->wc.masks.tp_src; >>> - sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; >>> - >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, >>> - &sctp_spec, &sctp_mask); >>> - >>> - /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match >> */ >>> - if (ipv4_next_proto_mask) { >>> - *ipv4_next_proto_mask = 0; >>> - } >>> - goto end_proto_check; >>> - } >>> - >>> - struct rte_flow_item_icmp icmp_spec; >>> - struct rte_flow_item_icmp icmp_mask; >>> - if (proto == IPPROTO_ICMP) { >>> - memset(&icmp_spec, 0, sizeof icmp_spec); >>> - memset(&icmp_mask, 0, sizeof icmp_mask); >>> - icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); >>> - icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); >>> - >>> - icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match- >>> wc.masks.tp_src); >>> - icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match- >>> wc.masks.tp_dst); >>> - >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, >>> - &icmp_spec, &icmp_mask); >>> - >>> - /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto >> match */ >>> - if (ipv4_next_proto_mask) { >>> - *ipv4_next_proto_mask = 0; >>> - } >>> - goto end_proto_check; >>> - } >>> - >>> -end_proto_check: >>> - >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, >> NULL); >>> - >>> - struct rte_flow_action_mark mark; >>> - struct action_rss_data *rss; >>> - >>> - mark.id = info->flow_mark; >>> - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark); >>> - >>> - >>> - rss = add_flow_rss_action(&actions, netdev); >>> - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); >>> - >>> - flow = netdev_dpdk_rte_flow_create(netdev, >> &flow_attr,patterns.items, >>> - actions.actions, &error); >>> - >>> - free(rss); >>> - if (!flow) { >>> - VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", >>> - netdev_get_name(netdev), error.type, error.message); >>> - ret = -1; >>> - goto out; >>> - } >>> - ufid_to_rte_flow_associate(ufid, flow); >>> - VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", >>> - netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); >>> - >>> -out: >>> - free(patterns.items); >>> - free(actions.actions); >>> - return ret; >>> -} >>> - >>> -/* >>> - * Check if any unsupported flow patterns are specified. >>> - */ >>> -static int >>> -netdev_dpdk_validate_flow(const struct match *match) { >>> - struct match match_zero_wc; >>> - const struct flow *masks = &match->wc.masks; >>> - >>> - /* Create a wc-zeroed version of flow */ >>> - match_init(&match_zero_wc, &match->flow, &match->wc); >>> - >>> - if (!is_all_zeros(&match_zero_wc.flow.tunnel, >>> - sizeof match_zero_wc.flow.tunnel)) { >>> - goto err; >>> - } >>> - >>> - if (masks->metadata || masks->skb_priority || >>> - masks->pkt_mark || masks->dp_hash) { >>> - goto err; >>> - } >>> - >>> - /* recirc id must be zero */ >>> - if (match_zero_wc.flow.recirc_id) { >>> - goto err; >>> - } >>> - >>> - if (masks->ct_state || masks->ct_nw_proto || >>> - masks->ct_zone || masks->ct_mark || >>> - !ovs_u128_is_zero(masks->ct_label)) { >>> - goto err; >>> - } >>> - >>> - if (masks->conj_id || masks->actset_output) { >>> - goto err; >>> - } >>> - >>> - /* unsupported L2 */ >>> - if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) { >>> - goto err; >>> - } >>> - >>> - /* unsupported L3 */ >>> - if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst || >>> - !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) || >>> - !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) || >>> - !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) || >>> - !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) || >>> - !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) || >>> - !is_all_zeros(&masks->nsh, sizeof masks->nsh) || >>> - !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) || >>> - !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) { >>> - goto err; >>> - } >>> - >>> - /* If fragmented, then don't HW accelerate - for now */ >>> - if (match_zero_wc.flow.nw_frag) { >>> - goto err; >>> - } >>> - >>> - /* unsupported L4 */ >>> - if (masks->igmp_group_ip4 || masks->ct_tp_src || masks->ct_tp_dst) { >>> - goto err; >>> - } >>> - >>> - return 0; >>> - >>> -err: >>> - VLOG_ERR("cannot HW accelerate this flow due to unsupported >> protocols"); >>> - return -1; >>> -} >>> - >>> -static int >>> -netdev_dpdk_destroy_rte_flow(struct netdev *netdev, >>> - const ovs_u128 *ufid, >>> - struct rte_flow *rte_flow) { >>> - struct rte_flow_error error; >>> - int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); >>> - >>> - if (ret == 0) { >>> - ufid_to_rte_flow_disassociate(ufid); >>> - VLOG_DBG("%s: removed rte flow %p associated with ufid " >> UUID_FMT "\n", >>> - netdev_get_name(netdev), rte_flow, >>> - UUID_ARGS((struct uuid *)ufid)); >>> - } else { >>> - VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", >>> - netdev_get_name(netdev), error.type, error.message); >>> - } >>> - >>> - return ret; >>> -} >>> - >>> -static int >>> -netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, >>> - struct nlattr *actions, size_t actions_len, >>> - const ovs_u128 *ufid, struct offload_info *info, >>> - struct dpif_flow_stats *stats OVS_UNUSED) { >>> - struct rte_flow *rte_flow; >>> - int ret; >>> - >>> - /* >>> - * If an old rte_flow exists, it means it's a flow modification. >>> - * Here destroy the old rte flow first before adding a new one. >>> - */ >>> - rte_flow = ufid_to_rte_flow_find(ufid); >>> - if (rte_flow) { >>> - ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); >>> - if (ret < 0) { >>> - return ret; >>> - } >>> - } >>> - >>> - ret = netdev_dpdk_validate_flow(match); >>> - if (ret < 0) { >>> - return ret; >>> - } >>> - >>> - return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, >>> - actions_len, ufid, info); >>> -} >>> - >>> -static int >>> -netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, >>> - struct dpif_flow_stats *stats OVS_UNUSED) { >>> - >>> - struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); >>> - >>> - if (!rte_flow) { >>> - return -1; >>> - } >>> - >>> - return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); >>> -} >>> - >>> -#define DPDK_FLOW_OFFLOAD_API \ >>> - .flow_put = netdev_dpdk_flow_put, \ >>> - .flow_del = netdev_dpdk_flow_del >>> - >>> #define NETDEV_DPDK_CLASS_COMMON \ >>> .is_pmd = true, \ >>> .alloc = netdev_dpdk_alloc, \ >>> diff --git a/lib/netdev-rte-offloads.c b/lib/netdev-rte-offloads.c >>> new file mode 100644 >>> index 0000000..0212156 >>> --- /dev/null >>> +++ b/lib/netdev-rte-offloads.c >>> @@ -0,0 +1,775 @@ >>> +/* >>> + * Copyright (c) 2019 Mellanox Technologies, Ltd. >> >> I'm not a layer but it seems strange to have above copyright for the >> file with copy-pasted code written by other people. >> However, I'm not sure how this should look like. >> >>> + * >>> + * Licensed under the Apache License, Version 2.0 (the "License"); >>> + * you may not use this file except in compliance with the License. >>> + * You may obtain a copy of the License at: >>> + * >>> + * >> https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fww >> w.apache.org%2Flicenses%2FLICENSE- >> 2.0&data=02%7C01%7Croniba%40mellanox.com%7C1cbceab6b5394a30 >> 6a9e08d69818689d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6 >> 36863627943149395&sdata=GFoau2tQaeBk6KwZcleg4lZcbN%2B11WIBb >> Xb%2FfE50eE0%3D&reserved=0 >>> + * >>> + * Unless required by applicable law or agreed to in writing, software >>> + * distributed under the License is distributed on an "AS IS" BASIS, >>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express >> or implied. >>> + * See the License for the specific language governing permissions and >>> + * limitations under the License. >>> + */ >>> +#include <config.h> >> >> Include of "netdev-rte-offloads.h" should be here. >> Blank line. >> >>> +#include <rte_flow.h> >> >> Blank line. >> >>> +#include "netdev-rte-offloads.h" >>> +#include "netdev-provider.h" >>> +#include "dpif-netdev.h" >> >> Do you really need above header ? >> >>> +#include "cmap.h" >>> +#include "openvswitch/vlog.h" >>> +#include "openvswitch/match.h" >>> +#include "packets.h" >>> +#include "uuid.h" >> >> Above headers should go in alphabetical order. >> >> For the style guidelines please refer: >> Documentation/internals/contributing/coding-style.rst >> >>> + >>> +VLOG_DEFINE_THIS_MODULE(netdev_rte_offloads); >>> + >>> +/* >>> + * A mapping from ufid to dpdk rte_flow. >>> + */ >>> +static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; >>> + >>> +struct ufid_to_rte_flow_data { >>> + struct cmap_node node; >>> + ovs_u128 ufid; >>> + struct rte_flow *rte_flow; >>> +}; >>> + >>> + >>> +/* Find rte_flow with @ufid */ >>> +static struct rte_flow * >>> +ufid_to_rte_flow_find(const ovs_u128 *ufid) { >> >> The '{' should go on a next line in function definition. >> >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); >>> + struct ufid_to_rte_flow_data *data; >>> + >>> + CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) >> { >>> + if (ovs_u128_equals(*ufid, data->ufid)) { >>> + return data->rte_flow; >>> + } >>> + } >>> + >>> + return NULL; >>> +} >>> + >>> +static inline void >>> +ufid_to_rte_flow_associate(const ovs_u128 *ufid, >>> + struct rte_flow *rte_flow) { >> >> Same. >> >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); >>> + struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); >> >> No need to parenthesize the operand of sizeof. >> >>> + >>> + /* >>> + * We should not simply overwrite an existing rte flow. >>> + * We should have deleted it first before re-adding it. >>> + * Thus, if following assert triggers, something is wrong: >>> + * the rte_flow is not destroyed. >>> + */ >>> + ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); >>> + >>> + data->ufid = *ufid; >>> + data->rte_flow = rte_flow; >>> + >>> + cmap_insert(&ufid_to_rte_flow, >>> + CONST_CAST(struct cmap_node *, &data->node), hash); >>> +} >>> + >>> +static inline void >>> +ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { >> >> Same. >> >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); >> >> sizeof *ufid >> >>> + struct ufid_to_rte_flow_data *data; >>> + >>> + CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) >> { >>> + if (ovs_u128_equals(*ufid, data->ufid)) { >>> + cmap_remove(&ufid_to_rte_flow, >>> + CONST_CAST(struct cmap_node *, &data->node), hash); >>> + ovsrcu_postpone(free, data); >>> + return; >>> + } >>> + } >>> + >>> + VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", >>> + UUID_ARGS((struct uuid *)ufid)); >> >> Space needed between the cast and variable: '(struct uuid *) ufid'. >> >>> +} >>> + >>> +/* >>> + * To avoid individual xrealloc calls for each new element, a 'curent_max' >>> + * is used to keep track of current allocated number of elements. Starts >>> + * by 8 and doubles on each xrealloc call >>> + */ >>> +struct flow_patterns { >>> + struct rte_flow_item *items; >>> + int cnt; >>> + int current_max; >>> +}; >>> + >>> +struct flow_actions { >>> + struct rte_flow_action *actions; >>> + int cnt; >>> + int current_max; >>> +}; >>> + >>> +static void >>> +dump_flow_pattern(struct rte_flow_item *item) >>> +{ >>> + struct ds s; >>> + >>> + if (!VLOG_IS_DBG_ENABLED() || item->type == >> RTE_FLOW_ITEM_TYPE_END) { >>> + return; >>> + } >>> + >>> + ds_init(&s); >>> + >>> + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { >>> + const struct rte_flow_item_eth *eth_spec = item->spec; >>> + const struct rte_flow_item_eth *eth_mask = item->mask; >>> + >>> + ds_put_cstr(&s, "rte flow eth pattern:\n"); >>> + if (eth_spec) { >>> + ds_put_format(&s, >>> + " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " >>> + "type=0x%04" PRIx16"\n", >>> + ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), >>> + ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), >>> + ntohs(eth_spec->type)); >>> + } else { >>> + ds_put_cstr(&s, " Spec = null\n"); >>> + } >>> + if (eth_mask) { >>> + ds_put_format(&s, >>> + " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " >>> + "type=0x%04"PRIx16"\n", >>> + ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), >>> + ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), >>> + eth_mask->type); >>> + } else { >>> + ds_put_cstr(&s, " Mask = null\n"); >>> + } >>> + } >>> + >>> + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { >>> + const struct rte_flow_item_vlan *vlan_spec = item->spec; >>> + const struct rte_flow_item_vlan *vlan_mask = item->mask; >>> + >>> + ds_put_cstr(&s, "rte flow vlan pattern:\n"); >>> + if (vlan_spec) { >>> + ds_put_format(&s, >>> + " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", >>> + ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); >>> + } else { >>> + ds_put_cstr(&s, " Spec = null\n"); >>> + } >>> + >>> + if (vlan_mask) { >>> + ds_put_format(&s, >>> + " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", >>> + ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); >>> + } else { >>> + ds_put_cstr(&s, " Mask = null\n"); >>> + } >>> + } >>> + >>> + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { >>> + const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; >>> + const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; >>> + >>> + ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); >>> + if (ipv4_spec) { >>> + ds_put_format(&s, >>> + " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 >>> + ", src="IP_FMT", dst="IP_FMT"\n", >>> + ipv4_spec->hdr.type_of_service, >>> + ipv4_spec->hdr.time_to_live, >>> + ipv4_spec->hdr.next_proto_id, >>> + IP_ARGS(ipv4_spec->hdr.src_addr), >>> + IP_ARGS(ipv4_spec->hdr.dst_addr)); >>> + } else { >>> + ds_put_cstr(&s, " Spec = null\n"); >>> + } >>> + if (ipv4_mask) { >>> + ds_put_format(&s, >>> + " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 >>> + ", src="IP_FMT", dst="IP_FMT"\n", >>> + ipv4_mask->hdr.type_of_service, >>> + ipv4_mask->hdr.time_to_live, >>> + ipv4_mask->hdr.next_proto_id, >>> + IP_ARGS(ipv4_mask->hdr.src_addr), >>> + IP_ARGS(ipv4_mask->hdr.dst_addr)); >>> + } else { >>> + ds_put_cstr(&s, " Mask = null\n"); >>> + } >>> + } >>> + >>> + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { >>> + const struct rte_flow_item_udp *udp_spec = item->spec; >>> + const struct rte_flow_item_udp *udp_mask = item->mask; >>> + >>> + ds_put_cstr(&s, "rte flow udp pattern:\n"); >>> + if (udp_spec) { >>> + ds_put_format(&s, >>> + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", >>> + ntohs(udp_spec->hdr.src_port), >>> + ntohs(udp_spec->hdr.dst_port)); >>> + } else { >>> + ds_put_cstr(&s, " Spec = null\n"); >>> + } >>> + if (udp_mask) { >>> + ds_put_format(&s, >>> + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", >>> + udp_mask->hdr.src_port, >>> + udp_mask->hdr.dst_port); >>> + } else { >>> + ds_put_cstr(&s, " Mask = null\n"); >>> + } >>> + } >>> + >>> + if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { >>> + const struct rte_flow_item_sctp *sctp_spec = item->spec; >>> + const struct rte_flow_item_sctp *sctp_mask = item->mask; >>> + >>> + ds_put_cstr(&s, "rte flow sctp pattern:\n"); >>> + if (sctp_spec) { >>> + ds_put_format(&s, >>> + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", >>> + ntohs(sctp_spec->hdr.src_port), >>> + ntohs(sctp_spec->hdr.dst_port)); >>> + } else { >>> + ds_put_cstr(&s, " Spec = null\n"); >>> + } >>> + if (sctp_mask) { >>> + ds_put_format(&s, >>> + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", >>> + sctp_mask->hdr.src_port, >>> + sctp_mask->hdr.dst_port); >>> + } else { >>> + ds_put_cstr(&s, " Mask = null\n"); >>> + } >>> + } >>> + >>> + if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { >>> + const struct rte_flow_item_icmp *icmp_spec = item->spec; >>> + const struct rte_flow_item_icmp *icmp_mask = item->mask; >>> + >>> + ds_put_cstr(&s, "rte flow icmp pattern:\n"); >>> + if (icmp_spec) { >>> + ds_put_format(&s, >>> + " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", >>> + icmp_spec->hdr.icmp_type, >>> + icmp_spec->hdr.icmp_code); >>> + } else { >>> + ds_put_cstr(&s, " Spec = null\n"); >>> + } >>> + if (icmp_mask) { >>> + ds_put_format(&s, >>> + " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", >>> + icmp_spec->hdr.icmp_type, >>> + icmp_spec->hdr.icmp_code); >>> + } else { >>> + ds_put_cstr(&s, " Mask = null\n"); >>> + } >>> + } >>> + >>> + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { >>> + const struct rte_flow_item_tcp *tcp_spec = item->spec; >>> + const struct rte_flow_item_tcp *tcp_mask = item->mask; >>> + >>> + ds_put_cstr(&s, "rte flow tcp pattern:\n"); >>> + if (tcp_spec) { >>> + ds_put_format(&s, >>> + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 >>> + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", >>> + ntohs(tcp_spec->hdr.src_port), >>> + ntohs(tcp_spec->hdr.dst_port), >>> + tcp_spec->hdr.data_off, >>> + tcp_spec->hdr.tcp_flags); >>> + } else { >>> + ds_put_cstr(&s, " Spec = null\n"); >>> + } >>> + if (tcp_mask) { >>> + ds_put_format(&s, >>> + " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 >>> + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", >>> + tcp_mask->hdr.src_port, >>> + tcp_mask->hdr.dst_port, >>> + tcp_mask->hdr.data_off, >>> + tcp_mask->hdr.tcp_flags); >>> + } else { >>> + ds_put_cstr(&s, " Mask = null\n"); >>> + } >>> + } >>> + >>> + VLOG_DBG("%s", ds_cstr(&s)); >>> + ds_destroy(&s); >>> +} >>> + >>> +static void >>> +add_flow_pattern(struct flow_patterns *patterns, enum >> rte_flow_item_type type, >>> + const void *spec, const void *mask) { >> >> ditto >> >>> + int cnt = patterns->cnt; >>> + >>> + if (cnt == 0) { >>> + patterns->current_max = 8; >>> + patterns->items = xcalloc(patterns->current_max, >>> + sizeof(struct rte_flow_item)); >> >> sizeof *patterns->items >> >>> + } else if (cnt == patterns->current_max) { >>> + patterns->current_max *= 2; >>> + patterns->items = xrealloc(patterns->items, patterns->current_max * >>> + sizeof(struct rte_flow_item)); >> >> sizeof *patterns->items >> >>> + } >>> + >>> + patterns->items[cnt].type = type; >>> + patterns->items[cnt].spec = spec; >>> + patterns->items[cnt].mask = mask; >>> + patterns->items[cnt].last = NULL; >>> + dump_flow_pattern(&patterns->items[cnt]); >>> + patterns->cnt++; >>> +} >>> + >>> +static void >>> +add_flow_action(struct flow_actions *actions, enum >> rte_flow_action_type type, >>> + const void *conf) >>> +{ >>> + int cnt = actions->cnt; >>> + >>> + if (cnt == 0) { >>> + actions->current_max = 8; >>> + actions->actions = xcalloc(actions->current_max, >>> + sizeof(struct rte_flow_action)); >> >> sizeof *actions->actions >> >>> + } else if (cnt == actions->current_max) { >>> + actions->current_max *= 2; >>> + actions->actions = xrealloc(actions->actions, actions->current_max * >>> + sizeof(struct rte_flow_action)); >> >> sizeof *actions->actions >> >>> + } >>> + >>> + actions->actions[cnt].type = type; >>> + actions->actions[cnt].conf = conf; >>> + actions->cnt++; >>> +} >>> + >>> +struct action_rss_data { >>> + struct rte_flow_action_rss conf; >>> + uint16_t queue[0]; >>> +}; >>> + >>> +static struct action_rss_data * >>> +add_flow_rss_action(struct flow_actions *actions, >>> + struct netdev *netdev) { >> >> ditto >> >>> + int i; >>> + struct action_rss_data *rss_data; >>> + >>> + rss_data = xmalloc(sizeof(struct action_rss_data) + >>> + sizeof(uint16_t) * netdev->n_rxq); >> >> sizeof *rss_data + netdev_n_rxq(netdev) * sizeof rss_data->queue[0] >> >> As we're not inside the netdev we need to use getter 'netdev_n_rxq'. >> >>> + *rss_data = (struct action_rss_data) { >>> + .conf = (struct rte_flow_action_rss) { >>> + .func = RTE_ETH_HASH_FUNCTION_DEFAULT, >>> + .level = 0, >>> + .types = 0, >>> + .queue_num = netdev->n_rxq, >> >> netdev_n_rxq(netdev) >> >>> + .queue = rss_data->queue, >>> + .key_len = 0, >>> + .key = NULL >>> + }, >>> + }; >>> + >>> + /* Override queue array with default */ >>> + for (i = 0; i < netdev->n_rxq; i++) { >> >> netdev_n_rxq(netdev) >> >>> + rss_data->queue[i] = i; >>> + } >>> + >>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data- >>> conf); >>> + >>> + return rss_data; >>> +} >>> + >>> +static int >>> +netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, >>> + const struct match *match, >>> + struct nlattr *nl_actions OVS_UNUSED, >>> + size_t actions_len OVS_UNUSED, >>> + const ovs_u128 *ufid, >>> + struct offload_info *info) { >> >> ditto >> >> >> I'll skip the rest of this function as I already have a path with refactoring: >> >> https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpat >> chwork.ozlabs.org%2Fpatch%2F1037609%2F&data=02%7C01%7Croniba >> %40mellanox.com%7C1cbceab6b5394a306a9e08d69818689d%7Ca652971c7d2 >> e4d9ba6a4d149256f461b%7C0%7C0%7C636863627943149395&sdata=Z7 >> uBounMbAB6QF0LivTDkOjfyy%2B%2BrudgT96z77glGAE%3D&reserved= >> 0 >> >>> + const struct rte_flow_attr flow_attr = { >>> + .group = 0, >>> + .priority = 0, >>> + .ingress = 1, >>> + .egress = 0 >>> + }; >>> + struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; >>> + struct flow_actions actions = { .actions = NULL, .cnt = 0 }; >>> + struct rte_flow *flow; >>> + struct rte_flow_error error; >>> + uint8_t *ipv4_next_proto_mask = NULL; >>> + int ret = 0; >>> + >>> + /* Eth */ >>> + struct rte_flow_item_eth eth_spec; >>> + struct rte_flow_item_eth eth_mask; >>> + if (!eth_addr_is_zero(match->wc.masks.dl_src) || >>> + !eth_addr_is_zero(match->wc.masks.dl_dst)) { >>> + memset(ð_spec, 0, sizeof eth_spec); >>> + memset(ð_mask, 0, sizeof eth_mask); >>> + rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof >> eth_spec.dst); >>> + rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof >> eth_spec.src); >>> + eth_spec.type = match->flow.dl_type; >>> + >>> + rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, >>> + sizeof eth_mask.dst); >>> + rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, >>> + sizeof eth_mask.src); >>> + eth_mask.type = match->wc.masks.dl_type; >>> + >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, >>> + ð_spec, ð_mask); >>> + } else { >>> + /* >>> + * If user specifies a flow (like UDP flow) without L2 patterns, >>> + * OVS will at least set the dl_type. Normally, it's enough to >>> + * create an eth pattern just with it. Unluckily, some Intel's >>> + * NIC (such as XL710) doesn't support that. Below is a workaround, >>> + * which simply matches any L2 pkts. >>> + */ >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, >> NULL); >>> + } >>> + >>> + /* VLAN */ >>> + struct rte_flow_item_vlan vlan_spec; >>> + struct rte_flow_item_vlan vlan_mask; >>> + if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { >>> + memset(&vlan_spec, 0, sizeof vlan_spec); >>> + memset(&vlan_mask, 0, sizeof vlan_mask); >>> + vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); >>> + vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); >>> + >>> + /* match any protocols */ >>> + vlan_mask.inner_type = 0; >>> + >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, >>> + &vlan_spec, &vlan_mask); >>> + } >>> + >>> + /* IP v4 */ >>> + uint8_t proto = 0; >>> + struct rte_flow_item_ipv4 ipv4_spec; >>> + struct rte_flow_item_ipv4 ipv4_mask; >>> + if (match->flow.dl_type == htons(ETH_TYPE_IP)) { >>> + memset(&ipv4_spec, 0, sizeof ipv4_spec); >>> + memset(&ipv4_mask, 0, sizeof ipv4_mask); >>> + >>> + ipv4_spec.hdr.type_of_service = match->flow.nw_tos; >>> + ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; >>> + ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; >>> + ipv4_spec.hdr.src_addr = match->flow.nw_src; >>> + ipv4_spec.hdr.dst_addr = match->flow.nw_dst; >>> + >>> + ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; >>> + ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; >>> + ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; >>> + ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; >>> + ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; >>> + >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, >>> + &ipv4_spec, &ipv4_mask); >>> + >>> + /* Save proto for L4 protocol setup */ >>> + proto = ipv4_spec.hdr.next_proto_id & >>> + ipv4_mask.hdr.next_proto_id; >>> + >>> + /* Remember proto mask address for later modification */ >>> + ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; >>> + } >>> + >>> + if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && >>> + proto != IPPROTO_SCTP && proto != IPPROTO_TCP && >>> + (match->wc.masks.tp_src || >>> + match->wc.masks.tp_dst || >>> + match->wc.masks.tcp_flags)) { >>> + VLOG_DBG("L4 Protocol (%u) not supported", proto); >>> + ret = -1; >>> + goto out; >>> + } >>> + >>> + if ((match->wc.masks.tp_src && match->wc.masks.tp_src != >> OVS_BE16_MAX) || >>> + (match->wc.masks.tp_dst && match->wc.masks.tp_dst != >> OVS_BE16_MAX)) { >>> + ret = -1; >>> + goto out; >>> + } >>> + >>> + struct rte_flow_item_tcp tcp_spec; >>> + struct rte_flow_item_tcp tcp_mask; >>> + if (proto == IPPROTO_TCP) { >>> + memset(&tcp_spec, 0, sizeof tcp_spec); >>> + memset(&tcp_mask, 0, sizeof tcp_mask); >>> + tcp_spec.hdr.src_port = match->flow.tp_src; >>> + tcp_spec.hdr.dst_port = match->flow.tp_dst; >>> + tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; >>> + tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; >>> + >>> + tcp_mask.hdr.src_port = match->wc.masks.tp_src; >>> + tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; >>> + tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; >>> + tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; >>> + >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, >>> + &tcp_spec, &tcp_mask); >>> + >>> + /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match >> */ >>> + if (ipv4_next_proto_mask) { >>> + *ipv4_next_proto_mask = 0; >>> + } >>> + goto end_proto_check; >>> + } >>> + >>> + struct rte_flow_item_udp udp_spec; >>> + struct rte_flow_item_udp udp_mask; >>> + if (proto == IPPROTO_UDP) { >>> + memset(&udp_spec, 0, sizeof udp_spec); >>> + memset(&udp_mask, 0, sizeof udp_mask); >>> + udp_spec.hdr.src_port = match->flow.tp_src; >>> + udp_spec.hdr.dst_port = match->flow.tp_dst; >>> + >>> + udp_mask.hdr.src_port = match->wc.masks.tp_src; >>> + udp_mask.hdr.dst_port = match->wc.masks.tp_dst; >>> + >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, >>> + &udp_spec, &udp_mask); >>> + >>> + /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match >> */ >>> + if (ipv4_next_proto_mask) { >>> + *ipv4_next_proto_mask = 0; >>> + } >>> + goto end_proto_check; >>> + } >>> + >>> + struct rte_flow_item_sctp sctp_spec; >>> + struct rte_flow_item_sctp sctp_mask; >>> + if (proto == IPPROTO_SCTP) { >>> + memset(&sctp_spec, 0, sizeof sctp_spec); >>> + memset(&sctp_mask, 0, sizeof sctp_mask); >>> + sctp_spec.hdr.src_port = match->flow.tp_src; >>> + sctp_spec.hdr.dst_port = match->flow.tp_dst; >>> + >>> + sctp_mask.hdr.src_port = match->wc.masks.tp_src; >>> + sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; >>> + >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, >>> + &sctp_spec, &sctp_mask); >>> + >>> + /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match >> */ >>> + if (ipv4_next_proto_mask) { >>> + *ipv4_next_proto_mask = 0; >>> + } >>> + goto end_proto_check; >>> + } >>> + >>> + struct rte_flow_item_icmp icmp_spec; >>> + struct rte_flow_item_icmp icmp_mask; >>> + if (proto == IPPROTO_ICMP) { >>> + memset(&icmp_spec, 0, sizeof icmp_spec); >>> + memset(&icmp_mask, 0, sizeof icmp_mask); >>> + icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); >>> + icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); >>> + >>> + icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match- >>> wc.masks.tp_src); >>> + icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match- >>> wc.masks.tp_dst); >>> + >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, >>> + &icmp_spec, &icmp_mask); >>> + >>> + /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto >> match */ >>> + if (ipv4_next_proto_mask) { >>> + *ipv4_next_proto_mask = 0; >>> + } >>> + goto end_proto_check; >>> + } >>> + >>> +end_proto_check: >>> + >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, >> NULL); >>> + >>> + struct rte_flow_action_mark mark; >>> + struct action_rss_data *rss; >>> + >>> + mark.id = info->flow_mark; >>> + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark); >>> + >>> + >>> + rss = add_flow_rss_action(&actions, netdev); >>> + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); >>> + >>> + flow = netdev_dpdk_rte_flow_create(netdev, >> &flow_attr,patterns.items, >>> + actions.actions, &error); >>> + >>> + free(rss); >>> + if (!flow) { >>> + VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", >>> + netdev_get_name(netdev), error.type, error.message); >>> + ret = -1; >>> + goto out; >>> + } >>> + ufid_to_rte_flow_associate(ufid, flow); >>> + VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", >>> + netdev_get_name(netdev), flow, UUID_ARGS((struct uuid >> *)ufid)); >>> + >>> +out: >>> + free(patterns.items); >>> + free(actions.actions); >>> + return ret; >>> +} >> >> >> Code below was refactored recently. You're removing the new version from >> netdev-dpdk.c, but adding the old one here. >> >>> +static bool >>> +is_all_zero(const void *addr, size_t n) { >>> + size_t i = 0; >>> + const uint8_t *p = (uint8_t *)addr; >>> + >>> + for (i = 0; i < n; i++) { >>> + if (p[i] != 0) { >>> + return false; >>> + } >>> + } >>> + >>> + return true; >>> +} >>> + >>> +/* >>> + * Check if any unsupported flow patterns are specified. >>> + */ >>> +static int >>> +netdev_dpdk_validate_flow(const struct match *match) { >>> + struct match match_zero_wc; >>> + >>> + /* Create a wc-zeroed version of flow */ >>> + match_init(&match_zero_wc, &match->flow, &match->wc); >>> + >>> + if (!is_all_zero(&match_zero_wc.flow.tunnel, >>> + sizeof(match_zero_wc.flow.tunnel))) { >>> + goto err; >>> + } >>> + >>> + if (match->wc.masks.metadata || >>> + match->wc.masks.skb_priority || >>> + match->wc.masks.pkt_mark || >>> + match->wc.masks.dp_hash) { >>> + goto err; >>> + } >>> + >>> + /* recirc id must be zero */ >>> + if (match_zero_wc.flow.recirc_id) { >>> + goto err; >>> + } >>> + >>> + if (match->wc.masks.ct_state || >>> + match->wc.masks.ct_nw_proto || >>> + match->wc.masks.ct_zone || >>> + match->wc.masks.ct_mark || >>> + match->wc.masks.ct_label.u64.hi || >>> + match->wc.masks.ct_label.u64.lo) { >>> + goto err; >>> + } >>> + >>> + if (match->wc.masks.conj_id || >>> + match->wc.masks.actset_output) { >>> + goto err; >>> + } >>> + >>> + /* unsupported L2 */ >>> + if (!is_all_zero(&match->wc.masks.mpls_lse, >>> + sizeof(match_zero_wc.flow.mpls_lse))) { >>> + goto err; >>> + } >>> + >>> + /* unsupported L3 */ >>> + if (match->wc.masks.ipv6_label || >>> + match->wc.masks.ct_nw_src || >>> + match->wc.masks.ct_nw_dst || >>> + !is_all_zero(&match->wc.masks.ipv6_src, sizeof(struct in6_addr)) || >>> + !is_all_zero(&match->wc.masks.ipv6_dst, sizeof(struct in6_addr)) || >>> + !is_all_zero(&match->wc.masks.ct_ipv6_src, sizeof(struct in6_addr)) >> || >>> + !is_all_zero(&match->wc.masks.ct_ipv6_dst, sizeof(struct in6_addr)) >> || >>> + !is_all_zero(&match->wc.masks.nd_target, sizeof(struct in6_addr)) || >>> + !is_all_zero(&match->wc.masks.nsh, sizeof(struct ovs_key_nsh)) || >>> + !is_all_zero(&match->wc.masks.arp_sha, sizeof(struct eth_addr)) || >>> + !is_all_zero(&match->wc.masks.arp_tha, sizeof(struct eth_addr))) { >>> + goto err; >>> + } >>> + >>> + /* If fragmented, then don't HW accelerate - for now */ >>> + if (match_zero_wc.flow.nw_frag) { >>> + goto err; >>> + } >>> + >>> + /* unsupported L4 */ >>> + if (match->wc.masks.igmp_group_ip4 || >>> + match->wc.masks.ct_tp_src || >>> + match->wc.masks.ct_tp_dst) { >>> + goto err; >>> + } >>> + >>> + return 0; >>> + >>> +err: >>> + VLOG_ERR("cannot HW accelerate this flow due to unsupported >> protocols"); >>> + return -1; >>> +} >>> + >>> +static int >>> +netdev_dpdk_destroy_rte_flow(struct netdev *netdev, >>> + const ovs_u128 *ufid, >>> + struct rte_flow *rte_flow) { >> >> ditto >> >>> + struct rte_flow_error error; >>> + int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); >>> + >>> + if (ret == 0) { >>> + ufid_to_rte_flow_disassociate(ufid); >>> + VLOG_DBG("%s: removed rte flow %p associated with ufid " >> UUID_FMT "\n", >>> + netdev_get_name(netdev), rte_flow, >>> + UUID_ARGS((struct uuid *)ufid)); >>> + } else { >>> + VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", >>> + netdev_get_name(netdev), error.type, error.message); >>> + } >>> + >>> + return ret; >>> +} >>> + >>> +int >>> +netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, >>> + struct nlattr *actions, size_t actions_len, >>> + const ovs_u128 *ufid, struct offload_info *info, >>> + struct dpif_flow_stats *stats OVS_UNUSED) { >> >> ditto >> >>> + struct rte_flow *rte_flow; >>> + int ret; >>> + >>> + /* >>> + * If an old rte_flow exists, it means it's a flow modification. >>> + * Here destroy the old rte flow first before adding a new one. >>> + */ >>> + rte_flow = ufid_to_rte_flow_find(ufid); >>> + if (rte_flow) { >>> + ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); >>> + if (ret < 0) { >>> + return ret; >>> + } >>> + } >>> + >>> + ret = netdev_dpdk_validate_flow(match); >>> + if (ret < 0) { >>> + return ret; >>> + } >>> + >>> + return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, >>> + actions_len, ufid, info); >>> +} >>> + >>> +int >>> +netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, >>> + struct dpif_flow_stats *stats OVS_UNUSED) { >> >> ditto >> >>> + >>> + struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); >>> + >>> + if (!rte_flow) { >>> + return -1; >>> + } >>> + >>> + return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); >>> +} >>> + >>> diff --git a/lib/netdev-rte-offloads.h b/lib/netdev-rte-offloads.h >>> new file mode 100644 >>> index 0000000..573e214 >>> --- /dev/null >>> +++ b/lib/netdev-rte-offloads.h >>> @@ -0,0 +1,38 @@ >>> +/* >>> + * Copyright (c) 2019 Mellanox Technologies, Ltd. >>> + * >>> + * Licensed under the Apache License, Version 2.0 (the "License"); >>> + * you may not use this file except in compliance with the License. >>> + * You may obtain a copy of the License at: >>> + * >>> + * >> https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fww >> w.apache.org%2Flicenses%2FLICENSE- >> 2.0&data=02%7C01%7Croniba%40mellanox.com%7C1cbceab6b5394a30 >> 6a9e08d69818689d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6 >> 36863627943159409&sdata=VmwZSLlGZaYByKuVjr9CEP9ECDX4Y1ClI7Uq >> 1wdErx0%3D&reserved=0 >>> + * >>> + * Unless required by applicable law or agreed to in writing, software >>> + * distributed under the License is distributed on an "AS IS" BASIS, >>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express >> or implied. >>> + * See the License for the specific language governing permissions and >>> + * limitations under the License. >>> + */ >>> + >>> +#ifndef NETDEV_VPORT_OFFLOADS_H >>> +#define NETDEV_VPORT_OFFLOADS_H 1 >>> + >>> +#include "openvswitch/types.h" >> >> It'll be good to have a blank line here. >> >>> +struct netdev; >>> +struct match; >>> +struct nlattr; >>> +struct offload_info; >>> +struct dpif_flow_stats; >>> + >>> +int netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, >>> + struct nlattr *actions, size_t actions_len, >>> + const ovs_u128 *ufid, struct offload_info *info, >>> + struct dpif_flow_stats *stats OVS_UNUSED); >>> +int netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, >>> + struct dpif_flow_stats *stats OVS_UNUSED); >>> + >>> +#define DPDK_FLOW_OFFLOAD_API \ >>> + .flow_put = netdev_dpdk_flow_put, \ >>> + .flow_del = netdev_dpdk_flow_del >>> + >>> +#endif /* netdev-rte-offloads.h */ >>>
> -----Original Message----- > From: Ilya Maximets <i.maximets@samsung.com> > Sent: Tuesday, February 26, 2019 4:38 PM > To: Roni Bar Yanai <roniba@mellanox.com>; Ophir Munk > <ophirmu@mellanox.com>; ovs-dev@openvswitch.org > Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern <olgas@mellanox.com>; > Kevin Traynor <ktraynor@redhat.com>; Asaf Penso <asafp@mellanox.com>; > Flavio Leitner <fbl@sysclose.org> > Subject: Re: [PATCH v2 2/3] netdev-dpdk: Move offloading-code to a new > file > > On 21.02.2019 19:31, Roni Bar Yanai wrote: > > > > > >> -----Original Message----- > >> From: Ilya Maximets <i.maximets@samsung.com> > >> Sent: Thursday, February 21, 2019 6:20 PM > >> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org > >> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern > <olgas@mellanox.com>; > >> Kevin Traynor <ktraynor@redhat.com>; Asaf Penso > <asafp@mellanox.com>; > >> Roni Bar Yanai <roniba@mellanox.com>; Flavio Leitner > <fbl@sysclose.org> > >> Subject: Re: [PATCH v2 2/3] netdev-dpdk: Move offloading-code to a new > >> file > >> > >> At first, 'git am' fails to apply the patch: > >> > >> Applying: netdev-dpdk: Move offloading-code to a new file > >> error: patch failed: lib/netdev-dpdk.c:4240 > >> error: lib/netdev-dpdk.c: patch does not apply > >> .git/rebase-apply/patch:1564: new blank line at EOF. > >> + > >> Patch failed at 0002 netdev-dpdk: Move offloading-code to a new file > >> > >> I'm not sure why it complains about 'lib/netdev-dpdk.c' because the > >> issue is the blanlk line at the end of netdev-rte-offloads.c. > >> > >> On 21.02.2019 15:07, Ophir Munk wrote: > >>> From: Roni Bar Yanai <roniba@mellanox.com> > >>> > >>> Hardware offloading-code is moved to a new file called > >> > >> Just curious, why you writing "offloading-code" as a single dash > >> separated word? > >> > >>> netdev-rte-offloads.c. The original offloading-code is copied as is > >> > >> IMHO, it's a good time to make style changes in the code, because > >> current functions doesn't follow a lot of OVS coding style guidelines > >> and we're touching them anyway. > >> > > > > I agree it is a good time, but I think it should be done on a separate commit, > because if > > something breaks (although all are style changes), it will be hard to find the > changes on git log. The entire file > > Will be + . > > You already made mistakes while rebasing the copy-pasted code. > Thorough review is required anyway. Moving of few braces will > not change anything. 'sizeof' related changes are mostly obvious. > > If you wish, I could handle 'n_rxq' usage update in a separate > patch as it's not really a style change. > Ok. Will fix all on V3. Thanks. > > > > > >>> from the netdev-dpdk.c file to the new file, where future > >>> offloading-code should be added as well. The netdev-dpdk.c file will > >>> remain unchanged as new offloading-code is added. > >>> > >>> Reviewed-by: Asaf Penso <asafp@mellanox.com> > >>> Signed-off-by: Roni Bar Yanai <roniba@mellanox.com> > >>> Signed-off-by: Ophir Munk <ophirmu@mellanox.com> > >>> --- > >>> lib/automake.mk | 4 +- > >>> lib/netdev-dpdk.c | 727 +------------------------------------------ > >>> lib/netdev-rte-offloads.c | 775 > >> ++++++++++++++++++++++++++++++++++++++++++++++ > >>> lib/netdev-rte-offloads.h | 38 +++ > >>> 4 files changed, 817 insertions(+), 727 deletions(-) > >>> create mode 100644 lib/netdev-rte-offloads.c > >>> create mode 100644 lib/netdev-rte-offloads.h > >>> > >>> diff --git a/lib/automake.mk b/lib/automake.mk > >>> index bae032b..cc5dccf 100644 > >>> --- a/lib/automake.mk > >>> +++ b/lib/automake.mk > >>> @@ -138,6 +138,7 @@ lib_libopenvswitch_la_SOURCES = \ > >>> lib/netdev-dpdk.h \ > >>> lib/netdev-dummy.c \ > >>> lib/netdev-provider.h \ > >>> + lib/netdev-rte-offloads.h \ > >>> lib/netdev-vport.c \ > >>> lib/netdev-vport.h \ > >>> lib/netdev-vport-private.h \ > >>> @@ -411,7 +412,8 @@ endif > >>> if DPDK_NETDEV > >>> lib_libopenvswitch_la_SOURCES += \ > >>> lib/dpdk.c \ > >>> - lib/netdev-dpdk.c > >>> + lib/netdev-dpdk.c \ > >>> + lib/netdev-rte-offloads.c > >>> else > >>> lib_libopenvswitch_la_SOURCES += \ > >>> lib/dpdk-stub.c > >>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > >>> index 163d4ec..c4228d2 100644 > >>> --- a/lib/netdev-dpdk.c > >>> +++ b/lib/netdev-dpdk.c > >>> @@ -47,6 +47,7 @@ > >>> #include "dpif-netdev.h" > >>> #include "fatal-signal.h" > >>> #include "netdev-provider.h" > >>> +#include "netdev-rte-offloads.h" > >>> #include "netdev-vport.h" > >>> #include "odp-util.h" > >>> #include "openvswitch/dynamic-string.h" > >>> @@ -179,17 +180,6 @@ static const struct rte_eth_conf port_conf = { > >>> }; > >>> > >>> /* > >>> - * A mapping from ufid to dpdk rte_flow. > >>> - */ > >>> -static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; > >>> - > >>> -struct ufid_to_rte_flow_data { > >>> - struct cmap_node node; > >>> - ovs_u128 ufid; > >>> - struct rte_flow *rte_flow; > >>> -}; > >>> - > >>> -/* > >>> * These callbacks allow virtio-net devices to be added to vhost ports > when > >>> * configuration has been fully completed. > >>> */ > >>> @@ -4240,721 +4230,6 @@ netdev_dpdk_rte_flow_create(struct > netdev > >> *netdev, > >>> return flow; > >>> } > >>> > >>> -/* Find rte_flow with @ufid */ > >>> -static struct rte_flow * > >>> -ufid_to_rte_flow_find(const ovs_u128 *ufid) { > >>> - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >>> - struct ufid_to_rte_flow_data *data; > >>> - > >>> - CMAP_FOR_EACH_WITH_HASH (data, node, hash, > &ufid_to_rte_flow) { > >>> - if (ovs_u128_equals(*ufid, data->ufid)) { > >>> - return data->rte_flow; > >>> - } > >>> - } > >>> - > >>> - return NULL; > >>> -} > >>> - > >>> -static inline void > >>> -ufid_to_rte_flow_associate(const ovs_u128 *ufid, > >>> - struct rte_flow *rte_flow) { > >>> - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >>> - struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); > >>> - > >>> - /* > >>> - * We should not simply overwrite an existing rte flow. > >>> - * We should have deleted it first before re-adding it. > >>> - * Thus, if following assert triggers, something is wrong: > >>> - * the rte_flow is not destroyed. > >>> - */ > >>> - ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); > >>> - > >>> - data->ufid = *ufid; > >>> - data->rte_flow = rte_flow; > >>> - > >>> - cmap_insert(&ufid_to_rte_flow, > >>> - CONST_CAST(struct cmap_node *, &data->node), hash); > >>> -} > >>> - > >>> -static inline void > >>> -ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { > >>> - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >>> - struct ufid_to_rte_flow_data *data; > >>> - > >>> - CMAP_FOR_EACH_WITH_HASH (data, node, hash, > &ufid_to_rte_flow) { > >>> - if (ovs_u128_equals(*ufid, data->ufid)) { > >>> - cmap_remove(&ufid_to_rte_flow, > >>> - CONST_CAST(struct cmap_node *, &data->node), hash); > >>> - ovsrcu_postpone(free, data); > >>> - return; > >>> - } > >>> - } > >>> - > >>> - VLOG_WARN("ufid "UUID_FMT" is not associated with an rte > flow\n", > >>> - UUID_ARGS((struct uuid *)ufid)); > >>> -} > >>> - > >>> -/* > >>> - * To avoid individual xrealloc calls for each new element, a > 'curent_max' > >>> - * is used to keep track of current allocated number of elements. Starts > >>> - * by 8 and doubles on each xrealloc call > >>> - */ > >>> -struct flow_patterns { > >>> - struct rte_flow_item *items; > >>> - int cnt; > >>> - int current_max; > >>> -}; > >>> - > >>> -struct flow_actions { > >>> - struct rte_flow_action *actions; > >>> - int cnt; > >>> - int current_max; > >>> -}; > >>> - > >>> -static void > >>> -dump_flow_pattern(struct rte_flow_item *item) > >>> -{ > >>> - struct ds s; > >>> - > >>> - if (!VLOG_IS_DBG_ENABLED() || item->type == > >> RTE_FLOW_ITEM_TYPE_END) { > >>> - return; > >>> - } > >>> - > >>> - ds_init(&s); > >>> - > >>> - if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { > >>> - const struct rte_flow_item_eth *eth_spec = item->spec; > >>> - const struct rte_flow_item_eth *eth_mask = item->mask; > >>> - > >>> - ds_put_cstr(&s, "rte flow eth pattern:\n"); > >>> - if (eth_spec) { > >>> - ds_put_format(&s, > >>> - " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > >>> - "type=0x%04" PRIx16"\n", > >>> - ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), > >>> - ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), > >>> - ntohs(eth_spec->type)); > >>> - } else { > >>> - ds_put_cstr(&s, " Spec = null\n"); > >>> - } > >>> - if (eth_mask) { > >>> - ds_put_format(&s, > >>> - " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > >>> - "type=0x%04"PRIx16"\n", > >>> - ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), > >>> - ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), > >>> - eth_mask->type); > >>> - } else { > >>> - ds_put_cstr(&s, " Mask = null\n"); > >>> - } > >>> - } > >>> - > >>> - if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { > >>> - const struct rte_flow_item_vlan *vlan_spec = item->spec; > >>> - const struct rte_flow_item_vlan *vlan_mask = item->mask; > >>> - > >>> - ds_put_cstr(&s, "rte flow vlan pattern:\n"); > >>> - if (vlan_spec) { > >>> - ds_put_format(&s, > >>> - " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > >>> - ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); > >>> - } else { > >>> - ds_put_cstr(&s, " Spec = null\n"); > >>> - } > >>> - > >>> - if (vlan_mask) { > >>> - ds_put_format(&s, > >>> - " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > >>> - ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); > >>> - } else { > >>> - ds_put_cstr(&s, " Mask = null\n"); > >>> - } > >>> - } > >>> - > >>> - if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { > >>> - const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; > >>> - const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; > >>> - > >>> - ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); > >>> - if (ipv4_spec) { > >>> - ds_put_format(&s, > >>> - " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > >>> - ", src="IP_FMT", dst="IP_FMT"\n", > >>> - ipv4_spec->hdr.type_of_service, > >>> - ipv4_spec->hdr.time_to_live, > >>> - ipv4_spec->hdr.next_proto_id, > >>> - IP_ARGS(ipv4_spec->hdr.src_addr), > >>> - IP_ARGS(ipv4_spec->hdr.dst_addr)); > >>> - } else { > >>> - ds_put_cstr(&s, " Spec = null\n"); > >>> - } > >>> - if (ipv4_mask) { > >>> - ds_put_format(&s, > >>> - " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > >>> - ", src="IP_FMT", dst="IP_FMT"\n", > >>> - ipv4_mask->hdr.type_of_service, > >>> - ipv4_mask->hdr.time_to_live, > >>> - ipv4_mask->hdr.next_proto_id, > >>> - IP_ARGS(ipv4_mask->hdr.src_addr), > >>> - IP_ARGS(ipv4_mask->hdr.dst_addr)); > >>> - } else { > >>> - ds_put_cstr(&s, " Mask = null\n"); > >>> - } > >>> - } > >>> - > >>> - if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { > >>> - const struct rte_flow_item_udp *udp_spec = item->spec; > >>> - const struct rte_flow_item_udp *udp_mask = item->mask; > >>> - > >>> - ds_put_cstr(&s, "rte flow udp pattern:\n"); > >>> - if (udp_spec) { > >>> - ds_put_format(&s, > >>> - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > >>> - ntohs(udp_spec->hdr.src_port), > >>> - ntohs(udp_spec->hdr.dst_port)); > >>> - } else { > >>> - ds_put_cstr(&s, " Spec = null\n"); > >>> - } > >>> - if (udp_mask) { > >>> - ds_put_format(&s, > >>> - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > >>> - udp_mask->hdr.src_port, > >>> - udp_mask->hdr.dst_port); > >>> - } else { > >>> - ds_put_cstr(&s, " Mask = null\n"); > >>> - } > >>> - } > >>> - > >>> - if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { > >>> - const struct rte_flow_item_sctp *sctp_spec = item->spec; > >>> - const struct rte_flow_item_sctp *sctp_mask = item->mask; > >>> - > >>> - ds_put_cstr(&s, "rte flow sctp pattern:\n"); > >>> - if (sctp_spec) { > >>> - ds_put_format(&s, > >>> - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > >>> - ntohs(sctp_spec->hdr.src_port), > >>> - ntohs(sctp_spec->hdr.dst_port)); > >>> - } else { > >>> - ds_put_cstr(&s, " Spec = null\n"); > >>> - } > >>> - if (sctp_mask) { > >>> - ds_put_format(&s, > >>> - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > >>> - sctp_mask->hdr.src_port, > >>> - sctp_mask->hdr.dst_port); > >>> - } else { > >>> - ds_put_cstr(&s, " Mask = null\n"); > >>> - } > >>> - } > >>> - > >>> - if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { > >>> - const struct rte_flow_item_icmp *icmp_spec = item->spec; > >>> - const struct rte_flow_item_icmp *icmp_mask = item->mask; > >>> - > >>> - ds_put_cstr(&s, "rte flow icmp pattern:\n"); > >>> - if (icmp_spec) { > >>> - ds_put_format(&s, > >>> - " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", > >>> - icmp_spec->hdr.icmp_type, > >>> - icmp_spec->hdr.icmp_code); > >>> - } else { > >>> - ds_put_cstr(&s, " Spec = null\n"); > >>> - } > >>> - if (icmp_mask) { > >>> - ds_put_format(&s, > >>> - " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", > >>> - icmp_spec->hdr.icmp_type, > >>> - icmp_spec->hdr.icmp_code); > >>> - } else { > >>> - ds_put_cstr(&s, " Mask = null\n"); > >>> - } > >>> - } > >>> - > >>> - if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { > >>> - const struct rte_flow_item_tcp *tcp_spec = item->spec; > >>> - const struct rte_flow_item_tcp *tcp_mask = item->mask; > >>> - > >>> - ds_put_cstr(&s, "rte flow tcp pattern:\n"); > >>> - if (tcp_spec) { > >>> - ds_put_format(&s, > >>> - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 > >>> - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > >>> - ntohs(tcp_spec->hdr.src_port), > >>> - ntohs(tcp_spec->hdr.dst_port), > >>> - tcp_spec->hdr.data_off, > >>> - tcp_spec->hdr.tcp_flags); > >>> - } else { > >>> - ds_put_cstr(&s, " Spec = null\n"); > >>> - } > >>> - if (tcp_mask) { > >>> - ds_put_format(&s, > >>> - " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 > >>> - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > >>> - tcp_mask->hdr.src_port, > >>> - tcp_mask->hdr.dst_port, > >>> - tcp_mask->hdr.data_off, > >>> - tcp_mask->hdr.tcp_flags); > >>> - } else { > >>> - ds_put_cstr(&s, " Mask = null\n"); > >>> - } > >>> - } > >>> - > >>> - VLOG_DBG("%s", ds_cstr(&s)); > >>> - ds_destroy(&s); > >>> -} > >>> - > >>> -static void > >>> -add_flow_pattern(struct flow_patterns *patterns, enum > >> rte_flow_item_type type, > >>> - const void *spec, const void *mask) { > >>> - int cnt = patterns->cnt; > >>> - > >>> - if (cnt == 0) { > >>> - patterns->current_max = 8; > >>> - patterns->items = xcalloc(patterns->current_max, > >>> - sizeof(struct rte_flow_item)); > >>> - } else if (cnt == patterns->current_max) { > >>> - patterns->current_max *= 2; > >>> - patterns->items = xrealloc(patterns->items, patterns- > >current_max * > >>> - sizeof(struct rte_flow_item)); > >>> - } > >>> - > >>> - patterns->items[cnt].type = type; > >>> - patterns->items[cnt].spec = spec; > >>> - patterns->items[cnt].mask = mask; > >>> - patterns->items[cnt].last = NULL; > >>> - dump_flow_pattern(&patterns->items[cnt]); > >>> - patterns->cnt++; > >>> -} > >>> - > >>> -static void > >>> -add_flow_action(struct flow_actions *actions, enum > >> rte_flow_action_type type, > >>> - const void *conf) > >>> -{ > >>> - int cnt = actions->cnt; > >>> - > >>> - if (cnt == 0) { > >>> - actions->current_max = 8; > >>> - actions->actions = xcalloc(actions->current_max, > >>> - sizeof(struct rte_flow_action)); > >>> - } else if (cnt == actions->current_max) { > >>> - actions->current_max *= 2; > >>> - actions->actions = xrealloc(actions->actions, actions->current_max > * > >>> - sizeof(struct rte_flow_action)); > >>> - } > >>> - > >>> - actions->actions[cnt].type = type; > >>> - actions->actions[cnt].conf = conf; > >>> - actions->cnt++; > >>> -} > >>> - > >>> -struct action_rss_data { > >>> - struct rte_flow_action_rss conf; > >>> - uint16_t queue[0]; > >>> -}; > >>> - > >>> -static struct action_rss_data * > >>> -add_flow_rss_action(struct flow_actions *actions, > >>> - struct netdev *netdev) { > >>> - int i; > >>> - struct action_rss_data *rss_data; > >>> - > >>> - rss_data = xmalloc(sizeof(struct action_rss_data) + > >>> - sizeof(uint16_t) * netdev->n_rxq); > >>> - *rss_data = (struct action_rss_data) { > >>> - .conf = (struct rte_flow_action_rss) { > >>> - .func = RTE_ETH_HASH_FUNCTION_DEFAULT, > >>> - .level = 0, > >>> - .types = 0, > >>> - .queue_num = netdev->n_rxq, > >>> - .queue = rss_data->queue, > >>> - .key_len = 0, > >>> - .key = NULL > >>> - }, > >>> - }; > >>> - > >>> - /* Override queue array with default */ > >>> - for (i = 0; i < netdev->n_rxq; i++) { > >>> - rss_data->queue[i] = i; > >>> - } > >>> - > >>> - add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data- > >>> conf); > >>> - > >>> - return rss_data; > >>> -} > >>> - > >>> -static int > >>> -netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, > >>> - const struct match *match, > >>> - struct nlattr *nl_actions OVS_UNUSED, > >>> - size_t actions_len OVS_UNUSED, > >>> - const ovs_u128 *ufid, > >>> - struct offload_info *info) { > >>> - const struct rte_flow_attr flow_attr = { > >>> - .group = 0, > >>> - .priority = 0, > >>> - .ingress = 1, > >>> - .egress = 0 > >>> - }; > >>> - struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; > >>> - struct flow_actions actions = { .actions = NULL, .cnt = 0 }; > >>> - struct rte_flow *flow; > >>> - struct rte_flow_error error; > >>> - uint8_t *ipv4_next_proto_mask = NULL; > >>> - int ret = 0; > >>> - > >>> - /* Eth */ > >>> - struct rte_flow_item_eth eth_spec; > >>> - struct rte_flow_item_eth eth_mask; > >>> - if (!eth_addr_is_zero(match->wc.masks.dl_src) || > >>> - !eth_addr_is_zero(match->wc.masks.dl_dst)) { > >>> - memset(ð_spec, 0, sizeof eth_spec); > >>> - memset(ð_mask, 0, sizeof eth_mask); > >>> - rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof > >> eth_spec.dst); > >>> - rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof > >> eth_spec.src); > >>> - eth_spec.type = match->flow.dl_type; > >>> - > >>> - rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, > >>> - sizeof eth_mask.dst); > >>> - rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, > >>> - sizeof eth_mask.src); > >>> - eth_mask.type = match->wc.masks.dl_type; > >>> - > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, > >>> - ð_spec, ð_mask); > >>> - } else { > >>> - /* > >>> - * If user specifies a flow (like UDP flow) without L2 patterns, > >>> - * OVS will at least set the dl_type. Normally, it's enough to > >>> - * create an eth pattern just with it. Unluckily, some Intel's > >>> - * NIC (such as XL710) doesn't support that. Below is a workaround, > >>> - * which simply matches any L2 pkts. > >>> - */ > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, > >> NULL); > >>> - } > >>> - > >>> - /* VLAN */ > >>> - struct rte_flow_item_vlan vlan_spec; > >>> - struct rte_flow_item_vlan vlan_mask; > >>> - if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { > >>> - memset(&vlan_spec, 0, sizeof vlan_spec); > >>> - memset(&vlan_mask, 0, sizeof vlan_mask); > >>> - vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); > >>> - vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); > >>> - > >>> - /* match any protocols */ > >>> - vlan_mask.inner_type = 0; > >>> - > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, > >>> - &vlan_spec, &vlan_mask); > >>> - } > >>> - > >>> - /* IP v4 */ > >>> - uint8_t proto = 0; > >>> - struct rte_flow_item_ipv4 ipv4_spec; > >>> - struct rte_flow_item_ipv4 ipv4_mask; > >>> - if (match->flow.dl_type == htons(ETH_TYPE_IP)) { > >>> - memset(&ipv4_spec, 0, sizeof ipv4_spec); > >>> - memset(&ipv4_mask, 0, sizeof ipv4_mask); > >>> - > >>> - ipv4_spec.hdr.type_of_service = match->flow.nw_tos; > >>> - ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; > >>> - ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; > >>> - ipv4_spec.hdr.src_addr = match->flow.nw_src; > >>> - ipv4_spec.hdr.dst_addr = match->flow.nw_dst; > >>> - > >>> - ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; > >>> - ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; > >>> - ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; > >>> - ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; > >>> - ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; > >>> - > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, > >>> - &ipv4_spec, &ipv4_mask); > >>> - > >>> - /* Save proto for L4 protocol setup */ > >>> - proto = ipv4_spec.hdr.next_proto_id & > >>> - ipv4_mask.hdr.next_proto_id; > >>> - > >>> - /* Remember proto mask address for later modification */ > >>> - ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; > >>> - } > >>> - > >>> - if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && > >>> - proto != IPPROTO_SCTP && proto != IPPROTO_TCP && > >>> - (match->wc.masks.tp_src || > >>> - match->wc.masks.tp_dst || > >>> - match->wc.masks.tcp_flags)) { > >>> - VLOG_DBG("L4 Protocol (%u) not supported", proto); > >>> - ret = -1; > >>> - goto out; > >>> - } > >>> - > >>> - if ((match->wc.masks.tp_src && match->wc.masks.tp_src != > >> OVS_BE16_MAX) || > >>> - (match->wc.masks.tp_dst && match->wc.masks.tp_dst != > >> OVS_BE16_MAX)) { > >>> - ret = -1; > >>> - goto out; > >>> - } > >>> - > >>> - struct rte_flow_item_tcp tcp_spec; > >>> - struct rte_flow_item_tcp tcp_mask; > >>> - if (proto == IPPROTO_TCP) { > >>> - memset(&tcp_spec, 0, sizeof tcp_spec); > >>> - memset(&tcp_mask, 0, sizeof tcp_mask); > >>> - tcp_spec.hdr.src_port = match->flow.tp_src; > >>> - tcp_spec.hdr.dst_port = match->flow.tp_dst; > >>> - tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; > >>> - tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; > >>> - > >>> - tcp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> - tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> - tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; > >>> - tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & > 0xff; > >>> - > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, > >>> - &tcp_spec, &tcp_mask); > >>> - > >>> - /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match > */ > >>> - if (ipv4_next_proto_mask) { > >>> - *ipv4_next_proto_mask = 0; > >>> - } > >>> - goto end_proto_check; > >>> - } > >>> - > >>> - struct rte_flow_item_udp udp_spec; > >>> - struct rte_flow_item_udp udp_mask; > >>> - if (proto == IPPROTO_UDP) { > >>> - memset(&udp_spec, 0, sizeof udp_spec); > >>> - memset(&udp_mask, 0, sizeof udp_mask); > >>> - udp_spec.hdr.src_port = match->flow.tp_src; > >>> - udp_spec.hdr.dst_port = match->flow.tp_dst; > >>> - > >>> - udp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> - udp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> - > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, > >>> - &udp_spec, &udp_mask); > >>> - > >>> - /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto > match > >> */ > >>> - if (ipv4_next_proto_mask) { > >>> - *ipv4_next_proto_mask = 0; > >>> - } > >>> - goto end_proto_check; > >>> - } > >>> - > >>> - struct rte_flow_item_sctp sctp_spec; > >>> - struct rte_flow_item_sctp sctp_mask; > >>> - if (proto == IPPROTO_SCTP) { > >>> - memset(&sctp_spec, 0, sizeof sctp_spec); > >>> - memset(&sctp_mask, 0, sizeof sctp_mask); > >>> - sctp_spec.hdr.src_port = match->flow.tp_src; > >>> - sctp_spec.hdr.dst_port = match->flow.tp_dst; > >>> - > >>> - sctp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> - sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> - > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, > >>> - &sctp_spec, &sctp_mask); > >>> - > >>> - /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto > match > >> */ > >>> - if (ipv4_next_proto_mask) { > >>> - *ipv4_next_proto_mask = 0; > >>> - } > >>> - goto end_proto_check; > >>> - } > >>> - > >>> - struct rte_flow_item_icmp icmp_spec; > >>> - struct rte_flow_item_icmp icmp_mask; > >>> - if (proto == IPPROTO_ICMP) { > >>> - memset(&icmp_spec, 0, sizeof icmp_spec); > >>> - memset(&icmp_mask, 0, sizeof icmp_mask); > >>> - icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); > >>> - icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); > >>> - > >>> - icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match- > >>> wc.masks.tp_src); > >>> - icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match- > >>> wc.masks.tp_dst); > >>> - > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, > >>> - &icmp_spec, &icmp_mask); > >>> - > >>> - /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto > >> match */ > >>> - if (ipv4_next_proto_mask) { > >>> - *ipv4_next_proto_mask = 0; > >>> - } > >>> - goto end_proto_check; > >>> - } > >>> - > >>> -end_proto_check: > >>> - > >>> - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, > >> NULL); > >>> - > >>> - struct rte_flow_action_mark mark; > >>> - struct action_rss_data *rss; > >>> - > >>> - mark.id = info->flow_mark; > >>> - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, > &mark); > >>> - > >>> - > >>> - rss = add_flow_rss_action(&actions, netdev); > >>> - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); > >>> - > >>> - flow = netdev_dpdk_rte_flow_create(netdev, > >> &flow_attr,patterns.items, > >>> - actions.actions, &error); > >>> - > >>> - free(rss); > >>> - if (!flow) { > >>> - VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", > >>> - netdev_get_name(netdev), error.type, error.message); > >>> - ret = -1; > >>> - goto out; > >>> - } > >>> - ufid_to_rte_flow_associate(ufid, flow); > >>> - VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", > >>> - netdev_get_name(netdev), flow, UUID_ARGS((struct uuid > *)ufid)); > >>> - > >>> -out: > >>> - free(patterns.items); > >>> - free(actions.actions); > >>> - return ret; > >>> -} > >>> - > >>> -/* > >>> - * Check if any unsupported flow patterns are specified. > >>> - */ > >>> -static int > >>> -netdev_dpdk_validate_flow(const struct match *match) { > >>> - struct match match_zero_wc; > >>> - const struct flow *masks = &match->wc.masks; > >>> - > >>> - /* Create a wc-zeroed version of flow */ > >>> - match_init(&match_zero_wc, &match->flow, &match->wc); > >>> - > >>> - if (!is_all_zeros(&match_zero_wc.flow.tunnel, > >>> - sizeof match_zero_wc.flow.tunnel)) { > >>> - goto err; > >>> - } > >>> - > >>> - if (masks->metadata || masks->skb_priority || > >>> - masks->pkt_mark || masks->dp_hash) { > >>> - goto err; > >>> - } > >>> - > >>> - /* recirc id must be zero */ > >>> - if (match_zero_wc.flow.recirc_id) { > >>> - goto err; > >>> - } > >>> - > >>> - if (masks->ct_state || masks->ct_nw_proto || > >>> - masks->ct_zone || masks->ct_mark || > >>> - !ovs_u128_is_zero(masks->ct_label)) { > >>> - goto err; > >>> - } > >>> - > >>> - if (masks->conj_id || masks->actset_output) { > >>> - goto err; > >>> - } > >>> - > >>> - /* unsupported L2 */ > >>> - if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) { > >>> - goto err; > >>> - } > >>> - > >>> - /* unsupported L3 */ > >>> - if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst > || > >>> - !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) || > >>> - !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) || > >>> - !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) || > >>> - !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) || > >>> - !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) || > >>> - !is_all_zeros(&masks->nsh, sizeof masks->nsh) || > >>> - !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) || > >>> - !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) { > >>> - goto err; > >>> - } > >>> - > >>> - /* If fragmented, then don't HW accelerate - for now */ > >>> - if (match_zero_wc.flow.nw_frag) { > >>> - goto err; > >>> - } > >>> - > >>> - /* unsupported L4 */ > >>> - if (masks->igmp_group_ip4 || masks->ct_tp_src || masks- > >ct_tp_dst) { > >>> - goto err; > >>> - } > >>> - > >>> - return 0; > >>> - > >>> -err: > >>> - VLOG_ERR("cannot HW accelerate this flow due to unsupported > >> protocols"); > >>> - return -1; > >>> -} > >>> - > >>> -static int > >>> -netdev_dpdk_destroy_rte_flow(struct netdev *netdev, > >>> - const ovs_u128 *ufid, > >>> - struct rte_flow *rte_flow) { > >>> - struct rte_flow_error error; > >>> - int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); > >>> - > >>> - if (ret == 0) { > >>> - ufid_to_rte_flow_disassociate(ufid); > >>> - VLOG_DBG("%s: removed rte flow %p associated with ufid " > >> UUID_FMT "\n", > >>> - netdev_get_name(netdev), rte_flow, > >>> - UUID_ARGS((struct uuid *)ufid)); > >>> - } else { > >>> - VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", > >>> - netdev_get_name(netdev), error.type, error.message); > >>> - } > >>> - > >>> - return ret; > >>> -} > >>> - > >>> -static int > >>> -netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > >>> - struct nlattr *actions, size_t actions_len, > >>> - const ovs_u128 *ufid, struct offload_info *info, > >>> - struct dpif_flow_stats *stats OVS_UNUSED) { > >>> - struct rte_flow *rte_flow; > >>> - int ret; > >>> - > >>> - /* > >>> - * If an old rte_flow exists, it means it's a flow modification. > >>> - * Here destroy the old rte flow first before adding a new one. > >>> - */ > >>> - rte_flow = ufid_to_rte_flow_find(ufid); > >>> - if (rte_flow) { > >>> - ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > >>> - if (ret < 0) { > >>> - return ret; > >>> - } > >>> - } > >>> - > >>> - ret = netdev_dpdk_validate_flow(match); > >>> - if (ret < 0) { > >>> - return ret; > >>> - } > >>> - > >>> - return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, > >>> - actions_len, ufid, info); > >>> -} > >>> - > >>> -static int > >>> -netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > >>> - struct dpif_flow_stats *stats OVS_UNUSED) { > >>> - > >>> - struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); > >>> - > >>> - if (!rte_flow) { > >>> - return -1; > >>> - } > >>> - > >>> - return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > >>> -} > >>> - > >>> -#define DPDK_FLOW_OFFLOAD_API \ > >>> - .flow_put = netdev_dpdk_flow_put, \ > >>> - .flow_del = netdev_dpdk_flow_del > >>> - > >>> #define NETDEV_DPDK_CLASS_COMMON \ > >>> .is_pmd = true, \ > >>> .alloc = netdev_dpdk_alloc, \ > >>> diff --git a/lib/netdev-rte-offloads.c b/lib/netdev-rte-offloads.c > >>> new file mode 100644 > >>> index 0000000..0212156 > >>> --- /dev/null > >>> +++ b/lib/netdev-rte-offloads.c > >>> @@ -0,0 +1,775 @@ > >>> +/* > >>> + * Copyright (c) 2019 Mellanox Technologies, Ltd. > >> > >> I'm not a layer but it seems strange to have above copyright for the > >> file with copy-pasted code written by other people. > >> However, I'm not sure how this should look like. > >> > >>> + * > >>> + * Licensed under the Apache License, Version 2.0 (the "License"); > >>> + * you may not use this file except in compliance with the License. > >>> + * You may obtain a copy of the License at: > >>> + * > >>> + * > >> > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fww > >> w.apache.org%2Flicenses%2FLICENSE- > >> > 2.0&data=02%7C01%7Croniba%40mellanox.com%7C1cbceab6b5394a30 > >> > 6a9e08d69818689d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6 > >> > 36863627943149395&sdata=GFoau2tQaeBk6KwZcleg4lZcbN%2B11WIBb > >> Xb%2FfE50eE0%3D&reserved=0 > >>> + * > >>> + * Unless required by applicable law or agreed to in writing, software > >>> + * distributed under the License is distributed on an "AS IS" BASIS, > >>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either > express > >> or implied. > >>> + * See the License for the specific language governing permissions and > >>> + * limitations under the License. > >>> + */ > >>> +#include <config.h> > >> > >> Include of "netdev-rte-offloads.h" should be here. > >> Blank line. > >> > >>> +#include <rte_flow.h> > >> > >> Blank line. > >> > >>> +#include "netdev-rte-offloads.h" > >>> +#include "netdev-provider.h" > >>> +#include "dpif-netdev.h" > >> > >> Do you really need above header ? > >> > >>> +#include "cmap.h" > >>> +#include "openvswitch/vlog.h" > >>> +#include "openvswitch/match.h" > >>> +#include "packets.h" > >>> +#include "uuid.h" > >> > >> Above headers should go in alphabetical order. > >> > >> For the style guidelines please refer: > >> Documentation/internals/contributing/coding-style.rst > >> > >>> + > >>> +VLOG_DEFINE_THIS_MODULE(netdev_rte_offloads); > >>> + > >>> +/* > >>> + * A mapping from ufid to dpdk rte_flow. > >>> + */ > >>> +static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; > >>> + > >>> +struct ufid_to_rte_flow_data { > >>> + struct cmap_node node; > >>> + ovs_u128 ufid; > >>> + struct rte_flow *rte_flow; > >>> +}; > >>> + > >>> + > >>> +/* Find rte_flow with @ufid */ > >>> +static struct rte_flow * > >>> +ufid_to_rte_flow_find(const ovs_u128 *ufid) { > >> > >> The '{' should go on a next line in function definition. > >> > >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >>> + struct ufid_to_rte_flow_data *data; > >>> + > >>> + CMAP_FOR_EACH_WITH_HASH (data, node, hash, > &ufid_to_rte_flow) > >> { > >>> + if (ovs_u128_equals(*ufid, data->ufid)) { > >>> + return data->rte_flow; > >>> + } > >>> + } > >>> + > >>> + return NULL; > >>> +} > >>> + > >>> +static inline void > >>> +ufid_to_rte_flow_associate(const ovs_u128 *ufid, > >>> + struct rte_flow *rte_flow) { > >> > >> Same. > >> > >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >>> + struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); > >> > >> No need to parenthesize the operand of sizeof. > >> > >>> + > >>> + /* > >>> + * We should not simply overwrite an existing rte flow. > >>> + * We should have deleted it first before re-adding it. > >>> + * Thus, if following assert triggers, something is wrong: > >>> + * the rte_flow is not destroyed. > >>> + */ > >>> + ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); > >>> + > >>> + data->ufid = *ufid; > >>> + data->rte_flow = rte_flow; > >>> + > >>> + cmap_insert(&ufid_to_rte_flow, > >>> + CONST_CAST(struct cmap_node *, &data->node), hash); > >>> +} > >>> + > >>> +static inline void > >>> +ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { > >> > >> Same. > >> > >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >> > >> sizeof *ufid > >> > >>> + struct ufid_to_rte_flow_data *data; > >>> + > >>> + CMAP_FOR_EACH_WITH_HASH (data, node, hash, > &ufid_to_rte_flow) > >> { > >>> + if (ovs_u128_equals(*ufid, data->ufid)) { > >>> + cmap_remove(&ufid_to_rte_flow, > >>> + CONST_CAST(struct cmap_node *, &data->node), hash); > >>> + ovsrcu_postpone(free, data); > >>> + return; > >>> + } > >>> + } > >>> + > >>> + VLOG_WARN("ufid "UUID_FMT" is not associated with an rte > flow\n", > >>> + UUID_ARGS((struct uuid *)ufid)); > >> > >> Space needed between the cast and variable: '(struct uuid *) ufid'. > >> > >>> +} > >>> + > >>> +/* > >>> + * To avoid individual xrealloc calls for each new element, a > 'curent_max' > >>> + * is used to keep track of current allocated number of elements. Starts > >>> + * by 8 and doubles on each xrealloc call > >>> + */ > >>> +struct flow_patterns { > >>> + struct rte_flow_item *items; > >>> + int cnt; > >>> + int current_max; > >>> +}; > >>> + > >>> +struct flow_actions { > >>> + struct rte_flow_action *actions; > >>> + int cnt; > >>> + int current_max; > >>> +}; > >>> + > >>> +static void > >>> +dump_flow_pattern(struct rte_flow_item *item) > >>> +{ > >>> + struct ds s; > >>> + > >>> + if (!VLOG_IS_DBG_ENABLED() || item->type == > >> RTE_FLOW_ITEM_TYPE_END) { > >>> + return; > >>> + } > >>> + > >>> + ds_init(&s); > >>> + > >>> + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { > >>> + const struct rte_flow_item_eth *eth_spec = item->spec; > >>> + const struct rte_flow_item_eth *eth_mask = item->mask; > >>> + > >>> + ds_put_cstr(&s, "rte flow eth pattern:\n"); > >>> + if (eth_spec) { > >>> + ds_put_format(&s, > >>> + " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > >>> + "type=0x%04" PRIx16"\n", > >>> + ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), > >>> + ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), > >>> + ntohs(eth_spec->type)); > >>> + } else { > >>> + ds_put_cstr(&s, " Spec = null\n"); > >>> + } > >>> + if (eth_mask) { > >>> + ds_put_format(&s, > >>> + " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " > >>> + "type=0x%04"PRIx16"\n", > >>> + ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), > >>> + ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), > >>> + eth_mask->type); > >>> + } else { > >>> + ds_put_cstr(&s, " Mask = null\n"); > >>> + } > >>> + } > >>> + > >>> + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { > >>> + const struct rte_flow_item_vlan *vlan_spec = item->spec; > >>> + const struct rte_flow_item_vlan *vlan_mask = item->mask; > >>> + > >>> + ds_put_cstr(&s, "rte flow vlan pattern:\n"); > >>> + if (vlan_spec) { > >>> + ds_put_format(&s, > >>> + " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > >>> + ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); > >>> + } else { > >>> + ds_put_cstr(&s, " Spec = null\n"); > >>> + } > >>> + > >>> + if (vlan_mask) { > >>> + ds_put_format(&s, > >>> + " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", > >>> + ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); > >>> + } else { > >>> + ds_put_cstr(&s, " Mask = null\n"); > >>> + } > >>> + } > >>> + > >>> + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { > >>> + const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; > >>> + const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; > >>> + > >>> + ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); > >>> + if (ipv4_spec) { > >>> + ds_put_format(&s, > >>> + " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > >>> + ", src="IP_FMT", dst="IP_FMT"\n", > >>> + ipv4_spec->hdr.type_of_service, > >>> + ipv4_spec->hdr.time_to_live, > >>> + ipv4_spec->hdr.next_proto_id, > >>> + IP_ARGS(ipv4_spec->hdr.src_addr), > >>> + IP_ARGS(ipv4_spec->hdr.dst_addr)); > >>> + } else { > >>> + ds_put_cstr(&s, " Spec = null\n"); > >>> + } > >>> + if (ipv4_mask) { > >>> + ds_put_format(&s, > >>> + " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 > >>> + ", src="IP_FMT", dst="IP_FMT"\n", > >>> + ipv4_mask->hdr.type_of_service, > >>> + ipv4_mask->hdr.time_to_live, > >>> + ipv4_mask->hdr.next_proto_id, > >>> + IP_ARGS(ipv4_mask->hdr.src_addr), > >>> + IP_ARGS(ipv4_mask->hdr.dst_addr)); > >>> + } else { > >>> + ds_put_cstr(&s, " Mask = null\n"); > >>> + } > >>> + } > >>> + > >>> + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { > >>> + const struct rte_flow_item_udp *udp_spec = item->spec; > >>> + const struct rte_flow_item_udp *udp_mask = item->mask; > >>> + > >>> + ds_put_cstr(&s, "rte flow udp pattern:\n"); > >>> + if (udp_spec) { > >>> + ds_put_format(&s, > >>> + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > >>> + ntohs(udp_spec->hdr.src_port), > >>> + ntohs(udp_spec->hdr.dst_port)); > >>> + } else { > >>> + ds_put_cstr(&s, " Spec = null\n"); > >>> + } > >>> + if (udp_mask) { > >>> + ds_put_format(&s, > >>> + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > >>> + udp_mask->hdr.src_port, > >>> + udp_mask->hdr.dst_port); > >>> + } else { > >>> + ds_put_cstr(&s, " Mask = null\n"); > >>> + } > >>> + } > >>> + > >>> + if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { > >>> + const struct rte_flow_item_sctp *sctp_spec = item->spec; > >>> + const struct rte_flow_item_sctp *sctp_mask = item->mask; > >>> + > >>> + ds_put_cstr(&s, "rte flow sctp pattern:\n"); > >>> + if (sctp_spec) { > >>> + ds_put_format(&s, > >>> + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", > >>> + ntohs(sctp_spec->hdr.src_port), > >>> + ntohs(sctp_spec->hdr.dst_port)); > >>> + } else { > >>> + ds_put_cstr(&s, " Spec = null\n"); > >>> + } > >>> + if (sctp_mask) { > >>> + ds_put_format(&s, > >>> + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", > >>> + sctp_mask->hdr.src_port, > >>> + sctp_mask->hdr.dst_port); > >>> + } else { > >>> + ds_put_cstr(&s, " Mask = null\n"); > >>> + } > >>> + } > >>> + > >>> + if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { > >>> + const struct rte_flow_item_icmp *icmp_spec = item->spec; > >>> + const struct rte_flow_item_icmp *icmp_mask = item->mask; > >>> + > >>> + ds_put_cstr(&s, "rte flow icmp pattern:\n"); > >>> + if (icmp_spec) { > >>> + ds_put_format(&s, > >>> + " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", > >>> + icmp_spec->hdr.icmp_type, > >>> + icmp_spec->hdr.icmp_code); > >>> + } else { > >>> + ds_put_cstr(&s, " Spec = null\n"); > >>> + } > >>> + if (icmp_mask) { > >>> + ds_put_format(&s, > >>> + " Mask: icmp_type=0x%"PRIx8", > icmp_code=0x%"PRIx8"\n", > >>> + icmp_spec->hdr.icmp_type, > >>> + icmp_spec->hdr.icmp_code); > >>> + } else { > >>> + ds_put_cstr(&s, " Mask = null\n"); > >>> + } > >>> + } > >>> + > >>> + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { > >>> + const struct rte_flow_item_tcp *tcp_spec = item->spec; > >>> + const struct rte_flow_item_tcp *tcp_mask = item->mask; > >>> + > >>> + ds_put_cstr(&s, "rte flow tcp pattern:\n"); > >>> + if (tcp_spec) { > >>> + ds_put_format(&s, > >>> + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 > >>> + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > >>> + ntohs(tcp_spec->hdr.src_port), > >>> + ntohs(tcp_spec->hdr.dst_port), > >>> + tcp_spec->hdr.data_off, > >>> + tcp_spec->hdr.tcp_flags); > >>> + } else { > >>> + ds_put_cstr(&s, " Spec = null\n"); > >>> + } > >>> + if (tcp_mask) { > >>> + ds_put_format(&s, > >>> + " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 > >>> + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", > >>> + tcp_mask->hdr.src_port, > >>> + tcp_mask->hdr.dst_port, > >>> + tcp_mask->hdr.data_off, > >>> + tcp_mask->hdr.tcp_flags); > >>> + } else { > >>> + ds_put_cstr(&s, " Mask = null\n"); > >>> + } > >>> + } > >>> + > >>> + VLOG_DBG("%s", ds_cstr(&s)); > >>> + ds_destroy(&s); > >>> +} > >>> + > >>> +static void > >>> +add_flow_pattern(struct flow_patterns *patterns, enum > >> rte_flow_item_type type, > >>> + const void *spec, const void *mask) { > >> > >> ditto > >> > >>> + int cnt = patterns->cnt; > >>> + > >>> + if (cnt == 0) { > >>> + patterns->current_max = 8; > >>> + patterns->items = xcalloc(patterns->current_max, > >>> + sizeof(struct rte_flow_item)); > >> > >> sizeof *patterns->items > >> > >>> + } else if (cnt == patterns->current_max) { > >>> + patterns->current_max *= 2; > >>> + patterns->items = xrealloc(patterns->items, patterns- > >current_max * > >>> + sizeof(struct rte_flow_item)); > >> > >> sizeof *patterns->items > >> > >>> + } > >>> + > >>> + patterns->items[cnt].type = type; > >>> + patterns->items[cnt].spec = spec; > >>> + patterns->items[cnt].mask = mask; > >>> + patterns->items[cnt].last = NULL; > >>> + dump_flow_pattern(&patterns->items[cnt]); > >>> + patterns->cnt++; > >>> +} > >>> + > >>> +static void > >>> +add_flow_action(struct flow_actions *actions, enum > >> rte_flow_action_type type, > >>> + const void *conf) > >>> +{ > >>> + int cnt = actions->cnt; > >>> + > >>> + if (cnt == 0) { > >>> + actions->current_max = 8; > >>> + actions->actions = xcalloc(actions->current_max, > >>> + sizeof(struct rte_flow_action)); > >> > >> sizeof *actions->actions > >> > >>> + } else if (cnt == actions->current_max) { > >>> + actions->current_max *= 2; > >>> + actions->actions = xrealloc(actions->actions, actions->current_max > * > >>> + sizeof(struct rte_flow_action)); > >> > >> sizeof *actions->actions > >> > >>> + } > >>> + > >>> + actions->actions[cnt].type = type; > >>> + actions->actions[cnt].conf = conf; > >>> + actions->cnt++; > >>> +} > >>> + > >>> +struct action_rss_data { > >>> + struct rte_flow_action_rss conf; > >>> + uint16_t queue[0]; > >>> +}; > >>> + > >>> +static struct action_rss_data * > >>> +add_flow_rss_action(struct flow_actions *actions, > >>> + struct netdev *netdev) { > >> > >> ditto > >> > >>> + int i; > >>> + struct action_rss_data *rss_data; > >>> + > >>> + rss_data = xmalloc(sizeof(struct action_rss_data) + > >>> + sizeof(uint16_t) * netdev->n_rxq); > >> > >> sizeof *rss_data + netdev_n_rxq(netdev) * sizeof rss_data->queue[0] > >> > >> As we're not inside the netdev we need to use getter 'netdev_n_rxq'. > >> > >>> + *rss_data = (struct action_rss_data) { > >>> + .conf = (struct rte_flow_action_rss) { > >>> + .func = RTE_ETH_HASH_FUNCTION_DEFAULT, > >>> + .level = 0, > >>> + .types = 0, > >>> + .queue_num = netdev->n_rxq, > >> > >> netdev_n_rxq(netdev) > >> > >>> + .queue = rss_data->queue, > >>> + .key_len = 0, > >>> + .key = NULL > >>> + }, > >>> + }; > >>> + > >>> + /* Override queue array with default */ > >>> + for (i = 0; i < netdev->n_rxq; i++) { > >> > >> netdev_n_rxq(netdev) > >> > >>> + rss_data->queue[i] = i; > >>> + } > >>> + > >>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data- > >>> conf); > >>> + > >>> + return rss_data; > >>> +} > >>> + > >>> +static int > >>> +netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, > >>> + const struct match *match, > >>> + struct nlattr *nl_actions OVS_UNUSED, > >>> + size_t actions_len OVS_UNUSED, > >>> + const ovs_u128 *ufid, > >>> + struct offload_info *info) { > >> > >> ditto > >> > >> > >> I'll skip the rest of this function as I already have a path with refactoring: > >> > >> > https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpat > >> > chwork.ozlabs.org%2Fpatch%2F1037609%2F&data=02%7C01%7Croniba > >> > %40mellanox.com%7C1cbceab6b5394a306a9e08d69818689d%7Ca652971c7d2 > >> > e4d9ba6a4d149256f461b%7C0%7C0%7C636863627943149395&sdata=Z7 > >> > uBounMbAB6QF0LivTDkOjfyy%2B%2BrudgT96z77glGAE%3D&reserved= > >> 0 > >> > >>> + const struct rte_flow_attr flow_attr = { > >>> + .group = 0, > >>> + .priority = 0, > >>> + .ingress = 1, > >>> + .egress = 0 > >>> + }; > >>> + struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; > >>> + struct flow_actions actions = { .actions = NULL, .cnt = 0 }; > >>> + struct rte_flow *flow; > >>> + struct rte_flow_error error; > >>> + uint8_t *ipv4_next_proto_mask = NULL; > >>> + int ret = 0; > >>> + > >>> + /* Eth */ > >>> + struct rte_flow_item_eth eth_spec; > >>> + struct rte_flow_item_eth eth_mask; > >>> + if (!eth_addr_is_zero(match->wc.masks.dl_src) || > >>> + !eth_addr_is_zero(match->wc.masks.dl_dst)) { > >>> + memset(ð_spec, 0, sizeof eth_spec); > >>> + memset(ð_mask, 0, sizeof eth_mask); > >>> + rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof > >> eth_spec.dst); > >>> + rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof > >> eth_spec.src); > >>> + eth_spec.type = match->flow.dl_type; > >>> + > >>> + rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, > >>> + sizeof eth_mask.dst); > >>> + rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, > >>> + sizeof eth_mask.src); > >>> + eth_mask.type = match->wc.masks.dl_type; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, > >>> + ð_spec, ð_mask); > >>> + } else { > >>> + /* > >>> + * If user specifies a flow (like UDP flow) without L2 patterns, > >>> + * OVS will at least set the dl_type. Normally, it's enough to > >>> + * create an eth pattern just with it. Unluckily, some Intel's > >>> + * NIC (such as XL710) doesn't support that. Below is a workaround, > >>> + * which simply matches any L2 pkts. > >>> + */ > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, > >> NULL); > >>> + } > >>> + > >>> + /* VLAN */ > >>> + struct rte_flow_item_vlan vlan_spec; > >>> + struct rte_flow_item_vlan vlan_mask; > >>> + if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { > >>> + memset(&vlan_spec, 0, sizeof vlan_spec); > >>> + memset(&vlan_mask, 0, sizeof vlan_mask); > >>> + vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); > >>> + vlan_mask.tci = match->wc.masks.vlans[0].tci & > ~htons(VLAN_CFI); > >>> + > >>> + /* match any protocols */ > >>> + vlan_mask.inner_type = 0; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, > >>> + &vlan_spec, &vlan_mask); > >>> + } > >>> + > >>> + /* IP v4 */ > >>> + uint8_t proto = 0; > >>> + struct rte_flow_item_ipv4 ipv4_spec; > >>> + struct rte_flow_item_ipv4 ipv4_mask; > >>> + if (match->flow.dl_type == htons(ETH_TYPE_IP)) { > >>> + memset(&ipv4_spec, 0, sizeof ipv4_spec); > >>> + memset(&ipv4_mask, 0, sizeof ipv4_mask); > >>> + > >>> + ipv4_spec.hdr.type_of_service = match->flow.nw_tos; > >>> + ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; > >>> + ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; > >>> + ipv4_spec.hdr.src_addr = match->flow.nw_src; > >>> + ipv4_spec.hdr.dst_addr = match->flow.nw_dst; > >>> + > >>> + ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; > >>> + ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; > >>> + ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; > >>> + ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; > >>> + ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, > >>> + &ipv4_spec, &ipv4_mask); > >>> + > >>> + /* Save proto for L4 protocol setup */ > >>> + proto = ipv4_spec.hdr.next_proto_id & > >>> + ipv4_mask.hdr.next_proto_id; > >>> + > >>> + /* Remember proto mask address for later modification */ > >>> + ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; > >>> + } > >>> + > >>> + if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && > >>> + proto != IPPROTO_SCTP && proto != IPPROTO_TCP && > >>> + (match->wc.masks.tp_src || > >>> + match->wc.masks.tp_dst || > >>> + match->wc.masks.tcp_flags)) { > >>> + VLOG_DBG("L4 Protocol (%u) not supported", proto); > >>> + ret = -1; > >>> + goto out; > >>> + } > >>> + > >>> + if ((match->wc.masks.tp_src && match->wc.masks.tp_src != > >> OVS_BE16_MAX) || > >>> + (match->wc.masks.tp_dst && match->wc.masks.tp_dst != > >> OVS_BE16_MAX)) { > >>> + ret = -1; > >>> + goto out; > >>> + } > >>> + > >>> + struct rte_flow_item_tcp tcp_spec; > >>> + struct rte_flow_item_tcp tcp_mask; > >>> + if (proto == IPPROTO_TCP) { > >>> + memset(&tcp_spec, 0, sizeof tcp_spec); > >>> + memset(&tcp_mask, 0, sizeof tcp_mask); > >>> + tcp_spec.hdr.src_port = match->flow.tp_src; > >>> + tcp_spec.hdr.dst_port = match->flow.tp_dst; > >>> + tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; > >>> + tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; > >>> + > >>> + tcp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> + tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> + tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; > >>> + tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & > 0xff; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, > >>> + &tcp_spec, &tcp_mask); > >>> + > >>> + /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match > >> */ > >>> + if (ipv4_next_proto_mask) { > >>> + *ipv4_next_proto_mask = 0; > >>> + } > >>> + goto end_proto_check; > >>> + } > >>> + > >>> + struct rte_flow_item_udp udp_spec; > >>> + struct rte_flow_item_udp udp_mask; > >>> + if (proto == IPPROTO_UDP) { > >>> + memset(&udp_spec, 0, sizeof udp_spec); > >>> + memset(&udp_mask, 0, sizeof udp_mask); > >>> + udp_spec.hdr.src_port = match->flow.tp_src; > >>> + udp_spec.hdr.dst_port = match->flow.tp_dst; > >>> + > >>> + udp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> + udp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, > >>> + &udp_spec, &udp_mask); > >>> + > >>> + /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto > match > >> */ > >>> + if (ipv4_next_proto_mask) { > >>> + *ipv4_next_proto_mask = 0; > >>> + } > >>> + goto end_proto_check; > >>> + } > >>> + > >>> + struct rte_flow_item_sctp sctp_spec; > >>> + struct rte_flow_item_sctp sctp_mask; > >>> + if (proto == IPPROTO_SCTP) { > >>> + memset(&sctp_spec, 0, sizeof sctp_spec); > >>> + memset(&sctp_mask, 0, sizeof sctp_mask); > >>> + sctp_spec.hdr.src_port = match->flow.tp_src; > >>> + sctp_spec.hdr.dst_port = match->flow.tp_dst; > >>> + > >>> + sctp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> + sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, > >>> + &sctp_spec, &sctp_mask); > >>> + > >>> + /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto > match > >> */ > >>> + if (ipv4_next_proto_mask) { > >>> + *ipv4_next_proto_mask = 0; > >>> + } > >>> + goto end_proto_check; > >>> + } > >>> + > >>> + struct rte_flow_item_icmp icmp_spec; > >>> + struct rte_flow_item_icmp icmp_mask; > >>> + if (proto == IPPROTO_ICMP) { > >>> + memset(&icmp_spec, 0, sizeof icmp_spec); > >>> + memset(&icmp_mask, 0, sizeof icmp_mask); > >>> + icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); > >>> + icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); > >>> + > >>> + icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match- > >>> wc.masks.tp_src); > >>> + icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match- > >>> wc.masks.tp_dst); > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, > >>> + &icmp_spec, &icmp_mask); > >>> + > >>> + /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto > >> match */ > >>> + if (ipv4_next_proto_mask) { > >>> + *ipv4_next_proto_mask = 0; > >>> + } > >>> + goto end_proto_check; > >>> + } > >>> + > >>> +end_proto_check: > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, > >> NULL); > >>> + > >>> + struct rte_flow_action_mark mark; > >>> + struct action_rss_data *rss; > >>> + > >>> + mark.id = info->flow_mark; > >>> + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, > &mark); > >>> + > >>> + > >>> + rss = add_flow_rss_action(&actions, netdev); > >>> + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); > >>> + > >>> + flow = netdev_dpdk_rte_flow_create(netdev, > >> &flow_attr,patterns.items, > >>> + actions.actions, &error); > >>> + > >>> + free(rss); > >>> + if (!flow) { > >>> + VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", > >>> + netdev_get_name(netdev), error.type, error.message); > >>> + ret = -1; > >>> + goto out; > >>> + } > >>> + ufid_to_rte_flow_associate(ufid, flow); > >>> + VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", > >>> + netdev_get_name(netdev), flow, UUID_ARGS((struct uuid > >> *)ufid)); > >>> + > >>> +out: > >>> + free(patterns.items); > >>> + free(actions.actions); > >>> + return ret; > >>> +} > >> > >> > >> Code below was refactored recently. You're removing the new version > from > >> netdev-dpdk.c, but adding the old one here. > >> > >>> +static bool > >>> +is_all_zero(const void *addr, size_t n) { > >>> + size_t i = 0; > >>> + const uint8_t *p = (uint8_t *)addr; > >>> + > >>> + for (i = 0; i < n; i++) { > >>> + if (p[i] != 0) { > >>> + return false; > >>> + } > >>> + } > >>> + > >>> + return true; > >>> +} > >>> + > >>> +/* > >>> + * Check if any unsupported flow patterns are specified. > >>> + */ > >>> +static int > >>> +netdev_dpdk_validate_flow(const struct match *match) { > >>> + struct match match_zero_wc; > >>> + > >>> + /* Create a wc-zeroed version of flow */ > >>> + match_init(&match_zero_wc, &match->flow, &match->wc); > >>> + > >>> + if (!is_all_zero(&match_zero_wc.flow.tunnel, > >>> + sizeof(match_zero_wc.flow.tunnel))) { > >>> + goto err; > >>> + } > >>> + > >>> + if (match->wc.masks.metadata || > >>> + match->wc.masks.skb_priority || > >>> + match->wc.masks.pkt_mark || > >>> + match->wc.masks.dp_hash) { > >>> + goto err; > >>> + } > >>> + > >>> + /* recirc id must be zero */ > >>> + if (match_zero_wc.flow.recirc_id) { > >>> + goto err; > >>> + } > >>> + > >>> + if (match->wc.masks.ct_state || > >>> + match->wc.masks.ct_nw_proto || > >>> + match->wc.masks.ct_zone || > >>> + match->wc.masks.ct_mark || > >>> + match->wc.masks.ct_label.u64.hi || > >>> + match->wc.masks.ct_label.u64.lo) { > >>> + goto err; > >>> + } > >>> + > >>> + if (match->wc.masks.conj_id || > >>> + match->wc.masks.actset_output) { > >>> + goto err; > >>> + } > >>> + > >>> + /* unsupported L2 */ > >>> + if (!is_all_zero(&match->wc.masks.mpls_lse, > >>> + sizeof(match_zero_wc.flow.mpls_lse))) { > >>> + goto err; > >>> + } > >>> + > >>> + /* unsupported L3 */ > >>> + if (match->wc.masks.ipv6_label || > >>> + match->wc.masks.ct_nw_src || > >>> + match->wc.masks.ct_nw_dst || > >>> + !is_all_zero(&match->wc.masks.ipv6_src, sizeof(struct in6_addr)) > || > >>> + !is_all_zero(&match->wc.masks.ipv6_dst, sizeof(struct in6_addr)) > || > >>> + !is_all_zero(&match->wc.masks.ct_ipv6_src, sizeof(struct > in6_addr)) > >> || > >>> + !is_all_zero(&match->wc.masks.ct_ipv6_dst, sizeof(struct > in6_addr)) > >> || > >>> + !is_all_zero(&match->wc.masks.nd_target, sizeof(struct in6_addr)) > || > >>> + !is_all_zero(&match->wc.masks.nsh, sizeof(struct ovs_key_nsh)) > || > >>> + !is_all_zero(&match->wc.masks.arp_sha, sizeof(struct eth_addr)) > || > >>> + !is_all_zero(&match->wc.masks.arp_tha, sizeof(struct eth_addr))) > { > >>> + goto err; > >>> + } > >>> + > >>> + /* If fragmented, then don't HW accelerate - for now */ > >>> + if (match_zero_wc.flow.nw_frag) { > >>> + goto err; > >>> + } > >>> + > >>> + /* unsupported L4 */ > >>> + if (match->wc.masks.igmp_group_ip4 || > >>> + match->wc.masks.ct_tp_src || > >>> + match->wc.masks.ct_tp_dst) { > >>> + goto err; > >>> + } > >>> + > >>> + return 0; > >>> + > >>> +err: > >>> + VLOG_ERR("cannot HW accelerate this flow due to unsupported > >> protocols"); > >>> + return -1; > >>> +} > >>> + > >>> +static int > >>> +netdev_dpdk_destroy_rte_flow(struct netdev *netdev, > >>> + const ovs_u128 *ufid, > >>> + struct rte_flow *rte_flow) { > >> > >> ditto > >> > >>> + struct rte_flow_error error; > >>> + int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); > >>> + > >>> + if (ret == 0) { > >>> + ufid_to_rte_flow_disassociate(ufid); > >>> + VLOG_DBG("%s: removed rte flow %p associated with ufid " > >> UUID_FMT "\n", > >>> + netdev_get_name(netdev), rte_flow, > >>> + UUID_ARGS((struct uuid *)ufid)); > >>> + } else { > >>> + VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", > >>> + netdev_get_name(netdev), error.type, error.message); > >>> + } > >>> + > >>> + return ret; > >>> +} > >>> + > >>> +int > >>> +netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > >>> + struct nlattr *actions, size_t actions_len, > >>> + const ovs_u128 *ufid, struct offload_info *info, > >>> + struct dpif_flow_stats *stats OVS_UNUSED) { > >> > >> ditto > >> > >>> + struct rte_flow *rte_flow; > >>> + int ret; > >>> + > >>> + /* > >>> + * If an old rte_flow exists, it means it's a flow modification. > >>> + * Here destroy the old rte flow first before adding a new one. > >>> + */ > >>> + rte_flow = ufid_to_rte_flow_find(ufid); > >>> + if (rte_flow) { > >>> + ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > >>> + if (ret < 0) { > >>> + return ret; > >>> + } > >>> + } > >>> + > >>> + ret = netdev_dpdk_validate_flow(match); > >>> + if (ret < 0) { > >>> + return ret; > >>> + } > >>> + > >>> + return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, > >>> + actions_len, ufid, info); > >>> +} > >>> + > >>> +int > >>> +netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > >>> + struct dpif_flow_stats *stats OVS_UNUSED) { > >> > >> ditto > >> > >>> + > >>> + struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); > >>> + > >>> + if (!rte_flow) { > >>> + return -1; > >>> + } > >>> + > >>> + return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > >>> +} > >>> + > >>> diff --git a/lib/netdev-rte-offloads.h b/lib/netdev-rte-offloads.h > >>> new file mode 100644 > >>> index 0000000..573e214 > >>> --- /dev/null > >>> +++ b/lib/netdev-rte-offloads.h > >>> @@ -0,0 +1,38 @@ > >>> +/* > >>> + * Copyright (c) 2019 Mellanox Technologies, Ltd. > >>> + * > >>> + * Licensed under the Apache License, Version 2.0 (the "License"); > >>> + * you may not use this file except in compliance with the License. > >>> + * You may obtain a copy of the License at: > >>> + * > >>> + * > >> > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fww > >> w.apache.org%2Flicenses%2FLICENSE- > >> > 2.0&data=02%7C01%7Croniba%40mellanox.com%7C1cbceab6b5394a30 > >> > 6a9e08d69818689d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6 > >> > 36863627943159409&sdata=VmwZSLlGZaYByKuVjr9CEP9ECDX4Y1ClI7Uq > >> 1wdErx0%3D&reserved=0 > >>> + * > >>> + * Unless required by applicable law or agreed to in writing, software > >>> + * distributed under the License is distributed on an "AS IS" BASIS, > >>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either > express > >> or implied. > >>> + * See the License for the specific language governing permissions and > >>> + * limitations under the License. > >>> + */ > >>> + > >>> +#ifndef NETDEV_VPORT_OFFLOADS_H > >>> +#define NETDEV_VPORT_OFFLOADS_H 1 > >>> + > >>> +#include "openvswitch/types.h" > >> > >> It'll be good to have a blank line here. > >> > >>> +struct netdev; > >>> +struct match; > >>> +struct nlattr; > >>> +struct offload_info; > >>> +struct dpif_flow_stats; > >>> + > >>> +int netdev_dpdk_flow_put(struct netdev *netdev, struct match > *match, > >>> + struct nlattr *actions, size_t actions_len, > >>> + const ovs_u128 *ufid, struct offload_info *info, > >>> + struct dpif_flow_stats *stats OVS_UNUSED); > >>> +int netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 > *ufid, > >>> + struct dpif_flow_stats *stats OVS_UNUSED); > >>> + > >>> +#define DPDK_FLOW_OFFLOAD_API \ > >>> + .flow_put = netdev_dpdk_flow_put, \ > >>> + .flow_del = netdev_dpdk_flow_del > >>> + > >>> +#endif /* netdev-rte-offloads.h */ > >>>
Hi Ilya, all styling issues of this commit were addressed per your review. They will be updated in v3 > -----Original Message----- > From: Ilya Maximets <i.maximets@samsung.com> > Sent: Tuesday, February 26, 2019 4:38 PM > To: Roni Bar Yanai <roniba@mellanox.com>; Ophir Munk > <ophirmu@mellanox.com>; ovs-dev@openvswitch.org > Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern <olgas@mellanox.com>; > Kevin Traynor <ktraynor@redhat.com>; Asaf Penso <asafp@mellanox.com>; > Flavio Leitner <fbl@sysclose.org> > Subject: Re: [PATCH v2 2/3] netdev-dpdk: Move offloading-code to a new > file > > On 21.02.2019 19:31, Roni Bar Yanai wrote: > > > > > >> -----Original Message----- > >> From: Ilya Maximets <i.maximets@samsung.com> > >> Sent: Thursday, February 21, 2019 6:20 PM > >> To: Ophir Munk <ophirmu@mellanox.com>; ovs-dev@openvswitch.org > >> Cc: Ian Stokes <ian.stokes@intel.com>; Olga Shern > <olgas@mellanox.com>; > >> Kevin Traynor <ktraynor@redhat.com>; Asaf Penso > <asafp@mellanox.com>; > >> Roni Bar Yanai <roniba@mellanox.com>; Flavio Leitner > <fbl@sysclose.org> > >> Subject: Re: [PATCH v2 2/3] netdev-dpdk: Move offloading-code to a new > >> file > >> > >> At first, 'git am' fails to apply the patch: > >> > >> Applying: netdev-dpdk: Move offloading-code to a new file > >> error: patch failed: lib/netdev-dpdk.c:4240 > >> error: lib/netdev-dpdk.c: patch does not apply > >> .git/rebase-apply/patch:1564: new blank line at EOF. > >> + > >> Patch failed at 0002 netdev-dpdk: Move offloading-code to a new file > >> > >> I'm not sure why it complains about 'lib/netdev-dpdk.c' because the > >> issue is the blanlk line at the end of netdev-rte-offloads.c. Hopefully it is fixed in v3. > >> > >> On 21.02.2019 15:07, Ophir Munk wrote: > >>> From: Roni Bar Yanai <roniba@mellanox.com> > >>> > >>> Hardware offloading-code is moved to a new file called > >> > >> Just curious, why you writing "offloading-code" as a single dash > >> separated word? > >> > >>> netdev-rte-offloads.c. The original offloading-code is copied as is > >> > >> IMHO, it's a good time to make style changes in the code, because > >> current functions doesn't follow a lot of OVS coding style guidelines > >> and we're touching them anyway. > >> > > > > I agree it is a good time, but I think it should be done on a separate commit, > because if > > something breaks (although all are style changes), it will be hard to find the > changes on git log. The entire file > > Will be + . > > You already made mistakes while rebasing the copy-pasted code. > Thorough review is required anyway. Moving of few braces will > not change anything. 'sizeof' related changes are mostly obvious. > > If you wish, I could handle 'n_rxq' usage update in a separate > patch as it's not really a style change. > > > Thanks Ilya. I will send it all in v3 > > > >>> --- /dev/null > >>> +++ b/lib/netdev-rte-offloads.c > >>> @@ -0,0 +1,775 @@ > >>> +/* > >>> + * Copyright (c) 2019 Mellanox Technologies, Ltd. > >> > >> I'm not a layer but it seems strange to have above copyright for the > >> file with copy-pasted code written by other people. > >> However, I'm not sure how this should look like. In v3 will add 2 Copyrights (original + Mellanox) > >> > >>> + * > >>> + * Licensed under the Apache License, Version 2.0 (the "License"); > >>> + * you may not use this file except in compliance with the License. > >>> + * You may obtain a copy of the License at: > >>> + * > >>> + * > >> > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fww > >> w.apache.org%2Flicenses%2FLICENSE- > >> > 2.0&data=02%7C01%7Croniba%40mellanox.com%7C1cbceab6b5394a30 > >> > 6a9e08d69818689d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6 > >> > 36863627943149395&sdata=GFoau2tQaeBk6KwZcleg4lZcbN%2B11WIBb > >> Xb%2FfE50eE0%3D&reserved=0 > >>> + * > >>> + * Unless required by applicable law or agreed to in writing, software > >>> + * distributed under the License is distributed on an "AS IS" BASIS, > >>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either > express > >> or implied. > >>> + * See the License for the specific language governing permissions and > >>> + * limitations under the License. > >>> + */ > >>> +#include <config.h> > >> > >> Include of "netdev-rte-offloads.h" should be here. > >> Blank line. > >> > >>> +#include <rte_flow.h> > >> > >> Blank line. Fixed in v3 > >> > >>> +#include "netdev-rte-offloads.h" > >>> +#include "netdev-provider.h" > >>> +#include "dpif-netdev.h" > >> > >> Do you really need above header ? > >> Yes we do > >>> +#include "cmap.h" > >>> +#include "openvswitch/vlog.h" > >>> +#include "openvswitch/match.h" > >>> +#include "packets.h" > >>> +#include "uuid.h" > >> > >> Above headers should go in alphabetical order. > >> > >> For the style guidelines please refer: > >> Documentation/internals/contributing/coding-style.rst > >> > >>> + > >>> +VLOG_DEFINE_THIS_MODULE(netdev_rte_offloads); > >>> + > >>> +/* > >>> + * A mapping from ufid to dpdk rte_flow. > >>> + */ > >>> +static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; > >>> + > >>> +struct ufid_to_rte_flow_data { > >>> + struct cmap_node node; > >>> + ovs_u128 ufid; > >>> + struct rte_flow *rte_flow; > >>> +}; > >>> + > >>> + > >>> +/* Find rte_flow with @ufid */ > >>> +static struct rte_flow * > >>> +ufid_to_rte_flow_find(const ovs_u128 *ufid) { > >> > >> The '{' should go on a next line in function definition. > >> Fixed in v3 > >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >>> + struct ufid_to_rte_flow_data *data; > >>> + > >>> + CMAP_FOR_EACH_WITH_HASH (data, node, hash, > &ufid_to_rte_flow) > >> { > >>> + if (ovs_u128_equals(*ufid, data->ufid)) { > >>> + return data->rte_flow; > >>> + } > >>> + } > >>> + > >>> + return NULL; > >>> +} > >>> + > >>> +static inline void > >>> +ufid_to_rte_flow_associate(const ovs_u128 *ufid, > >>> + struct rte_flow *rte_flow) { > >> > >> Same. > >> Fixed in v3 > >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >>> + struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); > >> > >> No need to parenthesize the operand of sizeof. > >> Fixed in v3 > >>> + > >>> + /* > >>> + * We should not simply overwrite an existing rte flow. > >>> + * We should have deleted it first before re-adding it. > >>> + * Thus, if following assert triggers, something is wrong: > >>> + * the rte_flow is not destroyed. > >>> + */ > >>> + ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); > >>> + > >>> + data->ufid = *ufid; > >>> + data->rte_flow = rte_flow; > >>> + > >>> + cmap_insert(&ufid_to_rte_flow, > >>> + CONST_CAST(struct cmap_node *, &data->node), hash); > >>> +} > >>> + > >>> +static inline void > >>> +ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { > >> > >> Same. > >> Fixed in v3 > >>> + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); > >> > >> sizeof *ufid > >> Fixed in v3 > >> > >> Space needed between the cast and variable: '(struct uuid *) ufid'. > >> Fixed in v3 > >>> +} > >>> + > >>> +/* > >>> + ... > >>> +static void > >>> +add_flow_pattern(struct flow_patterns *patterns, enum > >> rte_flow_item_type type, > >>> + const void *spec, const void *mask) { > >> > >> ditto > >> Fixed in v3 > >>> + int cnt = patterns->cnt; > >>> + > >>> + if (cnt == 0) { > >>> + patterns->current_max = 8; > >>> + patterns->items = xcalloc(patterns->current_max, > >>> + sizeof(struct rte_flow_item)); > >> > >> sizeof *patterns->items Fixed in v3 > >> > >>> + } else if (cnt == patterns->current_max) { > >>> + patterns->current_max *= 2; > >>> + patterns->items = xrealloc(patterns->items, patterns- > >current_max * > >>> + sizeof(struct rte_flow_item)); > >> > >> sizeof *patterns->items > >> Fixed in v3 > >>> + } > >>> + > >>> + patterns->items[cnt].type = type; > >>> + patterns->items[cnt].spec = spec; > >>> + patterns->items[cnt].mask = mask; > >>> + patterns->items[cnt].last = NULL; > >>> + dump_flow_pattern(&patterns->items[cnt]); > >>> + patterns->cnt++; > >>> +} > >>> + > >>> +static void > >>> +add_flow_action(struct flow_actions *actions, enum > >> rte_flow_action_type type, > >>> + const void *conf) > >>> +{ > >>> + int cnt = actions->cnt; > >>> + > >>> + if (cnt == 0) { > >>> + actions->current_max = 8; > >>> + actions->actions = xcalloc(actions->current_max, > >>> + sizeof(struct rte_flow_action)); > >> > >> sizeof *actions->actions > >> Fixed in v3 > >>> + } else if (cnt == actions->current_max) { > >>> + actions->current_max *= 2; > >>> + actions->actions = xrealloc(actions->actions, actions->current_max > * > >>> + sizeof(struct rte_flow_action)); > >> > >> sizeof *actions->actions > >> Fixed in v3 > >>> + } > >>> + > >>> + actions->actions[cnt].type = type; > >>> + actions->actions[cnt].conf = conf; > >>> + actions->cnt++; > >>> +} > >>> + > >>> +struct action_rss_data { > >>> + struct rte_flow_action_rss conf; > >>> + uint16_t queue[0]; > >>> +}; > >>> + > >>> +static struct action_rss_data * > >>> +add_flow_rss_action(struct flow_actions *actions, > >>> + struct netdev *netdev) { > >> > >> ditto Fixed in v3 > >> > >>> + int i; > >>> + struct action_rss_data *rss_data; > >>> + > >>> + rss_data = xmalloc(sizeof(struct action_rss_data) + > >>> + sizeof(uint16_t) * netdev->n_rxq); > >> > >> sizeof *rss_data + netdev_n_rxq(netdev) * sizeof rss_data->queue[0] > >> Fixed in v3 > >> As we're not inside the netdev we need to use getter 'netdev_n_rxq'. Fixed in v3 > >> > >>> + *rss_data = (struct action_rss_data) { > >>> + .conf = (struct rte_flow_action_rss) { > >>> + .func = RTE_ETH_HASH_FUNCTION_DEFAULT, > >>> + .level = 0, > >>> + .types = 0, > >>> + .queue_num = netdev->n_rxq, > >> > >> netdev_n_rxq(netdev) > >> > >>> + .queue = rss_data->queue, > >>> + .key_len = 0, > >>> + .key = NULL > >>> + }, > >>> + }; > >>> + > >>> + /* Override queue array with default */ > >>> + for (i = 0; i < netdev->n_rxq; i++) { > >> > >> netdev_n_rxq(netdev) Fixed in v3 > >> > >>> + rss_data->queue[i] = i; > >>> + } > >>> + > >>> + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data- > >>> conf); > >>> + > >>> + return rss_data; > >>> +} > >>> + > >>> +static int > >>> +netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, > >>> + const struct match *match, > >>> + struct nlattr *nl_actions OVS_UNUSED, > >>> + size_t actions_len OVS_UNUSED, > >>> + const ovs_u128 *ufid, > >>> + struct offload_info *info) { > >> > >> ditto > >> > >> Fixed in v3 > >> I'll skip the rest of this function as I already have a path with refactoring: > >> OK. Fixed code styling but letting you finalize the refactoring patch (unless you would like to incorporate it here as part of this patch. > >> > https://emea01.safelinks.protection.outlook.com/?url=https%3A%2F%2Fpat > >> > chwork.ozlabs.org%2Fpatch%2F1037609%2F&data=02%7C01%7Croniba > >> > %40mellanox.com%7C1cbceab6b5394a306a9e08d69818689d%7Ca652971c7d2 > >> > e4d9ba6a4d149256f461b%7C0%7C0%7C636863627943149395&sdata=Z7 > >> > uBounMbAB6QF0LivTDkOjfyy%2B%2BrudgT96z77glGAE%3D&reserved= > >> 0 > >> > >>> + const struct rte_flow_attr flow_attr = { > >>> + .group = 0, > >>> + .priority = 0, > >>> + .ingress = 1, > >>> + .egress = 0 > >>> + }; > >>> + struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; > >>> + struct flow_actions actions = { .actions = NULL, .cnt = 0 }; > >>> + struct rte_flow *flow; > >>> + struct rte_flow_error error; > >>> + uint8_t *ipv4_next_proto_mask = NULL; > >>> + int ret = 0; > >>> + > >>> + /* Eth */ > >>> + struct rte_flow_item_eth eth_spec; > >>> + struct rte_flow_item_eth eth_mask; > >>> + if (!eth_addr_is_zero(match->wc.masks.dl_src) || > >>> + !eth_addr_is_zero(match->wc.masks.dl_dst)) { > >>> + memset(ð_spec, 0, sizeof eth_spec); > >>> + memset(ð_mask, 0, sizeof eth_mask); > >>> + rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof > >> eth_spec.dst); > >>> + rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof > >> eth_spec.src); > >>> + eth_spec.type = match->flow.dl_type; > >>> + > >>> + rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, > >>> + sizeof eth_mask.dst); > >>> + rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, > >>> + sizeof eth_mask.src); > >>> + eth_mask.type = match->wc.masks.dl_type; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, > >>> + ð_spec, ð_mask); > >>> + } else { > >>> + /* > >>> + * If user specifies a flow (like UDP flow) without L2 patterns, > >>> + * OVS will at least set the dl_type. Normally, it's enough to > >>> + * create an eth pattern just with it. Unluckily, some Intel's > >>> + * NIC (such as XL710) doesn't support that. Below is a workaround, > >>> + * which simply matches any L2 pkts. > >>> + */ > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, > >> NULL); > >>> + } > >>> + > >>> + /* VLAN */ > >>> + struct rte_flow_item_vlan vlan_spec; > >>> + struct rte_flow_item_vlan vlan_mask; > >>> + if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { > >>> + memset(&vlan_spec, 0, sizeof vlan_spec); > >>> + memset(&vlan_mask, 0, sizeof vlan_mask); > >>> + vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); > >>> + vlan_mask.tci = match->wc.masks.vlans[0].tci & > ~htons(VLAN_CFI); > >>> + > >>> + /* match any protocols */ > >>> + vlan_mask.inner_type = 0; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, > >>> + &vlan_spec, &vlan_mask); > >>> + } > >>> + > >>> + /* IP v4 */ > >>> + uint8_t proto = 0; > >>> + struct rte_flow_item_ipv4 ipv4_spec; > >>> + struct rte_flow_item_ipv4 ipv4_mask; > >>> + if (match->flow.dl_type == htons(ETH_TYPE_IP)) { > >>> + memset(&ipv4_spec, 0, sizeof ipv4_spec); > >>> + memset(&ipv4_mask, 0, sizeof ipv4_mask); > >>> + > >>> + ipv4_spec.hdr.type_of_service = match->flow.nw_tos; > >>> + ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; > >>> + ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; > >>> + ipv4_spec.hdr.src_addr = match->flow.nw_src; > >>> + ipv4_spec.hdr.dst_addr = match->flow.nw_dst; > >>> + > >>> + ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; > >>> + ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; > >>> + ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; > >>> + ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; > >>> + ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, > >>> + &ipv4_spec, &ipv4_mask); > >>> + > >>> + /* Save proto for L4 protocol setup */ > >>> + proto = ipv4_spec.hdr.next_proto_id & > >>> + ipv4_mask.hdr.next_proto_id; > >>> + > >>> + /* Remember proto mask address for later modification */ > >>> + ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; > >>> + } > >>> + > >>> + if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && > >>> + proto != IPPROTO_SCTP && proto != IPPROTO_TCP && > >>> + (match->wc.masks.tp_src || > >>> + match->wc.masks.tp_dst || > >>> + match->wc.masks.tcp_flags)) { > >>> + VLOG_DBG("L4 Protocol (%u) not supported", proto); > >>> + ret = -1; > >>> + goto out; > >>> + } > >>> + > >>> + if ((match->wc.masks.tp_src && match->wc.masks.tp_src != > >> OVS_BE16_MAX) || > >>> + (match->wc.masks.tp_dst && match->wc.masks.tp_dst != > >> OVS_BE16_MAX)) { > >>> + ret = -1; > >>> + goto out; > >>> + } > >>> + > >>> + struct rte_flow_item_tcp tcp_spec; > >>> + struct rte_flow_item_tcp tcp_mask; > >>> + if (proto == IPPROTO_TCP) { > >>> + memset(&tcp_spec, 0, sizeof tcp_spec); > >>> + memset(&tcp_mask, 0, sizeof tcp_mask); > >>> + tcp_spec.hdr.src_port = match->flow.tp_src; > >>> + tcp_spec.hdr.dst_port = match->flow.tp_dst; > >>> + tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; > >>> + tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; > >>> + > >>> + tcp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> + tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> + tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; > >>> + tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & > 0xff; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, > >>> + &tcp_spec, &tcp_mask); > >>> + > >>> + /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match > >> */ > >>> + if (ipv4_next_proto_mask) { > >>> + *ipv4_next_proto_mask = 0; > >>> + } > >>> + goto end_proto_check; > >>> + } > >>> + > >>> + struct rte_flow_item_udp udp_spec; > >>> + struct rte_flow_item_udp udp_mask; > >>> + if (proto == IPPROTO_UDP) { > >>> + memset(&udp_spec, 0, sizeof udp_spec); > >>> + memset(&udp_mask, 0, sizeof udp_mask); > >>> + udp_spec.hdr.src_port = match->flow.tp_src; > >>> + udp_spec.hdr.dst_port = match->flow.tp_dst; > >>> + > >>> + udp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> + udp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, > >>> + &udp_spec, &udp_mask); > >>> + > >>> + /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto > match > >> */ > >>> + if (ipv4_next_proto_mask) { > >>> + *ipv4_next_proto_mask = 0; > >>> + } > >>> + goto end_proto_check; > >>> + } > >>> + > >>> + struct rte_flow_item_sctp sctp_spec; > >>> + struct rte_flow_item_sctp sctp_mask; > >>> + if (proto == IPPROTO_SCTP) { > >>> + memset(&sctp_spec, 0, sizeof sctp_spec); > >>> + memset(&sctp_mask, 0, sizeof sctp_mask); > >>> + sctp_spec.hdr.src_port = match->flow.tp_src; > >>> + sctp_spec.hdr.dst_port = match->flow.tp_dst; > >>> + > >>> + sctp_mask.hdr.src_port = match->wc.masks.tp_src; > >>> + sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, > >>> + &sctp_spec, &sctp_mask); > >>> + > >>> + /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto > match > >> */ > >>> + if (ipv4_next_proto_mask) { > >>> + *ipv4_next_proto_mask = 0; > >>> + } > >>> + goto end_proto_check; > >>> + } > >>> + > >>> + struct rte_flow_item_icmp icmp_spec; > >>> + struct rte_flow_item_icmp icmp_mask; > >>> + if (proto == IPPROTO_ICMP) { > >>> + memset(&icmp_spec, 0, sizeof icmp_spec); > >>> + memset(&icmp_mask, 0, sizeof icmp_mask); > >>> + icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); > >>> + icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); > >>> + > >>> + icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match- > >>> wc.masks.tp_src); > >>> + icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match- > >>> wc.masks.tp_dst); > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, > >>> + &icmp_spec, &icmp_mask); > >>> + > >>> + /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto > >> match */ > >>> + if (ipv4_next_proto_mask) { > >>> + *ipv4_next_proto_mask = 0; > >>> + } > >>> + goto end_proto_check; > >>> + } > >>> + > >>> +end_proto_check: > >>> + > >>> + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, > >> NULL); > >>> + > >>> + struct rte_flow_action_mark mark; > >>> + struct action_rss_data *rss; > >>> + > >>> + mark.id = info->flow_mark; > >>> + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, > &mark); > >>> + > >>> + > >>> + rss = add_flow_rss_action(&actions, netdev); > >>> + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); > >>> + > >>> + flow = netdev_dpdk_rte_flow_create(netdev, > >> &flow_attr,patterns.items, > >>> + actions.actions, &error); > >>> + > >>> + free(rss); > >>> + if (!flow) { > >>> + VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", > >>> + netdev_get_name(netdev), error.type, error.message); > >>> + ret = -1; > >>> + goto out; > >>> + } > >>> + ufid_to_rte_flow_associate(ufid, flow); > >>> + VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", > >>> + netdev_get_name(netdev), flow, UUID_ARGS((struct uuid > >> *)ufid)); > >>> + > >>> +out: > >>> + free(patterns.items); > >>> + free(actions.actions); > >>> + return ret; > >>> +} > >> > >> > >> Code below was refactored recently. You're removing the new version > from > >> netdev-dpdk.c, but adding the old one here. > >> Fixed in v3 > >>> +static bool > >>> +is_all_zero(const void *addr, size_t n) { > >>> + size_t i = 0; > >>> + const uint8_t *p = (uint8_t *)addr; > >>> + > >>> + for (i = 0; i < n; i++) { > >>> + if (p[i] != 0) { > >>> + return false; > >>> + } > >>> + } > >>> + > >>> + return true; > >>> +} > >>> + > >>> +/* > >>> + * Check if any unsupported flow patterns are specified. > >>> + */ > >>> +static int > >>> +netdev_dpdk_validate_flow(const struct match *match) { > >>> + struct match match_zero_wc; > >>> + > >>> + /* Create a wc-zeroed version of flow */ > >>> + match_init(&match_zero_wc, &match->flow, &match->wc); > >>> + > >>> + if (!is_all_zero(&match_zero_wc.flow.tunnel, > >>> + sizeof(match_zero_wc.flow.tunnel))) { > >>> + goto err; > >>> + } > >>> + > >>> + if (match->wc.masks.metadata || > >>> + match->wc.masks.skb_priority || > >>> + match->wc.masks.pkt_mark || > >>> + match->wc.masks.dp_hash) { > >>> + goto err; > >>> + } > >>> + > >>> + /* recirc id must be zero */ > >>> + if (match_zero_wc.flow.recirc_id) { > >>> + goto err; > >>> + } > >>> + > >>> + if (match->wc.masks.ct_state || > >>> + match->wc.masks.ct_nw_proto || > >>> + match->wc.masks.ct_zone || > >>> + match->wc.masks.ct_mark || > >>> + match->wc.masks.ct_label.u64.hi || > >>> + match->wc.masks.ct_label.u64.lo) { > >>> + goto err; > >>> + } > >>> + > >>> + if (match->wc.masks.conj_id || > >>> + match->wc.masks.actset_output) { > >>> + goto err; > >>> + } > >>> + > >>> + /* unsupported L2 */ > >>> + if (!is_all_zero(&match->wc.masks.mpls_lse, > >>> + sizeof(match_zero_wc.flow.mpls_lse))) { > >>> + goto err; > >>> + } > >>> + > >>> + /* unsupported L3 */ > >>> + if (match->wc.masks.ipv6_label || > >>> + match->wc.masks.ct_nw_src || > >>> + match->wc.masks.ct_nw_dst || > >>> + !is_all_zero(&match->wc.masks.ipv6_src, sizeof(struct in6_addr)) > || > >>> + !is_all_zero(&match->wc.masks.ipv6_dst, sizeof(struct in6_addr)) > || > >>> + !is_all_zero(&match->wc.masks.ct_ipv6_src, sizeof(struct > in6_addr)) > >> || > >>> + !is_all_zero(&match->wc.masks.ct_ipv6_dst, sizeof(struct > in6_addr)) > >> || > >>> + !is_all_zero(&match->wc.masks.nd_target, sizeof(struct in6_addr)) > || > >>> + !is_all_zero(&match->wc.masks.nsh, sizeof(struct ovs_key_nsh)) > || > >>> + !is_all_zero(&match->wc.masks.arp_sha, sizeof(struct eth_addr)) > || > >>> + !is_all_zero(&match->wc.masks.arp_tha, sizeof(struct eth_addr))) > { > >>> + goto err; > >>> + } > >>> + > >>> + /* If fragmented, then don't HW accelerate - for now */ > >>> + if (match_zero_wc.flow.nw_frag) { > >>> + goto err; > >>> + } > >>> + > >>> + /* unsupported L4 */ > >>> + if (match->wc.masks.igmp_group_ip4 || > >>> + match->wc.masks.ct_tp_src || > >>> + match->wc.masks.ct_tp_dst) { > >>> + goto err; > >>> + } > >>> + > >>> + return 0; > >>> + > >>> +err: > >>> + VLOG_ERR("cannot HW accelerate this flow due to unsupported > >> protocols"); > >>> + return -1; > >>> +} > >>> + > >>> +static int > >>> +netdev_dpdk_destroy_rte_flow(struct netdev *netdev, > >>> + const ovs_u128 *ufid, > >>> + struct rte_flow *rte_flow) { > >> > >> ditto > >> Fixed in v3 > >>> + struct rte_flow_error error; > >>> + int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); > >>> + > >>> + if (ret == 0) { > >>> + ufid_to_rte_flow_disassociate(ufid); > >>> + VLOG_DBG("%s: removed rte flow %p associated with ufid " > >> UUID_FMT "\n", > >>> + netdev_get_name(netdev), rte_flow, > >>> + UUID_ARGS((struct uuid *)ufid)); > >>> + } else { > >>> + VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", > >>> + netdev_get_name(netdev), error.type, error.message); > >>> + } > >>> + > >>> + return ret; > >>> +} > >>> + > >>> +int > >>> +netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, > >>> + struct nlattr *actions, size_t actions_len, > >>> + const ovs_u128 *ufid, struct offload_info *info, > >>> + struct dpif_flow_stats *stats OVS_UNUSED) { > >> > >> ditto > >> Fixed in v3 > >>> + struct rte_flow *rte_flow; > >>> + int ret; > >>> + > >>> + /* > >>> + * If an old rte_flow exists, it means it's a flow modification. > >>> + * Here destroy the old rte flow first before adding a new one. > >>> + */ > >>> + rte_flow = ufid_to_rte_flow_find(ufid); > >>> + if (rte_flow) { > >>> + ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > >>> + if (ret < 0) { > >>> + return ret; > >>> + } > >>> + } > >>> + > >>> + ret = netdev_dpdk_validate_flow(match); > >>> + if (ret < 0) { > >>> + return ret; > >>> + } > >>> + > >>> + return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, > >>> + actions_len, ufid, info); > >>> +} > >>> + > >>> +int > >>> +netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, > >>> + struct dpif_flow_stats *stats OVS_UNUSED) { > >> > >> ditto > >> Fixed in v3 > >>> + > >>> + struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); > >>> + > >>> + if (!rte_flow) { > >>> + return -1; > >>> + } > >>> + > >>> + return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); > >>> +} > >>> + > >>> diff --git a/lib/netdev-rte-offloads.h b/lib/netdev-rte-offloads.h > >>> new file mode 100644 > >>> index 0000000..573e214 > >>> --- /dev/null > >>> +++ b/lib/netdev-rte-offloads.h > >>> @@ -0,0 +1,38 @@ > >>> +/* > >>> + * Copyright (c) 2019 Mellanox Technologies, Ltd. > >>> + * > >>> + * Licensed under the Apache License, Version 2.0 (the "License"); > >>> + * you may not use this file except in compliance with the License. > >>> + * You may obtain a copy of the License at: > >>> + * > >>> + * > >> > https://emea01.safelinks.protection.outlook.com/?url=http%3A%2F%2Fww > >> w.apache.org%2Flicenses%2FLICENSE- > >> > 2.0&data=02%7C01%7Croniba%40mellanox.com%7C1cbceab6b5394a30 > >> > 6a9e08d69818689d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C6 > >> > 36863627943159409&sdata=VmwZSLlGZaYByKuVjr9CEP9ECDX4Y1ClI7Uq > >> 1wdErx0%3D&reserved=0 > >>> + * > >>> + * Unless required by applicable law or agreed to in writing, software > >>> + * distributed under the License is distributed on an "AS IS" BASIS, > >>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either > express > >> or implied. > >>> + * See the License for the specific language governing permissions and > >>> + * limitations under the License. > >>> + */ > >>> + > >>> +#ifndef NETDEV_VPORT_OFFLOADS_H > >>> +#define NETDEV_VPORT_OFFLOADS_H 1 > >>> + > >>> +#include "openvswitch/types.h" > >> > >> It'll be good to have a blank line here. > >> Fixed in v3 > >>> +struct netdev; > >>> +struct match; > >>> +struct nlattr; > >>> +struct offload_info; > >>> +struct dpif_flow_stats; > >>> + > >>> +int netdev_dpdk_flow_put(struct netdev *netdev, struct match > *match, > >>> + struct nlattr *actions, size_t actions_len, > >>> + const ovs_u128 *ufid, struct offload_info *info, > >>> + struct dpif_flow_stats *stats OVS_UNUSED); > >>> +int netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 > *ufid, > >>> + struct dpif_flow_stats *stats OVS_UNUSED); > >>> + > >>> +#define DPDK_FLOW_OFFLOAD_API \ > >>> + .flow_put = netdev_dpdk_flow_put, \ > >>> + .flow_del = netdev_dpdk_flow_del > >>> + > >>> +#endif /* netdev-rte-offloads.h */ > >>>
diff --git a/lib/automake.mk b/lib/automake.mk index bae032b..cc5dccf 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -138,6 +138,7 @@ lib_libopenvswitch_la_SOURCES = \ lib/netdev-dpdk.h \ lib/netdev-dummy.c \ lib/netdev-provider.h \ + lib/netdev-rte-offloads.h \ lib/netdev-vport.c \ lib/netdev-vport.h \ lib/netdev-vport-private.h \ @@ -411,7 +412,8 @@ endif if DPDK_NETDEV lib_libopenvswitch_la_SOURCES += \ lib/dpdk.c \ - lib/netdev-dpdk.c + lib/netdev-dpdk.c \ + lib/netdev-rte-offloads.c else lib_libopenvswitch_la_SOURCES += \ lib/dpdk-stub.c diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 163d4ec..c4228d2 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -47,6 +47,7 @@ #include "dpif-netdev.h" #include "fatal-signal.h" #include "netdev-provider.h" +#include "netdev-rte-offloads.h" #include "netdev-vport.h" #include "odp-util.h" #include "openvswitch/dynamic-string.h" @@ -179,17 +180,6 @@ static const struct rte_eth_conf port_conf = { }; /* - * A mapping from ufid to dpdk rte_flow. - */ -static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; - -struct ufid_to_rte_flow_data { - struct cmap_node node; - ovs_u128 ufid; - struct rte_flow *rte_flow; -}; - -/* * These callbacks allow virtio-net devices to be added to vhost ports when * configuration has been fully completed. */ @@ -4240,721 +4230,6 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev, return flow; } -/* Find rte_flow with @ufid */ -static struct rte_flow * -ufid_to_rte_flow_find(const ovs_u128 *ufid) { - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); - struct ufid_to_rte_flow_data *data; - - CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { - if (ovs_u128_equals(*ufid, data->ufid)) { - return data->rte_flow; - } - } - - return NULL; -} - -static inline void -ufid_to_rte_flow_associate(const ovs_u128 *ufid, - struct rte_flow *rte_flow) { - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); - struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); - - /* - * We should not simply overwrite an existing rte flow. - * We should have deleted it first before re-adding it. - * Thus, if following assert triggers, something is wrong: - * the rte_flow is not destroyed. - */ - ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); - - data->ufid = *ufid; - data->rte_flow = rte_flow; - - cmap_insert(&ufid_to_rte_flow, - CONST_CAST(struct cmap_node *, &data->node), hash); -} - -static inline void -ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { - size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); - struct ufid_to_rte_flow_data *data; - - CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { - if (ovs_u128_equals(*ufid, data->ufid)) { - cmap_remove(&ufid_to_rte_flow, - CONST_CAST(struct cmap_node *, &data->node), hash); - ovsrcu_postpone(free, data); - return; - } - } - - VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", - UUID_ARGS((struct uuid *)ufid)); -} - -/* - * To avoid individual xrealloc calls for each new element, a 'curent_max' - * is used to keep track of current allocated number of elements. Starts - * by 8 and doubles on each xrealloc call - */ -struct flow_patterns { - struct rte_flow_item *items; - int cnt; - int current_max; -}; - -struct flow_actions { - struct rte_flow_action *actions; - int cnt; - int current_max; -}; - -static void -dump_flow_pattern(struct rte_flow_item *item) -{ - struct ds s; - - if (!VLOG_IS_DBG_ENABLED() || item->type == RTE_FLOW_ITEM_TYPE_END) { - return; - } - - ds_init(&s); - - if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { - const struct rte_flow_item_eth *eth_spec = item->spec; - const struct rte_flow_item_eth *eth_mask = item->mask; - - ds_put_cstr(&s, "rte flow eth pattern:\n"); - if (eth_spec) { - ds_put_format(&s, - " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " - "type=0x%04" PRIx16"\n", - ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), - ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), - ntohs(eth_spec->type)); - } else { - ds_put_cstr(&s, " Spec = null\n"); - } - if (eth_mask) { - ds_put_format(&s, - " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " - "type=0x%04"PRIx16"\n", - ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), - ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), - eth_mask->type); - } else { - ds_put_cstr(&s, " Mask = null\n"); - } - } - - if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { - const struct rte_flow_item_vlan *vlan_spec = item->spec; - const struct rte_flow_item_vlan *vlan_mask = item->mask; - - ds_put_cstr(&s, "rte flow vlan pattern:\n"); - if (vlan_spec) { - ds_put_format(&s, - " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", - ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); - } else { - ds_put_cstr(&s, " Spec = null\n"); - } - - if (vlan_mask) { - ds_put_format(&s, - " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", - ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); - } else { - ds_put_cstr(&s, " Mask = null\n"); - } - } - - if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { - const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; - const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; - - ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); - if (ipv4_spec) { - ds_put_format(&s, - " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 - ", src="IP_FMT", dst="IP_FMT"\n", - ipv4_spec->hdr.type_of_service, - ipv4_spec->hdr.time_to_live, - ipv4_spec->hdr.next_proto_id, - IP_ARGS(ipv4_spec->hdr.src_addr), - IP_ARGS(ipv4_spec->hdr.dst_addr)); - } else { - ds_put_cstr(&s, " Spec = null\n"); - } - if (ipv4_mask) { - ds_put_format(&s, - " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 - ", src="IP_FMT", dst="IP_FMT"\n", - ipv4_mask->hdr.type_of_service, - ipv4_mask->hdr.time_to_live, - ipv4_mask->hdr.next_proto_id, - IP_ARGS(ipv4_mask->hdr.src_addr), - IP_ARGS(ipv4_mask->hdr.dst_addr)); - } else { - ds_put_cstr(&s, " Mask = null\n"); - } - } - - if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { - const struct rte_flow_item_udp *udp_spec = item->spec; - const struct rte_flow_item_udp *udp_mask = item->mask; - - ds_put_cstr(&s, "rte flow udp pattern:\n"); - if (udp_spec) { - ds_put_format(&s, - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", - ntohs(udp_spec->hdr.src_port), - ntohs(udp_spec->hdr.dst_port)); - } else { - ds_put_cstr(&s, " Spec = null\n"); - } - if (udp_mask) { - ds_put_format(&s, - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", - udp_mask->hdr.src_port, - udp_mask->hdr.dst_port); - } else { - ds_put_cstr(&s, " Mask = null\n"); - } - } - - if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { - const struct rte_flow_item_sctp *sctp_spec = item->spec; - const struct rte_flow_item_sctp *sctp_mask = item->mask; - - ds_put_cstr(&s, "rte flow sctp pattern:\n"); - if (sctp_spec) { - ds_put_format(&s, - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", - ntohs(sctp_spec->hdr.src_port), - ntohs(sctp_spec->hdr.dst_port)); - } else { - ds_put_cstr(&s, " Spec = null\n"); - } - if (sctp_mask) { - ds_put_format(&s, - " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", - sctp_mask->hdr.src_port, - sctp_mask->hdr.dst_port); - } else { - ds_put_cstr(&s, " Mask = null\n"); - } - } - - if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { - const struct rte_flow_item_icmp *icmp_spec = item->spec; - const struct rte_flow_item_icmp *icmp_mask = item->mask; - - ds_put_cstr(&s, "rte flow icmp pattern:\n"); - if (icmp_spec) { - ds_put_format(&s, - " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", - icmp_spec->hdr.icmp_type, - icmp_spec->hdr.icmp_code); - } else { - ds_put_cstr(&s, " Spec = null\n"); - } - if (icmp_mask) { - ds_put_format(&s, - " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", - icmp_spec->hdr.icmp_type, - icmp_spec->hdr.icmp_code); - } else { - ds_put_cstr(&s, " Mask = null\n"); - } - } - - if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { - const struct rte_flow_item_tcp *tcp_spec = item->spec; - const struct rte_flow_item_tcp *tcp_mask = item->mask; - - ds_put_cstr(&s, "rte flow tcp pattern:\n"); - if (tcp_spec) { - ds_put_format(&s, - " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", - ntohs(tcp_spec->hdr.src_port), - ntohs(tcp_spec->hdr.dst_port), - tcp_spec->hdr.data_off, - tcp_spec->hdr.tcp_flags); - } else { - ds_put_cstr(&s, " Spec = null\n"); - } - if (tcp_mask) { - ds_put_format(&s, - " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 - ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", - tcp_mask->hdr.src_port, - tcp_mask->hdr.dst_port, - tcp_mask->hdr.data_off, - tcp_mask->hdr.tcp_flags); - } else { - ds_put_cstr(&s, " Mask = null\n"); - } - } - - VLOG_DBG("%s", ds_cstr(&s)); - ds_destroy(&s); -} - -static void -add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type, - const void *spec, const void *mask) { - int cnt = patterns->cnt; - - if (cnt == 0) { - patterns->current_max = 8; - patterns->items = xcalloc(patterns->current_max, - sizeof(struct rte_flow_item)); - } else if (cnt == patterns->current_max) { - patterns->current_max *= 2; - patterns->items = xrealloc(patterns->items, patterns->current_max * - sizeof(struct rte_flow_item)); - } - - patterns->items[cnt].type = type; - patterns->items[cnt].spec = spec; - patterns->items[cnt].mask = mask; - patterns->items[cnt].last = NULL; - dump_flow_pattern(&patterns->items[cnt]); - patterns->cnt++; -} - -static void -add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type, - const void *conf) -{ - int cnt = actions->cnt; - - if (cnt == 0) { - actions->current_max = 8; - actions->actions = xcalloc(actions->current_max, - sizeof(struct rte_flow_action)); - } else if (cnt == actions->current_max) { - actions->current_max *= 2; - actions->actions = xrealloc(actions->actions, actions->current_max * - sizeof(struct rte_flow_action)); - } - - actions->actions[cnt].type = type; - actions->actions[cnt].conf = conf; - actions->cnt++; -} - -struct action_rss_data { - struct rte_flow_action_rss conf; - uint16_t queue[0]; -}; - -static struct action_rss_data * -add_flow_rss_action(struct flow_actions *actions, - struct netdev *netdev) { - int i; - struct action_rss_data *rss_data; - - rss_data = xmalloc(sizeof(struct action_rss_data) + - sizeof(uint16_t) * netdev->n_rxq); - *rss_data = (struct action_rss_data) { - .conf = (struct rte_flow_action_rss) { - .func = RTE_ETH_HASH_FUNCTION_DEFAULT, - .level = 0, - .types = 0, - .queue_num = netdev->n_rxq, - .queue = rss_data->queue, - .key_len = 0, - .key = NULL - }, - }; - - /* Override queue array with default */ - for (i = 0; i < netdev->n_rxq; i++) { - rss_data->queue[i] = i; - } - - add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf); - - return rss_data; -} - -static int -netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, - const struct match *match, - struct nlattr *nl_actions OVS_UNUSED, - size_t actions_len OVS_UNUSED, - const ovs_u128 *ufid, - struct offload_info *info) { - const struct rte_flow_attr flow_attr = { - .group = 0, - .priority = 0, - .ingress = 1, - .egress = 0 - }; - struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; - struct flow_actions actions = { .actions = NULL, .cnt = 0 }; - struct rte_flow *flow; - struct rte_flow_error error; - uint8_t *ipv4_next_proto_mask = NULL; - int ret = 0; - - /* Eth */ - struct rte_flow_item_eth eth_spec; - struct rte_flow_item_eth eth_mask; - if (!eth_addr_is_zero(match->wc.masks.dl_src) || - !eth_addr_is_zero(match->wc.masks.dl_dst)) { - memset(ð_spec, 0, sizeof eth_spec); - memset(ð_mask, 0, sizeof eth_mask); - rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof eth_spec.dst); - rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof eth_spec.src); - eth_spec.type = match->flow.dl_type; - - rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, - sizeof eth_mask.dst); - rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, - sizeof eth_mask.src); - eth_mask.type = match->wc.masks.dl_type; - - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, - ð_spec, ð_mask); - } else { - /* - * If user specifies a flow (like UDP flow) without L2 patterns, - * OVS will at least set the dl_type. Normally, it's enough to - * create an eth pattern just with it. Unluckily, some Intel's - * NIC (such as XL710) doesn't support that. Below is a workaround, - * which simply matches any L2 pkts. - */ - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); - } - - /* VLAN */ - struct rte_flow_item_vlan vlan_spec; - struct rte_flow_item_vlan vlan_mask; - if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { - memset(&vlan_spec, 0, sizeof vlan_spec); - memset(&vlan_mask, 0, sizeof vlan_mask); - vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); - vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); - - /* match any protocols */ - vlan_mask.inner_type = 0; - - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, - &vlan_spec, &vlan_mask); - } - - /* IP v4 */ - uint8_t proto = 0; - struct rte_flow_item_ipv4 ipv4_spec; - struct rte_flow_item_ipv4 ipv4_mask; - if (match->flow.dl_type == htons(ETH_TYPE_IP)) { - memset(&ipv4_spec, 0, sizeof ipv4_spec); - memset(&ipv4_mask, 0, sizeof ipv4_mask); - - ipv4_spec.hdr.type_of_service = match->flow.nw_tos; - ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; - ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; - ipv4_spec.hdr.src_addr = match->flow.nw_src; - ipv4_spec.hdr.dst_addr = match->flow.nw_dst; - - ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; - ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; - ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; - ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; - ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; - - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, - &ipv4_spec, &ipv4_mask); - - /* Save proto for L4 protocol setup */ - proto = ipv4_spec.hdr.next_proto_id & - ipv4_mask.hdr.next_proto_id; - - /* Remember proto mask address for later modification */ - ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; - } - - if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && - proto != IPPROTO_SCTP && proto != IPPROTO_TCP && - (match->wc.masks.tp_src || - match->wc.masks.tp_dst || - match->wc.masks.tcp_flags)) { - VLOG_DBG("L4 Protocol (%u) not supported", proto); - ret = -1; - goto out; - } - - if ((match->wc.masks.tp_src && match->wc.masks.tp_src != OVS_BE16_MAX) || - (match->wc.masks.tp_dst && match->wc.masks.tp_dst != OVS_BE16_MAX)) { - ret = -1; - goto out; - } - - struct rte_flow_item_tcp tcp_spec; - struct rte_flow_item_tcp tcp_mask; - if (proto == IPPROTO_TCP) { - memset(&tcp_spec, 0, sizeof tcp_spec); - memset(&tcp_mask, 0, sizeof tcp_mask); - tcp_spec.hdr.src_port = match->flow.tp_src; - tcp_spec.hdr.dst_port = match->flow.tp_dst; - tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; - tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; - - tcp_mask.hdr.src_port = match->wc.masks.tp_src; - tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; - tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; - tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; - - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, - &tcp_spec, &tcp_mask); - - /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match */ - if (ipv4_next_proto_mask) { - *ipv4_next_proto_mask = 0; - } - goto end_proto_check; - } - - struct rte_flow_item_udp udp_spec; - struct rte_flow_item_udp udp_mask; - if (proto == IPPROTO_UDP) { - memset(&udp_spec, 0, sizeof udp_spec); - memset(&udp_mask, 0, sizeof udp_mask); - udp_spec.hdr.src_port = match->flow.tp_src; - udp_spec.hdr.dst_port = match->flow.tp_dst; - - udp_mask.hdr.src_port = match->wc.masks.tp_src; - udp_mask.hdr.dst_port = match->wc.masks.tp_dst; - - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, - &udp_spec, &udp_mask); - - /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match */ - if (ipv4_next_proto_mask) { - *ipv4_next_proto_mask = 0; - } - goto end_proto_check; - } - - struct rte_flow_item_sctp sctp_spec; - struct rte_flow_item_sctp sctp_mask; - if (proto == IPPROTO_SCTP) { - memset(&sctp_spec, 0, sizeof sctp_spec); - memset(&sctp_mask, 0, sizeof sctp_mask); - sctp_spec.hdr.src_port = match->flow.tp_src; - sctp_spec.hdr.dst_port = match->flow.tp_dst; - - sctp_mask.hdr.src_port = match->wc.masks.tp_src; - sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; - - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, - &sctp_spec, &sctp_mask); - - /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match */ - if (ipv4_next_proto_mask) { - *ipv4_next_proto_mask = 0; - } - goto end_proto_check; - } - - struct rte_flow_item_icmp icmp_spec; - struct rte_flow_item_icmp icmp_mask; - if (proto == IPPROTO_ICMP) { - memset(&icmp_spec, 0, sizeof icmp_spec); - memset(&icmp_mask, 0, sizeof icmp_mask); - icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); - icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); - - icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match->wc.masks.tp_src); - icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match->wc.masks.tp_dst); - - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, - &icmp_spec, &icmp_mask); - - /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match */ - if (ipv4_next_proto_mask) { - *ipv4_next_proto_mask = 0; - } - goto end_proto_check; - } - -end_proto_check: - - add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL); - - struct rte_flow_action_mark mark; - struct action_rss_data *rss; - - mark.id = info->flow_mark; - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark); - - - rss = add_flow_rss_action(&actions, netdev); - add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); - - flow = netdev_dpdk_rte_flow_create(netdev, &flow_attr,patterns.items, - actions.actions, &error); - - free(rss); - if (!flow) { - VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", - netdev_get_name(netdev), error.type, error.message); - ret = -1; - goto out; - } - ufid_to_rte_flow_associate(ufid, flow); - VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", - netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); - -out: - free(patterns.items); - free(actions.actions); - return ret; -} - -/* - * Check if any unsupported flow patterns are specified. - */ -static int -netdev_dpdk_validate_flow(const struct match *match) { - struct match match_zero_wc; - const struct flow *masks = &match->wc.masks; - - /* Create a wc-zeroed version of flow */ - match_init(&match_zero_wc, &match->flow, &match->wc); - - if (!is_all_zeros(&match_zero_wc.flow.tunnel, - sizeof match_zero_wc.flow.tunnel)) { - goto err; - } - - if (masks->metadata || masks->skb_priority || - masks->pkt_mark || masks->dp_hash) { - goto err; - } - - /* recirc id must be zero */ - if (match_zero_wc.flow.recirc_id) { - goto err; - } - - if (masks->ct_state || masks->ct_nw_proto || - masks->ct_zone || masks->ct_mark || - !ovs_u128_is_zero(masks->ct_label)) { - goto err; - } - - if (masks->conj_id || masks->actset_output) { - goto err; - } - - /* unsupported L2 */ - if (!is_all_zeros(masks->mpls_lse, sizeof masks->mpls_lse)) { - goto err; - } - - /* unsupported L3 */ - if (masks->ipv6_label || masks->ct_nw_src || masks->ct_nw_dst || - !is_all_zeros(&masks->ipv6_src, sizeof masks->ipv6_src) || - !is_all_zeros(&masks->ipv6_dst, sizeof masks->ipv6_dst) || - !is_all_zeros(&masks->ct_ipv6_src, sizeof masks->ct_ipv6_src) || - !is_all_zeros(&masks->ct_ipv6_dst, sizeof masks->ct_ipv6_dst) || - !is_all_zeros(&masks->nd_target, sizeof masks->nd_target) || - !is_all_zeros(&masks->nsh, sizeof masks->nsh) || - !is_all_zeros(&masks->arp_sha, sizeof masks->arp_sha) || - !is_all_zeros(&masks->arp_tha, sizeof masks->arp_tha)) { - goto err; - } - - /* If fragmented, then don't HW accelerate - for now */ - if (match_zero_wc.flow.nw_frag) { - goto err; - } - - /* unsupported L4 */ - if (masks->igmp_group_ip4 || masks->ct_tp_src || masks->ct_tp_dst) { - goto err; - } - - return 0; - -err: - VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols"); - return -1; -} - -static int -netdev_dpdk_destroy_rte_flow(struct netdev *netdev, - const ovs_u128 *ufid, - struct rte_flow *rte_flow) { - struct rte_flow_error error; - int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); - - if (ret == 0) { - ufid_to_rte_flow_disassociate(ufid); - VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n", - netdev_get_name(netdev), rte_flow, - UUID_ARGS((struct uuid *)ufid)); - } else { - VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", - netdev_get_name(netdev), error.type, error.message); - } - - return ret; -} - -static int -netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, - struct nlattr *actions, size_t actions_len, - const ovs_u128 *ufid, struct offload_info *info, - struct dpif_flow_stats *stats OVS_UNUSED) { - struct rte_flow *rte_flow; - int ret; - - /* - * If an old rte_flow exists, it means it's a flow modification. - * Here destroy the old rte flow first before adding a new one. - */ - rte_flow = ufid_to_rte_flow_find(ufid); - if (rte_flow) { - ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); - if (ret < 0) { - return ret; - } - } - - ret = netdev_dpdk_validate_flow(match); - if (ret < 0) { - return ret; - } - - return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, - actions_len, ufid, info); -} - -static int -netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, - struct dpif_flow_stats *stats OVS_UNUSED) { - - struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); - - if (!rte_flow) { - return -1; - } - - return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); -} - -#define DPDK_FLOW_OFFLOAD_API \ - .flow_put = netdev_dpdk_flow_put, \ - .flow_del = netdev_dpdk_flow_del - #define NETDEV_DPDK_CLASS_COMMON \ .is_pmd = true, \ .alloc = netdev_dpdk_alloc, \ diff --git a/lib/netdev-rte-offloads.c b/lib/netdev-rte-offloads.c new file mode 100644 index 0000000..0212156 --- /dev/null +++ b/lib/netdev-rte-offloads.c @@ -0,0 +1,775 @@ +/* + * Copyright (c) 2019 Mellanox Technologies, Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <config.h> +#include <rte_flow.h> +#include "netdev-rte-offloads.h" +#include "netdev-provider.h" +#include "dpif-netdev.h" +#include "cmap.h" +#include "openvswitch/vlog.h" +#include "openvswitch/match.h" +#include "packets.h" +#include "uuid.h" + +VLOG_DEFINE_THIS_MODULE(netdev_rte_offloads); + +/* + * A mapping from ufid to dpdk rte_flow. + */ +static struct cmap ufid_to_rte_flow = CMAP_INITIALIZER; + +struct ufid_to_rte_flow_data { + struct cmap_node node; + ovs_u128 ufid; + struct rte_flow *rte_flow; +}; + + +/* Find rte_flow with @ufid */ +static struct rte_flow * +ufid_to_rte_flow_find(const ovs_u128 *ufid) { + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); + struct ufid_to_rte_flow_data *data; + + CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { + if (ovs_u128_equals(*ufid, data->ufid)) { + return data->rte_flow; + } + } + + return NULL; +} + +static inline void +ufid_to_rte_flow_associate(const ovs_u128 *ufid, + struct rte_flow *rte_flow) { + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); + struct ufid_to_rte_flow_data *data = xzalloc(sizeof(*data)); + + /* + * We should not simply overwrite an existing rte flow. + * We should have deleted it first before re-adding it. + * Thus, if following assert triggers, something is wrong: + * the rte_flow is not destroyed. + */ + ovs_assert(ufid_to_rte_flow_find(ufid) == NULL); + + data->ufid = *ufid; + data->rte_flow = rte_flow; + + cmap_insert(&ufid_to_rte_flow, + CONST_CAST(struct cmap_node *, &data->node), hash); +} + +static inline void +ufid_to_rte_flow_disassociate(const ovs_u128 *ufid) { + size_t hash = hash_bytes(ufid, sizeof(*ufid), 0); + struct ufid_to_rte_flow_data *data; + + CMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_to_rte_flow) { + if (ovs_u128_equals(*ufid, data->ufid)) { + cmap_remove(&ufid_to_rte_flow, + CONST_CAST(struct cmap_node *, &data->node), hash); + ovsrcu_postpone(free, data); + return; + } + } + + VLOG_WARN("ufid "UUID_FMT" is not associated with an rte flow\n", + UUID_ARGS((struct uuid *)ufid)); +} + +/* + * To avoid individual xrealloc calls for each new element, a 'curent_max' + * is used to keep track of current allocated number of elements. Starts + * by 8 and doubles on each xrealloc call + */ +struct flow_patterns { + struct rte_flow_item *items; + int cnt; + int current_max; +}; + +struct flow_actions { + struct rte_flow_action *actions; + int cnt; + int current_max; +}; + +static void +dump_flow_pattern(struct rte_flow_item *item) +{ + struct ds s; + + if (!VLOG_IS_DBG_ENABLED() || item->type == RTE_FLOW_ITEM_TYPE_END) { + return; + } + + ds_init(&s); + + if (item->type == RTE_FLOW_ITEM_TYPE_ETH) { + const struct rte_flow_item_eth *eth_spec = item->spec; + const struct rte_flow_item_eth *eth_mask = item->mask; + + ds_put_cstr(&s, "rte flow eth pattern:\n"); + if (eth_spec) { + ds_put_format(&s, + " Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " + "type=0x%04" PRIx16"\n", + ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes), + ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes), + ntohs(eth_spec->type)); + } else { + ds_put_cstr(&s, " Spec = null\n"); + } + if (eth_mask) { + ds_put_format(&s, + " Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", " + "type=0x%04"PRIx16"\n", + ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes), + ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes), + eth_mask->type); + } else { + ds_put_cstr(&s, " Mask = null\n"); + } + } + + if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) { + const struct rte_flow_item_vlan *vlan_spec = item->spec; + const struct rte_flow_item_vlan *vlan_mask = item->mask; + + ds_put_cstr(&s, "rte flow vlan pattern:\n"); + if (vlan_spec) { + ds_put_format(&s, + " Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", + ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci)); + } else { + ds_put_cstr(&s, " Spec = null\n"); + } + + if (vlan_mask) { + ds_put_format(&s, + " Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n", + ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci)); + } else { + ds_put_cstr(&s, " Mask = null\n"); + } + } + + if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) { + const struct rte_flow_item_ipv4 *ipv4_spec = item->spec; + const struct rte_flow_item_ipv4 *ipv4_mask = item->mask; + + ds_put_cstr(&s, "rte flow ipv4 pattern:\n"); + if (ipv4_spec) { + ds_put_format(&s, + " Spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 + ", src="IP_FMT", dst="IP_FMT"\n", + ipv4_spec->hdr.type_of_service, + ipv4_spec->hdr.time_to_live, + ipv4_spec->hdr.next_proto_id, + IP_ARGS(ipv4_spec->hdr.src_addr), + IP_ARGS(ipv4_spec->hdr.dst_addr)); + } else { + ds_put_cstr(&s, " Spec = null\n"); + } + if (ipv4_mask) { + ds_put_format(&s, + " Mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8 + ", src="IP_FMT", dst="IP_FMT"\n", + ipv4_mask->hdr.type_of_service, + ipv4_mask->hdr.time_to_live, + ipv4_mask->hdr.next_proto_id, + IP_ARGS(ipv4_mask->hdr.src_addr), + IP_ARGS(ipv4_mask->hdr.dst_addr)); + } else { + ds_put_cstr(&s, " Mask = null\n"); + } + } + + if (item->type == RTE_FLOW_ITEM_TYPE_UDP) { + const struct rte_flow_item_udp *udp_spec = item->spec; + const struct rte_flow_item_udp *udp_mask = item->mask; + + ds_put_cstr(&s, "rte flow udp pattern:\n"); + if (udp_spec) { + ds_put_format(&s, + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", + ntohs(udp_spec->hdr.src_port), + ntohs(udp_spec->hdr.dst_port)); + } else { + ds_put_cstr(&s, " Spec = null\n"); + } + if (udp_mask) { + ds_put_format(&s, + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", + udp_mask->hdr.src_port, + udp_mask->hdr.dst_port); + } else { + ds_put_cstr(&s, " Mask = null\n"); + } + } + + if (item->type == RTE_FLOW_ITEM_TYPE_SCTP) { + const struct rte_flow_item_sctp *sctp_spec = item->spec; + const struct rte_flow_item_sctp *sctp_mask = item->mask; + + ds_put_cstr(&s, "rte flow sctp pattern:\n"); + if (sctp_spec) { + ds_put_format(&s, + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n", + ntohs(sctp_spec->hdr.src_port), + ntohs(sctp_spec->hdr.dst_port)); + } else { + ds_put_cstr(&s, " Spec = null\n"); + } + if (sctp_mask) { + ds_put_format(&s, + " Mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n", + sctp_mask->hdr.src_port, + sctp_mask->hdr.dst_port); + } else { + ds_put_cstr(&s, " Mask = null\n"); + } + } + + if (item->type == RTE_FLOW_ITEM_TYPE_ICMP) { + const struct rte_flow_item_icmp *icmp_spec = item->spec; + const struct rte_flow_item_icmp *icmp_mask = item->mask; + + ds_put_cstr(&s, "rte flow icmp pattern:\n"); + if (icmp_spec) { + ds_put_format(&s, + " Spec: icmp_type=%"PRIu8", icmp_code=%"PRIu8"\n", + icmp_spec->hdr.icmp_type, + icmp_spec->hdr.icmp_code); + } else { + ds_put_cstr(&s, " Spec = null\n"); + } + if (icmp_mask) { + ds_put_format(&s, + " Mask: icmp_type=0x%"PRIx8", icmp_code=0x%"PRIx8"\n", + icmp_spec->hdr.icmp_type, + icmp_spec->hdr.icmp_code); + } else { + ds_put_cstr(&s, " Mask = null\n"); + } + } + + if (item->type == RTE_FLOW_ITEM_TYPE_TCP) { + const struct rte_flow_item_tcp *tcp_spec = item->spec; + const struct rte_flow_item_tcp *tcp_mask = item->mask; + + ds_put_cstr(&s, "rte flow tcp pattern:\n"); + if (tcp_spec) { + ds_put_format(&s, + " Spec: src_port=%"PRIu16", dst_port=%"PRIu16 + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", + ntohs(tcp_spec->hdr.src_port), + ntohs(tcp_spec->hdr.dst_port), + tcp_spec->hdr.data_off, + tcp_spec->hdr.tcp_flags); + } else { + ds_put_cstr(&s, " Spec = null\n"); + } + if (tcp_mask) { + ds_put_format(&s, + " Mask: src_port=%"PRIx16", dst_port=%"PRIx16 + ", data_off=0x%"PRIx8", tcp_flags=0x%"PRIx8"\n", + tcp_mask->hdr.src_port, + tcp_mask->hdr.dst_port, + tcp_mask->hdr.data_off, + tcp_mask->hdr.tcp_flags); + } else { + ds_put_cstr(&s, " Mask = null\n"); + } + } + + VLOG_DBG("%s", ds_cstr(&s)); + ds_destroy(&s); +} + +static void +add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type, + const void *spec, const void *mask) { + int cnt = patterns->cnt; + + if (cnt == 0) { + patterns->current_max = 8; + patterns->items = xcalloc(patterns->current_max, + sizeof(struct rte_flow_item)); + } else if (cnt == patterns->current_max) { + patterns->current_max *= 2; + patterns->items = xrealloc(patterns->items, patterns->current_max * + sizeof(struct rte_flow_item)); + } + + patterns->items[cnt].type = type; + patterns->items[cnt].spec = spec; + patterns->items[cnt].mask = mask; + patterns->items[cnt].last = NULL; + dump_flow_pattern(&patterns->items[cnt]); + patterns->cnt++; +} + +static void +add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type, + const void *conf) +{ + int cnt = actions->cnt; + + if (cnt == 0) { + actions->current_max = 8; + actions->actions = xcalloc(actions->current_max, + sizeof(struct rte_flow_action)); + } else if (cnt == actions->current_max) { + actions->current_max *= 2; + actions->actions = xrealloc(actions->actions, actions->current_max * + sizeof(struct rte_flow_action)); + } + + actions->actions[cnt].type = type; + actions->actions[cnt].conf = conf; + actions->cnt++; +} + +struct action_rss_data { + struct rte_flow_action_rss conf; + uint16_t queue[0]; +}; + +static struct action_rss_data * +add_flow_rss_action(struct flow_actions *actions, + struct netdev *netdev) { + int i; + struct action_rss_data *rss_data; + + rss_data = xmalloc(sizeof(struct action_rss_data) + + sizeof(uint16_t) * netdev->n_rxq); + *rss_data = (struct action_rss_data) { + .conf = (struct rte_flow_action_rss) { + .func = RTE_ETH_HASH_FUNCTION_DEFAULT, + .level = 0, + .types = 0, + .queue_num = netdev->n_rxq, + .queue = rss_data->queue, + .key_len = 0, + .key = NULL + }, + }; + + /* Override queue array with default */ + for (i = 0; i < netdev->n_rxq; i++) { + rss_data->queue[i] = i; + } + + add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, &rss_data->conf); + + return rss_data; +} + +static int +netdev_dpdk_add_rte_flow_offload(struct netdev *netdev, + const struct match *match, + struct nlattr *nl_actions OVS_UNUSED, + size_t actions_len OVS_UNUSED, + const ovs_u128 *ufid, + struct offload_info *info) { + const struct rte_flow_attr flow_attr = { + .group = 0, + .priority = 0, + .ingress = 1, + .egress = 0 + }; + struct flow_patterns patterns = { .items = NULL, .cnt = 0 }; + struct flow_actions actions = { .actions = NULL, .cnt = 0 }; + struct rte_flow *flow; + struct rte_flow_error error; + uint8_t *ipv4_next_proto_mask = NULL; + int ret = 0; + + /* Eth */ + struct rte_flow_item_eth eth_spec; + struct rte_flow_item_eth eth_mask; + if (!eth_addr_is_zero(match->wc.masks.dl_src) || + !eth_addr_is_zero(match->wc.masks.dl_dst)) { + memset(ð_spec, 0, sizeof eth_spec); + memset(ð_mask, 0, sizeof eth_mask); + rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof eth_spec.dst); + rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof eth_spec.src); + eth_spec.type = match->flow.dl_type; + + rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst, + sizeof eth_mask.dst); + rte_memcpy(ð_mask.src, &match->wc.masks.dl_src, + sizeof eth_mask.src); + eth_mask.type = match->wc.masks.dl_type; + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, + ð_spec, ð_mask); + } else { + /* + * If user specifies a flow (like UDP flow) without L2 patterns, + * OVS will at least set the dl_type. Normally, it's enough to + * create an eth pattern just with it. Unluckily, some Intel's + * NIC (such as XL710) doesn't support that. Below is a workaround, + * which simply matches any L2 pkts. + */ + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL); + } + + /* VLAN */ + struct rte_flow_item_vlan vlan_spec; + struct rte_flow_item_vlan vlan_mask; + if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) { + memset(&vlan_spec, 0, sizeof vlan_spec); + memset(&vlan_mask, 0, sizeof vlan_mask); + vlan_spec.tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI); + vlan_mask.tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI); + + /* match any protocols */ + vlan_mask.inner_type = 0; + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_VLAN, + &vlan_spec, &vlan_mask); + } + + /* IP v4 */ + uint8_t proto = 0; + struct rte_flow_item_ipv4 ipv4_spec; + struct rte_flow_item_ipv4 ipv4_mask; + if (match->flow.dl_type == htons(ETH_TYPE_IP)) { + memset(&ipv4_spec, 0, sizeof ipv4_spec); + memset(&ipv4_mask, 0, sizeof ipv4_mask); + + ipv4_spec.hdr.type_of_service = match->flow.nw_tos; + ipv4_spec.hdr.time_to_live = match->flow.nw_ttl; + ipv4_spec.hdr.next_proto_id = match->flow.nw_proto; + ipv4_spec.hdr.src_addr = match->flow.nw_src; + ipv4_spec.hdr.dst_addr = match->flow.nw_dst; + + ipv4_mask.hdr.type_of_service = match->wc.masks.nw_tos; + ipv4_mask.hdr.time_to_live = match->wc.masks.nw_ttl; + ipv4_mask.hdr.next_proto_id = match->wc.masks.nw_proto; + ipv4_mask.hdr.src_addr = match->wc.masks.nw_src; + ipv4_mask.hdr.dst_addr = match->wc.masks.nw_dst; + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_IPV4, + &ipv4_spec, &ipv4_mask); + + /* Save proto for L4 protocol setup */ + proto = ipv4_spec.hdr.next_proto_id & + ipv4_mask.hdr.next_proto_id; + + /* Remember proto mask address for later modification */ + ipv4_next_proto_mask = &ipv4_mask.hdr.next_proto_id; + } + + if (proto != IPPROTO_ICMP && proto != IPPROTO_UDP && + proto != IPPROTO_SCTP && proto != IPPROTO_TCP && + (match->wc.masks.tp_src || + match->wc.masks.tp_dst || + match->wc.masks.tcp_flags)) { + VLOG_DBG("L4 Protocol (%u) not supported", proto); + ret = -1; + goto out; + } + + if ((match->wc.masks.tp_src && match->wc.masks.tp_src != OVS_BE16_MAX) || + (match->wc.masks.tp_dst && match->wc.masks.tp_dst != OVS_BE16_MAX)) { + ret = -1; + goto out; + } + + struct rte_flow_item_tcp tcp_spec; + struct rte_flow_item_tcp tcp_mask; + if (proto == IPPROTO_TCP) { + memset(&tcp_spec, 0, sizeof tcp_spec); + memset(&tcp_mask, 0, sizeof tcp_mask); + tcp_spec.hdr.src_port = match->flow.tp_src; + tcp_spec.hdr.dst_port = match->flow.tp_dst; + tcp_spec.hdr.data_off = ntohs(match->flow.tcp_flags) >> 8; + tcp_spec.hdr.tcp_flags = ntohs(match->flow.tcp_flags) & 0xff; + + tcp_mask.hdr.src_port = match->wc.masks.tp_src; + tcp_mask.hdr.dst_port = match->wc.masks.tp_dst; + tcp_mask.hdr.data_off = ntohs(match->wc.masks.tcp_flags) >> 8; + tcp_mask.hdr.tcp_flags = ntohs(match->wc.masks.tcp_flags) & 0xff; + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_TCP, + &tcp_spec, &tcp_mask); + + /* proto == TCP and ITEM_TYPE_TCP, thus no need for proto match */ + if (ipv4_next_proto_mask) { + *ipv4_next_proto_mask = 0; + } + goto end_proto_check; + } + + struct rte_flow_item_udp udp_spec; + struct rte_flow_item_udp udp_mask; + if (proto == IPPROTO_UDP) { + memset(&udp_spec, 0, sizeof udp_spec); + memset(&udp_mask, 0, sizeof udp_mask); + udp_spec.hdr.src_port = match->flow.tp_src; + udp_spec.hdr.dst_port = match->flow.tp_dst; + + udp_mask.hdr.src_port = match->wc.masks.tp_src; + udp_mask.hdr.dst_port = match->wc.masks.tp_dst; + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_UDP, + &udp_spec, &udp_mask); + + /* proto == UDP and ITEM_TYPE_UDP, thus no need for proto match */ + if (ipv4_next_proto_mask) { + *ipv4_next_proto_mask = 0; + } + goto end_proto_check; + } + + struct rte_flow_item_sctp sctp_spec; + struct rte_flow_item_sctp sctp_mask; + if (proto == IPPROTO_SCTP) { + memset(&sctp_spec, 0, sizeof sctp_spec); + memset(&sctp_mask, 0, sizeof sctp_mask); + sctp_spec.hdr.src_port = match->flow.tp_src; + sctp_spec.hdr.dst_port = match->flow.tp_dst; + + sctp_mask.hdr.src_port = match->wc.masks.tp_src; + sctp_mask.hdr.dst_port = match->wc.masks.tp_dst; + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_SCTP, + &sctp_spec, &sctp_mask); + + /* proto == SCTP and ITEM_TYPE_SCTP, thus no need for proto match */ + if (ipv4_next_proto_mask) { + *ipv4_next_proto_mask = 0; + } + goto end_proto_check; + } + + struct rte_flow_item_icmp icmp_spec; + struct rte_flow_item_icmp icmp_mask; + if (proto == IPPROTO_ICMP) { + memset(&icmp_spec, 0, sizeof icmp_spec); + memset(&icmp_mask, 0, sizeof icmp_mask); + icmp_spec.hdr.icmp_type = (uint8_t)ntohs(match->flow.tp_src); + icmp_spec.hdr.icmp_code = (uint8_t)ntohs(match->flow.tp_dst); + + icmp_mask.hdr.icmp_type = (uint8_t)ntohs(match->wc.masks.tp_src); + icmp_mask.hdr.icmp_code = (uint8_t)ntohs(match->wc.masks.tp_dst); + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ICMP, + &icmp_spec, &icmp_mask); + + /* proto == ICMP and ITEM_TYPE_ICMP, thus no need for proto match */ + if (ipv4_next_proto_mask) { + *ipv4_next_proto_mask = 0; + } + goto end_proto_check; + } + +end_proto_check: + + add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_END, NULL, NULL); + + struct rte_flow_action_mark mark; + struct action_rss_data *rss; + + mark.id = info->flow_mark; + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark); + + + rss = add_flow_rss_action(&actions, netdev); + add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL); + + flow = netdev_dpdk_rte_flow_create(netdev, &flow_attr,patterns.items, + actions.actions, &error); + + free(rss); + if (!flow) { + VLOG_ERR("%s: rte flow creat error: %u : message : %s\n", + netdev_get_name(netdev), error.type, error.message); + ret = -1; + goto out; + } + ufid_to_rte_flow_associate(ufid, flow); + VLOG_DBG("%s: installed flow %p by ufid "UUID_FMT"\n", + netdev_get_name(netdev), flow, UUID_ARGS((struct uuid *)ufid)); + +out: + free(patterns.items); + free(actions.actions); + return ret; +} + +static bool +is_all_zero(const void *addr, size_t n) { + size_t i = 0; + const uint8_t *p = (uint8_t *)addr; + + for (i = 0; i < n; i++) { + if (p[i] != 0) { + return false; + } + } + + return true; +} + +/* + * Check if any unsupported flow patterns are specified. + */ +static int +netdev_dpdk_validate_flow(const struct match *match) { + struct match match_zero_wc; + + /* Create a wc-zeroed version of flow */ + match_init(&match_zero_wc, &match->flow, &match->wc); + + if (!is_all_zero(&match_zero_wc.flow.tunnel, + sizeof(match_zero_wc.flow.tunnel))) { + goto err; + } + + if (match->wc.masks.metadata || + match->wc.masks.skb_priority || + match->wc.masks.pkt_mark || + match->wc.masks.dp_hash) { + goto err; + } + + /* recirc id must be zero */ + if (match_zero_wc.flow.recirc_id) { + goto err; + } + + if (match->wc.masks.ct_state || + match->wc.masks.ct_nw_proto || + match->wc.masks.ct_zone || + match->wc.masks.ct_mark || + match->wc.masks.ct_label.u64.hi || + match->wc.masks.ct_label.u64.lo) { + goto err; + } + + if (match->wc.masks.conj_id || + match->wc.masks.actset_output) { + goto err; + } + + /* unsupported L2 */ + if (!is_all_zero(&match->wc.masks.mpls_lse, + sizeof(match_zero_wc.flow.mpls_lse))) { + goto err; + } + + /* unsupported L3 */ + if (match->wc.masks.ipv6_label || + match->wc.masks.ct_nw_src || + match->wc.masks.ct_nw_dst || + !is_all_zero(&match->wc.masks.ipv6_src, sizeof(struct in6_addr)) || + !is_all_zero(&match->wc.masks.ipv6_dst, sizeof(struct in6_addr)) || + !is_all_zero(&match->wc.masks.ct_ipv6_src, sizeof(struct in6_addr)) || + !is_all_zero(&match->wc.masks.ct_ipv6_dst, sizeof(struct in6_addr)) || + !is_all_zero(&match->wc.masks.nd_target, sizeof(struct in6_addr)) || + !is_all_zero(&match->wc.masks.nsh, sizeof(struct ovs_key_nsh)) || + !is_all_zero(&match->wc.masks.arp_sha, sizeof(struct eth_addr)) || + !is_all_zero(&match->wc.masks.arp_tha, sizeof(struct eth_addr))) { + goto err; + } + + /* If fragmented, then don't HW accelerate - for now */ + if (match_zero_wc.flow.nw_frag) { + goto err; + } + + /* unsupported L4 */ + if (match->wc.masks.igmp_group_ip4 || + match->wc.masks.ct_tp_src || + match->wc.masks.ct_tp_dst) { + goto err; + } + + return 0; + +err: + VLOG_ERR("cannot HW accelerate this flow due to unsupported protocols"); + return -1; +} + +static int +netdev_dpdk_destroy_rte_flow(struct netdev *netdev, + const ovs_u128 *ufid, + struct rte_flow *rte_flow) { + struct rte_flow_error error; + int ret = netdev_dpdk_rte_flow_destroy(netdev, rte_flow, &error); + + if (ret == 0) { + ufid_to_rte_flow_disassociate(ufid); + VLOG_DBG("%s: removed rte flow %p associated with ufid " UUID_FMT "\n", + netdev_get_name(netdev), rte_flow, + UUID_ARGS((struct uuid *)ufid)); + } else { + VLOG_ERR("%s: rte flow destroy error: %u : message : %s\n", + netdev_get_name(netdev), error.type, error.message); + } + + return ret; +} + +int +netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, + struct nlattr *actions, size_t actions_len, + const ovs_u128 *ufid, struct offload_info *info, + struct dpif_flow_stats *stats OVS_UNUSED) { + struct rte_flow *rte_flow; + int ret; + + /* + * If an old rte_flow exists, it means it's a flow modification. + * Here destroy the old rte flow first before adding a new one. + */ + rte_flow = ufid_to_rte_flow_find(ufid); + if (rte_flow) { + ret = netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); + if (ret < 0) { + return ret; + } + } + + ret = netdev_dpdk_validate_flow(match); + if (ret < 0) { + return ret; + } + + return netdev_dpdk_add_rte_flow_offload(netdev, match, actions, + actions_len, ufid, info); +} + +int +netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, + struct dpif_flow_stats *stats OVS_UNUSED) { + + struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid); + + if (!rte_flow) { + return -1; + } + + return netdev_dpdk_destroy_rte_flow(netdev, ufid, rte_flow); +} + diff --git a/lib/netdev-rte-offloads.h b/lib/netdev-rte-offloads.h new file mode 100644 index 0000000..573e214 --- /dev/null +++ b/lib/netdev-rte-offloads.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2019 Mellanox Technologies, Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef NETDEV_VPORT_OFFLOADS_H +#define NETDEV_VPORT_OFFLOADS_H 1 + +#include "openvswitch/types.h" +struct netdev; +struct match; +struct nlattr; +struct offload_info; +struct dpif_flow_stats; + +int netdev_dpdk_flow_put(struct netdev *netdev, struct match *match, + struct nlattr *actions, size_t actions_len, + const ovs_u128 *ufid, struct offload_info *info, + struct dpif_flow_stats *stats OVS_UNUSED); +int netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid, + struct dpif_flow_stats *stats OVS_UNUSED); + +#define DPDK_FLOW_OFFLOAD_API \ + .flow_put = netdev_dpdk_flow_put, \ + .flow_del = netdev_dpdk_flow_del + +#endif /* netdev-rte-offloads.h */