Message ID | 1551292128-4632-3-git-send-email-john.hurley@netronome.com |
---|---|
State | Superseded |
Delegated to: | Simon Horman |
Headers | show |
Series | ovs-tc: support OvS internal port offload | expand |
On 27/02/2019 20:28, John Hurley wrote: > Rules applied to OvS internal ports are not represented in TC datapaths. > However, it is possible to support rules matching on internal ports in TC. > The start_xmit ndo of OvS internal ports directs packets back into the OvS > kernel datapath where they are rematched with the ingress port now being > that of the internal port. Due to this, rules matching on an internal port > can be added as TC rules to an egress qdisc for these ports. > > Allow rules applied to internal ports to be offloaded to TC as egress > filters. However, prevent the offload of rules that are redirecting to an > internal port. Packets matching these should pass through the network > stack for processing and so there is, currently, no benefit in adding them > to TC. > > Signed-off-by: John Hurley <john.hurley@netronome.com> > Reviewed-by: Simon Horman <simon.horman@netronome.com> > --- > lib/dpif.c | 31 ++++++------------------------- > lib/netdev-linux.c | 1 + > lib/netdev-tc-offloads.c | 40 ++++++++++++++++++++++++++++++---------- > 3 files changed, 37 insertions(+), 35 deletions(-) > > diff --git a/lib/dpif.c b/lib/dpif.c > index 457c9bf..ce413d1 100644 > --- a/lib/dpif.c > +++ b/lib/dpif.c > @@ -100,15 +100,6 @@ static bool should_log_flow_message(const struct vlog_module *module, > /* Incremented whenever tnl route, arp, etc changes. */ > struct seq *tnl_conf_seq; > > -static bool > -dpif_is_internal_port(const char *type) > -{ > - /* For userspace datapath, tap devices are the equivalent > - * of internal devices in the kernel datapath, so both > - * these types are 'internal' devices. */ > - return !strcmp(type, "internal") || !strcmp(type, "tap"); > -} > - in the commit after you only handle internal ports. according to commit "5119e258da92 dpif: Fix cleanup of userspace datapath." some tests failed without it. so need to verify this or leave the skip for tap devices maybe. beside that looks ok to me. > static void > dp_initialize(void) > { > @@ -359,10 +350,6 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp) > struct netdev *netdev; > int err; > > - if (dpif_is_internal_port(dpif_port.type)) { > - continue; > - } > - > err = netdev_open(dpif_port.name, dpif_port.type, &netdev); > > if (!err) { > @@ -434,9 +421,7 @@ dpif_remove_netdev_ports(struct dpif *dpif) { > struct dpif_port dpif_port; > > DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { > - if (!dpif_is_internal_port(dpif_port.type)) { > - netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); > - } > + netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); > } > } > > @@ -581,16 +566,12 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop) > if (!error) { > VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu32, > dpif_name(dpif), netdev_name, port_no); > + struct dpif_port dpif_port; > > - if (!dpif_is_internal_port(netdev_get_type(netdev))) { > - > - struct dpif_port dpif_port; > - > - dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); > - dpif_port.name = CONST_CAST(char *, netdev_name); > - dpif_port.port_no = port_no; > - netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); > - } > + dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); > + dpif_port.name = CONST_CAST(char *, netdev_name); > + dpif_port.port_no = port_no; > + netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); > } else { > VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", > dpif_name(dpif), netdev_name, ovs_strerror(error)); > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 8d37eb6..18445bd 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -3223,6 +3223,7 @@ const struct netdev_class netdev_tap_class = { > > const struct netdev_class netdev_internal_class = { > NETDEV_LINUX_CLASS_COMMON, > + LINUX_FLOW_OFFLOAD_API, > .type = "internal", > .construct = netdev_linux_construct, > .get_stats = netdev_internal_get_stats, > diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c > index 31ad9f4..3b2ca56 100644 > --- a/lib/netdev-tc-offloads.c > +++ b/lib/netdev-tc-offloads.c > @@ -52,6 +52,12 @@ struct netlink_field { > int size; > }; > > +static bool > +is_internal_port(const char *type) > +{ > + return !strcmp(type, "internal"); > +} > + > static struct netlink_field set_flower_map[][4] = { > [OVS_KEY_ATTR_IPV4] = { > { offsetof(struct ovs_key_ipv4, ipv4_src), > @@ -178,11 +184,12 @@ del_ufid_tc_mapping(const ovs_u128 *ufid) > /* Wrapper function to delete filter and ufid tc mapping */ > static int > del_filter_and_ufid_mapping(int ifindex, int prio, int handle, > - uint32_t block_id, const ovs_u128 *ufid) > + uint32_t block_id, const ovs_u128 *ufid, > + bool egress) > { > int err; > > - err = tc_del_filter(ifindex, prio, handle, block_id, false); > + err = tc_del_filter(ifindex, prio, handle, block_id, egress); > del_ufid_tc_mapping(ufid); > > return err; > @@ -339,6 +346,7 @@ get_block_id_from_netdev(struct netdev *netdev) > int > netdev_tc_flow_flush(struct netdev *netdev) > { > + bool egress = is_internal_port(netdev_get_type(netdev)); > int ifindex = netdev_get_ifindex(netdev); > uint32_t block_id = 0; > > @@ -350,13 +358,14 @@ netdev_tc_flow_flush(struct netdev *netdev) > > block_id = get_block_id_from_netdev(netdev); > > - return tc_flush(ifindex, block_id, false); > + return tc_flush(ifindex, block_id, egress); > } > > int > netdev_tc_flow_dump_create(struct netdev *netdev, > struct netdev_flow_dump **dump_out) > { > + bool egress = is_internal_port(netdev_get_type(netdev)); > struct netdev_flow_dump *dump; > uint32_t block_id = 0; > int ifindex; > @@ -372,7 +381,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev, > dump = xzalloc(sizeof *dump); > dump->nl_dump = xzalloc(sizeof *dump->nl_dump); > dump->netdev = netdev_ref(netdev); > - tc_dump_flower_start(ifindex, dump->nl_dump, block_id, false); > + tc_dump_flower_start(ifindex, dump->nl_dump, block_id, egress); > > *dump_out = dump; > > @@ -1072,6 +1081,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > struct dpif_flow_stats *stats OVS_UNUSED) > { > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > + bool egress = is_internal_port(netdev_get_type(netdev)); > struct tc_flower flower; > const struct flow *key = &match->flow; > struct flow *mask = &match->wc.masks; > @@ -1286,6 +1296,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > odp_port_t port = nl_attr_get_odp_port(nla); > struct netdev *outdev = netdev_ports_get(port, info->dpif_class); > > + if (is_internal_port(netdev_get_type(outdev))) { > + return EOPNOTSUPP; > + } > action->ifindex_out = netdev_get_ifindex(outdev); > action->type = TC_ACT_OUTPUT; > flower.action_count++; > @@ -1333,7 +1346,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > handle = get_ufid_tc_mapping(ufid, &prio, NULL); > if (handle && prio) { > VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", handle, prio); > - del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); > + del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid, > + egress); > } > > if (!prio) { > @@ -1347,7 +1361,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > flower.act_cookie.data = ufid; > flower.act_cookie.len = sizeof *ufid; > > - err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, false); > + err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, egress); > if (!err) { > add_ufid_tc_mapping(ufid, flower.prio, flower.handle, netdev, ifindex); > } > @@ -1371,6 +1385,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, > odp_port_t in_port; > int prio = 0; > int ifindex; > + bool egress; > int handle; > int err; > > @@ -1379,6 +1394,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, > return ENOENT; > } > > + egress = is_internal_port(netdev_get_type(netdev)); > ifindex = netdev_get_ifindex(dev); > if (ifindex < 0) { > VLOG_ERR_RL(&error_rl, "flow_get: failed to get ifindex for %s: %s", > @@ -1390,7 +1406,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, > VLOG_DBG_RL(&rl, "flow get (dev %s prio %d handle %d)", > netdev_get_name(dev), prio, handle); > block_id = get_block_id_from_netdev(netdev); > - err = tc_get_flower(ifindex, prio, handle, &flower, block_id, false); > + err = tc_get_flower(ifindex, prio, handle, &flower, block_id, egress); > netdev_close(dev); > if (err) { > VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s", > @@ -1417,6 +1433,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, > struct netdev *dev; > int prio = 0; > int ifindex; > + bool egress; > int handle; > int error; > > @@ -1425,6 +1442,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, > return ENOENT; > } > > + egress = is_internal_port(netdev_get_type(netdev)); > ifindex = netdev_get_ifindex(dev); > if (ifindex < 0) { > VLOG_ERR_RL(&error_rl, "flow_del: failed to get ifindex for %s: %s", > @@ -1437,14 +1455,15 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, > > if (stats) { > memset(stats, 0, sizeof *stats); > - if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, false)) { > + if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, egress)) { > stats->n_packets = get_32aligned_u64(&flower.stats.n_packets); > stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes); > stats->used = flower.lastused; > } > } > > - error = del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); > + error = del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid, > + egress); > > netdev_close(dev); > > @@ -1516,6 +1535,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) > { > static struct ovsthread_once multi_mask_once = OVSTHREAD_ONCE_INITIALIZER; > static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER; > + bool egress = is_internal_port(netdev_get_type(netdev)); > uint32_t block_id = 0; > int ifindex; > int error; > @@ -1538,7 +1558,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) > } > > block_id = get_block_id_from_netdev(netdev); > - error = tc_add_del_ingress_qdisc(ifindex, true, block_id, false); > + error = tc_add_del_ingress_qdisc(ifindex, true, block_id, egress); > > if (error && error != EEXIST) { > VLOG_ERR("failed adding ingress qdisc required for offloading: %s", >
On 05/03/2019 15:46, Roi Dayan wrote: > > > On 27/02/2019 20:28, John Hurley wrote: >> Rules applied to OvS internal ports are not represented in TC datapaths. >> However, it is possible to support rules matching on internal ports in TC. >> The start_xmit ndo of OvS internal ports directs packets back into the OvS >> kernel datapath where they are rematched with the ingress port now being >> that of the internal port. Due to this, rules matching on an internal port >> can be added as TC rules to an egress qdisc for these ports. >> >> Allow rules applied to internal ports to be offloaded to TC as egress >> filters. However, prevent the offload of rules that are redirecting to an >> internal port. Packets matching these should pass through the network >> stack for processing and so there is, currently, no benefit in adding them >> to TC. >> >> Signed-off-by: John Hurley <john.hurley@netronome.com> >> Reviewed-by: Simon Horman <simon.horman@netronome.com> >> --- >> lib/dpif.c | 31 ++++++------------------------- >> lib/netdev-linux.c | 1 + >> lib/netdev-tc-offloads.c | 40 ++++++++++++++++++++++++++++++---------- >> 3 files changed, 37 insertions(+), 35 deletions(-) >> >> diff --git a/lib/dpif.c b/lib/dpif.c >> index 457c9bf..ce413d1 100644 >> --- a/lib/dpif.c >> +++ b/lib/dpif.c >> @@ -100,15 +100,6 @@ static bool should_log_flow_message(const struct vlog_module *module, >> /* Incremented whenever tnl route, arp, etc changes. */ >> struct seq *tnl_conf_seq; >> >> -static bool >> -dpif_is_internal_port(const char *type) >> -{ >> - /* For userspace datapath, tap devices are the equivalent >> - * of internal devices in the kernel datapath, so both >> - * these types are 'internal' devices. */ >> - return !strcmp(type, "internal") || !strcmp(type, "tap"); >> -} >> - > > in the commit after you only handle internal ports. sorry. i was referring to the change in this commit. I got mixed up and thought i was writing the reply on the first commit. > according to commit "5119e258da92 dpif: Fix cleanup of userspace datapath." > some tests failed without it. > so need to verify this or leave the skip for tap devices maybe. > beside that looks ok to me. > >> static void >> dp_initialize(void) >> { >> @@ -359,10 +350,6 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp) >> struct netdev *netdev; >> int err; >> >> - if (dpif_is_internal_port(dpif_port.type)) { >> - continue; >> - } >> - >> err = netdev_open(dpif_port.name, dpif_port.type, &netdev); >> >> if (!err) { >> @@ -434,9 +421,7 @@ dpif_remove_netdev_ports(struct dpif *dpif) { >> struct dpif_port dpif_port; >> >> DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { >> - if (!dpif_is_internal_port(dpif_port.type)) { >> - netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); >> - } >> + netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); >> } >> } >> >> @@ -581,16 +566,12 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop) >> if (!error) { >> VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu32, >> dpif_name(dpif), netdev_name, port_no); >> + struct dpif_port dpif_port; >> >> - if (!dpif_is_internal_port(netdev_get_type(netdev))) { >> - >> - struct dpif_port dpif_port; >> - >> - dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); >> - dpif_port.name = CONST_CAST(char *, netdev_name); >> - dpif_port.port_no = port_no; >> - netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); >> - } >> + dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); >> + dpif_port.name = CONST_CAST(char *, netdev_name); >> + dpif_port.port_no = port_no; >> + netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); >> } else { >> VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", >> dpif_name(dpif), netdev_name, ovs_strerror(error)); >> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c >> index 8d37eb6..18445bd 100644 >> --- a/lib/netdev-linux.c >> +++ b/lib/netdev-linux.c >> @@ -3223,6 +3223,7 @@ const struct netdev_class netdev_tap_class = { >> >> const struct netdev_class netdev_internal_class = { >> NETDEV_LINUX_CLASS_COMMON, >> + LINUX_FLOW_OFFLOAD_API, >> .type = "internal", >> .construct = netdev_linux_construct, >> .get_stats = netdev_internal_get_stats, >> diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c >> index 31ad9f4..3b2ca56 100644 >> --- a/lib/netdev-tc-offloads.c >> +++ b/lib/netdev-tc-offloads.c >> @@ -52,6 +52,12 @@ struct netlink_field { >> int size; >> }; >> >> +static bool >> +is_internal_port(const char *type) >> +{ >> + return !strcmp(type, "internal"); >> +} >> + >> static struct netlink_field set_flower_map[][4] = { >> [OVS_KEY_ATTR_IPV4] = { >> { offsetof(struct ovs_key_ipv4, ipv4_src), >> @@ -178,11 +184,12 @@ del_ufid_tc_mapping(const ovs_u128 *ufid) >> /* Wrapper function to delete filter and ufid tc mapping */ >> static int >> del_filter_and_ufid_mapping(int ifindex, int prio, int handle, >> - uint32_t block_id, const ovs_u128 *ufid) >> + uint32_t block_id, const ovs_u128 *ufid, >> + bool egress) >> { >> int err; >> >> - err = tc_del_filter(ifindex, prio, handle, block_id, false); >> + err = tc_del_filter(ifindex, prio, handle, block_id, egress); >> del_ufid_tc_mapping(ufid); >> >> return err; >> @@ -339,6 +346,7 @@ get_block_id_from_netdev(struct netdev *netdev) >> int >> netdev_tc_flow_flush(struct netdev *netdev) >> { >> + bool egress = is_internal_port(netdev_get_type(netdev)); >> int ifindex = netdev_get_ifindex(netdev); >> uint32_t block_id = 0; >> >> @@ -350,13 +358,14 @@ netdev_tc_flow_flush(struct netdev *netdev) >> >> block_id = get_block_id_from_netdev(netdev); >> >> - return tc_flush(ifindex, block_id, false); >> + return tc_flush(ifindex, block_id, egress); >> } >> >> int >> netdev_tc_flow_dump_create(struct netdev *netdev, >> struct netdev_flow_dump **dump_out) >> { >> + bool egress = is_internal_port(netdev_get_type(netdev)); >> struct netdev_flow_dump *dump; >> uint32_t block_id = 0; >> int ifindex; >> @@ -372,7 +381,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev, >> dump = xzalloc(sizeof *dump); >> dump->nl_dump = xzalloc(sizeof *dump->nl_dump); >> dump->netdev = netdev_ref(netdev); >> - tc_dump_flower_start(ifindex, dump->nl_dump, block_id, false); >> + tc_dump_flower_start(ifindex, dump->nl_dump, block_id, egress); >> >> *dump_out = dump; >> >> @@ -1072,6 +1081,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, >> struct dpif_flow_stats *stats OVS_UNUSED) >> { >> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); >> + bool egress = is_internal_port(netdev_get_type(netdev)); >> struct tc_flower flower; >> const struct flow *key = &match->flow; >> struct flow *mask = &match->wc.masks; >> @@ -1286,6 +1296,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, >> odp_port_t port = nl_attr_get_odp_port(nla); >> struct netdev *outdev = netdev_ports_get(port, info->dpif_class); >> >> + if (is_internal_port(netdev_get_type(outdev))) { >> + return EOPNOTSUPP; >> + } >> action->ifindex_out = netdev_get_ifindex(outdev); >> action->type = TC_ACT_OUTPUT; >> flower.action_count++; >> @@ -1333,7 +1346,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, >> handle = get_ufid_tc_mapping(ufid, &prio, NULL); >> if (handle && prio) { >> VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", handle, prio); >> - del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); >> + del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid, >> + egress); >> } >> >> if (!prio) { >> @@ -1347,7 +1361,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, >> flower.act_cookie.data = ufid; >> flower.act_cookie.len = sizeof *ufid; >> >> - err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, false); >> + err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, egress); >> if (!err) { >> add_ufid_tc_mapping(ufid, flower.prio, flower.handle, netdev, ifindex); >> } >> @@ -1371,6 +1385,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, >> odp_port_t in_port; >> int prio = 0; >> int ifindex; >> + bool egress; >> int handle; >> int err; >> >> @@ -1379,6 +1394,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, >> return ENOENT; >> } >> >> + egress = is_internal_port(netdev_get_type(netdev)); >> ifindex = netdev_get_ifindex(dev); >> if (ifindex < 0) { >> VLOG_ERR_RL(&error_rl, "flow_get: failed to get ifindex for %s: %s", >> @@ -1390,7 +1406,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, >> VLOG_DBG_RL(&rl, "flow get (dev %s prio %d handle %d)", >> netdev_get_name(dev), prio, handle); >> block_id = get_block_id_from_netdev(netdev); >> - err = tc_get_flower(ifindex, prio, handle, &flower, block_id, false); >> + err = tc_get_flower(ifindex, prio, handle, &flower, block_id, egress); >> netdev_close(dev); >> if (err) { >> VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s", >> @@ -1417,6 +1433,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, >> struct netdev *dev; >> int prio = 0; >> int ifindex; >> + bool egress; >> int handle; >> int error; >> >> @@ -1425,6 +1442,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, >> return ENOENT; >> } >> >> + egress = is_internal_port(netdev_get_type(netdev)); >> ifindex = netdev_get_ifindex(dev); >> if (ifindex < 0) { >> VLOG_ERR_RL(&error_rl, "flow_del: failed to get ifindex for %s: %s", >> @@ -1437,14 +1455,15 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, >> >> if (stats) { >> memset(stats, 0, sizeof *stats); >> - if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, false)) { >> + if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, egress)) { >> stats->n_packets = get_32aligned_u64(&flower.stats.n_packets); >> stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes); >> stats->used = flower.lastused; >> } >> } >> >> - error = del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); >> + error = del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid, >> + egress); >> >> netdev_close(dev); >> >> @@ -1516,6 +1535,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) >> { >> static struct ovsthread_once multi_mask_once = OVSTHREAD_ONCE_INITIALIZER; >> static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER; >> + bool egress = is_internal_port(netdev_get_type(netdev)); >> uint32_t block_id = 0; >> int ifindex; >> int error; >> @@ -1538,7 +1558,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) >> } >> >> block_id = get_block_id_from_netdev(netdev); >> - error = tc_add_del_ingress_qdisc(ifindex, true, block_id, false); >> + error = tc_add_del_ingress_qdisc(ifindex, true, block_id, egress); >> >> if (error && error != EEXIST) { >> VLOG_ERR("failed adding ingress qdisc required for offloading: %s", >>
On Tue, Mar 5, 2019 at 1:46 PM Roi Dayan <roid@mellanox.com> wrote: > > > > On 27/02/2019 20:28, John Hurley wrote: > > Rules applied to OvS internal ports are not represented in TC datapaths. > > However, it is possible to support rules matching on internal ports in TC. > > The start_xmit ndo of OvS internal ports directs packets back into the OvS > > kernel datapath where they are rematched with the ingress port now being > > that of the internal port. Due to this, rules matching on an internal port > > can be added as TC rules to an egress qdisc for these ports. > > > > Allow rules applied to internal ports to be offloaded to TC as egress > > filters. However, prevent the offload of rules that are redirecting to an > > internal port. Packets matching these should pass through the network > > stack for processing and so there is, currently, no benefit in adding them > > to TC. > > > > Signed-off-by: John Hurley <john.hurley@netronome.com> > > Reviewed-by: Simon Horman <simon.horman@netronome.com> > > --- > > lib/dpif.c | 31 ++++++------------------------- > > lib/netdev-linux.c | 1 + > > lib/netdev-tc-offloads.c | 40 ++++++++++++++++++++++++++++++---------- > > 3 files changed, 37 insertions(+), 35 deletions(-) > > > > diff --git a/lib/dpif.c b/lib/dpif.c > > index 457c9bf..ce413d1 100644 > > --- a/lib/dpif.c > > +++ b/lib/dpif.c > > @@ -100,15 +100,6 @@ static bool should_log_flow_message(const struct vlog_module *module, > > /* Incremented whenever tnl route, arp, etc changes. */ > > struct seq *tnl_conf_seq; > > > > -static bool > > -dpif_is_internal_port(const char *type) > > -{ > > - /* For userspace datapath, tap devices are the equivalent > > - * of internal devices in the kernel datapath, so both > > - * these types are 'internal' devices. */ > > - return !strcmp(type, "internal") || !strcmp(type, "tap"); > > -} > > - > > in the commit after you only handle internal ports. > according to commit "5119e258da92 dpif: Fix cleanup of userspace datapath." > some tests failed without it. > so need to verify this or leave the skip for tap devices maybe. > beside that looks ok to me. > Thanks. I agree that it makes sense to keep this in for tap devices. > > static void > > dp_initialize(void) > > { > > @@ -359,10 +350,6 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp) > > struct netdev *netdev; > > int err; > > > > - if (dpif_is_internal_port(dpif_port.type)) { > > - continue; > > - } > > - > > err = netdev_open(dpif_port.name, dpif_port.type, &netdev); > > > > if (!err) { > > @@ -434,9 +421,7 @@ dpif_remove_netdev_ports(struct dpif *dpif) { > > struct dpif_port dpif_port; > > > > DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { > > - if (!dpif_is_internal_port(dpif_port.type)) { > > - netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); > > - } > > + netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); > > } > > } > > > > @@ -581,16 +566,12 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop) > > if (!error) { > > VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu32, > > dpif_name(dpif), netdev_name, port_no); > > + struct dpif_port dpif_port; > > > > - if (!dpif_is_internal_port(netdev_get_type(netdev))) { > > - > > - struct dpif_port dpif_port; > > - > > - dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); > > - dpif_port.name = CONST_CAST(char *, netdev_name); > > - dpif_port.port_no = port_no; > > - netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); > > - } > > + dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); > > + dpif_port.name = CONST_CAST(char *, netdev_name); > > + dpif_port.port_no = port_no; > > + netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); > > } else { > > VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", > > dpif_name(dpif), netdev_name, ovs_strerror(error)); > > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > > index 8d37eb6..18445bd 100644 > > --- a/lib/netdev-linux.c > > +++ b/lib/netdev-linux.c > > @@ -3223,6 +3223,7 @@ const struct netdev_class netdev_tap_class = { > > > > const struct netdev_class netdev_internal_class = { > > NETDEV_LINUX_CLASS_COMMON, > > + LINUX_FLOW_OFFLOAD_API, > > .type = "internal", > > .construct = netdev_linux_construct, > > .get_stats = netdev_internal_get_stats, > > diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c > > index 31ad9f4..3b2ca56 100644 > > --- a/lib/netdev-tc-offloads.c > > +++ b/lib/netdev-tc-offloads.c > > @@ -52,6 +52,12 @@ struct netlink_field { > > int size; > > }; > > > > +static bool > > +is_internal_port(const char *type) > > +{ > > + return !strcmp(type, "internal"); > > +} > > + > > static struct netlink_field set_flower_map[][4] = { > > [OVS_KEY_ATTR_IPV4] = { > > { offsetof(struct ovs_key_ipv4, ipv4_src), > > @@ -178,11 +184,12 @@ del_ufid_tc_mapping(const ovs_u128 *ufid) > > /* Wrapper function to delete filter and ufid tc mapping */ > > static int > > del_filter_and_ufid_mapping(int ifindex, int prio, int handle, > > - uint32_t block_id, const ovs_u128 *ufid) > > + uint32_t block_id, const ovs_u128 *ufid, > > + bool egress) > > { > > int err; > > > > - err = tc_del_filter(ifindex, prio, handle, block_id, false); > > + err = tc_del_filter(ifindex, prio, handle, block_id, egress); > > del_ufid_tc_mapping(ufid); > > > > return err; > > @@ -339,6 +346,7 @@ get_block_id_from_netdev(struct netdev *netdev) > > int > > netdev_tc_flow_flush(struct netdev *netdev) > > { > > + bool egress = is_internal_port(netdev_get_type(netdev)); > > int ifindex = netdev_get_ifindex(netdev); > > uint32_t block_id = 0; > > > > @@ -350,13 +358,14 @@ netdev_tc_flow_flush(struct netdev *netdev) > > > > block_id = get_block_id_from_netdev(netdev); > > > > - return tc_flush(ifindex, block_id, false); > > + return tc_flush(ifindex, block_id, egress); > > } > > > > int > > netdev_tc_flow_dump_create(struct netdev *netdev, > > struct netdev_flow_dump **dump_out) > > { > > + bool egress = is_internal_port(netdev_get_type(netdev)); > > struct netdev_flow_dump *dump; > > uint32_t block_id = 0; > > int ifindex; > > @@ -372,7 +381,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev, > > dump = xzalloc(sizeof *dump); > > dump->nl_dump = xzalloc(sizeof *dump->nl_dump); > > dump->netdev = netdev_ref(netdev); > > - tc_dump_flower_start(ifindex, dump->nl_dump, block_id, false); > > + tc_dump_flower_start(ifindex, dump->nl_dump, block_id, egress); > > > > *dump_out = dump; > > > > @@ -1072,6 +1081,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > > struct dpif_flow_stats *stats OVS_UNUSED) > > { > > static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); > > + bool egress = is_internal_port(netdev_get_type(netdev)); > > struct tc_flower flower; > > const struct flow *key = &match->flow; > > struct flow *mask = &match->wc.masks; > > @@ -1286,6 +1296,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > > odp_port_t port = nl_attr_get_odp_port(nla); > > struct netdev *outdev = netdev_ports_get(port, info->dpif_class); > > > > + if (is_internal_port(netdev_get_type(outdev))) { > > + return EOPNOTSUPP; > > + } > > action->ifindex_out = netdev_get_ifindex(outdev); > > action->type = TC_ACT_OUTPUT; > > flower.action_count++; > > @@ -1333,7 +1346,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > > handle = get_ufid_tc_mapping(ufid, &prio, NULL); > > if (handle && prio) { > > VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", handle, prio); > > - del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); > > + del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid, > > + egress); > > } > > > > if (!prio) { > > @@ -1347,7 +1361,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, > > flower.act_cookie.data = ufid; > > flower.act_cookie.len = sizeof *ufid; > > > > - err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, false); > > + err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, egress); > > if (!err) { > > add_ufid_tc_mapping(ufid, flower.prio, flower.handle, netdev, ifindex); > > } > > @@ -1371,6 +1385,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, > > odp_port_t in_port; > > int prio = 0; > > int ifindex; > > + bool egress; > > int handle; > > int err; > > > > @@ -1379,6 +1394,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, > > return ENOENT; > > } > > > > + egress = is_internal_port(netdev_get_type(netdev)); > > ifindex = netdev_get_ifindex(dev); > > if (ifindex < 0) { > > VLOG_ERR_RL(&error_rl, "flow_get: failed to get ifindex for %s: %s", > > @@ -1390,7 +1406,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, > > VLOG_DBG_RL(&rl, "flow get (dev %s prio %d handle %d)", > > netdev_get_name(dev), prio, handle); > > block_id = get_block_id_from_netdev(netdev); > > - err = tc_get_flower(ifindex, prio, handle, &flower, block_id, false); > > + err = tc_get_flower(ifindex, prio, handle, &flower, block_id, egress); > > netdev_close(dev); > > if (err) { > > VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s", > > @@ -1417,6 +1433,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, > > struct netdev *dev; > > int prio = 0; > > int ifindex; > > + bool egress; > > int handle; > > int error; > > > > @@ -1425,6 +1442,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, > > return ENOENT; > > } > > > > + egress = is_internal_port(netdev_get_type(netdev)); > > ifindex = netdev_get_ifindex(dev); > > if (ifindex < 0) { > > VLOG_ERR_RL(&error_rl, "flow_del: failed to get ifindex for %s: %s", > > @@ -1437,14 +1455,15 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, > > > > if (stats) { > > memset(stats, 0, sizeof *stats); > > - if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, false)) { > > + if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, egress)) { > > stats->n_packets = get_32aligned_u64(&flower.stats.n_packets); > > stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes); > > stats->used = flower.lastused; > > } > > } > > > > - error = del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); > > + error = del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid, > > + egress); > > > > netdev_close(dev); > > > > @@ -1516,6 +1535,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) > > { > > static struct ovsthread_once multi_mask_once = OVSTHREAD_ONCE_INITIALIZER; > > static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER; > > + bool egress = is_internal_port(netdev_get_type(netdev)); > > uint32_t block_id = 0; > > int ifindex; > > int error; > > @@ -1538,7 +1558,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) > > } > > > > block_id = get_block_id_from_netdev(netdev); > > - error = tc_add_del_ingress_qdisc(ifindex, true, block_id, false); > > + error = tc_add_del_ingress_qdisc(ifindex, true, block_id, egress); > > > > if (error && error != EEXIST) { > > VLOG_ERR("failed adding ingress qdisc required for offloading: %s", > >
diff --git a/lib/dpif.c b/lib/dpif.c index 457c9bf..ce413d1 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -100,15 +100,6 @@ static bool should_log_flow_message(const struct vlog_module *module, /* Incremented whenever tnl route, arp, etc changes. */ struct seq *tnl_conf_seq; -static bool -dpif_is_internal_port(const char *type) -{ - /* For userspace datapath, tap devices are the equivalent - * of internal devices in the kernel datapath, so both - * these types are 'internal' devices. */ - return !strcmp(type, "internal") || !strcmp(type, "tap"); -} - static void dp_initialize(void) { @@ -359,10 +350,6 @@ do_open(const char *name, const char *type, bool create, struct dpif **dpifp) struct netdev *netdev; int err; - if (dpif_is_internal_port(dpif_port.type)) { - continue; - } - err = netdev_open(dpif_port.name, dpif_port.type, &netdev); if (!err) { @@ -434,9 +421,7 @@ dpif_remove_netdev_ports(struct dpif *dpif) { struct dpif_port dpif_port; DPIF_PORT_FOR_EACH (&dpif_port, &port_dump, dpif) { - if (!dpif_is_internal_port(dpif_port.type)) { - netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); - } + netdev_ports_remove(dpif_port.port_no, dpif->dpif_class); } } @@ -581,16 +566,12 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop) if (!error) { VLOG_DBG_RL(&dpmsg_rl, "%s: added %s as port %"PRIu32, dpif_name(dpif), netdev_name, port_no); + struct dpif_port dpif_port; - if (!dpif_is_internal_port(netdev_get_type(netdev))) { - - struct dpif_port dpif_port; - - dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); - dpif_port.name = CONST_CAST(char *, netdev_name); - dpif_port.port_no = port_no; - netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); - } + dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev)); + dpif_port.name = CONST_CAST(char *, netdev_name); + dpif_port.port_no = port_no; + netdev_ports_insert(netdev, dpif->dpif_class, &dpif_port); } else { VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s", dpif_name(dpif), netdev_name, ovs_strerror(error)); diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 8d37eb6..18445bd 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -3223,6 +3223,7 @@ const struct netdev_class netdev_tap_class = { const struct netdev_class netdev_internal_class = { NETDEV_LINUX_CLASS_COMMON, + LINUX_FLOW_OFFLOAD_API, .type = "internal", .construct = netdev_linux_construct, .get_stats = netdev_internal_get_stats, diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c index 31ad9f4..3b2ca56 100644 --- a/lib/netdev-tc-offloads.c +++ b/lib/netdev-tc-offloads.c @@ -52,6 +52,12 @@ struct netlink_field { int size; }; +static bool +is_internal_port(const char *type) +{ + return !strcmp(type, "internal"); +} + static struct netlink_field set_flower_map[][4] = { [OVS_KEY_ATTR_IPV4] = { { offsetof(struct ovs_key_ipv4, ipv4_src), @@ -178,11 +184,12 @@ del_ufid_tc_mapping(const ovs_u128 *ufid) /* Wrapper function to delete filter and ufid tc mapping */ static int del_filter_and_ufid_mapping(int ifindex, int prio, int handle, - uint32_t block_id, const ovs_u128 *ufid) + uint32_t block_id, const ovs_u128 *ufid, + bool egress) { int err; - err = tc_del_filter(ifindex, prio, handle, block_id, false); + err = tc_del_filter(ifindex, prio, handle, block_id, egress); del_ufid_tc_mapping(ufid); return err; @@ -339,6 +346,7 @@ get_block_id_from_netdev(struct netdev *netdev) int netdev_tc_flow_flush(struct netdev *netdev) { + bool egress = is_internal_port(netdev_get_type(netdev)); int ifindex = netdev_get_ifindex(netdev); uint32_t block_id = 0; @@ -350,13 +358,14 @@ netdev_tc_flow_flush(struct netdev *netdev) block_id = get_block_id_from_netdev(netdev); - return tc_flush(ifindex, block_id, false); + return tc_flush(ifindex, block_id, egress); } int netdev_tc_flow_dump_create(struct netdev *netdev, struct netdev_flow_dump **dump_out) { + bool egress = is_internal_port(netdev_get_type(netdev)); struct netdev_flow_dump *dump; uint32_t block_id = 0; int ifindex; @@ -372,7 +381,7 @@ netdev_tc_flow_dump_create(struct netdev *netdev, dump = xzalloc(sizeof *dump); dump->nl_dump = xzalloc(sizeof *dump->nl_dump); dump->netdev = netdev_ref(netdev); - tc_dump_flower_start(ifindex, dump->nl_dump, block_id, false); + tc_dump_flower_start(ifindex, dump->nl_dump, block_id, egress); *dump_out = dump; @@ -1072,6 +1081,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, struct dpif_flow_stats *stats OVS_UNUSED) { static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); + bool egress = is_internal_port(netdev_get_type(netdev)); struct tc_flower flower; const struct flow *key = &match->flow; struct flow *mask = &match->wc.masks; @@ -1286,6 +1296,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, odp_port_t port = nl_attr_get_odp_port(nla); struct netdev *outdev = netdev_ports_get(port, info->dpif_class); + if (is_internal_port(netdev_get_type(outdev))) { + return EOPNOTSUPP; + } action->ifindex_out = netdev_get_ifindex(outdev); action->type = TC_ACT_OUTPUT; flower.action_count++; @@ -1333,7 +1346,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, handle = get_ufid_tc_mapping(ufid, &prio, NULL); if (handle && prio) { VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d", handle, prio); - del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); + del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid, + egress); } if (!prio) { @@ -1347,7 +1361,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, flower.act_cookie.data = ufid; flower.act_cookie.len = sizeof *ufid; - err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, false); + err = tc_replace_flower(ifindex, prio, handle, &flower, block_id, egress); if (!err) { add_ufid_tc_mapping(ufid, flower.prio, flower.handle, netdev, ifindex); } @@ -1371,6 +1385,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, odp_port_t in_port; int prio = 0; int ifindex; + bool egress; int handle; int err; @@ -1379,6 +1394,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, return ENOENT; } + egress = is_internal_port(netdev_get_type(netdev)); ifindex = netdev_get_ifindex(dev); if (ifindex < 0) { VLOG_ERR_RL(&error_rl, "flow_get: failed to get ifindex for %s: %s", @@ -1390,7 +1406,7 @@ netdev_tc_flow_get(struct netdev *netdev OVS_UNUSED, VLOG_DBG_RL(&rl, "flow get (dev %s prio %d handle %d)", netdev_get_name(dev), prio, handle); block_id = get_block_id_from_netdev(netdev); - err = tc_get_flower(ifindex, prio, handle, &flower, block_id, false); + err = tc_get_flower(ifindex, prio, handle, &flower, block_id, egress); netdev_close(dev); if (err) { VLOG_ERR_RL(&error_rl, "flow get failed (dev %s prio %d handle %d): %s", @@ -1417,6 +1433,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, struct netdev *dev; int prio = 0; int ifindex; + bool egress; int handle; int error; @@ -1425,6 +1442,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, return ENOENT; } + egress = is_internal_port(netdev_get_type(netdev)); ifindex = netdev_get_ifindex(dev); if (ifindex < 0) { VLOG_ERR_RL(&error_rl, "flow_del: failed to get ifindex for %s: %s", @@ -1437,14 +1455,15 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED, if (stats) { memset(stats, 0, sizeof *stats); - if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, false)) { + if (!tc_get_flower(ifindex, prio, handle, &flower, block_id, egress)) { stats->n_packets = get_32aligned_u64(&flower.stats.n_packets); stats->n_bytes = get_32aligned_u64(&flower.stats.n_bytes); stats->used = flower.lastused; } } - error = del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid); + error = del_filter_and_ufid_mapping(ifindex, prio, handle, block_id, ufid, + egress); netdev_close(dev); @@ -1516,6 +1535,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) { static struct ovsthread_once multi_mask_once = OVSTHREAD_ONCE_INITIALIZER; static struct ovsthread_once block_once = OVSTHREAD_ONCE_INITIALIZER; + bool egress = is_internal_port(netdev_get_type(netdev)); uint32_t block_id = 0; int ifindex; int error; @@ -1538,7 +1558,7 @@ netdev_tc_init_flow_api(struct netdev *netdev) } block_id = get_block_id_from_netdev(netdev); - error = tc_add_del_ingress_qdisc(ifindex, true, block_id, false); + error = tc_add_del_ingress_qdisc(ifindex, true, block_id, egress); if (error && error != EEXIST) { VLOG_ERR("failed adding ingress qdisc required for offloading: %s",