@@ -420,6 +420,7 @@ enum dpdk_hw_ol_features {
struct netdev_dpdk {
PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
+ dpdk_port_t flow_transfer_proxy_port_id;
dpdk_port_t port_id;
/* If true, device was attached by rte_eth_dev_attach(). */
@@ -1130,8 +1131,9 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
uint32_t rx_chksm_offload_capa = RTE_ETH_RX_OFFLOAD_UDP_CKSUM |
RTE_ETH_RX_OFFLOAD_TCP_CKSUM |
RTE_ETH_RX_OFFLOAD_IPV4_CKSUM;
-
#ifdef ALLOW_EXPERIMENTAL_API
+ int ret;
+
/*
* Full tunnel offload requires that tunnel ID metadata be
* delivered with "miss" packets from the hardware to the
@@ -1141,6 +1143,27 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
* Request delivery of such metadata.
*/
dpdk_eth_dev_init_rx_metadata(dev);
+
+ /*
+ * Managing "transfer" flows requires that the user communicate them
+ * via a port which has the privilege to control the embedded switch.
+ * For some vendors, all ports in a given switching domain have
+ * this privilege. For other vendors, it's only one port.
+ *
+ * Get the proxy port ID and remember it for later use.
+ */
+ ret = rte_flow_pick_transfer_proxy(dev->port_id,
+ &dev->flow_transfer_proxy_port_id, NULL);
+ if (ret != 0) {
+ /*
+ * The PMD does not indicate the proxy port.
+ * Assume the proxy is unneeded.
+ */
+ dev->flow_transfer_proxy_port_id = dev->port_id;
+ }
+#else /* ! ALLOW_EXPERIMENTAL_API */
+ /* No API to get transfer proxy; assume the proxy is unneeded. */
+ dev->flow_transfer_proxy_port_id = dev->port_id;
#endif /* ALLOW_EXPERIMENTAL_API */
rte_eth_dev_info_get(dev->port_id, &info);
@@ -3762,8 +3785,12 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[], void *aux OVS_UNUSED)
{
struct ds used_interfaces = DS_EMPTY_INITIALIZER;
+#ifdef ALLOW_EXPERIMENTAL_API
+ struct netdev_dpdk *dev_self = NULL;
+#endif /* ALLOW_EXPERIMENTAL_API */
struct rte_eth_dev_info dev_info;
dpdk_port_t sibling_port_id;
+ struct netdev_dpdk *dev;
dpdk_port_t port_id;
bool used = false;
char *response;
@@ -3781,8 +3808,6 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
argv[1]);
RTE_ETH_FOREACH_DEV_SIBLING (sibling_port_id, port_id) {
- struct netdev_dpdk *dev;
-
LIST_FOR_EACH (dev, list_node, &dpdk_list) {
if (dev->port_id != sibling_port_id) {
continue;
@@ -3802,6 +3827,28 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc OVS_UNUSED,
     }
     ds_destroy(&used_interfaces);
+#ifdef ALLOW_EXPERIMENTAL_API
+    /*
+     * The device being detached may happen to be a flow proxy port
+     * for another device (still attached). If so, do not allow to
+     * detach. Devices dependent on this one must be detached first.
+     */
+    LIST_FOR_EACH (dev, list_node, &dpdk_list) {
+        if (dev->port_id == port_id) {
+            dev_self = dev;
+        } else if (dev->flow_transfer_proxy_port_id == port_id) {
+            response = xasprintf("Device '%s' can not be detached (flow proxy)",
+                                 argv[1]);
+            goto error;
+        }
+    }
+
+    /* Indicate that the device being detached no longer needs a flow proxy. */
+    if (dev_self != NULL) {
+        dev_self->flow_transfer_proxy_port_id = port_id;
+    }
+#endif /* ALLOW_EXPERIMENTAL_API */
+
rte_eth_dev_info_get(port_id, &dev_info);
rte_eth_dev_close(port_id);
if (rte_dev_remove(dev_info.device) < 0) {
@@ -5167,7 +5213,8 @@ unlock:
}
int
-netdev_dpdk_get_port_id(struct netdev *netdev)
+netdev_dpdk_get_port_id(struct netdev *netdev,
+ bool flow_transfer_proxy)
{
struct netdev_dpdk *dev;
int ret = -1;
@@ -5178,7 +5225,7 @@ netdev_dpdk_get_port_id(struct netdev *netdev)
dev = netdev_dpdk_cast(netdev);
ovs_mutex_lock(&dev->mutex);
- ret = dev->port_id;
+ ret = flow_transfer_proxy ? dev->flow_transfer_proxy_port_id : dev->port_id;
ovs_mutex_unlock(&dev->mutex);
out:
return ret;
@@ -5214,13 +5261,15 @@ out:
int
netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
- struct rte_flow *rte_flow,
+ bool transfer, struct rte_flow *rte_flow,
struct rte_flow_error *error)
{
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
int ret;
- ret = rte_flow_destroy(dev->port_id, rte_flow, error);
+ ret = rte_flow_destroy(transfer ?
+ dev->flow_transfer_proxy_port_id : dev->port_id,
+ rte_flow, error);
return ret;
}
@@ -5234,7 +5283,19 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
struct rte_flow *flow;
struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
- flow = rte_flow_create(dev->port_id, attr, items, actions, error);
+#ifdef ALLOW_EXPERIMENTAL_API
+    if (!attr->transfer) {
+        /*
+         * The 1st item in any pattern is a traffic source one.
+         * It is unnecessary in the case of non-transfer rules.
+         */
+        items++;
+    }
+#endif /* ALLOW_EXPERIMENTAL_API */
+
+ flow = rte_flow_create(attr->transfer ?
+ dev->flow_transfer_proxy_port_id : dev->port_id,
+ attr, items, actions, error);
return flow;
}
@@ -5262,7 +5323,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
}
dev = netdev_dpdk_cast(netdev);
- ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
+ ret = rte_flow_query(dev->flow_transfer_proxy_port_id, rte_flow,
+ actions, query, error);
return ret;
}
@@ -5284,8 +5346,8 @@ netdev_dpdk_rte_flow_tunnel_decap_set(struct netdev *netdev,
dev = netdev_dpdk_cast(netdev);
ovs_mutex_lock(&dev->mutex);
- ret = rte_flow_tunnel_decap_set(dev->port_id, tunnel, actions,
- num_of_actions, error);
+ ret = rte_flow_tunnel_decap_set(dev->flow_transfer_proxy_port_id, tunnel,
+ actions, num_of_actions, error);
ovs_mutex_unlock(&dev->mutex);
return ret;
}
@@ -5306,8 +5368,8 @@ netdev_dpdk_rte_flow_tunnel_match(struct netdev *netdev,
dev = netdev_dpdk_cast(netdev);
ovs_mutex_lock(&dev->mutex);
- ret = rte_flow_tunnel_match(dev->port_id, tunnel, items, num_of_items,
- error);
+ ret = rte_flow_tunnel_match(dev->flow_transfer_proxy_port_id, tunnel,
+ items, num_of_items, error);
ovs_mutex_unlock(&dev->mutex);
return ret;
}
@@ -5328,7 +5390,8 @@ netdev_dpdk_rte_flow_get_restore_info(struct netdev *netdev,
dev = netdev_dpdk_cast(netdev);
ovs_mutex_lock(&dev->mutex);
- ret = rte_flow_get_restore_info(dev->port_id, m, info, error);
+ ret = rte_flow_get_restore_info(dev->flow_transfer_proxy_port_id,
+ m, info, error);
ovs_mutex_unlock(&dev->mutex);
return ret;
}
@@ -5349,8 +5412,8 @@ netdev_dpdk_rte_flow_tunnel_action_decap_release(
dev = netdev_dpdk_cast(netdev);
ovs_mutex_lock(&dev->mutex);
- ret = rte_flow_tunnel_action_decap_release(dev->port_id, actions,
- num_of_actions, error);
+ ret = rte_flow_tunnel_action_decap_release(dev->flow_transfer_proxy_port_id,
+ actions, num_of_actions, error);
ovs_mutex_unlock(&dev->mutex);
return ret;
}
@@ -5370,8 +5433,8 @@ netdev_dpdk_rte_flow_tunnel_item_release(struct netdev *netdev,
dev = netdev_dpdk_cast(netdev);
ovs_mutex_lock(&dev->mutex);
- ret = rte_flow_tunnel_item_release(dev->port_id, items, num_of_items,
- error);
+ ret = rte_flow_tunnel_item_release(dev->flow_transfer_proxy_port_id,
+ items, num_of_items, error);
ovs_mutex_unlock(&dev->mutex);
return ret;
}
@@ -35,7 +35,7 @@ bool netdev_dpdk_flow_api_supported(struct netdev *);
int
netdev_dpdk_rte_flow_destroy(struct netdev *netdev,
- struct rte_flow *rte_flow,
+ bool transfer, struct rte_flow *rte_flow,
struct rte_flow_error *error);
struct rte_flow *
netdev_dpdk_rte_flow_create(struct netdev *netdev,
@@ -49,7 +49,7 @@ netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
struct rte_flow_query_count *query,
struct rte_flow_error *error);
int
-netdev_dpdk_get_port_id(struct netdev *netdev);
+netdev_dpdk_get_port_id(struct netdev *netdev, bool flow_transfer_proxy);
#ifdef ALLOW_EXPERIMENTAL_API
@@ -353,8 +353,23 @@ dump_flow_pattern(struct ds *s,
if (item->type == RTE_FLOW_ITEM_TYPE_END) {
ds_put_cstr(s, "end ");
+#ifdef ALLOW_EXPERIMENTAL_API
+ } else if (item->type == RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT) {
+ const struct rte_flow_item_ethdev *ethdev_spec = item->spec;
+ const struct rte_flow_item_ethdev *ethdev_mask = item->mask;
+
+ ds_put_cstr(s, "represented_port ");
+
+ DUMP_PATTERN_ITEM(ethdev_mask->port_id, false, "ethdev_port_id",
+ "%"PRIu16, ethdev_spec->port_id,
+ ethdev_mask->port_id, 0);
+ } else if (flow_patterns->tnl_pmd_items_cnt &&
+ pattern_index < 1 /* REPRESENTED_PORT */ +
+ flow_patterns->tnl_pmd_items_cnt) {
+#else /* ! ALLOW_EXPERIMENTAL_API */
} else if (flow_patterns->tnl_pmd_items_cnt &&
pattern_index < flow_patterns->tnl_pmd_items_cnt) {
+#endif /* ALLOW_EXPERIMENTAL_API */
return;
} else if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
const struct rte_flow_item_eth *eth_spec = item->spec;
@@ -927,7 +942,8 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
extra_str = ds_cstr(&s_extra);
VLOG_DBG_RL(&rl, "%s: rte_flow 0x%"PRIxPTR" %s flow create %d %s",
netdev_get_name(netdev), (intptr_t) flow, extra_str,
- netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
+ netdev_dpdk_get_port_id(netdev, attr->transfer),
+ ds_cstr(&s));
}
} else {
enum vlog_level level = VLL_WARN;
@@ -942,7 +958,8 @@ netdev_offload_dpdk_flow_create(struct netdev *netdev,
extra_str = ds_cstr(&s_extra);
VLOG_RL(&rl, level, "%s: Failed flow: %s flow create %d %s",
netdev_get_name(netdev), extra_str,
- netdev_dpdk_get_port_id(netdev), ds_cstr(&s));
+ netdev_dpdk_get_port_id(netdev, attr->transfer),
+ ds_cstr(&s));
}
}
ds_destroy(&s);
@@ -1035,6 +1052,12 @@ free_flow_patterns(struct flow_patterns *patterns)
struct rte_flow_error error;
int i;
+#ifdef ALLOW_EXPERIMENTAL_API
+ /* REPRESENTED_PORT */
+ free(CONST_CAST(void *, patterns->items[0].spec));
+ free(CONST_CAST(void *, patterns->items[0].mask));
+#endif /* ALLOW_EXPERIMENTAL_API */
+
if (patterns->tnl_pmd_items) {
struct rte_flow_item *tnl_pmd_items = patterns->tnl_pmd_items;
uint32_t tnl_pmd_items_cnt = patterns->tnl_pmd_items_cnt;
@@ -1049,7 +1072,12 @@ free_flow_patterns(struct flow_patterns *patterns)
}
}
+#ifdef ALLOW_EXPERIMENTAL_API
+ for (i = 1 /* REPRESENTED_PORT */ + patterns->tnl_pmd_items_cnt;
+ i < patterns->cnt; i++) {
+#else /* ! ALLOW_EXPERIMENTAL_API */
for (i = patterns->tnl_pmd_items_cnt; i < patterns->cnt; i++) {
+#endif /* ALLOW_EXPERIMENTAL_API */
if (patterns->items[i].spec) {
free(CONST_CAST(void *, patterns->items[i].spec));
}
@@ -1115,13 +1143,13 @@ vport_to_rte_tunnel(struct netdev *vport,
tunnel->tp_dst = tnl_cfg->dst_port;
if (!VLOG_DROP_DBG(&rl)) {
ds_put_format(s_tnl, "flow tunnel create %d type vxlan; ",
- netdev_dpdk_get_port_id(netdev));
+ netdev_dpdk_get_port_id(netdev, true));
}
} else if (!strcmp(netdev_get_type(vport), "gre")) {
tunnel->type = RTE_FLOW_ITEM_TYPE_GRE;
if (!VLOG_DROP_DBG(&rl)) {
ds_put_format(s_tnl, "flow tunnel create %d type gre; ",
- netdev_dpdk_get_port_id(netdev));
+ netdev_dpdk_get_port_id(netdev, true));
}
} else {
VLOG_DBG_RL(&rl, "vport type '%s' is not supported",
@@ -1383,10 +1411,23 @@ parse_flow_match(struct netdev *netdev,
struct flow_patterns *patterns,
struct match *match)
{
+#ifdef ALLOW_EXPERIMENTAL_API
+ struct netdev *physdev = netdev_ports_get(orig_in_port, netdev->dpif_type);
+ struct rte_flow_item_ethdev *ethdev_spec = xzalloc(sizeof *ethdev_spec);
+ struct rte_flow_item_ethdev *ethdev_mask = xzalloc(sizeof *ethdev_mask);
+#endif /* ALLOW_EXPERIMENTAL_API */
struct rte_flow_item_eth *eth_spec = NULL, *eth_mask = NULL;
struct flow *consumed_masks;
uint8_t proto = 0;
+#ifdef ALLOW_EXPERIMENTAL_API
+ /* Add an explicit traffic source item to the beginning of the pattern. */
+ ethdev_spec->port_id = netdev_dpdk_get_port_id(physdev, false);
+ *ethdev_mask = rte_flow_item_ethdev_mask;
+ add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_REPRESENTED_PORT,
+ ethdev_spec, ethdev_mask, NULL);
+#endif /* ALLOW_EXPERIMENTAL_API */
+
consumed_masks = &match->wc.masks;
if (!flow_tnl_dst_is_set(&match->flow.tunnel)) {
@@ -1784,7 +1825,7 @@ add_represented_port_action(struct flow_actions *actions,
struct rte_flow_action_ethdev *ethdev;
int outdev_id;
- outdev_id = netdev_dpdk_get_port_id(outdev);
+ outdev_id = netdev_dpdk_get_port_id(outdev, false);
if (outdev_id < 0) {
return -1;
}
@@ -1801,7 +1842,7 @@ add_port_id_action(struct flow_actions *actions,
struct rte_flow_action_port_id *port_id;
int outdev_id;
- outdev_id = netdev_dpdk_get_port_id(outdev);
+ outdev_id = netdev_dpdk_get_port_id(outdev, false);
if (outdev_id < 0) {
return -1;
}
@@ -2333,6 +2374,7 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
struct netdev *physdev;
struct netdev *netdev;
ovs_u128 *ufid;
+ bool transfer;
int ret;
ovs_mutex_lock(&rte_flow_data->lock);
@@ -2344,12 +2386,13 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
rte_flow_data->dead = true;
+ transfer = rte_flow_data->actions_offloaded;
rte_flow = rte_flow_data->rte_flow;
physdev = rte_flow_data->physdev;
netdev = rte_flow_data->netdev;
ufid = &rte_flow_data->ufid;
- ret = netdev_dpdk_rte_flow_destroy(physdev, rte_flow, &error);
+ ret = netdev_dpdk_rte_flow_destroy(physdev, transfer, rte_flow, &error);
if (ret == 0) {
struct netdev_offload_dpdk_data *data;
@@ -2364,12 +2407,12 @@ netdev_offload_dpdk_flow_destroy(struct ufid_to_rte_flow_data *rte_flow_data)
" flow destroy %d ufid " UUID_FMT,
netdev_get_name(netdev), netdev_get_name(physdev),
(intptr_t) rte_flow,
- netdev_dpdk_get_port_id(physdev),
+ netdev_dpdk_get_port_id(physdev, transfer),
UUID_ARGS((struct uuid *) ufid));
} else {
VLOG_ERR("Failed flow: %s/%s: flow destroy %d ufid " UUID_FMT,
netdev_get_name(netdev), netdev_get_name(physdev),
- netdev_dpdk_get_port_id(physdev),
+ netdev_dpdk_get_port_id(physdev, transfer),
UUID_ARGS((struct uuid *) ufid));
}