@@ -836,8 +836,10 @@ struct dpif_upcall {
/* DPIF_UC_ACTION only. */
struct nlattr *userdata; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
- struct nlattr *out_tun_key; /* Output tunnel key. */
- struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
+ struct nlattr *out_tun_key; /* Output tunnel key. */
+ struct nlattr *actions; /* Argument to OVS_ACTION_ATTR_USERSPACE. */
+ struct flow flow; /* Caller provided 'flow' if the 'key' is not
+ available. */
};
/* A callback to notify higher layer of dpif about to be purged, so that
@@ -970,7 +970,7 @@ pkt_metadata_from_flow(struct pkt_metadata *md, const struct flow *flow)
md->recirc_id = flow->recirc_id;
md->dp_hash = flow->dp_hash;
- flow_tnl_copy__(&md->tunnel, &flow->tunnel);
+ flow_tnl_copy(&md->tunnel, &flow->tunnel);
md->skb_priority = flow->skb_priority;
md->pkt_mark = flow->pkt_mark;
md->in_port = flow->in_port;
@@ -28,6 +28,8 @@
extern "C" {
#endif
+struct dpif_upcall;
+
struct netdev_flow_api {
char *type;
/* Flush all offloaded flows from a netdev.
@@ -121,6 +123,34 @@ struct netdev_flow_api {
int (*meter_del)(ofproto_meter_id meter_id,
struct ofputil_meter_stats *stats);
+ /* Polls for upcall offload packets for an upcall handler. If successful,
+ * stores the upcall into '*upcall', using 'buf' for storage.
+ *
+ * The implementation should fill in 'upcall->flow' and point
+ * 'upcall->userdata' (if any) into data in the caller-provided 'buf'. The
+ * implementation may also use 'buf' for storing the data of
+ * 'upcall->packet'. If necessary to make room, it may reallocate 'buf'.
+ *
+ * The caller owns the data of 'upcall->packet' and may modify it. If
+ * packet's headroom is exhausted as it is manipulated, 'upcall->packet'
+ * will be reallocated. This requires the data of 'upcall->packet' to be
+ * released with dp_packet_uninit() before 'upcall' is destroyed. However,
+ * when an error is returned, the 'upcall->packet' may be uninitialized
+ * and should not be released.
+ *
+ * This function must not block. If no upcall is pending when it is
+ * called, it should return EAGAIN without blocking.
+ *
+ * Return 0 if successful, otherwise returns a positive errno value.
+ */
+ int (*recv)(struct dpif_upcall *upcall, struct ofpbuf *buf,
+ uint32_t handler_id);
+
+ /* Arranges for the poll loop for an upcall handler to wake up when the
+ * offload provider has a message queued to be received with the 'recv'
+ * member function. */
+ void (*recv_wait)(uint32_t handler_id);
+
/* Initializies the netdev flow api.
* Return 0 if successful, otherwise returns a positive errno value. */
int (*init_flow_api)(struct netdev *);
@@ -18,6 +18,9 @@
#include <errno.h>
+#include <inttypes.h>
#include <linux/if_ether.h>
+#include <linux/psample.h>
+#include <poll.h>
#include "cmap.h"
#include "dpif-provider.h"
@@ -35,6 +37,7 @@
#include "openvswitch/hmap.h"
#include "openvswitch/match.h"
#include "openvswitch/ofpbuf.h"
+#include "openvswitch/poll-loop.h"
#include "openvswitch/thread.h"
#include "openvswitch/types.h"
#include "openvswitch/util.h"
@@ -126,6 +129,9 @@ struct sgid_node {
struct offload_sample sample;
};
+static struct nl_sock *psample_sock; /* Joined to the psample mcgroup. */
+static int psample_family; /* Family id from nl_lookup_genl_family(). */
+
/* The sgid_map mutex protects the sample_group_ids and the sgid_map for
* cmap_insert(), cmap_remove(), or cmap_replace() operations. */
static struct ovs_mutex sgid_lock = OVS_MUTEX_INITIALIZER;
@@ -157,6 +163,14 @@ sgid_find(uint32_t id)
return node ? CONTAINER_OF(node, struct sgid_node, id_node) : NULL;
}
+/* Returns the sample stored in the sgid node for group 'id', or NULL. */
+static struct offload_sample *
+sample_find(uint32_t id)
+{
+    struct sgid_node *entry = sgid_find(id);
+    return entry ? &entry->sample : NULL;
+}
+
static void
offload_sample_clone(struct offload_sample *dst,
const struct offload_sample *src,
@@ -3074,6 +3088,55 @@ tc_cleanup_policer_actions(struct id_pool *police_ids,
hmap_destroy(&map);
}
+/* Sets up 'psample_sock': looks up the psample Generic Netlink family and its
+ * sample multicast group, then creates the socket and joins the group. */
+static void
+psample_init(void)
+{
+    unsigned int psample_mcgroup;
+    int err;
+
+    if (!netdev_is_flow_api_enabled()) {
+        VLOG_DBG("Flow API is not enabled");
+        return;
+    }
+
+    if (psample_sock) {
+        VLOG_DBG("Psample socket is already initialized");
+        return;
+    }
+
+    err = nl_lookup_genl_family(PSAMPLE_GENL_NAME, &psample_family);
+    if (err) {
+        VLOG_INFO("Generic Netlink family '%s' does not exist: %s\n"
+                  "Please make sure the kernel module psample is loaded",
+                  PSAMPLE_GENL_NAME, ovs_strerror(err));
+        return;
+    }
+
+    err = nl_lookup_genl_mcgroup(PSAMPLE_GENL_NAME,
+                                 PSAMPLE_NL_MCGRP_SAMPLE_NAME,
+                                 &psample_mcgroup);
+    if (err) {
+        VLOG_INFO("Failed to look up Netlink multicast group '%s': %s",
+                  PSAMPLE_NL_MCGRP_SAMPLE_NAME, ovs_strerror(err));
+        return;
+    }
+
+    err = nl_sock_create(NETLINK_GENERIC, &psample_sock);
+    if (err) {
+        VLOG_INFO("Failed to create psample socket: %s", ovs_strerror(err));
+        return;
+    }
+
+    err = nl_sock_join_mcgroup(psample_sock, psample_mcgroup);
+    if (err) {
+        VLOG_INFO("Failed to join psample mcgroup: %s", ovs_strerror(err));
+        nl_sock_destroy(psample_sock);
+        psample_sock = NULL;
+    }
+}
+
static int
netdev_tc_init_flow_api(struct netdev *netdev)
{
@@ -3134,6 +3197,7 @@ netdev_tc_init_flow_api(struct netdev *netdev)
ovs_mutex_lock(&sgid_lock);
sample_group_ids = id_pool_create(1, UINT32_MAX - 1);
ovs_mutex_unlock(&sgid_lock);
+ psample_init();
ovsthread_once_done(&once);
}
@@ -3351,6 +3415,113 @@ meter_tc_del_policer(ofproto_meter_id meter_id,
return err;
}
+struct offload_psample {
+ struct nlattr *packet; /* PSAMPLE_ATTR_DATA attribute (sampled packet). */
+ uint32_t group_id; /* PSAMPLE_ATTR_SAMPLE_GROUP value; sample_find() key. */
+};
+
+/* Extracts the sample group id and packet attribute from the psample netlink
+ * message in 'buf'.  Returns 0, or EINVAL if 'buf' fails validation. */
+static int
+nl_parse_psample(struct offload_psample *psample, struct ofpbuf *buf)
+{
+    static const struct nl_policy ovs_psample_policy[] = {
+        [PSAMPLE_ATTR_SAMPLE_GROUP] = { .type = NL_A_U32 },
+        [PSAMPLE_ATTR_DATA] = { .type = NL_A_UNSPEC },
+    };
+    struct nlattr *attrs[ARRAY_SIZE(ovs_psample_policy)];
+    struct ofpbuf msg = ofpbuf_const_initializer(buf->data, buf->size);
+    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&msg, sizeof *nlmsg);
+    struct genlmsghdr *genl = ofpbuf_try_pull(&msg, sizeof *genl);
+
+    if (!nlmsg || !genl || nlmsg->nlmsg_type != psample_family) {
+        return EINVAL;
+    }
+    if (!nl_policy_parse(&msg, 0, ovs_psample_policy, attrs,
+                         ARRAY_SIZE(ovs_psample_policy))) {
+        return EINVAL;
+    }
+
+    psample->group_id = nl_attr_get_u32(attrs[PSAMPLE_ATTR_SAMPLE_GROUP]);
+    psample->packet = attrs[PSAMPLE_ATTR_DATA];
+    return 0;
+}
+
+/* Fills 'upcall' as a DPIF_UC_ACTION upcall: packet data from 'psample', the
+ * rest from the sample registered under 'psample->group_id'.  Returns 0, or
+ * ENOENT if no sample is registered for that group id. */
+static int
+psample_parse_packet(struct offload_psample *psample,
+                     struct dpif_upcall *upcall)
+{
+    struct flow *flow = &upcall->flow;
+    struct offload_sample *sample;
+
+    memset(upcall, 0, sizeof *upcall);
+    dp_packet_use_const(&upcall->packet, nl_attr_get(psample->packet),
+                        nl_attr_get_size(psample->packet));
+
+    sample = sample_find(psample->group_id);
+    if (!sample) {
+        VLOG_ERR_RL(&error_rl, "Failed to get sample info via group id: "
+                    "%"PRIu32, psample->group_id);
+        return ENOENT;
+    }
+
+    upcall->type = DPIF_UC_ACTION;
+    upcall->userdata = sample->userdata;
+    upcall->actions = sample->userspace_actions;
+    if (sample->tunnel) {
+        flow_tnl_copy(&flow->tunnel, sample->tunnel);
+    }
+    flow->in_port.odp_port = netdev_ifindex_to_odp_port(sample->ifindex);
+
+    return 0;
+}
+
+/* 'recv' callback: reads one psample message into 'buf' and turns it into
+ * 'upcall'.  Returns 0 on success or a positive errno value; EAGAIN when
+ * 'handler_id' is nonzero, the psample socket is not open, or all 50 read
+ * attempts failed with ENOBUFS. */
+static int
+netdev_tc_recv(struct dpif_upcall *upcall, struct ofpbuf *buf,
+               uint32_t handler_id)
+{
+    int attempts_left;
+
+    if (handler_id || !psample_sock) {
+        return EAGAIN;
+    }
+
+    for (attempts_left = 50; attempts_left > 0; attempts_left--) {
+        struct offload_psample psample;
+        int error = nl_sock_recv(psample_sock, buf, NULL, false);
+
+        if (error == ENOBUFS) {
+            continue;
+        }
+        if (!error) {
+            error = nl_parse_psample(&psample, buf);
+        }
+        if (!error) {
+            error = psample_parse_packet(&psample, upcall);
+        }
+        return error;
+    }
+
+    return EAGAIN;
+}
+
+/* Waits on the single psample Netlink socket; only handler 0 uses it. */
+static void
+netdev_tc_recv_wait(uint32_t handler_id)
+{
+    if (handler_id || !psample_sock) {
+        return;
+    }
+    poll_fd_wait(nl_sock_fd(psample_sock), POLLIN);
+}
+
const struct netdev_flow_api netdev_offload_tc = {
.type = "linux_tc",
.flow_flush = netdev_tc_flow_flush,
@@ -3364,5 +3535,7 @@ const struct netdev_flow_api netdev_offload_tc = {
.meter_set = meter_tc_set_policer,
.meter_get = meter_tc_get_policer,
.meter_del = meter_tc_del_policer,
+ .recv = netdev_tc_recv,
+ .recv_wait = netdev_tc_recv_wait,
.init_flow_api = netdev_tc_init_flow_api,
};
@@ -38,6 +38,7 @@
#include "netdev-provider.h"
#include "netdev-vport.h"
#include "odp-netlink.h"
+#include "odp-util.h"
#include "openflow/openflow.h"
#include "packets.h"
#include "openvswitch/ofp-print.h"
@@ -826,7 +827,7 @@ odp_port_t
netdev_ifindex_to_odp_port(int ifindex)
{
struct port_to_netdev_data *data;
- odp_port_t ret = 0;
+ odp_port_t ret = ODPP_NONE;
ovs_rwlock_rdlock(&ifindex_to_port_rwlock);
HMAP_FOR_EACH_WITH_HASH (data, ifindex_node, ifindex, &ifindex_to_port) {
@@ -86,7 +86,7 @@ flow_tnl_size(const struct flow_tnl *src)
* data in 'dst' is NOT cleared, so this must not be used in cases where the
* uninitialized portion may be hashed over. */
static inline void
-flow_tnl_copy__(struct flow_tnl *dst, const struct flow_tnl *src)
+flow_tnl_copy(struct flow_tnl *dst, const struct flow_tnl *src)
{
memcpy(dst, src, flow_tnl_size(src));
}