@@ -440,6 +440,7 @@ dist-docs:
include Documentation/automake.mk
include m4/automake.mk
+include bpf/automake.mk
include lib/automake.mk
include ofproto/automake.mk
include utilities/automake.mk
new file mode 100644
@@ -0,0 +1,715 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+/* OVS Datapath Execution
+ * ======================
+ *
+ * When a lookup is successful the eBPF gets a list of actions to be
+ * executed, such as outputting the packet to a certain port, or
+ * pushing a VLAN tag. The list of actions is configured in ovs-vswitchd
+ * and may be a variable length depending on the desired network processing
+ * behaviour. For example, an L2 switch doing unknown broadcast sends
+ * packet to all its current ports. The OVS datapath's actions are derived
+ * from the OpenFlow action specification and the OVSDB schema for
+ * ovs-vswitchd.
+ *
+ */
+#include <errno.h>
+#include <stdint.h>
+#include <iproute2/bpf_elf.h>
+#include <linux/ip.h>
+
+#include "api.h"
+#include "maps.h"
+#include "helpers.h"
+
+#define ENABLE_POINTER_LOOKUP 1
+
+#define ALIGNED_CAST(TYPE, ATTR) ((TYPE) (void *) (ATTR))
+
+#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
+#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
+#define TTL_OFF (ETH_HLEN + offsetof(struct iphdr, ttl))
+#define DST_OFF (ETH_HLEN + offsetof(struct iphdr, daddr))
+#define SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
+
+/* Rewrites the IPv4 TOS byte to 'new_tos' and folds the change into the
+ * IP header checksum.  No-op (with a trace message) when the TOS already
+ * matches.
+ *
+ * NOTE(review): the kernel tc sample wraps the old/new byte in htons()
+ * before bpf_l3_csum_replace() since the checksum operates on 16-bit
+ * words in network order -- confirm the update is correct without it.
+ */
+static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
+{
+    __u8 old_tos;
+
+    bpf_skb_load_bytes(skb, TOS_OFF, &old_tos, 1);
+
+    if (old_tos == new_tos) {
+        printt("tos not change %d\n", old_tos);
+        return;
+    }
+
+    /* '2' = operate on a 16-bit field containing the byte. */
+    bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_tos, new_tos, 2);
+
+    /* Use helper here because using direct packet
+     * access causes verifier error
+     */
+    bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
+}
+
+/* Rewrites the IPv4 TTL to 'new_ttl' and folds the change into the IP
+ * header checksum.  No-op (with a trace message) when unchanged.
+ *
+ * NOTE(review): same checksum byte-position caveat as set_ip_tos() --
+ * the kernel sample passes htons()-adjusted values; confirm.
+ */
+static inline void set_ip_ttl(struct __sk_buff *skb, __u8 new_ttl)
+{
+    __u8 old_ttl;
+
+    bpf_skb_load_bytes(skb, TTL_OFF, &old_ttl, 1);
+
+    if (old_ttl == new_ttl) {
+        printt("ttl not change %d\n", old_ttl);
+        return;
+    }
+
+    printt("old ttl %d -> new ttl %d\n", old_ttl, new_ttl);
+
+    bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ttl, new_ttl, 2);
+    bpf_skb_store_bytes(skb, TTL_OFF, &new_ttl, sizeof(new_ttl), 0);
+}
+
+/* Rewrites the IPv4 destination address and updates the IP header
+ * checksum via l3_csum_replace4().  No-op when unchanged.
+ *
+ * NOTE(review): only the L3 checksum is updated here; for TCP/UDP the
+ * pseudo-header checksum also covers the address -- confirm the L4
+ * checksum is handled elsewhere (or offloaded).
+ */
+static inline void set_ip_dst(struct __sk_buff *skb, ovs_be32 new_dst)
+{
+    ovs_be32 old_dst;
+
+    bpf_skb_load_bytes(skb, DST_OFF, &old_dst, 4);
+
+    if (old_dst == new_dst) {
+        printt("dst ip not change %x\n", old_dst);
+        return;
+    }
+    printt("old dst %x -> new dst %x\n", old_dst, new_dst);
+
+    l3_csum_replace4(skb, IP_CSUM_OFF, old_dst, new_dst);
+    bpf_skb_store_bytes(skb, DST_OFF, &new_dst, sizeof(new_dst), 0);
+}
+
+/* Rewrites the IPv4 source address and updates the IP header checksum
+ * via l3_csum_replace4().  No-op when unchanged.
+ *
+ * NOTE(review): as with set_ip_dst(), the TCP/UDP pseudo-header
+ * checksum is not updated here -- confirm this is handled elsewhere.
+ */
+static inline void set_ip_src(struct __sk_buff *skb, ovs_be32 new_src)
+{
+    ovs_be32 old_src;
+
+    bpf_skb_load_bytes(skb, SRC_OFF, &old_src, 4);
+
+    if (old_src == new_src) {
+        printt("src ip not change %x\n", old_src);
+        return;
+    }
+    printt("old src %x -> new src %x\n", old_src, new_src);
+
+    l3_csum_replace4(skb, IP_CSUM_OFF, old_src, new_src);
+    bpf_skb_store_bytes(skb, SRC_OFF, &new_src, sizeof(new_src), 0);
+}
+
+/*
+ * Every OVS action need to lookup the action list and
+ * with index, find out the next action to process
+ */
+/* Returns the bpf_action at the packet's current action index (kept in
+ * skb->cb), and hands the whole batch back through '__batch' so
+ * post_tail_action() can advance to the next action.  Returns NULL when
+ * the index is out of range or no action list can be found. */
+static inline struct bpf_action *pre_tail_action(struct __sk_buff *skb,
+                                                 struct bpf_action_batch **__batch)
+{
+    uint32_t index = ovs_cb_get_action_index(skb);
+    struct bpf_action *action = NULL;
+    struct bpf_action_batch *batch;
+    int zero_index = 0;
+
+    if (index >= BPF_DP_MAX_ACTION) {
+        printt("ERR max ebpf action hit\n");
+        return NULL;
+    }
+
+    if (skb->cb[OVS_CB_DOWNCALL_EXE]) {
+        /* Downcall packet has a dedicated action list */
+        batch = bpf_map_lookup_elem(&execute_actions, &zero_index);
+    } else {
+        struct bpf_flow_key *exe_flow_key;
+
+        /* The flow key of the packet being executed was stashed in a
+         * per-CPU map by the lookup stage; use it to find the batch. */
+        exe_flow_key = bpf_map_lookup_elem(&percpu_executing_key,
+                                           &zero_index);
+        if (!exe_flow_key) {
+            printt("empty percpu_executing_key\n");
+            return NULL;
+        }
+
+#if ENABLE_POINTER_LOOKUP
+        /*
+         * kernel 4.18-rc1, commit:
+         * bpf: allow map helpers access to map values directly
+         */
+        batch = bpf_map_lookup_elem(&flow_table, exe_flow_key);
+#else
+        /* Older kernels require the lookup key to live on the stack. */
+        struct bpf_flow_key flow_key = *exe_flow_key;
+        batch = bpf_map_lookup_elem(&flow_table, &flow_key);
+#endif
+    }
+    if (!batch) {
+        printt("no batch action found\n");
+        return NULL;
+    }
+
+    *__batch = batch;
+    action = &((batch)->actions[index]);
+    return action;
+}
+
+/*
+ * After processing the action, tail call the next.
+ */
+/* Advances the per-packet action cursor in skb->cb and tail-calls the
+ * program for the next action.  Returns TC_ACT_STOLEN when the list is
+ * exhausted, TC_ACT_SHOT on error (no batch, or no program installed
+ * for the next action type). */
+static inline int post_tail_action(struct __sk_buff *skb,
+                                   struct bpf_action_batch *batch)
+{
+    struct bpf_action *next_action;
+    uint32_t index;
+
+    if (!batch)
+        return TC_ACT_SHOT;
+
+    index = skb->cb[OVS_CB_ACT_IDX] + 1;
+    skb->cb[OVS_CB_ACT_IDX] = index;
+
+    if (index >= BPF_DP_MAX_ACTION)
+        goto finish;
+
+    /* A zero action type marks the end of the batch. */
+    next_action = &batch->actions[index];
+    if (next_action->type == 0)
+        goto finish;
+
+    printt("next action type = %d\n", next_action->type);
+    bpf_tail_call(skb, &tailcalls, next_action->type);
+
+    /* bpf_tail_call() does not return on success, so reaching this line
+     * means no program is installed at slot 'next_action->type'. */
+    printt("[BUG] tail call missing\n");
+    return TC_ACT_SHOT;
+
+finish:
+    if (skb->cb[OVS_CB_DOWNCALL_EXE]) {
+        /* Downcall action lists are single-use: remove the entry. */
+        int index = 0;
+        bpf_map_delete_elem(&execute_actions, &index);
+    }
+    return TC_ACT_STOLEN;
+}
+
+/*
+ * Use this action to indicate end of action list
+ * BPF program: tail-0
+ */
+__section_tail(OVS_ACTION_ATTR_UNSPEC)
+static int tail_action_unspec(struct __sk_buff *skb)
+{
+    /* Only read for the trace message below. */
+    int index OVS_UNUSED = ovs_cb_get_action_index(skb);
+
+    printt("action index = %d, end of processing\n", index);
+
+    /* Handle actions=drop, we return SHOT so the device's dropped stats
+       will be incremented (see sch_handle_ingress).
+
+       If there are more actions, ex: actions=a1,a2,drop, this is
+       handled in post_tail_actions and return STOLEN
+     */
+    return TC_ACT_SHOT;
+}
+
+/*
+ * BPF program: tail-1
+ */
+__section_tail(OVS_ACTION_ATTR_OUTPUT)
+static int tail_action_output(struct __sk_buff *skb)
+{
+    int ret __attribute__((__unused__));
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+    int flags;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    /* Internal dev is tap type and hooked only to bpf egress filter.
+       When output to an internal device, a packet is clone-redirected to
+       this device's ingress so that this packet is processed by kernel stack.
+       Why? Since if the packet is sent to its egress, it is delivered to the
+       tap device's socket, not kernel.
+     */
+    flags = action->u.out.flags & OVS_BPF_FLAGS_TX_STACK ? BPF_F_INGRESS : 0;
+    printt("output action port = %d ingress? %d\n",
+           action->u.out.port, (flags));
+
+    /* Cloning leaves this skb intact so any remaining actions in the
+     * batch still run on it.  NOTE(review): the helper's return value
+     * is ignored; a failed redirect is silent. */
+    bpf_clone_redirect(skb, action->u.out.port, flags);
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements OVS userspace
+ * BPF program: tail-2
+ */
+__section_tail(OVS_ACTION_ATTR_USERSPACE)
+static int tail_action_userspace(struct __sk_buff *skb)
+{
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    /* XXX If move this declaration to top, the stack will overflow. */
+    struct bpf_upcall md = {
+        .type = OVS_UPCALL_ACTION,
+        .skb_len = skb->len,
+        .ifindex = skb->ifindex,
+    };
+
+    if (action->u.userspace.nlattr_len > sizeof(md.uactions)) {
+        printt("userspace action is too large\n");
+        return TC_ACT_SHOT;
+    }
+
+    /* Copy the whole buffer (the verifier wants a constant size); only
+     * the first 'uactions_len' bytes are meaningful to userspace. */
+    memcpy(md.uactions, action->u.userspace.nlattr_data, sizeof(md.uactions));
+    md.uactions_len = action->u.userspace.nlattr_len;
+
+    struct ebpf_headers_t *hdrs = bpf_get_headers();
+    if (!hdrs) {
+        printt("headers is NULL\n");
+        return TC_ACT_SHOT;
+    }
+
+    memcpy(&md.key.headers, hdrs, sizeof(*hdrs));
+
+    /* Upper 32 bits of 'flags': number of packet bytes to attach to the
+     * perf event; lower bits: deliver on the current CPU's ring. */
+    uint64_t flags = skb->len;
+    flags <<= 32;
+    flags |= BPF_F_CURRENT_CPU;
+    int err = skb_event_output(skb, &upcalls, flags, &md, sizeof md);
+
+    if (err) {
+        /* Fix: terminate the trace message with a newline so it does not
+         * run into the next line of the trace buffer. */
+        printt("skb_event_output of userspace action: %d\n", err);
+        return TC_ACT_SHOT;
+    }
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements BPF tunnel
+ * BPF program: tail-3
+ */
+__section_tail(OVS_ACTION_ATTR_SET)
+static int tail_action_tunnel_set(struct __sk_buff *skb)
+{
+    struct bpf_tunnel_key key;
+    int ret;
+    uint64_t flags;
+
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+    struct ovs_action_set_tunnel *tunnel;
+    int key_attr;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    /* SET for tunnel */
+    if (action->is_set_tunnel) {
+        tunnel = &action->u.tunnel;
+
+        /* hard-coded now, should fetch it from action->u */
+        __builtin_memset(&key, 0x0, sizeof(key));
+        key.tunnel_id = tunnel->tunnel_id;
+        key.tunnel_tos = tunnel->tunnel_tos;
+        key.tunnel_ttl = tunnel->tunnel_ttl;
+
+        printt("tunnel_id = %x\n", key.tunnel_id);
+
+        /* TODO: handle BPF_F_DONT_FRAGMENT and BPF_F_SEQ_NUMBER */
+        flags = BPF_F_ZERO_CSUM_TX;
+        if (!tunnel->use_ipv6) {
+            key.remote_ipv4 = tunnel->remote_ipv4;
+            flags &= ~BPF_F_TUNINFO_IPV6;
+        } else {
+            /* remote_ipv4 starts the v4/v6 address union of struct
+             * bpf_tunnel_key, so a 16-byte copy fills remote_ipv6. */
+            memcpy(&key.remote_ipv4, &tunnel->remote_ipv4, 16);
+            flags |= BPF_F_TUNINFO_IPV6;
+        }
+
+        ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), flags);
+        if (ret < 0)
+            printt("ERR setting tunnel key\n");
+
+        if (tunnel->gnvopt_valid) {
+            ret = bpf_skb_set_tunnel_opt(skb, &tunnel->gnvopt,
+                                         sizeof tunnel->gnvopt);
+            if (ret < 0)
+                printt("ERR setting tunnel opt\n");
+        }
+
+        return post_tail_action(skb, batch);
+    }
+
+    /* SET for packet fields */
+    key_attr = action->u.mset.key_type;
+
+    switch (key_attr) {
+    case OVS_KEY_ATTR_ETHERNET: {
+        u8 *data = (u8 *)(long)skb->data;
+        u8 *data_end = (u8 *)(long)skb->data_end;
+        struct ethhdr *eth;
+        struct ovs_key_ethernet *ether;
+        int i;
+
+        /* packet data */
+        eth = (struct ethhdr *)data;
+        if (data + sizeof(*eth) > data_end)
+            return TC_ACT_SHOT;
+
+        /* value from map */
+        ether = &action->u.mset.key.ether;
+        for (i = 0; i < 6; i++) {
+            printt("mac dest[%d]: %x -> %x\n",
+                   i, eth->h_dest[i], ether->eth_dst.ea[i]);
+            eth->h_dest[i] = ether->eth_dst.ea[i];
+        }
+        /* Fix: log the source-MAC bytes being rewritten (was printing
+         * h_dest/eth_dst again, a copy-paste of the loop above). */
+        for (i = 0; i < 6; i++) {
+            printt("mac src[%d]: %x -> %x\n",
+                   i, eth->h_source[i], ether->eth_src.ea[i]);
+            eth->h_source[i] = ether->eth_src.ea[i];
+        }
+        break;
+    }
+    case OVS_KEY_ATTR_UNSPEC:
+    case OVS_KEY_ATTR_TUNNEL:
+    default:
+        printt("ERR: Un-implemented key attr %d in set action\n", key_attr);
+        return TC_ACT_SHOT;
+    }
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements VLAN push
+ * BPF program: tail-4
+ */
+__section_tail(OVS_ACTION_ATTR_PUSH_VLAN)
+static int tail_action_push_vlan(struct __sk_buff *skb)
+{
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+
+    printt("push vlan\n");
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    printt("vlan push tci %d\n", bpf_ntohs(action->u.push_vlan.vlan_tci));
+    printt("vlan push tpid %d\n", bpf_ntohs(action->u.push_vlan.vlan_tpid));
+
+    /* NOTE(review): masking with VLAN_VID_MASK discards the PCP/DEI bits
+     * of the TCI (the commented-out alternative below would keep them) --
+     * confirm dropping the priority bits is intended. */
+    vlan_push(skb, action->u.push_vlan.vlan_tpid,
+              bpf_ntohs(action->u.push_vlan.vlan_tci) & VLAN_VID_MASK);
+    //bpf_ntohs(action->u.push_vlan.vlan_tci) & (u16)~VLAN_TAG_PRESENT);
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements VLAN pop
+ * BPF program: tail-5
+ */
+__section_tail(OVS_ACTION_ATTR_POP_VLAN)
+static int tail_action_pop_vlan(struct __sk_buff *skb)
+{
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    /* Fix: the trace format had a "%d" with no matching argument, which
+     * printed an indeterminate value. */
+    printt("vlan pop\n");
+    bpf_skb_vlan_pop(skb);
+
+    /* FIXME: invalidate_flow_key()? */
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements sample
+ * BPF program: tail-6
+ */
+__section_tail(OVS_ACTION_ATTR_SAMPLE)
+static int tail_action_sample(struct __sk_buff *skb OVS_UNUSED)
+{
+    /* Not implemented: drop the packet.  (Fix: use string-literal
+     * concatenation instead of a backslash line-continuation, which
+     * embedded the next line's leading whitespace in the message.) */
+    printt("ERR: Sample action not implemented, "
+           "do you want to do it?\n");
+
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements recirculation
+ * BPF program: tail-7
+ */
+__section_tail(OVS_ACTION_ATTR_RECIRC)
+static int tail_action_recirc(struct __sk_buff *skb)
+{
+    struct bpf_action_batch *batch;
+    struct bpf_action *action = pre_tail_action(skb, &batch);
+
+    if (!action)
+        return TC_ACT_SHOT;
+
+    /* Recirculation restarts processing from the match/action stage, so
+     * the flow key is always re-derived; is_flow_key_valid() is not
+     * consulted.  Recirc is expected to be the last action in the list. */
+    u32 recirc_id = action->u.recirc_id;
+    printt("recirc id = %d\n", recirc_id);
+
+    /* Stash the recirc id in the per-CPU metadata for the next pass. */
+    struct ebpf_metadata_t *md = bpf_get_mds();
+    if (!md) {
+        printt("lookup metadata failed\n");
+        return TC_ACT_SHOT;
+    }
+    md->md.recirc_id = recirc_id;
+
+    /* Rewind the action cursor and clear the downcall flag before
+     * re-entering the pipeline. */
+    skb->cb[OVS_CB_ACT_IDX] = 0;
+    skb->cb[OVS_CB_DOWNCALL_EXE] = 0;
+
+    /* FIXME: recirc should not call this. */
+    bpf_tail_call(skb, &tailcalls, MATCH_ACTION_CALL);
+
+    /* Only reached if the tail call fails. */
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implement hash
+ * BPF program: tail-8
+ */
+__section_tail(OVS_ACTION_ATTR_HASH)
+static int tail_action_hash(struct __sk_buff *skb)
+{
+    u32 hash = 0;
+    int index = 0;
+    struct ebpf_metadata_t *ebpf_md;
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    printt("skb->hash before = %x\n", skb->hash);
+    hash = bpf_get_hash_recalc(skb);
+    printt("skb->hash = %x hash \n", skb->hash);
+    /* Force a non-zero value (0 is presumably reserved to mean "no
+     * hash" -- confirm against consumers of dp_hash). */
+    if (!hash)
+        hash = 0x1;
+
+    /* NOTE(review): looks up percpu_metadata directly while other
+     * actions (e.g. recirc) use the bpf_get_mds() helper -- presumably
+     * equivalent; consider the helper for consistency. */
+    ebpf_md = bpf_map_lookup_elem(&percpu_metadata, &index);
+    if (!ebpf_md) {
+        printt("LOOKUP metadata failed\n");
+        return TC_ACT_SHOT;
+    }
+    printt("save hash to ebpf_md->md.dp_hash\n");
+    ebpf_md->md.dp_hash = hash; /* or create a ovs_flow_hash?*/
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements MPLS push
+ * BPF program: tail-9
+ */
+__section_tail(OVS_ACTION_ATTR_PUSH_MPLS)
+static int tail_action_mpls_push(struct __sk_buff *skb OVS_UNUSED)
+{
+    /* Not implemented: drop the packet.  (Fix: use string-literal
+     * concatenation instead of a backslash line-continuation, which
+     * embedded the next line's leading whitespace in the message.) */
+    printt("ERR: Push MPLS action not implemented, "
+           "do you want to do it?\n");
+
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements MPLS pop
+ * BPF program: tail-10
+ */
+__section_tail(OVS_ACTION_ATTR_POP_MPLS)
+static int tail_action_mpls_pop(struct __sk_buff *skb OVS_UNUSED)
+{
+    /* Not implemented: drop the packet.  (Fix: use string-literal
+     * concatenation instead of a backslash line-continuation, which
+     * embedded the next line's leading whitespace in the message.) */
+    printt("ERR: Pop MPLS action not implemented, "
+           "do you want to do it?\n");
+
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements set packet's fields, mask not supported.
+ * Many other fields not implemented yet.
+ * BPF program: tail-11
+ * TODO: hit verifier limit here, maybe create more program and
+ * more tail call.
+ */
+__section_tail(OVS_ACTION_ATTR_SET_MASKED)
+static int tail_action_set_masked(struct __sk_buff *skb)
+{
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+    int key_attr;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    /* Which OVS_KEY_ATTR_* this action rewrites; the new field values
+     * live in action->u.mset.key. */
+    key_attr = action->u.mset.key_type;
+
+    switch (key_attr) {
+    case OVS_KEY_ATTR_ETHERNET: {
+        u8 *data = (u8 *)(long)skb->data;
+        u8 *data_end = (u8 *)(long)skb->data_end;
+        struct ethhdr *eth;
+        struct ovs_key_ethernet *ether;
+        int i;
+
+        /* packet data */
+        eth = (struct ethhdr *)data;
+        if (data + sizeof(*eth) > data_end)
+            return TC_ACT_SHOT;
+
+        /* value from map */
+        ether = &action->u.mset.key.ether;
+        for (i = 0; i < 6; i++) {
+            printt("mac dest[%d]: %x -> %x\n",
+                   i, eth->h_dest[i], ether->eth_dst.ea[i]);
+            eth->h_dest[i] = ether->eth_dst.ea[i];
+        }
+        /* Fix: log the source-MAC bytes being rewritten (was printing
+         * h_dest/eth_dst again, a copy-paste of the loop above). */
+        for (i = 0; i < 6; i++) {
+            printt("mac src[%d]: %x -> %x\n",
+                   i, eth->h_source[i], ether->eth_src.ea[i]);
+            eth->h_source[i] = ether->eth_src.ea[i];
+        }
+        break;
+    }
+    case OVS_KEY_ATTR_IPV4: {
+        u8 *data = (u8 *)(long)skb->data;
+        u8 *data_end = (u8 *)(long)skb->data_end;
+        struct iphdr *nh;
+        struct ovs_key_ipv4 *ipv4;
+
+        /* packet data */
+        nh = ALIGNED_CAST(struct iphdr *, data + sizeof(struct ethhdr));
+        if ((u8 *)nh + sizeof(struct iphdr) + 12 > data_end) {
+            return TC_ACT_SHOT;
+        }
+
+        /* value from map */
+        ipv4 = &action->u.mset.key.ipv4;
+        /* set ipv4_proto is not supported, see
+         * datapath/actions.c
+         */
+        set_ip_tos(skb, ipv4->ipv4_tos);
+        set_ip_ttl(skb, ipv4->ipv4_ttl);
+
+#if ENABLE_POINTER_LOOKUP
+        set_ip_src(skb, ipv4->ipv4_src);
+        set_ip_dst(skb, ipv4->ipv4_dst);
+#endif
+
+        printt("set_masked ipv4 done\n");
+        /* XXX ignore frag */
+
+        break;
+    }
+    case OVS_KEY_ATTR_UNSPEC:
+    case OVS_KEY_ATTR_ENCAP:
+    case OVS_KEY_ATTR_PRIORITY: /* u32 skb->priority */
+    case OVS_KEY_ATTR_IN_PORT: /* u32 OVS dp port number */
+    case OVS_KEY_ATTR_VLAN: /* be16 VLAN TCI */
+    case OVS_KEY_ATTR_ETHERTYPE: /* be16 Ethernet type */
+    case OVS_KEY_ATTR_IPV6: /* struct ovs_key_ipv6 */
+    case OVS_KEY_ATTR_TCP: /* struct ovs_key_tcp */
+    case OVS_KEY_ATTR_UDP: /* struct ovs_key_udp */
+    case OVS_KEY_ATTR_ICMP: /* struct ovs_key_icmp */
+    case OVS_KEY_ATTR_ICMPV6: /* struct ovs_key_icmpv6 */
+    case OVS_KEY_ATTR_ARP: /* struct ovs_key_arp */
+    case OVS_KEY_ATTR_ND: /* struct ovs_key_nd */
+    case OVS_KEY_ATTR_SKB_MARK: /* u32 skb mark */
+    case OVS_KEY_ATTR_TUNNEL: /* Nested set of ovs_tunnel attributes */
+    case OVS_KEY_ATTR_SCTP: /* struct ovs_key_sctp */
+    case OVS_KEY_ATTR_TCP_FLAGS: /* be16 TCP flags. */
+    case OVS_KEY_ATTR_DP_HASH: /* u32 hash value. Value 0 indicates the hash */
+    case OVS_KEY_ATTR_RECIRC_ID: /* u32 recirc id */
+    case OVS_KEY_ATTR_MPLS: /* array of struct ovs_key_mpls. */
+    case OVS_KEY_ATTR_CT_STATE: /* u32 bitmask of OVS_CS_F_* */
+    case OVS_KEY_ATTR_CT_ZONE: /* u16 connection tracking zone. */
+    case OVS_KEY_ATTR_CT_MARK: /* u32 connection tracking mark */
+    case OVS_KEY_ATTR_CT_LABELS: /* 16-octet connection tracking labels */
+    case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4: /* struct ovs_key_ct_tuple_ipv4 */
+    case OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6: /* struct ovs_key_ct_tuple_ipv6 */
+    case OVS_KEY_ATTR_NSH: /* Nested set of ovs_nsh_key_* */
+#ifdef __KERNEL__
+    case OVS_KEY_ATTR_TUNNEL_INFO: /* struct ovs_tunnel_info */
+#endif
+#ifndef __KERNEL__
+    case OVS_KEY_ATTR_PACKET_TYPE: /* be32 packet type */
+#endif
+    case __OVS_KEY_ATTR_MAX:
+    default:
+        printt("ERR Un-implemented key attr %d in set_masked\n", key_attr);
+        return TC_ACT_SHOT;
+    }
+
+    return post_tail_action(skb, batch);
+}
+
+/*
+ * This action implements connection tracking
+ * BPF program: tail-12
+ */
+__section_tail(OVS_ACTION_ATTR_CT)
+static int tail_action_ct(struct __sk_buff *skb OVS_UNUSED)
+{
+    /* Not implemented: drop the packet.  (Fix: use string-literal
+     * concatenation instead of a backslash line-continuation, which
+     * embedded the next line's leading whitespace in the message.) */
+    printt("ERR: CT (connection tracking) not implemented, "
+           "do you want to do it?\n");
+    return TC_ACT_SHOT;
+}
+
+/*
+ * This action implements packet truncate
+ * BPF program: tail-13
+ */
+__section_tail(OVS_ACTION_ATTR_TRUNC)
+static int tail_action_trunc(struct __sk_buff *skb)
+{
+    struct bpf_action *action;
+    struct bpf_action_batch *batch;
+
+    action = pre_tail_action(skb, &batch);
+    if (!action)
+        return TC_ACT_SHOT;
+
+    printt("len before: %d\n", skb->len);
+    printt("truncate to %d\n", action->u.trunc.max_len);
+
+    /* The helper will resize the skb to the given new size */
+    /* NOTE(review): the return value of bpf_skb_change_tail() is
+     * ignored, so a failed resize passes the packet on untruncated --
+     * confirm this best-effort behaviour is intended. */
+    bpf_skb_change_tail(skb, action->u.trunc.max_len, 0);
+
+    printt("len after: %d\n", skb->len);
+    return post_tail_action(skb, batch);
+}
new file mode 100644
@@ -0,0 +1,279 @@
+#ifndef __BPF_API__
+#define __BPF_API__
+
+/* Note:
+ *
+ * This file can be included into eBPF kernel programs. It contains
+ * a couple of useful helper functions, map/section ABI (bpf_elf.h),
+ * misc macros and some eBPF specific LLVM built-ins.
+ */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+
+#define UNSPEC_CALL 0
+#define OUTPUT_CALL 1
+#define PARSER_CALL 32
+#define MATCH_ACTION_CALL 33
+#define DEPARSER_CALL 34
+#define UPCALL_CALL 35
+
+#ifndef TC_ACT_OK
+#define TC_ACT_OK 0
+#define TC_ACT_RECLASSIFY 1
+#define TC_ACT_SHOT 2
+#define TC_ACT_PIPE 3
+#define TC_ACT_STOLEN 4
+#define TC_ACT_QUEUED 5
+#define TC_ACT_REPEAT 6
+#define TC_ACT_REDIRECT 7
+#endif
+
+/** Misc macros. */
+
+#ifndef __stringify
+# define __stringify(X) #X
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__((__unused__))
+#endif
+
+#ifndef htons
+# define htons(X) __constant_htons((X))
+#endif
+
+#ifndef ntohs
+# define ntohs(X) __constant_ntohs((X))
+#endif
+
+#ifndef htonl
+# define htonl(X) __constant_htonl((X))
+#endif
+
+#ifndef ntohl
+# define ntohl(X) __constant_ntohl((X))
+#endif
+
+#ifndef __inline__
+# define __inline__ __attribute__((always_inline))
+#endif
+
+#ifndef __section
+# define __section(NAME) \
+ __attribute__((section(NAME), used))
+#endif
+
+#ifndef __section_tail
+# define __section_tail(KEY) \
+ __section("tail-" __stringify(KEY))
+#endif
+
+#ifndef __section_license
+# define __section_license \
+ __section(ELF_SECTION_LICENSE)
+#endif
+
+#ifndef __section_maps
+# define __section_maps \
+ __section(ELF_SECTION_MAPS)
+#endif
+
+#ifndef BPF_LICENSE
+# define BPF_LICENSE(NAME) \
+ char ____license[] __section_license = NAME
+#endif
+
+#ifndef __BPF_MAP
+# define __BPF_MAP(NAME, TYPE, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ struct bpf_map_def __section_maps NAME = { \
+ .type = (TYPE), \
+ .key_size = (SIZE_KEY), \
+ .value_size = (SIZE_VALUE), \
+ .max_entries = (MAX_ELEM), \
+ .map_flags = 0, \
+ }
+#endif
+
+#ifndef BPF_HASH
+# define BPF_HASH(NAME, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_HASH, ID, SIZE_KEY, SIZE_VALUE, \
+ PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PERCPU_HASH
+# define BPF_PERCPU_HASH(NAME, ID, SIZE_KEY, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_PERCPU_HASH, ID, SIZE_KEY, SIZE_VALUE, \
+ PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_ARRAY
+# define BPF_ARRAY(NAME, ID, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_ARRAY, ID, sizeof(uint32_t), \
+ SIZE_VALUE, PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PERCPU_ARRAY
+# define BPF_PERCPU_ARRAY(NAME, ID, SIZE_VALUE, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_PERCPU_ARRAY, ID, sizeof(uint32_t), \
+ SIZE_VALUE, PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PROG_ARRAY
+# define BPF_PROG_ARRAY(NAME, ID, PIN, MAX_ELEM) \
+ __BPF_MAP(NAME, BPF_MAP_TYPE_PROG_ARRAY, ID, sizeof(uint32_t), \
+ sizeof(uint32_t), PIN, MAX_ELEM)
+#endif
+
+#ifndef BPF_PERF_OUTPUT
+# define BPF_PERF_OUTPUT(name, pin) \
+ __BPF_MAP(name, BPF_MAP_TYPE_PERF_EVENT_ARRAY, 0, sizeof(uint32_t), \
+ sizeof(uint32_t), pin, __NR_CPUS__)
+#endif
+
+/** Classifier helper */
+
+#ifndef BPF_H_DEFAULT
+# define BPF_H_DEFAULT -1
+#endif
+
+/** BPF helper functions for tc. Individual flags are in linux/bpf.h */
+
+#ifndef BPF_FUNC
+# define BPF_FUNC(NAME, ...) \
+ (* NAME)(__VA_ARGS__) __maybe_unused = (void *) BPF_FUNC_##NAME
+#endif
+
+#ifndef BPF_FUNC2
+# define BPF_FUNC2(NAME, ...) \
+ (* NAME)(__VA_ARGS__) __maybe_unused
+#endif
+
+/* Map access/manipulation */
+static void *BPF_FUNC(map_lookup_elem, void *map, const void *key);
+static int BPF_FUNC(map_update_elem, void *map, const void *key,
+ const void *value, uint32_t flags);
+static int BPF_FUNC(map_delete_elem, void *map, const void *key);
+
+/* Time access */
+static uint64_t BPF_FUNC(ktime_get_ns, void);
+
+/* Debugging */
+
+/* FIXME: __attribute__ ((format(printf, 1, 3))) not possible unless
+ * llvm bug https://llvm.org/bugs/show_bug.cgi?id=26243 gets resolved.
+ * It would require ____fmt to be made const, which generates a reloc
+ * entry (non-map).
+ */
+static void BPF_FUNC(trace_printk, const char *fmt, int fmt_size, ...);
+
+#ifndef printt
+# ifdef DEBUG_BPF_OFF
+# define printt(fmt, ...)
+# else
+# define printt(fmt, ...) \
+ ({ \
+ char ____fmt[] = fmt; \
+ trace_printk(____fmt, sizeof(____fmt), ##__VA_ARGS__); \
+ })
+# endif
+#endif
+
+/* Random numbers */
+static uint32_t BPF_FUNC(get_prandom_u32, void);
+
+/* Tail calls */
+static void BPF_FUNC(tail_call, struct __sk_buff *skb, void *map,
+ uint32_t index);
+
+/* System helpers */
+static uint32_t BPF_FUNC(get_smp_processor_id, void);
+
+/* Packet misc meta data */
+static uint32_t BPF_FUNC(get_hash_recalc, struct __sk_buff *skb);
+
+static int BPF_FUNC(skb_under_cgroup, void *map, uint32_t index);
+
+/* Packet redirection */
+static int BPF_FUNC(redirect, int ifindex, uint32_t flags);
+static int BPF_FUNC(clone_redirect, struct __sk_buff *skb, int ifindex,
+ uint32_t flags);
+
+/* Packet manipulation */
+static int BPF_FUNC(skb_load_bytes, struct __sk_buff *skb, uint32_t off,
+ void *to, uint32_t len);
+static int BPF_FUNC(skb_store_bytes, struct __sk_buff *skb, uint32_t off,
+ const void *from, uint32_t len, uint32_t flags);
+
+static int BPF_FUNC(l3_csum_replace, struct __sk_buff *skb, uint32_t off,
+ uint32_t from, uint32_t to, uint32_t flags);
+static int BPF_FUNC(l4_csum_replace, struct __sk_buff *skb, uint32_t off,
+ uint32_t from, uint32_t to, uint32_t flags);
+static int BPF_FUNC(csum_diff, void *from, uint32_t from_size, void *to,
+ uint32_t to_size, uint32_t seed);
+
+static int BPF_FUNC(skb_change_type, struct __sk_buff *skb, uint32_t type);
+static int BPF_FUNC(skb_change_proto, struct __sk_buff *skb, uint32_t proto,
+ uint32_t flags);
+static int BPF_FUNC(skb_change_tail, struct __sk_buff *skb, uint32_t nlen,
+ uint32_t flags);
+
+/* Packet vlan encap/decap */
+static int BPF_FUNC(skb_vlan_push, struct __sk_buff *skb, uint16_t proto,
+ uint16_t vlan_tci);
+static int BPF_FUNC(skb_vlan_pop, struct __sk_buff *skb);
+
+/* Packet tunnel encap/decap */
+static int BPF_FUNC(skb_get_tunnel_key, struct __sk_buff *skb,
+ struct bpf_tunnel_key *to, uint32_t size, uint32_t flags);
+static int BPF_FUNC(skb_set_tunnel_key, struct __sk_buff *skb,
+ const struct bpf_tunnel_key *from, uint32_t size,
+ uint32_t flags);
+
+static int BPF_FUNC(skb_get_tunnel_opt, struct __sk_buff *skb,
+ void *to, uint32_t size);
+static int BPF_FUNC(skb_set_tunnel_opt, struct __sk_buff *skb,
+ const void *from, uint32_t size);
+
+/* Events for user space */
+static int BPF_FUNC2(skb_event_output, struct __sk_buff *skb, void *map, uint64_t index,
+ const void *data, uint32_t size) = (void *)BPF_FUNC_perf_event_output;
+
+/** LLVM built-ins, mem*() routines work for constant size */
+
+#ifndef lock_xadd
+# define lock_xadd(ptr, val) ((void) __sync_fetch_and_add(ptr, val))
+#endif
+
+#ifndef memset
+# define memset(s, c, n) __builtin_memset((s), (c), (n))
+#endif
+
+#ifndef memcpy
+# define memcpy(d, s, n) __builtin_memcpy((d), (s), (n))
+#endif
+
+#ifndef memmove
+# define memmove(d, s, n) __builtin_memmove((d), (s), (n))
+#endif
+
+/* FIXME: __builtin_memcmp() is not yet fully useable unless llvm bug
+ * https://llvm.org/bugs/show_bug.cgi?id=26218 gets resolved. Also
+ * this one would generate a reloc entry (non-map), otherwise.
+ */
+#if 0
+#ifndef memcmp
+# define memcmp(a, b, n) __builtin_memcmp((a), (b), (n))
+#endif
+#endif
+
+unsigned long long load_byte(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.byte");
+
+unsigned long long load_half(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.half");
+
+unsigned long long load_word(void *skb, unsigned long long off)
+ asm ("llvm.bpf.load.word");
+
+#endif /* __BPF_API__ */
new file mode 100644
@@ -0,0 +1,60 @@
+bpf_sources = bpf/datapath.c
+bpf_headers = \
+ bpf/api.h \
+ bpf/datapath.h \
+ bpf/odp-bpf.h \
+ bpf/ovs-p4.h \
+ bpf/helpers.h \
+ bpf/openvswitch.h \
+ bpf/maps.h \
+ bpf/parser.h \
+ bpf/lookup.h \
+ bpf/action.h \
+ bpf/generated_headers.h \
+ bpf/xdp.h
+bpf_extra = \
+ bpf/ovs-proto.p4
+
+# Regardless of configuration with GCC, we must compile the BPF with clang
+# since GCC doesn't have a BPF backend. Clang doesn't support these flags,
+# so we filter them out.
+
+bpf_FILTER_FLAGS := $(filter-out -Wbool-compare, $(AM_CFLAGS))
+bpf_FILTER_FLAGS2 := $(filter-out -Wduplicated-cond, $(bpf_FILTER_FLAGS))
+bpf_FILTER_FLAGS3 := $(filter-out --coverage, $(bpf_FILTER_FLAGS2))
+bpf_CFLAGS := $(bpf_FILTER_FLAGS3)
+bpf_CFLAGS += -D__NR_CPUS__=$(shell nproc) -O2 -Wall -Werror -emit-llvm
+bpf_CFLAGS += -I$(top_builddir)/include -I$(top_srcdir)/include
+bpf_CFLAGS += -Wno-error=pointer-arith # Allow skb->data arithmetic
+bpf_CFLAGS += -I${IPROUTE2_SRC_PATH}/include/uapi/
+# FIXME:
+#bpf_CFLAGS += -D__KERNEL__
+
+dist_sources = $(bpf_sources)
+dist_headers = $(bpf_headers)
+build_sources = $(dist_sources)
+build_headers = $(dist_headers)
+build_objects = $(patsubst %.c,%.o,$(build_sources))
+
+LLC ?= llc-3.8
+CLANG ?= clang-3.8
+
+bpf: $(build_objects)
+bpf/datapath.o: $(bpf_sources) $(bpf_headers)
+ $(MKDIR_P) $(dir $@)
+ @which $(CLANG) >/dev/null 2>&1 || \
+ (echo "Unable to find clang, Install clang (>=3.7) package"; exit 1)
+ $(AM_V_CC) $(CLANG) $(bpf_CFLAGS) -c $< -o - | \
+ $(LLC) -march=bpf -filetype=obj -o $@
+
+bpf/datapath_dbg.o: $(bpf_sources) $(bpf_headers)
+ @which clang-4.0 > /dev/null 2>&1 || \
+ (echo "Unable to find clang-4.0 for debugging"; exit 1)
+ clang-4.0 $(bpf_CFLAGS) -g -c $< -o -| llc-4.0 -march=bpf -filetype=obj -o $@_dbg
+ llvm-objdump-4.0 -S -no-show-raw-insn $@_dbg > $@_dbg.objdump
+
+EXTRA_DIST += $(dist_sources) $(dist_headers) $(bpf_extra)
+if HAVE_BPF
+dist_bpf_DATA += $(build_objects)
+endif
+
new file mode 100644
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <iproute2/bpf_elf.h>
+
+#include "api.h"
+#include "odp-bpf.h"
+#include "datapath.h"
+
+/*
+ * Instead of having multiple BPF object files,
+ * include all headers and generate single datapath.o
+ */
+#include "maps.h"
+#include "parser.h"
+#include "lookup.h"
+#include "action.h"
+#include "xdp.h"
+
+/* We don't rely on specific versions of the kernel; however libbpf requires
+ * this to be both specified and non-zero. */
+static const __maybe_unused __section("version") uint32_t version = 0x1;
+
+/* Emits a debug record (plus packet payload) to userspace through the
+ * 'upcalls' perf event map.  'subtype' classifies the event and 'error'
+ * carries an errno-style code.  The upper 32 bits of 'flags' are set to
+ * skb->len, which bpf_perf_event_output() interprets as the number of
+ * packet bytes to append to the sample; the low bits select the ring of
+ * the current CPU (BPF_F_CURRENT_CPU). */
+static inline void __maybe_unused
+bpf_debug(struct __sk_buff *skb, enum ovs_dbg_subtype subtype, int error)
+{
+ uint64_t cpu = get_smp_processor_id();
+ uint64_t flags = skb->len;
+ struct bpf_upcall md = {
+ .type = OVS_UPCALL_DEBUG,
+ .subtype = subtype,
+ .ifindex = skb->ingress_ifindex,
+ .cpu = cpu,
+ .skb_len = skb->len,
+ .error = error
+ };
+
+ flags <<= 32;
+ flags |= BPF_F_CURRENT_CPU;
+
+ skb_event_output(skb, &upcalls, flags, &md, sizeof(md));
+}
+
+/*
+ * Flow-table miss handler: forwards the packet to userspace (upcall)
+ * together with the parsed flow key, using the perf_event_output helper.
+ * Reached only via tail call with index UPCALL_CALL.
+ * BPF program: tail-35
+ */
+__section_tail(UPCALL_CALL)
+static inline int process_upcall(struct __sk_buff *skb)
+{
+ struct bpf_upcall md = {
+ .type = OVS_UPCALL_MISS,
+ .skb_len = skb->len,
+ //.ifindex = ovs_cb_get_ifindex(skb),
+ };
+ int stat, err;
+ struct ebpf_headers_t *hdrs = bpf_get_headers();
+ struct ebpf_metadata_t *mds = bpf_get_mds();
+
+ /* Parser must have populated the per-CPU key maps before this call. */
+ if (!hdrs || !mds) {
+ printt("headers/mds is NULL\n");
+ return TC_ACT_OK;
+ }
+
+ md.ifindex = mds->md.in_port;
+
+ /* Ship a copy of the parsed key so userspace need not re-parse. */
+ memcpy(&md.key.headers, hdrs, sizeof(struct ebpf_headers_t));
+ memcpy(&md.key.mds, mds, sizeof(struct ebpf_metadata_t));
+
+ if (hdrs->valid & VLAN_VALID) {
+ printt("upcall skb->len(%d) with vlan %x %x\n",
+ skb->len, hdrs->vlan.etherType, hdrs->vlan.tci);
+
+ /* Here we push the vlan back into the packet data so that the
+ * userspace upcall function 'extract_key' can get vlan info.
+ * NOTE(review): presumably mirrors the kernel datapath behavior
+ * of re-tagging before the upcall — confirm.
+ */
+ skb_vlan_push(skb, hdrs->vlan.etherType,
+ hdrs->vlan.tci & ~VLAN_TAG_PRESENT);
+ md.skb_len = skb->len; /* len changed by the vlan push. */
+ }
+
+ /* Upper 32 bits of flags = payload length to sample (see bpf_debug). */
+ uint64_t flags = skb->len;
+ flags <<= 32;
+ flags |= BPF_F_CURRENT_CPU;
+
+ err = skb_event_output(skb, &upcalls, flags, &md, sizeof(md));
+ /* Account the miss; -ENOSPC means the perf ring was full (lost). */
+ stat = !err ? OVS_DP_STATS_MISSED
+ : err == -ENOSPC ? OVS_DP_STATS_LOST
+ : OVS_DP_STATS_ERRORS;
+ stats_account(stat);
+ return TC_ACT_OK;
+}
+
+/*
+ * ENTRY POINT for packets seen at the TC ingress hook.  Resets the skb
+ * control block, marks the packet as ingress, and tail-calls the parser.
+ * The code after bpf_tail_call() runs only if the tail call fails
+ * (e.g. the PARSER_CALL slot is empty), in which case the packet is
+ * dropped (TC_ACT_SHOT).
+ */
+__section("ingress")
+static int to_stack(struct __sk_buff *skb)
+{
+ printt("\n\ningress from %d (%d)\n", skb->ingress_ifindex, skb->ifindex);
+
+ ovs_cb_init(skb, true);
+ bpf_tail_call(skb, &tailcalls, PARSER_CALL);
+
+ printt("ERR: tail call fail in ingress\n");
+ return TC_ACT_SHOT;
+}
+
+/*
+ * ENTRY POINT for packets seen at the TC egress hook.  Same flow as
+ * to_stack() but with the cb ingress flag cleared; falls through to a
+ * drop only if the tail call into the parser fails.
+ */
+__section("egress")
+static int from_stack(struct __sk_buff *skb)
+{
+ printt("\n\negress from %d (%d)\n", skb->ingress_ifindex, skb->ifindex);
+
+ ovs_cb_init(skb, false);
+ bpf_tail_call(skb, &tailcalls, PARSER_CALL);
+
+ printt("ERR: tail call fail in egress\n");
+ return TC_ACT_SHOT;
+}
+
+/*
+ * ENTRY POINT for downcall packets injected by ovs-vswitchd.  Userspace
+ * appends a 'struct bpf_downcall' to the end of the packet; this program
+ * reads it back, trims it off, stores its metadata into the per-CPU
+ * metadata map, and then dispatches on the downcall type.
+ */
+__section("downcall")
+static int execute(struct __sk_buff *skb)
+{
+ struct bpf_downcall md;
+ u32 ebpf_zero = 0;
+ int flags, ofs;
+
+ /* The downcall metadata sits in the last sizeof(md) bytes. */
+ ofs = skb->len - sizeof(md);
+ skb_load_bytes(skb, ofs, &md, sizeof(md));
+ flags = md.flags & OVS_BPF_FLAGS_TX_STACK ? BPF_F_INGRESS : 0;
+
+ printt("downcall (%d) from %d flags %d\n", md.type,
+ md.ifindex, flags);
+
+ /* Seed the per-CPU metadata so later stages see userspace's view. */
+ bpf_map_update_elem(&percpu_metadata, &ebpf_zero, &md.md, BPF_ANY);
+
+ /* Strip the appended metadata, restoring the original packet. */
+ skb_change_tail(skb, ofs, 0);
+
+ switch (md.type) {
+ case OVS_BPF_DOWNCALL_EXECUTE: {
+ struct bpf_action_batch *action_batch;
+
+ /* Userspace pre-loaded the action list in 'execute_actions'. */
+ action_batch = bpf_map_lookup_elem(&execute_actions, &ebpf_zero);
+ if (action_batch) {
+ printt("get valid action_batch\n");
+ skb->cb[OVS_CB_DOWNCALL_EXE] = 1;
+ bpf_tail_call(skb, &tailcalls, action_batch->actions[0].type);
+ } else {
+ printt("get null action_batch\n");
+ }
+ break;
+ }
+ case OVS_BPF_DOWNCALL_OUTPUT: {
+ /* Skip writing the BPF metadata in parser */
+ skb->cb[OVS_CB_ACT_IDX] = -1;
+ /* Redirect to the device this packet came from, so it's as though the
+ * packet was freshly received. This should execute PARSER_CALL. */
+ return redirect(md.ifindex, flags);
+ }
+ default:
+ printt("Unknown downcall type %d\n", md.type);
+ break;
+ }
+ return 0;
+}
+
+BPF_LICENSE("GPL");
new file mode 100644
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "odp-bpf.h"
+
+#define SKB_CB_U32S 5 /* According to linux/bpf.h. */
+
+enum ovs_cb_idx {
+ OVS_CB_ACT_IDX, /* Next action to process in action batch. */
+ OVS_CB_INGRESS, /* 0 = egress; nonzero = ingress. */
+ OVS_CB_DOWNCALL_EXE, /* 0 = match/execute, 1 = downcall/execute. */
+};
+
+/* Zeroes all skb->cb[] slots and records the traversal direction.
+ * Must run at every entry point before any tail call, since the cb
+ * area carries OVS state (action index, direction, downcall flag)
+ * across tail-called programs. */
+static void
+ovs_cb_init(struct __sk_buff *skb, bool ingress)
+{
+ for (int i = 0; i < SKB_CB_U32S; i++)
+ skb->cb[i] = 0;
+ skb->cb[OVS_CB_INGRESS] = ingress;
+}
+
+/* Returns true on the first pass through the parser (action index 0).
+ * A nonzero index means the packet was recirculated, in which case the
+ * previously extracted metadata must be kept rather than overwritten. */
+static bool
+ovs_cb_is_initial_parse(struct __sk_buff *skb) {
+ int index = skb->cb[OVS_CB_ACT_IDX];
+
+ if (index != 0) {
+ printt("recirc, don't update metadata, index %d\n", index);
+ }
+ return index == 0;
+}
+
+/* Returns the index of the next action to execute in the current
+ * action batch (stashed in the skb control block). */
+static uint32_t
+ovs_cb_get_action_index(struct __sk_buff *skb)
+{
+ return skb->cb[OVS_CB_ACT_IDX];
+}
+
+/* Returns the interface the packet is associated with: the ingress
+ * ifindex when the packet was seen at the ingress hook, otherwise the
+ * egress ifindex.  Returns 0 for a null skb. */
+static uint32_t OVS_UNUSED
+ovs_cb_get_ifindex(struct __sk_buff *skb)
+{
+ uint32_t ifindex;
+
+ if (!skb)
+ return 0;
+
+ /* The empty asm blocks work around a compiler optimization issue:
+ * without the barriers the generated code fails to load the field
+ * correctly.  Do not remove without re-testing under the verifier. */
+ if (skb->cb[OVS_CB_INGRESS]) {
+ __asm__ __volatile__("": : :"memory");
+ return skb->ingress_ifindex;
+ }
+
+ ifindex = skb->ifindex;
+ __asm__ __volatile__("": : :"memory");
+
+ return ifindex;
+}
new file mode 100644
@@ -0,0 +1,182 @@
+#ifndef P4_GENERATED_HEADERS
+#define P4_GENERATED_HEADERS
+
+/* Protocol header and metadata layouts shared by the BPF datapath and
+ * ovs-vswitchd.  The "N bits" comments give the logical (P4) field
+ * width; the C storage type may be wider (e.g. a 20-bit flow label is
+ * stored in a u32).  These structs form the flow key, so any layout
+ * change must be made in lockstep with userspace. */
+
+/* We sometimes disable IPV6 to work
+ * around 512-Byte BPF stack limit
+ */
+#define BPF_ENABLE_IPV6
+
+#ifndef BPF_TYPES
+#define BPF_TYPES
+typedef signed char s8;
+typedef unsigned char u8;
+typedef signed short s16;
+typedef unsigned short u16;
+typedef signed int s32;
+typedef unsigned int u32;
+typedef signed long long s64;
+typedef unsigned long long u64;
+#endif
+
+/*TODO: OVS only need addr and label */
+struct ipv6_t {
+ u8 version; /* 4 bits */
+ u8 trafficClass; /* 8 bits */
+ u32 flowLabel; /* 20 bits */
+ u16 payloadLen; /* 16 bits */
+ u8 nextHdr; /* 8 bits */
+ u8 hopLimit; /* 8 bits */
+ char srcAddr[16]; /* 128 bits */
+ char dstAddr[16]; /* 128 bits */
+};
+/* Per-packet metadata; mirrors the kernel datapath's pkt_metadata. */
+struct pkt_metadata_t {
+ u32 recirc_id; /* 32 bits */
+ u32 dp_hash; /* 32 bits */
+ u32 skb_priority; /* 32 bits */
+ u32 pkt_mark; /* 32 bits */
+ u16 ct_state; /* 16 bits */
+ u16 ct_zone; /* 16 bits */
+ u32 ct_mark; /* 32 bits */
+ char ct_label[16]; /* 128 bits */
+ u32 in_port; /* 32 bits ifindex */
+};
+struct udp_t {
+ u16 srcPort; /* 16 bits */
+ u16 dstPort; /* 16 bits */
+};
+struct arp_rarp_t {
+ ovs_be16 ar_hrd; /* format of hardware address */
+ ovs_be16 ar_pro; /* format of protocol address */
+ unsigned char ar_hln; /* length of hardware address */
+ unsigned char ar_pln; /* length of protocol address */
+ ovs_be16 ar_op; /* ARP opcode (command) */
+
+ /* Ethernet+IPv4 specific members. */
+ unsigned char ar_sha[6]; /* sender hardware address */
+ unsigned char ar_sip[4]; /* sender IP address: be32 */
+ unsigned char ar_tha[6]; /* target hardware address */
+ unsigned char ar_tip[4]; /* target IP address: be32 */
+} __attribute__((packed));
+struct icmp_t {
+ u8 type;
+ u8 code;
+};
+struct icmpv6_t {
+ u8 type;
+ u8 code;
+ u16 csum;
+ union {
+ uint32_t data32[1]; /* type-specific field */
+ uint16_t data16[2]; /* type-specific field */
+ uint8_t data8[4]; /* type-specific field */
+ } dataun;
+};
+struct ipv4_t {
+ u8 ttl; /* 8 bits */
+ u8 protocol; /* 8 bits */
+ u8 tos; /* 8 bits */
+ ovs_be32 srcAddr; /* 32 bits */
+ ovs_be32 dstAddr; /* 32 bits */
+};
+/* Single Geneve option (class/type/length + fixed 4-byte payload). */
+struct gnv_opt {
+ ovs_be16 opt_class;
+ uint8_t type;
+ uint8_t length:5;
+ uint8_t r3:1;
+ uint8_t r2:1;
+ uint8_t r1:1;
+ uint8_t opt_data[4]; /* hard-coded to 4 byte */
+};
+/* Tunnel metadata (outer header info for encap/decap). */
+struct flow_tnl_t {
+ union {
+ struct {
+ u32 ip_dst; /* 32 bits */ // BPF uses host byte-order
+ u32 ip_src; /* 32 bits */
+ } ip4;
+#ifdef BPF_ENABLE_IPV6
+ struct {
+ char ipv6_dst[16]; /* 128 bits */
+ char ipv6_src[16]; /* 128 bits */
+ } ip6;
+#endif
+ };
+ u32 tun_id; /* 32 bits */
+ u16 flags; /* 16 bits */
+ u8 ip_tos; /* 8 bits */
+ u8 ip_ttl; /* 8 bits */
+ ovs_be16 tp_src; /* 16 bits */
+ ovs_be16 tp_dst; /* 16 bits */
+ u16 gbp_id; /* 16 bits */
+ u8 gbp_flags; /* 8 bits */
+ u8 use_ipv6: 4,
+ gnvopt_valid: 4;
+ struct gnv_opt gnvopt;
+ char pad1[0]; /* 40 bits */
+};
+
+/* ovs key only needs ports and flags */
+struct tcp_t {
+ ovs_be16 srcPort; /* 16 bits */
+ ovs_be16 dstPort; /* 16 bits */
+ ovs_be16 flags; /* 16 bits (TCP flags) */
+};
+
+struct ethernet_t {
+ char dstAddr[6]; /* 48 bits */
+ char srcAddr[6]; /* 48 bits */
+ ovs_be16 etherType; /* 16 bits */
+};
+
+struct vlan_tag_t {
+ union {
+ u16 pcp:3,
+ cfi:1,
+ vid:12;
+ u16 tci; /* host byte order */
+ };
+ ovs_be16 etherType; /* network byte order */
+};
+
+struct mpls_t {
+ ovs_be32 top_lse; /* top label stack entry */
+};
+
+/* Bitmask recorded in ebpf_headers_t.valid telling which protocol
+ * headers the parser actually extracted for this packet. */
+enum proto_valid {
+ ETHER_VALID = 1 << 0,
+ MPLS_VALID = 1 << 1,
+ IPV4_VALID = 1 << 2,
+ IPV6_VALID = 1 << 3,
+ ARP_VALID = 1 << 4,
+ TCP_VALID = 1 << 5,
+ UDP_VALID = 1 << 6,
+ ICMP_VALID = 1 << 7,
+ VLAN_VALID = 1 << 8,
+ CVLAN_VALID = 1 << 9,
+ ICMPV6_VALID = 1 << 10,
+};
+
+/* Parsed header stack; the unions rely on L3/L4 protocols being
+ * mutually exclusive, with 'valid' indicating which member is live. */
+struct ebpf_headers_t {
+ u32 valid;
+ struct ethernet_t ethernet;
+ struct mpls_t mpls;
+ union {
+ struct ipv4_t ipv4;
+#ifdef BPF_ENABLE_IPV6
+ struct ipv6_t ipv6;
+#endif
+ struct arp_rarp_t arp;
+ };
+ union {
+ struct tcp_t tcp;
+ struct udp_t udp;
+ struct icmp_t icmp;
+ struct icmpv6_t icmpv6;
+ };
+ struct vlan_tag_t vlan;
+ struct vlan_tag_t cvlan;
+};
+struct ebpf_metadata_t {
+ struct pkt_metadata_t md;
+ struct flow_tnl_t tnl_md;
+};
+#endif
new file mode 100644
@@ -0,0 +1,248 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef __OVSBPF_HELPERS_H
+#define __OVSBPF_HELPERS_H
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/bpf.h>
+
+/* Additional headers */
+# define printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+#define ERR_EXIT() \
+ ({printk("[ERROR] \n"); return TC_ACT_OK;})
+
+#define NOT_HERE() \
+ ({printk("[ERROR] Program should not reach here\n");})
+
+#ifndef BPF_TYPES
+#define BPF_TYPES
+typedef signed char s8;
+typedef unsigned char u8;
+typedef signed short s16;
+typedef unsigned short u16;
+typedef signed int s32;
+typedef unsigned int u32;
+typedef signed long long s64;
+typedef unsigned long long u64;
+#endif
+
+#define ___constant_swab16(x) ((__u16)( \
+ (((__u16)(x) & (__u16)0x00ffU) << 8) | \
+ (((__u16)(x) & (__u16)0xff00U) >> 8)))
+
+#define ___constant_swab32(x) ((__u32)( \
+ (((__u32)(x) & (__u32)0x000000ffUL) << 24) | \
+ (((__u32)(x) & (__u32)0x0000ff00UL) << 8) | \
+ (((__u32)(x) & (__u32)0x00ff0000UL) >> 8) | \
+ (((__u32)(x) & (__u32)0xff000000UL) >> 24)))
+
+#define ___constant_swab64(x) ((__u64)( \
+ (((__u64)(x) & (__u64)0x00000000000000ffULL) << 56) | \
+ (((__u64)(x) & (__u64)0x000000000000ff00ULL) << 40) | \
+ (((__u64)(x) & (__u64)0x0000000000ff0000ULL) << 24) | \
+ (((__u64)(x) & (__u64)0x00000000ff000000ULL) << 8) | \
+ (((__u64)(x) & (__u64)0x000000ff00000000ULL) >> 8) | \
+ (((__u64)(x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
+ (((__u64)(x) & (__u64)0x00ff000000000000ULL) >> 40) | \
+ (((__u64)(x) & (__u64)0xff00000000000000ULL) >> 56)))
+
+#define __constant_htonl(x) (___constant_swab32((x)))
+#define __constant_ntohl(x) (___constant_swab32(x))
+#define __constant_htons(x) (___constant_swab16((x)))
+#define __constant_ntohs(x) ___constant_swab16((x))
+
+/* Byte-order conversion helpers.  These unconditionally swap (the
+ * ___constant_swab* macros above), so they assume a little-endian
+ * target; the OVS_FORCE casts keep sparse's be/he type checking
+ * happy. */
+static u16 OVS_UNUSED bpf_ntohs(ovs_be16 x) {
+ return __constant_ntohs((OVS_FORCE u16)x);
+}
+
+static ovs_be16 bpf_htons(u16 x) {
+ return (OVS_FORCE ovs_be16)__constant_htons(x);
+}
+
+static u32 OVS_UNUSED bpf_ntohl(ovs_be32 x) {
+ return __constant_ntohl((OVS_FORCE u32)x);
+}
+
+static ovs_be32 bpf_htonl(u32 x) {
+ return (OVS_FORCE ovs_be32)__constant_htonl(x);
+}
+
+static u64 OVS_UNUSED bpf_ntohll(ovs_be64 x) {
+ return ___constant_swab64((OVS_FORCE u64)x);
+}
+
+static ovs_be64 bpf_htonll(u64 x) {
+ return (OVS_FORCE ovs_be64)___constant_swab64(x);
+}
+
+/* helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+/* helper functions called from eBPF programs written in C */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_update_elem;
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+ (void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+ (void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+ (void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
+static int (*bpf_perf_event_read)(void *map, int index) =
+ (void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+ (void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+ (void *) BPF_FUNC_redirect;
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+ unsigned long long flags, void *data,
+ int size) =
+ (void *) BPF_FUNC_perf_event_output;
+static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
+ (void *) BPF_FUNC_get_stackid;
+static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+ (void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+ (void *) BPF_FUNC_current_task_under_cgroup;
+static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
+ (void *) BPF_FUNC_skb_get_tunnel_key;
+static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
+ (void *) BPF_FUNC_skb_set_tunnel_key;
+static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
+ (void *) BPF_FUNC_skb_get_tunnel_opt;
+static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
+ (void *) BPF_FUNC_skb_set_tunnel_opt;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+ (void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_skb_vlan_push)(void *ctx, int vlan_proto, int vlan_tci) =
+ (void *) BPF_FUNC_skb_vlan_push;
+static int (*bpf_skb_vlan_pop)(void *ctx) =
+ (void *) BPF_FUNC_skb_vlan_pop;
+static int (*bpf_skb_change_tail)(void *ctx, int len, int flags) =
+ (void *) BPF_FUNC_skb_change_tail;
+static int (*bpf_get_hash_recalc)(void *ctx) =
+ (void *) BPF_FUNC_get_hash_recalc;
+
+/* Type-safe wrapper over bpf_skb_vlan_push(): takes the TPID as a
+ * big-endian value and hides the OVS_FORCE cast from callers. */
+static int OVS_UNUSED vlan_push(void *ctx, ovs_be16 proto, u16 tci)
+{
+ return bpf_skb_vlan_push(ctx, (OVS_FORCE int)proto, tci);
+}
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+ unsigned int id;
+ unsigned int pinning;
+};
+
+/* used in TC */
+/*
+struct bpf_elf_map {
+ __u32 type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ __u32 id;
+ __u32 pinning;
+};
+*/
+static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
+ (void *) BPF_FUNC_skb_load_bytes;
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+ (void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_skb_change_head)(void *, int len, int flags) =
+ (void *) BPF_FUNC_skb_change_head;
+
+/* Wrappers over bpf_l3_csum_replace() for 4- and 2-byte field updates
+ * (the final argument is the size of the replaced field).  Big-endian
+ * typed parameters keep sparse checking at the call sites. */
+static int l3_csum_replace4(void *ctx, int off, ovs_be32 from, ovs_be32 to)
+{
+ return bpf_l3_csum_replace(ctx, off, (OVS_FORCE int)from, (OVS_FORCE int)to, 4);
+}
+
+static int OVS_UNUSED l3_csum_replace2(void *ctx, int off, ovs_be16 from, ovs_be16 to)
+{
+ return bpf_l3_csum_replace(ctx, off, (OVS_FORCE int)from, (OVS_FORCE int)to, 2);
+}
+
+#if defined(__x86_64__)
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+#endif
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \
+ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \
+ bpf_probe_read(&(ip), sizeof(ip), \
+ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
new file mode 100644
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+#include <openvswitch/compiler.h>
+#include "ovs-p4.h"
+#include "api.h"
+#include "helpers.h"
+#include "maps.h"
+
+/* eBPF executes actions by tailcall because eBPF doesn't support for-loop and
+ * unroll produces oversized code.
+ *
+ * Each action handler uses current packet's key to look for the next action.
+ * However, the key can be changed by some actions like hash, so a stable
+ * key is kept in an eBPF map named percpu_executing_key. In action handler,
+ * firstly, the stable key is got from percpu_executing_key, then it is used
+ * to look up the actions being executed. skb->cb[OVS_CB_ACT_IDX] points to
+ * next action.
+ */
+/* Dispatches the first action of a batch: logs the action type for
+ * debugging, then tail-calls into the handler program registered at
+ * 'tailcalls[type]'.  On success the tail call does not return; the
+ * trailing return is reached only if the call fails. */
+static inline void ovs_execute_actions(struct __sk_buff *skb,
+ struct bpf_action *action)
+{
+ enum ovs_action_attr type;
+ type = action->type;
+
+ printt("action type %d\n", type);
+
+ /* note: this isn't a for loop, tail call won't return. */
+ switch (type) {
+ case OVS_ACTION_ATTR_UNSPEC:
+ printt("end of action processing\n");
+ break;
+ case OVS_ACTION_ATTR_OUTPUT:
+ printt("output action port = %d\n", action->u.out.port);
+ break;
+ case OVS_ACTION_ATTR_USERSPACE:
+ printt("userspace action, len = %d, ifindex = %d upcall back\n",
+ action->u.userspace.nlattr_len, ovs_cb_get_ifindex(skb));
+ break;
+ case OVS_ACTION_ATTR_SET:
+ printt("set action, is_set_tunnel = %d\n",
+ action->is_set_tunnel);
+ break;
+ case OVS_ACTION_ATTR_PUSH_VLAN:
+ printt("vlan push tci %d\n", action->u.push_vlan.vlan_tci);
+ break;
+ case OVS_ACTION_ATTR_POP_VLAN:
+ printt("vlan pop\n");
+ break;
+ case OVS_ACTION_ATTR_RECIRC:
+ printt("recirc\n");
+ break;
+ case OVS_ACTION_ATTR_HASH:
+ printt("hash\n");
+ break;
+ case OVS_ACTION_ATTR_SET_MASKED:
+ printt("set masked\n");
+ break;
+ case OVS_ACTION_ATTR_CT:
+ printt("ct\n");
+ break;
+ case OVS_ACTION_ATTR_TRUNC:
+ printt("truncate\n");
+ break;
+ case OVS_ACTION_ATTR_SAMPLE: /* Nested case OVS_SAMPLE_ATTR_*. */
+ case OVS_ACTION_ATTR_PUSH_MPLS: /* struct ovs_action_push_mpls. */
+ case OVS_ACTION_ATTR_POP_MPLS: /* __be16 ethertype. */
+ case OVS_ACTION_ATTR_PUSH_ETH: /* struct ovs_action_push_eth. */
+ case OVS_ACTION_ATTR_POP_ETH: /* No argument. */
+ case OVS_ACTION_ATTR_CT_CLEAR: /* No argument. */
+ case OVS_ACTION_ATTR_PUSH_NSH: /* Nested case OVS_NSH_KEY_ATTR_*. */
+ case OVS_ACTION_ATTR_POP_NSH: /* No argument. */
+#ifndef __KERNEL__
+ case OVS_ACTION_ATTR_TUNNEL_PUSH: /* struct ovs_action_push_tnl*/
+ case OVS_ACTION_ATTR_TUNNEL_POP: /* u32 port number. */
+ case OVS_ACTION_ATTR_CLONE: /* Nested case OVS_CLONE_ATTR_*. */
+ case OVS_ACTION_ATTR_METER: /* u32 meter number. */
+#endif
+ case __OVS_ACTION_ATTR_MAX:
+#ifdef __KERNEL__
+ case OVS_ACTION_ATTR_SET_TO_MASKED: /* Kernel module internal masked
+ * set action converted from
+ * case OVS_ACTION_ATTR_SET. */
+#endif
+ default:
+ /* Fixed missing '\n' escape in the trace message ("supportedn"). */
+ printt("ERR: action type %d not supported\n", type);
+ break;
+ }
+
+ /* Handler programs live in 'tailcalls' indexed by action type; an
+ * unsupported type hits an empty slot and the call falls through. */
+ bpf_tail_call(skb, &tailcalls, type);
+
+ /* OVS_NOT_REACHED */
+ return;
+}
+
+/* Increments one datapath-level counter (hit/miss/lost/error) in the
+ * 'datapath_stats' array map.  The atomic add guards against concurrent
+ * updates from other CPUs; a missing map slot is silently ignored.
+ * NOTE(review): map_lookup_elem (vs. bpf_map_lookup_elem used elsewhere)
+ * is presumably an alias from api.h — confirm. */
+static inline void
+stats_account(enum ovs_bpf_dp_stats index)
+{
+ uint32_t stat = 1;
+ uint64_t *value;
+
+ value = map_lookup_elem(&datapath_stats, &index);
+ if (value) {
+ __sync_fetch_and_add(value, stat);
+ }
+}
+
+/* OVS revalidator thread reads each entry in eBPF map
+ * (flow_table and dp_flow_stats), reports to OpenFlow
+ * table statistics, and decide to remove/keep the entry
+ * by comparing its timestamp.
+ */
+/* Updates the per-flow packet/byte counters and last-used timestamp for
+ * the flow identified by (headers, mds); inserts a fresh entry on first
+ * hit.  An insertion failure (e.g. map full) is ignored — stats for
+ * that flow are simply lost. */
+static inline void
+flow_stats_account(struct ebpf_headers_t *headers,
+ struct ebpf_metadata_t *mds,
+ size_t bytes)
+{
+ struct bpf_flow_key flow_key;
+ struct bpf_flow_stats *flow_stats;
+
+ flow_key.headers = *headers;
+ flow_key.mds = *mds;
+
+ flow_stats = bpf_map_lookup_elem(&dp_flow_stats, &flow_key);
+ if (!flow_stats) {
+ struct bpf_flow_stats s = {0, 0, 0};
+ int err;
+
+ printt("flow not found in flow stats, first install\n");
+ s.packet_count = 1;
+ s.byte_count = bytes;
+ s.used = bpf_ktime_get_ns() / (1000*1000); /* msec */
+ err = bpf_map_update_elem(&dp_flow_stats, &flow_key, &s, BPF_ANY);
+ if (err) {
+ return;
+ }
+ } else {
+ /* In-place update through the map value pointer; non-atomic, so
+ * concurrent hits of the same flow on other CPUs may race. */
+ flow_stats->packet_count += 1;
+ flow_stats->byte_count += bytes;
+ flow_stats->used = bpf_ktime_get_ns() / (1000*1000); /* msec */
+ printt("current: packets %d count %d ts %d\n",
+ flow_stats->packet_count, flow_stats->byte_count, flow_stats->used);
+ }
+
+ return;
+}
+
+/* Looks up the flow table with the exact-match key built from the
+ * parsed headers and metadata.  Returns the flow's action batch, or
+ * NULL on a miss (caller then upcalls to userspace). */
+static inline struct bpf_action_batch *
+ovs_lookup_flow(struct ebpf_headers_t *headers,
+ struct ebpf_metadata_t *mds)
+{
+ struct bpf_flow_key flow_key;
+
+ flow_key.headers = *headers;
+ flow_key.mds = *mds;
+
+ return bpf_map_lookup_elem(&flow_table, &flow_key);
+}
+
+/* Flow-table lookup stage, reached by tail call after the parser.
+ * On a hit: updates datapath and per-flow stats, saves a stable copy of
+ * the key in percpu_executing_key (action handlers re-read it there),
+ * and tail-calls the first action.  On a miss: tail-calls the upcall
+ * program.  Falls through to TC_ACT_OK only if a tail call fails. */
+__section_tail(MATCH_ACTION_CALL)
+static int lookup(struct __sk_buff* skb OVS_UNUSED)
+{
+ struct bpf_action_batch *action_batch;
+ struct ebpf_headers_t *headers;
+ struct ebpf_metadata_t *mds;
+
+ headers = bpf_get_headers();
+ if (!headers) {
+ printt("no packet header found\n");
+ ERR_EXIT();
+ }
+
+ mds = bpf_get_mds();
+ if (!mds) {
+ printt("no packet metadata found\n");
+ ERR_EXIT();
+ }
+
+ /* LOOKUP */
+ action_batch = ovs_lookup_flow(headers, mds);
+ if (!action_batch) {
+ printt("no action found, upcall to userspace\n");
+ bpf_tail_call(skb, &tailcalls, UPCALL_CALL);
+
+ /* OVS_NOT_REACHED */
+ return TC_ACT_OK;
+ } else {
+ printt("action found! stay in BPF\n");
+ /* DP Stats Update */
+ stats_account(OVS_DP_STATS_HIT);
+ /* Flow Stats Update */
+ flow_stats_account(headers, mds, skb->len);
+ }
+
+ /* Hit verifier limit when moving declaration up.
+ * (Declaration order here is load-bearing for the BPF verifier;
+ * do not hoist these to the top of the function.) */
+ struct bpf_flow_key flow_key;
+ flow_key.headers = *headers;
+ flow_key.mds = *mds;
+ int index = 0;
+ int error = bpf_map_update_elem(&percpu_executing_key, &index,
+ &flow_key, BPF_ANY);
+ if (error) {
+ printt("update percpu_executing_key failed: %d\n", error);
+ return TC_ACT_OK;
+ }
+
+ /* the subsequent actions will be tail called. */
+ ovs_execute_actions(skb, &action_batch->actions[0]);
+
+ printt("ERROR: tail call fails\n");
+
+ /* OVS_NOT_REACHED */
+ return TC_ACT_OK;
+}
new file mode 100644
@@ -0,0 +1,170 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ * ----------------------------------------------------------------------
+ */
+
+#ifndef BPFMAP_OPENVSWITCH_H
+#define BPFMAP_OPENVSWITCH_H 1
+
+#include "api.h"
+#include "openvswitch.h"
+#include "ovs-p4.h"
+
+/* ovs-vswitchd as a writer will update these maps.
+ * bpf datapath as reader lookup and processes */
+
+/* FIXME: copy from iproute2 */
+enum {
+ BPF_MAP_ID_PROTO,
+ BPF_MAP_ID_QUEUE,
+ BPF_MAP_ID_DROPS,
+ BPF_MAP_ID_ACTION,
+ BPF_MAP_ID_INGRESS,
+ __BPF_MAP_ID_MAX,
+#define BPF_MAP_ID_MAX __BPF_MAP_ID_MAX
+};
+
+/* A bpf flow key is extracted from the
+ * parser.h and saved in
+ * 1) percpu_headers, and
+ * 2) percpu_metadata
+ * Access: BPF is the only writer/reader
+ */
+BPF_PERCPU_ARRAY(percpu_headers,
+ 0,
+ sizeof(struct ebpf_headers_t),
+ 0,
+ 1
+);
+BPF_PERCPU_ARRAY(percpu_metadata,
+ 0,
+ sizeof(struct ebpf_metadata_t),
+ 0,
+ 1
+);
+
+/* BPF flow table
+ * Access: BPF is the reader for lookup,
+ * ovs-vswitchd is the writer
+ */
+BPF_HASH(flow_table,
+ 0,
+ sizeof(struct bpf_flow_key),
+ sizeof(struct bpf_action_batch),
+ 0,
+ 256
+);
+
+/* BPF flow stats table
+ * Access: BPF is the writer for updating,
+ * ovs-vswitchd/revalidator is the reader
+ */
+BPF_HASH(dp_flow_stats,
+ 0,
+ sizeof(struct bpf_flow_key),
+ sizeof(struct bpf_flow_stats),
+ 0,
+ 256
+);
+
+/*
+ * Map for implementing the upcall, which forwards the
+ * first packet (lookup misses) to ovs-vswitchd
+ */
+BPF_PERF_OUTPUT(upcalls, 0);
+
+
+/* BPF datapath stats
+ * Access: BPF is the writer,
+ * ovs-vswitchd is the reader
+ * XXX: switch to percpu to improve performance
+ */
+BPF_ARRAY(datapath_stats,
+ 0,
+ sizeof(uint64_t),
+ 0,
+ __OVS_DP_STATS_MAX
+);
+
+/* Global tail call map:
+ * index 0-31 for actions (OVS_ACTION_ATTR_*)
+ * index 32-63 for others
+ */
+BPF_PROG_ARRAY(tailcalls,
+ 0,
+ 0,
+ 64
+);
+
+/* A dedicated action list for downcall packet.
+ * Access: ovs-vswitch is the writer,
+ * BPF is the reader
+ */
+BPF_ARRAY(execute_actions,
+ 0,
+ sizeof(struct bpf_action_batch),
+ 0,
+ 1
+);
+
+/* A dedicated key for downcall packet.
+ * Access: ovs-vswitch is the writer,
+ * BPF is the reader
+ */
+BPF_PERCPU_ARRAY(percpu_executing_key,
+ 0,
+ sizeof(struct bpf_flow_key),
+ 0,
+ 1
+);
+
+struct ebpf_headers_t;
+struct ebpf_metadata_t;
+
+/* Returns the current CPU's parsed-header slot (index 0 of the
+ * per-CPU array map), or NULL if the lookup fails.  '(void)' added:
+ * an empty C parameter list declares unspecified arguments, not none. */
+static inline struct ebpf_headers_t *bpf_get_headers(void)
+{
+ int ebpf_zero = 0;
+ return bpf_map_lookup_elem(&percpu_headers, &ebpf_zero);
+}
+
+/* Returns the current CPU's packet-metadata slot (index 0 of the
+ * per-CPU array map), or NULL if the lookup fails.  '(void)' added:
+ * an empty C parameter list declares unspecified arguments, not none. */
+static inline struct ebpf_metadata_t *bpf_get_mds(void)
+{
+ int ebpf_zero = 0;
+ return bpf_map_lookup_elem(&percpu_metadata, &ebpf_zero);
+}
+
+#endif /* BPFMAP_OPENVSWITCH_H */
new file mode 100644
@@ -0,0 +1,255 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ * ----------------------------------------------------------------------
+ */
+
+#ifndef BPF_OPENVSWITCH_H
+#define BPF_OPENVSWITCH_H 1
+
+#include "odp-netlink.h"
+#include "generated_headers.h"
+
+/* Type codes for messages exchanged with userspace via the 'upcalls'
+ * perf map; the first values alias OVS_PACKET_CMD_* from odp-netlink.h. */
+enum ovs_upcall_cmd {
+ OVS_UPCALL_UNSPEC = OVS_PACKET_CMD_UNSPEC,
+
+ /* Kernel-to-user notifications. */
+ OVS_UPCALL_MISS = OVS_PACKET_CMD_MISS,
+ OVS_UPCALL_ACTION = OVS_PACKET_CMD_ACTION,
+
+ /* Userspace commands. */
+ OVS_UPCALL_EXECUTE = OVS_PACKET_CMD_EXECUTE,
+
+ /* Debug notification; has no OVS_PACKET_CMD_* equivalent. */
+ OVS_UPCALL_DEBUG,
+};
+
+/* Subtype carried in 'struct bpf_upcall' for OVS_UPCALL_DEBUG messages. */
+enum ovs_dbg_subtype {
+ OVS_DBG_ST_UNSPEC,
+ OVS_DBG_ST_REDIRECT,
+ __OVS_DBG_ST_MAX,
+};
+#define OVS_DBG_ST_MAX (__OVS_DBG_ST_MAX - 1)
+
+/* Human-readable names for 'enum ovs_dbg_subtype', indexed by subtype. */
+static const char *bpf_upcall_subtypes[] OVS_UNUSED = {
+ [OVS_DBG_ST_UNSPEC] = "Unspecified",
+ [OVS_DBG_ST_REDIRECT] = "Downcall redirect",
+};
+
+/* Used with 'datapath_stats' map. */
+enum ovs_bpf_dp_stats {
+ OVS_DP_STATS_UNSPEC,
+ OVS_DP_STATS_HIT,
+ OVS_DP_STATS_MISSED,
+ OVS_DP_STATS_LOST,
+ OVS_DP_STATS_FLOWS,
+ OVS_DP_STATS_MASK_HIT,
+ OVS_DP_STATS_MASKS,
+ OVS_DP_STATS_ERRORS,
+ __OVS_DP_STATS_MAX,
+};
+#define OVS_DP_STATS_MAX (__OVS_DP_STATS_MAX - 1)
+
+struct bpf_flow {
+ uint64_t value; /* XXX */
+};
+
+/* Per-flow statistics stored in the 'dp_flow_stats' map. */
+struct bpf_flow_stats {
+ uint64_t packet_count; /* Number of packets matched. */
+ uint64_t byte_count; /* Number of bytes matched. */
+ uint64_t used; /* Last used time (in jiffies). */
+ //spinlock_t lock; /* Lock for atomic stats update. */
+ //__be16 tcp_flags; /* Union of seen TCP flags. */
+};
+
+/* Lookup key for 'flow_table' and 'dp_flow_stats': the parsed packet
+ * headers plus packet metadata. */
+struct bpf_flow_key {
+ struct ebpf_headers_t headers;
+ struct ebpf_metadata_t mds;
+};
+
+/* Fixed-size header of a miss/action/debug message sent to userspace
+ * through the 'upcalls' perf map. */
+struct bpf_upcall {
+ uint8_t type; /* One of the upcall type codes above. */
+ uint8_t subtype; /* Debug subtype, for debug upcalls. */
+ uint32_t ifindex; /* Incoming device */
+ uint32_t cpu;
+ uint32_t error;
+ uint32_t skb_len;
+#ifdef BPF_ENABLE_IPV6
+ uint8_t uactions[24]; /* Contains 'struct nlattr' */
+#else
+ uint8_t uactions[64]; /* NOTE(review): IPv6 builds shrink this to 24,
+ presumably to bound the total upcall size
+ against the larger key below -- confirm. */
+#endif
+ uint32_t uactions_len;
+ struct bpf_flow_key key;
+ /* Followed by 'skb_len' of packet data. */
+};
+
+#define OVS_BPF_FLAGS_TX_STACK (1 << 0)
+
+/* Downcall type codes ('struct bpf_downcall' below). */
+#define OVS_BPF_DOWNCALL_UNSPEC 0
+#define OVS_BPF_DOWNCALL_OUTPUT 1
+#define OVS_BPF_DOWNCALL_EXECUTE 2
+
+/* Header prepended by userspace when injecting a packet back into the
+ * datapath. */
+struct bpf_downcall {
+ uint32_t type; /* OVS_BPF_DOWNCALL_*. */
+ uint32_t ifindex;
+ uint32_t debug;
+ uint32_t flags; /* OVS_BPF_FLAGS_*. */
+ struct ebpf_metadata_t md;
+ /* Followed by packet data. */
+};
+
+#define ETH_ALEN 6 /* Octets in one Ethernet address. */
+
+/* Action type numbers, also used as 'tailcalls' indices (0-31 are
+ * reserved for actions).
+ * NOTE(review): these #defines shadow the identically-named constants
+ * declared via odp-netlink.h -- confirm the values stay in sync. */
+#define OVS_ACTION_ATTR_UNSPEC 0
+#define OVS_ACTION_ATTR_OUTPUT 1
+#define OVS_ACTION_ATTR_USERSPACE 2
+#define OVS_ACTION_ATTR_SET 3
+#define OVS_ACTION_ATTR_PUSH_VLAN 4
+#define OVS_ACTION_ATTR_POP_VLAN 5
+#define OVS_ACTION_ATTR_SAMPLE 6
+#define OVS_ACTION_ATTR_RECIRC 7
+#define OVS_ACTION_ATTR_HASH 8
+#define OVS_ACTION_ATTR_PUSH_MPLS 9
+#define OVS_ACTION_ATTR_POP_MPLS 10
+#define OVS_ACTION_ATTR_SET_MASKED 11
+#define OVS_ACTION_ATTR_CT 12
+#define OVS_ACTION_ATTR_TRUNC 13
+#define OVS_ACTION_ATTR_PUSH_ETH 14
+#define OVS_ACTION_ATTR_POP_ETH 15
+
+#define VLAN_CFI_MASK 0x1000 /* Canonical Format Indicator */
+#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */
+#define VLAN_TAG_PRESENT VLAN_CFI_MASK
+
+/* IPv4 5-tuple. */
+struct flow_key {
+ __be32 src;
+ __be32 dst;
+ union {
+ __be32 ports; /* Both L4 ports as one word... */
+ __be16 port16[2]; /* ...or source/destination separately. */
+ };
+ __u32 ip_proto;
+};
+
+/* Argument for a tunnel-set action (bpf_action.is_set_tunnel). */
+struct ovs_action_set_tunnel {
+ /* light weight tunnel key */
+ __u32 tunnel_id; /* tunnel id is host byte order */
+ union {
+ __u32 remote_ipv4; /* host byte order */
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext;
+ __u32 tunnel_label;
+ struct gnv_opt gnvopt; /* Geneve option, valid iff gnvopt_valid. */
+ __u8 gnvopt_valid;
+ __u8 use_ipv6; /* Selects which union member is meaningful. */
+};
+
+/* Argument for OVS_ACTION_ATTR_SET_MASKED: only the value is stored,
+ * since the BPF datapath does not support masks (see #if 0 below). */
+struct ovs_action_set_masked {
+ enum ovs_key_attr key_type; /* Selects the union member below. */
+ union {
+ struct ovs_key_ethernet ether;
+ struct ovs_key_mpls mpls;
+ struct ovs_key_ipv4 ipv4;
+ struct ovs_key_ipv6 ipv6;
+ struct ovs_key_tcp tcp;
+ struct ovs_key_udp udp;
+ struct ovs_key_sctp sctp;
+ struct ovs_key_icmp icmp;
+ struct ovs_key_icmpv6 icmpv6;
+ struct ovs_key_arp arp;
+ } key;
+#if 0
+ /* BPF datapath does not support mask */
+ union {
+ struct ovs_key_ethernet ether;
+ struct ovs_key_mpls mpls;
+ struct ovs_key_ipv4 ipv4;
+ struct ovs_key_ipv6 ipv6;
+ struct ovs_key_tcp tcp;
+ struct ovs_key_udp udp;
+ struct ovs_key_sctp sctp;
+ struct ovs_key_icmp icmp;
+ struct ovs_key_icmpv6 icmpv6;
+ struct ovs_key_arp arp;
+ } mask;
+#endif
+};
+
+/* Argument for OVS_ACTION_ATTR_OUTPUT. */
+struct ovs_action_output {
+ uint32_t port; /* Egress port. */
+ uint32_t flags; /* OVS_BPF_FLAGS_*. */
+};
+
+/* Argument for OVS_ACTION_ATTR_CT. */
+struct ovs_action_ct {
+ int commit;
+ /* XXX: Include everything in enum ovs_ct_attr. */
+};
+
+/* Argument for OVS_ACTION_ATTR_USERSPACE: raw nlattr bytes to echo
+ * back in the resulting upcall. */
+struct ovs_action_userspace {
+ __u16 nlattr_len;
+ __u8 nlattr_data[64];
+};
+
+/* One datapath action: a type tag plus a type-specific argument.
+ * The union is padded to 128 bytes via 'aligned'. */
+struct bpf_action {
+ enum ovs_action_attr type; /* action type */
+ uint32_t is_set_tunnel; /* to distinguish between SET (tunnel) and SET_MASKED (fields) */
+ union {
+ struct ovs_action_output out; /* OVS_ACTION_ATTR_OUTPUT: 8B */
+ struct ovs_action_trunc trunc; /* OVS_ACTION_ATTR_TRUNC: 4B */
+ struct ovs_action_hash hash; /* OVS_ACTION_ATTR_HASH: 8B */
+ struct ovs_action_push_mpls mpls; /* OVS_ACTION_ATTR_PUSH_MPLS: 6B */
+ ovs_be16 ethertype; /* OVS_ACTION_ATTR_POP_MPLS: 2B */
+ struct ovs_action_push_vlan push_vlan; /* OVS_ACTION_ATTR_PUSH_VLAN: 4B */
+ /* OVS_ACTION_ATTR_POP_VLAN: 0B */
+ uint32_t recirc_id; /* OVS_ACTION_ATTR_RECIRC: 4B */
+ struct ovs_action_set_tunnel tunnel;
+ struct ovs_action_set_masked mset; /* OVS_ACTION_ATTR_SET_MASK: */
+ struct ovs_action_ct ct; /* OVS_ACTION_ATTR_CT: */
+ struct ovs_action_userspace userspace; /* OVS_ACTION_ATTR_USERSPACE: */
+
+ uint64_t aligned[16]; // make it 128 byte
+ } u;
+};
+
+/* Fixed-size action list stored as a 'flow_table' value. */
+#define BPF_DP_MAX_ACTION 32
+struct bpf_action_batch {
+ struct bpf_action actions[BPF_DP_MAX_ACTION];
+};
+
+#endif /* BPF_OPENVSWITCH_H */
new file mode 100644
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ * ----------------------------------------------------------------------
+ */
+
+/* NOTE(review): identifiers beginning with a double underscore are
+ * reserved for the implementation in C; consider renaming this guard. */
+#ifndef __BPF_OPENVSWITCH__
+#define __BPF_OPENVSWITCH__
+#include <stdint.h>
+#include "odp-netlink.h"
+
+/* Placeholder: no netlink helper declarations yet. */
+#ifndef BPFNL_OPENVSWITCH_H
+#define BPFNL_OPENVSWITCH_H 1
+#endif /* BPFNL_OPENVSWITCH_H */
+
+#endif /* __BPF_OPENVSWITCH__ */
new file mode 100644
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef BPFP4_OPENVSWITCH_H
+#define BPFP4_OPENVSWITCH_H 1
+
+#include "helpers.h"
+#include "generated_headers.h"
+/*
+ * From BCC src/cc/export/helpers.h
+ */
+/* Mask of the low '_n' bits as a u64 (all ones when _n >= 64). */
+#define MASK(_n) ((_n) < 64 ? (1ull << (_n)) - 1 : ((u64)-1LL))
+/* Mask of the low '_n' bits as a 128-bit value (all ones when _n >= 128). */
+#define MASK128(_n) ((_n) < 128 ? ((unsigned __int128)1 << (_n)) - 1 : ((unsigned __int128)-1))
+
+/* Loads 8 bytes at byte offset 'off' as a host-endian u64, assembled from
+ * two load_word() calls (each already byte-swapped). */
+static inline u64 load_dword(void *skb, u64 off) {
+ return ((u64)load_word(skb, off) << 32) | load_word(skb, off + 4);
+}
+/* Inserts the low 'bsz' bits of 'val' into the packet at byte offset 'off',
+ * bit offset 'bofs' counted from the most-significant bit of the containing
+ * unit, writing the result back in network byte order.
+ * NOTE(review): fields with bofs + bsz > 64 fall through every branch and
+ * are silently ignored -- confirm callers never pass such widths. */
+static inline __attribute__((always_inline))
+void bpf_dins_pkt(void *pkt, u64 off, u64 bofs, u64 bsz, u64 val) {
+ // The load_xxx function does a bswap before returning the short/word/dword,
+ // so the value in register will always be host endian. However, the bytes
+ // written back need to be in network order.
+ if (bofs == 0 && bsz == 8) {
+ bpf_skb_store_bytes(pkt, off, &val, 1, 0);
+ } else if (bofs + bsz <= 8) {
+ u8 v = load_byte(pkt, off);
+ v &= ~(MASK(bsz) << (8 - (bofs + bsz)));
+ v |= ((val & MASK(bsz)) << (8 - (bofs + bsz)));
+ bpf_skb_store_bytes(pkt, off, &v, 1, 0);
+ } else if (bofs == 0 && bsz == 16) {
+ u16 v = bpf_htons(val);
+ bpf_skb_store_bytes(pkt, off, &v, 2, 0);
+ } else if (bofs + bsz <= 16) {
+ u16 v = load_half(pkt, off);
+ v &= ~(MASK(bsz) << (16 - (bofs + bsz)));
+ v |= ((val & MASK(bsz)) << (16 - (bofs + bsz)));
+ v = bpf_htons(v);
+ bpf_skb_store_bytes(pkt, off, &v, 2, 0);
+ } else if (bofs == 0 && bsz == 32) {
+ u32 v = bpf_htonl(val);
+ bpf_skb_store_bytes(pkt, off, &v, 4, 0);
+ } else if (bofs + bsz <= 32) {
+ u32 v = load_word(pkt, off);
+ v &= ~(MASK(bsz) << (32 - (bofs + bsz)));
+ v |= ((val & MASK(bsz)) << (32 - (bofs + bsz)));
+ v = bpf_htonl(v);
+ bpf_skb_store_bytes(pkt, off, &v, 4, 0);
+ } else if (bofs == 0 && bsz == 64) {
+ u64 v = bpf_htonll(val);
+ bpf_skb_store_bytes(pkt, off, &v, 8, 0);
+ } else if (bofs + bsz <= 64) {
+ u64 v = load_dword(pkt, off);
+ v &= ~(MASK(bsz) << (64 - (bofs + bsz)));
+ v |= ((val & MASK(bsz)) << (64 - (bofs + bsz)));
+ v = bpf_htonll(v);
+ bpf_skb_store_bytes(pkt, off, &v, 8, 0);
+ }
+}
+
+/* Parse error codes reported by the parser stage. */
+enum ErrorCode {
+ p4_pe_no_error,
+ p4_pe_index_out_of_bounds,
+ p4_pe_out_of_packet,
+ p4_pe_header_too_long,
+ p4_pe_header_too_short,
+ p4_pe_unhandled_select,
+ p4_pe_checksum,
+ p4_pe_too_many_encap,
+ p4_pe_ipv6_disabled,
+};
+
+/* Mask of the low 'w' bits of type 't'. */
+#define EBPF_MASK(t, w) ((((t)(1)) << (w)) - (t)1)
+/* Bytes needed to hold 'w' bits.
+ * NOTE(review): 'w' is unparenthesized, so BYTES(a << b) misgroups;
+ * consider (((w) + 7) / 8). */
+#define BYTES(w) ((w + 7) / 8)
+
+#endif
new file mode 100644
@@ -0,0 +1,329 @@
+/*
+ * Copyright (c) 2016 Nicira, Inc.
+ *
+ * This file is offered under your choice of two licenses: Apache 2.0 or GNU
+ * GPL 2.0 or later. The permission statements for each of these licenses is
+ * given below. You may license your modifications to this file under either
+ * of these licenses or both. If you wish to license your modifications under
+ * only one of these licenses, delete the permission text for the other
+ * license.
+ *
+ * ----------------------------------------------------------------------
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ----------------------------------------------------------------------
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ * ----------------------------------------------------------------------
+ */
+
+/* OVS P4 1.0 protocol file
+ * use bcc to generate eBPF C file
+ * see bcc project: https://github.com/iovisor/bcc.git
+ * under ~/bcc/src/cc/frontends/p4/test/
+ */
+#define ETH_P_8021Q 0x8100 /* 802.1Q VLAN Extended Header */
+#define ETH_P_8021AD 0x88A8 /* 802.1ad Service VLAN */
+#define ETH_P_ARP 0x0806 /* Address Resolution Protocol */
+#define ETH_P_IPV4 0x0800 /* Internet Protocol version 4 */
+#define ETH_P_IPV6 0x86DD /* Internet Protocol version 6 */
+
+/* IP protocol numbers (IANA). */
+#define IPPROTO_ICMP 1
+#define IPPROTO_IGMP 2
+#define IPPROTO_TCP 6
+#define IPPROTO_UDP 17
+#define IPPROTO_GRE 47
+#define IPPROTO_SCTP 132
+
+/* Protocol header layouts; all field widths are in bits. */
+header_type ethernet_t {
+ fields {
+ dstAddr : 48;
+ srcAddr : 48;
+ etherType : 16;
+ }
+}
+
+header_type vlan_tag_t {
+ fields {
+ pcp : 3;
+ cfi : 1;
+ vid : 12;
+ etherType : 16;
+ }
+}
+
+header_type mpls_t {
+ fields {
+ label : 20;
+ exp : 3;
+ bos : 1;
+ ttl : 8;
+ }
+}
+
+header_type arp_rarp_t {
+ fields {
+ hwType : 16;
+ protoType : 16;
+ hwAddrLen : 8;
+ protoAddrLen : 8;
+ opcode : 16;
+ }
+}
+
+header_type arp_rarp_ipv4_t {
+ fields {
+ srcHwAddr : 48;
+ srcProtoAddr : 32;
+ dstHwAddr : 48;
+ dstProtoAddr : 32;
+ }
+}
+
+header_type ipv4_t {
+ fields {
+ version : 4;
+ ihl : 4;
+ diffserv : 8;
+ totalLen : 16;
+ identification : 16;
+ flags : 3;
+ fragOffset : 13;
+ ttl : 8;
+ protocol : 8;
+ hdrChecksum : 16;
+ srcAddr : 32;
+ dstAddr: 32;
+ }
+}
+
+header_type ipv6_t {
+ fields {
+ version : 4;
+ trafficClass : 8;
+ flowLabel : 20;
+ payloadLen : 16;
+ nextHdr : 8;
+ hopLimit : 8;
+ srcAddr : 128;
+ dstAddr : 128;
+ }
+}
+
+header_type icmp_t {
+ fields {
+ typeCode : 16; /* ICMP type (high byte) and code (low byte). */
+ hdrChecksum : 16;
+ }
+}
+
+header_type tcp_t {
+ fields {
+ srcPort : 16;
+ dstPort : 16;
+ seqNo : 32;
+ ackNo : 32;
+ dataOffset : 4;
+ res : 4;
+ flags : 8;
+ window : 16;
+ checksum : 16;
+ urgentPtr : 16;
+ }
+}
+
+header_type udp_t {
+ fields {
+ srcPort : 16;
+ dstPort : 16;
+ length_ : 16; /* trailing underscore presumably avoids a keyword clash -- confirm */
+ checksum : 16;
+ }
+}
+
+header_type sctp_t {
+ fields {
+ srcPort : 16;
+ dstPort : 16;
+ verifTag : 32;
+ checksum : 32;
+ }
+}
+
+header_type gre_t {
+ fields {
+ C : 1;
+ R : 1;
+ K : 1;
+ S : 1;
+ s : 1;
+ recurse : 3;
+ flags : 5;
+ ver : 3;
+ proto : 16;
+ }
+}
+
+/* ----------------- metadata ---------------- */
+header_type pkt_metadata_t {
+ fields {
+ recirc_id : 32; /* Recirculation id carried with the
+ recirculating packets. 0 for packets
+ received from the wire. */
+ dp_hash : 32; /* hash value computed by the recirculation
+ action. */
+ skb_priority : 32; /* Packet priority for QoS. */
+ pkt_mark : 32; /* Packet mark. */
+ ct_state : 16; /* Connection state. */
+ ct_zone : 16; /* Connection zone. */
+ ct_mark : 32; /* Connection mark. */
+ ct_label : 128; /* Connection label. */
+ in_port : 32; /* Input port. */
+ }
+}
+
+header_type flow_tnl_t {
+ fields {
+ /* struct flow_tnl:
+ * Tunnel information used in flow key and metadata.
+ */
+ ip_dst : 32;
+ ipv6_dst : 64; /* NOTE(review): only 64 bits for an IPv6
+ address -- confirm this truncation. */
+ ip_src: 32;
+ ipv6_src : 64; /* NOTE(review): see ipv6_dst. */
+ tun_id : 64;
+ flags : 16;
+ ip_tos : 8;
+ ip_ttl : 8;
+ tp_src : 16;
+ tp_dst : 16;
+ gbp_id : 16;
+ gbp_flags : 8;
+ pad1: 40; /* Pad to 64 bits. */
+ /* struct tun_metadata metadata; */
+ }
+}
+
+/* Header and metadata instances filled in by the parse graph below. */
+header ethernet_t ethernet;
+header ipv4_t ipv4;
+header ipv6_t ipv6;
+header arp_rarp_t arp;
+header tcp_t tcp;
+header udp_t udp;
+header icmp_t icmp;
+header vlan_tag_t vlan;
+metadata pkt_metadata_t md;
+metadata flow_tnl_t tnl_md;
+
+/* Parse graph: ethernet -> (vlan | arp | ipv4 | ipv6) -> (tcp | udp | icmp). */
+parser start {
+ return parse_ethernet;
+}
+
+parser parse_ethernet{
+ extract(ethernet);
+ return select(latest.etherType) {
+ ETH_P_8021Q: parse_vlan;
+ ETH_P_8021AD: parse_vlan;
+ ETH_P_ARP: parse_arp;
+ ETH_P_IPV4: parse_ipv4;
+ ETH_P_IPV6: parse_ipv6;
+ default: ingress;
+ }
+}
+
+/* Only one VLAN tag is extracted; a nested tag falls to 'default'. */
+parser parse_vlan {
+ extract(vlan);
+ return select(latest.etherType) {
+ ETH_P_ARP: parse_arp;
+ ETH_P_IPV4: parse_ipv4;
+ ETH_P_IPV6: parse_ipv6;
+ default: ingress;
+ }
+}
+
+parser parse_arp {
+ extract(arp);
+ return ingress;
+}
+
+parser parse_ipv4 {
+ extract(ipv4);
+ return select(latest.protocol) {
+ IPPROTO_TCP: parse_tcp;
+ IPPROTO_UDP: parse_udp;
+ IPPROTO_ICMP: parse_icmp;
+ default: ingress;
+ }
+}
+
+/* IPv6 extension headers are not walked; nextHdr is matched directly. */
+parser parse_ipv6 {
+ extract(ipv6);
+ return select(latest.nextHdr) {
+ IPPROTO_TCP: parse_tcp;
+ IPPROTO_UDP: parse_udp;
+ IPPROTO_ICMP: parse_icmp;
+ default: ingress;
+ }
+}
+
+parser parse_tcp {
+ extract(tcp);
+ return ingress;
+}
+
+parser parse_udp {
+ extract(udp);
+ return ingress;
+}
+
+parser parse_icmp {
+ extract(icmp);
+ return ingress;
+}
+/* ------------------------------------------------------------------------- */
+action nop() {}
+
+table ovs_tbl {
+ reads {
+ /* Read every extracted field so the compiler does not
+ optimize them out; the table itself is otherwise unused. */
+ ethernet.dstAddr: exact;
+ vlan.etherType: exact;
+ ipv4.dstAddr: exact;
+ ipv6.dstAddr: exact;
+ icmp.typeCode: exact;
+ tcp.dstPort: exact;
+ udp.dstPort: exact;
+ md.in_port: exact;
+ tnl_md.tun_id: exact;
+ }
+ actions {
+ nop;
+ }
+}
+
+control ingress
+{
+ apply(ovs_tbl);
+}
+
new file mode 100644
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2016, 2017, 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "ovs-p4.h"
+#include "api.h"
+#include "helpers.h"
+#include "maps.h"
+#include <linux/if_ether.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+
+/* The 12 TCP flag bits, in network byte order, taken from the be16 that
+ * also holds the data offset (masked out by the htons(0x0FFF)). */
+#define TCP_FLAGS_BE16(tp) (*(__be16 *)&tcp_flag_word(tp) & bpf_htons(0x0FFF))
+
+/* Returns true if 'nw_proto' is an IPv6 extension header type that may
+ * sit between the fixed IPv6 header and the upper-layer protocol. */
+static bool ipv6_has_ext(u8 nw_proto) {
+    switch (nw_proto) {
+    case IPPROTO_HOPOPTS:
+    case IPPROTO_ROUTING:
+    case IPPROTO_DSTOPTS:
+    case IPPROTO_AH:
+    case IPPROTO_FRAGMENT:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/* Parser stage: extracts L2/L3/L4 headers and metadata from 'skb' into the
+ * per-CPU scratch maps, then tail-calls the match+lookup stage.
+ * Returns TC_ACT_SHOT on parse error, TC_ACT_OK otherwise. */
+__section_tail(PARSER_CALL)
+static int ovs_parser(struct __sk_buff* skb) {
+ void *data = (void *)(long)skb->data;
+ struct ebpf_headers_t hdrs = {};
+ struct ebpf_metadata_t metadata = {};
+ struct bpf_tunnel_key key;
+ struct ethhdr *eth;
+ ovs_be16 eth_proto;
+ u32 ebpf_zero = 0;
+ int offset = 0;
+ u8 nw_proto = 0;
+ int err = 0, ret = 0;
+
+ /* Bounds check so the BPF verifier knows the ethernet header is
+ * readable through the direct-access pointers. */
+ if ((char *)data + sizeof(*eth) > (char *)(long)skb->data_end) {
+ printt("ERR parsing ethernet\n");
+ return TC_ACT_SHOT;
+ }
+
+ eth = data;
+ if (eth->h_proto == 0) {
+ printt("eth_proto == 0, return TC_ACT_OK\n");
+ return TC_ACT_OK;
+ }
+
+ printt("eth_proto = 0x%x len = %d\n", bpf_ntohs(eth->h_proto), skb->len);
+ printt("skb->protocol = 0x%x\n", skb->protocol);
+ printt("skb->ingress_ifindex %d skb->ifindex %d\n",
+ skb->ingress_ifindex, skb->ifindex);
+
+ /* Link Layer. */
+ if (skb_load_bytes(skb, offset, &hdrs.ethernet, sizeof(hdrs.ethernet)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ offset += sizeof(hdrs.ethernet);
+ hdrs.valid |= ETHER_VALID;
+
+ /* VLAN 8021Q (0x8100) or 8021AD (0x88a8) in metadata
+ * note: vlan in metadata is always the outer vlan
+ */
+ if (skb->vlan_tci) {
+ hdrs.vlan.tci = skb->vlan_tci | VLAN_TAG_PRESENT; /* host byte order */
+ hdrs.vlan.etherType = skb->vlan_proto;
+ hdrs.valid |= VLAN_VALID;
+
+ printt("skb metadata: vlan proto 0x%x tci %x\n", bpf_ntohs(skb->vlan_proto), skb->vlan_tci);
+ }
+
+ eth_proto = eth->h_proto;
+
+ if (eth->h_proto == bpf_htons(ETH_P_8021Q)){
+
+ /* The inner, if exists, is VLAN 8021Q (0x8100) */
+ struct vlan_hdr { /* wire format */
+ ovs_be16 tci;
+ ovs_be16 ethertype;
+ } cvlan;
+
+ /* parse cvlan
+ * NOTE(review): the load starts at 'offset - 2', so 'tci' receives
+ * the TPID bytes and 'ethertype' receives the TCI -- confirm this
+ * off-by-two is intended. */
+ if (skb_load_bytes(skb, offset - 2, &cvlan, sizeof(cvlan)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ offset += sizeof(hdrs.cvlan);
+ hdrs.valid |= CVLAN_VALID;
+
+ hdrs.cvlan.tci = bpf_ntohs(cvlan.tci);
+ hdrs.cvlan.etherType = cvlan.ethertype;
+
+ printt("vlan tci 0x%x ethertype 0x%x\n",
+ hdrs.cvlan.tci, bpf_ntohs(hdrs.cvlan.etherType));
+
+ skb_load_bytes(skb, offset - 2, &eth_proto, 2);
+ printt("eth_proto = 0x%x\n", bpf_ntohs(eth_proto));
+ }
+
+ /* Network Layer.
+ * see key_extract() in net/openvswitch/flow.c */
+ if (eth_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr nh;
+
+ printt("parse ipv4\n");
+ if (skb_load_bytes(skb, offset, &nh, sizeof(nh)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ offset += nh.ihl * 4; /* ihl*4 also skips any IPv4 options. */
+ hdrs.valid |= IPV4_VALID;
+
+ hdrs.ipv4.ttl = nh.ttl; /* u8 */
+ hdrs.ipv4.tos = nh.tos; /* u8 */
+ hdrs.ipv4.protocol = nh.protocol; /* u8*/
+ hdrs.ipv4.srcAddr = nh.saddr; /* be32 */
+ hdrs.ipv4.dstAddr = nh.daddr; /* be32 */
+
+ nw_proto = hdrs.ipv4.protocol;
+ printt("next proto 0x%x\n", nw_proto);
+
+ } else if (eth_proto == bpf_htons(ETH_P_ARP) ||
+ eth_proto == bpf_htons(ETH_P_RARP)) {
+ struct arp_rarp_t *arp;
+
+ printt("parse arp/rarp\n");
+
+ /* the struct arp_rarp_t is wire format */
+ arp = &hdrs.arp;
+ if (skb_load_bytes(skb, offset, arp, sizeof(hdrs.arp)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ offset += sizeof(hdrs.arp);
+ hdrs.valid |= ARP_VALID;
+
+ if (arp->ar_hrd == bpf_htons(ARPHRD_ETHER) &&
+ arp->ar_pro == bpf_htons(ETH_P_IP) &&
+ arp->ar_hln == ETH_ALEN &&
+ arp->ar_pln == 4) {
+ printt("valid arp\n");
+ } else {
+ printt("ERR: invalid arp\n");
+ }
+ goto parse_metadata; /* ARP has no transport layer. */
+
+ } else if (eth_proto == bpf_htons(ETH_P_IPV6)) {
+
+ struct ipv6hdr ip6hdr; /* wire format */
+
+ if (skb_load_bytes(skb, offset, &ip6hdr, sizeof(ip6hdr)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ offset += sizeof(struct ipv6hdr); /* wire format */
+ hdrs.valid |= IPV6_VALID;
+
+ printt("parse ipv6\n");
+
+ memcpy(&hdrs.ipv6.flowLabel, &ip6hdr.flow_lbl, 4); //FIXME: copies 4 bytes but flow_lbl is only 3 -- confirm the over-read is benign
+ memcpy(&hdrs.ipv6.srcAddr, &ip6hdr.saddr, 16);
+ memcpy(&hdrs.ipv6.dstAddr, &ip6hdr.daddr, 16);
+
+ nw_proto = ip6hdr.nexthdr;
+
+ if (ipv6_has_ext(nw_proto)) {
+ printt("WARN: ipv6 nexthdr %x does not supported\n", nw_proto);
+ // need to update offset past the extension headers here
+ }
+
+ printt("next proto = %x\n", nw_proto);
+
+ } else {
+ printt("ERR: eth_proto %x not supported\n", bpf_ntohs(eth_proto));
+ return TC_ACT_OK; /* Pass unsupported ethertypes up the stack. */
+ }
+
+ /* Transport Layer.
+ * Handle: TCP, UDP, ICMP
+ */
+ if (nw_proto == IPPROTO_TCP) {
+ struct tcphdr tcp;
+
+ if (skb_load_bytes(skb, offset, &tcp, sizeof(tcp)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ hdrs.valid |= TCP_VALID;
+
+ hdrs.tcp.srcPort = tcp.source;
+ hdrs.tcp.dstPort = tcp.dest;
+ hdrs.tcp.flags = TCP_FLAGS_BE16(&tcp);
+
+ printt("parse tcp src %d dst %d\n", bpf_ntohs(tcp.source), bpf_ntohs(tcp.dest));
+
+ } else if (nw_proto == IPPROTO_UDP) {
+ struct udphdr udp;
+
+ if (skb_load_bytes(skb, offset, &udp, sizeof(udp)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ hdrs.valid |= UDP_VALID;
+
+ hdrs.udp.srcPort = udp.source;
+ hdrs.udp.dstPort = udp.dest;
+
+ printt("parse udp src %d dst %d\n", bpf_ntohs(udp.source), bpf_ntohs(udp.dest));
+
+ } else if (nw_proto == IPPROTO_ICMP) { /* ICMP v4 */
+ struct icmphdr icmp;
+
+ if (skb_load_bytes(skb, offset, &icmp, sizeof(icmp)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ hdrs.valid |= ICMP_VALID;
+
+ hdrs.icmp.type = icmp.type;
+ hdrs.icmp.code = icmp.code;
+
+ printt("parse icmp type %d code %d\n", icmp.type, icmp.code);
+
+ } else if (nw_proto == 0x3a /*EXTHDR_ICMP*/) { /* ICMP v6 */
+ struct icmphdr icmp;
+
+ if (skb_load_bytes(skb, offset, &icmp, sizeof(icmp)) < 0) {
+ err = p4_pe_header_too_short;
+ printt("ERR: load byte %d\n", __LINE__);
+ goto end;
+ }
+ hdrs.valid |= ICMPV6_VALID;
+
+ hdrs.icmpv6.type = icmp.type;
+ hdrs.icmpv6.code = icmp.code;
+
+ printt("parse icmp v6 type %d code %d\n", icmp.type, icmp.code);
+ } else if (nw_proto == IPPROTO_GRE) {
+ printt("receive gre packet\n");
+ } else {
+ printt("WARN: nw_proto 0x%x not parsed\n", nw_proto);
+ /* Continue */
+ }
+
+parse_metadata:
+ metadata.md.skb_priority = skb->priority;
+
+ /* Don't use ovs_cb_get_ifindex(), that gets optimized into something
+ * that can't be verified. >:( */
+ if (skb->cb[OVS_CB_INGRESS]) {
+ metadata.md.in_port = skb->ingress_ifindex;
+ }
+ if (!skb->cb[OVS_CB_INGRESS]) {
+ metadata.md.in_port = skb->ifindex;
+ }
+ metadata.md.pkt_mark = skb->mark;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (!ret) {
+ printt("bpf_skb_get_tunnel_key id = %d ipv4\n", key.tunnel_id);
+ metadata.tnl_md.tun_id = key.tunnel_id;
+ metadata.tnl_md.ip4.ip_src = key.remote_ipv4;
+ metadata.tnl_md.ip_tos = key.tunnel_tos;
+ metadata.tnl_md.ip_ttl = key.tunnel_ttl;
+ metadata.tnl_md.use_ipv6 = 0;
+ metadata.tnl_md.flags = 0;
+#ifdef BPF_ENABLE_IPV6
+ } else if (ret == -EPROTO) {
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (!ret) {
+ printt("bpf_skb_get_tunnel_key id = %d ipv6\n", key.tunnel_id);
+ metadata.tnl_md.tun_id = key.tunnel_id;
+ /* NOTE(review): copies 16 bytes starting at 'remote_ipv4';
+ * relies on it overlaying 'remote_ipv6' in a union --
+ * confirm, or use key.remote_ipv6 for clarity. */
+ memcpy(&metadata.tnl_md.ip6.ipv6_src, &key.remote_ipv4, 16);
+ metadata.tnl_md.ip_tos = key.tunnel_tos;
+ metadata.tnl_md.ip_ttl = key.tunnel_ttl;
+ metadata.tnl_md.use_ipv6 = 1;
+ metadata.tnl_md.flags = 0;
+ }
+#endif
+ }
+
+ if (!ret) {
+ ret = bpf_skb_get_tunnel_opt(skb, &metadata.tnl_md.gnvopt,
+ sizeof metadata.tnl_md.gnvopt);
+ if (ret > 0)
+ metadata.tnl_md.gnvopt_valid = 1;
+ printt("bpf_skb_get_tunnel_opt ret = %d\n", ret);
+ }
+
+end:
+ if (err != p4_pe_no_error) {
+ printt("parse error: %d, drop\n", err);
+ return TC_ACT_SHOT;
+ }
+
+ /* write flow key and md to key map */
+ printt("Parser: updating flow key\n");
+ bpf_map_update_elem(&percpu_headers,
+ &ebpf_zero, &hdrs, BPF_ANY);
+
+ if (ovs_cb_is_initial_parse(skb)) {
+ bpf_map_update_elem(&percpu_metadata,
+ &ebpf_zero, &metadata, BPF_ANY);
+ }
+ skb->cb[OVS_CB_ACT_IDX] = 0;
+
+ /* tail call next stage */
+ printt("tail call match + lookup stage\n");
+ bpf_tail_call(skb, &tailcalls, MATCH_ACTION_CALL);
+
+ /* Reached only if the tail call fails (e.g. empty map slot). */
+ printt("[ERROR] missing tail call\n");
+ return TC_ACT_OK;
+}
new file mode 100644
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2018 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+#include "ovs-p4.h"
+#include "api.h"
+#include "helpers.h"
+
+/* XDP entry point; placeholder that passes every packet up the stack. */
+__section("xdp")
+static int xdp_ingress(struct xdp_md *ctx OVS_UNUSED)
+{
+ /* TODO: see p4c-xdp project */
+ printt("return XDP_PASS\n");
+ return XDP_PASS;
+}
+
+__section("af_xdp")
+static int af_xdp_ingress(struct xdp_md *ctx OVS_UNUSED)
+{
+ /* TODO: see xdpsock_kern.c and xdpsock_user.c */
+ return XDP_PASS;
+}