@@ -814,7 +814,54 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
OVS_GREP_IFELSE([$KSRC/include/net/inet_frag.h],
frag_percpu_counter_batch[],
[OVS_DEFINE([HAVE_FRAG_PERCPU_COUNTER_BATCH])])
-
+ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+ [null_compute_pseudo],
+ [OVS_DEFINE([HAVE_NULL_COMPUTE_PSEUDO])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+ [__skb_checksum_convert],
+ [OVS_DEFINE([HAVE_SKB_CHECKSUM_CONVERT])])
+ OVS_FIND_FIELD_IFELSE([$KSRC/include/linux/netdevice.h], [net_device],
+ [max_mtu],
+ [OVS_DEFINE([HAVE_NET_DEVICE_MAX_MTU])])
+ OVS_GREP_IFELSE([$KSRC/include/net/erspan.h],
+ [__LINUX_ERSPAN_H],
+ [OVS_DEFINE([HAVE_LINUX_ERSPAN_H])])
+ OVS_FIND_PARAM_IFELSE([$KSRC/net/ipv6/ip6_gre.c],
+ [ip6gre_tunnel_validate], [extack],
+ [OVS_DEFINE([HAVE_IP6GRE_EXTACK])])
+ OVS_FIND_FIELD_IFELSE([$KSRC/include/net/ip6_tunnel.h], [__ip6_tnl_parm],
+ [erspan_ver],
+ [OVS_DEFINE([HAVE_IP6_TNL_PARM_ERSPAN_VER])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+ [SKB_GSO_IPXIP6],
+ [OVS_DEFINE([HAVE_SKB_GSO_IPXIP6])])
+ OVS_FIND_PARAM_IFELSE([$KSRC/include/net/ipv6.h],
+ [ip6_make_flowlabel], [fl6],
+ [OVS_DEFINE([HAVE_IP6_MAKE_FLOWLABEL_FL6])])
+ OVS_FIND_FIELD_IFELSE([$KSRC/include/net/ipv6.h], [netns_sysctl_ipv6],
+ [auto_flowlabels],
+ [OVS_DEFINE([HAVE_NETNS_SYSCTL_IPV6_AUTO_FLOWLABELS])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/netdevice.h],
+ [netif_keep_dst],
+ [OVS_DEFINE([HAVE_NETIF_KEEP_DST])])
+ OVS_FIND_FIELD_IFELSE([$KSRC/include/linux/netdevice.h], [net_device_ops],
+ [ndo_get_iflink],
+ [OVS_DEFINE([HAVE_NDO_GET_IFLINK])])
+ OVS_GREP_IFELSE([$KSRC/include/linux/skbuff.h],
+ [skb_set_inner_ipproto],
+ [OVS_DEFINE([HAVE_SKB_SET_INNER_IPPROTO])])
+ OVS_GREP_IFELSE([$KSRC/include/uapi/linux/if_tunnel.h],
+ [tunnel_encap_types],
+ [OVS_DEFINE([HAVE_TUNNEL_ENCAP_TYPES])])
+ OVS_GREP_IFELSE([$KSRC/include/uapi/linux/if_tunnel.h],
+ [IFLA_IPTUN_ENCAP_TYPE],
+ [OVS_DEFINE([HAVE_IFLA_IPTUN_ENCAP_TYPE])])
+ OVS_GREP_IFELSE([$KSRC/include/uapi/linux/if_tunnel.h],
+ [IFLA_IPTUN_COLLECT_METADATA],
+ [OVS_DEFINE([HAVE_IFLA_IPTUN_COLLECT_METADATA])])
+ OVS_GREP_IFELSE([$KSRC/net/ipv4/gre_demux.c],
+ [parse_gre_header],
+ [OVS_DEFINE([HAVE_DEMUX_PARSE_GRE_HEADER])])
if cmp -s datapath/linux/kcompat.h.new \
datapath/linux/kcompat.h >/dev/null 2>&1; then
@@ -104,5 +104,6 @@ openvswitch_headers += \
linux/compat/include/net/netfilter/nf_conntrack_zones.h \
linux/compat/include/net/netfilter/nf_nat.h \
linux/compat/include/net/netfilter/ipv6/nf_defrag_ipv6.h \
- linux/compat/include/net/sctp/checksum.h
+ linux/compat/include/net/sctp/checksum.h \
+ linux/compat/include/net/erspan.h
EXTRA_DIST += linux/compat/build-aux/export-check-whitelist
@@ -41,91 +41,25 @@
#ifndef USE_UPSTREAM_TUNNEL
#if IS_ENABLED(CONFIG_NET_IPGRE_DEMUX)
-#ifndef HAVE_GRE_HANDLE_OFFLOADS
-
-#ifndef HAVE_GRE_CISCO_REGISTER
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37)
-
-#define GREPROTO_CISCO 0
-#define GREPROTO_MAX 1
-
-struct gre_protocol {
- int (*handler)(struct sk_buff *skb);
-};
-static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-
-static int gre_rcv(struct sk_buff *skb)
-{
- const struct gre_protocol *proto;
- u8 ver;
- int ret;
-
- if (!pskb_may_pull(skb, 12))
- goto drop;
-
- ver = skb->data[1] & 0x7f;
- if (ver >= GREPROTO_MAX)
- goto drop;
-
- rcu_read_lock();
- proto = rcu_dereference(gre_proto[ver]);
- if (!proto || !proto->handler)
- goto drop_unlock;
- ret = proto->handler(skb);
- rcu_read_unlock();
- return ret;
-
-drop_unlock:
- rcu_read_unlock();
-drop:
- kfree_skb(skb);
- return NET_RX_DROP;
-}
-
-static const struct net_protocol net_gre_protocol = {
- .handler = gre_rcv,
- .netns_ok = 1,
-};
-
-static int gre_add_protocol(const struct gre_protocol *proto, u8 version)
-{
- if (version >= GREPROTO_MAX)
- return -EINVAL;
-
- if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
- pr_err("%s: cannot register gre protocol handler\n", __func__);
- return -EAGAIN;
- }
-
- return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ?
- 0 : -EBUSY;
-}
-
-static int gre_del_protocol(const struct gre_protocol *proto, u8 version)
+#define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
+#define gre_calc_hlen rpl_ip_gre_calc_hlen
+static int rpl_ip_gre_calc_hlen(__be16 o_flags)
{
- int ret;
-
- if (version >= GREPROTO_MAX)
- return -EINVAL;
-
- ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ?
- 0 : -EBUSY;
-
- if (ret)
- return ret;
-
- synchronize_net();
-
- ret = inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
- if (ret)
- return ret;
+ int addend = 4;
- return 0;
+ if (o_flags & TUNNEL_CSUM)
+ addend += 4;
+ if (o_flags & TUNNEL_KEY)
+ addend += 4;
+ if (o_flags & TUNNEL_SEQ)
+ addend += 4;
+ return addend;
}
-#endif
+#ifndef HAVE_GRE_HANDLE_OFFLOADS
+#ifndef HAVE_GRE_CISCO_REGISTER
+#ifdef HAVE_DEMUX_PARSE_GRE_HEADER
static __sum16 check_checksum(struct sk_buff *skb)
{
__sum16 csum = 0;
@@ -148,20 +82,6 @@ static __sum16 check_checksum(struct sk_buff *skb)
return csum;
}
-#define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
-static int ip_gre_calc_hlen(__be16 o_flags)
-{
- int addend = 4;
-
- if (o_flags & TUNNEL_CSUM)
- addend += 4;
- if (o_flags & TUNNEL_KEY)
- addend += 4;
- if (o_flags & TUNNEL_SEQ)
- addend += 4;
- return addend;
-}
-
#define gre_flags_to_tnl_flags rpl_gre_flags_to_tnl_flags
static __be16 gre_flags_to_tnl_flags(__be16 flags)
{
@@ -202,13 +122,12 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
tpi->flags = gre_flags_to_tnl_flags(greh->flags);
hdr_len = ip_gre_calc_hlen(tpi->flags);
+ tpi->hdr_len = hdr_len;
+ tpi->proto = greh->protocol;
if (!pskb_may_pull(skb, hdr_len))
return -EINVAL;
- greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen);
- tpi->proto = greh->protocol;
-
options = (__be32 *)(greh + 1);
if (greh->flags & GRE_CSUM) {
if (check_checksum(skb)) {
@@ -246,20 +165,25 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
return iptunnel_pull_header(skb, hdr_len, tpi->proto, false);
}
+#endif /* HAVE_DEMUX_PARSE_GRE_HEADER */
+
static struct gre_cisco_protocol __rcu *gre_cisco_proto;
static int gre_cisco_rcv(struct sk_buff *skb)
{
struct tnl_ptk_info tpi;
- bool csum_err = false;
struct gre_cisco_protocol *proto;
rcu_read_lock();
proto = rcu_dereference(gre_cisco_proto);
if (!proto)
goto drop;
-
- if (parse_gre_header(skb, &tpi, &csum_err) < 0)
- goto drop;
+#ifdef HAVE_DEMUX_PARSE_GRE_HEADER
+ {
+ bool csum_err = false;
+ if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+ goto drop;
+ }
+#endif
proto->handler(skb, &tpi);
rcu_read_unlock();
return 0;
@@ -309,5 +233,101 @@ EXPORT_SYMBOL_GPL(rpl_gre_cisco_unregister);
#endif /* !HAVE_GRE_CISCO_REGISTER */
#endif
+void rpl_gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ int hdr_len)
+{
+ struct gre_base_hdr *greh;
+
+ skb_push(skb, hdr_len);
+
+ skb_reset_transport_header(skb);
+ greh = (struct gre_base_hdr *)skb->data;
+ greh->flags = tnl_flags_to_gre_flags(tpi->flags);
+ greh->protocol = tpi->proto;
+
+ if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+ if (tpi->flags&TUNNEL_SEQ) {
+ *ptr = tpi->seq;
+ ptr--;
+ }
+ if (tpi->flags&TUNNEL_KEY) {
+ *ptr = tpi->key;
+ ptr--;
+ }
+ if (tpi->flags&TUNNEL_CSUM &&
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
+ *ptr = 0;
+ *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+ skb->len, 0));
+ }
+ }
+}
+EXPORT_SYMBOL_GPL(rpl_gre_build_header);
+
+/* Fills in tpi and returns header length to be pulled. */
+int rpl_gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ bool *csum_err, __be16 proto, int nhs)
+{
+ const struct gre_base_hdr *greh;
+ __be32 *options;
+ int hdr_len;
+
+ if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr))))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)(skb->data + nhs);
+ if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+ return -EINVAL;
+
+ tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ hdr_len = gre_calc_hlen(tpi->flags);
+
+ if (!pskb_may_pull(skb, nhs + hdr_len))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)(skb->data + nhs);
+ tpi->proto = greh->protocol;
+
+ options = (__be32 *)(greh + 1);
+ if (greh->flags & GRE_CSUM) {
+ if (skb_checksum_simple_validate(skb)) {
+ *csum_err = true;
+ return -EINVAL;
+ }
+
+ skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ null_compute_pseudo);
+ options++;
+ }
+
+ if (greh->flags & GRE_KEY) {
+ tpi->key = *options;
+ options++;
+ } else {
+ tpi->key = 0;
+ }
+ if (unlikely(greh->flags & GRE_SEQ)) {
+ tpi->seq = *options;
+ options++;
+ } else {
+ tpi->seq = 0;
+ }
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IPv4/IPv6
+ * - When dealing with WCCPv2, skip extra 4 bytes in GRE header
+ */
+ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ tpi->proto = proto;
+ if ((*(u8 *)options & 0xF0) != 0x40)
+ hdr_len += 4;
+ }
+ tpi->hdr_len = hdr_len;
+ return hdr_len;
+}
+EXPORT_SYMBOL(rpl_gre_parse_header);
+
#endif /* CONFIG_NET_IPGRE_DEMUX */
#endif /* USE_UPSTREAM_TUNNEL */
@@ -23,6 +23,10 @@
#define ETH_P_NSH 0x894F /* Network Service Header */
#endif
+#ifndef ETH_P_ERSPAN
+#define ETH_P_ERSPAN 0x88BE /* ERSPAN TYPE II */
+#endif
+
#define inner_eth_hdr rpl_inner_eth_hdr
static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb)
{
@@ -21,6 +21,35 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
#define ignore_df local_df
#endif
+
+#ifndef HAVE_NULL_COMPUTE_PSEUDO
+static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
+{
+ return 0;
+}
+#endif
+
+#ifndef HAVE_SKB_CHECKSUM_CONVERT
+static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
+{
+ return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid);
+}
+
+static inline void __skb_checksum_convert(struct sk_buff *skb,
+ __sum16 check, __wsum pseudo)
+{
+ skb->csum = ~pseudo;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+}
+
+#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \
+do { \
+ if (__skb_checksum_convert_check(skb)) \
+ __skb_checksum_convert(skb, check, \
+ compute_pseudo(skb, proto)); \
+} while (0)
+
+#endif
#ifndef HAVE_SKB_COPY_FROM_LINEAR_DATA_OFFSET
static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb,
const int offset, void *to,
@@ -1,6 +1,11 @@
#ifndef __NET_DST_METADATA_WRAPPER_H
#define __NET_DST_METADATA_WRAPPER_H 1
+enum metadata_type {
+ METADATA_IP_TUNNEL,
+ METADATA_HW_PORT_MUX,
+};
+
#ifdef USE_UPSTREAM_TUNNEL
#include_next <net/dst_metadata.h>
#else
@@ -11,19 +16,26 @@
#include <net/ipv6.h>
#include <net/ip_tunnels.h>
+struct hw_port_info {
+ struct net_device *lower_dev;
+ u32 port_id;
+};
+
struct metadata_dst {
- unsigned long dst;
+ struct dst_entry dst;
+ enum metadata_type type;
union {
struct ip_tunnel_info tun_info;
+ struct hw_port_info port_info;
} u;
};
static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
{
- unsigned long *dst;
+ struct dst_entry *dst;
dst = &md_dst->dst;
- *dst = 0;
+
#if 0
dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
@@ -105,11 +117,6 @@ void ovs_ip_tunnel_rcv(struct net_device *dev, struct sk_buff *skb,
struct metadata_dst *tun_dst);
#ifndef HAVE_METADATA_DST_ALLOC_WITH_METADATA_TYPE
-enum metadata_type {
- METADATA_IP_TUNNEL,
- METADATA_HW_PORT_MUX,
-};
-
static inline struct metadata_dst *
rpl_metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags)
{
new file mode 100644
@@ -0,0 +1,65 @@
+#ifndef USE_UPSTREAM_TUNNEL
+#ifndef __LINUX_ERSPAN_H
+#define __LINUX_ERSPAN_H
+
+/*
+ * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |0|0|0|1|0|00000|000000000|00000| Protocol Type for ERSPAN |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Sequence Number (increments per packet per session) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Note that in the above GRE header [RFC1701] out of the C, R, K, S,
+ * s, Recur, Flags, Version fields only S (bit 03) is set to 1. The
+ * other fields are set to zero, so only a sequence number follows.
+ *
+ * ERSPAN Type II header (8 octets [42:49])
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Ver | VLAN | COS | En|T| Session ID |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | Index |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
+ */
+
+#define ERSPAN_VERSION 0x1
+
+#define VER_MASK 0xf000
+#define VLAN_MASK 0x0fff
+#define COS_MASK 0xe000
+#define EN_MASK 0x1800
+#define T_MASK 0x0400
+#define ID_MASK 0x03ff
+#define INDEX_MASK 0xfffff
+
+enum erspan_encap_type {
+ ERSPAN_ENCAP_NOVLAN = 0x0, /* originally without VLAN tag */
+ ERSPAN_ENCAP_ISL = 0x1, /* originally ISL encapsulated */
+ ERSPAN_ENCAP_8021Q = 0x2, /* originally 802.1Q encapsulated */
+	ERSPAN_ENCAP_INFRAME = 0x3,	/* VLAN tag preserved in frame */
+};
+
+struct erspan_metadata {
+ __be32 index; /* type II */
+};
+
+struct erspanhdr {
+ __be16 ver_vlan;
+#define VER_OFFSET 12
+ __be16 session_id;
+#define COS_OFFSET 13
+#define EN_OFFSET 11
+#define T_OFFSET 10
+ struct erspan_metadata md;
+};
+
+#endif
+#else
+#include_next <net/erspan.h>
+#endif
@@ -28,11 +28,7 @@ static inline struct net_device *rpl_gretap_fb_dev_create(
#endif
#else
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37) || \
- defined(HAVE_GRE_CISCO_REGISTER)
#include_next <net/gre.h>
-#endif
#ifndef HAVE_GRE_CISCO_REGISTER
@@ -62,6 +58,10 @@ struct gre_base_hdr {
#endif /* HAVE_GRE_CISCO_REGISTER */
+#define gre_build_header rpl_gre_build_header
+void rpl_gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ int hdr_len);
+
int rpl_ipgre_init(void);
void rpl_ipgre_fini(void);
@@ -69,6 +69,10 @@ void rpl_ipgre_fini(void);
struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
u8 name_assign_type);
+#define gre_parse_header rpl_gre_parse_header
+int rpl_gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ bool *csum_err, __be16 proto, int nhs);
+
#define gre_fb_xmit rpl_gre_fb_xmit
netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb);
#endif /* USE_UPSTREAM_TUNNEL */
@@ -79,4 +83,5 @@ netdev_tx_t rpl_gre_fb_xmit(struct sk_buff *skb);
#define gre_fill_metadata_dst ovs_gre_fill_metadata_dst
int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
+
#endif
@@ -8,6 +8,11 @@
* Only function that do not depend on ip_tunnel structure can
* be used. Those needs to be explicitly defined in this header file. */
#include_next <net/ip_tunnels.h>
+
+#ifndef TUNNEL_ERSPAN_OPT
+#define TUNNEL_ERSPAN_OPT __cpu_to_be16(0x4000)
+#endif
+#define ovs_ip_tunnel_encap ip_tunnel_encap
#else
#include <linux/if_tunnel.h>
@@ -18,6 +23,21 @@
#include <net/inet_ecn.h>
#include <net/ip.h>
#include <net/rtnetlink.h>
+#include <net/gro_cells.h>
+
+#ifndef MAX_IPTUN_ENCAP_OPS
+#define MAX_IPTUN_ENCAP_OPS 8
+#endif
+
+#ifndef HAVE_TUNNEL_ENCAP_TYPES
+enum tunnel_encap_types {
+ TUNNEL_ENCAP_NONE,
+ TUNNEL_ENCAP_FOU,
+ TUNNEL_ENCAP_GUE,
+};
+
+#define HAVE_TUNNEL_ENCAP_TYPES 1
+#endif
#define __iptunnel_pull_header rpl___iptunnel_pull_header
int rpl___iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
@@ -41,13 +61,17 @@ int ovs_iptunnel_handle_offloads(struct sk_buff *skb,
*/
#define iptunnel_handle_offloads rpl_iptunnel_handle_offloads
struct sk_buff *rpl_iptunnel_handle_offloads(struct sk_buff *skb,
- bool csum_help,
- int gso_type_mask);
+ bool csum_help,
+ int gso_type_mask);
#define iptunnel_xmit rpl_iptunnel_xmit
void rpl_iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 proto, __u8 tos, __u8 ttl,
__be16 df, bool xnet);
+#define ip_tunnel_xmit rpl_ip_tunnel_xmit
+void rpl_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ const struct iphdr *tnl_params, const u8 protocol);
+
#ifndef TUNNEL_CSUM
#define TUNNEL_CSUM __cpu_to_be16(0x01)
@@ -64,12 +88,17 @@ struct tnl_ptk_info {
__be16 proto;
__be32 key;
__be32 seq;
+ int hdr_len;
};
#define PACKET_RCVD 0
#define PACKET_REJECT 1
+#define PACKET_NEXT 2
#endif
+#define IP_TNL_HASH_BITS 7
+#define IP_TNL_HASH_SIZE (1 << IP_TNL_HASH_BITS)
+
#ifndef TUNNEL_DONT_FRAGMENT
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
#endif
@@ -91,6 +120,9 @@ struct tnl_ptk_info {
#undef TUNNEL_OPTIONS_PRESENT
#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)
+/* Keep error state on tunnel for 30 sec */
+#define IPTUNNEL_ERR_TIMEO (30*HZ)
+
/* Used to memset ip_tunnel padding. */
#define IP_TUNNEL_KEY_SIZE offsetofend(struct ip_tunnel_key, tp_dst)
@@ -131,6 +163,30 @@ struct ip_tunnel_info {
u8 mode;
};
+/* 6rd prefix/relay information */
+#ifdef CONFIG_IPV6_SIT_6RD
+struct ip_tunnel_6rd_parm {
+ struct in6_addr prefix;
+ __be32 relay_prefix;
+ u16 prefixlen;
+ u16 relay_prefixlen;
+};
+#endif
+
+struct ip_tunnel_encap {
+ u16 type;
+ u16 flags;
+ __be16 sport;
+ __be16 dport;
+};
+
+struct ip_tunnel_prl_entry {
+ struct ip_tunnel_prl_entry __rcu *next;
+ __be32 addr;
+ u16 flags;
+ struct rcu_head rcu_head;
+};
+
static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info *tun_info)
{
return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET;
@@ -203,39 +259,115 @@ ip_tunnel_dst_cache_usable(const struct sk_buff *skb,
}
#endif
-#define ip_tunnel rpl_ip_tunnel
+#define ip_tunnel_dst rpl_ip_tunnel_dst
+struct rpl_ip_tunnel_dst {
+ struct dst_entry __rcu *dst;
+ __be32 saddr;
+};
+#define ip_tunnel rpl_ip_tunnel
struct ip_tunnel {
+ struct ip_tunnel __rcu *next;
+ struct hlist_node hash_node;
struct net_device *dev;
struct net *net; /* netns for packet i/o */
- int err_count; /* Number of arrived ICMP errors */
unsigned long err_time; /* Time when the last ICMP error
- * arrived
- */
+ * arrived */
+ int err_count; /* Number of arrived ICMP errors */
/* These four fields used only by GRE */
u32 i_seqno; /* The last seen seqno */
u32 o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
- int mlink;
+
+ /* These four fields used only by ERSPAN */
+ u32 index; /* ERSPAN type II index */
+ u8 erspan_ver; /* ERSPAN version */
+ u8 dir; /* ERSPAN direction */
+ u16 hwid; /* ERSPAN hardware ID */
+
+ struct dst_cache dst_cache;
struct ip_tunnel_parm parms;
+ int mlink;
int encap_hlen; /* Encap header length (FOU,GUE) */
int hlen; /* tun_hlen + encap_hlen */
+ struct ip_tunnel_encap encap;
- int ip_tnl_net_id;
- bool collect_md;
+ /* for SIT */
+#ifdef CONFIG_IPV6_SIT_6RD
+ struct ip_tunnel_6rd_parm ip6rd;
+#endif
+ struct ip_tunnel_prl_entry __rcu *prl; /* potential router list */
+ unsigned int prl_count; /* # of entries in PRL */
+ unsigned int ip_tnl_net_id;
+ struct gro_cells gro_cells;
+ __u32 fwmark;
+ bool collect_md;
+ bool ignore_df;
};
#define ip_tunnel_net rpl_ip_tunnel_net
struct ip_tunnel_net {
+ struct net_device *fb_tunnel_dev;
+ struct hlist_head tunnels[IP_TNL_HASH_SIZE];
struct ip_tunnel __rcu *collect_md_tun;
- struct rtnl_link_ops *rtnl_ops;
};
+struct ip_tunnel_encap_ops {
+ size_t (*encap_hlen)(struct ip_tunnel_encap *e);
+ int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ const u8 *protocol, struct flowi4 *fl4);
+};
+
+extern const struct ip_tunnel_encap_ops __rcu *
+ rpl_iptun_encaps[MAX_IPTUN_ENCAP_OPS];
+
+#define ip_encap_hlen rpl_ip_encap_hlen
+static inline int rpl_ip_encap_hlen(struct ip_tunnel_encap *e)
+{
+ const struct ip_tunnel_encap_ops *ops;
+ int hlen = -EINVAL;
+
+ if (e->type == TUNNEL_ENCAP_NONE)
+ return 0;
+
+ if (e->type >= MAX_IPTUN_ENCAP_OPS)
+ return -EINVAL;
+
+ rcu_read_lock();
+ ops = rcu_dereference(rpl_iptun_encaps[e->type]);
+ if (likely(ops && ops->encap_hlen))
+ hlen = ops->encap_hlen(e);
+ rcu_read_unlock();
+
+ return hlen;
+}
+
+static inline int ovs_ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
+ const u8 *protocol, struct flowi4 *fl4)
+{
+ const struct ip_tunnel_encap_ops *ops;
+ int ret = -EINVAL;
+
+ if (t->encap.type == TUNNEL_ENCAP_NONE)
+ return 0;
+
+ if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+ return -EINVAL;
+
+ rcu_read_lock();
+ ops = rcu_dereference(rpl_iptun_encaps[t->encap.type]);
+ if (likely(ops && ops->build_header))
+ ret = ops->build_header(skb, &t->encap, protocol, fl4);
+ rcu_read_unlock();
+
+ return ret;
+}
+
#ifndef HAVE_PCPU_SW_NETSTATS
#define ip_tunnel_get_stats64 rpl_ip_tunnel_get_stats64
#else
@@ -322,6 +454,12 @@ struct net *rpl_ip_tunnel_get_link_net(const struct net_device *dev);
#define __ip_tunnel_change_mtu rpl___ip_tunnel_change_mtu
int rpl___ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
+#define ip_tunnel_lookup rpl_ip_tunnel_lookup
+struct ip_tunnel *rpl_ip_tunnel_lookup(struct ip_tunnel_net *itn,
+ int link, __be16 flags,
+ __be32 remote, __be32 local,
+ __be32 key);
+
static inline int iptunnel_pull_offloads(struct sk_buff *skb)
{
if (skb_is_gso(skb)) {
@@ -52,6 +52,7 @@
#include <net/rtnetlink.h>
#include <net/gre.h>
#include <net/dst_metadata.h>
+#include <net/erspan.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -63,6 +64,10 @@
#include "vport-netdev.h"
static int gre_tap_net_id __read_mostly;
+static int ipgre_net_id __read_mostly;
+static unsigned int erspan_net_id __read_mostly;
+
+static struct rtnl_link_ops ipgre_link_ops __read_mostly;
#define ip_gre_calc_hlen rpl_ip_gre_calc_hlen
static int ip_gre_calc_hlen(__be16 o_flags)
@@ -78,100 +83,308 @@ static int ip_gre_calc_hlen(__be16 o_flags)
return addend;
}
-#define tnl_flags_to_gre_flags rpl_tnl_flags_to_gre_flags
-static __be16 tnl_flags_to_gre_flags(__be16 tflags)
+/* Returns the least-significant 32 bits of a __be64. */
+static __be32 tunnel_id_to_key(__be64 x)
{
- __be16 flags = 0;
-
- if (tflags & TUNNEL_CSUM)
- flags |= GRE_CSUM;
- if (tflags & TUNNEL_ROUTING)
- flags |= GRE_ROUTING;
- if (tflags & TUNNEL_KEY)
- flags |= GRE_KEY;
- if (tflags & TUNNEL_SEQ)
- flags |= GRE_SEQ;
- if (tflags & TUNNEL_STRICT)
- flags |= GRE_STRICT;
- if (tflags & TUNNEL_REC)
- flags |= GRE_REC;
- if (tflags & TUNNEL_VERSION)
- flags |= GRE_VERSION;
+#ifdef __BIG_ENDIAN
+ return (__force __be32)x;
+#else
+ return (__force __be32)((__force u64)x >> 32);
+#endif
+}
- return flags;
+/* Called with rcu_read_lock and BH disabled. */
+static int gre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
+{
+ return PACKET_REJECT;
}
-static __be64 key_to_tunnel_id(__be32 key)
+static struct dst_ops md_dst_ops = {
+ .family = AF_UNSPEC,
+};
+
+#ifndef DST_METADATA
+#define DST_METADATA 0x0080
+#endif
+
+static void rpl__metadata_dst_init(struct metadata_dst *md_dst,
+ enum metadata_type type, u8 optslen)
+
{
-#ifdef __BIG_ENDIAN
- return (__force __be64)((__force u32)key);
-#else
- return (__force __be64)((__force u64)key << 32);
+ struct dst_entry *dst;
+
+ dst = &md_dst->dst;
+ dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
+ DST_METADATA | DST_NOCOUNT);
+
+#if 0
+ /* unused in OVS */
+ dst->input = dst_md_discard;
+ dst->output = dst_md_discard_out;
#endif
+ memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
+ md_dst->type = type;
}
-/* Returns the least-significant 32 bits of a __be64. */
-static __be32 tunnel_id_to_key(__be64 x)
+struct metadata_dst *erspan_rpl_metadata_dst_alloc(u8 optslen, enum metadata_type type,
+ gfp_t flags)
{
-#ifdef __BIG_ENDIAN
- return (__force __be32)x;
-#else
- return (__force __be32)((__force u64)x >> 32);
-#endif
+ struct metadata_dst *md_dst;
+
+ md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
+ if (!md_dst)
+ return NULL;
+
+ rpl__metadata_dst_init(md_dst, type, optslen);
+
+ return md_dst;
+}
+static inline struct metadata_dst *rpl_tun_rx_dst(int md_size)
+{
+ struct metadata_dst *tun_dst;
+
+ tun_dst = erspan_rpl_metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC);
+ if (!tun_dst)
+ return NULL;
+
+ tun_dst->u.tun_info.options_len = 0;
+ tun_dst->u.tun_info.mode = 0;
+ return tun_dst;
+}
+static inline struct metadata_dst *rpl__ip_tun_set_dst(__be32 saddr,
+ __be32 daddr,
+ __u8 tos, __u8 ttl,
+ __be16 tp_dst,
+ __be16 flags,
+ __be64 tunnel_id,
+ int md_size)
+{
+ struct metadata_dst *tun_dst;
+
+ tun_dst = rpl_tun_rx_dst(md_size);
+ if (!tun_dst)
+ return NULL;
+
+ ip_tunnel_key_init(&tun_dst->u.tun_info.key,
+ saddr, daddr, tos, ttl,
+ 0, 0, tp_dst, tunnel_id, flags);
+ return tun_dst;
}
-static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
+static inline struct metadata_dst *rpl_ip_tun_rx_dst(struct sk_buff *skb,
+ __be16 flags,
+ __be64 tunnel_id,
+ int md_size)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+
+ return rpl__ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl,
+ 0, flags, tunnel_id, md_size);
+}
+
+static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
{
struct net *net = dev_net(skb->dev);
- struct metadata_dst tun_dst;
+ struct metadata_dst *tun_dst = NULL;
struct ip_tunnel_net *itn;
- const struct iphdr *iph;
struct ip_tunnel *tunnel;
+ struct erspanhdr *ershdr;
+ const struct iphdr *iph;
+ __be32 session_id;
+ __be32 index;
+ int len;
- if (tpi->proto != htons(ETH_P_TEB))
- return PACKET_REJECT;
+ itn = net_generic(net, erspan_net_id);
+ iph = ip_hdr(skb);
+ len = gre_hdr_len + sizeof(*ershdr);
- itn = net_generic(net, gre_tap_net_id);
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return -ENOMEM;
iph = ip_hdr(skb);
- tunnel = rcu_dereference(itn->collect_md_tun);
+ ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+
+ /* The original GRE header does not have key field,
+ * Use ERSPAN 10-bit session ID as key.
+ */
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
+ /* OVS doesn't set tunnel key - so don't bother with it */
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+ tpi->flags,
+ iph->saddr, iph->daddr, 0);
+
if (tunnel) {
- __be16 flags;
- __be64 tun_id;
- int err;
-
- if (iptunnel_pull_offloads(skb))
- return PACKET_REJECT;
-
- skb_pop_mac_header(skb);
- flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
- tun_id = key_to_tunnel_id(tpi->key);
- ovs_ip_tun_rx_dst(&tun_dst, skb, flags, tun_id, 0);
-
- skb_reset_network_header(skb);
- err = IP_ECN_decapsulate(iph, skb);
- if (unlikely(err)) {
- if (err > 1) {
- ++tunnel->dev->stats.rx_frame_errors;
- ++tunnel->dev->stats.rx_errors;
+ if (__iptunnel_pull_header(skb,
+ gre_hdr_len + sizeof(*ershdr),
+ htons(ETH_P_TEB),
+ false, false) < 0)
+ goto drop;
+
+ if (tunnel->collect_md) {
+ struct ip_tunnel_info *info;
+ struct erspan_metadata *md;
+ __be64 tun_id;
+ __be16 flags;
+
+ tpi->flags |= TUNNEL_KEY;
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = rpl_ip_tun_rx_dst(skb, flags, tun_id, sizeof(*md));
+ if (!tun_dst)
return PACKET_REJECT;
- }
+
+ md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ md->version = ver;
+ md2 = &md->u.md2;
+ memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
+ ERSPAN_V2_MDSIZE);
+
+ info = &tun_dst->u.tun_info;
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ info->options_len = sizeof(*md);
+ }
+
+ skb_reset_mac_header(skb);
+ ovs_ip_tunnel_rcv(tunnel->dev, skb, tun_dst);
+ kfree(tun_dst);
+ return PACKET_RCVD;
+ }
+drop:
+ kfree_skb(skb);
+ return PACKET_RCVD;
+}
+
+
+static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
+{
+ struct metadata_dst tun_dst;
+ const struct iphdr *iph;
+ struct ip_tunnel *tunnel;
+
+ iph = ip_hdr(skb);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+ iph->saddr, iph->daddr, tpi->key);
+
+ if (tunnel) {
+ if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
+ raw_proto, false) < 0)
+ goto drop;
+
+ if (tunnel->dev->type != ARPHRD_NONE)
+ skb_pop_mac_header(skb);
+ else
+ skb_reset_mac_header(skb);
+ if (tunnel->collect_md) {
+ __be16 flags;
+ __be64 tun_id;
+
+ flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ tun_id = key32_to_tunnel_id(tpi->key);
+ ovs_ip_tun_rx_dst(&tun_dst, skb, flags, tun_id, 0);
}
ovs_ip_tunnel_rcv(tunnel->dev, skb, &tun_dst);
return PACKET_RCVD;
}
- return PACKET_REJECT;
+ return PACKET_NEXT;
+
+drop:
+ kfree_skb(skb);
+ return PACKET_RCVD;
+}
+
+
+static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ int hdr_len)
+{
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn;
+ int res;
+
+ if (tpi->proto == htons(ETH_P_TEB))
+ itn = net_generic(net, gre_tap_net_id);
+ else
+ itn = net_generic(net, ipgre_net_id);
+
+ res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
+ if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
+ /* ipgre tunnels in collect metadata mode should receive
+ * also ETH_P_TEB traffic.
+ */
+ itn = net_generic(net, ipgre_net_id);
+ res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
+ }
+ return res;
}
-static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
+static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
+ const struct iphdr *tnl_params,
+ __be16 proto)
{
- if (ipgre_rcv(skb, tpi) == PACKET_RCVD)
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct tnl_ptk_info tpi;
+
+ tpi.flags = tunnel->parms.o_flags;
+ tpi.proto = proto;
+ tpi.key = tunnel->parms.o_key;
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+ tpi.seq = htonl(tunnel->o_seqno);
+
+ /* Push GRE header. */
+ gre_build_header(skb, &tpi, tunnel->hlen);
+
+ ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
+}
+
+#ifndef HAVE_DEMUX_PARSE_GRE_HEADER
+static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *unused_tpi)
+{
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+ int hdr_len;
+
+ hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
+ if (hdr_len < 0)
+ goto drop;
+
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+ if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+ return 0;
+ }
+
+ if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+ return 0;
+drop:
+
+ kfree_skb(skb);
+ return 0;
+}
+#else
+static int gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *__tpi)
+{
+ struct tnl_ptk_info tpi = *__tpi;
+
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ tpi.proto == htons(ETH_P_ERSPAN2))) {
+ if (erspan_rcv(skb, &tpi, 0) == PACKET_RCVD)
+ return 0;
+ goto drop;
+ }
+
+ if (ipgre_rcv(skb, &tpi, 0) == PACKET_RCVD)
return 0;
+drop:
+
kfree_skb(skb);
return 0;
}
+#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4,7,0)
/* gre_handle_offloads() has different return type on older kernsl. */
@@ -342,6 +555,83 @@ err_free_skb:
}
EXPORT_SYMBOL(rpl_gre_fb_xmit);
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct erspan_metadata *md;
+ struct rtable *rt = NULL;
+ struct tnl_ptk_info tpi;
+ bool truncate = false;
+ struct flowi4 fl;
+ int tunnel_hlen;
+ int version;
+ __be16 df;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto err_free_rt;
+
+ /* ERSPAN has fixed 8 byte GRE header */
+ version = md->version;
+ tunnel_hlen = 8 + erspan_hdr_len(version);
+
+ rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
+ if (!rt)
+ return;
+
+ if (gre_handle_offloads(skb, false))
+ goto err_free_rt;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ truncate = true;
+ }
+
+ if (version == 1) {
+ erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
+ ntohl(md->u.index), truncate, true);
+ tpi.hdr_len = ERSPAN_V1_MDSIZE;
+ tpi.proto = htons(ETH_P_ERSPAN);
+ } else if (version == 2) {
+ erspan_build_header_v2(skb,
+ ntohl(tunnel_id_to_key32(key->tun_id)),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, true);
+ tpi.hdr_len = ERSPAN_V2_MDSIZE;
+ tpi.proto = htons(ETH_P_ERSPAN2);
+ } else {
+ goto err_free_rt;
+ }
+
+ tpi.flags = (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ);
+ tpi.key = tunnel_id_to_key32(key->tun_id);
+ tpi.seq = htonl(tunnel->o_seqno++);
+
+ gre_build_header(skb, &tpi, tunnel_hlen);
+
+ df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+
+ iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE,
+ key->tos, key->ttl, df, false);
+ return;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+}
+
#define GRE_FEATURES (NETIF_F_SG | \
NETIF_F_FRAGLIST | \
NETIF_F_HIGHDMA | \
@@ -354,23 +644,27 @@ static void __gre_tunnel_init(struct net_device *dev)
int t_hlen;
tunnel = netdev_priv(dev);
- tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->tun_hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->parms.iph.protocol = IPPROTO_GRE;
tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
t_hlen = tunnel->hlen + sizeof(struct iphdr);
- dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
- dev->mtu = ETH_DATA_LEN - t_hlen - 4;
-
dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported. */
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ /* TCP offload with GRE SEQ is not supported, nor
+ * can we support 2 levels of outer headers requiring
+ * an update.
+ */
+ if (!(tunnel->parms.o_flags & TUNNEL_CSUM) ||
+ (tunnel->encap.type == TUNNEL_ENCAP_NONE)) {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+
/* Can use a lockless transmit, unless we generate
* output sequences
*/
@@ -378,19 +672,31 @@ static void __gre_tunnel_init(struct net_device *dev)
}
}
-/* Called with rcu_read_lock and BH disabled. */
-static int gre_err(struct sk_buff *skb, u32 info,
- const struct tnl_ptk_info *tpi)
-{
- return PACKET_REJECT;
-}
-
static struct gre_cisco_protocol ipgre_protocol = {
.handler = gre_rcv,
.err_handler = gre_err,
.priority = 1,
};
+static int __net_init ipgre_init_net(struct net *net)
+{
+ return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
+}
+
+static void __net_exit ipgre_exit_net(struct net *net)
+{
+ struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
+
+ ip_tunnel_delete_net(itn, &ipgre_link_ops);
+}
+
+static struct pernet_operations ipgre_net_ops = {
+ .init = ipgre_init_net,
+ .exit = ipgre_exit_net,
+ .id = &ipgre_net_id,
+ .size = sizeof(struct ip_tunnel_net),
+};
+
static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
__be16 flags;
@@ -433,14 +739,129 @@ out:
return ipgre_tunnel_validate(tb, data);
}
-static void ipgre_netlink_parms(struct net_device *dev,
- struct nlattr *data[],
- struct nlattr *tb[],
- struct ip_tunnel_parm *parms)
+enum {
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,18,0)
+ IFLA_GRE_ENCAP_TYPE = IFLA_GRE_FLAGS + 1,
+ IFLA_GRE_ENCAP_FLAGS,
+ IFLA_GRE_ENCAP_SPORT,
+ IFLA_GRE_ENCAP_DPORT,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,3,0)
+ IFLA_GRE_COLLECT_METADATA = IFLA_GRE_ENCAP_DPORT + 1,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,8,0)
+ IFLA_GRE_IGNORE_DF = IFLA_GRE_COLLECT_METADATA + 1,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,12,0)
+ IFLA_GRE_FWMARK = IFLA_GRE_IGNORE_DF + 1,
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(4,15,0)
+ IFLA_GRE_ERSPAN_INDEX = IFLA_GRE_FWMARK + 1,
+#endif
+};
+
+#define RPL_IFLA_GRE_MAX (IFLA_GRE_ERSPAN_INDEX + 1)
+
+static int erspan_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+ __be16 flags = 0;
+ int ret;
+
+ if (!data)
+ return 0;
+
+ ret = ipgre_tap_validate(tb, data);
+ if (ret)
+ return ret;
+
+ /* ERSPAN should only have GRE sequence and key flag */
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (flags != (GRE_SEQ | GRE_KEY))
+ return -EINVAL;
+
+	/* ERSPAN Session ID is only 10 bits. Since we reuse
+	 * 32-bit key field as ID, check its range.
+ */
+ if (data[IFLA_GRE_IKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_OKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ipgre_netlink_parms(struct net_device *dev,
+ struct nlattr *data[],
+ struct nlattr *tb[],
+ struct ip_tunnel_parm *parms)
{
+ struct ip_tunnel *t = netdev_priv(dev);
+
memset(parms, 0, sizeof(*parms));
parms->iph.protocol = IPPROTO_GRE;
+
+ if (!data)
+ return 0;
+
+ if (data[IFLA_GRE_LINK])
+ parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
+
+ if (data[IFLA_GRE_IFLAGS])
+ parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
+
+ if (data[IFLA_GRE_OFLAGS])
+ parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
+
+ if (data[IFLA_GRE_IKEY])
+ parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
+
+ if (data[IFLA_GRE_OKEY])
+ parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
+
+ if (data[IFLA_GRE_LOCAL])
+ parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
+
+ if (data[IFLA_GRE_REMOTE])
+ parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
+
+ if (data[IFLA_GRE_TTL])
+ parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
+
+ if (data[IFLA_GRE_TOS])
+ parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
+
+ if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
+ if (t->ignore_df)
+ return -EINVAL;
+ parms->iph.frag_off = htons(IP_DF);
+ }
+
+ if (data[IFLA_GRE_COLLECT_METADATA]) {
+ t->collect_md = true;
+ if (dev->type == ARPHRD_IPGRE)
+ dev->type = ARPHRD_NONE;
+ }
+
+ if (data[IFLA_GRE_IGNORE_DF]) {
+ if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
+ && (parms->iph.frag_off & htons(IP_DF)))
+ return -EINVAL;
+ t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
+ }
+
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+ if (t->index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+
+ return 0;
}
static int gre_tap_init(struct net_device *dev)
@@ -462,6 +883,87 @@ static netdev_tx_t gre_dev_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+static inline u8 tos_to_cos(u8 tos)
+{
+ u8 dscp, cos;
+
+ dscp = tos >> 2;
+ cos = dscp >> 3;
+ return cos;
+}
+
+static void erspan_build_header(struct sk_buff *skb,
+ __be32 id, u32 index, bool truncate)
+{
+ struct iphdr *iphdr = ip_hdr(skb);
+ struct ethhdr *eth = eth_hdr(skb);
+ enum erspan_encap_type enc_type;
+ struct erspanhdr *ershdr;
+ struct qtag_prefix {
+ __be16 eth_type;
+ __be16 tci;
+ } *qp;
+ u16 vlan_tci = 0;
+
+ enc_type = ERSPAN_ENCAP_NOVLAN;
+
+ /* If mirrored packet has vlan tag, extract tci and
+	 * preserve vlan header in the mirrored frame.
+ */
+ if (eth->h_proto == htons(ETH_P_8021Q)) {
+ qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
+ vlan_tci = ntohs(qp->tci);
+ enc_type = ERSPAN_ENCAP_INFRAME;
+ }
+
+ skb_push(skb, sizeof(*ershdr));
+ ershdr = (struct erspanhdr *)skb->data;
+ memset(ershdr, 0, sizeof(*ershdr));
+
+ ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
+ (ERSPAN_VERSION << VER_OFFSET));
+ ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
+ ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
+ (enc_type << EN_OFFSET & EN_MASK) |
+ ((truncate << T_OFFSET) & T_MASK));
+ ershdr->md.index = htonl(index & INDEX_MASK);
+}
+
+static netdev_tx_t erspan_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ bool truncate = false;
+
+ if (gre_handle_offloads(skb, false))
+ goto free_skb;
+
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
+
+ if (skb->len > dev->mtu) {
+ pskb_trim(skb, dev->mtu);
+ truncate = true;
+ }
+
+ /* Push ERSPAN header */
+ erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+ tunnel->parms.o_flags &= ~TUNNEL_KEY;
+ __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
+ return NETDEV_TX_OK;
+
+free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+}
+
+static netdev_tx_t __erspan_fb_xmit(struct sk_buff *skb)
+{
+ erspan_fb_xmit(skb, skb->dev, skb->protocol);
+ return NETDEV_TX_OK;
+}
+
int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
@@ -481,22 +983,176 @@ int ovs_gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
}
EXPORT_SYMBOL_GPL(ovs_gre_fill_metadata_dst);
+static int erspan_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int t_hlen;
+
+ tunnel->tun_hlen = 8;
+ tunnel->parms.iph.protocol = IPPROTO_GRE;
+ t_hlen = tunnel->hlen + sizeof(struct iphdr) + sizeof(struct erspanhdr);
+
+ dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
+ dev->mtu = ETH_DATA_LEN - t_hlen - 4;
+ dev->features |= GRE_FEATURES;
+ dev->hw_features |= GRE_FEATURES;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+
+ return ip_tunnel_init(dev);
+}
+
+static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type,
+ const void *daddr, const void *saddr, unsigned int len)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct iphdr *iph;
+ struct gre_base_hdr *greh;
+
+ iph = (struct iphdr *)__skb_push(skb, t->hlen + sizeof(*iph));
+ greh = (struct gre_base_hdr *)(iph+1);
+ greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
+ greh->protocol = htons(type);
+
+ memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
+
+ /* Set the source hardware address. */
+ if (saddr)
+ memcpy(&iph->saddr, saddr, 4);
+ if (daddr)
+ memcpy(&iph->daddr, daddr, 4);
+ if (iph->daddr)
+ return t->hlen + sizeof(*iph);
+
+ return -(t->hlen + sizeof(*iph));
+}
+
+static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
+{
+ const struct iphdr *iph = (const struct iphdr *) skb_mac_header(skb);
+ memcpy(haddr, &iph->saddr, 4);
+ return 4;
+}
+
+static const struct header_ops ipgre_header_ops = {
+ .create = ipgre_header,
+ .parse = ipgre_header_parse,
+};
+
+static int ipgre_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct iphdr *iph = &tunnel->parms.iph;
+
+ __gre_tunnel_init(dev);
+
+ memcpy(dev->dev_addr, &iph->saddr, 4);
+ memcpy(dev->broadcast, &iph->daddr, 4);
+
+ dev->flags = IFF_NOARP;
+ netif_keep_dst(dev);
+ dev->addr_len = 4;
+
+ if (!tunnel->collect_md) {
+ dev->header_ops = &ipgre_header_ops;
+ }
+
+ return ip_tunnel_init(dev);
+}
+
+static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct iphdr *tnl_params;
+
+ if (tunnel->collect_md) {
+ gre_fb_xmit(skb);
+ return NETDEV_TX_OK;
+ }
+
+ if (dev->header_ops) {
+ /* Need space for new headers */
+ if (skb_cow_head(skb, dev->needed_headroom -
+ (tunnel->hlen + sizeof(struct iphdr))))
+ goto free_skb;
+
+ tnl_params = (const struct iphdr *)skb->data;
+
+ /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
+ * to gre header.
+ */
+ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ skb_reset_mac_header(skb);
+ } else {
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
+
+ tnl_params = &tunnel->parms.iph;
+ }
+
+ if (gre_handle_offloads(skb, !!(tunnel->parms.o_flags & TUNNEL_CSUM)))
+ goto free_skb;
+
+ __gre_xmit(skb, dev, tnl_params, skb->protocol);
+ return NETDEV_TX_OK;
+
+free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+ return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops ipgre_netdev_ops = {
+ .ndo_init = ipgre_tunnel_init,
+ .ndo_uninit = rpl_ip_tunnel_uninit,
+ .ndo_start_xmit = ipgre_xmit,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+#ifdef HAVE_GET_LINK_NET
+ .ndo_get_iflink = ip_tunnel_get_iflink,
+#endif
+};
+
static const struct net_device_ops gre_tap_netdev_ops = {
.ndo_init = gre_tap_init,
- .ndo_uninit = ip_tunnel_uninit,
+ .ndo_uninit = rpl_ip_tunnel_uninit,
.ndo_start_xmit = gre_dev_xmit,
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip_tunnel_change_mtu,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_stats64 = rpl_ip_tunnel_get_stats64,
#ifdef HAVE_NDO_GET_IFLINK
- .ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_get_iflink = rpl_ip_tunnel_get_iflink,
#endif
#ifdef HAVE_NDO_FILL_METADATA_DST
.ndo_fill_metadata_dst = gre_fill_metadata_dst,
#endif
};
+static const struct net_device_ops erspan_netdev_ops = {
+ .ndo_init = erspan_tunnel_init,
+ .ndo_uninit = rpl_ip_tunnel_uninit,
+ .ndo_start_xmit = erspan_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
+ .ndo_get_stats64 = rpl_ip_tunnel_get_stats64,
+#ifdef HAVE_NDO_GET_IFLINK
+ .ndo_get_iflink = rpl_ip_tunnel_get_iflink,
+#endif
+#ifdef HAVE_NDO_FILL_METADATA_DST
+ .ndo_fill_metadata_dst = gre_fill_metadata_dst,
+#endif
+};
+
+static void ipgre_tunnel_setup(struct net_device *dev)
+{
+ dev->netdev_ops = &ipgre_netdev_ops;
+ dev->type = ARPHRD_IPGRE;
+ ip_tunnel_setup(dev, ipgre_net_id);
+}
+
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
@@ -505,6 +1161,16 @@ static void ipgre_tap_setup(struct net_device *dev)
ip_tunnel_setup(dev, gre_tap_net_id);
}
+static void erspan_setup(struct net_device *dev)
+{
+ eth_hw_addr_random(dev);
+ ether_setup(dev);
+ dev->netdev_ops = &erspan_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ ip_tunnel_setup(dev, erspan_net_id);
+}
+
static int ipgre_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
@@ -550,6 +1216,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_COLLECT_METADATA */
nla_total_size(0) +
+ /* IFLA_GRE_ERSPAN_INDEX */
+ nla_total_size(4) +
0;
}
@@ -571,13 +1239,17 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
!!(p->iph.frag_off & htons(IP_DF))))
goto nla_put_failure;
+ if (t->index)
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
return -EMSGSIZE;
}
-static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
+static const struct nla_policy ipgre_policy[RPL_IFLA_GRE_MAX + 1] = {
[IFLA_GRE_LINK] = { .type = NLA_U32 },
[IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
@@ -588,11 +1260,28 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+};
+
+static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
+ .kind = "gre",
+ .maxtype = RPL_IFLA_GRE_MAX,
+ .policy = ipgre_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = ipgre_tunnel_setup,
+ .validate = ipgre_tunnel_validate,
+ .newlink = ipgre_newlink,
+ .dellink = ip_tunnel_dellink,
+ .get_size = ipgre_get_size,
+ .fill_info = ipgre_fill_info,
+#ifdef HAVE_GET_LINK_NET
+ .get_link_net = ip_tunnel_get_link_net,
+#endif
};
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
.kind = "ovs_gretap",
- .maxtype = IFLA_GRE_MAX,
+ .maxtype = RPL_IFLA_GRE_MAX,
.policy = ipgre_policy,
.priv_size = sizeof(struct ip_tunnel),
.setup = ipgre_tap_setup,
@@ -606,6 +1295,22 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
#endif
};
+static struct rtnl_link_ops erspan_link_ops __read_mostly = {
+ .kind = "erspan",
+ .maxtype = RPL_IFLA_GRE_MAX,
+ .policy = ipgre_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = erspan_setup,
+ .validate = erspan_validate,
+ .newlink = ipgre_newlink,
+ .dellink = ip_tunnel_dellink,
+ .get_size = ipgre_get_size,
+ .fill_info = ipgre_fill_info,
+#ifdef HAVE_GET_LINK_NET
+ .get_link_net = ip_tunnel_get_link_net,
+#endif
+};
+
struct net_device *rpl_gretap_fb_dev_create(struct net *net, const char *name,
u8 name_assign_type)
{
@@ -646,6 +1351,26 @@ out:
}
EXPORT_SYMBOL_GPL(rpl_gretap_fb_dev_create);
+static int __net_init erspan_init_net(struct net *net)
+{
+ return ip_tunnel_init_net(net, erspan_net_id,
+ &erspan_link_ops, NULL);
+}
+
+static void __net_exit erspan_exit_net(struct net *net)
+{
+ struct ip_tunnel_net *itn = net_generic(net, erspan_net_id);
+
+ ip_tunnel_delete_net(itn, &erspan_link_ops);
+}
+
+static struct pernet_operations erspan_net_ops = {
+ .init = erspan_init_net,
+ .exit = erspan_exit_net,
+ .id = &erspan_net_id,
+ .size = sizeof(struct ip_tunnel_net),
+};
+
static int __net_init ipgre_tap_init_net(struct net *net)
{
return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
@@ -665,6 +1390,158 @@ static struct pernet_operations ipgre_tap_net_ops = {
.size = sizeof(struct ip_tunnel_net),
};
+static struct net_device *erspan_fb_dev_create(struct net *net,
+ const char *name,
+ u8 name_assign_type)
+{
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net_device *dev;
+ LIST_HEAD(list_kill);
+ struct ip_tunnel *t;
+ int err;
+
+ memset(&tb, 0, sizeof(tb));
+
+ dev = rtnl_create_link(net, (char *)name, name_assign_type,
+ &erspan_link_ops, tb);
+ if (IS_ERR(dev))
+ return dev;
+
+ t = netdev_priv(dev);
+ t->collect_md = true;
+ /* Configure flow based GRE device. */
+ err = ipgre_newlink(net, dev, tb, NULL);
+ if (err < 0) {
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+
+ /* openvswitch users expect packet sizes to be unrestricted,
+ * so set the largest MTU we can.
+ */
+ err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+ if (err)
+ goto out;
+
+ return dev;
+out:
+ ip_tunnel_dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ return ERR_PTR(err);
+}
+
+static struct vport_ops ovs_erspan_vport_ops;
+
+static struct vport *erspan_tnl_create(const struct vport_parms *parms)
+{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct net_device *dev;
+ struct vport *vport;
+ int err;
+
+ vport = ovs_vport_alloc(0, &ovs_erspan_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ rtnl_lock();
+ dev = erspan_fb_dev_create(net, parms->name, NET_NAME_USER);
+ if (IS_ERR(dev)) {
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ return ERR_CAST(dev);
+ }
+
+ err = dev_change_flags(dev, dev->flags | IFF_UP);
+ if (err < 0) {
+ rtnl_delete_link(dev);
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ return ERR_PTR(err);
+ }
+
+ rtnl_unlock();
+ return vport;
+}
+
+static struct vport *erspan_create(const struct vport_parms *parms)
+{
+ struct vport *vport;
+
+ vport = erspan_tnl_create(parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ return ovs_netdev_link(vport, parms->name);
+}
+
+#ifndef OVS_VPORT_TYPE_ERSPAN
+/* Until integration is done... */
+#define OVS_VPORT_TYPE_ERSPAN 107 /* ERSPAN tunnel. */
+#endif
+static struct vport_ops ovs_erspan_vport_ops = {
+ .type = OVS_VPORT_TYPE_ERSPAN,
+ .create = erspan_create,
+ .send = __erspan_fb_xmit,
+#ifndef USE_UPSTREAM_TUNNEL
+ .fill_metadata_dst = gre_fill_metadata_dst,
+#endif
+ .destroy = ovs_netdev_tunnel_destroy,
+};
+
+static struct vport_ops ovs_ipgre_vport_ops;
+
+static struct vport *ipgre_tnl_create(const struct vport_parms *parms)
+{
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct net_device *dev;
+ struct vport *vport;
+ int err;
+
+ vport = ovs_vport_alloc(0, &ovs_ipgre_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ rtnl_lock();
+ dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
+ if (IS_ERR(dev)) {
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ return ERR_CAST(dev);
+ }
+
+ err = dev_change_flags(dev, dev->flags | IFF_UP);
+ if (err < 0) {
+ rtnl_delete_link(dev);
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ return ERR_PTR(err);
+ }
+
+ rtnl_unlock();
+ return vport;
+}
+
+static struct vport *ipgre_create(const struct vport_parms *parms)
+{
+ struct vport *vport;
+
+ vport = ipgre_tnl_create(parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ return ovs_netdev_link(vport, parms->name);
+}
+
+static struct vport_ops ovs_ipgre_vport_ops = {
+ .type = OVS_VPORT_TYPE_GRE,
+ .create = ipgre_create,
+ .send = gre_fb_xmit,
+#ifndef USE_UPSTREAM_TUNNEL
+ .fill_metadata_dst = gre_fill_metadata_dst,
+#endif
+ .destroy = ovs_netdev_tunnel_destroy,
+};
+
int rpl_ipgre_init(void)
{
int err;
@@ -673,22 +1550,31 @@ int rpl_ipgre_init(void)
if (err < 0)
goto pnet_tap_faied;
+ err = register_pernet_device(&erspan_net_ops);
+ if (err < 0)
+ goto pnet_erspan_failed;
+
+ err = register_pernet_device(&ipgre_net_ops);
+ if (err < 0)
+ goto pnet_ipgre_failed;
+
err = gre_cisco_register(&ipgre_protocol);
if (err < 0) {
pr_info("%s: can't add protocol\n", __func__);
goto add_proto_failed;
}
- err = rtnl_link_register(&ipgre_tap_ops);
- if (err < 0)
- goto tap_ops_failed;
-
pr_info("GRE over IPv4 tunneling driver\n");
+
+ ovs_vport_ops_register(&ovs_ipgre_vport_ops);
+ ovs_vport_ops_register(&ovs_erspan_vport_ops);
return 0;
-tap_ops_failed:
- gre_cisco_unregister(&ipgre_protocol);
add_proto_failed:
+ unregister_pernet_device(&ipgre_net_ops);
+pnet_ipgre_failed:
+ unregister_pernet_device(&erspan_net_ops);
+pnet_erspan_failed:
unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_faied:
pr_err("Error while initializing GRE %d\n", err);
@@ -697,8 +1583,11 @@ pnet_tap_faied:
void rpl_ipgre_fini(void)
{
- rtnl_link_unregister(&ipgre_tap_ops);
+ ovs_vport_ops_unregister(&ovs_erspan_vport_ops);
+ ovs_vport_ops_unregister(&ovs_ipgre_vport_ops);
gre_cisco_unregister(&ipgre_protocol);
+ unregister_pernet_device(&ipgre_net_ops);
+ unregister_pernet_device(&erspan_net_ops);
unregister_pernet_device(&ipgre_tap_net_ops);
}
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 Nicira, Inc.
+ * Copyright (c) 2013,2018 Nicira, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -18,7 +18,6 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/kconfig.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
@@ -52,7 +51,6 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
-#include <net/udp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -63,18 +61,107 @@
#include "compat.h"
#ifndef USE_UPSTREAM_TUNNEL
+const struct ip_tunnel_encap_ops __rcu *
+ rpl_iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
+
+static unsigned int rpl_ip_tunnel_hash(__be32 key, __be32 remote)
+{
+ return hash_32((__force u32)key ^ (__force u32)remote,
+ IP_TNL_HASH_BITS);
+}
+
+static bool rpl_ip_tunnel_key_match(const struct ip_tunnel_parm *p,
+ __be16 flags, __be32 key)
+{
+ if (p->i_flags & TUNNEL_KEY) {
+ if (flags & TUNNEL_KEY)
+ return key == p->i_key;
+ else
+ /* key expected, none present */
+ return false;
+ } else
+ return !(flags & TUNNEL_KEY);
+}
+
+static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
+ struct ip_tunnel_parm *parms)
+{
+ unsigned int h;
+ __be32 remote;
+ __be32 i_key = parms->i_key;
+
+ if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
+ remote = parms->iph.daddr;
+ else
+ remote = 0;
+
+ if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
+ i_key = 0;
+
+ h = rpl_ip_tunnel_hash(i_key, remote);
+ return &itn->tunnels[h];
+}
+
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
+ struct hlist_head *head = ip_bucket(itn, &t->parms);
+
if (t->collect_md)
rcu_assign_pointer(itn->collect_md_tun, t);
- else
- WARN_ONCE(1, "%s: collect md not set\n", t->dev->name);
+ hlist_add_head_rcu(&t->hash_node, head);
}
static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
if (t->collect_md)
rcu_assign_pointer(itn->collect_md_tun, NULL);
+ hlist_del_init_rcu(&t->hash_node);
+}
+
+static struct net_device *__ip_tunnel_create(struct net *net,
+ const struct rtnl_link_ops *ops,
+ struct ip_tunnel_parm *parms)
+{
+ int err;
+ struct ip_tunnel *tunnel;
+ struct net_device *dev;
+ char name[IFNAMSIZ];
+
+ if (parms->name[0])
+ strlcpy(name, parms->name, IFNAMSIZ);
+ else {
+ if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
+ err = -E2BIG;
+ goto failed;
+ }
+ strlcpy(name, ops->kind, IFNAMSIZ);
+ strncat(name, "%d", 2);
+ }
+
+ ASSERT_RTNL();
+ dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
+ if (!dev) {
+ err = -ENOMEM;
+ goto failed;
+ }
+ dev_net_set(dev, net);
+
+ dev->rtnl_link_ops = ops;
+
+ tunnel = netdev_priv(dev);
+ tunnel->parms = *parms;
+ tunnel->net = net;
+
+ err = register_netdevice(dev);
+ if (err)
+ goto failed_free;
+
+ return dev;
+
+failed_free:
+ free_netdev(dev);
+failed:
+ return ERR_PTR(err);
}
static inline void init_tunnel_flow(struct flowi4 *fl4,
@@ -118,6 +205,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
}
if (dev->type != ARPHRD_ETHER)
dev->flags |= IFF_POINTOPOINT;
+
+ dst_cache_reset(&tunnel->dst_cache);
}
if (!tdev && tunnel->parms.link)
@@ -162,6 +251,222 @@ int rpl_ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
return rpl___ip_tunnel_change_mtu(dev, new_mtu, true);
}
+static int rpl_tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+ struct rtable *rt, __be16 df,
+ const struct iphdr *inner_iph)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+ int mtu;
+
+ if (df)
+ mtu = dst_mtu(&rt->dst) - dev->hard_header_len
+ - sizeof(struct iphdr) - tunnel->hlen;
+ else
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (!skb_is_gso(skb) &&
+ (inner_iph->frag_off & htons(IP_DF)) &&
+ mtu < pkt_size) {
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ return -E2BIG;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+
+ if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
+ mtu >= IPV6_MIN_MTU) {
+ if ((tunnel->parms.iph.daddr &&
+ !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
+ rt6->rt6i_dst.plen == 128) {
+ rt6->rt6i_flags |= RTF_MODIFIED;
+ dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
+ }
+ }
+
+ if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
+ mtu < pkt_size) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ return -E2BIG;
+ }
+ }
+#endif
+ return 0;
+}
+
+void rpl_ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ const struct iphdr *tnl_params, const u8 protocol)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct iphdr *inner_iph;
+ struct flowi4 fl4;
+ u8 tos, ttl;
+ __be16 df;
+ struct rtable *rt; /* Route to the other host */
+ unsigned int max_headroom; /* The extra header space needed */
+ __be32 dst;
+ bool connected;
+
+ inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
+ connected = (tunnel->parms.iph.daddr != 0);
+
+ dst = tnl_params->daddr;
+ if (dst == 0) {
+ /* NBMA tunnel */
+
+ if (skb_dst(skb) == NULL) {
+ dev->stats.tx_fifo_errors++;
+ goto tx_error;
+ }
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ rt = skb_rtable(skb);
+ dst = rt_nexthop(rt, inner_iph->daddr);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ const struct in6_addr *addr6;
+ struct neighbour *neigh;
+ bool do_tx_error_icmp;
+ int addr_type;
+
+ neigh = dst_neigh_lookup(skb_dst(skb),
+ &ipv6_hdr(skb)->daddr);
+ if (neigh == NULL)
+ goto tx_error;
+
+ addr6 = (const struct in6_addr *)&neigh->primary_key;
+ addr_type = ipv6_addr_type(addr6);
+
+ if (addr_type == IPV6_ADDR_ANY) {
+ addr6 = &ipv6_hdr(skb)->daddr;
+ addr_type = ipv6_addr_type(addr6);
+ }
+
+ if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
+ do_tx_error_icmp = true;
+ else {
+ do_tx_error_icmp = false;
+ dst = addr6->s6_addr32[3];
+ }
+ neigh_release(neigh);
+ if (do_tx_error_icmp)
+ goto tx_error_icmp;
+ }
+#endif
+ else
+ goto tx_error;
+
+ connected = false;
+ }
+
+ tos = tnl_params->tos;
+ if (tos & 0x1) {
+ tos &= ~0x1;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ tos = inner_iph->tos;
+ connected = false;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
+ connected = false;
+ }
+ }
+
+ init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
+ tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
+
+ if (ovs_ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
+ goto tx_error;
+
+ rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
+ NULL;
+
+ if (!rt) {
+ rt = ip_route_output_key(tunnel->net, &fl4);
+
+ if (IS_ERR(rt)) {
+ dev->stats.tx_carrier_errors++;
+ goto tx_error;
+ }
+ if (connected)
+ dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
+ fl4.saddr);
+ }
+
+ if (rt->dst.dev == dev) {
+ ip_rt_put(rt);
+ dev->stats.collisions++;
+ goto tx_error;
+ }
+
+ if (rpl_tnl_update_pmtu(dev, skb, rt,
+ tnl_params->frag_off, inner_iph)) {
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+
+ if (tunnel->err_count > 0) {
+ if (time_before(jiffies,
+ tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
+ tunnel->err_count--;
+
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ dst_link_failure(skb);
+ } else
+ tunnel->err_count = 0;
+ }
+
+ tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
+ ttl = tnl_params->ttl;
+ if (ttl == 0) {
+ if (skb->protocol == htons(ETH_P_IP))
+ ttl = inner_iph->ttl;
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
+#endif
+ else
+ ttl = ip4_dst_hoplimit(&rt->dst);
+ }
+
+ df = tnl_params->frag_off;
+ if (skb->protocol == htons(ETH_P_IP))
+ df |= (inner_iph->frag_off&htons(IP_DF));
+
+ max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ + rt->dst.header_len;
+ if (max_headroom > dev->needed_headroom)
+ dev->needed_headroom = max_headroom;
+
+ if (skb_cow_head(skb, dev->needed_headroom)) {
+ ip_rt_put(rt);
+ dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ return;
+ }
+
+ iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
+ tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+
+ return;
+
+#if IS_ENABLED(CONFIG_IPV6)
+tx_error_icmp:
+ dst_link_failure(skb);
+#endif
+tx_error:
+ dev->stats.tx_errors++;
+ kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(rpl_ip_tunnel_xmit);
+
static void ip_tunnel_dev_free(struct net_device *dev)
{
free_percpu(dev->tstats);
@@ -183,24 +488,63 @@ int rpl_ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname)
{
struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
+ struct ip_tunnel_parm parms;
+ unsigned int i;
- itn->collect_md_tun = NULL;
- itn->rtnl_ops = ops;
- return 0;
+ for (i = 0; i < IP_TNL_HASH_SIZE; i++)
+ INIT_HLIST_HEAD(&itn->tunnels[i]);
+
+ if (!ops) {
+ itn->fb_tunnel_dev = NULL;
+ return 0;
+ }
+
+ memset(&parms, 0, sizeof(parms));
+ if (devname)
+ strlcpy(parms.name, devname, IFNAMSIZ);
+
+ rtnl_lock();
+ itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
+	/* FB netdevice is special: we have one, and only one per netns.
+	 * Allowing to move it to another netns is clearly unsafe.
+	 */
+ if (!IS_ERR(itn->fb_tunnel_dev)) {
+ itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
+ ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
+ }
+ rtnl_unlock();
+
+ return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
}
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
struct rtnl_link_ops *ops)
{
- struct ip_tunnel *t;
-
- t = rtnl_dereference(itn->collect_md_tun);
- if (!t)
- return;
- unregister_netdevice_queue(t->dev, head);
+ struct net *net = dev_net(itn->fb_tunnel_dev);
+ struct net_device *dev, *aux;
+ int h;
+
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == ops)
+ unregister_netdevice_queue(dev, head);
+
+ for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
+ struct ip_tunnel *t;
+ struct hlist_node *n;
+ struct hlist_head *thead = &itn->tunnels[h];
+
+ hlist_for_each_entry_safe(t, n, thead, hash_node)
+ /* If dev is in the same netns, it has already
+ * been added to the list by the previous loop.
+ */
+ if (!net_eq(dev_net(t->dev), net))
+ unregister_netdevice_queue(t->dev, head);
+ }
}
-void rpl_ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
+void rpl_ip_tunnel_delete_net(struct ip_tunnel_net *itn,
+ struct rtnl_link_ops *ops)
{
LIST_HEAD(list);
@@ -251,20 +595,41 @@ int rpl_ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
+	int err;
-	dev->destructor = ip_tunnel_dev_free;
-	dev->tstats = (typeof(dev->tstats)) netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	/* Newer kernels free the netdev in the core (needs_free_netdev)
+	 * and only call priv_destructor for private teardown; older
+	 * kernels rely on dev->destructor doing both.
+	 */
+#ifndef HAVE_NEEDS_FREE_NETDEV
+	dev->destructor = ip_tunnel_dev_free;
+#else
+	dev->needs_free_netdev = true;
+	dev->priv_destructor = ip_tunnel_dev_free;
+#endif
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
	if (!dev->tstats)
		return -ENOMEM;
+
+	/* On any later failure, unwind everything allocated so far. */
+	err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+	if (err) {
+		free_percpu(dev->tstats);
+		return err;
+	}
+
+	err = gro_cells_init(&tunnel->gro_cells, dev);
+	if (err) {
+		dst_cache_destroy(&tunnel->dst_cache);
+		free_percpu(dev->tstats);
+		return err;
+	}
+
	tunnel->dev = dev;
	tunnel->net = dev_net(dev);
	strcpy(tunnel->parms.name, dev->name);
	iph->version = 4;
	iph->ihl = 5;
-	if (tunnel->collect_md)
+	/* Metadata-mode tunnels are pinned to their netns; keep the skb
+	 * dst on transmit as well (matches upstream ip_tunnel_init).
+	 */
+	if (tunnel->collect_md) {
		dev->features |= NETIF_F_NETNS_LOCAL;
-
+		netif_keep_dst(dev);
+	}
	return 0;
}
@@ -300,4 +665,94 @@ struct net *rpl_ip_tunnel_get_link_net(const struct net_device *dev)
return tunnel->net;
}
+/* Find the best tunnel for (link, flags, remote, local, key), probing
+ * progressively less specific hash chains:
+ *   1. exact saddr + daddr match
+ *   2. daddr-only match (wildcard source)
+ *   3. saddr-only match, or multicast daddr
+ *   4. keyed, fully wildcard tunnels (skipped for TUNNEL_NO_KEY)
+ * An UP device whose parms.link matches wins outright; otherwise the
+ * first otherwise-matching tunnel is kept as a fallback candidate, and
+ * the per-netns fallback device is the last resort.  Callers hold
+ * rcu_read_lock() (hlist_for_each_entry_rcu iteration).
+ */
+struct ip_tunnel *rpl_ip_tunnel_lookup(struct ip_tunnel_net *itn,
+				       int link, __be16 flags,
+				       __be32 remote, __be32 local,
+				       __be32 key)
+{
+	unsigned int hash;
+	struct ip_tunnel *t, *cand = NULL;
+	struct hlist_head *head;
+
+	hash = rpl_ip_tunnel_hash(key, remote);
+	head = &itn->tunnels[hash];
+
+	/* Pass 1: exact source and destination address match. */
+	hlist_for_each_entry_rcu(t, head, hash_node) {
+		if (local != t->parms.iph.saddr ||
+		    remote != t->parms.iph.daddr ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
+			continue;
+
+		if (t->parms.link == link)
+			return t;
+		else
+			cand = t;
+	}
+
+	/* Pass 2: destination match with wildcard (zero) source. */
+	hlist_for_each_entry_rcu(t, head, hash_node) {
+		if (remote != t->parms.iph.daddr ||
+		    t->parms.iph.saddr != 0 ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
+			continue;
+
+		if (t->parms.link == link)
+			return t;
+		else if (!cand)
+			cand = t;
+	}
+
+	/* Passes 3 and 4 use the chain hashed with a zero remote. */
+	hash = rpl_ip_tunnel_hash(key, 0);
+	head = &itn->tunnels[hash];
+
+	/* Pass 3: source-only match, or our local address used as a
+	 * multicast destination.
+	 */
+	hlist_for_each_entry_rcu(t, head, hash_node) {
+		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
+		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
+			continue;
+
+		if (!(t->dev->flags & IFF_UP))
+			continue;
+
+		if (!rpl_ip_tunnel_key_match(&t->parms, flags, key))
+			continue;
+
+		if (t->parms.link == link)
+			return t;
+		else if (!cand)
+			cand = t;
+	}
+
+	if (flags & TUNNEL_NO_KEY)
+		goto skip_key_lookup;
+
+	/* Pass 4: keyed tunnels with both addresses wildcarded. */
+	hlist_for_each_entry_rcu(t, head, hash_node) {
+		if (t->parms.i_key != key ||
+		    t->parms.iph.saddr != 0 ||
+		    t->parms.iph.daddr != 0 ||
+		    !(t->dev->flags & IFF_UP))
+			continue;
+
+		if (t->parms.link == link)
+			return t;
+		else if (!cand)
+			cand = t;
+	}
+
+skip_key_lookup:
+	if (cand)
+		return cand;
+
+	/* Last resort: the per-netns fallback device, if it is up. */
+	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
+		return netdev_priv(itn->fb_tunnel_dev);
+
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(rpl_ip_tunnel_lookup);
+
#endif
@@ -129,6 +129,47 @@ error:
}
EXPORT_SYMBOL_GPL(ovs_iptunnel_handle_offloads);
+/* Prepare @skb for tunnel encapsulation.  Marks the current headers as
+ * inner headers, ORs @gso_type_mask into GSO skbs (after skb_unclone so
+ * shinfo is privately owned), and for non-GSO skbs resolves a pending
+ * CHECKSUM_PARTIAL when @csum_help is set.  Returns @skb on success; on
+ * failure frees it and returns ERR_PTR(-err), so callers must not touch
+ * the original pointer after an error.
+ */
+struct sk_buff *rpl_iptunnel_handle_offloads(struct sk_buff *skb,
+					     bool csum_help,
+					     int gso_type_mask)
+{
+	int err;
+
+	if (likely(!skb->encapsulation)) {
+		skb_reset_inner_headers(skb);
+		skb->encapsulation = 1;
+	}
+
+	/* GSO path: just record the tunnel GSO type; segmentation will
+	 * take care of per-segment checksums later.
+	 */
+	if (skb_is_gso(skb)) {
+		err = skb_unclone(skb, GFP_ATOMIC);
+		if (unlikely(err))
+			goto error;
+		skb_shinfo(skb)->gso_type |= gso_type_mask;
+		return skb;
+	}
+
+	/* If packet is not gso and we are resolving any partial checksum,
+	 * clear encapsulation flag. This allows setting CHECKSUM_PARTIAL
+	 * on the outer header without confusing devices that implement
+	 * NETIF_F_IP_CSUM with encapsulation.
+	 */
+	if (csum_help)
+		skb->encapsulation = 0;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL && csum_help) {
+		/* Compute the inner checksum in software now. */
+		err = skb_checksum_help(skb);
+		if (unlikely(err))
+			goto error;
+	} else if (skb->ip_summed != CHECKSUM_PARTIAL)
+		skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+error:
+	kfree_skb(skb);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(rpl_iptunnel_handle_offloads);
+
int rpl___iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
__be16 inner_proto, bool raw_proto, bool xnet)
{