Message ID | CAGsizzJTbGGpwrn-P=SUtqBW=1AN6hCTVXKp_oKHPBAoGZ4ubA@mail.gmail.com |
---|---|
State | Rejected, archived |
Delegated to: | David Miller |
Headers | show |
Le lundi 16 janvier 2012 à 13:13 +0100, Štefan Gula a écrit : > From: Stefan Gula <steweg@gmail.com > > This patch is an extension for current Ethernet over GRE > implementation, which allows user to create virtual bridge (multipoint > VPN) and forward traffic based on Ethernet MAC address informations in > it. It simulates the Bridge bahaviour learing mechanism, but instead > of learning port ID from which given MAC address comes, it learns IP > address of peer which encapsulated given packet. Multicast, Broadcast > and unknown-multicast traffic is send over network as multicast > enacapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be > represented as one single virtual switch on logical level and be also > represented as one multicast IPv4 address on network level. > > Signed-off-by: Stefan Gula <steweg@gmail.com> > > --- > > Patch was tested for more than one year in real network infrastructure > consisting of 98 Linux based access-points > OK, but please always send new patches on top on net-next tree. > diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff > linux-3.2.1-orig/Documentation/dontdiff > linux-3.2.1-my/Documentation/dontdiff > --- linux-3.2.1-orig/Documentation/dontdiff 2012-01-16 12:32:18.000000000 +0100 > +++ linux-3.2.1-my/Documentation/dontdiff 2012-01-12 20:42:45.000000000 +0100 > @@ -260,5 +260,3 @@ wakeup.lds > zImage* > zconf.hash.c > zoffset.h > -mkpiggy > -mdp Hmm, what is this ? > diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff > linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h > --- linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100 > +++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100 > @@ -27,6 +27,14 @@ struct ip_tunnel { > __u32 o_seqno; /* The last output seqno */ > int hlen; /* Precalculated GRE header length */ > int mlink; > +#ifdef CONFIG_NET_IPGRE_BRIDGE > +#define GRETAP_BR_HASH_BITS 8 > +#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS) > + struct hlist_head hash[GRETAP_BR_HASH_SIZE]; > + spinlock_t hash_lock; > + unsigned long ageing_time; > + struct timer_list gc_timer; > +#endif > > struct ip_tunnel_parm parms; > > diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff > linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig > --- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100 > +++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100 > @@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST > Network), but can be distributed all over the Internet. If you want > to do that, say Y here and to "IP multicast routing" below. > > +config NET_IPGRE_BRIDGE > + bool "IP: Ethernet over multipoint GRE over IP" > + depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST > + help > + Allows you to use multipoint GRE VPN as virtual switch and interconnect > + several L2 endpoints over L3 routed infrastructure. It is useful for > + creating multipoint L2 VPNs which can be later used inside bridge > + interfaces If you want to use. GRE multipoint L2 VPN feature say Y. > + > config IP_MROUTE > bool "IP: multicast routing" > depends on IP_MULTICAST > diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff > linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c > --- linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100 > +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 12:29:03.000000000 +0100 > @@ -52,6 +52,11 @@ > #include <net/ip6_route.h> > #endif > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > +#include <linux/jhash.h> > +#include <asm/unaligned.h> > +#endif > + > /* > Problems & solutions > -------------------- > @@ -134,6 +139,199 @@ struct ipgre_net { > struct net_device *fb_tunnel_dev; > }; > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + /* > + * This part of code includes codes to enable L2 ethernet > + * switch virtualization over IP routed infrastructure with > + * utilization of multicast capable endpoint using Ethernet > + * over GRE > + * > + * Author: Stefan Gula > + * Signed-off-by: Stefan Gula <steweg@gmail.com> > + */ > +struct mac_addr { > + unsigned char addr[6]; Not sure if you need a 'struct mac_addr' for this... unsigned char mac_addr[ETH_ALEN] ? > +}; > + > +struct ipgre_tap_bridge_entry { > + struct hlist_node hlist; > + u32 raddr; > + struct mac_addr addr; > + struct net_device *dev; > + struct rcu_head rcu; > + atomic_t use_count; > + unsigned long ageing_timer; > +}; > + > +static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly; > +static u32 ipgre_salt __read_mostly; > + > +static inline int ipgre_tap_bridge_hash(const unsigned char *mac) > +{ > + u32 key = get_unaligned((u32 *)(mac + 2)); > + return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1); > +} > + > +static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel, > + const struct ipgre_tap_bridge_entry *entry) > +{ > + return time_before_eq(entry->ageing_timer + tunnel->ageing_time, > + jiffies); > +} > + > +static void ipgre_tap_bridge_rcu_free(struct rcu_head *head) > +{ > + struct ipgre_tap_bridge_entry *entry > + = container_of(head, struct ipgre_tap_bridge_entry, rcu); > + kmem_cache_free(ipgre_tap_bridge_cache, entry); > +} > + > +void ipgre_tap_bridge_put(struct ipgre_tap_bridge_entry *entry) > +{ > + if (atomic_dec_and_test(&entry->use_count)) > + call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free); You can now use kfree_rcu() and get rid of ipgre_tap_bridge_rcu_free() > +} > + > +static inline void ipgre_tap_bridge_delete(struct > ipgre_tap_bridge_entry *entry) > +{ > + hlist_del_rcu(&entry->hlist); > + ipgre_tap_bridge_put(entry); > +} > + > +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create( > + struct hlist_head *head, > + u32 source, > + const unsigned char *addr, struct net_device *dev) > +{ > + struct ipgre_tap_bridge_entry *entry; > + entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC); > + if (entry) { > + memcpy(entry->addr.addr, addr, ETH_ALEN); > + atomic_set(&entry->use_count, 1); > + hlist_add_head_rcu(&entry->hlist, head); > + entry->raddr = source; > + entry->ageing_timer = jiffies; > + entry->dev = dev; > + } > + return entry; > +} > + > +struct ipgre_tap_bridge_entry *__ipgre_tap_bridge_get(struct ip_tunnel *tunnel, > + const unsigned char *addr) > +{ > + struct hlist_node *h; > + struct ipgre_tap_bridge_entry *entry; > + hlist_for_each_entry_rcu(entry, h, > + &tunnel->hash[ipgre_tap_bridge_hash(addr)], hlist) > + { > + if (!compare_ether_addr(entry->addr.addr, addr)) { > + if (unlikely(ipgre_tap_bridge_has_expired(tunnel, > + entry))) > + break; > + return entry; > + } > + } > + > + return NULL; > +} > + > +struct ipgre_tap_bridge_entry *ipgre_tap_bridge_get(struct ip_tunnel *tunnel, > + const unsigned char *addr) > +{ > + struct ipgre_tap_bridge_entry *entry; > + rcu_read_lock(); > + entry = __ipgre_tap_bridge_get(tunnel, addr); > + if (entry && !atomic_inc_not_zero(&entry->use_count)) > + entry = NULL; > + rcu_read_unlock(); > + return entry; > +} > + Unfortunately you call ipgre_tap_bridge_get() from ipgre_tap_bridge_get_raddr() but you dont release the refcount on use_count. Leak in ipgre_tunnel_xmit() > +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel, > + const unsigned char *addr) > +{ > + struct ipgre_tap_bridge_entry *entry; > + entry = ipgre_tap_bridge_get(tunnel, addr); So maybe you want __ipgre_tap_bridge_get(tunnel, addr); here ? > + if (entry == NULL) > + return 0; > + else > + return entry->raddr; > +} > + > + > +static inline struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find( > + struct hlist_head *head, > + const unsigned char *addr) > +{ > + struct hlist_node *h; > + struct ipgre_tap_bridge_entry *entry; > + hlist_for_each_entry_rcu(entry, h, head, hlist) { > + if (!compare_ether_addr(entry->addr.addr, addr)) > + return entry; > + } > + return NULL; > +} > + > +int ipgre_tap_bridge_init(void) > +{ > + ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache", > + sizeof(struct ipgre_tap_bridge_entry), > + 0, > + SLAB_HWCACHE_ALIGN, NULL); > + if (!ipgre_tap_bridge_cache) > + return -ENOMEM; > + get_random_bytes(&ipgre_salt, sizeof(ipgre_salt)); > + return 0; > +} > + > +void ipgre_tap_bridge_fini(void) > +{ > + kmem_cache_destroy(ipgre_tap_bridge_cache); > +} > + > +void ipgre_tap_bridge_cleanup(unsigned long _data) > +{ > + struct ip_tunnel *tunnel = (struct ip_tunnel *)_data; > + unsigned long delay = tunnel->ageing_time; > + unsigned long next_timer = jiffies + tunnel->ageing_time; > + int i; > + spin_lock_bh(&tunnel->hash_lock); Since you are called from timer, no need to use _bh() variant. spin_lock(&tunnel->hash_lock); > + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { > + struct ipgre_tap_bridge_entry *entry; > + struct hlist_node *h, *n; > + hlist_for_each_entry_safe(entry, h, n, > + &tunnel->hash[i], hlist) > + { > + unsigned long this_timer; > + this_timer = entry->ageing_timer + delay; > + if (time_before_eq(this_timer, jiffies)) > + ipgre_tap_bridge_delete(entry); > + else if (time_before(this_timer, next_timer)) > + next_timer = this_timer; > + } > + } > + spin_unlock_bh(&tunnel->hash_lock); > + mod_timer(&tunnel->gc_timer, round_jiffies(next_timer + HZ/4)); wow... why setup a 250 ms timer, if entries are valid 300 seconds ? > +} > + > +void ipgre_tap_bridge_flush(struct ip_tunnel *tunnel) > +{ > + int i; > + spin_lock_bh(&tunnel->hash_lock); > + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { > + struct ipgre_tap_bridge_entry *entry; > + struct hlist_node *h, *n; > + hlist_for_each_entry_safe(entry, h, n, > + &tunnel->hash[i], hlist) > + { > + ipgre_tap_bridge_delete(entry); > + } > + } > + spin_unlock_bh(&tunnel->hash_lock); > +} > + > +#endif > + > /* Tunnel hash table */ > > /* > @@ -563,6 +761,13 @@ static int ipgre_rcv(struct sk_buff *skb > int offset = 4; > __be16 gre_proto; > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + u32 orig_source; > + struct hlist_head *head; > + struct ipgre_tap_bridge_entry *entry; > + struct ethhdr *tethhdr; > +#endif > + > if (!pskb_may_pull(skb, 16)) > goto drop_nolock; > > @@ -659,10 +864,38 @@ static int ipgre_rcv(struct sk_buff *skb > tunnel->dev->stats.rx_errors++; > goto drop; > } > - > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + orig_source = iph->saddr; > +#endif > iph = ip_hdr(skb); > skb->protocol = eth_type_trans(skb, tunnel->dev); > skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + if (ipv4_is_multicast(tunnel->parms.iph.daddr)) { > + tethhdr = eth_hdr(skb); > + if ((tethhdr->h_source[0]&1) == 0) { if (!is_multicast_ether_addr(tethhdr->h_source)) ? > + head = &tunnel->hash[ > + ipgre_tap_bridge_hash( > + tethhdr->h_source)]; > + entry = ipgre_tap_bridge_find(head, > + tethhdr->h_source); > + if (likely(entry)) { > + entry->raddr = orig_source; > + entry->ageing_timer = jiffies; > + } else { > + spin_lock(&tunnel->hash_lock); > + if (!ipgre_tap_bridge_find(head, > + tethhdr->h_source)) > + ipgre_tap_bridge_create( > + head, > + orig_source, > + tethhdr->h_source, > + tunnel->dev); > + spin_unlock(&tunnel->hash_lock); > + } > + } > + } > +#endif > } > > tstats = this_cpu_ptr(tunnel->dev->tstats); > @@ -716,7 +949,17 @@ static netdev_tx_t ipgre_tunnel_xmit(str > tiph = &tunnel->parms.iph; > } > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + if ((dev->type == ARPHRD_ETHER) && ipv4_is_multicast( please format like this : if ((dev->type == ARPHRD_ETHER) && ipv4_is_multicast(tunnel->parms.iph.daddr)) > + tunnel->parms.iph.daddr)) > + dst = ipgre_tap_bridge_get_raddr(tunnel, > + ((struct ethhdr *)skb->data)->h_dest); > + if (dst == 0) > + dst = tiph->daddr; > + if (dst == 0) { > +#else > if ((dst = tiph->daddr) == 0) { > +#endif > /* NBMA tunnel */ > > if (skb_dst(skb) == NULL) { > @@ -1209,6 +1452,16 @@ static int ipgre_open(struct net_device > return -EADDRNOTAVAIL; > t->mlink = dev->ifindex; > ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + if (t->dev->type == ARPHRD_ETHER) { > + INIT_HLIST_HEAD(t->hash); > + spin_lock_init(&t->hash_lock); > + t->ageing_time = 300 * HZ; > + setup_timer(&t->gc_timer, ipgre_tap_bridge_cleanup, > + (unsigned long) t); > + mod_timer(&t->gc_timer, jiffies + t->ageing_time); > + } > +#endif > } > return 0; > } > @@ -1219,6 +1472,12 @@ static int ipgre_close(struct net_device > > if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { > struct in_device *in_dev; > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + if (t->dev->type == ARPHRD_ETHER) { > + ipgre_tap_bridge_flush(t); > + del_timer_sync(&t->gc_timer); > + } > +#endif > in_dev = inetdev_by_index(dev_net(dev), t->mlink); > if (in_dev) > ip_mc_dec_group(in_dev, t->parms.iph.daddr); > @@ -1341,6 +1600,12 @@ static int __net_init ipgre_init_net(str > struct ipgre_net *ign = net_generic(net, ipgre_net_id); > int err; > > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + err = ipgre_tap_bridge_init(); > + if (err) > + goto err_out; > +#endif > + > ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", > ipgre_tunnel_setup); > if (!ign->fb_tunnel_dev) { > @@ -1362,6 +1627,10 @@ static int __net_init ipgre_init_net(str > err_reg_dev: > ipgre_dev_free(ign->fb_tunnel_dev); > err_alloc_dev: > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + ipgre_tap_bridge_fini(); > +err_out: > +#endif > return err; > } > > @@ -1375,6 +1644,9 @@ static void __net_exit ipgre_exit_net(st > ipgre_destroy_tunnels(ign, &list); > unregister_netdevice_many(&list); > rtnl_unlock(); > +#ifdef CONFIG_NET_IPGRE_BRIDGE > + ipgre_tap_bridge_fini(); > +#endif > } > > static struct pernet_operations ipgre_net_ops = { > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> > + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { > > + struct ipgre_tap_bridge_entry *entry; > > + struct hlist_node *h, *n; > > + hlist_for_each_entry_safe(entry, h, n, > > + &tunnel->hash[i], hlist) > > + { > > + unsigned long this_timer; > > + this_timer = entry->ageing_timer + delay; > > + if (time_before_eq(this_timer, jiffies)) > > + ipgre_tap_bridge_delete(entry); > > + else if (time_before(this_timer, next_timer)) > > + next_timer = this_timer; > > + } > > + } > > + spin_unlock_bh(&tunnel->hash_lock); > > + mod_timer(&tunnel->gc_timer, round_jiffies(next_timer + HZ/4)); > > wow... why setup a 250 ms timer, if entries are valid 300 seconds ? Isn't that code trying to wakeup 250ms after the first expiry of any of its items? Do you even need a timer at all? It may be enough to just put a timestamp into each entry and tidy up when scanning one of the hash chains in (say) the 'add item' path - when you need write access anyway. A hash table might not be the best structure either! The hash lookup is still o(n) for n >> table_size. It also may be likely that you'll do repeated lookups for a small number of items - in which case using a hash to cache recent lookups might be useful (maybe with some type of balanced tree structure) David -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, Jan 16, 2012 at 01:13:19PM +0100, Štefan Gula wrote:
> it learns IP address of peer which encapsulated given packet.
This feature is already present in the Linux kernel, albeit only for IP
to IP lookup. Please look at "userspace ARPd" and its relation to
multipoint GRE. OpenNHRP is the associated userspace part.
That code reuses the existing neighbor management infrastructure. Adding
support for gretap and automatic learning from incoming packets
shouldn't be too hard...
-David
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/1/16 David Laight <David.Laight@aculab.com>: >> > + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { >> > + struct ipgre_tap_bridge_entry *entry; >> > + struct hlist_node *h, *n; >> > + hlist_for_each_entry_safe(entry, h, n, >> > + &tunnel->hash[i], hlist) >> > + { >> > + unsigned long this_timer; >> > + this_timer = entry->ageing_timer + delay; >> > + if (time_before_eq(this_timer, jiffies)) >> > + ipgre_tap_bridge_delete(entry); >> > + else if (time_before(this_timer, next_timer)) >> > + next_timer = this_timer; >> > + } >> > + } >> > + spin_unlock_bh(&tunnel->hash_lock); >> > + mod_timer(&tunnel->gc_timer, round_jiffies(next_timer + HZ/4)); >> >> wow... why setup a 250 ms timer, if entries are valid 300 seconds ? > > Isn't that code trying to wakeup 250ms after the first expiry of any > of its items? > > Do you even need a timer at all? > > It may be enough to just put a timestamp into each entry > and tidy up when scanning one of the hash chains in (say) > the 'add item' path - when you need write access anyway. > > A hash table might not be the best structure either! > The hash lookup is still o(n) for n >> table_size. > It also may be likely that you'll do repeated lookups for a small > number of items - in which case using a hash to cache recent lookups > might be useful (maybe with some type of balanced tree structure) > > David > > That part of code is modified from original linux bridge code, as I wanted to avoid of developing something that was already developed. The timer is actually needed to network
Dňa 16. januára 2012 14:19, Eric Dumazet <eric.dumazet@gmail.com> napísal/a: > Le lundi 16 janvier 2012 à 13:13 +0100, Štefan Gula a écrit : >> From: Stefan Gula <steweg@gmail.com >> >> This patch is an extension for current Ethernet over GRE >> implementation, which allows user to create virtual bridge (multipoint >> VPN) and forward traffic based on Ethernet MAC address informations in >> it. It simulates the Bridge bahaviour learing mechanism, but instead >> of learning port ID from which given MAC address comes, it learns IP >> address of peer which encapsulated given packet. Multicast, Broadcast >> and unknown-multicast traffic is send over network as multicast >> enacapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be >> represented as one single virtual switch on logical level and be also >> represented as one multicast IPv4 address on network level. >> >> Signed-off-by: Stefan Gula <steweg@gmail.com> >> >> --- >> >> Patch was tested for more than one year in real network infrastructure >> consisting of 98 Linux based access-points >> > > OK, but please always send new patches on top on net-next tree. > >> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff >> linux-3.2.1-orig/Documentation/dontdiff >> linux-3.2.1-my/Documentation/dontdiff >> --- linux-3.2.1-orig/Documentation/dontdiff 2012-01-16 12:32:18.000000000 +0100 >> +++ linux-3.2.1-my/Documentation/dontdiff 2012-01-12 20:42:45.000000000 +0100 >> @@ -260,5 +260,3 @@ wakeup.lds >> zImage* >> zconf.hash.c >> zoffset.h >> -mkpiggy >> -mdp > > Hmm, what is this ? Sorry, this one is not related to my patch, I have follwed the guideline and those 2 files always end-up in my diff file even I didn't modify them, so I have added them into dontdiff file assuming that that file will not pop-up in my diff file. Apparently wrong assumption, so please ignore those lines > >> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff >> linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h >> --- linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100 >> +++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100 >> @@ -27,6 +27,14 @@ struct ip_tunnel { >> __u32 o_seqno; /* The last output seqno */ >> int hlen; /* Precalculated GRE header length */ >> int mlink; >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> +#define GRETAP_BR_HASH_BITS 8 >> +#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS) >> + struct hlist_head hash[GRETAP_BR_HASH_SIZE]; >> + spinlock_t hash_lock; >> + unsigned long ageing_time; >> + struct timer_list gc_timer; >> +#endif >> >> struct ip_tunnel_parm parms; >> >> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff >> linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig >> --- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100 >> +++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100 >> @@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST >> Network), but can be distributed all over the Internet. If you want >> to do that, say Y here and to "IP multicast routing" below. >> >> +config NET_IPGRE_BRIDGE >> + bool "IP: Ethernet over multipoint GRE over IP" >> + depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST >> + help >> + Allows you to use multipoint GRE VPN as virtual switch and interconnect >> + several L2 endpoints over L3 routed infrastructure. It is useful for >> + creating multipoint L2 VPNs which can be later used inside bridge >> + interfaces If you want to use. GRE multipoint L2 VPN feature say Y. >> + >> config IP_MROUTE >> bool "IP: multicast routing" >> depends on IP_MULTICAST >> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff >> linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c >> --- linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100 >> +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 12:29:03.000000000 +0100 >> @@ -52,6 +52,11 @@ >> #include <net/ip6_route.h> >> #endif >> >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> +#include <linux/jhash.h> >> +#include <asm/unaligned.h> >> +#endif >> + >> /* >> Problems & solutions >> -------------------- >> @@ -134,6 +139,199 @@ struct ipgre_net { >> struct net_device *fb_tunnel_dev; >> }; >> >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + /* >> + * This part of code includes codes to enable L2 ethernet >> + * switch virtualization over IP routed infrastructure with >> + * utilization of multicast capable endpoint using Ethernet >> + * over GRE >> + * >> + * Author: Stefan Gula >> + * Signed-off-by: Stefan Gula <steweg@gmail.com> >> + */ >> +struct mac_addr { >> + unsigned char addr[6]; > > Not sure if you need a 'struct mac_addr' for this... > > unsigned char mac_addr[ETH_ALEN] ? not sure either, but I used in that time when I developed the code. Do I have to change that to make this patch to kernel? >> +}; >> + >> +struct ipgre_tap_bridge_entry { >> + struct hlist_node hlist; >> + u32 raddr; >> + struct mac_addr addr; >> + struct net_device *dev; >> + struct rcu_head rcu; >> + atomic_t use_count; >> + unsigned long ageing_timer; > > >> +}; >> + >> +static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly; >> +static u32 ipgre_salt __read_mostly; >> + >> +static inline int ipgre_tap_bridge_hash(const unsigned char *mac) >> +{ >> + u32 key = get_unaligned((u32 *)(mac + 2)); >> + return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1); >> +} >> + >> +static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel, >> + const struct ipgre_tap_bridge_entry *entry) >> +{ >> + return time_before_eq(entry->ageing_timer + tunnel->ageing_time, >> + jiffies); >> +} >> + >> +static void ipgre_tap_bridge_rcu_free(struct rcu_head *head) >> +{ >> + struct ipgre_tap_bridge_entry *entry >> + = container_of(head, struct ipgre_tap_bridge_entry, rcu); >> + kmem_cache_free(ipgre_tap_bridge_cache, entry); >> +} >> + >> +void ipgre_tap_bridge_put(struct ipgre_tap_bridge_entry *entry) >> +{ >> + if (atomic_dec_and_test(&entry->use_count)) >> + call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free); > > You can now use kfree_rcu() and get rid of ipgre_tap_bridge_rcu_free() hmm... for that part of code was copy & pasted from linux bridge code from version 2.6.26. Do I have to change that or is it optional? > >> +} >> + >> +static inline void ipgre_tap_bridge_delete(struct >> ipgre_tap_bridge_entry *entry) >> +{ >> + hlist_del_rcu(&entry->hlist); >> + ipgre_tap_bridge_put(entry); >> +} >> + >> +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create( >> + struct hlist_head *head, >> + u32 source, >> + const unsigned char *addr, struct net_device *dev) >> +{ >> + struct ipgre_tap_bridge_entry *entry; >> + entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC); >> + if (entry) { >> + memcpy(entry->addr.addr, addr, ETH_ALEN); >> + atomic_set(&entry->use_count, 1); >> + hlist_add_head_rcu(&entry->hlist, head); >> + entry->raddr = source; >> + entry->ageing_timer = jiffies; >> + entry->dev = dev; >> + } >> + return entry; >> +} >> + >> +struct ipgre_tap_bridge_entry *__ipgre_tap_bridge_get(struct ip_tunnel *tunnel, >> + const unsigned char *addr) >> +{ >> + struct hlist_node *h; >> + struct ipgre_tap_bridge_entry *entry; >> + hlist_for_each_entry_rcu(entry, h, >> + &tunnel->hash[ipgre_tap_bridge_hash(addr)], hlist) >> + { >> + if (!compare_ether_addr(entry->addr.addr, addr)) { >> + if (unlikely(ipgre_tap_bridge_has_expired(tunnel, >> + entry))) >> + break; >> + return entry; >> + } >> + } >> + >> + return NULL; >> +} >> + >> +struct ipgre_tap_bridge_entry *ipgre_tap_bridge_get(struct ip_tunnel *tunnel, >> + const unsigned char *addr) >> +{ >> + struct ipgre_tap_bridge_entry *entry; >> + rcu_read_lock(); >> + entry = __ipgre_tap_bridge_get(tunnel, addr); >> + if (entry && !atomic_inc_not_zero(&entry->use_count)) >> + entry = NULL; >> + rcu_read_unlock(); >> + return entry; >> +} >> + > > Unfortunately you call ipgre_tap_bridge_get() from > ipgre_tap_bridge_get_raddr() but you dont release the refcount on > use_count. Leak in ipgre_tunnel_xmit() > > >> +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel, >> + const unsigned char *addr) >> +{ >> + struct ipgre_tap_bridge_entry *entry; >> + entry = ipgre_tap_bridge_get(tunnel, addr); > > So maybe you want __ipgre_tap_bridge_get(tunnel, addr); here ? > Hmmm.. I am sorry, I am maybe not that expert on C coding, as most of the codes were copied from linux bridge code. Can you give me a hint how should I change that? >> + if (entry == NULL) >> + return 0; >> + else >> + return entry->raddr; >> +} >> + >> + >> +static inline struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find( >> + struct hlist_head *head, >> + const unsigned char *addr) >> +{ >> + struct hlist_node *h; >> + struct ipgre_tap_bridge_entry *entry; >> + hlist_for_each_entry_rcu(entry, h, head, hlist) { >> + if (!compare_ether_addr(entry->addr.addr, addr)) > > > >> + return entry; >> + } >> + return NULL; >> +} >> + >> +int ipgre_tap_bridge_init(void) >> +{ >> + ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache", >> + sizeof(struct ipgre_tap_bridge_entry), >> + 0, >> + SLAB_HWCACHE_ALIGN, NULL); >> + if (!ipgre_tap_bridge_cache) >> + return -ENOMEM; >> + get_random_bytes(&ipgre_salt, sizeof(ipgre_salt)); >> + return 0; >> +} >> + >> +void ipgre_tap_bridge_fini(void) >> +{ >> + kmem_cache_destroy(ipgre_tap_bridge_cache); >> +} >> + >> +void ipgre_tap_bridge_cleanup(unsigned long _data) >> +{ >> + struct ip_tunnel *tunnel = (struct ip_tunnel *)_data; >> + unsigned long delay = tunnel->ageing_time; >> + unsigned long next_timer = jiffies + tunnel->ageing_time; >> + int i; >> + spin_lock_bh(&tunnel->hash_lock); > > Since you are called from timer, no need to use _bh() variant. again copied structure from linux bridge code > > > spin_lock(&tunnel->hash_lock); > >> + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { >> + struct ipgre_tap_bridge_entry *entry; >> + struct hlist_node *h, *n; >> + hlist_for_each_entry_safe(entry, h, n, >> + &tunnel->hash[i], hlist) >> + { >> + unsigned long this_timer; >> + this_timer = entry->ageing_timer + delay; >> + if (time_before_eq(this_timer, jiffies)) >> + ipgre_tap_bridge_delete(entry); >> + else if (time_before(this_timer, next_timer)) >> + next_timer = this_timer; >> + } >> + } >> + spin_unlock_bh(&tunnel->hash_lock); >> + mod_timer(&tunnel->gc_timer, round_jiffies(next_timer + HZ/4)); > > wow... why setup a 250 ms timer, if entries are valid 300 seconds ? no idea, it was in original linux bridge code > > > >> +} >> + >> +void ipgre_tap_bridge_flush(struct ip_tunnel *tunnel) >> +{ >> + int i; >> + spin_lock_bh(&tunnel->hash_lock); >> + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { >> + struct ipgre_tap_bridge_entry *entry; >> + struct hlist_node *h, *n; >> + hlist_for_each_entry_safe(entry, h, n, >> + &tunnel->hash[i], hlist) >> + { >> + ipgre_tap_bridge_delete(entry); >> + } >> + } >> + spin_unlock_bh(&tunnel->hash_lock); >> +} >> + >> +#endif >> + >> /* Tunnel hash table */ >> >> /* >> @@ -563,6 +761,13 @@ static int ipgre_rcv(struct sk_buff *skb >> int offset = 4; >> __be16 gre_proto; >> >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + u32 orig_source; >> + struct hlist_head *head; >> + struct ipgre_tap_bridge_entry *entry; >> + struct ethhdr *tethhdr; >> +#endif >> + >> if (!pskb_may_pull(skb, 16)) >> goto drop_nolock; >> >> @@ -659,10 +864,38 @@ static int ipgre_rcv(struct sk_buff *skb >> tunnel->dev->stats.rx_errors++; >> goto drop; >> } >> - >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + orig_source = iph->saddr; >> +#endif >> iph = ip_hdr(skb); >> skb->protocol = eth_type_trans(skb, tunnel->dev); >> skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + if (ipv4_is_multicast(tunnel->parms.iph.daddr)) { >> + tethhdr = eth_hdr(skb); > >> + if ((tethhdr->h_source[0]&1) == 0) { > > if (!is_multicast_ether_addr(tethhdr->h_source)) ? > fixed >> + head = &tunnel->hash[ >> + ipgre_tap_bridge_hash( >> + tethhdr->h_source)]; >> + entry = ipgre_tap_bridge_find(head, >> + tethhdr->h_source); >> + if (likely(entry)) { >> + entry->raddr = orig_source; >> + entry->ageing_timer = jiffies; >> + } else { >> + spin_lock(&tunnel->hash_lock); >> + if (!ipgre_tap_bridge_find(head, >> + tethhdr->h_source)) >> + ipgre_tap_bridge_create( >> + head, >> + orig_source, >> + tethhdr->h_source, >> + tunnel->dev); >> + spin_unlock(&tunnel->hash_lock); >> + } >> + } >> + } >> +#endif >> } >> >> tstats = this_cpu_ptr(tunnel->dev->tstats); >> @@ -716,7 +949,17 @@ static netdev_tx_t ipgre_tunnel_xmit(str >> tiph = &tunnel->parms.iph; >> } >> >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + if ((dev->type == ARPHRD_ETHER) && ipv4_is_multicast( > > please format like this : > > if ((dev->type == ARPHRD_ETHER) && > ipv4_is_multicast(tunnel->parms.iph.daddr)) > fixed >> + tunnel->parms.iph.daddr)) >> + dst = ipgre_tap_bridge_get_raddr(tunnel, >> + ((struct ethhdr *)skb->data)->h_dest); >> + if (dst == 0) >> + dst = tiph->daddr; >> + if (dst == 0) { >> +#else >> if ((dst = tiph->daddr) == 0) { >> +#endif >> /* NBMA tunnel */ >> >> if (skb_dst(skb) == NULL) { >> @@ -1209,6 +1452,16 @@ static int ipgre_open(struct net_device >> return -EADDRNOTAVAIL; >> t->mlink = dev->ifindex; >> ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr); >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + if (t->dev->type == ARPHRD_ETHER) { >> + INIT_HLIST_HEAD(t->hash); >> + spin_lock_init(&t->hash_lock); >> + t->ageing_time = 300 * HZ; >> + setup_timer(&t->gc_timer, ipgre_tap_bridge_cleanup, >> + (unsigned long) t); >> + mod_timer(&t->gc_timer, jiffies + t->ageing_time); > > >> + } >> +#endif >> } >> return 0; >> } >> @@ -1219,6 +1472,12 @@ static int ipgre_close(struct net_device >> >> if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { >> struct in_device *in_dev; >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + if (t->dev->type == ARPHRD_ETHER) { >> + ipgre_tap_bridge_flush(t); >> + del_timer_sync(&t->gc_timer); >> + } >> +#endif >> in_dev = inetdev_by_index(dev_net(dev), t->mlink); >> if (in_dev) >> ip_mc_dec_group(in_dev, t->parms.iph.daddr); >> @@ -1341,6 +1600,12 @@ static int __net_init ipgre_init_net(str >> struct ipgre_net *ign = net_generic(net, ipgre_net_id); >> int err; >> >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + err = ipgre_tap_bridge_init(); >> + if (err) >> + goto err_out; >> +#endif >> + >> ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0", >> ipgre_tunnel_setup); >> if (!ign->fb_tunnel_dev) { >> @@ -1362,6 +1627,10 @@ static int __net_init ipgre_init_net(str >> err_reg_dev: >> ipgre_dev_free(ign->fb_tunnel_dev); >> err_alloc_dev: >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + ipgre_tap_bridge_fini(); >> +err_out: >> +#endif >> return err; >> } >> >> @@ -1375,6 +1644,9 @@ static void __net_exit ipgre_exit_net(st >> ipgre_destroy_tunnels(ign, &list); >> unregister_netdevice_many(&list); >> rtnl_unlock(); >> +#ifdef CONFIG_NET_IPGRE_BRIDGE >> + ipgre_tap_bridge_fini(); >> +#endif >> } >> >> static struct pernet_operations ipgre_net_ops = { >> > >
Le lundi 16 janvier 2012 à 15:05 +0100, Štefan Gula a écrit : > Dňa 16. januára 2012 14:19, Eric Dumazet <eric.dumazet@gmail.com> napísal/a: > > Unfortunately you call ipgre_tap_bridge_get() from > > ipgre_tap_bridge_get_raddr() but you dont release the refcount on > > use_count. Leak in ipgre_tunnel_xmit() > > > > > >> +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel, > >> + const unsigned char *addr) > >> +{ > >> + struct ipgre_tap_bridge_entry *entry; > >> + entry = ipgre_tap_bridge_get(tunnel, addr); > > > > So maybe you want __ipgre_tap_bridge_get(tunnel, addr); here ? > > > Hmmm.. I am sorry, I am maybe not that expert on C coding, as most of > the codes were copied from linux bridge code. Can you give me a hint > how should I change that? I dont see in net/bridge the code you copied. Could you give more information ? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dňa 16. januára 2012 15:25, Eric Dumazet <eric.dumazet@gmail.com> napísal/a: > Le lundi 16 janvier 2012 à 15:05 +0100, Štefan Gula a écrit : >> Dňa 16. januára 2012 14:19, Eric Dumazet <eric.dumazet@gmail.com> napísal/a: > >> > Unfortunately you call ipgre_tap_bridge_get() from >> > ipgre_tap_bridge_get_raddr() but you dont release the refcount on >> > use_count. Leak in ipgre_tunnel_xmit() >> > >> > >> >> +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel, >> >> + const unsigned char *addr) >> >> +{ >> >> + struct ipgre_tap_bridge_entry *entry; >> >> + entry = ipgre_tap_bridge_get(tunnel, addr); >> > >> > So maybe you want __ipgre_tap_bridge_get(tunnel, addr); here ? >> > >> Hmmm.. I am sorry, I am maybe not that expert on C coding, as most of >> the codes were copied from linux bridge code. Can you give me a hint >> how should I change that? > > I dont see in net/bridge the code you copied. > > Could you give more information ? > > File: net/birdge/br_fdb.c Functions: __br_fdb_get br_fdb_get My analogy fuctions: __ipgre_tap_bridge_get ipgre_tap_bridge_get
Le lundi 16 janvier 2012 à 15:47 +0100, Štefan Gula a écrit : > File: net/birdge/br_fdb.c > Functions: > __br_fdb_get > br_fdb_get > My analogy fuctions: > __ipgre_tap_bridge_get > ipgre_tap_bridge_get > There is no br_fdb_get(), you copied some buggy code I am afraid. Read again your patch. If you use atomic_inc_not_zero(&x->use_count), then you must do the atomic_dec() or leak entries. Current bridge code doesnt use atomic_inc_not_zero() -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dňa 16. januára 2012 16:13, Eric Dumazet <eric.dumazet@gmail.com> napísal/a: > Le lundi 16 janvier 2012 à 15:47 +0100, Štefan Gula a écrit : > >> File: net/birdge/br_fdb.c >> Functions: >> __br_fdb_get >> br_fdb_get >> My analogy fuctions: >> __ipgre_tap_bridge_get >> ipgre_tap_bridge_get >> > > There is no br_fdb_get(), you copied some buggy code I am afraid. > > Read again your patch. > > If you use atomic_inc_not_zero(&x->use_count), then you must do the > atomic_dec() or leak entries. > > Current bridge code doesnt use atomic_inc_not_zero() > > > You are right, new bridge code doesn't have such function, I will adjust that -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 16 Jan 2012 13:13:19 +0100 Štefan Gula <steweg@gmail.com> wrote: > From: Stefan Gula <steweg@gmail.com > > This patch is an extension for current Ethernet over GRE > implementation, which allows user to create virtual bridge (multipoint > VPN) and forward traffic based on Ethernet MAC address informations in > it. It simulates the Bridge bahaviour learing mechanism, but instead > of learning port ID from which given MAC address comes, it learns IP > address of peer which encapsulated given packet. Multicast, Broadcast > and unknown-multicast traffic is send over network as multicast > enacapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be > represented as one single virtual switch on logical level and be also > represented as one multicast IPv4 address on network level. > > Signed-off-by: Stefan Gula <steweg@gmail.com> Thanks for the effort, but it is duplicating existing functionality. It possible to do this already with existing gretap device and the current bridge. The same thing is also supported by OpenVswitch. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dňa 16. januára 2012 17:36, Stephen Hemminger <shemminger@vyatta.com> napísal/a: > On Mon, 16 Jan 2012 13:13:19 +0100 > Štefan Gula <steweg@gmail.com> wrote: > >> From: Stefan Gula <steweg@gmail.com >> >> This patch is an extension for current Ethernet over GRE >> implementation, which allows user to create virtual bridge (multipoint >> VPN) and forward traffic based on Ethernet MAC address informations in >> it. It simulates the Bridge bahaviour learing mechanism, but instead >> of learning port ID from which given MAC address comes, it learns IP >> address of peer which encapsulated given packet. Multicast, Broadcast >> and unknown-multicast traffic is send over network as multicast >> enacapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be >> represented as one single virtual switch on logical level and be also >> represented as one multicast IPv4 address on network level. >> >> Signed-off-by: Stefan Gula <steweg@gmail.com> > > Thanks for the effort, but it is duplicating existing functionality. > It possible to do this already with existing gretap device and the > current bridge. > > The same thing is also supported by OpenVswitch. > gretap with bridge will not do the same as gretap allows you to only encapsulate L2 frames inside the GRE - this one part is actually utilized in my code. GRE multipoint implementation is also utilized in my code as well. But what is missing is forwarding logic here, which prevents the traffic going not optimal way. Scenario one - e.g. if you connect through 3 sites with using 1 gretap multipoint VPN, it always forwards frames between site 1 and site 2 even if they are unicast. That represents waste of bandwidth for site 3. Now assume that there will be more than 40 sites and I hope you see that single current multipoint gretap is not also good solution here The second scenario - e.g. using 3 sites using point-to-point gretap interfaces between each 2 sites (2 gretap VPN interfaces per site) and bridging those interfaces with real ones results in looped topology which needs to utilized STP inside to prevent loops. Once STP converges the topology will looks like this, traffic from site 1 to site 2 will go always directly by the way of unicast (on GRE level), from site 2 to site 3 always directly by the way of unicast (on GRE level) and from site 1 to site 3 will go indirectly through site 2 due STP limitations, which results in another not optimalized traffic flows. Now assume that the number of sites rises, so gretap+standard bridge code is also not a good solution here. My code utilizes it that way that I have extended the gretap multipoint interface with the forwarding logic e.g. using 3 sites, each site uses only one gretap VPN interface and if destination MAC address is known to bridge code inside the gretap interface forwarding logic, it forwards it towards only VPN endpoint that actually need that by the way of unicasting on GRE level. On the other hand if the destination MAC address is unknown or destination MAC address is L2 multicast or L2 broadcast than the frame is spread out through multicasting on GRE level, providing delivery mechanism analogous to standard switches on top of the multipoint GRE tunnels. I also get through briefly over OpenVswitch documentation and found that it is more related to virtualization inside the box like VMware switches or so and not to such technologies interconnecting two or more separate segments over routed L3 infrastructure - there is a mention about the CAPWAP UDP transport but this is more related to WiFi implementations than generic ones. My patch also doesn't need any special userspace api to be configured. It utilizes the existing one. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Mon, 16 Jan 2012 18:26:57 +0100 Štefan Gula <steweg@gmail.com> wrote: > Dňa 16. januára 2012 17:36, Stephen Hemminger <shemminger@vyatta.com> napísal/a: > > On Mon, 16 Jan 2012 13:13:19 +0100 > > Štefan Gula <steweg@gmail.com> wrote: > > > >> From: Stefan Gula <steweg@gmail.com > >> > >> This patch is an extension for current Ethernet over GRE > >> implementation, which allows user to create virtual bridge (multipoint > >> VPN) and forward traffic based on Ethernet MAC address informations in > >> it. It simulates the Bridge bahaviour learing mechanism, but instead > >> of learning port ID from which given MAC address comes, it learns IP > >> address of peer which encapsulated given packet. Multicast, Broadcast > >> and unknown-multicast traffic is send over network as multicast > >> enacapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be > >> represented as one single virtual switch on logical level and be also > >> represented as one multicast IPv4 address on network level. > >> > >> Signed-off-by: Stefan Gula <steweg@gmail.com> > > > > Thanks for the effort, but it is duplicating existing functionality. > > It possible to do this already with existing gretap device and the > > current bridge. > > > > The same thing is also supported by OpenVswitch. > > > > gretap with bridge will not do the same as gretap allows you to only > encapsulate L2 frames inside the GRE - this one part is actually > utilized in my code. GRE multipoint implementation is also utilized in > my code as well. But what is missing is forwarding logic here, which > prevents the traffic going not optimal way. Scenario one - e.g. if you > connect through 3 sites with using 1 gretap multipoint VPN, it always > forwards frames between site 1 and site 2 even if they are unicast. > That represents waste of bandwidth for site 3. Now assume that there > will be more than 40 sites and I hope you see that single current > multipoint gretap is not also good solution here > > The second scenario - e.g. using 3 sites using point-to-point gretap > interfaces between each 2 sites (2 gretap VPN interfaces per site) and > bridging those interfaces with real ones results in looped topology > which needs to utilized STP inside to prevent loops. Once STP > converges the topology will looks like this, traffic from site 1 to > site 2 will go always directly by the way of unicast (on GRE level), > from site 2 to site 3 always directly by the way of unicast (on GRE > level) and from site 1 to site 3 will go indirectly through site 2 due > STP limitations, which results in another not optimalized traffic > flows. Now assume that the number of sites rises, so gretap+standard > bridge code is also not a good solution here. > > My code utilizes it that way that I have extended the gretap > multipoint interface with the forwarding logic e.g. using 3 sites, > each site uses only one gretap VPN interface and if destination MAC > address is known to bridge code inside the gretap interface forwarding > logic, it forwards it towards only VPN endpoint that actually need > that by the way of unicasting on GRE level. On the other hand if the > destination MAC address is unknown or destination MAC address is L2 > multicast or L2 broadcast than the frame is spread out through > multicasting on GRE level, providing delivery mechanism analogous to > standard switches on top of the multipoint GRE tunnels. Couldn't this be controlled from user space either by programming the FDB with netlink or doing alternative version of STP? > I also get through briefly over OpenVswitch documentation and found > that it is more related to virtualization inside the box like VMware > switches or so and not to such technologies interconnecting two or > more separate segments over routed L3 infrastructure - there is a > mention about the CAPWAP UDP transport but this is more related to > WiFi implementations than generic ones. My patch also doesn't need any > special userspace api to be configured. It utilizes the existing one. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/1/16 Stephen Hemminger <shemminger@vyatta.com>: > On Mon, 16 Jan 2012 18:26:57 +0100 > Štefan Gula <steweg@gmail.com> wrote: > >> Dňa 16. januára 2012 17:36, Stephen Hemminger <shemminger@vyatta.com> napísal/a: >> > On Mon, 16 Jan 2012 13:13:19 +0100 >> > Štefan Gula <steweg@gmail.com> wrote: >> > >> >> From: Stefan Gula <steweg@gmail.com >> >> >> >> This patch is an extension for current Ethernet over GRE >> >> implementation, which allows user to create virtual bridge (multipoint >> >> VPN) and forward traffic based on Ethernet MAC address informations in >> >> it. It simulates the Bridge bahaviour learing mechanism, but instead >> >> of learning port ID from which given MAC address comes, it learns IP >> >> address of peer which encapsulated given packet. Multicast, Broadcast >> >> and unknown-multicast traffic is send over network as multicast >> >> enacapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be >> >> represented as one single virtual switch on logical level and be also >> >> represented as one multicast IPv4 address on network level. >> >> >> >> Signed-off-by: Stefan Gula <steweg@gmail.com> >> > >> > Thanks for the effort, but it is duplicating existing functionality. >> > It possible to do this already with existing gretap device and the >> > current bridge. >> > >> > The same thing is also supported by OpenVswitch. >> > >> >> gretap with bridge will not do the same as gretap allows you to only >> encapsulate L2 frames inside the GRE - this one part is actually >> utilized in my code. GRE multipoint implementation is also utilized in >> my code as well. But what is missing is forwarding logic here, which >> prevents the traffic going not optimal way. Scenario one - e.g. if you >> connect through 3 sites with using 1 gretap multipoint VPN, it always >> forwards frames between site 1 and site 2 even if they are unicast. >> That represents waste of bandwidth for site 3. Now assume that there >> will be more than 40 sites and I hope you see that single current >> multipoint gretap is not also good solution here >> >> The second scenario - e.g. using 3 sites using point-to-point gretap >> interfaces between each 2 sites (2 gretap VPN interfaces per site) and >> bridging those interfaces with real ones results in looped topology >> which needs to utilized STP inside to prevent loops. Once STP >> converges the topology will looks like this, traffic from site 1 to >> site 2 will go always directly by the way of unicast (on GRE level), >> from site 2 to site 3 always directly by the way of unicast (on GRE >> level) and from site 1 to site 3 will go indirectly through site 2 due >> STP limitations, which results in another not optimalized traffic >> flows. Now assume that the number of sites rises, so gretap+standard >> bridge code is also not a good solution here. >> >> My code utilizes it that way that I have extended the gretap >> multipoint interface with the forwarding logic e.g. using 3 sites, >> each site uses only one gretap VPN interface and if destination MAC >> address is known to bridge code inside the gretap interface forwarding >> logic, it forwards it towards only VPN endpoint that actually need >> that by the way of unicasting on GRE level. On the other hand if the >> destination MAC address is unknown or destination MAC address is L2 >> multicast or L2 broadcast than the frame is spread out through >> multicasting on GRE level, providing delivery mechanism analogous to >> standard switches on top of the multipoint GRE tunnels. > > Couldn't this be controlled from user space either by programming > the FDB with netlink or doing alternative version of STP? For certain small number of clients yes, for many clients it is administrative nightmare. e.g. I have in my network more than 98 VPN endpoints and 4k users constantly migrating from one site to another. This solution provides me flexible way to do this, with no administrative work. > > >> I also get through briefly over OpenVswitch documentation and found >> that it is more related to virtualization inside the box like VMware >> switches or so and not to such technologies interconnecting two or >> more separate segments over routed L3 infrastructure - there is a >> mention about the CAPWAP UDP transport but this is more related to >> WiFi implementations than generic ones. My patch also doesn't need any >> special userspace api to be configured. It utilizes the existing one. > -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2012/1/16 Štefan Gula <steweg@gmail.com>: > Dňa 16. januára 2012 17:36, Stephen Hemminger <shemminger@vyatta.com> napísal/a: >> On Mon, 16 Jan 2012 13:13:19 +0100 >> Štefan Gula <steweg@gmail.com> wrote: >> >>> From: Stefan Gula <steweg@gmail.com >>> >>> This patch is an extension for current Ethernet over GRE >>> implementation, which allows user to create virtual bridge (multipoint >>> VPN) and forward traffic based on Ethernet MAC address informations in >>> it. It simulates the Bridge bahaviour learing mechanism, but instead >>> of learning port ID from which given MAC address comes, it learns IP >>> address of peer which encapsulated given packet. Multicast, Broadcast >>> and unknown-multicast traffic is send over network as multicast >>> enacapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be >>> represented as one single virtual switch on logical level and be also >>> represented as one multicast IPv4 address on network level. >>> >>> Signed-off-by: Stefan Gula <steweg@gmail.com> >> >> Thanks for the effort, but it is duplicating existing functionality. >> It possible to do this already with existing gretap device and the >> current bridge. >> >> The same thing is also supported by OpenVswitch. >> > > gretap with bridge will not do the same as gretap allows you to only > encapsulate L2 frames inside the GRE - this one part is actually > utilized in my code. GRE multipoint implementation is also utilized in > my code as well. But what is missing is forwarding logic here, which > prevents the traffic going not optimal way. Scenario one - e.g. if you > connect through 3 sites with using 1 gretap multipoint VPN, it always > forwards frames between site 1 and site 2 even if they are unicast. > That represents waste of bandwidth for site 3. Now assume that there > will be more than 40 sites and I hope you see that single current > multipoint gretap is not also good solution here > > The second scenario - e.g. using 3 sites using point-to-point gretap > interfaces between each 2 sites (2 gretap VPN interfaces per site) and > bridging those interfaces with real ones results in looped topology > which needs to utilized STP inside to prevent loops. Once STP > converges the topology will looks like this, traffic from site 1 to > site 2 will go always directly by the way of unicast (on GRE level), > from site 2 to site 3 always directly by the way of unicast (on GRE > level) and from site 1 to site 3 will go indirectly through site 2 due > STP limitations, which results in another not optimalized traffic > flows. Now assume that the number of sites rises, so gretap+standard > bridge code is also not a good solution here. > > My code utilizes it that way that I have extended the gretap > multipoint interface with the forwarding logic e.g. using 3 sites, > each site uses only one gretap VPN interface and if destination MAC > address is known to bridge code inside the gretap interface forwarding > logic, it forwards it towards only VPN endpoint that actually need > that by the way of unicasting on GRE level. On the other hand if the > destination MAC address is unknown or destination MAC address is L2 > multicast or L2 broadcast than the frame is spread out through > multicasting on GRE level, providing delivery mechanism analogous to > standard switches on top of the multipoint GRE tunnels. > > I also get through briefly over OpenVswitch documentation and found > that it is more related to virtualization inside the box like VMware > switches or so and not to such technologies interconnecting two or > more separate segments over routed L3 infrastructure - there is a > mention about the CAPWAP UDP transport but this is more related to > WiFi implementations than generic ones. My patch also doesn't need any > special userspace api to be configured. It utilizes the existing one. I understand what you're trying to do and I think that the goal makes sense but I agree with Stephen that this is not the right way to go about it. I see two issues: * It copies a lot of bridge code, making it unmaintainable and inflexible to other use cases. * The implementation exists in the GRE protocol stack but it applies equally to other tunneling protocols as well (VXLAN comes to mind). Open vSwitch doesn't quite do this level of learning yet but it's the direction that we want to move in (and there's nothing particularly virtualization specific about it). What I think makes the most sense is to create some internal interfaces to the GRE stack that exposes the information needed to do learning. That way there is only one instance of the protocol code for each tunneling mechanism and then each way of managing those addresses (i.e. the current device-based mechanism, Open vSwitch, potentially a direct bridge-based mechanism, etc.) can be reused as well. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Dňa 16. januára 2012 22:22, Jesse Gross <jesse@nicira.com> napísal/a: > 2012/1/16 Štefan Gula <steweg@gmail.com>: >> Dňa 16. januára 2012 17:36, Stephen Hemminger <shemminger@vyatta.com> napísal/a: >>> On Mon, 16 Jan 2012 13:13:19 +0100 >>> Štefan Gula <steweg@gmail.com> wrote: >>> >>>> From: Stefan Gula <steweg@gmail.com >>>> >>>> This patch is an extension for current Ethernet over GRE >>>> implementation, which allows user to create virtual bridge (multipoint >>>> VPN) and forward traffic based on Ethernet MAC address informations in >>>> it. It simulates the Bridge bahaviour learing mechanism, but instead >>>> of learning port ID from which given MAC address comes, it learns IP >>>> address of peer which encapsulated given packet. Multicast, Broadcast >>>> and unknown-multicast traffic is send over network as multicast >>>> enacapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be >>>> represented as one single virtual switch on logical level and be also >>>> represented as one multicast IPv4 address on network level. >>>> >>>> Signed-off-by: Stefan Gula <steweg@gmail.com> >>> >>> Thanks for the effort, but it is duplicating existing functionality. >>> It possible to do this already with existing gretap device and the >>> current bridge. >>> >>> The same thing is also supported by OpenVswitch. >>> >> >> gretap with bridge will not do the same as gretap allows you to only >> encapsulate L2 frames inside the GRE - this one part is actually >> utilized in my code. GRE multipoint implementation is also utilized in >> my code as well. But what is missing is forwarding logic here, which >> prevents the traffic going not optimal way. Scenario one - e.g. if you >> connect through 3 sites with using 1 gretap multipoint VPN, it always >> forwards frames between site 1 and site 2 even if they are unicast. >> That represents waste of bandwidth for site 3. Now assume that there >> will be more than 40 sites and I hope you see that single current >> multipoint gretap is not also good solution here >> >> The second scenario - e.g. using 3 sites using point-to-point gretap >> interfaces between each 2 sites (2 gretap VPN interfaces per site) and >> bridging those interfaces with real ones results in looped topology >> which needs to utilized STP inside to prevent loops. Once STP >> converges the topology will looks like this, traffic from site 1 to >> site 2 will go always directly by the way of unicast (on GRE level), >> from site 2 to site 3 always directly by the way of unicast (on GRE >> level) and from site 1 to site 3 will go indirectly through site 2 due >> STP limitations, which results in another not optimalized traffic >> flows. Now assume that the number of sites rises, so gretap+standard >> bridge code is also not a good solution here. >> >> My code utilizes it that way that I have extended the gretap >> multipoint interface with the forwarding logic e.g. using 3 sites, >> each site uses only one gretap VPN interface and if destination MAC >> address is known to bridge code inside the gretap interface forwarding >> logic, it forwards it towards only VPN endpoint that actually need >> that by the way of unicasting on GRE level. On the other hand if the >> destination MAC address is unknown or destination MAC address is L2 >> multicast or L2 broadcast than the frame is spread out through >> multicasting on GRE level, providing delivery mechanism analogous to >> standard switches on top of the multipoint GRE tunnels. >> >> I also get through briefly over OpenVswitch documentation and found >> that it is more related to virtualization inside the box like VMware >> switches or so and not to such technologies interconnecting two or >> more separate segments over routed L3 infrastructure - there is a >> mention about the CAPWAP UDP transport but this is more related to >> WiFi implementations than generic ones. My patch also doesn't need any >> special userspace api to be configured. It utilizes the existing one. > > I understand what you're trying to do and I think that the goal makes > sense but I agree with Stephen that this is not the right way to go > about it. I see two issues: > > * It copies a lot of bridge code, making it unmaintainable and > inflexible to other use cases. > * The implementation exists in the GRE protocol stack but it applies > equally to other tunneling protocols as well (VXLAN comes to mind). > > Open vSwitch doesn't quite do this level of learning yet but it's the > direction that we want to move in (and there's nothing particularly > virtualization specific about it). What I think makes the most sense > is to create some internal interfaces to the GRE stack that exposes > the information needed to do learning. That way there is only one > instance of the protocol code for each tunneling mechanism and then > each way of managing those addresses (i.e. the current device-based > mechanism, Open vSwitch, potentially a direct bridge-based mechanism, > etc.) can be reused as well. I agree with you that using such approach of copying bridge code with some modifications is maybe not flexible enough, but on the other hand it does the job needed. It also doesn't breaks any previous compatibility of usage of gretap interfaces as it modifies the encapsulation and decapsulation part of codes *how the traffic goes to and from remote destinations), which doesn't face any change on the internal linkages coding inside the box (e.g. linking that gretap interface to standard logical bridge interface is still fully possible). I would rather see getting some standard set of generic bridge code, which can be reused anywhere in network stack, but for now this is the only way I know how to do it. Exposing the GRE interfaces will not do the job as what I needed was to rebuild the bridge logic to allow learning endpoint IPs instead of network ports IDs (it's almost the same as using multiple gretap interfaces inside one bridge.interface) To obtain back the maintainability I would assume redesigning the bridge code is the best way here, but I am not that well coder to do it myself. So if anybody is interested in this feel free to do it. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-my/Documentation/dontdiff --- linux-3.2.1-orig/Documentation/dontdiff 2012-01-16 12:32:18.000000000 +0100 +++ linux-3.2.1-my/Documentation/dontdiff 2012-01-12 20:42:45.000000000 +0100 @@ -260,5 +260,3 @@ wakeup.lds zImage* zconf.hash.c zoffset.h -mkpiggy -mdp diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h --- linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100 @@ -27,6 +27,14 @@ struct ip_tunnel { __u32 o_seqno; /* The last output seqno */ int hlen; /* Precalculated GRE header length */ int mlink; +#ifdef CONFIG_NET_IPGRE_BRIDGE +#define GRETAP_BR_HASH_BITS 8 +#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS) + struct hlist_head hash[GRETAP_BR_HASH_SIZE]; + spinlock_t hash_lock; + unsigned long ageing_time; + struct timer_list gc_timer; +#endif struct ip_tunnel_parm parms; diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig --- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100 @@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST Network), but can be distributed all over the Internet. If you want to do that, say Y here and to "IP multicast routing" below. +config NET_IPGRE_BRIDGE + bool "IP: Ethernet over multipoint GRE over IP" + depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST + help + Allows you to use multipoint GRE VPN as virtual switch and interconnect + several L2 endpoints over L3 routed infrastructure. It is useful for + creating multipoint L2 VPNs which can be later used inside bridge + interfaces If you want to use. GRE multipoint L2 VPN feature say Y. + config IP_MROUTE bool "IP: multicast routing" depends on IP_MULTICAST diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c --- linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100 +++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-16 12:29:03.000000000 +0100 @@ -52,6 +52,11 @@ #include <net/ip6_route.h> #endif +#ifdef CONFIG_NET_IPGRE_BRIDGE +#include <linux/jhash.h> +#include <asm/unaligned.h> +#endif + /* Problems & solutions -------------------- @@ -134,6 +139,199 @@ struct ipgre_net { struct net_device *fb_tunnel_dev; }; +#ifdef CONFIG_NET_IPGRE_BRIDGE + /* + * This part of code includes codes to enable L2 ethernet + * switch virtualization over IP routed infrastructure with + * utilization of multicast capable endpoint using Ethernet + * over GRE + * + * Author: Stefan Gula + * Signed-off-by: Stefan Gula <steweg@gmail.com> + */ +struct mac_addr { + unsigned char addr[6]; +}; + +struct ipgre_tap_bridge_entry { + struct hlist_node hlist; + u32 raddr; + struct mac_addr addr; + struct net_device *dev; + struct rcu_head rcu; + atomic_t use_count; + unsigned long ageing_timer; +}; + +static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly; +static u32 ipgre_salt __read_mostly; + +static inline int ipgre_tap_bridge_hash(const unsigned char *mac) +{ + u32 key = get_unaligned((u32 *)(mac + 2)); + return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1); +} + +static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel, + const struct ipgre_tap_bridge_entry *entry) +{ + return time_before_eq(entry->ageing_timer + tunnel->ageing_time, + jiffies); +} + +static void ipgre_tap_bridge_rcu_free(struct rcu_head *head) +{ + struct ipgre_tap_bridge_entry *entry + = container_of(head, struct ipgre_tap_bridge_entry, rcu); + kmem_cache_free(ipgre_tap_bridge_cache, entry); +} + +void ipgre_tap_bridge_put(struct ipgre_tap_bridge_entry *entry) +{ + if (atomic_dec_and_test(&entry->use_count)) + call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free); +} + +static inline void ipgre_tap_bridge_delete(struct ipgre_tap_bridge_entry *entry) +{ + hlist_del_rcu(&entry->hlist); + ipgre_tap_bridge_put(entry); +} + +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create( + struct hlist_head *head, + u32 source, + const unsigned char *addr, struct net_device *dev) +{ + struct ipgre_tap_bridge_entry *entry; + entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC); + if (entry) { + memcpy(entry->addr.addr, addr, ETH_ALEN); + atomic_set(&entry->use_count, 1); + hlist_add_head_rcu(&entry->hlist, head); + entry->raddr = source; + entry->ageing_timer = jiffies; + entry->dev = dev; + } + return entry; +} + +struct ipgre_tap_bridge_entry *__ipgre_tap_bridge_get(struct ip_tunnel *tunnel, + const unsigned char *addr) +{ + struct hlist_node *h; + struct ipgre_tap_bridge_entry *entry; + hlist_for_each_entry_rcu(entry, h, + &tunnel->hash[ipgre_tap_bridge_hash(addr)], hlist) + { + if (!compare_ether_addr(entry->addr.addr, addr)) { + if (unlikely(ipgre_tap_bridge_has_expired(tunnel, + entry))) + break; + return entry; + } + } + + return NULL; +} + +struct ipgre_tap_bridge_entry *ipgre_tap_bridge_get(struct ip_tunnel *tunnel, + const unsigned char *addr) +{ + struct ipgre_tap_bridge_entry *entry; + rcu_read_lock(); + entry = __ipgre_tap_bridge_get(tunnel, addr); + if (entry && !atomic_inc_not_zero(&entry->use_count)) + entry = NULL; + rcu_read_unlock(); + return entry; +} + +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel, + const unsigned char *addr) +{ + struct ipgre_tap_bridge_entry *entry; + entry = ipgre_tap_bridge_get(tunnel, addr); + if (entry == NULL) + return 0; + else + return entry->raddr; +} + + +static inline struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find( + struct hlist_head *head, + const unsigned char *addr) +{ + struct hlist_node *h; + struct ipgre_tap_bridge_entry *entry; + hlist_for_each_entry_rcu(entry, h, head, hlist) { + if (!compare_ether_addr(entry->addr.addr, addr)) + return entry; + } + return NULL; +} + +int ipgre_tap_bridge_init(void) +{ + ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache", + sizeof(struct ipgre_tap_bridge_entry), + 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!ipgre_tap_bridge_cache) + return -ENOMEM; + get_random_bytes(&ipgre_salt, sizeof(ipgre_salt)); + return 0; +} + +void ipgre_tap_bridge_fini(void) +{ + kmem_cache_destroy(ipgre_tap_bridge_cache); +} + +void ipgre_tap_bridge_cleanup(unsigned long _data) +{ + struct ip_tunnel *tunnel = (struct ip_tunnel *)_data; + unsigned long delay = tunnel->ageing_time; + unsigned long next_timer = jiffies + tunnel->ageing_time; + int i; + spin_lock_bh(&tunnel->hash_lock); + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { + struct ipgre_tap_bridge_entry *entry; + struct hlist_node *h, *n; + hlist_for_each_entry_safe(entry, h, n, + &tunnel->hash[i], hlist) + { + unsigned long this_timer; + this_timer = entry->ageing_timer + delay; + if (time_before_eq(this_timer, jiffies)) + ipgre_tap_bridge_delete(entry); + else if (time_before(this_timer, next_timer)) + next_timer = this_timer; + } + } + spin_unlock_bh(&tunnel->hash_lock); + mod_timer(&tunnel->gc_timer, round_jiffies(next_timer + HZ/4)); +} + +void ipgre_tap_bridge_flush(struct ip_tunnel *tunnel) +{ + int i; + spin_lock_bh(&tunnel->hash_lock); + for (i = 0; i < GRETAP_BR_HASH_SIZE; i++) { + struct ipgre_tap_bridge_entry *entry; + struct hlist_node *h, *n; + hlist_for_each_entry_safe(entry, h, n, + &tunnel->hash[i], hlist) + { + ipgre_tap_bridge_delete(entry); + } + } + spin_unlock_bh(&tunnel->hash_lock); +} + +#endif + /* Tunnel hash table */