diff mbox

[ovs-dev,v3,04/28] datapath: backport: net: add dst_cache support

Message ID 1467421118-121600-4-git-send-email-pshelar@ovn.org
State Superseded
Headers show

Commit Message

Pravin Shelar July 2, 2016, 12:58 a.m. UTC
This backports the dst-cache implementation from upstream.

    commit 911362c70df5b766c243dc297fadeaced786ffd8
    Author: Paolo Abeni <pabeni@redhat.com>

    net: add dst_cache support
    This patch adds a generic, lockless dst cache implementation.
    The need for a lock is avoided by updating the dst cache fields
    only in per-cpu scope, and requiring that the cache manipulation
    functions are invoked with the local bh disabled.

    The refresh_ts and reset_ts fields are used to ensure the cache
    consistency in case of concurrent cache update (dst_cache_set*) and
    reset operation (dst_cache_reset).

    Consider the following scenario:

    CPU1:                                       CPU2:
      <cache lookup with empty cache: it fails>
      <get dst via uncached route lookup>
                                                <related configuration changes>
                                                dst_cache_reset()
      dst_cache_set()

    The dst entry passed to dst_cache_set() should not be used
    for later dst cache lookups, because it was obtained using old
    configuration values.

    Since the refresh_ts is updated only on dst_cache lookup, the
    cached value in the above scenario will be discarded on the next
    lookup.

    Signed-off-by: Paolo Abeni <pabeni@redhat.com>
    Suggested-and-acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
    Signed-off-by: David S. Miller <davem@davemloft.net>

Signed-off-by: Pravin B Shelar <pshelar@ovn.org>
Acked-by: Jesse Gross <jesse@kernel.org>
---
 acinclude.m4                                  |   1 +
 datapath/linux/Modules.mk                     |   3 +
 datapath/linux/compat/dst_cache.c             | 169 ++++++++++++++++++++++++++
 datapath/linux/compat/include/linux/percpu.h  |  11 ++
 datapath/linux/compat/include/net/dst_cache.h | 104 ++++++++++++++++
 datapath/linux/compat/include/net/ip6_fib.h   |  36 ++++++
 datapath/linux/compat/utils.c                 |  28 +++++
 7 files changed, 352 insertions(+)
 create mode 100644 datapath/linux/compat/dst_cache.c
 create mode 100644 datapath/linux/compat/include/net/dst_cache.h
 create mode 100644 datapath/linux/compat/include/net/ip6_fib.h

Comments

Jesse Gross July 6, 2016, 1:54 a.m. UTC | #1
On Fri, Jul 1, 2016 at 5:58 PM, Pravin B Shelar <pshelar@ovn.org> wrote:
> diff --git a/acinclude.m4 b/acinclude.m4
> index 263c31d..05b5f48 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -556,6 +556,7 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
>
>    OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [dst_discard_sk])
>    OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [__skb_dst_copy])
> +  OVS_GREP_IFELSE([$KSRC/include/net/dst_cache.h], [dst_cache])

Looking at this some more, is the symbol created by this check
actually used anywhere? Unless I am missing something, it seems like
we unconditionally replace the dst cache with ours.

It seems to me that we might want to make the use of the dst cache
follow whether we are using upstream tunnels or not. If the upstream
dst cache is available then it should be fine to use that with the OVS
tunnel implementation as well. However, if we end up using upstream
tunnels and the OVS dst cache (because of inconsistent backports) then
that probably won't work very well.
Pravin Shelar July 6, 2016, 10:44 p.m. UTC | #2
On Tue, Jul 5, 2016 at 6:54 PM, Jesse Gross <jesse@kernel.org> wrote:
> On Fri, Jul 1, 2016 at 5:58 PM, Pravin B Shelar <pshelar@ovn.org> wrote:
>> diff --git a/acinclude.m4 b/acinclude.m4
>> index 263c31d..05b5f48 100644
>> --- a/acinclude.m4
>> +++ b/acinclude.m4
>> @@ -556,6 +556,7 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
>>
>>    OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [dst_discard_sk])
>>    OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [__skb_dst_copy])
>> +  OVS_GREP_IFELSE([$KSRC/include/net/dst_cache.h], [dst_cache])
>
> Looking at this some more, is the symbol created by this check
> actually used anywhere? Unless I am missing something, it seems like
> we unconditionally replace the dst cache with ours.
>
> It seems to me that we might want to make the use of the dst cache
> follow whether we are using upstream tunnels or not. If the upstream
> dst cache is available then it should be fine to use that with the OVS
> tunnel implementation as well. However, if we end up using upstream
> tunnels and the OVS dst cache (because of inconsistent backports) then
> that probably won't work very well.

I added DST_CACHE to detect dst_cache support in the kernel, but since
none of the supported kernels has it, I could not test the use of the
symbol correctly. Therefore I have decided to remove the symbol for
now. We can introduce it when we add support for newer kernels.
Jesse Gross July 6, 2016, 11:34 p.m. UTC | #3
On Wed, Jul 6, 2016 at 3:44 PM, pravin shelar <pshelar@ovn.org> wrote:
> On Tue, Jul 5, 2016 at 6:54 PM, Jesse Gross <jesse@kernel.org> wrote:
>> On Fri, Jul 1, 2016 at 5:58 PM, Pravin B Shelar <pshelar@ovn.org> wrote:
>>> diff --git a/acinclude.m4 b/acinclude.m4
>>> index 263c31d..05b5f48 100644
>>> --- a/acinclude.m4
>>> +++ b/acinclude.m4
>>> @@ -556,6 +556,7 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
>>>
>>>    OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [dst_discard_sk])
>>>    OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [__skb_dst_copy])
>>> +  OVS_GREP_IFELSE([$KSRC/include/net/dst_cache.h], [dst_cache])
>>
>> Looking at this some more, is the symbol created by this check
>> actually used anywhere? Unless I am missing something, it seems like
>> we unconditionally replace the dst cache with ours.
>>
>> It seems to me that we might want to make the use of the dst cache
>> follow whether we are using upstream tunnels or not. If the upstream
>> dst cache is available then it should be fine to use that with the OVS
>> tunnel implementation as well. However, if we end up using upstream
>> tunnels and the OVS dst cache (because of inconsistent backports) then
>> that probably won't work very well.
>
> I added DST_CACHE to detect dst_cache support in the kernel, but since
> none of the supported kernels has it, I could not test the use of the
> symbol correctly. Therefore I have decided to remove the symbol for
> now. We can introduce it when we add support for newer kernels.

I think it may be best to use !USE_UPSTREAM_TUNNEL, especially if we
remove the dst cache feature check. Otherwise, there's a good chance
that we might end up trying to use our dst cache with the upstream
tunnel code, which won't work.
Pravin Shelar July 7, 2016, 1:48 a.m. UTC | #4
On Wed, Jul 6, 2016 at 4:34 PM, Jesse Gross <jesse@kernel.org> wrote:
> On Wed, Jul 6, 2016 at 3:44 PM, pravin shelar <pshelar@ovn.org> wrote:
>> On Tue, Jul 5, 2016 at 6:54 PM, Jesse Gross <jesse@kernel.org> wrote:
>>> On Fri, Jul 1, 2016 at 5:58 PM, Pravin B Shelar <pshelar@ovn.org> wrote:
>>>> diff --git a/acinclude.m4 b/acinclude.m4
>>>> index 263c31d..05b5f48 100644
>>>> --- a/acinclude.m4
>>>> +++ b/acinclude.m4
>>>> @@ -556,6 +556,7 @@ AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
>>>>
>>>>    OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [dst_discard_sk])
>>>>    OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [__skb_dst_copy])
>>>> +  OVS_GREP_IFELSE([$KSRC/include/net/dst_cache.h], [dst_cache])
>>>
>>> Looking at this some more, is the symbol created by this check
>>> actually used anywhere? Unless I am missing something, it seems like
>>> we unconditionally replace the dst cache with ours.
>>>
>>> It seems to me that we might want to make the use of the dst cache
>>> follow whether we are using upstream tunnels or not. If the upstream
>>> dst cache is available then it should be fine to use that with the OVS
>>> tunnel implementation as well. However, if we end up using upstream
>>> tunnels and the OVS dst cache (because of inconsistent backports) then
>>> that probably won't work very well.
>>
>> I added DST_CACHE to detect dst_cache support in the kernel, but since
>> none of the supported kernels has it, I could not test the use of the
>> symbol correctly. Therefore I have decided to remove the symbol for
>> now. We can introduce it when we add support for newer kernels.
>
> I think it may be best to use !USE_UPSTREAM_TUNNEL, especially if we
> remove the dst cache feature check. Otherwise, there's a good chance
> that we might end up trying to use our dst cache with the upstream
> tunnel code, which won't work.

OK. I am also planning to move other tunnel-related functions under
!USE_UPSTREAM_TUNNEL to simplify it a bit.
diff mbox

Patch

diff --git a/acinclude.m4 b/acinclude.m4
index 263c31d..05b5f48 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -556,6 +556,7 @@  AC_DEFUN([OVS_CHECK_LINUX_COMPAT], [
 
   OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [dst_discard_sk])
   OVS_GREP_IFELSE([$KSRC/include/net/dst.h], [__skb_dst_copy])
+  OVS_GREP_IFELSE([$KSRC/include/net/dst_cache.h], [dst_cache])
 
   OVS_GREP_IFELSE([$KSRC/include/net/genetlink.h], [genl_has_listeners])
   OVS_GREP_IFELSE([$KSRC/include/net/genetlink.h], [mcgrp_offset])
diff --git a/datapath/linux/Modules.mk b/datapath/linux/Modules.mk
index d6ec06a..ae7c753 100644
--- a/datapath/linux/Modules.mk
+++ b/datapath/linux/Modules.mk
@@ -1,5 +1,6 @@ 
 openvswitch_sources += \
 	linux/compat/dev-openvswitch.c \
+	linux/compat/dst_cache.c \
 	linux/compat/exthdrs_core.c \
 	linux/compat/flex_array.c \
 	linux/compat/flow_dissector.c \
@@ -69,6 +70,7 @@  openvswitch_headers += \
 	linux/compat/include/linux/workqueue.h \
 	linux/compat/include/net/checksum.h \
 	linux/compat/include/net/dst.h \
+	linux/compat/include/net/dst_cache.h \
 	linux/compat/include/net/dst_metadata.h \
 	linux/compat/include/net/flow_keys.h \
 	linux/compat/include/net/genetlink.h \
@@ -79,6 +81,7 @@  openvswitch_headers += \
 	linux/compat/include/net/inetpeer.h \
 	linux/compat/include/net/ip.h \
 	linux/compat/include/net/ip_tunnels.h \
+	linux/compat/include/net/ip6_fib.h \
 	linux/compat/include/net/ip6_route.h \
 	linux/compat/include/net/ip6_tunnel.h \
 	linux/compat/include/net/ipv6.h \
diff --git a/datapath/linux/compat/dst_cache.c b/datapath/linux/compat/dst_cache.c
new file mode 100644
index 0000000..fdb3d64
--- /dev/null
+++ b/datapath/linux/compat/dst_cache.c
@@ -0,0 +1,169 @@ 
+/*
+ * net/core/dst_cache.c - dst entry cache
+ *
+ * Copyright (c) 2016 Paolo Abeni <pabeni@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <net/dst_cache.h>
+#include <net/route.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_fib.h>
+#endif
+#include <uapi/linux/in.h>
+
+struct dst_cache_pcpu {
+	unsigned long refresh_ts;
+	struct dst_entry *dst;
+	u32 cookie;
+	union {
+		struct in_addr in_saddr;
+		struct in6_addr in6_saddr;
+	};
+};
+
+static void dst_cache_per_cpu_dst_set(struct dst_cache_pcpu *dst_cache,
+				      struct dst_entry *dst, u32 cookie)
+{
+	dst_release(dst_cache->dst);
+	if (dst)
+		dst_hold(dst);
+
+	dst_cache->cookie = cookie;
+	dst_cache->dst = dst;
+}
+
+static struct dst_entry *dst_cache_per_cpu_get(struct dst_cache *dst_cache,
+					       struct dst_cache_pcpu *idst)
+{
+	struct dst_entry *dst;
+
+	dst = idst->dst;
+	if (!dst)
+		goto fail;
+
+	/* the cache already hold a dst reference; it can't go away */
+	dst_hold(dst);
+
+	if (unlikely(!time_after(idst->refresh_ts, dst_cache->reset_ts) ||
+		     (dst->obsolete && !dst->ops->check(dst, idst->cookie)))) {
+		dst_cache_per_cpu_dst_set(idst, NULL, 0);
+		dst_release(dst);
+		goto fail;
+	}
+	return dst;
+
+fail:
+	idst->refresh_ts = jiffies;
+	return NULL;
+}
+
+struct dst_entry *rpl_dst_cache_get(struct dst_cache *dst_cache)
+{
+	if (!dst_cache->cache)
+		return NULL;
+
+	return dst_cache_per_cpu_get(dst_cache, this_cpu_ptr(dst_cache->cache));
+}
+EXPORT_SYMBOL_GPL(rpl_dst_cache_get);
+
+struct rtable *rpl_dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr)
+{
+	struct dst_cache_pcpu *idst;
+	struct dst_entry *dst;
+
+	if (!dst_cache->cache)
+		return NULL;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst = dst_cache_per_cpu_get(dst_cache, idst);
+	if (!dst)
+		return NULL;
+
+	*saddr = idst->in_saddr.s_addr;
+	return container_of(dst, struct rtable, dst);
+}
+EXPORT_SYMBOL_GPL(rpl_dst_cache_get_ip4);
+
+void rpl_dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       __be32 saddr)
+{
+	struct dst_cache_pcpu *idst;
+
+	if (!dst_cache->cache)
+		return;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst_cache_per_cpu_dst_set(idst, dst, 0);
+	idst->in_saddr.s_addr = saddr;
+}
+EXPORT_SYMBOL_GPL(rpl_dst_cache_set_ip4);
+
+#if IS_ENABLED(CONFIG_IPV6)
+void rpl_dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       const struct in6_addr *addr)
+{
+	struct dst_cache_pcpu *idst;
+
+	if (!dst_cache->cache)
+		return;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst_cache_per_cpu_dst_set(this_cpu_ptr(dst_cache->cache), dst,
+				  rt6_get_cookie((struct rt6_info *)dst));
+	idst->in6_saddr = *addr;
+}
+EXPORT_SYMBOL_GPL(rpl_dst_cache_set_ip6);
+
+struct dst_entry *rpl_dst_cache_get_ip6(struct dst_cache *dst_cache,
+				    struct in6_addr *saddr)
+{
+	struct dst_cache_pcpu *idst;
+	struct dst_entry *dst;
+
+	if (!dst_cache->cache)
+		return NULL;
+
+	idst = this_cpu_ptr(dst_cache->cache);
+	dst = dst_cache_per_cpu_get(dst_cache, idst);
+	if (!dst)
+		return NULL;
+
+	*saddr = idst->in6_saddr;
+	return dst;
+}
+EXPORT_SYMBOL_GPL(rpl_dst_cache_get_ip6);
+
+#endif
+
+int rpl_dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp)
+{
+	dst_cache->cache = alloc_percpu_gfp(struct dst_cache_pcpu,
+					    gfp | __GFP_ZERO);
+	if (!dst_cache->cache)
+		return -ENOMEM;
+
+	dst_cache_reset(dst_cache);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rpl_dst_cache_init);
+
+void rpl_dst_cache_destroy(struct dst_cache *dst_cache)
+{
+	int i;
+
+	if (!dst_cache->cache)
+		return;
+
+	for_each_possible_cpu(i)
+		dst_release(per_cpu_ptr(dst_cache->cache, i)->dst);
+
+	free_percpu(dst_cache->cache);
+}
+EXPORT_SYMBOL_GPL(rpl_dst_cache_destroy);
diff --git a/datapath/linux/compat/include/linux/percpu.h b/datapath/linux/compat/include/linux/percpu.h
index f9fcabb..871c877 100644
--- a/datapath/linux/compat/include/linux/percpu.h
+++ b/datapath/linux/compat/include/linux/percpu.h
@@ -31,4 +31,15 @@ 
 #define get_pcpu_ptr(name) (this_cpu_ptr(&name))
 #endif
 
+#ifndef alloc_percpu_gfp
+#define NEED_ALLOC_PERCPU_GFP
+
+void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp);
+
+#define alloc_percpu_gfp(type, gfp)                                     \
+        (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type),       \
+                                                __alignof__(type), gfp)
+#endif
+
+
 #endif
diff --git a/datapath/linux/compat/include/net/dst_cache.h b/datapath/linux/compat/include/net/dst_cache.h
new file mode 100644
index 0000000..ae435fc
--- /dev/null
+++ b/datapath/linux/compat/include/net/dst_cache.h
@@ -0,0 +1,104 @@ 
+#ifndef _NET_DST_CACHE_WRAPPER_H
+#define _NET_DST_CACHE_WRAPPER_H
+
+#include <linux/jiffies.h>
+#include <net/dst.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_fib.h>
+#endif
+
+struct dst_cache {
+	struct dst_cache_pcpu __percpu *cache;
+	unsigned long reset_ts;
+};
+
+/**
+ *	dst_cache_get - perform cache lookup
+ *	@dst_cache: the cache
+ *
+ *	The caller should use dst_cache_get_ip4() if it needs to retrieve the
+ *	source address to be used when xmitting to the cached dst.
+ *	local BH must be disabled.
+ */
+#define rpl_dst_cache_get dst_cache_get
+struct dst_entry *rpl_dst_cache_get(struct dst_cache *dst_cache);
+
+/**
+ *	dst_cache_get_ip4 - perform cache lookup and fetch ipv4 source address
+ *	@dst_cache: the cache
+ *	@saddr: return value for the retrieved source address
+ *
+ *	local BH must be disabled.
+ */
+#define rpl_dst_cache_get_ip4 dst_cache_get_ip4
+struct rtable *rpl_dst_cache_get_ip4(struct dst_cache *dst_cache, __be32 *saddr);
+
+/**
+ *	dst_cache_set_ip4 - store the ipv4 dst into the cache
+ *	@dst_cache: the cache
+ *	@dst: the entry to be cached
+ *	@saddr: the source address to be stored inside the cache
+ *
+ *	local BH must be disabled.
+ */
+#define rpl_dst_cache_set_ip4 dst_cache_set_ip4
+void rpl_dst_cache_set_ip4(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       __be32 saddr);
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+/**
+ *	dst_cache_set_ip6 - store the ipv6 dst into the cache
+ *	@dst_cache: the cache
+ *	@dst: the entry to be cached
+ *	@addr: the source address to be stored inside the cache
+ *
+ *	local BH must be disabled.
+ */
+#define rpl_dst_cache_set_ip6 dst_cache_set_ip6
+void rpl_dst_cache_set_ip6(struct dst_cache *dst_cache, struct dst_entry *dst,
+		       const struct in6_addr *addr);
+
+/**
+ *	dst_cache_get_ip6 - perform cache lookup and fetch ipv6 source address
+ *	@dst_cache: the cache
+ *	@saddr: return value for the retrieved source address
+ *
+ *	local BH must be disabled.
+ */
+#define rpl_dst_cache_get_ip6 dst_cache_get_ip6
+struct dst_entry *rpl_dst_cache_get_ip6(struct dst_cache *dst_cache,
+				    struct in6_addr *saddr);
+#endif
+
+/**
+ *	dst_cache_reset - invalidate the cache contents
+ *	@dst_cache: the cache
+ *
+ *	This does not free the cached dst to avoid races and contentions.
+ *	The dst will be freed on a later cache lookup.
+ */
+static inline void dst_cache_reset(struct dst_cache *dst_cache)
+{
+	dst_cache->reset_ts = jiffies;
+}
+
+/**
+ *	dst_cache_init - initialize the cache, allocating the required storage
+ *	@dst_cache: the cache
+ *	@gfp: allocation flags
+ */
+#define rpl_dst_cache_init dst_cache_init
+int rpl_dst_cache_init(struct dst_cache *dst_cache, gfp_t gfp);
+
+/**
+ *	dst_cache_destroy - empty the cache and free the allocated storage
+ *	@dst_cache: the cache
+ *
+ *	No synchronization is enforced: it must be called only when the cache
+ *	is unused.
+ */
+#define rpl_dst_cache_destroy dst_cache_destroy
+void rpl_dst_cache_destroy(struct dst_cache *dst_cache);
+
+#endif
diff --git a/datapath/linux/compat/include/net/ip6_fib.h b/datapath/linux/compat/include/net/ip6_fib.h
new file mode 100644
index 0000000..2c8a513
--- /dev/null
+++ b/datapath/linux/compat/include/net/ip6_fib.h
@@ -0,0 +1,36 @@ 
+/*
+ *      Linux INET6 implementation
+ *
+ *      Authors:
+ *      Pedro Roque             <roque@di.fc.ul.pt>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _IP6_FIB_WRAPPER_H
+#define _IP6_FIB_WRAPPER_H
+
+#include_next <net/ip6_fib.h>
+
+#ifndef RTF_PCPU
+#define RTF_PCPU        0x40000000
+#endif
+
+#ifndef RTF_LOCAL
+#define RTF_LOCAL       0x80000000
+#endif
+
+#define rt6_get_cookie rpl_rt6_get_cookie
+static inline u32 rt6_get_cookie(const struct rt6_info *rt)
+{
+	if (rt->rt6i_flags & RTF_PCPU ||
+			(unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
+		rt = (struct rt6_info *)(rt->dst.from);
+
+	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+}
+
+#endif
diff --git a/datapath/linux/compat/utils.c b/datapath/linux/compat/utils.c
index 0ee6e80..7113e09 100644
--- a/datapath/linux/compat/utils.c
+++ b/datapath/linux/compat/utils.c
@@ -65,3 +65,31 @@  bool rpl___net_get_random_once(void *buf, int nbytes, bool *done,
 EXPORT_SYMBOL_GPL(rpl___net_get_random_once);
 
 #endif
+
+#ifdef NEED_ALLOC_PERCPU_GFP
+void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp)
+{
+	void __percpu *p;
+	int i;
+
+	/* older kernel do not allow all GFP flags, specifically atomic
+	 * allocation.
+	 */
+	if (gfp & ~(GFP_KERNEL | __GFP_ZERO))
+		return NULL;
+	p = __alloc_percpu(size, align);
+	if (!p)
+		return p;
+
+	if (!(gfp & __GFP_ZERO))
+		return p;
+
+	for_each_possible_cpu(i) {
+		void *d;
+
+		d = per_cpu_ptr(p, i);
+		memset(d, 0, size);
+	}
+	return p;
+}
+#endif