Patchwork macvlan: lockless tx path

login
register
mail settings
Submitter Eric Dumazet
Date Nov. 10, 2010, 3:41 p.m.
Message ID <1289403709.2860.216.camel@edumazet-laptop>
Download mbox | patch
Permalink /patch/70642/
State Changes Requested
Delegated to: David Miller
Headers show

Comments

Eric Dumazet - Nov. 10, 2010, 3:41 p.m.
macvlan is a stacked device, like tunnels. We should use the lockless
mechanism we are using in tunnels and loopback.

This patch completely removes locking in TX path.

tx stat counters are added into existing percpu stat structure, renamed
from rx_stats to pcpu_stats.

Note : this reverts commit 2c11455321f37 (macvlan: add multiqueue
capability)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 drivers/net/macvlan.c      |   72 +++++++++++++----------------------
 include/linux/if_macvlan.h |   26 +++++++-----
 2 files changed, 43 insertions(+), 55 deletions(-)



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 10, 2010, 5:39 p.m.
On 11/10/2010 07:41 AM, Eric Dumazet wrote:
> macvlan is a stacked device, like tunnels. We should use the lockless
> mechanism we are using in tunnels and loopback.
>
> This patch completely removes locking in TX path.
>
> tx stat counters are added into existing percpu stat structure, renamed
> from rx_stats to pcpu_stats.
>
> Note : this reverts commit 2c11455321f37 (macvlan: add multiqueue
> capability)
>
> Signed-off-by: Eric Dumazet<eric.dumazet@gmail.com>
> ---
>   drivers/net/macvlan.c      |   72 +++++++++++++----------------------
>   include/linux/if_macvlan.h |   26 +++++++-----
>   2 files changed, 43 insertions(+), 55 deletions(-)
>

> diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
> index 8a2fd66..3779b5f 100644
> --- a/include/linux/if_macvlan.h
> +++ b/include/linux/if_macvlan.h
> @@ -25,19 +25,23 @@ struct macvlan_port;
>   struct macvtap_queue;
>
>   /**
> - *	struct macvlan_rx_stats - MACVLAN percpu rx stats
> + *	struct macvlan_pcpu_stats - MACVLAN percpu stats
>    *	@rx_packets: number of received packets
>    *	@rx_bytes: number of received bytes
>    *	@rx_multicast: number of received multicast packets
> + *	@tx_

Minor nit..seems you missed a few there?

>    *	@syncp: synchronization point for 64bit counters
>    *	@rx_errors: number of errors
>    */
> -struct macvlan_rx_stats {
> +struct macvlan_pcpu_stats {
>   	u64			rx_packets;
>   	u64			rx_bytes;
>   	u64			rx_multicast;
> +	u64			tx_packets;
> +	u64			tx_bytes;
>   	struct u64_stats_sync	syncp;
>   	unsigned long		rx_errors;
> +	unsigned long		tx_dropped;

Any reason to not also make those u64?


Thanks,
Ben
Eric Dumazet - Nov. 10, 2010, 5:43 p.m.
Le mercredi 10 novembre 2010 à 09:39 -0800, Ben Greear a écrit :

> >   /**
> > - *	struct macvlan_rx_stats - MACVLAN percpu rx stats
> > + *	struct macvlan_pcpu_stats - MACVLAN percpu stats
> >    *	@rx_packets: number of received packets
> >    *	@rx_bytes: number of received bytes
> >    *	@rx_multicast: number of received multicast packets
> > + *	@tx_
> 
> Minor nit..seems you missed a few there?
> 

Arg... you're right !

> >    *	@syncp: synchronization point for 64bit counters
> >    *	@rx_errors: number of errors
> >    */
> > -struct macvlan_rx_stats {
> > +struct macvlan_pcpu_stats {
> >   	u64			rx_packets;
> >   	u64			rx_bytes;
> >   	u64			rx_multicast;
> > +	u64			tx_packets;
> > +	u64			tx_bytes;
> >   	struct u64_stats_sync	syncp;
> >   	unsigned long		rx_errors;
> > +	unsigned long		tx_dropped;
> 
> Any reason to not also make those u64?
> 

Well, they are supposed to be not incremented often, and they are packet
counts only, so a wrap around in less than 5 seconds is very unlikely.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 10, 2010, 5:53 p.m.
On 11/10/2010 09:43 AM, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 09:39 -0800, Ben Greear a écrit :
>
>>>    /**
>>> - *	struct macvlan_rx_stats - MACVLAN percpu rx stats
>>> + *	struct macvlan_pcpu_stats - MACVLAN percpu stats
>>>     *	@rx_packets: number of received packets
>>>     *	@rx_bytes: number of received bytes
>>>     *	@rx_multicast: number of received multicast packets
>>> + *	@tx_
>>
>> Minor nit..seems you missed a few there?
>>
>
> Arg... you're right !
>
>>>     *	@syncp: synchronization point for 64bit counters
>>>     *	@rx_errors: number of errors
>>>     */
>>> -struct macvlan_rx_stats {
>>> +struct macvlan_pcpu_stats {
>>>    	u64			rx_packets;
>>>    	u64			rx_bytes;
>>>    	u64			rx_multicast;
>>> +	u64			tx_packets;
>>> +	u64			tx_bytes;
>>>    	struct u64_stats_sync	syncp;
>>>    	unsigned long		rx_errors;
>>> +	unsigned long		tx_dropped;
>>
>> Any reason to not also make those u64?
>>
>
> Well, they are supposed to be not incremented often, and they are packet
> counts only, so a wrap around in less than 5 seconds is very unlikely.

I agree, but if these can be read from user-space, it can be tricky to make
solid code to deal with wraps when the thing wrapping can be 32 or 64 bits,
depending on whether the kernel is compiled 32-bit or 64-bit.

So, my preference is to use u32 or u64 so there is no guesswork involved.

To be sure, this problem exists in lots of places already (/proc/net/dev comes to mind),
but the fewer places the better in my opinion.

That said, I don't feel too strongly about it, so if you want to keep these
stats as they are, I shall argue no more :)

Thanks,
Ben
Eric Dumazet - Nov. 10, 2010, 6:18 p.m.
Le mercredi 10 novembre 2010 à 09:53 -0800, Ben Greear a écrit :

> I agree, but if these can be read from user-space, it can be tricky to make
> solid code to deal with wraps when the thing wrapping can be 32 or 64 bits,
> depending on whether the kernel is compiled 32-bit or 64-bit.
> 
> So, my preference is to use u32 or u64 so there is no guesswork involved.
> 
> To be sure, this problem exists in lots of places already (/proc/net/dev comes to mind),
> but the fewer places the better in my opinion.
> 

On a 32bit kernel, very few devices provide 64bit counters, so an
application reading /proc/net/dev should be prepared to handle 32 or
64bit counter.

On a 64bit kernel, many devices still provide 32, 36, 40 bit counters
(hardware based). Same conclusion for userspace.

So really, an SNMP application must be able to cope with any counter
width.

As percpu data is going to hurt in the 4096 cpu cases, we should try to
not make percpu structures too big.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 10, 2010, 6:40 p.m.
On 11/10/2010 10:18 AM, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 09:53 -0800, Ben Greear a écrit :
>
>> I agree, but if these can be read from user-space, it can be tricky to make
>> solid code to deal with wraps when the thing wrapping can be 32 or 64 bits,
>> depending on whether the kernel is compiled 32-bit or 64-bit.
>>
>> So, my preference is to use u32 or u64 so there is no guesswork involved.
>>
>> To be sure, this problem exists in lots of places already (/proc/net/dev comes to mind),
>> but the fewer places the better in my opinion.
>>
>
> On a 32bit kernel, very few devices provide 64bit counters, so an
> application reading /proc/net/dev should be prepared to handle 32 or
> 64bit counter.
>
> On a 64bit kernel, many devices still provide 32, 36, 40 bit counters
> (hardware based). Same conclusion for userspace.

In my opinion, the kernel and/or driver should deal with this such that
at worst the user has to deal with 32 v/s 64 bits based on whether the
kernel is compiled for 32 or 64 bit CPUs.  (Let the driver sample at
intervals needed to never wrap it's counters more than once and update
software stats of well-defined bit-width, and present those software
counters to users.

In practice, this seems to be the case, at least for the NICs I've used
(mostly Intel).  But, please don't propagate the idea that any width of
counters is OK to present to user-space:  It is completely unfair to
make app writers have to know the network driver and/or hardware quirks to
know how often it must sample stats.

> So really, an SNMP application must be able to cope with any counter
> width.
>
> As percpu data is going to hurt in the 4096 cpu cases, we should try to
> not make percpu structures too big.

Well, maybe using u32 would have positive benefits on 64-bit kernels then?

Thanks,
Ben
Eric Dumazet - Nov. 10, 2010, 8:33 p.m.
Le mercredi 10 novembre 2010 à 10:40 -0800, Ben Greear a écrit :

> In my opinion, the kernel and/or driver should deal with this such that
> at worst the user has to deal with 32 v/s 64 bits based on whether the
> kernel is compiled for 32 or 64 bit CPUs.  (Let the driver sample at
> intervals needed to never wrap it's counters more than once and update
> software stats of well-defined bit-width, and present those software
> counters to users.
> 

How so ? Are you willing to provide patches for all network drivers ?

> In practice, this seems to be the case, at least for the NICs I've used
> (mostly Intel).  But, please don't propagate the idea that any width of
> counters is OK to present to user-space:  It is completely unfair to
> make app writers have to know the network driver and/or hardware quirks to
> know how often it must sample stats.
> 

I am sorry Ben, but /proc/net/dev doesnt publish each counter effective
width. Its unfair, but its like that.

An appplication must be able to cope for wrap arounds, running on a 32
or 64bit kernel. Our duty is to provide 64bit counters for high speed
interfaces where possible.
For a 10Mb adapter, there is no need, since a 32bit counter doesnt wrap
in less than one hour (RFC1902 suggestion)

As I said, many drivers counters are not 32bit or 64bit. I did many
driver get_stats() checks lately...

Why should we cap them to 32bit if they really are 36 or 40 bits ? 


> Well, maybe using u32 would have positive benefits on 64-bit kernels then?
> 

But we want to handle 40/100Gbps devices, and keep SNMP apps happy.

We really need 64bit for them, and MACVLAN might be used on top of such
devices.

Or are you suggesting using u32 instead of "unsigned long" for
rx_errors/tx_dropped ?

This would indeed save 8 bytes per cpu per macvlan.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Hutchings - Nov. 10, 2010, 9:04 p.m.
On Wed, 2010-11-10 at 21:33 +0100, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 10:40 -0800, Ben Greear a écrit :
> 
> > In my opinion, the kernel and/or driver should deal with this such that
> > at worst the user has to deal with 32 v/s 64 bits based on whether the
> > kernel is compiled for 32 or 64 bit CPUs.  (Let the driver sample at
> > intervals needed to never wrap it's counters more than once and update
> > software stats of well-defined bit-width, and present those software
> > counters to users.
> > 
> 
> How so ? Are you willing to provide patches for all network drivers ?
> 
> > In practice, this seems to be the case, at least for the NICs I've used
> > (mostly Intel).  But, please don't propagate the idea that any width of
> > counters is OK to present to user-space:  It is completely unfair to
> > make app writers have to know the network driver and/or hardware quirks to
> > know how often it must sample stats.
> > 
> 
> I am sorry Ben, but /proc/net/dev doesnt publish each counter effective
> width. Its unfair, but its like that.
> 
> An appplication must be able to cope for wrap arounds, running on a 32
> or 64bit kernel. Our duty is to provide 64bit counters for high speed
> interfaces where possible.
> For a 10Mb adapter, there is no need, since a 32bit counter doesnt wrap
> in less than one hour (RFC1902 suggestion)
> 
> As I said, many drivers counters are not 32bit or 64bit. I did many
> driver get_stats() checks lately...
> 
> Why should we cap them to 32bit if they really are 36 or 40 bits ? 
[...]

Drivers should calculate differences and accumulate them in a 64-bit
counter.  (A lot of hardware has read-to-clear counters anyway, in which
case the driver *has* to accumulate the values it reads.)

Ben.
Eric Dumazet - Nov. 10, 2010, 9:12 p.m.
Le mercredi 10 novembre 2010 à 21:04 +0000, Ben Hutchings a écrit :

> Drivers should calculate differences and accumulate them in a 64-bit
> counter.  (A lot of hardware has read-to-clear counters anyway, in which
> case the driver *has* to accumulate the values it reads.)

You are mistaken. These are _hardware_ counters. If they were software,
of course they would be 32 or 64 bit.

And doing the thing you describe in software is racy.
I tried to remove many races, not to add new ones.

Yes, some drivers read one hardware counter using two instructions, and
this is racy.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 10, 2010, 9:35 p.m.
On 11/10/2010 12:33 PM, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 10:40 -0800, Ben Greear a écrit :
>
>> In my opinion, the kernel and/or driver should deal with this such that
>> at worst the user has to deal with 32 v/s 64 bits based on whether the
>> kernel is compiled for 32 or 64 bit CPUs.  (Let the driver sample at
>> intervals needed to never wrap it's counters more than once and update
>> software stats of well-defined bit-width, and present those software
>> counters to users.
>>
>
> How so ? Are you willing to provide patches for all network drivers ?

I'm willing to attempt to fix something that I use and can test.

Either way, I think it's legitimate to document at least the desired
behaviour so that driver writers know what to aim for.

>> In practice, this seems to be the case, at least for the NICs I've used
>> (mostly Intel).  But, please don't propagate the idea that any width of
>> counters is OK to present to user-space:  It is completely unfair to
>> make app writers have to know the network driver and/or hardware quirks to
>> know how often it must sample stats.
>>
>
> I am sorry Ben, but /proc/net/dev doesnt publish each counter effective
> width. Its unfair, but its like that.
>
> An appplication must be able to cope for wrap arounds, running on a 32
> or 64bit kernel. Our duty is to provide 64bit counters for high speed
> interfaces where possible.
> For a 10Mb adapter, there is no need, since a 32bit counter doesnt wrap
> in less than one hour (RFC1902 suggestion)

So an application that must deal with wraps must poll at the minimal
time interval for wrapping 32-bit counters at whatever speed, or it
must pay attention to the driver to somehow know that this magic driver
can *really* do 64-bit stats properly?

Please note that just because a counter is less than the previous read,
that doesn't by itself tell us if it wrapped once or twice.  And, if we
don't know at which number of bits it wraps, then we don't know how many
to add even if we are certain it wrapped only once.

In general, I want to treat eth0 the same as eth5, and not worry that one
is 10/100 realtek and the other a 10G Intel.

If netlink reports stats64, then those should only wrap at 64 bits,
and if it reports stats32, then wrap at 32-bits.

> As I said, many drivers counters are not 32bit or 64bit. I did many
> driver get_stats() checks lately...
>
> Why should we cap them to 32bit if they really are 36 or 40 bits ?
>
>
>> Well, maybe using u32 would have positive benefits on 64-bit kernels then?
>>
>
> But we want to handle 40/100Gbps devices, and keep SNMP apps happy.
>
> We really need 64bit for them, and MACVLAN might be used on top of such
> devices.
>
> Or are you suggesting using u32 instead of "unsigned long" for
> rx_errors/tx_dropped ?
>
> This would indeed save 8 bytes per cpu per macvlan.

Yes, that was what I was trying to suggest.  I'm all for 64-bit numbers
in anything that can wrap anytime soon, and anywhere you think 32-bits
is enough, just use u32 so we don't have to worry about the number of
bits in 'unsigned long' on different platforms.

Thanks,
Ben
Ben Hutchings - Nov. 10, 2010, 9:53 p.m.
On Wed, 2010-11-10 at 22:12 +0100, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 21:04 +0000, Ben Hutchings a écrit :
> 
> > Drivers should calculate differences and accumulate them in a 64-bit
> > counter.  (A lot of hardware has read-to-clear counters anyway, in which
> > case the driver *has* to accumulate the values it reads.)
> 
> You are mistaken. These are _hardware_ counters. If they were software,
> of course they would be 32 or 64 bit.

Of course I understood that.

> And doing the thing you describe in software is racy.
> I tried to remove many races, not to add new ones.

If you do it in ndo_get_stats{,64} and don't use your own lock, yes.

> Yes, some drivers read one hardware counter using two instructions, and
> this is racy.

Most hardware which supports MMIO to multi-word counters has some kind
of latching scheme where you can read the words/registers in order and
get consistent values (within a single counter; consistency between
counters is another matter).  Obviously you have to use a lock to
serialise stats updates in this case - whether or not you maintain wider
software counters.

Oh, another problem with using register values directly is that
statistics are likely to be reset whenever the device is reconfigured in
the way that requires a hardware reset.

Ben.
Eric Dumazet - Nov. 10, 2010, 10:21 p.m.
Le mercredi 10 novembre 2010 à 13:35 -0800, Ben Greear a écrit :


> So an application that must deal with wraps must poll at the minimal
> time interval for wrapping 32-bit counters at whatever speed, or it
> must pay attention to the driver to somehow know that this magic driver
> can *really* do 64-bit stats properly?
> 

Are you aware that you speak of something that is not specified at all
in linux ?

Frequency of polling is not part of any RFC. This usually is tunable in
the _application_. Some people sample stats every 5 minutes, some sample
every second, and hit the "xxx driver updates its stats every two
seconds, this sucks"

I wrote SNMP apps based on /proc/net/dev and all just work, with any
versions, any driver. Of course, some of them broke 6 years ago because
they were 32bit legacy application, running on a 64bit kernel. I never
asked David to change /proc/net/dev to cap counters to 32bit.

When 128bit cpu come, some userland changes are needed to parse 128bit
numbers.

In anycase, apps dont have to know a particular driver provides 64bit or
32bit counter. Only choice for them is to automatically detect the
wraparound, because they fetch a STRING, not a Counter32 or Counter64

This works for all drivers, legacy, new, Intel or whatever. If a driver
changes from 32 to 64, nothing special happens in /proc/net/dev.

RRD for example handles this just fine.

> Please note that just because a counter is less than the previous read,
> that doesn't by itself tell us if it wrapped once or twice.  And, if we
> don't know at which number of bits it wraps, then we don't know how many
> to add even if we are certain it wrapped only once.
> 

I repeat : Nothing in /proc/net/dev can tell you when a counter will
wrap (the counter width).

You also need to use the correct polling frequency, depending on max
speed. It was already the case with 32bit counters, 64bit ones only gave
some extra range.

> In general, I want to treat eth0 the same as eth5, and not worry that one
> is 10/100 realtek and the other a 10G Intel.
> 


> If netlink reports stats64, then those should only wrap at 64 bits,
> and if it reports stats32, then wrap at 32-bits.
> 

I believe you are mistaken. We provide stats64 for all drivers, even
32bit legacy ones. rtnetlink has no way to report counter widths,
because nobody cared.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 10, 2010, 10:53 p.m.
On 11/10/2010 02:21 PM, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 13:35 -0800, Ben Greear a écrit :

NOTE:  I trimmed the CC list..this has nothing in particular to
do with mac-vlans now.

>> So an application that must deal with wraps must poll at the minimal
>> time interval for wrapping 32-bit counters at whatever speed, or it
>> must pay attention to the driver to somehow know that this magic driver
>> can *really* do 64-bit stats properly?
>>
>
> Are you aware that you speak of something that is not specified at all
> in linux ?
>
> Frequency of polling is not part of any RFC. This usually is tunable in
> the _application_. Some people sample stats every 5 minutes, some sample
> every second, and hit the "xxx driver updates its stats every two
> seconds, this sucks"

These '2 sec' granularity bugs are a pain and should be (and have been)
fixed.

> I wrote SNMP apps based on /proc/net/dev and all just work, with any
> versions, any driver. Of course, some of them broke 6 years ago because
> they were 32bit legacy application, running on a 64bit kernel. I never
> asked David to change /proc/net/dev to cap counters to 32bit.

I did similar, and then wrote extra code to detect a 64-bit kernel and if
so assume that the counters wrap at 64 bits so I didn't have to poll so
often to make sure I didn't miss a wrap for a 10G NIC.  If instead one wraps at 33
bits and the other at 36, there is no way for me to deal with the wrap
properly w/out explicitly knowing about that 33 and 36.

If the old 32-bit counters in /proc/net/dev instead had a driver that
managed to wrap them at 28 bits, I can't see how your application could
have worked properly, so you must have been assuming that the kernel would
always return a full 32-bit counter.

Now, I'm trying to use netlink api since I'm hoping that is more efficient
and controllable than just reading /proc/net/dev over and over.  Netlink
explicitly can return a set of 32-bit counters or 64-bit.

All I want is to ensure than they are actually 32 or 64 bit, not 36 bit crammed in a
64-bit counter.  In other words, make the driver and/or kernel do it's
job and abstract access to hardware and return a consistent interface.

>> Please note that just because a counter is less than the previous read,
>> that doesn't by itself tell us if it wrapped once or twice.  And, if we
>> don't know at which number of bits it wraps, then we don't know how many
>> to add even if we are certain it wrapped only once.
>>
>
> I repeat : Nothing in /proc/net/dev can tell you when a counter will
> wrap (the counter width).

My primary concern is netlink API now, and even for proc/net/dev, there is no
good reason to show 32-bit counters mixed with 64-bit counters on 64-bit systems.
The kernel can deal with this easily enough, and it should.

>> If netlink reports stats64, then those should only wrap at 64 bits,
>> and if it reports stats32, then wrap at 32-bits.
>>
>
> I believe you are mistaken. We provide stats64 for all drivers, even
> 32bit legacy ones. rtnetlink has no way to report counter widths,
> because nobody cared.

Then it's busted.  If it claims to return stats64, but instead is returning
something different, it is wrong.  Netlink API still defines a stats32, so
the kernel should use that if it can't reliably deal with 64-bit counters.

Thanks,
Ben
Eric Dumazet - Nov. 10, 2010, 11:24 p.m.
Le mercredi 10 novembre 2010 à 14:53 -0800, Ben Greear a écrit :

> Then it's busted.  If it claims to return stats64, but instead is returning
> something different, it is wrong.  Netlink API still defines a stats32, so
> the kernel should use that if it can't reliably deal with 64-bit counters.
> 

It claims nothing like that. You obviously assumed wrong things.

It provides a framework, not a mandatory or exclusive one.

Really this endless discussion is going nowhere.

I suggest you send patches if you want, I am very pleased by current
stats handling.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet - Nov. 10, 2010, 11:36 p.m.
Le mercredi 10 novembre 2010 à 14:53 -0800, Ben Greear a écrit :

> I did similar, and then wrote extra code to detect a 64-bit kernel and if
> so assume that the counters wrap at 64 bits so I didn't have to poll so
> often to make sure I didn't miss a wrap for a 10G NIC.  If instead one wraps at 33
> bits and the other at 36, there is no way for me to deal with the wrap
> properly w/out explicitly knowing about that 33 and 36.
> 

How do you define 'wrap around' ? Maybe your definition is wrong.

> If the old 32-bit counters in /proc/net/dev instead had a driver that
> managed to wrap them at 28 bits, I can't see how your application could
> have worked properly, so you must have been assuming that the kernel would
> always return a full 32-bit counter.

I suggest you take a look at various SNMP applications that handle this
just fine. RRD for example. You can patch your favorite driver to cap
the stats to 37, 38, ... 31 or 30 bits, it works.

If you sample values faster than half the period, it works.

This has nothing to do with 32 or 64 bits, really.

Read RFC1230 for a good advice


	All the statistics are defined using the
        syntax Counter as 32 bit wrap around
        counters.  Thus, if an interface's
        hardware chip set maintains these
        statistics in 16-bit counters, then the
        agent must read the hardware's counters
        frequently enough to prevent loss of
        significance, in order to maintain
        a 32-bit counter in software."

Yes, 16bit counters _are_ fine, even if provided in a 32bit counter, if
you read value fast enough.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 10, 2010, 11:46 p.m.
On 11/10/2010 03:36 PM, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 14:53 -0800, Ben Greear a écrit :
>
>> I did similar, and then wrote extra code to detect a 64-bit kernel and if
>> so assume that the counters wrap at 64 bits so I didn't have to poll so
>> often to make sure I didn't miss a wrap for a 10G NIC.  If instead one wraps at 33
>> bits and the other at 36, there is no way for me to deal with the wrap
>> properly w/out explicitly knowing about that 33 and 36.
>>
>
> How do you define 'wrap around' ? Maybe your definition is wrong.

Maybe so.  My algorithm looks like:

  // uint64 accum;
  // uint32 old;
  // uint32 new;
  if (old > new) {
      // This assumes counters wrap at 32 bits (ie, 0xFFFFFFFF).
      accum += ((uint32)(0xFFFFFFFF) - old) + new;
  }
  else if (old < new) {
      accum += new - old;
  }
  old = new;

...

Is there some way I can do this w/out the (0xFFFFFFFF - old),
and thus the assumption of 32-bit counters?

Thanks,
Ben
Eric Dumazet - Nov. 11, 2010, 7:03 a.m.
Le mercredi 10 novembre 2010 à 15:46 -0800, Ben Greear a écrit :
> On 11/10/2010 03:36 PM, Eric Dumazet wrote:
> > Le mercredi 10 novembre 2010 à 14:53 -0800, Ben Greear a écrit :
> >
> >> I did similar, and then wrote extra code to detect a 64-bit kernel and if
> >> so assume that the counters wrap at 64 bits so I didn't have to poll so
> >> often to make sure I didn't miss a wrap for a 10G NIC.  If instead one wraps at 33
> >> bits and the other at 36, there is no way for me to deal with the wrap
> >> properly w/out explicitly knowing about that 33 and 36.
> >>
> >
> > How do you define 'wrap around' ? Maybe your definition is wrong.
> 
> Maybe so.  My algorithm looks like:
> 
>   // uint64 accum;
>   // uint32 old;
>   // uint32 new;
>   if (old > new) {
>       // This assumes counters wrap at 32 bits (ie, 0xFFFFFFFF).
>       accum += ((uint32)(0xFFFFFFFF) - old) + new;
>   }
>   else if (old < new) {
>       accum += new - old;
>   }
>   old = new;
> 
> ...
> 
> Is there some way I can do this w/out the (0xFFFFFFFF - old),
> and thus the assumption of 32-bit counters?
> 

Yes, please take a look at RRD for an example, then you can adapt to
your needs.

<quote>

http://www.mrtg.org/rrdtool/tut/rrdtutorial.en.html

At the time of writing this document, RRDtool knows of counters that are
either 32 bits or 64 bits of size. These counters can handle the
following different values:

 - 32 bits: 0 ..           4294967295
 - 64 bits: 0 .. 18446744073709551615

If these numbers look strange to you, you can view them in their
hexadecimal form:

 - 32 bits: 0 ..         FFFFFFFF
 - 64 bits: 0 .. FFFFFFFFFFFFFFFF

RRDtool handles both counters the same. If an overflow occurs and the
delta would be negative, RRDtool first adds the maximum of a small
counter + 1 to the delta. If the delta is still negative, it had to be
the large counter that wrapped. Add the maximum possible value of the
large counter + 1 and subtract the erroneously added small value.

There is a risk in this: suppose the large counter wrapped while adding
a huge delta, it could happen, theoretically, that adding the smaller
value would make the delta positive. In this unlikely case the results
would not be correct. The increase should be nearly as high as the
maximum counter value for that to happen, so chances are you would have
several other problems as well and this particular problem would not
even be worth thinking about. Even though, I did include an example, so
you can judge for yourself.

The next section gives you some numerical examples for counter-wraps.
Try to do the calculations yourself or just believe me if your
calculator can't handle the numbers :)

Correction numbers:

 - 32 bits: (4294967295 + 1) =                                4294967296
 - 64 bits: (18446744073709551615 + 1)
                                    - correction1 = 18446744069414584320

 Before:        4294967200
 Increase:                100
 Should become: 4294967300
 But really is:             4
 Delta:        -4294967196
 Correction1:  -4294967196 + 4294967296 = 100

 Before:        18446744073709551000
 Increase:                             800
 Should become: 18446744073709551800
 But really is:                        184
 Delta:        -18446744073709550816
 Correction1:  -18446744073709550816
                                + 4294967296 = -18446744069414583520
 Correction2:  -18446744069414583520
                   + 18446744069414584320 = 800

 Before:        18446744073709551615 ( maximum value )
 Increase:      18446744069414584320 ( absurd increase, minimum for
 Should become: 36893488143124135935             this example to work )
 But really is: 18446744069414584319
 Delta:                     -4294967296
 Correction1:  -4294967296 + 4294967296 = 0
 (not negative -> no correction2)

 Before:        18446744073709551615 ( maximum value )
 Increase:      18446744069414584319 ( one less increase )
 Should become: 36893488143124135934
 But really is: 18446744069414584318
 Delta:                     -4294967297
 Correction1:  -4294967297 + 4294967296 = -1
 Correction2:  -1 + 18446744069414584320 = 18446744069414584319

As you can see from the last two examples, you need strange numbers for
RRDtool to fail (provided it's bug free of course), so this should not
happen. However, SNMP or whatever method you choose to collect the data,
might also report wrong numbers occasionally. We can't prevent all
errors, but there are some things we can do. The RRDtool "create"
command takes two special parameters for this. They define the minimum
and maximum allowed values. Until now, we used "U", meaning "unknown".
If you provide values for one or both of them and if RRDtool receives
data points that are outside these limits, it will ignore those values.
For a thermometer in degrees Celsius, the absolute minimum is just under
-273. For my router, I can assume this minimum is much higher so I would
set it to 10, where as the maximum temperature I would set to 80. Any
higher and the device would be out of order.

For the speed of my car, I would never expect negative numbers and also
I would not expect a speed higher than 230. Anything else, and there
must have been an error. Remember: the opposite is not true, if the
numbers pass this check, it doesn't mean that they are correct. Always
judge the graph with a healthy dose of suspicion if it seems weird to
you.

http://www.mrtg.org/rrdtool/doc/rrdcreate.en.html

COUNTER

    is for continuous incrementing counters like the ifInOctets counter
in a router. The COUNTER data source assumes that the counter never
decreases, except when a counter overflows. The update function takes
the overflow into account. The counter is stored as a per-second rate.
When the counter overflows, RRDtool checks if the overflow happened at
the 32bit or 64bit border and acts accordingly by adding an appropriate
value to the result.

</quote>


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 11, 2010, 4:40 p.m.
On 11/10/2010 11:03 PM, Eric Dumazet wrote:
> Le mercredi 10 novembre 2010 à 15:46 -0800, Ben Greear a écrit :
>> On 11/10/2010 03:36 PM, Eric Dumazet wrote:
>>> Le mercredi 10 novembre 2010 à 14:53 -0800, Ben Greear a écrit :
>>>
>>>> I did similar, and then wrote extra code to detect a 64-bit kernel and if
>>>> so assume that the counters wrap at 64 bits so I didn't have to poll so
>>>> often to make sure I didn't miss a wrap for a 10G NIC.  If instead one wraps at 33
>>>> bits and the other at 36, there is no way for me to deal with the wrap
>>>> properly w/out explicitly knowing about that 33 and 36.
>>>>
>>>
>>> How do you define 'wrap around' ? Maybe your definition is wrong.
>>
>> Maybe so.  My algorithm looks like:
>>
>>    // uint64 accum;
>>    // uint32 old;
>>    // uint32 new;
>>    if (old>  new) {
>>        // This assumes counters wrap at 32 bits (ie, 0xFFFFFFFF).
>>        accum += ((uint32)(0xFFFFFFFF) - old) + new;
>>    }
>>    else if (old<  new) {
>>        accum += new - old;
>>    }
>>    old = new;
>>
>> ...
>>
>> Is there some way I can do this w/out the (0xFFFFFFFF - old),
>> and thus the assumption of 32-bit counters?
>>
>
> Yes, please take a look at RRD for an example, then you can adapt to
> your needs.
>
> <quote>
>
> http://www.mrtg.org/rrdtool/tut/rrdtutorial.en.html
>
> At the time of writing this document, RRDtool knows of counters that are
> either 32 bits or 64 bits of size. These counters can handle the
> following different values:
>
>   - 32 bits: 0 ..           4294967295
>   - 64 bits: 0 .. 18446744073709551615
>
> If these numbers look strange to you, you can view them in their
> hexadecimal form:
>
>   - 32 bits: 0 ..         FFFFFFFF
>   - 64 bits: 0 .. FFFFFFFFFFFFFFFF
>
> RRDtool handles both counters the same. If an overflow occurs and the
> delta would be negative, RRDtool first adds the maximum of a small
> counter + 1 to the delta. If the delta is still negative, it had to be
> the large counter that wrapped. Add the maximum possible value of the
> large counter + 1 and subtract the erroneously added small value.
>
> There is a risk in this: suppose the large counter wrapped while adding
> a huge delta, it could happen, theoretically, that adding the smaller
> value would make the delta positive. In this unlikely case the results
> would not be correct. The increase should be nearly as high as the
> maximum counter value for that to happen, so chances are you would have
> several other problems as well and this particular problem would not
> even be worth thinking about. Even though, I did include an example, so
> you can judge for yourself.

So, they assume counters are exactly 32 or 64 bits.
Your example of the 36-bit counter would break their
assumptions of 32 or 64 bits.

I agree that you can guess if the counter is 32 or 64, at least with today's
hardware and relatively normal poll times, and the requirement that the
counters can ONLY be 32 or 64 bits.  I still consider it a kludge to
return 32 bit counters in stats64, however.  Would you consider
a patch to have netlink pay attention to whether the stats are 32 or
64 (based on a flag returned from dev_get_stats perhaps)?

Thanks,
Ben
Eric Dumazet - Nov. 11, 2010, 4:56 p.m.
Le jeudi 11 novembre 2010 à 08:40 -0800, Ben Greear a écrit :

> So, they assume counters are exactly 32 or 64 bits.
> Your example of the 36-bit counter would break their
> assumptions of 32 or 64 bits.
> 

They 'assume'. I am not. How do you handle counters that suddenly go to
0 ?

> I agree that you can guess if the counter is 32 or 64, at least with today's
> hardware and relatively normal poll times, and the requirement that the
> counters can ONLY be 32 or 64 bits.  I still consider it a kludge to
> return 32 bit counters in stats64, however.  Would you consider
> a patch to have netlink pay attention to whether the stats are 32 or
> 64 (based on a flag returned from dev_get_stats perhaps)?

So what ? How is it going to help /proc/net/dev users (most apps use it)

Could you please adapt your software, and not adapt linux to your
needs ? Dont your software runs on linux 2.6.32 ?



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 11, 2010, 5:20 p.m.
On 11/11/2010 08:56 AM, Eric Dumazet wrote:
> Le jeudi 11 novembre 2010 à 08:40 -0800, Ben Greear a écrit :
>
>> So, they assume counters are exactly 32 or 64 bits.
>> Your example of the 36-bit counter would break their
>> assumptions of 32 or 64 bits.
>>
>
> They 'assume'. I am not. How do you handle counters that suddenly go to
> 0 ?

I've shown you my algorithm that requires one to know the counter
width, and in response you offered on that will work with 32 OR 64 bits,
as long as you make some assumptions.

If you have an algorithm that can properly deal with wrapped counters of
arbitrary bits, then post it.

As for counters that go to zero, if the previous value was > 0, then
it was a wrap.  There is a reason Dave won't let anyone add a patch to
clear network counters.  If the network device was removed and came back,
then you must listen for those events and take proper precautions (set
prev to 0 before sampling any counters, for instance).


>> I agree that you can guess if the counter is 32 or 64, at least with today's
>> hardware and relatively normal poll times, and the requirement that the
>> counters can ONLY be 32 or 64 bits.  I still consider it a kludge to
>> return 32 bit counters in stats64, however.  Would you consider
>> a patch to have netlink pay attention to whether the stats are 32 or
>> 64 (based on a flag returned from dev_get_stats perhaps)?
>
> So what ? How is it going to help /proc/net/dev users (most apps use it)

At least they will have an opportunity to use a better defined API
if they wish.  And, if you only want stats for one interface, and
you have 4000 VLANs in the system, reading /proc/net/dev seems quite
inefficient.

> Could you please adapt your software, and not adapt linux to your
> needs ? Dont your software runs on linux 2.6.32 ?

Yes, I used to carry a patch that allowed direct access to the netdev_stats,
and I'd fall back to parsing /proc/net/dev on standard kernels.  I've now
moved to using netlink API as that seems the preferred method going
forward and allows the granularity (ie, get stats for a single device)
that I prefer.

And, I'll certainly keep trying to improve Linux, because if it helps
my needs, it may help others.  If it causes harm to others, then hopefully
someone will notice and reject my patch or help me to see how to make
it better.

Thanks,
Ben
Eric Dumazet - Nov. 11, 2010, 6:02 p.m.
Le jeudi 11 novembre 2010 à 09:20 -0800, Ben Greear a écrit :

> I've shown you my algorithm that requires one to know the counter
> width, and in response you offered on that will work with 32 OR 64 bits,
> as long as you make some assumptions.
> 
> If you have an algorithm that can properly deal with wrapped counters of
> arbitrary bits, then post it.
> 

Its only a generalization of RRD algo

No rocket science I am afraid.

You can try all the numbers in `seq 32 64` in this order, to get a
generic algo.

If you can certify all your devices are 32 or 64, then the RRD method is
OK.



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ben Greear - Nov. 11, 2010, 6:13 p.m.
On 11/11/2010 10:02 AM, Eric Dumazet wrote:
> Le jeudi 11 novembre 2010 à 09:20 -0800, Ben Greear a écrit :
>
>> I've shown you my algorithm that requires one to know the counter
>> width, and in response you offered on that will work with 32 OR 64 bits,
>> as long as you make some assumptions.
>>
>> If you have an algorithm that can properly deal with wrapped counters of
>> arbitrary bits, then post it.
>>
>
> Its only a generalization of RRD algo
>
> No rocket science I am afraid.
>
> You can try all the numbers in `seq 32 64` in this order, to get a
> generic algo.

Ok, so then you have to sample soon enough that a 32-bit counter
can't wrap twice..otherwise you couldn't tell a 32 bit from a
33 bit counter, and you basically gain nothing from having "64-bit"
stats.

And that still assumes at least 32-bit stats..not 16 or whatever.
Thankfully, I doubt there are any drivers using < 32 bit stats.

I'll work on a patch for my idea when I get a chance..we'll see if
anyone likes it.

If you are aware of any drivers that return counters of other than 32 or
64bit widths, please let us know and perhaps we can fix them as well.

Thanks,
Ben
Eric Dumazet - Nov. 11, 2010, 6:46 p.m.
Le jeudi 11 novembre 2010 à 10:13 -0800, Ben Greear a écrit :

> If you are aware of any drivers that return counters of other than 32 or
> 64bit widths, please let us know and perhaps we can fix them as well.

I am pretty sure I found some of them, but cannot recall right now.

Some ethtool stats have definitly not a 32 or 64 bit range



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 0fc9dc7..fce80a7 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -243,17 +243,19 @@  xmit_world:
 netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 			       struct net_device *dev)
 {
-	int i = skb_get_queue_mapping(skb);
-	struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
 	unsigned int len = skb->len;
 	int ret;
+	const struct macvlan_dev *vlan = netdev_priv(dev);
+	struct macvlan_pcpu_stats *pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
 
 	ret = macvlan_queue_xmit(skb, dev);
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
-		txq->tx_packets++;
-		txq->tx_bytes += len;
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->tx_packets++;
+		pcpu_stats->tx_bytes += len;
+		u64_stats_update_end(&pcpu_stats->syncp);
 	} else
-		txq->tx_dropped++;
+		pcpu_stats->tx_dropped++;
 
 	return ret;
 }
@@ -414,14 +416,15 @@  static int macvlan_init(struct net_device *dev)
 	dev->state		= (dev->state & ~MACVLAN_STATE_MASK) |
 				  (lowerdev->state & MACVLAN_STATE_MASK);
 	dev->features 		= lowerdev->features & MACVLAN_FEATURES;
+	dev->features		|= NETIF_F_LLTX;
 	dev->gso_max_size	= lowerdev->gso_max_size;
 	dev->iflink		= lowerdev->ifindex;
 	dev->hard_header_len	= lowerdev->hard_header_len;
 
 	macvlan_set_lockdep_class(dev);
 
-	vlan->rx_stats = alloc_percpu(struct macvlan_rx_stats);
-	if (!vlan->rx_stats)
+	vlan->pcpu_stats = alloc_percpu(struct macvlan_pcpu_stats);
+	if (!vlan->pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -431,7 +434,7 @@  static void macvlan_uninit(struct net_device *dev)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	free_percpu(vlan->rx_stats);
+	free_percpu(vlan->pcpu_stats);
 }
 
 static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
@@ -439,33 +442,34 @@  static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 
-	dev_txq_stats_fold(dev, stats);
-
-	if (vlan->rx_stats) {
-		struct macvlan_rx_stats *p, accum = {0};
-		u64 rx_packets, rx_bytes, rx_multicast;
+	if (vlan->pcpu_stats) {
+		struct macvlan_pcpu_stats *p;
+		u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes;
 		unsigned int start;
 		int i;
 
 		for_each_possible_cpu(i) {
-			p = per_cpu_ptr(vlan->rx_stats, i);
+			p = per_cpu_ptr(vlan->pcpu_stats, i);
 			do {
 				start = u64_stats_fetch_begin_bh(&p->syncp);
 				rx_packets	= p->rx_packets;
 				rx_bytes	= p->rx_bytes;
 				rx_multicast	= p->rx_multicast;
+				tx_packets	= p->tx_packets;
+				tx_bytes	= p->tx_bytes;
 			} while (u64_stats_fetch_retry_bh(&p->syncp, start));
-			accum.rx_packets	+= rx_packets;
-			accum.rx_bytes		+= rx_bytes;
-			accum.rx_multicast	+= rx_multicast;
-			/* rx_errors is an ulong, updated without syncp protection */
-			accum.rx_errors		+= p->rx_errors;
+			stats->rx_packets	+= rx_packets;
+			stats->rx_bytes		+= rx_bytes;
+			stats->multicast	+= rx_multicast;
+			stats->tx_packets	+= tx_packets;
+			stats->tx_bytes		+= tx_bytes;
+			/* rx_errors & tx_dropped are ulong, updated
+			 * without syncp protection
+			 */
+			stats->rx_errors	+= p->rx_errors;
+			stats->tx_dropped	+= p->tx_dropped;
 		}
-		stats->rx_packets = accum.rx_packets;
-		stats->rx_bytes   = accum.rx_bytes;
-		stats->rx_errors  = accum.rx_errors;
-		stats->rx_dropped = accum.rx_errors;
-		stats->multicast  = accum.rx_multicast;
+		stats->rx_dropped = stats->rx_errors;
 	}
 	return stats;
 }
@@ -601,25 +605,6 @@  static int macvlan_validate(struct nlattr *tb[], struct nlattr *data[])
 	return 0;
 }
 
-static int macvlan_get_tx_queues(struct net *net,
-				 struct nlattr *tb[],
-				 unsigned int *num_tx_queues,
-				 unsigned int *real_num_tx_queues)
-{
-	struct net_device *real_dev;
-
-	if (!tb[IFLA_LINK])
-		return -EINVAL;
-
-	real_dev = __dev_get_by_index(net, nla_get_u32(tb[IFLA_LINK]));
-	if (!real_dev)
-		return -ENODEV;
-
-	*num_tx_queues      = real_dev->num_tx_queues;
-	*real_num_tx_queues = real_dev->real_num_tx_queues;
-	return 0;
-}
-
 int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
 			   struct nlattr *tb[], struct nlattr *data[],
 			   int (*receive)(struct sk_buff *skb),
@@ -743,7 +728,6 @@  int macvlan_link_register(struct rtnl_link_ops *ops)
 {
 	/* common fields */
 	ops->priv_size		= sizeof(struct macvlan_dev);
-	ops->get_tx_queues	= macvlan_get_tx_queues;
 	ops->validate		= macvlan_validate;
 	ops->maxtype		= IFLA_MACVLAN_MAX;
 	ops->policy		= macvlan_policy;
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index 8a2fd66..3779b5f 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -25,19 +25,23 @@  struct macvlan_port;
 struct macvtap_queue;
 
 /**
- *	struct macvlan_rx_stats - MACVLAN percpu rx stats
+ *	struct macvlan_pcpu_stats - MACVLAN percpu stats
  *	@rx_packets: number of received packets
  *	@rx_bytes: number of received bytes
  *	@rx_multicast: number of received multicast packets
+ *	@tx_
  *	@syncp: synchronization point for 64bit counters
  *	@rx_errors: number of errors
  */
-struct macvlan_rx_stats {
+struct macvlan_pcpu_stats {
 	u64			rx_packets;
 	u64			rx_bytes;
 	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
 	struct u64_stats_sync	syncp;
 	unsigned long		rx_errors;
+	unsigned long		tx_dropped;
 };
 
 /*
@@ -52,7 +56,7 @@  struct macvlan_dev {
 	struct hlist_node	hlist;
 	struct macvlan_port	*port;
 	struct net_device	*lowerdev;
-	struct macvlan_rx_stats __percpu *rx_stats;
+	struct macvlan_pcpu_stats __percpu *pcpu_stats;
 	enum macvlan_mode	mode;
 	int (*receive)(struct sk_buff *skb);
 	int (*forward)(struct net_device *dev, struct sk_buff *skb);
@@ -64,18 +68,18 @@  static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
 				    unsigned int len, bool success,
 				    bool multicast)
 {
-	struct macvlan_rx_stats *rx_stats;
+	struct macvlan_pcpu_stats *pcpu_stats;
 
-	rx_stats = this_cpu_ptr(vlan->rx_stats);
+	pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
 	if (likely(success)) {
-		u64_stats_update_begin(&rx_stats->syncp);
-		rx_stats->rx_packets++;;
-		rx_stats->rx_bytes += len;
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->rx_packets++;;
+		pcpu_stats->rx_bytes += len;
 		if (multicast)
-			rx_stats->rx_multicast++;
-		u64_stats_update_end(&rx_stats->syncp);
+			pcpu_stats->rx_multicast++;
+		u64_stats_update_end(&pcpu_stats->syncp);
 	} else {
-		rx_stats->rx_errors++;
+		pcpu_stats->rx_errors++;
 	}
 }