
[v5,2/2] pktgen: introduce xmit_mode '<start_xmit|netif_receive>'

Message ID 20150507143500.8534.4435.stgit@ivy
State Accepted, archived
Delegated to: David Miller

Commit Message

Jesper Dangaard Brouer May 7, 2015, 2:35 p.m. UTC
From: Alexei Starovoitov <ast@plumgrid.com>

Introduce xmit_mode 'netif_receive' for pktgen which generates the
packets using familiar pktgen commands, but feeds them into
netif_receive_skb() instead of ndo_start_xmit().

Default mode is called 'start_xmit'.

It is designed to test netif_receive_skb and ingress qdisc
performance only. Make sure you understand how it works before
using it for other RX benchmarking.

Sample script 'pktgen.sh':
#!/bin/bash
function pgset() {
  local result

  echo $1 > $PGDEV

  result=`cat $PGDEV | fgrep "Result: OK:"`
  if [ "$result" = "" ]; then
    cat $PGDEV | fgrep Result:
  fi
}

[ -z "$1" ] && echo "Usage: $0 DEV" && exit 1
ETH=$1

PGDEV=/proc/net/pktgen/kpktgend_0
pgset "rem_device_all"
pgset "add_device $ETH"

PGDEV=/proc/net/pktgen/$ETH
pgset "xmit_mode netif_receive"
pgset "pkt_size 60"
pgset "dst 198.18.0.1"
pgset "dst_mac 90:e2:ba:ff:ff:ff"
pgset "count 10000000"
pgset "burst 32"

PGDEV=/proc/net/pktgen/pgctrl
echo "Running... ctrl^C to stop"
pgset "start"
echo "Done"
cat /proc/net/pktgen/$ETH

Usage:
$ sudo ./pktgen.sh eth2
...
Result: OK: 232376(c232372+d3) usec, 10000000 (60byte,0frags)
  43033682pps 20656Mb/sec (20656167360bps) errors: 10000000

Raw netif_receive_skb speed should be ~43 million packets
per second on a 3.7GHz x86, and 'perf report' should look like:
  37.69%  kpktgend_0   [kernel.vmlinux]  [k] __netif_receive_skb_core
  25.81%  kpktgend_0   [kernel.vmlinux]  [k] kfree_skb
   7.22%  kpktgend_0   [kernel.vmlinux]  [k] ip_rcv
   5.68%  kpktgend_0   [pktgen]          [k] pktgen_thread_worker

If fib_table_lookup is seen on top, it means the skb was processed
by the stack. To benchmark netif_receive_skb only, make sure the
'dst_mac' in your pktgen script differs from the receiving device's
MAC address, so the packet is dropped by ip_rcv.
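
For example, a quick check (assuming eth2 is the receiving device, and with
PGDEV still pointing at /proc/net/pktgen/eth2 as in the script above):

  # Show the device's own MAC; the dst_mac below must NOT match it
  ip link show eth2 | grep ether
  pgset "dst_mac 90:e2:ba:ff:ff:ff"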

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>

---
v4->v5:
- Rename xmit_mode's <start_xmit|netif_receive>
- Save one branch when calling eth_type_trans(), noticed by Alex Duyck

v3->v4: Jesper addressed his own concerns
- Introduce the xmit_mode user interface
- Mention xmit_mode in the documentation
- Make sure clone_skb cannot be used together with this mode

v2->v3: addressed more of Eric's comments. Thanks!

v1->v2: as suggested by Eric:
- dropped 'clone_skb' flag; it now returns -ENOTSUPP
- fix rps/rfs bug by checking skb->users after every netif_receive_skb
- tested with RPS/RFS, taps, veth, physical devs, various tc cls/act

---

 Documentation/networking/pktgen.txt |    7 +++
 net/core/pktgen.c                   |   82 +++++++++++++++++++++++++++++++++--
 2 files changed, 84 insertions(+), 5 deletions(-)



Comments

Alexei Starovoitov May 7, 2015, 4:28 p.m. UTC | #1
On 5/7/15 7:35 AM, Jesper Dangaard Brouer wrote:
> From: Alexei Starovoitov <ast@plumgrid.com>
>
> Introduce xmit_mode 'netif_receive' for pktgen which generates the
> packets using familiar pktgen commands, but feeds them into
> netif_receive_skb() instead of ndo_start_xmit().
>
> Default mode is called 'start_xmit'.
...
> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
>
> ---
> v4->v5:
> - Rename xmit_mode's <start_xmit|netif_receive>
> - Save one branch when calling eth_type_trans(), noticed by Alex Duyck

looks good to me. Thanks a lot.

btw, I've started to work on a patch on top of this one that allows
multiple pktgen threads to submit into the same netdev.
I've used it to stress test removal of spin_lock in ingress qdisc.
The idea is to add another 'name' parameter to command:
'add_device name dev'
'name' will be used to identify this pktgen thread in /proc
and 'dev' used as target net_device.
I think it will be useful for start_xmit testing as well.
I wonder why it wasn't done earlier? The queue configuration is
already supported.
Daniel Borkmann May 7, 2015, 5:11 p.m. UTC | #2
On 05/07/2015 06:28 PM, Alexei Starovoitov wrote:
> On 5/7/15 7:35 AM, Jesper Dangaard Brouer wrote:
>> From: Alexei Starovoitov <ast@plumgrid.com>
>>
>> Introduce xmit_mode 'netif_receive' for pktgen which generates the
>> packets using familiar pktgen commands, but feeds them into
>> netif_receive_skb() instead of ndo_start_xmit().
>>
>> Default mode is called 'start_xmit'.
> ...
>> Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
>> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
>>
>> ---
>> v4->v5:
>> - Rename xmit_mode's <start_xmit|netif_receive>
>> - Save one branch when calling eth_type_trans(), noticed by Alex Duyck
>
> looks good to me. Thanks a lot.
>
> btw, I've started to work on a patch on top of this one that allows
> multiple pktgen threads to submit into the same netdev.
> I've used it to stress test removal of spin_lock in ingress qdisc.
> The idea is to add another 'name' parameter to command:
> 'add_device name dev'
> 'name' will be used to identify this pktgen thread in /proc
> and 'dev' used as target net_device.
> I think it will be useful for start_xmit testing as well.
> I wonder why it wasn't done earlier? The queue configuration is
> already supported.

You mean other than below commit (iow independent of queue mapping)?

commit e6fce5b916cd7f7f79b2b3e53ba74bbfc1d7cf8b
Author: Robert Olsson <robert.olsson@its.uu.se>
Date:   Thu Aug 7 02:23:01 2008 -0700

     pktgen: multiqueue etc.
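
(That commit is what provides the dev@NUM device naming; binding the same
NIC to two pktgen worker threads would look roughly like this, with eth2
as a placeholder device name:

   echo "add_device eth2@0" > /proc/net/pktgen/kpktgend_0
   echo "add_device eth2@1" > /proc/net/pktgen/kpktgend_1

Jesper's multiqueue script further down in the thread shows the full setup.)
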
Alexei Starovoitov May 7, 2015, 5:16 p.m. UTC | #3
On 5/7/15 10:11 AM, Daniel Borkmann wrote:
>
> You mean other than below commit (iow independent of queue mapping)?
>
> commit e6fce5b916cd7f7f79b2b3e53ba74bbfc1d7cf8b
> Author: Robert Olsson <robert.olsson@its.uu.se>
> Date:   Thu Aug 7 02:23:01 2008 -0700
>
>      pktgen: multiqueue etc.

ahh, I felt that I'm missing something ;)
Too bad it's not documented. I couldn't figure that out from sources.
Thanks! will give it a shot.
Daniel Borkmann May 7, 2015, 5:20 p.m. UTC | #4
On 05/07/2015 07:16 PM, Alexei Starovoitov wrote:
> On 5/7/15 10:11 AM, Daniel Borkmann wrote:
>>
>> You mean other than below commit (iow independent of queue mapping)?
>>
>> commit e6fce5b916cd7f7f79b2b3e53ba74bbfc1d7cf8b
>> Author: Robert Olsson <robert.olsson@its.uu.se>
>> Date:   Thu Aug 7 02:23:01 2008 -0700
>>
>>      pktgen: multiqueue etc.
>
> ahh, I felt that I'm missing something ;)
> Too bad it's not documented. I couldn't figure that out from sources.
> Thanks! will give it a shot.

Ok, in any case, I think it would be good if there's some
documentation for that as well. :)
Jesper Dangaard Brouer May 8, 2015, 1:40 p.m. UTC | #5
On Thu, 07 May 2015 19:11:58 +0200 Daniel Borkmann <daniel@iogearbox.net> wrote:

> On 05/07/2015 06:28 PM, Alexei Starovoitov wrote:
> > On 5/7/15 7:35 AM, Jesper Dangaard Brouer wrote:
> >> From: Alexei Starovoitov <ast@plumgrid.com>
> >>
[...snip...]

> > btw, I've started to work on a patch on top of this one that allows
> > multiple pktgen threads to submit into the same netdev.
> > I've used it to stress test removal of spin_lock in ingress qdisc.
> > The idea is to add another 'name' parameter to command:
> > 'add_device name dev'
> > 'name' will be used to identify this pktgen thread in /proc
> > and 'dev' used as target net_device.
> > I think it will be useful for start_xmit testing as well.
> > I wonder why it wasn't done earlier? The queue configuration is
> > already supported.
> 
> You mean other than below commit (iow independent of queue mapping)?
> 
> commit e6fce5b916cd7f7f79b2b3e53ba74bbfc1d7cf8b
> Author: Robert Olsson <robert.olsson@its.uu.se>
> Date:   Thu Aug 7 02:23:01 2008 -0700
> 
>      pktgen: multiqueue etc.

For completeness, and for others reading this thread...

Pktgen multiqueue is already supported via the mentioned commit, which adds
the device naming scheme: "add_device dev@number"

And yes, the documentation does not seem to mention this.  I've been
using it for years now... My scripts[1] take param "-t" for "threads".

I've added a more plain version of a script, based on yours, below my
signature.

The funny thing now is that scaling does not "happen" as we stall on:
   atomic_long_inc(&skb->dev->rx_dropped);

[1] https://github.com/netoptimizer/network-testing/tree/master/pktgen
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Sr. Network Kernel Developer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

multiqueue pktgen script:


#!/bin/bash
function pgset() {
    local result

    echo $1 > $PGDEV

    result=`cat $PGDEV | fgrep "Result: OK:"`
    if [ "$result" = "" ]; then
        cat $PGDEV | fgrep Result:
    fi
}

[ -z "$2" ] && echo "Usage: $0 DEV num_threads" && exit 1
ETH=$1
NUM_THREADS=$2
let "NUM_THREADS -= 1"
echo "Number of threads to start: $2 (0 to $NUM_THREADS)"

# General cleanup: reset everything since the last run
PGDEV=/proc/net/pktgen/pgctrl
pgset "reset"

# Add devices to threads
#  Notice the naming scheme ETH@NUM
for NUM in `seq 0 $NUM_THREADS`; do
    PGDEV=/proc/net/pktgen/kpktgend_${NUM}
    pgset "rem_device_all"
    pgset "add_device ${ETH}@${NUM}"
done

# Config each device
for NUM in `seq 0 $NUM_THREADS`; do
    PGDEV=/proc/net/pktgen/${ETH}@${NUM}
    pgset "flag QUEUE_MAP_CPU"
    pgset "xmit_mode netif_receive"
    pgset "pkt_size 60"
    pgset "dst 198.18.0.42"
    pgset "dst_mac 90:e2:ba:ff:ff:ff"
    pgset "count 10000000"
    pgset "burst 32"
done

PGDEV=/proc/net/pktgen/pgctrl
echo "Running... ctrl^C to stop"
pgset "start"
echo "Done"

for NUM in `seq 0 $NUM_THREADS`; do
    echo "Device: ${ETH}@${NUM}"
    cat /proc/net/pktgen/${ETH}@${NUM} | grep -A2 "Result:"
done
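
Invoke it like the single-device script above, but with a thread count as
the second argument (the filename is just whatever you saved it as), e.g.:

  $ sudo ./pktgen_multiqueue.sh eth2 4
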
Jesper Dangaard Brouer May 8, 2015, 3:49 p.m. UTC | #7
On Fri, 8 May 2015 17:39:00 +0200
Jesper Dangaard Brouer <brouer@redhat.com> wrote:

> On Thu, 07 May 2015 19:11:58 +0200 Daniel Borkmann <daniel@iogearbox.net> wrote:
> 
> > On 05/07/2015 06:28 PM, Alexei Starovoitov wrote:  
> > > On 5/7/15 7:35 AM, Jesper Dangaard Brouer wrote:  
> > >> From: Alexei Starovoitov <ast@plumgrid.com>
> > >>  
> [...snip...]
> 
> > > btw, I've started to work on a patch on top of this one that allows
> > > multiple pktgen threads to submit into the same netdev.
> > > I've used it to stress test removal of spin_lock in ingress qdisc.
> > > The idea is to add another 'name' parameter to command:
> > > 'add_device name dev'
> > > 'name' will be used to identify this pktgen thread in /proc
> > > and 'dev' used as target net_device.
> > > I think it will be useful for start_xmit testing as well.
> > > I wonder why it wasn't done earlier? The queue configuration is
> > > already supported.  
> > 
> > You mean other than below commit (iow independent of queue mapping)?
> > 
> > commit e6fce5b916cd7f7f79b2b3e53ba74bbfc1d7cf8b
> > Author: Robert Olsson <robert.olsson@its.uu.se>
> > Date:   Thu Aug 7 02:23:01 2008 -0700
> > 
> >      pktgen: multiqueue etc.  
> 
> For completeness, and for others reading this thread...
>
> Pktgen multiqueue is already supported via the mentioned commit, which adds
> the device naming scheme: "add_device dev@number"
> 
> And yes, the documentation does not seem to mention this.  I've been
> using it for years now... My scripts[1] take param "-t" for "threads".
> [1] https://github.com/netoptimizer/network-testing/tree/master/pktgen
> 
> I've added a more plain version of a script, based on yours, below my
> signature.

Now attached.

> The funny thing now is that scaling does not "happen" as we stall on:
>    atomic_long_inc(&skb->dev->rx_dropped);

More interesting observations with the mentioned script (now attached).

On my system the scaling stopped at 24Mpps; when I increased the number
of threads, the collective scaling was still stuck at 24Mpps.

Then I simply removed/compiled-out the:
 atomic_long_inc(&skb->dev->rx_dropped);

And after that change, the scaling is basically infinite/perfect.

Single thread performance increased from 24.7Mpps to 31.1Mpps, which
corresponds perfectly with the cost of an atomic operation on this HW
(8.25ns).

Diff to before:
 * (1/24700988*10^9)-(1/31170819*10^9) = 8.40292328196 ns
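
In per-packet terms that is 1/24,700,988 pps ≈ 40.5 ns/packet with the
atomic inc versus 1/31,170,819 pps ≈ 32.1 ns/packet without it, which is
the ~8.4 ns delta above.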

When increasing the threads now, they all basically run at 31Mpps.
Tried it up to 12 threads.


I'm quite puzzled why a single atomic op could "freeze" my system from
scaling beyond 24Mpps.
Eric Dumazet May 8, 2015, 3:56 p.m. UTC | #8
On Fri, 2015-05-08 at 17:49 +0200, Jesper Dangaard Brouer wrote:

> I'm quite puzzled why a single atomic op could "freeze" my system from
> scaling beyond 24Mpps.

Law of physics, since this cache line has to constantly fly among your
cpus.



Eric Dumazet May 8, 2015, 3:57 p.m. UTC | #9
On Fri, 2015-05-08 at 17:39 +0200, Jesper Dangaard Brouer wrote:

> The funny thing now is that scaling does not "happen" as we stall on:
>    atomic_long_inc(&skb->dev->rx_dropped);

Note we already have a per-cpu device refcnt.

We could extend this to contain a per-cpu rx_dropped, but I guess only
these tests hit this path, so I simply used one atomic_long_t here.

I guess an ingress action can do better, by stealing the packet before
hitting this point.


Alexei Starovoitov May 8, 2015, 4:50 p.m. UTC | #10
On 5/8/15 8:57 AM, Eric Dumazet wrote:
>
> I guess an ingress action can do better, by stealing the packet before
> hitting this point.

unfortunately not yet :(
We have spin_lock(&gact->tcf_lock); in tcf_gact,
so simple 'action drop' doesn't scale.
Shouldn't be hard to fix though.
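
For reference, a minimal example of the ingress 'action drop' setup being
discussed (eth2 is just a placeholder device):

  tc qdisc add dev eth2 handle ffff: ingress
  tc filter add dev eth2 parent ffff: protocol ip prio 1 \
     u32 match u32 0 0 flowid 1:1 action drop
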
Alexander H Duyck May 8, 2015, 4:53 p.m. UTC | #11
On 05/08/2015 08:49 AM, Jesper Dangaard Brouer wrote:
> More interesting observations with the mentioned script (now attached).
>
> On my system the scaling stopped at 24Mpps; when I increased the number
> of threads, the collective scaling was still stuck at 24Mpps.
>
> Then I simply removed/compiled-out the:
>   atomic_long_inc(&skb->dev->rx_dropped);
>
> And after that change, the scaling is basically infinite/perfect.
>
> Single thread performance increased from 24.7Mpps to 31.1Mpps, which
> corresponds perfectly with the cost of an atomic operation on this HW
> (8.25ns).
>
> Diff to before:
>   * (1/24700988*10^9)-(1/31170819*10^9) = 8.40292328196 ns
>
> When increasing the threads now, they all basically run at 31Mpps.
> Tried it up to 12 threads.
>
>
> I'm quite puzzled why a single atomic op could "freeze" my system from
> scaling beyond 24Mpps.

The atomic access likely acts as a serializing event, and on top of that
the time needed to complete it would increase as you add more threads.  I
am guessing the 8ns is probably the cost for a single-threaded setup where
the memory location is available in L2 or L1 cache.  If it is in L3 cache
that would make it more expensive.  If it is currently in use by another
CPU then that would make it even more expensive.  If it is in use on
another socket then we are probably looking at something in the high 10s
if not 100s of nanoseconds.  Once you hit the point where the time for the
atomic transaction multiplied by the number of threads equals the time it
takes for any one thread to complete the operation, you have hit the upper
limit, and everything after that is just wasted cycles spinning while
waiting for cache-line access.
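
(As a rough back-of-the-envelope check, an aggregate ceiling of 24Mpps
corresponds to roughly 1/24e6 ≈ 42 ns per increment once every update has
to take its turn on that one cache line.)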

So, for example, if you had 2 threads on the same socket you are looking
at an L3 cache access, which takes about 30 cycles.  Those 30 cycles would
likely be in addition to the 8ns you were already seeing for single-thread
performance, and I don't know if that includes the cache flush needed by
the remote L1/L2 where the cache line currently resides.  I'd be
interested in seeing what the 2-socket data looks like, as I suspect you
would take an even heavier hit there.

- Alex

Alexei Starovoitov May 8, 2015, 5 p.m. UTC | #12
On 5/8/15 8:49 AM, Jesper Dangaard Brouer wrote:
>>
>> I've added a more plain version of a script, based on yours, below my
>> signature.
>
> Now attached.

thanks for the script! Really useful.
Could you add it to samples/pktgen/ and remove useless and confusing
pktgen.conf-2-1 ?

>> The funny thing now is that scaling does not "happen" as we stall on:
>>     atomic_long_inc(&skb->dev->rx_dropped);
>
> More interesting observations with the mentioned script (now attached).
>
> On my system the scaling stopped at 24Mpps; when I increased the number
> of threads, the collective scaling was still stuck at 24Mpps.

what was your config to start hitting that drop counter?
We can convert it to per_cpu, but I'm not sure it's worth doing.
If I send normal IP packets they don't go down this path. Only
unknown-protocol packets are supposed to hit it?

Alexander H Duyck May 8, 2015, 6:21 p.m. UTC | #13
On 05/08/2015 10:00 AM, Alexei Starovoitov wrote:
> On 5/8/15 8:49 AM, Jesper Dangaard Brouer wrote:
>>>
>>> I've added a more plain version of a script, based on yours, below my
>>> signature.
>>
>> Now attached.
>
> thanks for the script! Really useful.
> Could you add it to samples/pktgen/ and remove useless and confusing
> pktgen.conf-2-1 ?
>
>>> The funny thing now is that scaling does not "happen" as we stall on:
>>>     atomic_long_inc(&skb->dev->rx_dropped);
>>
>> More interesting observations with the mentioned script (now attached).
>>
>> On my system the scaling stopped at 24Mpps; when I increased the number
>> of threads, the collective scaling was still stuck at 24Mpps.
>
> what was your config to start hitting that drop counter?
> We can convert it to per_cpu, but I'm not sure it's worth doing.
> If I send normal IP packets they don't go down this path. Only
> unknown-protocol packets are supposed to hit it?

I'm assuming it just has to be a packet that isn't claimed by any
sockets or interfaces registered on top of the device.  After all, it
isn't as if pktgen sends an unknown protocol, so I would assume just
enabling promiscuous mode on an interface without a bridge or raw socket
would probably be enough to trigger this.  The overhead itself would
show up in __netif_receive_skb_core in perf, since the atomic_inc would
be inlined.

I would think a common case where something like this might be seen 
would be if you registered enough macvlan interfaces to force a device 
to switch into promiscuous mode and then received traffic that wasn't 
meant for any of the other interfaces.

- Alex

Patch

diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index 6199ee6..747facc 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -193,6 +193,10 @@  Examples:
  pgset "rate 300M"        set rate to 300 Mb/s
  pgset "ratep 1000000"    set rate to 1Mpps
 
+ pgset "xmit_mode netif_receive"  RX inject into stack netif_receive_skb()
+				  Works with "burst" but not with "clone_skb".
+				  Default xmit_mode is "start_xmit".
+
 Sample scripts
 ==============
 
@@ -310,6 +314,9 @@  flowlen
 rate
 ratep
 
+xmit_mode <start_xmit|netif_receive>
+
+
 References:
 ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
 ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 43bb215..8f2687d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -210,6 +210,10 @@ 
 #define T_REMDEVALL   (1<<2)	/* Remove all devs */
 #define T_REMDEV      (1<<3)	/* Remove one dev */
 
+/* Xmit modes */
+#define M_START_XMIT		0	/* Default normal TX */
+#define M_NETIF_RECEIVE 	1	/* Inject packets into stack */
+
 /* If lock -- protects updating of if_list */
 #define   if_lock(t)           spin_lock(&(t->if_lock));
 #define   if_unlock(t)           spin_unlock(&(t->if_lock));
@@ -251,13 +255,14 @@  struct pktgen_dev {
 	 * we will do a random selection from within the range.
 	 */
 	__u32 flags;
-	int removal_mark;	/* non-zero => the device is marked for
-				 * removal by worker thread */
-
+	int xmit_mode;
 	int min_pkt_size;
 	int max_pkt_size;
 	int pkt_overhead;	/* overhead for MPLS, VLANs, IPSEC etc */
 	int nfrags;
+	int removal_mark;	/* non-zero => the device is marked for
+				 * removal by worker thread */
+
 	struct page *page;
 	u64 delay;		/* nano-seconds */
 
@@ -620,6 +625,9 @@  static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->node >= 0)
 		seq_printf(seq, "     node: %d\n", pkt_dev->node);
 
+	if (pkt_dev->xmit_mode == M_NETIF_RECEIVE)
+		seq_puts(seq, "     xmit_mode: netif_receive\n");
+
 	seq_puts(seq, "     Flags: ");
 
 	if (pkt_dev->flags & F_IPV6)
@@ -1081,7 +1089,8 @@  static ssize_t pktgen_if_write(struct file *file,
 		if (len < 0)
 			return len;
 		if ((value > 0) &&
-		    (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
+		    ((pkt_dev->xmit_mode == M_NETIF_RECEIVE) ||
+		     !(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
 			return -ENOTSUPP;
 		i += len;
 		pkt_dev->clone_skb = value;
@@ -1134,7 +1143,7 @@  static ssize_t pktgen_if_write(struct file *file,
 			return len;
 
 		i += len;
-		if ((value > 1) &&
+		if ((value > 1) && (pkt_dev->xmit_mode == M_START_XMIT) &&
 		    (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING)))
 			return -ENOTSUPP;
 		pkt_dev->burst = value < 1 ? 1 : value;
@@ -1160,6 +1169,35 @@  static ssize_t pktgen_if_write(struct file *file,
 			sprintf(pg_result, "ERROR: node not possible");
 		return count;
 	}
+	if (!strcmp(name, "xmit_mode")) {
+		char f[32];
+
+		memset(f, 0, 32);
+		len = strn_len(&user_buffer[i], sizeof(f) - 1);
+		if (len < 0)
+			return len;
+
+		if (copy_from_user(f, &user_buffer[i], len))
+			return -EFAULT;
+		i += len;
+
+		if (strcmp(f, "start_xmit") == 0) {
+			pkt_dev->xmit_mode = M_START_XMIT;
+		} else if (strcmp(f, "netif_receive") == 0) {
+			/* clone_skb set earlier, not supported in this mode */
+			if (pkt_dev->clone_skb > 0)
+				return -ENOTSUPP;
+
+			pkt_dev->xmit_mode = M_NETIF_RECEIVE;
+		} else {
+			sprintf(pg_result,
+				"xmit_mode -:%s:- unknown\nAvailable modes: %s",
+				f, "start_xmit, netif_receive\n");
+			return count;
+		}
+		sprintf(pg_result, "OK: xmit_mode=%s", f);
+		return count;
+	}
 	if (!strcmp(name, "flag")) {
 		char f[32];
 		memset(f, 0, 32);
@@ -3320,6 +3358,7 @@  static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	unsigned int burst = ACCESS_ONCE(pkt_dev->burst);
 	struct net_device *odev = pkt_dev->odev;
 	struct netdev_queue *txq;
+	struct sk_buff *skb;
 	int ret;
 
 	/* If device is offline, then don't send */
@@ -3357,6 +3396,38 @@  static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 	if (pkt_dev->delay && pkt_dev->last_ok)
 		spin(pkt_dev, pkt_dev->next_tx);
 
+	if (pkt_dev->xmit_mode == M_NETIF_RECEIVE) {
+		skb = pkt_dev->skb;
+		skb->protocol = eth_type_trans(skb, skb->dev);
+		atomic_add(burst, &skb->users);
+		local_bh_disable();
+		do {
+			ret = netif_receive_skb(skb);
+			if (ret == NET_RX_DROP)
+				pkt_dev->errors++;
+			pkt_dev->sofar++;
+			pkt_dev->seq_num++;
+			if (atomic_read(&skb->users) != burst) {
+				/* skb was queued by rps/rfs or taps,
+				 * so cannot reuse this skb
+				 */
+				atomic_sub(burst - 1, &skb->users);
+				/* get out of the loop and wait
+				 * until skb is consumed
+				 */
+				pkt_dev->last_ok = 1;
+				break;
+			}
+			/* skb was 'freed' by stack, so clean few
+			 * bits and reuse it
+			 */
+#ifdef CONFIG_NET_CLS_ACT
+			skb->tc_verd = 0; /* reset reclass/redir ttl */
+#endif
+		} while (--burst > 0);
+		goto out; /* Skips xmit_mode M_START_XMIT */
+	}
+
 	txq = skb_get_tx_queue(odev, pkt_dev->skb);
 
 	local_bh_disable();
@@ -3404,6 +3475,7 @@  xmit_more:
 unlock:
 	HARD_TX_UNLOCK(odev, txq);
 
+out:
 	local_bh_enable();
 
 	/* If pkt_dev->count is zero, then run forever */