diff mbox series

[ovs-dev,v5,8/8] dpif-netlink: Offloading meter to tc police action

Message ID 20220527090021.22594-9-jianbol@nvidia.com
State Superseded
Headers show
Series Add support for ovs metering with tc offload | expand

Checks

Context Check Description
ovsrobot/apply-robot warning apply and check: warning
ovsrobot/intel-ovs-compilation fail test: fail

Commit Message

Jianbo Liu May 27, 2022, 9 a.m. UTC
OVS meters are created in advance and openflow rules refer to them by
their unique ID. New tc_police API is used to offload them. By calling
the API, police actions are created and meters are mapped to them.
These actions then can be used in tc filter rules by the index.

Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
---
 NEWS                             |  2 ++
 lib/dpif-netlink.c               | 31 +++++++++++++++++----
 tests/system-offloads-traffic.at | 48 ++++++++++++++++++++++++++++++++
 3 files changed, 76 insertions(+), 5 deletions(-)

Comments

Eelco Chaudron June 20, 2022, 10:18 a.m. UTC | #1
On 27 May 2022, at 11:00, Jianbo Liu wrote:

> OVS meters are created in advance and openflow rules refer to them by
> their unique ID. New tc_police API is used to offload them. By calling
> the API, police actions are created and meters are mapped to them.
> These actions then can be used in tc filter rules by the index.
>
> Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> ---
>  NEWS                             |  2 ++
>  lib/dpif-netlink.c               | 31 +++++++++++++++++----
>  tests/system-offloads-traffic.at | 48 ++++++++++++++++++++++++++++++++
>  3 files changed, 76 insertions(+), 5 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index eece0d0b2..dfd659d4e 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -29,6 +29,8 @@ Post-v2.17.0
>     - Windows:
>       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
>       * IPv6 Geneve tunnel support.
> +   - Linux datapath:
> +     * Add offloading meter tc police.
>
>
>  v2.17.0 - 17 Feb 2022
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 06e1e8ca0..0af9ee77e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -4163,11 +4163,18 @@ static int
>  dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
>                         struct ofputil_meter_config *config)
>  {
> +    int err;
> +
>      if (probe_broken_meters(dpif_)) {
>          return ENOMEM;
>      }
>
> -    return dpif_netlink_meter_set__(dpif_, meter_id, config);
> +    err = dpif_netlink_meter_set__(dpif_, meter_id, config);
> +    if (!err && netdev_is_flow_api_enabled()) {
> +        meter_offload_set(meter_id, config);

Although currently we always return 0, we should still account for it to change in the future, so we should set err to the return value.

 +        err = meter_offload_set(meter_id, config);

> +    }
> +
> +    return err;
>  }
>
>  /* Retrieve statistics and/or delete meter 'meter_id'.  Statistics are
> @@ -4258,16 +4265,30 @@ static int
>  dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
>                         struct ofputil_meter_stats *stats, uint16_t max_bands)
>  {
> -    return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
> -                                        OVS_METER_CMD_GET);
> +    int err;
> +
> +    err = dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
> +                                       OVS_METER_CMD_GET);
> +    if (!err && netdev_is_flow_api_enabled()) {
> +        meter_offload_get(meter_id, stats, max_bands);

+        err = meter_offload_get(meter_id, stats, max_bands);

> +    }
> +
> +    return err;
>  }
>
>  static int
>  dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
>                         struct ofputil_meter_stats *stats, uint16_t max_bands)
>  {
> -    return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
> -                                        OVS_METER_CMD_DEL);
> +    int err;
> +
> +    err  = dpif_netlink_meter_get_stats(dpif, meter_id, stats,
> +                                        max_bands, OVS_METER_CMD_DEL);
> +    if (!err && netdev_is_flow_api_enabled()) {
> +        meter_offload_del(meter_id, stats, max_bands);

+        err = meter_offload_del(meter_id, stats, max_bands);

> +    }
> +
> +    return err;
>  }
>
>  static bool
> diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
> index 80bc1dd5c..7ec75340f 100644
> --- a/tests/system-offloads-traffic.at
> +++ b/tests/system-offloads-traffic.at
> @@ -168,3 +168,51 @@ matchall
>  ])
>  OVS_TRAFFIC_VSWITCHD_STOP
>  AT_CLEANUP
> +
> +AT_SETUP([offloads - check if meter offloading ])

Can we replace if with interface, as I keep on reading it as "if".

> +AT_KEYWORDS([meter])
> +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
> +OVS_TRAFFIC_VSWITCHD_START()
> +
> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-offload=true])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps bands=type=drop rate=1'])
> +
> +ADD_NAMESPACES(at_ns0, at_ns1)
> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
> +
> +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr f0:00:00:01:01:02 dev p0])
> +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr f0:00:00:01:01:01 dev p1])
> +
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "actions=normal"])
> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
> +])
> +
> +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ], [nc0.pid])
> +
> +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1 actions=normal"])
> +
> +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u 10.1.1.2 5678 -p 6789])

Any specific reason why you need the sleep 0.5 here? Is it to be sure the flow is programmed?
If so, you might just want to do a ovs-vsctl dump-flows and checke the output?

> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" | DUMP_CLEAN_SORTED], [0], [dnl
> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no), packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
> +])

Here you verify the DP rule is inserted, but should you not also wait a second to make sure the meter is reset?
Because in theory your could/should sent 11 packets in 1 second, so 10 should be dropped!?
This is the case in the kernel environment, but with TC the learning packet is not passing trough the TC meter (this might also be a corner case we need to document).

> +for i in `seq 10`; do
> +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678 -p 6789])
> +done
> +
> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" | DUMP_CLEAN_SORTED], [0], [dnl
> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no), packets:10, bytes:330, used:0.001s, actions:outputmeter(0),3
> +])
> +
> +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
> +OFPST_METER reply (OF1.3) (xid=0x2):
> +meter:1 flow_count:1 packet_in_count:11 byte_in_count:377 duration:0.001s bands:

byte_in_count:517 is a lot larger than with kernel code, do we know why? We should document it.

> +0: packet_count:9 byte_count:0

Here in theory we should report byte_count, like this:

0: packet_count:10 byte_count:470

However, TC does not support dropped byte count, only packet_count. So we should be ok for now, but we must add this limitation to the documentation somewhere that it's clear we will not report byte count with TC offload.

If you run this test without HW offload enabled you can see the difference in behavior, and I think there should be none (or at least the corner cases should be documented).
You could also add a "- offloads disabled" variant of this test, like other features do and add some additional reasoning why it's different there.

> +])
> +
> +OVS_TRAFFIC_VSWITCHD_STOP
> +AT_CLEANUP
> -- 
> 2.26.2

This concludes my review of v5.

//Eelco
Jianbo Liu June 21, 2022, 8:22 a.m. UTC | #2
On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
> On 27 May 2022, at 11:00, Jianbo Liu wrote:
> 
> > OVS meters are created in advance and openflow rules refer to them
> > by
> > their unique ID. New tc_police API is used to offload them. By
> > calling
> > the API, police actions are created and meters are mapped to them.
> > These actions then can be used in tc filter rules by the index.
> > 
> > Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> > ---
> >  NEWS                             |  2 ++
> >  lib/dpif-netlink.c               | 31 +++++++++++++++++----
> >  tests/system-offloads-traffic.at | 48
> > ++++++++++++++++++++++++++++++++
> >  3 files changed, 76 insertions(+), 5 deletions(-)
> > 
> > diff --git a/NEWS b/NEWS
> > index eece0d0b2..dfd659d4e 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -29,6 +29,8 @@ Post-v2.17.0
> >     - Windows:
> >       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
> >       * IPv6 Geneve tunnel support.
> > +   - Linux datapath:
> > +     * Add offloading meter tc police.
> > 
> > 
> >  v2.17.0 - 17 Feb 2022
> > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> > index 06e1e8ca0..0af9ee77e 100644
> > --- a/lib/dpif-netlink.c
> > +++ b/lib/dpif-netlink.c
> > @@ -4163,11 +4163,18 @@ static int
> >  dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id
> > meter_id,
> >                         struct ofputil_meter_config *config)
> >  {
> > +    int err;
> > +
> >      if (probe_broken_meters(dpif_)) {
> >          return ENOMEM;
> >      }
> > 
> > -    return dpif_netlink_meter_set__(dpif_, meter_id, config);
> > +    err = dpif_netlink_meter_set__(dpif_, meter_id, config);
> > +    if (!err && netdev_is_flow_api_enabled()) {
> > +        meter_offload_set(meter_id, config);
> 
> Although currently we always return 0, we should still account for it
> to change in the future, so we should set err to the return value.
> 

If there is err from meter_offload_set, it will be passed to the caller
of dpif_netlink_meter_set(). I don't agree with that, because the
caller thinks meter_set operation fail, but actually not. Besides, we
allow the case that dp meter_set success, but offloading fails, so the
return the error of meter_offload_set seems unnecessary.

>  +        err = meter_offload_set(meter_id, config);
> 
> > +    }
> > +
> > +    return err;
> >  }
> > 
> >  /* Retrieve statistics and/or delete meter 'meter_id'.  Statistics
> > are
> > @@ -4258,16 +4265,30 @@ static int
> >  dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id
> > meter_id,
> >                         struct ofputil_meter_stats *stats, uint16_t
> > max_bands)
> >  {
> > -    return dpif_netlink_meter_get_stats(dpif, meter_id, stats,
> > max_bands,
> > -                                        OVS_METER_CMD_GET);
> > +    int err;
> > +
> > +    err = dpif_netlink_meter_get_stats(dpif, meter_id, stats,
> > max_bands,
> > +                                       OVS_METER_CMD_GET);
> > +    if (!err && netdev_is_flow_api_enabled()) {
> > +        meter_offload_get(meter_id, stats, max_bands);
> 
> +        err = meter_offload_get(meter_id, stats, max_bands);
> 
> > +    }
> > +
> > +    return err;
> >  }
> > 
> >  static int
> >  dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id
> > meter_id,
> >                         struct ofputil_meter_stats *stats, uint16_t
> > max_bands)
> >  {
> > -    return dpif_netlink_meter_get_stats(dpif, meter_id, stats,
> > max_bands,
> > -                                        OVS_METER_CMD_DEL);
> > +    int err;
> > +
> > +    err  = dpif_netlink_meter_get_stats(dpif, meter_id, stats,
> > +                                        max_bands,
> > OVS_METER_CMD_DEL);
> > +    if (!err && netdev_is_flow_api_enabled()) {
> > +        meter_offload_del(meter_id, stats, max_bands);
> 
> +        err = meter_offload_del(meter_id, stats, max_bands);
> 
> > +    }
> > +
> > +    return err;
> >  }
> > 
> >  static bool
> > diff --git a/tests/system-offloads-traffic.at b/tests/system-
> > offloads-traffic.at
> > index 80bc1dd5c..7ec75340f 100644
> > --- a/tests/system-offloads-traffic.at
> > +++ b/tests/system-offloads-traffic.at
> > @@ -168,3 +168,51 @@ matchall
> >  ])
> >  OVS_TRAFFIC_VSWITCHD_STOP
> >  AT_CLEANUP
> > +
> > +AT_SETUP([offloads - check if meter offloading ])
> 
> Can we replace if with interface, as I keep on reading it as "if".
> 

Sure.

> > +AT_KEYWORDS([meter])
> > +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
> > +OVS_TRAFFIC_VSWITCHD_START()
> > +
> > +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
> > offload=true])
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps
> > bands=type=drop rate=1'])
> > +
> > +ADD_NAMESPACES(at_ns0, at_ns1)
> > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
> > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
> > +
> > +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
> > f0:00:00:01:01:02 dev p0])
> > +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
> > f0:00:00:01:01:01 dev p1])
> > +
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "actions=normal"])
> > +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 |
> > FORMAT_PING], [0], [dnl
> > +10 packets transmitted, 10 received, 0% packet loss, time 0ms
> > +])
> > +
> > +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ], [nc0.pid])
> > +
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
> > actions=normal"])
> > +
> > +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u 10.1.1.2
> > 5678 -p 6789])
> 
> Any specific reason why you need the sleep 0.5 here? Is it to be sure
> the flow is programmed?

I remember I added this because there are failures sometimes. I don't
know why, but obviously they are related to this patchset. So I added
the sleep to avoid them. It's only 0.5s, should be no problem, right? 

> If so, you might just want to do a ovs-vsctl dump-flows and checke
> the output?
> 

I don't understand your qustion. I checked ovs-vsctl dump-flows below.

> > +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
> > DUMP_CLEAN_SORTED], [0], [dnl
> > +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
> > packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
> > +])
> 
> Here you verify the DP rule is inserted, but should you not also wait
> a second to make sure the meter is reset?

OK. I will add.

> Because in theory your could/should sent 11 packets in 1 second, so
> 10 should be dropped!?
> This is the case in the kernel environment, but with TC the learning
> packet is not passing trough the TC meter (this might also be a
> corner case we need to document).
> 

Yes, it's the reason. But where to document?

> > +for i in `seq 10`; do
> > +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678 -p
> > 6789])
> > +done
> > +
> > +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
> > DUMP_CLEAN_SORTED], [0], [dnl
> > +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
> > packets:10, bytes:330, used:0.001s, actions:outputmeter(0),3
> > +])
> > +
> > +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e
> > 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
> > +OFPST_METER reply (OF1.3) (xid=0x2):
> > +meter:1 flow_count:1 packet_in_count:11 byte_in_count:377
> > duration:0.001s bands:
> 
> byte_in_count:517 is a lot larger than with kernel code, do we know
> why? We should document it.
> 

I think DP also count the bytes of mac layer.

> > +0: packet_count:9 byte_count:0
> 
> Here in theory we should report byte_count, like this:
> 
> 0: packet_count:10 byte_count:470
> 
> However, TC does not support dropped byte count, only packet_count.
> So we should be ok for now, but we must add this limitation to the
> documentation somewhere that it's clear we will not report byte count
> with TC offload.
> 
> If you run this test without HW offload enabled you can see the
> difference in behavior, and I think there should be none (or at least
> the corner cases should be documented).
> You could also add a "- offloads disabled" variant of this test, like
> other features do and add some additional reasoning why it's
> different there.
> 

Sure, I will add.

> > +])
> > +
> > +OVS_TRAFFIC_VSWITCHD_STOP
> > +AT_CLEANUP
> > -- 
> > 2.26.2
> 
> This concludes my review of v5.
> 
> //Eelco
>
Eelco Chaudron June 27, 2022, 9:32 a.m. UTC | #3
On 21 Jun 2022, at 10:22, Jianbo Liu wrote:

> On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
>> On 27 May 2022, at 11:00, Jianbo Liu wrote:
>>
>>> OVS meters are created in advance and openflow rules refer to them
>>> by
>>> their unique ID. New tc_police API is used to offload them. By
>>> calling
>>> the API, police actions are created and meters are mapped to them.
>>> These actions then can be used in tc filter rules by the index.
>>>
>>> Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
>>> ---
>>>  NEWS                             |  2 ++
>>>  lib/dpif-netlink.c               | 31 +++++++++++++++++----
>>>  tests/system-offloads-traffic.at | 48
>>> ++++++++++++++++++++++++++++++++
>>>  3 files changed, 76 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/NEWS b/NEWS
>>> index eece0d0b2..dfd659d4e 100644
>>> --- a/NEWS
>>> +++ b/NEWS
>>> @@ -29,6 +29,8 @@ Post-v2.17.0
>>>     - Windows:
>>>       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
>>>       * IPv6 Geneve tunnel support.
>>> +   - Linux datapath:
>>> +     * Add offloading meter tc police.
>>>
>>>
>>>  v2.17.0 - 17 Feb 2022
>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>> index 06e1e8ca0..0af9ee77e 100644
>>> --- a/lib/dpif-netlink.c
>>> +++ b/lib/dpif-netlink.c
>>> @@ -4163,11 +4163,18 @@ static int
>>>  dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id
>>> meter_id,
>>>                         struct ofputil_meter_config *config)
>>>  {
>>> +    int err;
>>> +
>>>      if (probe_broken_meters(dpif_)) {
>>>          return ENOMEM;
>>>      }
>>>
>>> -    return dpif_netlink_meter_set__(dpif_, meter_id, config);
>>> +    err = dpif_netlink_meter_set__(dpif_, meter_id, config);
>>> +    if (!err && netdev_is_flow_api_enabled()) {
>>> +        meter_offload_set(meter_id, config);
>>
>> Although currently we always return 0, we should still account for it
>> to change in the future, so we should set err to the return value.
>>
>
> If there is err from meter_offload_set, it will be passed to the caller
> of dpif_netlink_meter_set(). I don't agree with that, because the
> caller thinks meter_set operation fail, but actually not. Besides, we
> allow the case that dp meter_set success, but offloading fails, so the
> return the error of meter_offload_set seems unnecessary.

If this is the case, we should change the dpif_netlink_meter_set() API to return void.
And add a comment to the function why it would not return an error:

--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev *netdev)
     return -1;
 }

-int
+void
 meter_offload_set(ofproto_meter_id meter_id,
                   struct ofputil_meter_config *config)
 {
@@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id meter_id,
            }
         }
     }
-
-    return 0;
+    /* Offload APIs could fail, for example, because the offload is not
+     * supported. This is fine, as the offload API should take care of this. */
 }



>>  +        err = meter_offload_set(meter_id, config);
>>
>>> +    }
>>> +

<SNIP>

>>> diff --git a/tests/system-offloads-traffic.at b/tests/system-
>>> offloads-traffic.at
>>> index 80bc1dd5c..7ec75340f 100644
>>> --- a/tests/system-offloads-traffic.at
>>> +++ b/tests/system-offloads-traffic.at
>>> @@ -168,3 +168,51 @@ matchall
>>>  ])
>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>  AT_CLEANUP
>>> +
>>> +AT_SETUP([offloads - check if meter offloading ])
>>
>> Can we replace if with interface, as I keep on reading it as "if".
>>
>
> Sure.
>
>>> +AT_KEYWORDS([meter])
>>> +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
>>> +OVS_TRAFFIC_VSWITCHD_START()
>>> +
>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
>>> offload=true])
>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps
>>> bands=type=drop rate=1'])
>>> +
>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
>>> +
>>> +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
>>> f0:00:00:01:01:02 dev p0])
>>> +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
>>> f0:00:00:01:01:01 dev p1])
>>> +
>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "actions=normal"])
>>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 |
>>> FORMAT_PING], [0], [dnl
>>> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
>>> +])
>>> +
>>> +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ], [nc0.pid])
>>> +
>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>> "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
>>> actions=normal"])
>>> +
>>> +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u 10.1.1.2
>>> 5678 -p 6789])
>>
>> Any specific reason why you need the sleep 0.5 here? Is it to be sure
>> the flow is programmed?
>
> I remember I added this because there are failures sometimes. I don't
> know why, but obviously they are related to this patchset. So I added
> the sleep to avoid them. It's only 0.5s, should be no problem, right?

I did a lot of runs, but could not get it to fail without it. So if it fails in your case it would be good to investigate.

>> If so, you might just want to do a ovs-vsctl dump-flows and checke
>> the output?
>>
>
> I don't understand your qustion. I checked ovs-vsctl dump-flows below.

I mean in the failure case without he 0.5 seconds, what do you get.

>>> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
>>> DUMP_CLEAN_SORTED], [0], [dnl
>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
>>> packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
>>> +])
>>
>> Here you verify the DP rule is inserted, but should you not also wait
>> a second to make sure the meter is reset?
>
> OK. I will add.
>
>> Because in theory your could/should sent 11 packets in 1 second, so
>> 10 should be dropped!?
>> This is the case in the kernel environment, but with TC the learning
>> packet is not passing trough the TC meter (this might also be a
>> corner case we need to document).
>>
>
> Yes, it's the reason. But where to document?

Not sure where either, I guess in NEWS and maybe it’s time to add a howto/tc-offload.rst file?


>>> +for i in `seq 10`; do
>>> +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678 -p
>>> 6789])
>>> +done
>>> +
>>> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
>>> DUMP_CLEAN_SORTED], [0], [dnl
>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
>>> packets:10, bytes:330, used:0.001s, actions:outputmeter(0),3
>>> +])
>>> +
>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e
>>> 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
>>> +OFPST_METER reply (OF1.3) (xid=0x2):
>>> +meter:1 flow_count:1 packet_in_count:11 byte_in_count:377
>>> duration:0.001s bands:
>>
>> byte_in_count:517 is a lot larger than with kernel code, do we know
>> why? We should document it.
>>
>
> I think DP also count the bytes of mac layer.

Any idea why?

>>> +0: packet_count:9 byte_count:0
>>
>> Here in theory we should report byte_count, like this:
>>
>> 0: packet_count:10 byte_count:470
>>
>> However, TC does not support dropped byte count, only packet_count.
>> So we should be ok for now, but we must add this limitation to the
>> documentation somewhere that it's clear we will not report byte count
>> with TC offload.
>>
>> If you run this test without HW offload enabled you can see the
>> difference in behavior, and I think there should be none (or at least
>> the corner cases should be documented).
>> You could also add a "- offloads disabled" variant of this test, like
>> other features do and add some additional reasoning why it's
>> different there.
>>
>
> Sure, I will add.

Guess this could go to the same howto/tc-offload.rst in the limitations section?

>
>>> +])
>>> +
>>> +OVS_TRAFFIC_VSWITCHD_STOP
>>> +AT_CLEANUP
>>> -- 
>>> 2.26.2
>>
>> This concludes my review of v5.
>>
>> //Eelco
>>
Jianbo Liu June 27, 2022, 2:58 p.m. UTC | #4
On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
> 
> 
> On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
> 
> > On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
> > > On 27 May 2022, at 11:00, Jianbo Liu wrote:
> > > 
> > > > OVS meters are created in advance and openflow rules refer to
> > > > them
> > > > by
> > > > their unique ID. New tc_police API is used to offload them. By
> > > > calling
> > > > the API, police actions are created and meters are mapped to
> > > > them.
> > > > These actions then can be used in tc filter rules by the index.
> > > > 
> > > > Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> > > > ---
> > > >  NEWS                             |  2 ++
> > > >  lib/dpif-netlink.c               | 31 +++++++++++++++++----
> > > >  tests/system-offloads-traffic.at | 48
> > > > ++++++++++++++++++++++++++++++++
> > > >  3 files changed, 76 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/NEWS b/NEWS
> > > > index eece0d0b2..dfd659d4e 100644
> > > > --- a/NEWS
> > > > +++ b/NEWS
> > > > @@ -29,6 +29,8 @@ Post-v2.17.0
> > > >     - Windows:
> > > >       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
> > > >       * IPv6 Geneve tunnel support.
> > > > +   - Linux datapath:
> > > > +     * Add offloading meter tc police.
> > > > 
> > > > 
> > > >  v2.17.0 - 17 Feb 2022
> > > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> > > > index 06e1e8ca0..0af9ee77e 100644
> > > > --- a/lib/dpif-netlink.c
> > > > +++ b/lib/dpif-netlink.c
> > > > @@ -4163,11 +4163,18 @@ static int
> > > >  dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id
> > > > meter_id,
> > > >                         struct ofputil_meter_config *config)
> > > >  {
> > > > +    int err;
> > > > +
> > > >      if (probe_broken_meters(dpif_)) {
> > > >          return ENOMEM;
> > > >      }
> > > > 
> > > > -    return dpif_netlink_meter_set__(dpif_, meter_id, config);
> > > > +    err = dpif_netlink_meter_set__(dpif_, meter_id, config);
> > > > +    if (!err && netdev_is_flow_api_enabled()) {
> > > > +        meter_offload_set(meter_id, config);
> > > 
> > > Although currently we always return 0, we should still account
> > > for it
> > > to change in the future, so we should set err to the return
> > > value.
> > > 
> > 
> > If there is err from meter_offload_set, it will be passed to the
> > caller
> > of dpif_netlink_meter_set(). I don't agree with that, because the
> > caller thinks meter_set operation fail, but actually not. Besides,
> > we
> > allow the case that dp meter_set success, but offloading fails, so
> > the
> > return the error of meter_offload_set seems unnecessary.
> 
> If this is the case, we should change the dpif_netlink_meter_set()
> API to return void.
> And add a comment to the function why it would not return an error:
> 

OK.

> --- a/lib/netdev-offload.c
> +++ b/lib/netdev-offload.c
> @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev *netdev)
>      return -1;
>  }
> 
> -int
> +void
>  meter_offload_set(ofproto_meter_id meter_id,
>                    struct ofputil_meter_config *config)
>  {
> @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id meter_id,
>             }
>          }
>      }
> -
> -    return 0;
> +    /* Offload APIs could fail, for example, because the offload is
> not
> +     * supported. This is fine, as the offload API should take care
> of this. */
>  }
> 
> 
> 
> > >  +        err = meter_offload_set(meter_id, config);
> > > 
> > > > +    }
> > > > +
> 
> <SNIP>
> 
> > > > diff --git a/tests/system-offloads-traffic.at b/tests/system-
> > > > offloads-traffic.at
> > > > index 80bc1dd5c..7ec75340f 100644
> > > > --- a/tests/system-offloads-traffic.at
> > > > +++ b/tests/system-offloads-traffic.at
> > > > @@ -168,3 +168,51 @@ matchall
> > > >  ])
> > > >  OVS_TRAFFIC_VSWITCHD_STOP
> > > >  AT_CLEANUP
> > > > +
> > > > +AT_SETUP([offloads - check if meter offloading ])
> > > 
> > > Can we replace if with interface, as I keep on reading it as
> > > "if".
> > > 
> > 
> > Sure.
> > 
> > > > +AT_KEYWORDS([meter])
> > > > +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
> > > > +OVS_TRAFFIC_VSWITCHD_START()
> > > > +
> > > > +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
> > > > offload=true])
> > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps
> > > > bands=type=drop rate=1'])
> > > > +
> > > > +ADD_NAMESPACES(at_ns0, at_ns1)
> > > > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
> > > > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
> > > > +
> > > > +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
> > > > f0:00:00:01:01:02 dev p0])
> > > > +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
> > > > f0:00:00:01:01:01 dev p1])
> > > > +
> > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > "actions=normal"])
> > > > +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 |
> > > > FORMAT_PING], [0], [dnl
> > > > +10 packets transmitted, 10 received, 0% packet loss, time 0ms
> > > > +])
> > > > +
> > > > +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ],
> > > > [nc0.pid])
> > > > +
> > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
> > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
> > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
> > > > actions=normal"])
> > > > +
> > > > +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u
> > > > 10.1.1.2
> > > > 5678 -p 6789])
> > > 
> > > Any specific reason why you need the sleep 0.5 here? Is it to be
> > > sure
> > > the flow is programmed?
> > 
> > I remember I added this because there are failures sometimes. I
> > don't
> > know why, but obviously they are related to this patchset. So I
> > added
> > the sleep to avoid them. It's only 0.5s, should be no problem,
> > right?
> 
> I did a lot of runs, but could not get it to fail without it. So if
> it fails in your case it would be good to investigate.

I can't reproduce today, though I run many times. It's related to my
setup, I don't test on physical machine, but a virtual machine.

> 
> > > If so, you might just want to do a ovs-vsctl dump-flows and
> > > checke
> > > the output?
> > > 
> > 
> > I don't understand your qustion. I checked ovs-vsctl dump-flows
> > below.

If I remember correctly, the used is "never", not "0.001s".

> 
> I mean in the failure case without he 0.5 seconds, what do you get.
> 
> > > > +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
> > > > DUMP_CLEAN_SORTED], [0], [dnl
> > > > +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
> > > > packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
> > > > +])
> > > 
> > > Here you verify the DP rule is inserted, but should you not also
> > > wait
> > > a second to make sure the meter is reset?
> > 
> > OK. I will add.
> > 
> > > Because in theory your could/should sent 11 packets in 1 second,
> > > so
> > > 10 should be dropped!?
> > > This is the case in the kernel environment, but with TC the
> > > learning
> > > packet is not passing trough the TC meter (this might also be a
> > > corner case we need to document).
> > > 
> > 
> > Yes, it's the reason. But where to document?
> 
> Not sure where either, I guess in NEWS and maybe it’s time to add a
> howto/tc-offload.rst file?
> 

Maybe just adding comments here, to explain why it's the result. And
it's not any related to "howto".

> 
> > > > +for i in `seq 10`; do
> > > > +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678 -p
> > > > 6789])
> > > > +done
> > > > +
> > > > +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
> > > > DUMP_CLEAN_SORTED], [0], [dnl
> > > > +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
> > > > packets:10, bytes:330, used:0.001s, actions:outputmeter(0),3
> > > > +])
> > > > +
> > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e
> > > > 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
> > > > +OFPST_METER reply (OF1.3) (xid=0x2):
> > > > +meter:1 flow_count:1 packet_in_count:11 byte_in_count:377
> > > > duration:0.001s bands:
> > > 
> > > byte_in_count:517 is a lot larger than with kernel code, do we
> > > know
> > > why? We should document it.
> > > 
> > 
> > I think DP also count the bytes of mac layer.
> 
> Any idea why?

From ovs_meter_execute() in datapath/meter.c, the packet size of band
stats is added by skb->len, which should include the len of mac layer.
 
> 
> > > > +0: packet_count:9 byte_count:0
> > > 
> > > Here in theory we should report byte_count, like this:
> > > 
> > > 0: packet_count:10 byte_count:470
> > > 
> > > However, TC does not support dropped byte count, only
> > > packet_count.
> > > So we should be ok for now, but we must add this limitation to
> > > the
> > > documentation somewhere that it's clear we will not report byte
> > > count
> > > with TC offload.
> > > 
> > > If you run this test without HW offload enabled you can see the
> > > difference in behavior, and I think there should be none (or at
> > > least
> > > the corner cases should be documented).
> > > You could also add a "- offloads disabled" variant of this test,
> > > like
> > > other features do and add some additional reasoning why it's
> > > different there.
> > > 
> > 
> > Sure, I will add.
> 
> Guess this could go to the same howto/tc-offload.rst in the
> limitations section?
> 

Maybe also add comments here, don't waste time to find info from the
other document :)

> > 
> > > > +])
> > > > +
> > > > +OVS_TRAFFIC_VSWITCHD_STOP
> > > > +AT_CLEANUP
> > > > -- 
> > > > 2.26.2
> > > 
> > > This concludes my review of v5.
> > > 
> > > //Eelco
> > > 
>
Eelco Chaudron June 28, 2022, 7:52 a.m. UTC | #5
On 27 Jun 2022, at 16:58, Jianbo Liu wrote:

> On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
>>
>>
>> On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
>>
>>> On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
>>>> On 27 May 2022, at 11:00, Jianbo Liu wrote:
>>>>
>>>>> OVS meters are created in advance and openflow rules refer to
>>>>> them
>>>>> by
>>>>> their unique ID. New tc_police API is used to offload them. By
>>>>> calling
>>>>> the API, police actions are created and meters are mapped to
>>>>> them.
>>>>> These actions then can be used in tc filter rules by the index.
>>>>>
>>>>> Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
>>>>> ---
>>>>>  NEWS                             |  2 ++
>>>>>  lib/dpif-netlink.c               | 31 +++++++++++++++++----
>>>>>  tests/system-offloads-traffic.at | 48
>>>>> ++++++++++++++++++++++++++++++++
>>>>>  3 files changed, 76 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/NEWS b/NEWS
>>>>> index eece0d0b2..dfd659d4e 100644
>>>>> --- a/NEWS
>>>>> +++ b/NEWS
>>>>> @@ -29,6 +29,8 @@ Post-v2.17.0
>>>>>     - Windows:
>>>>>       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
>>>>>       * IPv6 Geneve tunnel support.
>>>>> +   - Linux datapath:
>>>>> +     * Add offloading meter tc police.
>>>>>
>>>>>
>>>>>  v2.17.0 - 17 Feb 2022
>>>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>>>> index 06e1e8ca0..0af9ee77e 100644
>>>>> --- a/lib/dpif-netlink.c
>>>>> +++ b/lib/dpif-netlink.c
>>>>> @@ -4163,11 +4163,18 @@ static int
>>>>>  dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id
>>>>> meter_id,
>>>>>                         struct ofputil_meter_config *config)
>>>>>  {
>>>>> +    int err;
>>>>> +
>>>>>      if (probe_broken_meters(dpif_)) {
>>>>>          return ENOMEM;
>>>>>      }
>>>>>
>>>>> -    return dpif_netlink_meter_set__(dpif_, meter_id, config);
>>>>> +    err = dpif_netlink_meter_set__(dpif_, meter_id, config);
>>>>> +    if (!err && netdev_is_flow_api_enabled()) {
>>>>> +        meter_offload_set(meter_id, config);
>>>>
>>>> Although currently we always return 0, we should still account
>>>> for it
>>>> to change in the future, so we should set err to the return
>>>> value.
>>>>
>>>
>>> If there is err from meter_offload_set, it will be passed to the
>>> caller
>>> of dpif_netlink_meter_set(). I don't agree with that, because the
>>> caller thinks meter_set operation fail, but actually not. Besides,
>>> we
>>> allow the case that dp meter_set success, but offloading fails, so
>>> the
>>> return the error of meter_offload_set seems unnecessary.
>>
>> If this is the case, we should change the dpif_netlink_meter_set()
>> API to return void.
>> And add a comment to the function why it would not return an error:
>>
>
> OK.
>
>> --- a/lib/netdev-offload.c
>> +++ b/lib/netdev-offload.c
>> @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev *netdev)
>>      return -1;
>>  }
>>
>> -int
>> +void
>>  meter_offload_set(ofproto_meter_id meter_id,
>>                    struct ofputil_meter_config *config)
>>  {
>> @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id meter_id,
>>             }
>>          }
>>      }
>> -
>> -    return 0;
>> +    /* Offload APIs could fail, for example, because the offload is
>> not
>> +     * supported. This is fine, as the offload API should take care
>> of this. */
>>  }
>>
>>
>>
>>>>  +        err = meter_offload_set(meter_id, config);
>>>>
>>>>> +    }
>>>>> +
>>
>> <SNIP>
>>
>>>>> diff --git a/tests/system-offloads-traffic.at b/tests/system-
>>>>> offloads-traffic.at
>>>>> index 80bc1dd5c..7ec75340f 100644
>>>>> --- a/tests/system-offloads-traffic.at
>>>>> +++ b/tests/system-offloads-traffic.at
>>>>> @@ -168,3 +168,51 @@ matchall
>>>>>  ])
>>>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>>>  AT_CLEANUP
>>>>> +
>>>>> +AT_SETUP([offloads - check if meter offloading ])
>>>>
>>>> Can we replace if with interface, as I keep on reading it as
>>>> "if".
>>>>
>>>
>>> Sure.
>>>
>>>>> +AT_KEYWORDS([meter])
>>>>> +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
>>>>> +OVS_TRAFFIC_VSWITCHD_START()
>>>>> +
>>>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
>>>>> offload=true])
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps
>>>>> bands=type=drop rate=1'])
>>>>> +
>>>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
>>>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
>>>>> +
>>>>> +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
>>>>> f0:00:00:01:01:02 dev p0])
>>>>> +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
>>>>> f0:00:00:01:01:01 dev p1])
>>>>> +
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>>>> "actions=normal"])
>>>>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 |
>>>>> FORMAT_PING], [0], [dnl
>>>>> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
>>>>> +])
>>>>> +
>>>>> +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ],
>>>>> [nc0.pid])
>>>>> +
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>>>> "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
>>>>> actions=normal"])
>>>>> +
>>>>> +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u
>>>>> 10.1.1.2
>>>>> 5678 -p 6789])
>>>>
>>>> Any specific reason why you need the sleep 0.5 here? Is it to be
>>>> sure
>>>> the flow is programmed?
>>>
>>> I remember I added this because there are failures sometimes. I
>>> don't
>>> know why, but obviously they are related to this patchset. So I
>>> added
>>> the sleep to avoid them. It's only 0.5s, should be no problem,
>>> right?
>>
>> I did a lot of runs, but could not get it to fail without it. So if
>> it fails in your case it would be good to investigate.
>
> I can't reproduce today, though I run many times. It's related to my
> setup, I don't test on physical machine, but a virtual machine.
>
>>
>>>> If so, you might just want to do a ovs-vsctl dump-flows and
>>>> checke
>>>> the output?
>>>>
>>>
>>> I don't understand your qustion. I checked ovs-vsctl dump-flows
>>> below.
>
> If I remember correctly, the used is "never", not "0.001s".
>
>>
>> I mean in the failure case without he 0.5 seconds, what do you get.
>>
>>>>> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
>>>>> DUMP_CLEAN_SORTED], [0], [dnl
>>>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
>>>>> packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
>>>>> +])
>>>>
>>>> Here you verify the DP rule is inserted, but should you not also
>>>> wait
>>>> a second to make sure the meter is reset?
>>>
>>> OK. I will add.
>>>
>>>> Because in theory your could/should sent 11 packets in 1 second,
>>>> so
>>>> 10 should be dropped!?
>>>> This is the case in the kernel environment, but with TC the
>>>> learning
>>>> packet is not passing trough the TC meter (this might also be a
>>>> corner case we need to document).
>>>>
>>>
>>> Yes, it's the reason. But where to document?
>>
>> Not sure where either, I guess in NEWS and maybe it’s time to add a
>> howto/tc-offload.rst file?
>>
>
> Maybe just adding comments here, to explain why it's the result. And
> it's not any related to "howto".

I think this is an important difference in behaviour we should document for the end-user. So that’s why I think we need this in the documentation somewhere.

>>>>> +for i in `seq 10`; do
>>>>> +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678 -p
>>>>> 6789])
>>>>> +done
>>>>> +
>>>>> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
>>>>> DUMP_CLEAN_SORTED], [0], [dnl
>>>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
>>>>> packets:10, bytes:330, used:0.001s, actions:outputmeter(0),3
>>>>> +])
>>>>> +
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e
>>>>> 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
>>>>> +OFPST_METER reply (OF1.3) (xid=0x2):
>>>>> +meter:1 flow_count:1 packet_in_count:11 byte_in_count:377
>>>>> duration:0.001s bands:
>>>>
>>>> byte_in_count:517 is a lot larger than with kernel code, do we
>>>> know
>>>> why? We should document it.
>>>>
>>>
>>> I think DP also count the bytes of mac layer.
>>
>> Any idea why?
>
> From ovs_meter_execute() in datapath/meter.c, the packet size of band
> stats is added by skb->len, which should include the len of mac layer.

So this needs to be fixed in the kernel. I can remember someone from Nvidia was already looking at this?

>>>>> +0: packet_count:9 byte_count:0
>>>>
>>>> Here in theory we should report byte_count, like this:
>>>>
>>>> 0: packet_count:10 byte_count:470
>>>>
>>>> However, TC does not support dropped byte count, only
>>>> packet_count.
>>>> So we should be ok for now, but we must add this limitation to
>>>> the
>>>> documentation somewhere that it's clear we will not report byte
>>>> count
>>>> with TC offload.
>>>>
>>>> If you run this test without HW offload enabled you can see the
>>>> difference in behavior, and I think there should be none (or at
>>>> least
>>>> the corner cases should be documented).
>>>> You could also add a "- offloads disabled" variant of this test,
>>>> like
>>>> other features do and add some additional reasoning why it's
>>>> different there.
>>>>
>>>
>>> Sure, I will add.
>>
>> Guess this could go to the same howto/tc-offload.rst in the
>> limitations section?
>>
>
> Maybe also add comments here, don't waste time to find info from the
> other document :)

See above, end-users do not read test cases, so we need this in some user documentation.

>>>
>>>>> +])
>>>>> +
>>>>> +OVS_TRAFFIC_VSWITCHD_STOP
>>>>> +AT_CLEANUP
>>>>> -- 
>>>>> 2.26.2
>>>>
>>>> This concludes my review of v5.
>>>>
>>>> //Eelco
>>>>
>>
Eelco Chaudron June 28, 2022, 7:53 a.m. UTC | #6
On 27 Jun 2022, at 16:58, Jianbo Liu wrote:

> On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
>>
>>
>> On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
>>
>>> On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
>>>> On 27 May 2022, at 11:00, Jianbo Liu wrote:
>>>>
>>>>> OVS meters are created in advance and openflow rules refer to
>>>>> them
>>>>> by
>>>>> their unique ID. New tc_police API is used to offload them. By
>>>>> calling
>>>>> the API, police actions are created and meters are mapped to
>>>>> them.
>>>>> These actions then can be used in tc filter rules by the index.
>>>>>
>>>>> Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
>>>>> ---
>>>>>  NEWS                             |  2 ++
>>>>>  lib/dpif-netlink.c               | 31 +++++++++++++++++----
>>>>>  tests/system-offloads-traffic.at | 48
>>>>> ++++++++++++++++++++++++++++++++
>>>>>  3 files changed, 76 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/NEWS b/NEWS
>>>>> index eece0d0b2..dfd659d4e 100644
>>>>> --- a/NEWS
>>>>> +++ b/NEWS
>>>>> @@ -29,6 +29,8 @@ Post-v2.17.0
>>>>>     - Windows:
>>>>>       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
>>>>>       * IPv6 Geneve tunnel support.
>>>>> +   - Linux datapath:
>>>>> +     * Add offloading meter tc police.
>>>>>
>>>>>
>>>>>  v2.17.0 - 17 Feb 2022
>>>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>>>> index 06e1e8ca0..0af9ee77e 100644
>>>>> --- a/lib/dpif-netlink.c
>>>>> +++ b/lib/dpif-netlink.c
>>>>> @@ -4163,11 +4163,18 @@ static int
>>>>>  dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id
>>>>> meter_id,
>>>>>                         struct ofputil_meter_config *config)
>>>>>  {
>>>>> +    int err;
>>>>> +
>>>>>      if (probe_broken_meters(dpif_)) {
>>>>>          return ENOMEM;
>>>>>      }
>>>>>
>>>>> -    return dpif_netlink_meter_set__(dpif_, meter_id, config);
>>>>> +    err = dpif_netlink_meter_set__(dpif_, meter_id, config);
>>>>> +    if (!err && netdev_is_flow_api_enabled()) {
>>>>> +        meter_offload_set(meter_id, config);
>>>>
>>>> Although currently we always return 0, we should still account
>>>> for it
>>>> to change in the future, so we should set err to the return
>>>> value.
>>>>
>>>
>>> If there is err from meter_offload_set, it will be passed to the
>>> caller
>>> of dpif_netlink_meter_set(). I don't agree with that, because the
>>> caller thinks meter_set operation fail, but actually not. Besides,
>>> we
>>> allow the case that dp meter_set success, but offloading fails, so
>>> the
>>> return the error of meter_offload_set seems unnecessary.
>>
>> If this is the case, we should change the dpif_netlink_meter_set()
>> API to return void.
>> And add a comment to the function why it would not return an error:
>>
>
> OK.
>
>> --- a/lib/netdev-offload.c
>> +++ b/lib/netdev-offload.c
>> @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev *netdev)
>>      return -1;
>>  }
>>
>> -int
>> +void
>>  meter_offload_set(ofproto_meter_id meter_id,
>>                    struct ofputil_meter_config *config)
>>  {
>> @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id meter_id,
>>             }
>>          }
>>      }
>> -
>> -    return 0;
>> +    /* Offload APIs could fail, for example, because the offload is
>> not
>> +     * supported. This is fine, as the offload API should take care
>> of this. */
>>  }
>>
>>
>>
>>>>  +        err = meter_offload_set(meter_id, config);
>>>>
>>>>> +    }
>>>>> +
>>
>> <SNIP>
>>
>>>>> diff --git a/tests/system-offloads-traffic.at b/tests/system-
>>>>> offloads-traffic.at
>>>>> index 80bc1dd5c..7ec75340f 100644
>>>>> --- a/tests/system-offloads-traffic.at
>>>>> +++ b/tests/system-offloads-traffic.at
>>>>> @@ -168,3 +168,51 @@ matchall
>>>>>  ])
>>>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>>>  AT_CLEANUP
>>>>> +
>>>>> +AT_SETUP([offloads - check if meter offloading ])
>>>>
>>>> Can we replace if with interface, as I keep on reading it as
>>>> "if".
>>>>
>>>
>>> Sure.
>>>
>>>>> +AT_KEYWORDS([meter])
>>>>> +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
>>>>> +OVS_TRAFFIC_VSWITCHD_START()
>>>>> +
>>>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
>>>>> offload=true])
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps
>>>>> bands=type=drop rate=1'])
>>>>> +
>>>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
>>>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
>>>>> +
>>>>> +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
>>>>> f0:00:00:01:01:02 dev p0])
>>>>> +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
>>>>> f0:00:00:01:01:01 dev p1])
>>>>> +
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>>>> "actions=normal"])
>>>>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 |
>>>>> FORMAT_PING], [0], [dnl
>>>>> +10 packets transmitted, 10 received, 0% packet loss, time 0ms
>>>>> +])
>>>>> +
>>>>> +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ],
>>>>> [nc0.pid])
>>>>> +
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>>>> "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
>>>>> actions=normal"])
>>>>> +
>>>>> +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u
>>>>> 10.1.1.2
>>>>> 5678 -p 6789])
>>>>
>>>> Any specific reason why you need the sleep 0.5 here? Is it to be
>>>> sure
>>>> the flow is programmed?
>>>
>>> I remember I added this because there are failures sometimes. I
>>> don't
>>> know why, but obviously they are related to this patchset. So I
>>> added
>>> the sleep to avoid them. It's only 0.5s, should be no problem,
>>> right?
>>
>> I did a lot of runs, but could not get it to fail without it. So if
>> it fails in your case it would be good to investigate.
>
> I can't reproduce today, though I run many times. It's related to my
> setup, I don't test on physical machine, but a virtual machine.
>
>>
>>>> If so, you might just want to do a ovs-vsctl dump-flows and
>>>> checke
>>>> the output?
>>>>
>>>
>>> I don't understand your qustion. I checked ovs-vsctl dump-flows
>>> below.
>
> If I remember correctly, the used is "never", not "0.001s".


Forgot to answer this ;)

The kernel always reports “never” for this test case, that’s why I suggested the offload disabled test to see all the differences and understand why.

>>
>> I mean in the failure case without he 0.5 seconds, what do you get.
>>
>>>>> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
>>>>> DUMP_CLEAN_SORTED], [0], [dnl
>>>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
>>>>> packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
>>>>> +])
>>>>
>>>> Here you verify the DP rule is inserted, but should you not also
>>>> wait
>>>> a second to make sure the meter is reset?
>>>
>>> OK. I will add.
>>>
>>>> Because in theory your could/should sent 11 packets in 1 second,
>>>> so
>>>> 10 should be dropped!?
>>>> This is the case in the kernel environment, but with TC the
>>>> learning
>>>> packet is not passing trough the TC meter (this might also be a
>>>> corner case we need to document).
>>>>
>>>
>>> Yes, it's the reason. But where to document?
>>
>> Not sure where either, I guess in NEWS and maybe it’s time to add a
>> howto/tc-offload.rst file?
>>
>
> Maybe just adding comments here, to explain why it's the result. And
> it's not any related to "howto".
>
>>
>>>>> +for i in `seq 10`; do
>>>>> +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678 -p
>>>>> 6789])
>>>>> +done
>>>>> +
>>>>> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
>>>>> DUMP_CLEAN_SORTED], [0], [dnl
>>>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no),
>>>>> packets:10, bytes:330, used:0.001s, actions:outputmeter(0),3
>>>>> +])
>>>>> +
>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e
>>>>> 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
>>>>> +OFPST_METER reply (OF1.3) (xid=0x2):
>>>>> +meter:1 flow_count:1 packet_in_count:11 byte_in_count:377
>>>>> duration:0.001s bands:
>>>>
>>>> byte_in_count:517 is a lot larger than with kernel code, do we
>>>> know
>>>> why? We should document it.
>>>>
>>>
>>> I think DP also count the bytes of mac layer.
>>
>> Any idea why?
>
> From ovs_meter_execute() in datapath/meter.c, the packet size of band
> stats is added by skb->len, which should include the len of mac layer.
>
>>
>>>>> +0: packet_count:9 byte_count:0
>>>>
>>>> Here in theory we should report byte_count, like this:
>>>>
>>>> 0: packet_count:10 byte_count:470
>>>>
>>>> However, TC does not support dropped byte count, only
>>>> packet_count.
>>>> So we should be ok for now, but we must add this limitation to
>>>> the
>>>> documentation somewhere that it's clear we will not report byte
>>>> count
>>>> with TC offload.
>>>>
>>>> If you run this test without HW offload enabled you can see the
>>>> difference in behavior, and I think there should be none (or at
>>>> least
>>>> the corner cases should be documented).
>>>> You could also add a "- offloads disabled" variant of this test,
>>>> like
>>>> other features do and add some additional reasoning why it's
>>>> different there.
>>>>
>>>
>>> Sure, I will add.
>>
>> Guess this could go to the same howto/tc-offload.rst in the
>> limitations section?
>>
>
> Maybe also add comments here, don't waste time to find info from the
> other document :)
>
>>>
>>>>> +])
>>>>> +
>>>>> +OVS_TRAFFIC_VSWITCHD_STOP
>>>>> +AT_CLEANUP
>>>>> -- 
>>>>> 2.26.2
>>>>
>>>> This concludes my review of v5.
>>>>
>>>> //Eelco
>>>>
>>
Jianbo Liu June 28, 2022, 8:28 a.m. UTC | #7
On Tue, 2022-06-28 at 09:53 +0200, Eelco Chaudron wrote:
> 
> 
> On 27 Jun 2022, at 16:58, Jianbo Liu wrote:
> 
> > On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
> > > 
> > > 
> > > On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
> > > 
> > > > On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
> > > > > On 27 May 2022, at 11:00, Jianbo Liu wrote:
> > > > > 
> > > > > > OVS meters are created in advance and openflow rules refer
> > > > > > to
> > > > > > them
> > > > > > by
> > > > > > their unique ID. New tc_police API is used to offload them.
> > > > > > By
> > > > > > calling
> > > > > > the API, police actions are created and meters are mapped
> > > > > > to
> > > > > > them.
> > > > > > These actions then can be used in tc filter rules by the
> > > > > > index.
> > > > > > 
> > > > > > Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> > > > > > ---
> > > > > >  NEWS                             |  2 ++
> > > > > >  lib/dpif-netlink.c               | 31 +++++++++++++++++---
> > > > > > -
> > > > > >  tests/system-offloads-traffic.at | 48
> > > > > > ++++++++++++++++++++++++++++++++
> > > > > >  3 files changed, 76 insertions(+), 5 deletions(-)
> > > > > > 
> > > > > > diff --git a/NEWS b/NEWS
> > > > > > index eece0d0b2..dfd659d4e 100644
> > > > > > --- a/NEWS
> > > > > > +++ b/NEWS
> > > > > > @@ -29,6 +29,8 @@ Post-v2.17.0
> > > > > >     - Windows:
> > > > > >       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
> > > > > >       * IPv6 Geneve tunnel support.
> > > > > > +   - Linux datapath:
> > > > > > +     * Add offloading meter tc police.
> > > > > > 
> > > > > > 
> > > > > >  v2.17.0 - 17 Feb 2022
> > > > > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> > > > > > index 06e1e8ca0..0af9ee77e 100644
> > > > > > --- a/lib/dpif-netlink.c
> > > > > > +++ b/lib/dpif-netlink.c
> > > > > > @@ -4163,11 +4163,18 @@ static int
> > > > > >  dpif_netlink_meter_set(struct dpif *dpif_,
> > > > > > ofproto_meter_id
> > > > > > meter_id,
> > > > > >                         struct ofputil_meter_config
> > > > > > *config)
> > > > > >  {
> > > > > > +    int err;
> > > > > > +
> > > > > >      if (probe_broken_meters(dpif_)) {
> > > > > >          return ENOMEM;
> > > > > >      }
> > > > > > 
> > > > > > -    return dpif_netlink_meter_set__(dpif_, meter_id,
> > > > > > config);
> > > > > > +    err = dpif_netlink_meter_set__(dpif_, meter_id,
> > > > > > config);
> > > > > > +    if (!err && netdev_is_flow_api_enabled()) {
> > > > > > +        meter_offload_set(meter_id, config);
> > > > > 
> > > > > Although currently we always return 0, we should still
> > > > > account
> > > > > for it
> > > > > to change in the future, so we should set err to the return
> > > > > value.
> > > > > 
> > > > 
> > > > If there is err from meter_offload_set, it will be passed to
> > > > the
> > > > caller
> > > > of dpif_netlink_meter_set(). I don't agree with that, because
> > > > the
> > > > caller thinks meter_set operation fail, but actually not.
> > > > Besides,
> > > > we
> > > > allow the case that dp meter_set success, but offloading fails,
> > > > so
> > > > the
> > > > return the error of meter_offload_set seems unnecessary.
> > > 
> > > If this is the case, we should change the
> > > dpif_netlink_meter_set()
> > > API to return void.
> > > And add a comment to the function why it would not return an
> > > error:
> > > 
> > 
> > OK.
> > 
> > > --- a/lib/netdev-offload.c
> > > +++ b/lib/netdev-offload.c
> > > @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev *netdev)
> > >      return -1;
> > >  }
> > > 
> > > -int
> > > +void
> > >  meter_offload_set(ofproto_meter_id meter_id,
> > >                    struct ofputil_meter_config *config)
> > >  {
> > > @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id meter_id,
> > >             }
> > >          }
> > >      }
> > > -
> > > -    return 0;
> > > +    /* Offload APIs could fail, for example, because the offload
> > > is
> > > not
> > > +     * supported. This is fine, as the offload API should take
> > > care
> > > of this. */
> > >  }
> > > 
> > > 
> > > 
> > > > >  +        err = meter_offload_set(meter_id, config);
> > > > > 
> > > > > > +    }
> > > > > > +
> > > 
> > > <SNIP>
> > > 
> > > > > > diff --git a/tests/system-offloads-traffic.at
> > > > > > b/tests/system-
> > > > > > offloads-traffic.at
> > > > > > index 80bc1dd5c..7ec75340f 100644
> > > > > > --- a/tests/system-offloads-traffic.at
> > > > > > +++ b/tests/system-offloads-traffic.at
> > > > > > @@ -168,3 +168,51 @@ matchall
> > > > > >  ])
> > > > > >  OVS_TRAFFIC_VSWITCHD_STOP
> > > > > >  AT_CLEANUP
> > > > > > +
> > > > > > +AT_SETUP([offloads - check if meter offloading ])
> > > > > 
> > > > > Can we replace if with interface, as I keep on reading it as
> > > > > "if".
> > > > > 
> > > > 
> > > > Sure.
> > > > 
> > > > > > +AT_KEYWORDS([meter])
> > > > > > +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
> > > > > > +OVS_TRAFFIC_VSWITCHD_START()
> > > > > > +
> > > > > > +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
> > > > > > offload=true])
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1
> > > > > > pktps
> > > > > > bands=type=drop rate=1'])
> > > > > > +
> > > > > > +ADD_NAMESPACES(at_ns0, at_ns1)
> > > > > > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24",
> > > > > > "f0:00:00:01:01:01")
> > > > > > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24",
> > > > > > "f0:00:00:01:01:02")
> > > > > > +
> > > > > > +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
> > > > > > f0:00:00:01:01:02 dev p0])
> > > > > > +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
> > > > > > f0:00:00:01:01:01 dev p1])
> > > > > > +
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > "actions=normal"])
> > > > > > +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2
> > > > > > 10.1.1.2 |
> > > > > > FORMAT_PING], [0], [dnl
> > > > > > +10 packets transmitted, 10 received, 0% packet loss, time
> > > > > > 0ms
> > > > > > +])
> > > > > > +
> > > > > > +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ],
> > > > > > [nc0.pid])
> > > > > > +
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
> > > > > > actions=normal"])
> > > > > > +
> > > > > > +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u
> > > > > > 10.1.1.2
> > > > > > 5678 -p 6789])
> > > > > 
> > > > > Any specific reason why you need the sleep 0.5 here? Is it to
> > > > > be
> > > > > sure
> > > > > the flow is programmed?
> > > > 
> > > > I remember I added this because there are failures sometimes. I
> > > > don't
> > > > know why, but obviously they are related to this patchset. So I
> > > > added
> > > > the sleep to avoid them. It's only 0.5s, should be no problem,
> > > > right?
> > > 
> > > I did a lot of runs, but could not get it to fail without it. So
> > > if
> > > it fails in your case it would be good to investigate.
> > 
> > I can't reproduce today, though I run many times. It's related to
> > my
> > setup, I don't test on physical machine, but a virtual machine.
> > 
> > > 
> > > > > If so, you might just want to do a ovs-vsctl dump-flows and
> > > > > checke
> > > > > the output?
> > > > > 
> > > > 
> > > > I don't understand your qustion. I checked ovs-vsctl dump-flows
> > > > below.
> > 
> > If I remember correctly, the used is "never", not "0.001s".
> 
> 
> Forgot to answer this ;)
> 
> The kernel always reports “never” for this test case, that’s why I
> suggested the offload disabled test to see all the differences and
> understand why.
> 

Yes, it's "never" for the case with offload disabled. I have no idea
about the reason which causes differences, but I don't think it's
related to this patchset. Do you have any clue? Thanks!
Jianbo Liu June 28, 2022, 8:52 a.m. UTC | #8
On Tue, 2022-06-28 at 09:52 +0200, Eelco Chaudron wrote:
> 
> 
> On 27 Jun 2022, at 16:58, Jianbo Liu wrote:
> 
> > On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
> > > 
> > > 
> > > On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
> > > 
> > > > On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
> > > > > On 27 May 2022, at 11:00, Jianbo Liu wrote:
> > > > > 
> > > > > > OVS meters are created in advance and openflow rules refer
> > > > > > to
> > > > > > them
> > > > > > by
> > > > > > their unique ID. New tc_police API is used to offload them.
> > > > > > By
> > > > > > calling
> > > > > > the API, police actions are created and meters are mapped
> > > > > > to
> > > > > > them.
> > > > > > These actions then can be used in tc filter rules by the
> > > > > > index.
> > > > > > 
> > > > > > Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> > > > > > ---
> > > > > >  NEWS                             |  2 ++
> > > > > >  lib/dpif-netlink.c               | 31 +++++++++++++++++---
> > > > > > -
> > > > > >  tests/system-offloads-traffic.at | 48
> > > > > > ++++++++++++++++++++++++++++++++
> > > > > >  3 files changed, 76 insertions(+), 5 deletions(-)
> > > > > > 
> > > > > > diff --git a/NEWS b/NEWS
> > > > > > index eece0d0b2..dfd659d4e 100644
> > > > > > --- a/NEWS
> > > > > > +++ b/NEWS
> > > > > > @@ -29,6 +29,8 @@ Post-v2.17.0
> > > > > >     - Windows:
> > > > > >       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
> > > > > >       * IPv6 Geneve tunnel support.
> > > > > > +   - Linux datapath:
> > > > > > +     * Add offloading meter tc police.
> > > > > > 
> > > > > > 
> > > > > >  v2.17.0 - 17 Feb 2022
> > > > > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> > > > > > index 06e1e8ca0..0af9ee77e 100644
> > > > > > --- a/lib/dpif-netlink.c
> > > > > > +++ b/lib/dpif-netlink.c
> > > > > > @@ -4163,11 +4163,18 @@ static int
> > > > > >  dpif_netlink_meter_set(struct dpif *dpif_,
> > > > > > ofproto_meter_id
> > > > > > meter_id,
> > > > > >                         struct ofputil_meter_config
> > > > > > *config)
> > > > > >  {
> > > > > > +    int err;
> > > > > > +
> > > > > >      if (probe_broken_meters(dpif_)) {
> > > > > >          return ENOMEM;
> > > > > >      }
> > > > > > 
> > > > > > -    return dpif_netlink_meter_set__(dpif_, meter_id,
> > > > > > config);
> > > > > > +    err = dpif_netlink_meter_set__(dpif_, meter_id,
> > > > > > config);
> > > > > > +    if (!err && netdev_is_flow_api_enabled()) {
> > > > > > +        meter_offload_set(meter_id, config);
> > > > > 
> > > > > Although currently we always return 0, we should still
> > > > > account
> > > > > for it
> > > > > to change in the future, so we should set err to the return
> > > > > value.
> > > > > 
> > > > 
> > > > If there is err from meter_offload_set, it will be passed to
> > > > the
> > > > caller
> > > > of dpif_netlink_meter_set(). I don't agree with that, because
> > > > the
> > > > caller thinks meter_set operation fail, but actually not.
> > > > Besides,
> > > > we
> > > > allow the case that dp meter_set success, but offloading fails,
> > > > so
> > > > the
> > > > return the error of meter_offload_set seems unnecessary.
> > > 
> > > If this is the case, we should change the
> > > dpif_netlink_meter_set()
> > > API to return void.
> > > And add a comment to the function why it would not return an
> > > error:
> > > 
> > 
> > OK.
> > 
> > > --- a/lib/netdev-offload.c
> > > +++ b/lib/netdev-offload.c
> > > @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev *netdev)
> > >      return -1;
> > >  }
> > > 
> > > -int
> > > +void
> > >  meter_offload_set(ofproto_meter_id meter_id,
> > >                    struct ofputil_meter_config *config)
> > >  {
> > > @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id meter_id,
> > >             }
> > >          }
> > >      }
> > > -
> > > -    return 0;
> > > +    /* Offload APIs could fail, for example, because the offload
> > > is
> > > not
> > > +     * supported. This is fine, as the offload API should take
> > > care
> > > of this. */
> > >  }
> > > 
> > > 
> > > 
> > > > >  +        err = meter_offload_set(meter_id, config);
> > > > > 
> > > > > > +    }
> > > > > > +
> > > 
> > > <SNIP>
> > > 
> > > > > > diff --git a/tests/system-offloads-traffic.at
> > > > > > b/tests/system-
> > > > > > offloads-traffic.at
> > > > > > index 80bc1dd5c..7ec75340f 100644
> > > > > > --- a/tests/system-offloads-traffic.at
> > > > > > +++ b/tests/system-offloads-traffic.at
> > > > > > @@ -168,3 +168,51 @@ matchall
> > > > > >  ])
> > > > > >  OVS_TRAFFIC_VSWITCHD_STOP
> > > > > >  AT_CLEANUP
> > > > > > +
> > > > > > +AT_SETUP([offloads - check if meter offloading ])
> > > > > 
> > > > > Can we replace if with interface, as I keep on reading it as
> > > > > "if".
> > > > > 
> > > > 
> > > > Sure.
> > > > 
> > > > > > +AT_KEYWORDS([meter])
> > > > > > +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
> > > > > > +OVS_TRAFFIC_VSWITCHD_START()
> > > > > > +
> > > > > > +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
> > > > > > offload=true])
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1
> > > > > > pktps
> > > > > > bands=type=drop rate=1'])
> > > > > > +
> > > > > > +ADD_NAMESPACES(at_ns0, at_ns1)
> > > > > > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24",
> > > > > > "f0:00:00:01:01:01")
> > > > > > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24",
> > > > > > "f0:00:00:01:01:02")
> > > > > > +
> > > > > > +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
> > > > > > f0:00:00:01:01:02 dev p0])
> > > > > > +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
> > > > > > f0:00:00:01:01:01 dev p1])
> > > > > > +
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > "actions=normal"])
> > > > > > +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2
> > > > > > 10.1.1.2 |
> > > > > > FORMAT_PING], [0], [dnl
> > > > > > +10 packets transmitted, 10 received, 0% packet loss, time
> > > > > > 0ms
> > > > > > +])
> > > > > > +
> > > > > > +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ],
> > > > > > [nc0.pid])
> > > > > > +
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
> > > > > > actions=normal"])
> > > > > > +
> > > > > > +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u
> > > > > > 10.1.1.2
> > > > > > 5678 -p 6789])
> > > > > 
> > > > > Any specific reason why you need the sleep 0.5 here? Is it to
> > > > > be
> > > > > sure
> > > > > the flow is programmed?
> > > > 
> > > > I remember I added this because there are failures sometimes. I
> > > > don't
> > > > know why, but obviously they are related to this patchset. So I
> > > > added
> > > > the sleep to avoid them. It's only 0.5s, should be no problem,
> > > > right?
> > > 
> > > I did a lot of runs, but could not get it to fail without it. So
> > > if
> > > it fails in your case it would be good to investigate.
> > 
> > I can't reproduce today, though I run many times. It's related to
> > my
> > setup, I don't test on physical machine, but a virtual machine.
> > 
> > > 
> > > > > If so, you might just want to do a ovs-vsctl dump-flows and
> > > > > checke
> > > > > the output?
> > > > > 
> > > > 
> > > > I don't understand your qustion. I checked ovs-vsctl dump-flows
> > > > below.
> > 
> > If I remember correctly, the used is "never", not "0.001s".
> > 
> > > 
> > > I mean in the failure case without he 0.5 seconds, what do you
> > > get.
> > > 
> > > > > > +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
> > > > > > DUMP_CLEAN_SORTED], [0], [dnl
> > > > > > +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=n
> > > > > > o),
> > > > > > packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
> > > > > > +])
> > > > > 
> > > > > Here you verify the DP rule is inserted, but should you not
> > > > > also
> > > > > wait
> > > > > a second to make sure the meter is reset?
> > > > 
> > > > OK. I will add.
> > > > 
> > > > > Because in theory your could/should sent 11 packets in 1
> > > > > second,
> > > > > so
> > > > > 10 should be dropped!?
> > > > > This is the case in the kernel environment, but with TC the
> > > > > learning
> > > > > packet is not passing trough the TC meter (this might also be
> > > > > a
> > > > > corner case we need to document).
> > > > > 
> > > > 
> > > > Yes, it's the reason. But where to document?
> > > 
> > > Not sure where either, I guess in NEWS and maybe it’s time to add
> > > a
> > > howto/tc-offload.rst file?
> > > 
> > 
> > Maybe just adding comments here, to explain why it's the result.
> > And
> > it's not any related to "howto".
> 
> I think this is an important difference in behaviour we should
> document for the end-user. So that’s why I think we need this in the
> documentation somewhere.
> 
> > > > > > +for i in `seq 10`; do
> > > > > > +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678
> > > > > > -p
> > > > > > 6789])
> > > > > > +done
> > > > > > +
> > > > > > +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
> > > > > > DUMP_CLEAN_SORTED], [0], [dnl
> > > > > > +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=n
> > > > > > o),
> > > > > > packets:10, bytes:330, used:0.001s,
> > > > > > actions:outputmeter(0),3
> > > > > > +])
> > > > > > +
> > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e
> > > > > > 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
> > > > > > +OFPST_METER reply (OF1.3) (xid=0x2):
> > > > > > +meter:1 flow_count:1 packet_in_count:11 byte_in_count:377
> > > > > > duration:0.001s bands:
> > > > > 
> > > > > byte_in_count:517 is a lot larger than with kernel code, do
> > > > > we
> > > > > know
> > > > > why? We should document it.
> > > > > 
> > > > 
> > > > I think DP also count the bytes of mac layer.
> > > 
> > > Any idea why?
> > 
> > From ovs_meter_execute() in datapath/meter.c, the packet size of
> > band
> > stats is added by skb->len, which should include the len of mac
> > layer.
> 
> So this needs to be fixed in the kernel. I can remember someone from
> Nvidia was already looking at this?

Sorry, I don't know.

> 
> > > > > > +0: packet_count:9 byte_count:0
> > > > > 
> > > > > Here in theory we should report byte_count, like this:
> > > > > 
> > > > > 0: packet_count:10 byte_count:470
> > > > > 
> > > > > However, TC does not support dropped byte count, only
> > > > > packet_count.
> > > > > So we should be ok for now, but we must add this limitation
> > > > > to
> > > > > the
> > > > > documentation somewhere that it's clear we will not report
> > > > > byte
> > > > > count
> > > > > with TC offload.
> > > > > 
> > > > > If you run this test without HW offload enabled you can see
> > > > > the
> > > > > difference in behavior, and I think there should be none (or
> > > > > at
> > > > > least
> > > > > the corner cases should be documented).
> > > > > You could also add a "- offloads disabled" variant of this
> > > > > test,
> > > > > like
> > > > > other features do and add some additional reasoning why it's
> > > > > different there.
> > > > > 
> > > > 
> > > > Sure, I will add.
> > > 
> > > Guess this could go to the same howto/tc-offload.rst in the
> > > limitations section?
> > > 
> > 
> > Maybe also add comments here, don't waste time to find info from
> > the
> > other document :)
> 
> See above, end-users do not read test cases, so we need this in some
> user documentation.

Sorry, I don't understand. Are we talking about the tests, and why do
we get these expected results regarding the numbers of bytes and
packets? Maybe adding comments here is enough, user may not notice
these differences if he doesn't run this tests, right?  

> 
> > > > 
> > > > > > +])
> > > > > > +
> > > > > > +OVS_TRAFFIC_VSWITCHD_STOP
> > > > > > +AT_CLEANUP
> > > > > > -- 
> > > > > > 2.26.2
> > > > > 
> > > > > This concludes my review of v5.
> > > > > 
> > > > > //Eelco
> > > > > 
> > > 
>
Eelco Chaudron June 28, 2022, 11:15 a.m. UTC | #9
On 28 Jun 2022, at 10:28, Jianbo Liu wrote:

> On Tue, 2022-06-28 at 09:53 +0200, Eelco Chaudron wrote:
>>
>>
>> On 27 Jun 2022, at 16:58, Jianbo Liu wrote:
>>
>>> On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
>>>>
>>>>
>>>> On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
>>>>
>>>>> On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
>>>>>> On 27 May 2022, at 11:00, Jianbo Liu wrote:
>>>>>>
>>>>>>> OVS meters are created in advance and openflow rules refer
>>>>>>> to
>>>>>>> them
>>>>>>> by
>>>>>>> their unique ID. New tc_police API is used to offload them.
>>>>>>> By
>>>>>>> calling
>>>>>>> the API, police actions are created and meters are mapped
>>>>>>> to
>>>>>>> them.
>>>>>>> These actions then can be used in tc filter rules by the
>>>>>>> index.
>>>>>>>
>>>>>>> Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
>>>>>>> ---
>>>>>>>  NEWS                             |  2 ++
>>>>>>>  lib/dpif-netlink.c               | 31 +++++++++++++++++---
>>>>>>> -
>>>>>>>  tests/system-offloads-traffic.at | 48
>>>>>>> ++++++++++++++++++++++++++++++++
>>>>>>>  3 files changed, 76 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/NEWS b/NEWS
>>>>>>> index eece0d0b2..dfd659d4e 100644
>>>>>>> --- a/NEWS
>>>>>>> +++ b/NEWS
>>>>>>> @@ -29,6 +29,8 @@ Post-v2.17.0
>>>>>>>     - Windows:
>>>>>>>       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
>>>>>>>       * IPv6 Geneve tunnel support.
>>>>>>> +   - Linux datapath:
>>>>>>> +     * Add offloading meter tc police.
>>>>>>>
>>>>>>>
>>>>>>>  v2.17.0 - 17 Feb 2022
>>>>>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>>>>>> index 06e1e8ca0..0af9ee77e 100644
>>>>>>> --- a/lib/dpif-netlink.c
>>>>>>> +++ b/lib/dpif-netlink.c
>>>>>>> @@ -4163,11 +4163,18 @@ static int
>>>>>>>  dpif_netlink_meter_set(struct dpif *dpif_,
>>>>>>> ofproto_meter_id
>>>>>>> meter_id,
>>>>>>>                         struct ofputil_meter_config
>>>>>>> *config)
>>>>>>>  {
>>>>>>> +    int err;
>>>>>>> +
>>>>>>>      if (probe_broken_meters(dpif_)) {
>>>>>>>          return ENOMEM;
>>>>>>>      }
>>>>>>>
>>>>>>> -    return dpif_netlink_meter_set__(dpif_, meter_id,
>>>>>>> config);
>>>>>>> +    err = dpif_netlink_meter_set__(dpif_, meter_id,
>>>>>>> config);
>>>>>>> +    if (!err && netdev_is_flow_api_enabled()) {
>>>>>>> +        meter_offload_set(meter_id, config);
>>>>>>
>>>>>> Although currently we always return 0, we should still
>>>>>> account
>>>>>> for it
>>>>>> to change in the future, so we should set err to the return
>>>>>> value.
>>>>>>
>>>>>
>>>>> If there is err from meter_offload_set, it will be passed to
>>>>> the
>>>>> caller
>>>>> of dpif_netlink_meter_set(). I don't agree with that, because
>>>>> the
>>>>> caller thinks meter_set operation fail, but actually not.
>>>>> Besides,
>>>>> we
>>>>> allow the case that dp meter_set success, but offloading fails,
>>>>> so
>>>>> the
>>>>> return the error of meter_offload_set seems unnecessary.
>>>>
>>>> If this is the case, we should change the
>>>> dpif_netlink_meter_set()
>>>> API to return void.
>>>> And add a comment to the function why it would not return an
>>>> error:
>>>>
>>>
>>> OK.
>>>
>>>> --- a/lib/netdev-offload.c
>>>> +++ b/lib/netdev-offload.c
>>>> @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev *netdev)
>>>>      return -1;
>>>>  }
>>>>
>>>> -int
>>>> +void
>>>>  meter_offload_set(ofproto_meter_id meter_id,
>>>>                    struct ofputil_meter_config *config)
>>>>  {
>>>> @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id meter_id,
>>>>             }
>>>>          }
>>>>      }
>>>> -
>>>> -    return 0;
>>>> +    /* Offload APIs could fail, for example, because the offload
>>>> is
>>>> not
>>>> +     * supported. This is fine, as the offload API should take
>>>> care
>>>> of this. */
>>>>  }
>>>>
>>>>
>>>>
>>>>>>  +        err = meter_offload_set(meter_id, config);
>>>>>>
>>>>>>> +    }
>>>>>>> +
>>>>
>>>> <SNIP>
>>>>
>>>>>>> diff --git a/tests/system-offloads-traffic.at
>>>>>>> b/tests/system-
>>>>>>> offloads-traffic.at
>>>>>>> index 80bc1dd5c..7ec75340f 100644
>>>>>>> --- a/tests/system-offloads-traffic.at
>>>>>>> +++ b/tests/system-offloads-traffic.at
>>>>>>> @@ -168,3 +168,51 @@ matchall
>>>>>>>  ])
>>>>>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>>>>>  AT_CLEANUP
>>>>>>> +
>>>>>>> +AT_SETUP([offloads - check if meter offloading ])
>>>>>>
>>>>>> Can we replace if with interface, as I keep on reading it as
>>>>>> "if".
>>>>>>
>>>>>
>>>>> Sure.
>>>>>
>>>>>>> +AT_KEYWORDS([meter])
>>>>>>> +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
>>>>>>> +OVS_TRAFFIC_VSWITCHD_START()
>>>>>>> +
>>>>>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
>>>>>>> offload=true])
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1
>>>>>>> pktps
>>>>>>> bands=type=drop rate=1'])
>>>>>>> +
>>>>>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>>>>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24",
>>>>>>> "f0:00:00:01:01:01")
>>>>>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24",
>>>>>>> "f0:00:00:01:01:02")
>>>>>>> +
>>>>>>> +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
>>>>>>> f0:00:00:01:01:02 dev p0])
>>>>>>> +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
>>>>>>> f0:00:00:01:01:01 dev p1])
>>>>>>> +
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>>>>>> "actions=normal"])
>>>>>>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2
>>>>>>> 10.1.1.2 |
>>>>>>> FORMAT_PING], [0], [dnl
>>>>>>> +10 packets transmitted, 10 received, 0% packet loss, time
>>>>>>> 0ms
>>>>>>> +])
>>>>>>> +
>>>>>>> +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ],
>>>>>>> [nc0.pid])
>>>>>>> +
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>>>>>> "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
>>>>>>> actions=normal"])
>>>>>>> +
>>>>>>> +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u
>>>>>>> 10.1.1.2
>>>>>>> 5678 -p 6789])
>>>>>>
>>>>>> Any specific reason why you need the sleep 0.5 here? Is it to
>>>>>> be
>>>>>> sure
>>>>>> the flow is programmed?
>>>>>
>>>>> I remember I added this because there are failures sometimes. I
>>>>> don't
>>>>> know why, but obviously they are related to this patchset. So I
>>>>> added
>>>>> the sleep to avoid them. It's only 0.5s, should be no problem,
>>>>> right?
>>>>
>>>> I did a lot of runs, but could not get it to fail without it. So
>>>> if
>>>> it fails in your case it would be good to investigate.
>>>
>>> I can't reproduce today, though I run many times. It's related to
>>> my
>>> setup, I don't test on physical machine, but a virtual machine.
>>>
>>>>
>>>>>> If so, you might just want to do a ovs-vsctl dump-flows and
>>>>>> checke
>>>>>> the output?
>>>>>>
>>>>>
>>>>> I don't understand your qustion. I checked ovs-vsctl dump-flows
>>>>> below.
>>>
>>> If I remember correctly, the used is "never", not "0.001s".
>>
>>
>> Forgot to answer this ;)
>>
>> The kernel always reports “never” for this test case, that’s why I
>> suggested the offload disabled test to see all the differences and
>> understand why.
>>
>
> Yes, it's "never" for the case with offload disabled. I have no idea
> about the reason which causes differences, but I don't think it's
> related to this patchset. Do you have any clue? Thanks!

I have no proof, just a theory :) The first packet is handled by slow path, so the rule is not hit. The succeeding packets are dropped by the kernel, so they will not reach OVS’s flow processing.
Eelco Chaudron June 28, 2022, 11:18 a.m. UTC | #10
On 28 Jun 2022, at 10:52, Jianbo Liu wrote:

> On Tue, 2022-06-28 at 09:52 +0200, Eelco Chaudron wrote:
>>
>>
>> On 27 Jun 2022, at 16:58, Jianbo Liu wrote:
>>
>>> On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
>>>>
>>>>
>>>> On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
>>>>
>>>>> On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
>>>>>> On 27 May 2022, at 11:00, Jianbo Liu wrote:
>>>>>>
>>>>>>> OVS meters are created in advance and openflow rules refer
>>>>>>> to
>>>>>>> them
>>>>>>> by
>>>>>>> their unique ID. New tc_police API is used to offload them.
>>>>>>> By
>>>>>>> calling
>>>>>>> the API, police actions are created and meters are mapped
>>>>>>> to
>>>>>>> them.
>>>>>>> These actions then can be used in tc filter rules by the
>>>>>>> index.
>>>>>>>
>>>>>>> Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
>>>>>>> ---
>>>>>>>  NEWS                             |  2 ++
>>>>>>>  lib/dpif-netlink.c               | 31 +++++++++++++++++---
>>>>>>> -
>>>>>>>  tests/system-offloads-traffic.at | 48
>>>>>>> ++++++++++++++++++++++++++++++++
>>>>>>>  3 files changed, 76 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/NEWS b/NEWS
>>>>>>> index eece0d0b2..dfd659d4e 100644
>>>>>>> --- a/NEWS
>>>>>>> +++ b/NEWS
>>>>>>> @@ -29,6 +29,8 @@ Post-v2.17.0
>>>>>>>     - Windows:
>>>>>>>       * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
>>>>>>>       * IPv6 Geneve tunnel support.
>>>>>>> +   - Linux datapath:
>>>>>>> +     * Add offloading meter tc police.
>>>>>>>
>>>>>>>
>>>>>>>  v2.17.0 - 17 Feb 2022
>>>>>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>>>>>> index 06e1e8ca0..0af9ee77e 100644
>>>>>>> --- a/lib/dpif-netlink.c
>>>>>>> +++ b/lib/dpif-netlink.c
>>>>>>> @@ -4163,11 +4163,18 @@ static int
>>>>>>>  dpif_netlink_meter_set(struct dpif *dpif_,
>>>>>>> ofproto_meter_id
>>>>>>> meter_id,
>>>>>>>                         struct ofputil_meter_config
>>>>>>> *config)
>>>>>>>  {
>>>>>>> +    int err;
>>>>>>> +
>>>>>>>      if (probe_broken_meters(dpif_)) {
>>>>>>>          return ENOMEM;
>>>>>>>      }
>>>>>>>
>>>>>>> -    return dpif_netlink_meter_set__(dpif_, meter_id,
>>>>>>> config);
>>>>>>> +    err = dpif_netlink_meter_set__(dpif_, meter_id,
>>>>>>> config);
>>>>>>> +    if (!err && netdev_is_flow_api_enabled()) {
>>>>>>> +        meter_offload_set(meter_id, config);
>>>>>>
>>>>>> Although currently we always return 0, we should still
>>>>>> account
>>>>>> for it
>>>>>> to change in the future, so we should set err to the return
>>>>>> value.
>>>>>>
>>>>>
>>>>> If there is err from meter_offload_set, it will be passed to
>>>>> the
>>>>> caller
>>>>> of dpif_netlink_meter_set(). I don't agree with that, because
>>>>> the
>>>>> caller thinks meter_set operation fail, but actually not.
>>>>> Besides,
>>>>> we
>>>>> allow the case that dp meter_set success, but offloading fails,
>>>>> so
>>>>> the
>>>>> return the error of meter_offload_set seems unnecessary.
>>>>
>>>> If this is the case, we should change the
>>>> dpif_netlink_meter_set()
>>>> API to return void.
>>>> And add a comment to the function why it would not return an
>>>> error:
>>>>
>>>
>>> OK.
>>>
>>>> --- a/lib/netdev-offload.c
>>>> +++ b/lib/netdev-offload.c
>>>> @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev *netdev)
>>>>      return -1;
>>>>  }
>>>>
>>>> -int
>>>> +void
>>>>  meter_offload_set(ofproto_meter_id meter_id,
>>>>                    struct ofputil_meter_config *config)
>>>>  {
>>>> @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id meter_id,
>>>>             }
>>>>          }
>>>>      }
>>>> -
>>>> -    return 0;
>>>> +    /* Offload APIs could fail, for example, because the offload
>>>> is
>>>> not
>>>> +     * supported. This is fine, as the offload API should take
>>>> care
>>>> of this. */
>>>>  }
>>>>
>>>>
>>>>
>>>>>>  +        err = meter_offload_set(meter_id, config);
>>>>>>
>>>>>>> +    }
>>>>>>> +
>>>>
>>>> <SNIP>
>>>>
>>>>>>> diff --git a/tests/system-offloads-traffic.at
>>>>>>> b/tests/system-
>>>>>>> offloads-traffic.at
>>>>>>> index 80bc1dd5c..7ec75340f 100644
>>>>>>> --- a/tests/system-offloads-traffic.at
>>>>>>> +++ b/tests/system-offloads-traffic.at
>>>>>>> @@ -168,3 +168,51 @@ matchall
>>>>>>>  ])
>>>>>>>  OVS_TRAFFIC_VSWITCHD_STOP
>>>>>>>  AT_CLEANUP
>>>>>>> +
>>>>>>> +AT_SETUP([offloads - check if meter offloading ])
>>>>>>
>>>>>> Can we replace if with interface, as I keep on reading it as
>>>>>> "if".
>>>>>>
>>>>>
>>>>> Sure.
>>>>>
>>>>>>> +AT_KEYWORDS([meter])
>>>>>>> +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
>>>>>>> +OVS_TRAFFIC_VSWITCHD_START()
>>>>>>> +
>>>>>>> +AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-
>>>>>>> offload=true])
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1
>>>>>>> pktps
>>>>>>> bands=type=drop rate=1'])
>>>>>>> +
>>>>>>> +ADD_NAMESPACES(at_ns0, at_ns1)
>>>>>>> +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24",
>>>>>>> "f0:00:00:01:01:01")
>>>>>>> +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24",
>>>>>>> "f0:00:00:01:01:02")
>>>>>>> +
>>>>>>> +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
>>>>>>> f0:00:00:01:01:02 dev p0])
>>>>>>> +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
>>>>>>> f0:00:00:01:01:01 dev p1])
>>>>>>> +
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>>>>>> "actions=normal"])
>>>>>>> +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2
>>>>>>> 10.1.1.2 |
>>>>>>> FORMAT_PING], [0], [dnl
>>>>>>> +10 packets transmitted, 10 received, 0% packet loss, time
>>>>>>> 0ms
>>>>>>> +])
>>>>>>> +
>>>>>>> +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ],
>>>>>>> [nc0.pid])
>>>>>>> +
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
>>>>>>> "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1
>>>>>>> actions=normal"])
>>>>>>> +
>>>>>>> +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u
>>>>>>> 10.1.1.2
>>>>>>> 5678 -p 6789])
>>>>>>
>>>>>> Any specific reason why you need the sleep 0.5 here? Is it to
>>>>>> be
>>>>>> sure
>>>>>> the flow is programmed?
>>>>>
>>>>> I remember I added this because there are failures sometimes. I
>>>>> don't
>>>>> know why, but obviously they are related to this patchset. So I
>>>>> added
>>>>> the sleep to avoid them. It's only 0.5s, should be no problem,
>>>>> right?
>>>>
>>>> I did a lot of runs, but could not get it to fail without it. So
>>>> if
>>>> it fails in your case it would be good to investigate.
>>>
>>> I can't reproduce today, though I run many times. It's related to
>>> my
>>> setup, I don't test on physical machine, but a virtual machine.
>>>
>>>>
>>>>>> If so, you might just want to do a ovs-vsctl dump-flows and
>>>>>> checke
>>>>>> the output?
>>>>>>
>>>>>
>>>>> I don't understand your qustion. I checked ovs-vsctl dump-flows
>>>>> below.
>>>
>>> If I remember correctly, the used is "never", not "0.001s".
>>>
>>>>
>>>> I mean in the failure case without he 0.5 seconds, what do you
>>>> get.
>>>>
>>>>>>> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
>>>>>>> DUMP_CLEAN_SORTED], [0], [dnl
>>>>>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=n
>>>>>>> o),
>>>>>>> packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
>>>>>>> +])
>>>>>>
>>>>>> Here you verify the DP rule is inserted, but should you not
>>>>>> also
>>>>>> wait
>>>>>> a second to make sure the meter is reset?
>>>>>
>>>>> OK. I will add.
>>>>>
>>>>>> Because in theory your could/should sent 11 packets in 1
>>>>>> second,
>>>>>> so
>>>>>> 10 should be dropped!?
>>>>>> This is the case in the kernel environment, but with TC the
>>>>>> learning
>>>>>> packet is not passing trough the TC meter (this might also be
>>>>>> a
>>>>>> corner case we need to document).
>>>>>>
>>>>>
>>>>> Yes, it's the reason. But where to document?
>>>>
>>>> Not sure where either, I guess in NEWS and maybe it’s time to add
>>>> a
>>>> howto/tc-offload.rst file?
>>>>
>>>
>>> Maybe just adding comments here, to explain why it's the result.
>>> And
>>> it's not any related to "howto".
>>
>> I think this is an important difference in behaviour we should
>> document for the end-user. So that’s why I think we need this in the
>> documentation somewhere.
>>
>>>>>>> +for i in `seq 10`; do
>>>>>>> +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678
>>>>>>> -p
>>>>>>> 6789])
>>>>>>> +done
>>>>>>> +
>>>>>>> +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
>>>>>>> DUMP_CLEAN_SORTED], [0], [dnl
>>>>>>> +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=n
>>>>>>> o),
>>>>>>> packets:10, bytes:330, used:0.001s,
>>>>>>> actions:outputmeter(0),3
>>>>>>> +])
>>>>>>> +
>>>>>>> +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e
>>>>>>> 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
>>>>>>> +OFPST_METER reply (OF1.3) (xid=0x2):
>>>>>>> +meter:1 flow_count:1 packet_in_count:11 byte_in_count:377
>>>>>>> duration:0.001s bands:
>>>>>>
>>>>>> byte_in_count:517 is a lot larger than with kernel code, do
>>>>>> we
>>>>>> know
>>>>>> why? We should document it.
>>>>>>
>>>>>
>>>>> I think DP also count the bytes of mac layer.
>>>>
>>>> Any idea why?
>>>
>>> From ovs_meter_execute() in datapath/meter.c, the packet size of
>>> band
>>> stats is added by skb->len, which should include the len of mac
>>> layer.
>>
>> So this needs to be fixed in the kernel. I can remember someone from
>> Nvidia was already looking at this?
>
> Sorry, I don't know.
>
>>
>>>>>>> +0: packet_count:9 byte_count:0
>>>>>>
>>>>>> Here in theory we should report byte_count, like this:
>>>>>>
>>>>>> 0: packet_count:10 byte_count:470
>>>>>>
>>>>>> However, TC does not support dropped byte count, only
>>>>>> packet_count.
>>>>>> So we should be ok for now, but we must add this limitation
>>>>>> to
>>>>>> the
>>>>>> documentation somewhere that it's clear we will not report
>>>>>> byte
>>>>>> count
>>>>>> with TC offload.
>>>>>>
>>>>>> If you run this test without HW offload enabled you can see
>>>>>> the
>>>>>> difference in behavior, and I think there should be none (or
>>>>>> at
>>>>>> least
>>>>>> the corner cases should be documented).
>>>>>> You could also add a "- offloads disabled" variant of this
>>>>>> test,
>>>>>> like
>>>>>> other features do and add some additional reasoning why it's
>>>>>> different there.
>>>>>>
>>>>>
>>>>> Sure, I will add.
>>>>
>>>> Guess this could go to the same howto/tc-offload.rst in the
>>>> limitations section?
>>>>
>>>
>>> Maybe also add comments here, don't waste time to find info from
>>> the
>>> other document :)
>>
>> See above, end-users do not read test cases, so we need this in some
>> user documentation.
>
> Sorry, I don't understand. Are we talking about the tests, and why do
> we get these expected results regarding the numbers of bytes and
> packets? Maybe adding comments here is enough, user may not notice
> these differences if he doesn't run this tests, right?

I’m talking about the fact that the “ovs-ofctl -O OpenFlow13 meter-stats br0” is not reporting byte count when hwoffload is enabled. This is a missing feature/difference from previous behavior and we should document this for the end-user.

>>>>>
>>>>>>> +])
>>>>>>> +
>>>>>>> +OVS_TRAFFIC_VSWITCHD_STOP
>>>>>>> +AT_CLEANUP
>>>>>>> -- 
>>>>>>> 2.26.2
>>>>>>
>>>>>> This concludes my review of v5.
>>>>>>
>>>>>> //Eelco
>>>>>>
>>>>
>>
Jianbo Liu June 29, 2022, 1:18 a.m. UTC | #11
On Tue, 2022-06-28 at 13:15 +0200, Eelco Chaudron wrote:
> 
> 
> On 28 Jun 2022, at 10:28, Jianbo Liu wrote:
> 
> > On Tue, 2022-06-28 at 09:53 +0200, Eelco Chaudron wrote:
> > > 
> > > 
> > > On 27 Jun 2022, at 16:58, Jianbo Liu wrote:
> > > 
> > > > On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
> > > > > 
> > > > > 
> > > > > On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
> > > > > 
> > > > > > On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
> > > > > > > On 27 May 2022, at 11:00, Jianbo Liu wrote:
> > > > > > > 
> > > > > > > > OVS meters are created in advance and openflow rules
> > > > > > > > refer
> > > > > > > > to
> > > > > > > > them
> > > > > > > > by
> > > > > > > > their unique ID. New tc_police API is used to offload
> > > > > > > > them.
> > > > > > > > By
> > > > > > > > calling
> > > > > > > > the API, police actions are created and meters are
> > > > > > > > mapped
> > > > > > > > to
> > > > > > > > them.
> > > > > > > > These actions then can be used in tc filter rules by
> > > > > > > > the
> > > > > > > > index.
> > > > > > > > 
> > > > > > > > Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> > > > > > > > ---
> > > > > > > >  NEWS                             |  2 ++
> > > > > > > >  lib/dpif-netlink.c               | 31
> > > > > > > > +++++++++++++++++---
> > > > > > > > -
> > > > > > > >  tests/system-offloads-traffic.at | 48
> > > > > > > > ++++++++++++++++++++++++++++++++
> > > > > > > >  3 files changed, 76 insertions(+), 5 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/NEWS b/NEWS
> > > > > > > > index eece0d0b2..dfd659d4e 100644
> > > > > > > > --- a/NEWS
> > > > > > > > +++ b/NEWS
> > > > > > > > @@ -29,6 +29,8 @@ Post-v2.17.0
> > > > > > > >     - Windows:
> > > > > > > >       * Conntrack support for TCPv6, UDPv6, ICMPv6,
> > > > > > > > FTPv6.
> > > > > > > >       * IPv6 Geneve tunnel support.
> > > > > > > > +   - Linux datapath:
> > > > > > > > +     * Add offloading meter tc police.
> > > > > > > > 
> > > > > > > > 
> > > > > > > >  v2.17.0 - 17 Feb 2022
> > > > > > > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> > > > > > > > index 06e1e8ca0..0af9ee77e 100644
> > > > > > > > --- a/lib/dpif-netlink.c
> > > > > > > > +++ b/lib/dpif-netlink.c
> > > > > > > > @@ -4163,11 +4163,18 @@ static int
> > > > > > > >  dpif_netlink_meter_set(struct dpif *dpif_,
> > > > > > > > ofproto_meter_id
> > > > > > > > meter_id,
> > > > > > > >                         struct ofputil_meter_config
> > > > > > > > *config)
> > > > > > > >  {
> > > > > > > > +    int err;
> > > > > > > > +
> > > > > > > >      if (probe_broken_meters(dpif_)) {
> > > > > > > >          return ENOMEM;
> > > > > > > >      }
> > > > > > > > 
> > > > > > > > -    return dpif_netlink_meter_set__(dpif_, meter_id,
> > > > > > > > config);
> > > > > > > > +    err = dpif_netlink_meter_set__(dpif_, meter_id,
> > > > > > > > config);
> > > > > > > > +    if (!err && netdev_is_flow_api_enabled()) {
> > > > > > > > +        meter_offload_set(meter_id, config);
> > > > > > > 
> > > > > > > Although currently we always return 0, we should still
> > > > > > > account
> > > > > > > for it
> > > > > > > to change in the future, so we should set err to the
> > > > > > > return
> > > > > > > value.
> > > > > > > 
> > > > > > 
> > > > > > If there is err from meter_offload_set, it will be passed
> > > > > > to
> > > > > > the
> > > > > > caller
> > > > > > of dpif_netlink_meter_set(). I don't agree with that,
> > > > > > because
> > > > > > the
> > > > > > caller thinks meter_set operation fail, but actually not.
> > > > > > Besides,
> > > > > > we
> > > > > > allow the case that dp meter_set success, but offloading
> > > > > > fails,
> > > > > > so
> > > > > > the
> > > > > > return the error of meter_offload_set seems unnecessary.
> > > > > 
> > > > > If this is the case, we should change the
> > > > > dpif_netlink_meter_set()
> > > > > API to return void.
> > > > > And add a comment to the function why it would not return an
> > > > > error:
> > > > > 
> > > > 
> > > > OK.
> > > > 
> > > > > --- a/lib/netdev-offload.c
> > > > > +++ b/lib/netdev-offload.c
> > > > > @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev
> > > > > *netdev)
> > > > >      return -1;
> > > > >  }
> > > > > 
> > > > > -int
> > > > > +void
> > > > >  meter_offload_set(ofproto_meter_id meter_id,
> > > > >                    struct ofputil_meter_config *config)
> > > > >  {
> > > > > @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id
> > > > > meter_id,
> > > > >             }
> > > > >          }
> > > > >      }
> > > > > -
> > > > > -    return 0;
> > > > > +    /* Offload APIs could fail, for example, because the
> > > > > offload
> > > > > is
> > > > > not
> > > > > +     * supported. This is fine, as the offload API should
> > > > > take
> > > > > care
> > > > > of this. */
> > > > >  }
> > > > > 
> > > > > 
> > > > > 
> > > > > > >  +        err = meter_offload_set(meter_id, config);
> > > > > > > 
> > > > > > > > +    }
> > > > > > > > +
> > > > > 
> > > > > <SNIP>
> > > > > 
> > > > > > > > diff --git a/tests/system-offloads-traffic.at
> > > > > > > > b/tests/system-
> > > > > > > > offloads-traffic.at
> > > > > > > > index 80bc1dd5c..7ec75340f 100644
> > > > > > > > --- a/tests/system-offloads-traffic.at
> > > > > > > > +++ b/tests/system-offloads-traffic.at
> > > > > > > > @@ -168,3 +168,51 @@ matchall
> > > > > > > >  ])
> > > > > > > >  OVS_TRAFFIC_VSWITCHD_STOP
> > > > > > > >  AT_CLEANUP
> > > > > > > > +
> > > > > > > > +AT_SETUP([offloads - check if meter offloading ])
> > > > > > > 
> > > > > > > Can we replace if with interface, as I keep on reading it
> > > > > > > as
> > > > > > > "if".
> > > > > > > 
> > > > > > 
> > > > > > Sure.
> > > > > > 
> > > > > > > > +AT_KEYWORDS([meter])
> > > > > > > > +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
> > > > > > > > +OVS_TRAFFIC_VSWITCHD_START()
> > > > > > > > +
> > > > > > > > +AT_CHECK([ovs-vsctl set Open_vSwitch .
> > > > > > > > other_config:hw-
> > > > > > > > offload=true])
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0
> > > > > > > > 'meter=1
> > > > > > > > pktps
> > > > > > > > bands=type=drop rate=1'])
> > > > > > > > +
> > > > > > > > +ADD_NAMESPACES(at_ns0, at_ns1)
> > > > > > > > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24",
> > > > > > > > "f0:00:00:01:01:01")
> > > > > > > > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24",
> > > > > > > > "f0:00:00:01:01:02")
> > > > > > > > +
> > > > > > > > +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
> > > > > > > > f0:00:00:01:01:02 dev p0])
> > > > > > > > +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
> > > > > > > > f0:00:00:01:01:01 dev p1])
> > > > > > > > +
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > > > "actions=normal"])
> > > > > > > > +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2
> > > > > > > > 10.1.1.2 |
> > > > > > > > FORMAT_PING], [0], [dnl
> > > > > > > > +10 packets transmitted, 10 received, 0% packet loss,
> > > > > > > > time
> > > > > > > > 0ms
> > > > > > > > +])
> > > > > > > > +
> > > > > > > > +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null
> > > > > > > > ],
> > > > > > > > [nc0.pid])
> > > > > > > > +
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > > > "priority=10,in_port=ovs-p0,udp
> > > > > > > > actions=meter:1,normal"])
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > > > "priority=1
> > > > > > > > actions=normal"])
> > > > > > > > +
> > > > > > > > +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -
> > > > > > > > u
> > > > > > > > 10.1.1.2
> > > > > > > > 5678 -p 6789])
> > > > > > > 
> > > > > > > Any specific reason why you need the sleep 0.5 here? Is
> > > > > > > it to
> > > > > > > be
> > > > > > > sure
> > > > > > > the flow is programmed?
> > > > > > 
> > > > > > I remember I added this because there are failures
> > > > > > sometimes. I
> > > > > > don't
> > > > > > know why, but obviously they are related to this patchset.
> > > > > > So I
> > > > > > added
> > > > > > the sleep to avoid them. It's only 0.5s, should be no
> > > > > > problem,
> > > > > > right?
> > > > > 
> > > > > I did a lot of runs, but could not get it to fail without it.
> > > > > So
> > > > > if
> > > > > it fails in your case it would be good to investigate.
> > > > 
> > > > I can't reproduce today, though I run many times. It's related
> > > > to
> > > > my
> > > > setup, I don't test on physical machine, but a virtual machine.
> > > > 
> > > > > 
> > > > > > > If so, you might just want to do a ovs-vsctl dump-flows
> > > > > > > and
> > > > > > > checke
> > > > > > > the output?
> > > > > > > 
> > > > > > 
> > > > > > I don't understand your qustion. I checked ovs-vsctl dump-
> > > > > > flows
> > > > > > below.
> > > > 
> > > > If I remember correctly, the used is "never", not "0.001s".
> > > 
> > > 
> > > Forgot to answer this ;)
> > > 
> > > The kernel always reports “never” for this test case, that’s why
> > > I
> > > suggested the offload disabled test to see all the differences
> > > and
> > > understand why.
> > > 
> > 
> > Yes, it's "never" for the case with offload disabled. I have no
> > idea
> > about the reason which causes differences, but I don't think it's
> > related to this patchset. Do you have any clue? Thanks!
> 
> I have no proof, just a theory :) The first packet is handled by slow
> path, so the rule is not hit. The succeeding packets are dropped by
> the kernel, so they will not reach OVS’s flow processing.
> 

You are right, I tested. I will add the none-offload tests to show the
difference. Thanks!
Jianbo Liu June 29, 2022, 1:20 a.m. UTC | #12
On Tue, 2022-06-28 at 13:18 +0200, Eelco Chaudron wrote:
> 
> 
> On 28 Jun 2022, at 10:52, Jianbo Liu wrote:
> 
> > On Tue, 2022-06-28 at 09:52 +0200, Eelco Chaudron wrote:
> > > 
> > > 
> > > On 27 Jun 2022, at 16:58, Jianbo Liu wrote:
> > > 
> > > > On Mon, 2022-06-27 at 11:32 +0200, Eelco Chaudron wrote:
> > > > > 
> > > > > 
> > > > > On 21 Jun 2022, at 10:22, Jianbo Liu wrote:
> > > > > 
> > > > > > On Mon, 2022-06-20 at 12:18 +0200, Eelco Chaudron wrote:
> > > > > > > On 27 May 2022, at 11:00, Jianbo Liu wrote:
> > > > > > > 
> > > > > > > > OVS meters are created in advance and openflow rules
> > > > > > > > refer
> > > > > > > > to
> > > > > > > > them
> > > > > > > > by
> > > > > > > > their unique ID. New tc_police API is used to offload
> > > > > > > > them.
> > > > > > > > By
> > > > > > > > calling
> > > > > > > > the API, police actions are created and meters are
> > > > > > > > mapped
> > > > > > > > to
> > > > > > > > them.
> > > > > > > > These actions then can be used in tc filter rules by
> > > > > > > > the
> > > > > > > > index.
> > > > > > > > 
> > > > > > > > Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
> > > > > > > > ---
> > > > > > > >  NEWS                             |  2 ++
> > > > > > > >  lib/dpif-netlink.c               | 31
> > > > > > > > +++++++++++++++++---
> > > > > > > > -
> > > > > > > >  tests/system-offloads-traffic.at | 48
> > > > > > > > ++++++++++++++++++++++++++++++++
> > > > > > > >  3 files changed, 76 insertions(+), 5 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/NEWS b/NEWS
> > > > > > > > index eece0d0b2..dfd659d4e 100644
> > > > > > > > --- a/NEWS
> > > > > > > > +++ b/NEWS
> > > > > > > > @@ -29,6 +29,8 @@ Post-v2.17.0
> > > > > > > >     - Windows:
> > > > > > > >       * Conntrack support for TCPv6, UDPv6, ICMPv6,
> > > > > > > > FTPv6.
> > > > > > > >       * IPv6 Geneve tunnel support.
> > > > > > > > +   - Linux datapath:
> > > > > > > > +     * Add offloading meter tc police.
> > > > > > > > 
> > > > > > > > 
> > > > > > > >  v2.17.0 - 17 Feb 2022
> > > > > > > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> > > > > > > > index 06e1e8ca0..0af9ee77e 100644
> > > > > > > > --- a/lib/dpif-netlink.c
> > > > > > > > +++ b/lib/dpif-netlink.c
> > > > > > > > @@ -4163,11 +4163,18 @@ static int
> > > > > > > >  dpif_netlink_meter_set(struct dpif *dpif_,
> > > > > > > > ofproto_meter_id
> > > > > > > > meter_id,
> > > > > > > >                         struct ofputil_meter_config
> > > > > > > > *config)
> > > > > > > >  {
> > > > > > > > +    int err;
> > > > > > > > +
> > > > > > > >      if (probe_broken_meters(dpif_)) {
> > > > > > > >          return ENOMEM;
> > > > > > > >      }
> > > > > > > > 
> > > > > > > > -    return dpif_netlink_meter_set__(dpif_, meter_id,
> > > > > > > > config);
> > > > > > > > +    err = dpif_netlink_meter_set__(dpif_, meter_id,
> > > > > > > > config);
> > > > > > > > +    if (!err && netdev_is_flow_api_enabled()) {
> > > > > > > > +        meter_offload_set(meter_id, config);
> > > > > > > 
> > > > > > > Although currently we always return 0, we should still
> > > > > > > account
> > > > > > > for it
> > > > > > > to change in the future, so we should set err to the
> > > > > > > return
> > > > > > > value.
> > > > > > > 
> > > > > > 
> > > > > > If there is err from meter_offload_set, it will be passed
> > > > > > to
> > > > > > the
> > > > > > caller
> > > > > > of dpif_netlink_meter_set(). I don't agree with that,
> > > > > > because
> > > > > > the
> > > > > > caller thinks meter_set operation fail, but actually not.
> > > > > > Besides,
> > > > > > we
> > > > > > allow the case that dp meter_set success, but offloading
> > > > > > fails,
> > > > > > so
> > > > > > the
> > > > > > return the error of meter_offload_set seems unnecessary.
> > > > > 
> > > > > If this is the case, we should change the
> > > > > dpif_netlink_meter_set()
> > > > > API to return void.
> > > > > And add a comment to the function why it would not return an
> > > > > error:
> > > > > 
> > > > 
> > > > OK.
> > > > 
> > > > > --- a/lib/netdev-offload.c
> > > > > +++ b/lib/netdev-offload.c
> > > > > @@ -196,7 +196,7 @@ netdev_assign_flow_api(struct netdev
> > > > > *netdev)
> > > > >      return -1;
> > > > >  }
> > > > > 
> > > > > -int
> > > > > +void
> > > > >  meter_offload_set(ofproto_meter_id meter_id,
> > > > >                    struct ofputil_meter_config *config)
> > > > >  {
> > > > > @@ -212,8 +212,8 @@ meter_offload_set(ofproto_meter_id
> > > > > meter_id,
> > > > >             }
> > > > >          }
> > > > >      }
> > > > > -
> > > > > -    return 0;
> > > > > +    /* Offload APIs could fail, for example, because the
> > > > > offload
> > > > > is
> > > > > not
> > > > > +     * supported. This is fine, as the offload API should
> > > > > take
> > > > > care
> > > > > of this. */
> > > > >  }
> > > > > 
> > > > > 
> > > > > 
> > > > > > >  +        err = meter_offload_set(meter_id, config);
> > > > > > > 
> > > > > > > > +    }
> > > > > > > > +
> > > > > 
> > > > > <SNIP>
> > > > > 
> > > > > > > > diff --git a/tests/system-offloads-traffic.at
> > > > > > > > b/tests/system-
> > > > > > > > offloads-traffic.at
> > > > > > > > index 80bc1dd5c..7ec75340f 100644
> > > > > > > > --- a/tests/system-offloads-traffic.at
> > > > > > > > +++ b/tests/system-offloads-traffic.at
> > > > > > > > @@ -168,3 +168,51 @@ matchall
> > > > > > > >  ])
> > > > > > > >  OVS_TRAFFIC_VSWITCHD_STOP
> > > > > > > >  AT_CLEANUP
> > > > > > > > +
> > > > > > > > +AT_SETUP([offloads - check if meter offloading ])
> > > > > > > 
> > > > > > > Can we replace if with interface, as I keep on reading it
> > > > > > > as
> > > > > > > "if".
> > > > > > > 
> > > > > > 
> > > > > > Sure.
> > > > > > 
> > > > > > > > +AT_KEYWORDS([meter])
> > > > > > > > +AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
> > > > > > > > +OVS_TRAFFIC_VSWITCHD_START()
> > > > > > > > +
> > > > > > > > +AT_CHECK([ovs-vsctl set Open_vSwitch .
> > > > > > > > other_config:hw-
> > > > > > > > offload=true])
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0
> > > > > > > > 'meter=1
> > > > > > > > pktps
> > > > > > > > bands=type=drop rate=1'])
> > > > > > > > +
> > > > > > > > +ADD_NAMESPACES(at_ns0, at_ns1)
> > > > > > > > +ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24",
> > > > > > > > "f0:00:00:01:01:01")
> > > > > > > > +ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24",
> > > > > > > > "f0:00:00:01:01:02")
> > > > > > > > +
> > > > > > > > +NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr
> > > > > > > > f0:00:00:01:01:02 dev p0])
> > > > > > > > +NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr
> > > > > > > > f0:00:00:01:01:01 dev p1])
> > > > > > > > +
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > > > "actions=normal"])
> > > > > > > > +NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2
> > > > > > > > 10.1.1.2 |
> > > > > > > > FORMAT_PING], [0], [dnl
> > > > > > > > +10 packets transmitted, 10 received, 0% packet loss,
> > > > > > > > time
> > > > > > > > 0ms
> > > > > > > > +])
> > > > > > > > +
> > > > > > > > +NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null
> > > > > > > > ],
> > > > > > > > [nc0.pid])
> > > > > > > > +
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > > > "priority=10,in_port=ovs-p0,udp
> > > > > > > > actions=meter:1,normal"])
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0
> > > > > > > > "priority=1
> > > > > > > > actions=normal"])
> > > > > > > > +
> > > > > > > > +NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -
> > > > > > > > u
> > > > > > > > 10.1.1.2
> > > > > > > > 5678 -p 6789])
> > > > > > > 
> > > > > > > Any specific reason why you need the sleep 0.5 here? Is
> > > > > > > it to
> > > > > > > be
> > > > > > > sure
> > > > > > > the flow is programmed?
> > > > > > 
> > > > > > I remember I added this because there are failures
> > > > > > sometimes. I
> > > > > > don't
> > > > > > know why, but obviously they are related to this patchset.
> > > > > > So I
> > > > > > added
> > > > > > the sleep to avoid them. It's only 0.5s, should be no
> > > > > > problem,
> > > > > > right?
> > > > > 
> > > > > I did a lot of runs, but could not get it to fail without it.
> > > > > So
> > > > > if
> > > > > it fails in your case it would be good to investigate.
> > > > 
> > > > I can't reproduce today, though I run many times. It's related
> > > > to
> > > > my
> > > > setup, I don't test on physical machine, but a virtual machine.
> > > > 
> > > > > 
> > > > > > > If so, you might just want to do a ovs-vsctl dump-flows
> > > > > > > and
> > > > > > > checke
> > > > > > > the output?
> > > > > > > 
> > > > > > 
> > > > > > I don't understand your qustion. I checked ovs-vsctl dump-
> > > > > > flows
> > > > > > below.
> > > > 
> > > > If I remember correctly, the used is "never", not "0.001s".
> > > > 
> > > > > 
> > > > > I mean in the failure case without he 0.5 seconds, what do
> > > > > you
> > > > > get.
> > > > > 
> > > > > > > > +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
> > > > > > > > DUMP_CLEAN_SORTED], [0], [dnl
> > > > > > > > +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,fr
> > > > > > > > ag=n
> > > > > > > > o),
> > > > > > > > packets:0, bytes:0, used:0.001s,
> > > > > > > > actions:outputmeter(0),3
> > > > > > > > +])
> > > > > > > 
> > > > > > > Here you verify the DP rule is inserted, but should you
> > > > > > > not
> > > > > > > also
> > > > > > > wait
> > > > > > > a second to make sure the meter is reset?
> > > > > > 
> > > > > > OK. I will add.
> > > > > > 
> > > > > > > Because in theory your could/should sent 11 packets in 1
> > > > > > > second,
> > > > > > > so
> > > > > > > 10 should be dropped!?
> > > > > > > This is the case in the kernel environment, but with TC
> > > > > > > the
> > > > > > > learning
> > > > > > > packet is not passing trough the TC meter (this might
> > > > > > > also be
> > > > > > > a
> > > > > > > corner case we need to document).
> > > > > > > 
> > > > > > 
> > > > > > Yes, it's the reason. But where to document?
> > > > > 
> > > > > Not sure where either, I guess in NEWS and maybe it’s time to
> > > > > add
> > > > > a
> > > > > howto/tc-offload.rst file?
> > > > > 
> > > > 
> > > > Maybe just adding comments here, to explain why it's the
> > > > result.
> > > > And
> > > > it's not any related to "howto".
> > > 
> > > I think this is an important difference in behaviour we should
> > > document for the end-user. So that’s why I think we need this in
> > > the
> > > documentation somewhere.
> > > 
> > > > > > > > +for i in `seq 10`; do
> > > > > > > > +NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2
> > > > > > > > 5678
> > > > > > > > -p
> > > > > > > > 6789])
> > > > > > > > +done
> > > > > > > > +
> > > > > > > > +AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" |
> > > > > > > > DUMP_CLEAN_SORTED], [0], [dnl
> > > > > > > > +in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,fr
> > > > > > > > ag=n
> > > > > > > > o),
> > > > > > > > packets:10, bytes:330, used:0.001s,
> > > > > > > > actions:outputmeter(0),3
> > > > > > > > +])
> > > > > > > > +
> > > > > > > > +AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 |
> > > > > > > > sed -e
> > > > > > > > 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0],
> > > > > > > > [dnl
> > > > > > > > +OFPST_METER reply (OF1.3) (xid=0x2):
> > > > > > > > +meter:1 flow_count:1 packet_in_count:11
> > > > > > > > byte_in_count:377
> > > > > > > > duration:0.001s bands:
> > > > > > > 
> > > > > > > byte_in_count:517 is a lot larger than with kernel code,
> > > > > > > do
> > > > > > > we
> > > > > > > know
> > > > > > > why? We should document it.
> > > > > > > 
> > > > > > 
> > > > > > I think DP also count the bytes of mac layer.
> > > > > 
> > > > > Any idea why?
> > > > 
> > > > From ovs_meter_execute() in datapath/meter.c, the packet size
> > > > of
> > > > band
> > > > stats is added by skb->len, which should include the len of mac
> > > > layer.
> > > 
> > > So this needs to be fixed in the kernel. I can remember someone
> > > from
> > > Nvidia was already looking at this?
> > 
> > Sorry, I don't know.
> > 
> > > 
> > > > > > > > +0: packet_count:9 byte_count:0
> > > > > > > 
> > > > > > > Here in theory we should report byte_count, like this:
> > > > > > > 
> > > > > > > 0: packet_count:10 byte_count:470
> > > > > > > 
> > > > > > > However, TC does not support dropped byte count, only
> > > > > > > packet_count.
> > > > > > > So we should be ok for now, but we must add this
> > > > > > > limitation
> > > > > > > to
> > > > > > > the
> > > > > > > documentation somewhere that it's clear we will not
> > > > > > > report
> > > > > > > byte
> > > > > > > count
> > > > > > > with TC offload.
> > > > > > > 
> > > > > > > If you run this test without HW offload enabled you can
> > > > > > > see
> > > > > > > the
> > > > > > > difference in behavior, and I think there should be none
> > > > > > > (or
> > > > > > > at
> > > > > > > least
> > > > > > > the corner cases should be documented).
> > > > > > > You could also add a "- offloads disabled" variant of
> > > > > > > this
> > > > > > > test,
> > > > > > > like
> > > > > > > other features do and add some additional reasoning why
> > > > > > > it's
> > > > > > > different there.
> > > > > > > 
> > > > > > 
> > > > > > Sure, I will add.
> > > > > 
> > > > > Guess this could go to the same howto/tc-offload.rst in the
> > > > > limitations section?
> > > > > 
> > > > 
> > > > Maybe also add comments here, don't waste time to find info
> > > > from
> > > > the
> > > > other document :)
> > > 
> > > See above, end-users do not read test cases, so we need this in
> > > some
> > > user documentation.
> > 
> > Sorry, I don't understand. Are we talking about the tests, and why
> > do
> > we get these expected results regarding the numbers of bytes and
> > packets? Maybe adding comments here is enough, user may not notice
> > these differences if he doesn't run this tests, right?
> 
> I’m talking about the fact that the “ovs-ofctl -O OpenFlow13 meter-
> stats br0” is not reporting byte count when hwoffload is enabled.
> This is a missing feature/difference from previous behavior and we
> should document this for the end-user.
> 

OK, I see. Will add howto/tc-offload.rst to document the feature of
meter offload and its limitation.

> > > > > > 
> > > > > > > > +])
> > > > > > > > +
> > > > > > > > +OVS_TRAFFIC_VSWITCHD_STOP
> > > > > > > > +AT_CLEANUP
> > > > > > > > -- 
> > > > > > > > 2.26.2
> > > > > > > 
> > > > > > > This concludes my review of v5.
> > > > > > > 
> > > > > > > //Eelco
> > > > > > > 
> > > > > 
> > > 
>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index eece0d0b2..dfd659d4e 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,8 @@  Post-v2.17.0
    - Windows:
      * Conntrack support for TCPv6, UDPv6, ICMPv6, FTPv6.
      * IPv6 Geneve tunnel support.
+   - Linux datapath:
+     * Add offloading meter tc police.
 
 
 v2.17.0 - 17 Feb 2022
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 06e1e8ca0..0af9ee77e 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -4163,11 +4163,18 @@  static int
 dpif_netlink_meter_set(struct dpif *dpif_, ofproto_meter_id meter_id,
                        struct ofputil_meter_config *config)
 {
+    int err;
+
     if (probe_broken_meters(dpif_)) {
         return ENOMEM;
     }
 
-    return dpif_netlink_meter_set__(dpif_, meter_id, config);
+    err = dpif_netlink_meter_set__(dpif_, meter_id, config);
+    if (!err && netdev_is_flow_api_enabled()) {
+        meter_offload_set(meter_id, config);
+    }
+
+    return err;
 }
 
 /* Retrieve statistics and/or delete meter 'meter_id'.  Statistics are
@@ -4258,16 +4265,30 @@  static int
 dpif_netlink_meter_get(const struct dpif *dpif, ofproto_meter_id meter_id,
                        struct ofputil_meter_stats *stats, uint16_t max_bands)
 {
-    return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
-                                        OVS_METER_CMD_GET);
+    int err;
+
+    err = dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
+                                       OVS_METER_CMD_GET);
+    if (!err && netdev_is_flow_api_enabled()) {
+        meter_offload_get(meter_id, stats, max_bands);
+    }
+
+    return err;
 }
 
 static int
 dpif_netlink_meter_del(struct dpif *dpif, ofproto_meter_id meter_id,
                        struct ofputil_meter_stats *stats, uint16_t max_bands)
 {
-    return dpif_netlink_meter_get_stats(dpif, meter_id, stats, max_bands,
-                                        OVS_METER_CMD_DEL);
+    int err;
+
+    err  = dpif_netlink_meter_get_stats(dpif, meter_id, stats,
+                                        max_bands, OVS_METER_CMD_DEL);
+    if (!err && netdev_is_flow_api_enabled()) {
+        meter_offload_del(meter_id, stats, max_bands);
+    }
+
+    return err;
 }
 
 static bool
diff --git a/tests/system-offloads-traffic.at b/tests/system-offloads-traffic.at
index 80bc1dd5c..7ec75340f 100644
--- a/tests/system-offloads-traffic.at
+++ b/tests/system-offloads-traffic.at
@@ -168,3 +168,51 @@  matchall
 ])
 OVS_TRAFFIC_VSWITCHD_STOP
 AT_CLEANUP
+
+AT_SETUP([offloads - check if meter offloading ])
+AT_KEYWORDS([meter])
+AT_SKIP_IF([test $SUPPORT_TC_INGRESS_PPS = "no"])
+OVS_TRAFFIC_VSWITCHD_START()
+
+AT_CHECK([ovs-vsctl set Open_vSwitch . other_config:hw-offload=true])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-meter br0 'meter=1 pktps bands=type=drop rate=1'])
+
+ADD_NAMESPACES(at_ns0, at_ns1)
+ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24", "f0:00:00:01:01:01")
+ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24", "f0:00:00:01:01:02")
+
+NS_CHECK_EXEC([at_ns0], [ip neigh add 10.1.1.2 lladdr f0:00:00:01:01:02 dev p0])
+NS_CHECK_EXEC([at_ns1], [ip neigh add 10.1.1.1 lladdr f0:00:00:01:01:01 dev p1])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "actions=normal"])
+NS_CHECK_EXEC([at_ns0], [ping -q -c 10 -i 0.1 -w 2 10.1.1.2 | FORMAT_PING], [0], [dnl
+10 packets transmitted, 10 received, 0% packet loss, time 0ms
+])
+
+NETNS_DAEMONIZE([at_ns1], [nc -u -l 5678 > /dev/null ], [nc0.pid])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 del-flows br0])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=10,in_port=ovs-p0,udp actions=meter:1,normal"])
+AT_CHECK([ovs-ofctl -O OpenFlow13 add-flow br0 "priority=1 actions=normal"])
+
+NS_CHECK_EXEC([at_ns0], [sleep 0.5; echo "mark" | nc -u 10.1.1.2 5678 -p 6789])
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" | DUMP_CLEAN_SORTED], [0], [dnl
+in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no), packets:0, bytes:0, used:0.001s, actions:outputmeter(0),3
+])
+
+for i in `seq 10`; do
+NS_CHECK_EXEC([at_ns0], [echo "mark" | nc -u 10.1.1.2 5678 -p 6789])
+done
+
+AT_CHECK([ovs-appctl dpctl/dump-flows | grep "meter" | DUMP_CLEAN_SORTED], [0], [dnl
+in_port(2),eth(macs),eth_type(0x0800),ipv4(proto=17,frag=no), packets:10, bytes:330, used:0.001s, actions:outputmeter(0),3
+])
+
+AT_CHECK([ovs-ofctl -O OpenFlow13 meter-stats br0 | sed -e 's/duration:[[0-9]].[[0-9]]*s/duration:0.001s/'], [0], [dnl
+OFPST_METER reply (OF1.3) (xid=0x2):
+meter:1 flow_count:1 packet_in_count:11 byte_in_count:377 duration:0.001s bands:
+0: packet_count:9 byte_count:0
+])
+
+OVS_TRAFFIC_VSWITCHD_STOP
+AT_CLEANUP