diff mbox

[ovs-dev,2/7] dpif-netdev: retrieve flow directly from the flow mark

Message ID 1503469462-22391-3-git-send-email-yliu@fridaylinux.org
State Superseded
Headers show

Commit Message

Yuanhan Liu Aug. 23, 2017, 6:24 a.m. UTC
So that we could skip the heavy emc processing. A simple PHY-PHY
forwarding testing shows almost 80% performance improvement.

Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
Signed-off-by: Finn Christensen <fc@napatech.com>
---
 lib/dp-packet.h   | 13 +++++++++++++
 lib/dpif-netdev.c | 17 +++++++++++++++++
 2 files changed, 30 insertions(+)

Comments

Darrell Ball Aug. 29, 2017, 7:14 a.m. UTC | #1
Hi

There is a build issue after patch 2 is applied

f-netdev.Tpo -c ../lib/dpif-netdev.c -o lib/dpif-netdev.o
../lib/dpif-netdev.c: In function 'emc_processing':
../lib/dpif-netdev.c:4782:21: error: excess elements in struct initializer [-Werror]
                     miniflow_values(&fake_mf),
                     ^
../lib/dpif-netdev.c:4782:21: error: (near initialization for 'fake_mf') [-Werror]
../lib/dpif-netdev.c:4784:17: error: excess elements in struct initializer [-Werror]
                 };
                 ^
../lib/dpif-netdev.c:4784:17: error: (near initialization for 'fake_mf') [-Werror]
cc1: all warnings being treated as errors



    On 8/22/17, 11:24 PM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:
    
        So that we could skip the heavy emc processing. A simple PHY-PHY
        forwarding testing shows almost 80% performance improvement.
        
        Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
        Signed-off-by: Finn Christensen <fc@napatech.com>
        ---
         lib/dp-packet.h   | 13 +++++++++++++
         lib/dpif-netdev.c | 17 +++++++++++++++++
         2 files changed, 30 insertions(+)
        
        diff --git a/lib/dp-packet.h b/lib/dp-packet.h
        index bb3f9db..7ecabcc 100644
        --- a/lib/dp-packet.h
        +++ b/lib/dp-packet.h
        @@ -687,6 +687,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
         #define reset_dp_packet_checksum_ol_flags(arg)
         #endif
         
        +static inline bool
        +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
        +                        uint32_t *mark OVS_UNUSED)
        +{
        +#ifdef DPDK_NETDEV

NETDEV ifdefs seem to be an indication that a NETDEV specific function is needed
What do you think ?


        +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
        +        *mark = p->mbuf.hash.fdir.hi;
        +        return true;
        +    }
        +#endif
        +    return false;
        +}
        +
         enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
         
         struct dp_packet_batch {
        diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
        index aaeb7c2..b5f157e 100644
        --- a/lib/dpif-netdev.c
        +++ b/lib/dpif-netdev.c
        @@ -4765,6 +4765,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
         
             DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
                 struct dp_netdev_flow *flow;
        +        uint32_t flow_mark;
         
                 if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
                     dp_packet_delete(packet);
        @@ -4772,6 +4773,22 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
                     continue;
                 }
         
        +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
        +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
        +            if (flow) {
        +                /* FIXME: this fake mf won't update tcp_flags */
        +                struct miniflow fake_mf = {
        +                    FLOWMAP_EMPTY_INITIALIZER,
        +                    miniflow_values(&fake_mf),
        +                    miniflow_values(&fake_mf) + FLOW_U64S
        +                };
        +
        +                dp_netdev_queue_batches(packet, flow, &fake_mf, batches,
        +                                        n_batches);
        +                continue;
        +            }
        +        }
        +
                 if (i != size - 1) {
                     struct dp_packet **packets = packets_->packets;
                     /* Prefetch next packet data and metadata. */
        -- 
        2.7.4
Yuanhan Liu Aug. 29, 2017, 12:09 p.m. UTC | #2
On Tue, Aug 29, 2017 at 07:14:48AM +0000, Darrell Ball wrote:
> Hi
> 
> There is a build issue after patch 2 is applied
> 
> f-netdev.Tpo -c ../lib/dpif-netdev.c -o lib/dpif-netdev.o
> ../lib/dpif-netdev.c: In function 'emc_processing':
> ../lib/dpif-netdev.c:4782:21: error: excess elements in struct initializer [-Werror]
>                      miniflow_values(&fake_mf),
>                      ^
> ../lib/dpif-netdev.c:4782:21: error: (near initialization for 'fake_mf') [-Werror]
> ../lib/dpif-netdev.c:4784:17: error: excess elements in struct initializer [-Werror]
>                  };
>                  ^
> ../lib/dpif-netdev.c:4784:17: error: (near initialization for 'fake_mf') [-Werror]
> cc1: all warnings being treated as errors

Yes, I have noticed this warning. I haven't really looked at it though,
because there is a more important issue to fix: the tcp_flags counter
is ignored in this patchset. The possible solutions I can think of so
far are:

- build a fake miniflow just with the tcp_flags field being checked.
  However, it adds back the heavy overhead (checking each pkt) this
  patchset meant to skip. I don't know how much overhead it may
  introduce though. I could do some experiment. But I think the
  functionality outweighs the performance.

- just ignore it. I'm thinking this is obviously wrong?

- query the tcp_flags counters from the hardware. I'm just aware of
  that some NICs are able to offload it, nothing more.

That being said, even though it could be offloaded by some hardwares,
it could not for some other hardwares. Meaning, the SW fallback is
still needed. I will measure the overhead in the next version.

> 
>     On 8/22/17, 11:24 PM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:
>     
>         So that we could skip the heavy emc processing. A simple PHY-PHY
>         forwarding testing shows almost 80% performance improvement.
>         
>         Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
>         Signed-off-by: Finn Christensen <fc@napatech.com>
>         ---
>          lib/dp-packet.h   | 13 +++++++++++++
>          lib/dpif-netdev.c | 17 +++++++++++++++++
>          2 files changed, 30 insertions(+)
>         
>         diff --git a/lib/dp-packet.h b/lib/dp-packet.h
>         index bb3f9db..7ecabcc 100644
>         --- a/lib/dp-packet.h
>         +++ b/lib/dp-packet.h
>         @@ -687,6 +687,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
>          #define reset_dp_packet_checksum_ol_flags(arg)
>          #endif
>          
>         +static inline bool
>         +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
>         +                        uint32_t *mark OVS_UNUSED)
>         +{
>         +#ifdef DPDK_NETDEV
> 
> NETDEV ifdefs seem to be an indication that a NETDEV specific function is needed
> What do you think ?

Don't know. If you run "git grep DPDK_NETDEV lib/dp-packet.h", you will
see a lot of like this.

	--yliu
> 
> 
>         +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
>         +        *mark = p->mbuf.hash.fdir.hi;
>         +        return true;
>         +    }
>         +#endif
>         +    return false;
>         +}
>         +
>          enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
>          
>          struct dp_packet_batch {
>         diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>         index aaeb7c2..b5f157e 100644
>         --- a/lib/dpif-netdev.c
>         +++ b/lib/dpif-netdev.c
>         @@ -4765,6 +4765,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>          
>              DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
>                  struct dp_netdev_flow *flow;
>         +        uint32_t flow_mark;
>          
>                  if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
>                      dp_packet_delete(packet);
>         @@ -4772,6 +4773,22 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
>                      continue;
>                  }
>          
>         +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
>         +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
>         +            if (flow) {
>         +                /* FIXME: this fake mf won't update tcp_flags */
>         +                struct miniflow fake_mf = {
>         +                    FLOWMAP_EMPTY_INITIALIZER,
>         +                    miniflow_values(&fake_mf),
>         +                    miniflow_values(&fake_mf) + FLOW_U64S
>         +                };
>         +
>         +                dp_netdev_queue_batches(packet, flow, &fake_mf, batches,
>         +                                        n_batches);
>         +                continue;
>         +            }
>         +        }
>         +
>                  if (i != size - 1) {
>                      struct dp_packet **packets = packets_->packets;
>                      /* Prefetch next packet data and metadata. */
>         -- 
>         2.7.4
>         
>         
>     
>     
> 
> 
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Darrell Ball Aug. 30, 2017, 1:55 a.m. UTC | #3
On 8/29/17, 5:09 AM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:

    On Tue, Aug 29, 2017 at 07:14:48AM +0000, Darrell Ball wrote:
    > Hi
    > 
    > There is a build issue after patch 2 is applied
    > 
    > f-netdev.Tpo -c ../lib/dpif-netdev.c -o lib/dpif-netdev.o
    > ../lib/dpif-netdev.c: In function 'emc_processing':
    > ../lib/dpif-netdev.c:4782:21: error: excess elements in struct initializer [-Werror]
    >                      miniflow_values(&fake_mf),
    >                      ^
    > ../lib/dpif-netdev.c:4782:21: error: (near initialization for 'fake_mf') [-Werror]
    > ../lib/dpif-netdev.c:4784:17: error: excess elements in struct initializer [-Werror]
    >                  };
    >                  ^
    > ../lib/dpif-netdev.c:4784:17: error: (near initialization for 'fake_mf') [-Werror]
    > cc1: all warnings being treated as errors
    
    Yes, I have noticed this warning. I haven't really looked at it though,
    because there is a more important issue to fix: the tcp_flags counter
    is ignored in this patchset. The possible solutions I can think of so
    far are:
    
    - build a fake miniflow just with the tcp_flags field being checked.
      However, it adds back the heavy overhead (checking each pkt) this
      patchset meant to skip. I don't know how much overhead it may
      introduce though. I could do some experiment. But I think the
      functionality outweighs the performance.
    
    - just ignore it. I'm thinking this is obviously wrong?

We cannot ignore it
    
    - query the tcp_flags counters from the hardware. I'm just aware of
      that some NICs are able to offload it, nothing more.
    
    That being said, even though it could be offloaded by some hardwares,
    it could not for some other hardwares. Meaning, the SW fallback is
    still needed. I will measure the overhead in the next version.


We would kinda need to fix the warning/error in some way.
Did you intend struct mf_ctx instead of struct miniflow ?


    
    > 
    >     On 8/22/17, 11:24 PM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:
    >     
    >         So that we could skip the heavy emc processing. A simple PHY-PHY
    >         forwarding testing shows almost 80% performance improvement.
    >         
    >         Signed-off-by: Yuanhan Liu <yliu@fridaylinux.org>
    >         Signed-off-by: Finn Christensen <fc@napatech.com>
    >         ---
    >          lib/dp-packet.h   | 13 +++++++++++++
    >          lib/dpif-netdev.c | 17 +++++++++++++++++
    >          2 files changed, 30 insertions(+)
    >         
    >         diff --git a/lib/dp-packet.h b/lib/dp-packet.h
    >         index bb3f9db..7ecabcc 100644
    >         --- a/lib/dp-packet.h
    >         +++ b/lib/dp-packet.h
    >         @@ -687,6 +687,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
    >          #define reset_dp_packet_checksum_ol_flags(arg)
    >          #endif
    >          
    >         +static inline bool
    >         +dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
    >         +                        uint32_t *mark OVS_UNUSED)
    >         +{
    >         +#ifdef DPDK_NETDEV
    > 
    > NETDEV ifdefs seem to be an indication that a NETDEV specific function is needed
    > What do you think ?
    
    Don't know. If you run "git grep DPDK_NETDEV lib/dp-packet.h", you will
    see a lot of like this.

yes, I agree.
sorry, I was thinking of another series I looked at and just saw this again and auto-responded, without looking closely
where the function was located.

    
    	--yliu
    > 
    > 
    >         +    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
    >         +        *mark = p->mbuf.hash.fdir.hi;
    >         +        return true;
    >         +    }
    >         +#endif
    >         +    return false;
    >         +}
    >         +
    >          enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
    >          
    >          struct dp_packet_batch {
    >         diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
    >         index aaeb7c2..b5f157e 100644
    >         --- a/lib/dpif-netdev.c
    >         +++ b/lib/dpif-netdev.c
    >         @@ -4765,6 +4765,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
    >          
    >              DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
    >                  struct dp_netdev_flow *flow;
    >         +        uint32_t flow_mark;
    >          
    >                  if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
    >                      dp_packet_delete(packet);
    >         @@ -4772,6 +4773,22 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
    >                      continue;
    >                  }
    >          
    >         +        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
    >         +            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
    >         +            if (flow) {
    >         +                /* FIXME: this fake mf won't update tcp_flags */
    >         +                struct miniflow fake_mf = {
    >         +                    FLOWMAP_EMPTY_INITIALIZER,
    >         +                    miniflow_values(&fake_mf),
    >         +                    miniflow_values(&fake_mf) + FLOW_U64S
    >         +                };
    >         +
    >         +                dp_netdev_queue_batches(packet, flow, &fake_mf, batches,
    >         +                                        n_batches);
    >         +                continue;
    >         +            }
    >         +        }
    >         +
    >                  if (i != size - 1) {
    >                      struct dp_packet **packets = packets_->packets;
    >                      /* Prefetch next packet data and metadata. */
    >         -- 
    >         2.7.4
    >         
    >         
    >     
    >     
    > 
    > 
    > 
    > _______________________________________________
    > dev mailing list
    > dev@openvswitch.org
    > https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwIBAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=HPxdEkLEtUVaxL6nNtz6kWwJhrTcjRJBQ3Z0_0GEtLs&s=7wnAMJHP8dwmGfpYB-9qvGPDlK1DCBLY_S3alzwOgWA&e=
Yuanhan Liu Aug. 30, 2017, 2:27 a.m. UTC | #4
On Wed, Aug 30, 2017 at 01:55:28AM +0000, Darrell Ball wrote:
> 
> 
> On 8/29/17, 5:09 AM, "Yuanhan Liu" <yliu@fridaylinux.org> wrote:
> 
>     On Tue, Aug 29, 2017 at 07:14:48AM +0000, Darrell Ball wrote:
>     > Hi
>     > 
>     > There is a build issue after patch 2 is applied
>     > 
>     > f-netdev.Tpo -c ../lib/dpif-netdev.c -o lib/dpif-netdev.o
>     > ../lib/dpif-netdev.c: In function 'emc_processing':
>     > ../lib/dpif-netdev.c:4782:21: error: excess elements in struct initializer [-Werror]
>     >                      miniflow_values(&fake_mf),
>     >                      ^
>     > ../lib/dpif-netdev.c:4782:21: error: (near initialization for 'fake_mf') [-Werror]
>     > ../lib/dpif-netdev.c:4784:17: error: excess elements in struct initializer [-Werror]
>     >                  };
>     >                  ^
>     > ../lib/dpif-netdev.c:4784:17: error: (near initialization for 'fake_mf') [-Werror]
>     > cc1: all warnings being treated as errors
>     
>     Yes, I have noticed this warning. I haven't really looked at it though,
>     because there is a more important issue to fix: the tcp_flags counter
>     is ignored in this patchset. The possible solutions I can think of so
>     far are:
>     
>     - build a fake miniflow just with the tcp_flags field being checked.
>       However, it adds back the heavy overhead (checking each pkt) this
>       patchset meant to skip. I don't know how much overhead it may
>       introduce though. I could do some experiment. But I think the
>       functionality outweighs the performance.
>     
>     - just ignore it. I'm thinking this is obviously wrong?
> 
> We cannot ignore it

Yes, I know. I just want to know how bad it could be if we ignore it.

>     - query the tcp_flags counters from the hardware. I'm just aware of
>       that some NICs are able to offload it, nothing more.
>     
>     That being said, even though it could be offloaded by some hardwares,
>     it could not for some other hardwares. Meaning, the SW fallback is
>     still needed. I will measure the overhead in the next version.
> 
> 
> We would kinda need to fix the warning/error in some way.
> Did you intend struct mf_ctx instead of struct miniflow ?

Honestly, I don't know. I'm still quite rusty to it (and to OVS).
But no worry, I will figure it out.

	--yliu
diff mbox

Patch

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index bb3f9db..7ecabcc 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -687,6 +687,19 @@  reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
 #define reset_dp_packet_checksum_ol_flags(arg)
 #endif
 
+static inline bool
+dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
+                        uint32_t *mark OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+    if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
+        *mark = p->mbuf.hash.fdir.hi;
+        return true;
+    }
+#endif
+    return false;
+}
+
 enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
 
 struct dp_packet_batch {
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index aaeb7c2..b5f157e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4765,6 +4765,7 @@  emc_processing(struct dp_netdev_pmd_thread *pmd,
 
     DP_PACKET_BATCH_REFILL_FOR_EACH (i, size, packet, packets_) {
         struct dp_netdev_flow *flow;
+        uint32_t flow_mark;
 
         if (OVS_UNLIKELY(dp_packet_size(packet) < ETH_HEADER_LEN)) {
             dp_packet_delete(packet);
@@ -4772,6 +4773,22 @@  emc_processing(struct dp_netdev_pmd_thread *pmd,
             continue;
         }
 
+        if (dp_packet_has_flow_mark(packet, &flow_mark)) {
+            flow = dp_netdev_pmd_find_flow_by_mark(pmd, flow_mark);
+            if (flow) {
+                /* FIXME: this fake mf won't update tcp_flags */
+                struct miniflow fake_mf = {
+                    FLOWMAP_EMPTY_INITIALIZER,
+                    miniflow_values(&fake_mf),
+                    miniflow_values(&fake_mf) + FLOW_U64S
+                };
+
+                dp_netdev_queue_batches(packet, flow, &fake_mf, batches,
+                                        n_batches);
+                continue;
+            }
+        }
+
         if (i != size - 1) {
             struct dp_packet **packets = packets_->packets;
             /* Prefetch next packet data and metadata. */