diff mbox

[ovs-dev,5/6] byte-order: Make hton128() and ntoh128() behave like their counterparts.

Message ID 1447208184-66714-5-git-send-email-jpettit@ovn.org
State Accepted
Headers show

Commit Message

Justin Pettit Nov. 11, 2015, 2:16 a.m. UTC
From: Justin Pettit <jpettit@nicira.com>

Instead of taking the source and destination as arguments, make these
functions act like their short and long counterparts.

Signed-off-by: Justin Pettit <jpettit@nicira.com>
---
 lib/byte-order.h |   22 ++++++++++++++--------
 lib/match.c      |    6 ++----
 lib/meta-flow.c  |   26 +++++++-------------------
 lib/nx-match.c   |    6 ++----
 lib/odp-util.c   |    8 ++++----
 5 files changed, 29 insertions(+), 39 deletions(-)

Comments

Ben Pfaff Nov. 23, 2015, 6:06 p.m. UTC | #1
When Joe added these types I assumed that he used the unconventional
prototypes for hton128() and ntoh128() because the return value
convention was inefficient.  If GCC and Clang actually optimize the use
of a return value in some kind of sensible way then I agree that the
usual convention is nicer.

Joe, did you have another reason?
Joe Stringer Nov. 23, 2015, 8:49 p.m. UTC | #2
On 23 November 2015 at 10:06, Ben Pfaff <blp@ovn.org> wrote:
> When Joe added these types I assumed that he used the unconventional
> prototypes for hton128() and ntoh128() because the return value
> convention was inefficient.  If GCC and Clang actually optimize the use
> of a return value in some kind of sensible way then I agree that the
> usual convention is nicer.
>
> Joe, did you have another reason?

This was mostly done based on an assumption that this was more
optimal, rather than actually digging into the compiled code and
seeing that it was generated differently.

Looking now, before this patch vs. after on my 64-bit system..

GCC-4.9: hton128/ntoh128 require one less MOV with this patch, but
calling conventions (in format_u128) require 3 extra MOV (+4 MOV, -1
LEA) for format_u128().
Clang-3.7: hton128/ntoh128 are roughly equivalent, although with this
patch they use some MOVUPS/MOVAPS instructions for 128-bit moves.
Calling conventions seem to require as much as 6-12 (!) extra MOVs
however, details below.


Clang-3.7 in format_u128(), before:

    if (verbose || (mask && !ovs_u128_is_zero(mask))) {
    e99b:       f6 45 e7 01             testb  $0x1,-0x19(%rbp)
    e99f:       0f 85 1c 00 00 00       jne    e9c1 <format_u128+0x41>
    e9a5:       48 83 7d e8 00          cmpq   $0x0,-0x18(%rbp)
    e9aa:       0f 84 8d 00 00 00       je     ea3d <format_u128+0xbd>
    e9b0:       48 8b 7d e8             mov    -0x18(%rbp),%rdi
    e9b4:       e8 c7 c8 ff ff          callq  b280 <ovs_u128_is_zero>
    e9b9:       a8 01                   test   $0x1,%al
    e9bb:       0f 85 7c 00 00 00       jne    ea3d <format_u128+0xbd>
    e9c1:       48 8d 75 d0             lea    -0x30(%rbp),%rsi
        ovs_be128 value;

        hton128(key, &value);
    e9c5:       48 8b 7d f0             mov    -0x10(%rbp),%rdi
    e9c9:       e8 82 00 00 00          callq  ea50 <hton128>
    e9ce:       b8 10 00 00 00          mov    $0x10,%eax
    e9d3:       89 c2                   mov    %eax,%edx
    e9d5:       48 8d 75 d0             lea    -0x30(%rbp),%rsi
        ds_put_hex(ds, &value, sizeof value);
    e9d9:       48 8b 7d f8             mov    -0x8(%rbp),%rdi
    e9dd:       e8 00 00 00 00          callq  e9e2 <format_u128+0x62>




Clang-3.7, after:

    if (verbose || (mask && !ovs_u128_is_zero(mask))) {
    e99b:       f6 45 e7 01             testb  $0x1,-0x19(%rbp)
    e99f:       0f 85 1c 00 00 00       jne    e9c1 <format_u128+0x41>
    e9a5:       48 83 7d e8 00          cmpq   $0x0,-0x18(%rbp)
    e9aa:       0f 84 d1 00 00 00       je     ea81 <format_u128+0x101>
    e9b0:       48 8b 7d e8             mov    -0x18(%rbp),%rdi
    e9b4:       e8 c7 c8 ff ff          callq  b280 <ovs_u128_is_zero>
    e9b9:       a8 01                   test   $0x1,%al
    e9bb:       0f 85 c0 00 00 00       jne    ea81 <format_u128+0x101>
        ovs_be128 value;

        value = hton128(*key);
    e9c1:       48 8b 45 f0             mov    -0x10(%rbp),%rax
    e9c5:       48 8b 38                mov    (%rax),%rdi
    e9c8:       48 8b 70 08             mov    0x8(%rax),%rsi
    e9cc:       e8 bf 00 00 00          callq  ea90 <hton128>
    e9d1:       b9 10 00 00 00          mov    $0x10,%ecx
    e9d6:       89 ce                   mov    %ecx,%esi
    e9d8:       48 8d 7d d0             lea    -0x30(%rbp),%rdi
    e9dc:       48 89 45 c0             mov    %rax,-0x40(%rbp)
    e9e0:       48 89 55 c8             mov    %rdx,-0x38(%rbp)
    e9e4:       48 8b 45 c0             mov    -0x40(%rbp),%rax
    e9e8:       48 89 45 d0             mov    %rax,-0x30(%rbp)
    e9ec:       48 8b 45 c8             mov    -0x38(%rbp),%rax
    e9f0:       48 89 45 d8             mov    %rax,-0x28(%rbp)
        ds_put_hex(ds, &value, sizeof value);
    e9f4:       48 8b 45 f8             mov    -0x8(%rbp),%rax
    e9f8:       48 89 7d a8             mov    %rdi,-0x58(%rbp)
    e9fc:       48 89 c7                mov    %rax,%rdi
    e9ff:       48 8b 45 a8             mov    -0x58(%rbp),%rax
    ea03:       48 89 75 a0             mov    %rsi,-0x60(%rbp)
    ea07:       48 89 c6                mov    %rax,%rsi
    ea0a:       48 8b 55 a0             mov    -0x60(%rbp),%rdx
    ea0e:       e8 00 00 00 00          callq  ea13 <format_u128+0x93>
Joe Stringer Nov. 24, 2015, 12:26 a.m. UTC | #3
On 23 November 2015 at 12:49, Joe Stringer <joestringer@nicira.com> wrote:
> On 23 November 2015 at 10:06, Ben Pfaff <blp@ovn.org> wrote:
>> When Joe added these types I assumed that he used the unconventional
>> prototypes for hton128() and ntoh128() because the return value
>> convention was inefficient.  If GCC and Clang actually optimize the use
>> of a return value in some kind of sensible way then I agree that the
>> usual convention is nicer.
>>
>> Joe, did you have another reason?
>
> This was mostly done based on an assumption that this was more
> optimal, rather than actually digging into the compiled code and
> seeing that it was generated differently.
>
> Looking now, before this patch vs. after on my 64-bit system..
>
> GCC-4.9: hton128/ntoh128 require one less MOV with this patch, but
> calling conventions (in format_u128) require 3 extra MOV (+4 MOV, -1
> LEA) for format_u128().
> Clang-3.7: hton128/ntoh128 are roughly equivalent, although with this
> patch they use some MOVUPS/MOVAPS instructions for 128-bit moves.
> Calling conventions seem to require as much as 6-12 (!) extra MOVs
> however, details below.

<snip>

Woops, looks like I missed compiler optimizations. They look much
closer together with --O2, so I think this patch makes sense to apply
without reservation.
Ben Pfaff Nov. 24, 2015, 1:08 a.m. UTC | #4
On Mon, Nov 23, 2015 at 04:26:18PM -0800, Joe Stringer wrote:
> On 23 November 2015 at 12:49, Joe Stringer <joestringer@nicira.com> wrote:
> > On 23 November 2015 at 10:06, Ben Pfaff <blp@ovn.org> wrote:
> >> When Joe added these types I assumed that he used the unconventional
> >> prototypes for hton128() and ntoh128() because the return value
> >> convention was inefficient.  If GCC and Clang actually optimize the use
> >> of a return value in some kind of sensible way then I agree that the
> >> usual convention is nicer.
> >>
> >> Joe, did you have another reason?
> >
> > This was mostly done based on an assumption that this was more
> > optimal, rather than actually digging into the compiled code and
> > seeing that it was generated differently.
> >
> > Looking now, before this patch vs. after on my 64-bit system..
> >
> > GCC-4.9: hton128/ntoh128 require one less MOV with this patch, but
> > calling conventions (in format_u128) require 3 extra MOV (+4 MOV, -1
> > LEA) for format_u128().
> > Clang-3.7: hton128/ntoh128 are roughly equivalent, although with this
> > patch they use some MOVUPS/MOVAPS instructions for 128-bit moves.
> > Calling conventions seem to require as much as 6-12 (!) extra MOVs
> > however, details below.
> 
> <snip>
> 
> Woops, looks like I missed compiler optimizations. They look much
> closer together with --O2, so I think this patch makes sense to apply
> without reservation.

Thanks.

Acked-by: Ben Pfaff <blp@ovn.org>
Justin Pettit Nov. 24, 2015, 1:11 a.m. UTC | #5
> On Nov 23, 2015, at 5:08 PM, Ben Pfaff <blp@ovn.org> wrote:
> 
> On Mon, Nov 23, 2015 at 04:26:18PM -0800, Joe Stringer wrote:
>> On 23 November 2015 at 12:49, Joe Stringer <joestringer@nicira.com> wrote:
>>> On 23 November 2015 at 10:06, Ben Pfaff <blp@ovn.org> wrote:
>>>> When Joe added these types I assumed that he used the unconventional
>>>> prototypes for hton128() and ntoh128() because the return value
>>>> convention was inefficient.  If GCC and Clang actually optimize the use
>>>> of a return value in some kind of sensible way then I agree that the
>>>> usual convention is nicer.
>>>> 
>>>> Joe, did you have another reason?
>>> 
>>> This was mostly done based on an assumption that this was more
>>> optimal, rather than actually digging into the compiled code and
>>> seeing that it was generated differently.
>>> 
>>> Looking now, before this patch vs. after on my 64-bit system..
>>> 
>>> GCC-4.9: hton128/ntoh128 require one less MOV with this patch, but
>>> calling conventions (in format_u128) require 3 extra MOV (+4 MOV, -1
>>> LEA) for format_u128().
>>> Clang-3.7: hton128/ntoh128 are roughly equivalent, although with this
>>> patch they use some MOVUPS/MOVAPS instructions for 128-bit moves.
>>> Calling conventions seem to require as much as 6-12 (!) extra MOVs
>>> however, details below.
>> 
>> <snip>
>> 
>> Woops, looks like I missed compiler optimizations. They look much
>> closer together with --O2, so I think this patch makes sense to apply
>> without reservation.

Thanks for the thorough analysis, Joe.

> Acked-by: Ben Pfaff <blp@ovn.org>

Thanks for the reviews, Ben.  I'll push the series in a minute.

--Justin
diff mbox

Patch

diff --git a/lib/byte-order.h b/lib/byte-order.h
index 3f60698..3430e29 100644
--- a/lib/byte-order.h
+++ b/lib/byte-order.h
@@ -42,18 +42,24 @@  ovs_be64 htonll(uint64_t);
 uint64_t ntohll(ovs_be64);
 #endif
 
-static inline void
-hton128(const ovs_u128 *src, ovs_be128 *dst)
+static inline ovs_be128
+hton128(const ovs_u128 src)
 {
-    dst->be64.hi = htonll(src->u64.hi);
-    dst->be64.lo = htonll(src->u64.lo);
+    ovs_be128 dst;
+
+    dst.be64.hi = htonll(src.u64.hi);
+    dst.be64.lo = htonll(src.u64.lo);
+    return dst;
 }
 
-static inline void
-ntoh128(const ovs_be128 *src, ovs_u128 *dst)
+static inline ovs_u128
+ntoh128(const ovs_be128 src)
 {
-    dst->u64.hi = ntohll(src->be64.hi);
-    dst->u64.lo = ntohll(src->be64.lo);
+    ovs_u128 dst;
+
+    dst.u64.hi = ntohll(src.be64.hi);
+    dst.u64.lo = ntohll(src.be64.lo);
+    return dst;
 }
 
 static inline uint32_t
diff --git a/lib/match.c b/lib/match.c
index 6519065..3cd51be 100644
--- a/lib/match.c
+++ b/lib/match.c
@@ -979,13 +979,11 @@  static void
 format_ct_label_masked(struct ds *s, const ovs_u128 *key, const ovs_u128 *mask)
 {
     if (!ovs_u128_is_zero(mask)) {
-        ovs_be128 value;
-
-        hton128(key, &value);
+        ovs_be128 value = hton128(*key);
         ds_put_format(s, "ct_label=");
         ds_put_hex(s, &value, sizeof value);
         if (!is_all_ones(mask, sizeof(*mask))) {
-            hton128(mask, &value);
+            value = hton128(*mask);
             ds_put_char(s, '/');
             ds_put_hex(s, &value, sizeof value);
         }
diff --git a/lib/meta-flow.c b/lib/meta-flow.c
index 6ae64c4..b3397cf 100644
--- a/lib/meta-flow.c
+++ b/lib/meta-flow.c
@@ -671,7 +671,7 @@  mf_get_value(const struct mf_field *mf, const struct flow *flow,
         break;
 
     case MFF_CT_LABEL:
-        hton128(&flow->ct_label, &value->be128);
+        value->be128 = hton128(flow->ct_label);
         break;
 
     CASE_MFF_REGS:
@@ -918,13 +918,9 @@  mf_set_value(const struct mf_field *mf,
         match_set_ct_mark(match, ntohl(value->be32));
         break;
 
-    case MFF_CT_LABEL: {
-        ovs_u128 label;
-
-        ntoh128(&value->be128, &label);
-        match_set_ct_label(match, label);
+    case MFF_CT_LABEL:
+        match_set_ct_label(match, ntoh128(value->be128));
         break;
-    }
 
     CASE_MFF_REGS:
         match_set_reg(match, mf->id - MFF_REG0, ntohl(value->be32));
@@ -1223,7 +1219,7 @@  mf_set_flow_value(const struct mf_field *mf,
         break;
 
     case MFF_CT_LABEL:
-        ntoh128(&value->be128, &flow->ct_label);
+        flow->ct_label = ntoh128(value->be128);
         break;
 
     CASE_MFF_REGS:
@@ -1810,18 +1806,10 @@  mf_set(const struct mf_field *mf,
         match_set_ct_mark_masked(match, ntohl(value->be32), ntohl(mask->be32));
         break;
 
-    case MFF_CT_LABEL: {
-        ovs_u128 hlabel, hmask;
-
-        ntoh128(&value->be128, &hlabel);
-        if (mask) {
-            ntoh128(&mask->be128, &hmask);
-        } else {
-            hmask.u64.lo = hmask.u64.hi = UINT64_MAX;
-        }
-        match_set_ct_label_masked(match, hlabel, hmask);
+    case MFF_CT_LABEL:
+        match_set_ct_label_masked(match, ntoh128(value->be128),
+                                  mask ? ntoh128(mask->be128) : OVS_U128_MAX);
         break;
-    }
 
     case MFF_ETH_DST:
         match_set_dl_dst_masked(match, value->mac, mask->mac);
diff --git a/lib/nx-match.c b/lib/nx-match.c
index 5e986db..f98b710 100644
--- a/lib/nx-match.c
+++ b/lib/nx-match.c
@@ -786,10 +786,8 @@  nxm_put_ct_label(struct ofpbuf *b,
                  enum mf_field_id field, enum ofp_version version,
                  const ovs_u128 value, const ovs_u128 mask)
 {
-    ovs_be128 bevalue, bemask;
-
-    hton128(&value, &bevalue);
-    hton128(&mask, &bemask);
+    ovs_be128 bevalue = hton128(value);
+    ovs_be128 bemask = hton128(mask);
     nxm_put(b, field, version, &bevalue, &bemask, sizeof(bevalue));
 }
 
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 9b9792d..b7c58d3 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -2540,10 +2540,10 @@  format_u128(struct ds *ds, const ovs_u128 *key, const ovs_u128 *mask,
     if (verbose || (mask && !ovs_u128_is_zero(mask))) {
         ovs_be128 value;
 
-        hton128(key, &value);
+        value = hton128(*key);
         ds_put_hex(ds, &value, sizeof value);
         if (mask && !(ovs_u128_is_ones(mask))) {
-            hton128(mask, &value);
+            value = hton128(*mask);
             ds_put_char(ds, '/');
             ds_put_hex(ds, &value, sizeof value);
         }
@@ -2558,7 +2558,7 @@  scan_u128(const char *s_, ovs_u128 *value, ovs_u128 *mask)
     ovs_be128 be_mask;
 
     if (!parse_int_string(s, (uint8_t *)&be_value, sizeof be_value, &s)) {
-        ntoh128(&be_value, value);
+        *value = ntoh128(be_value);
 
         if (mask) {
             int n;
@@ -2572,7 +2572,7 @@  scan_u128(const char *s_, ovs_u128 *value, ovs_u128 *mask)
                 if (error) {
                     return error;
                 }
-                ntoh128(&be_mask, mask);
+                *mask = ntoh128(be_mask);
             } else {
                 *mask = OVS_U128_MAX;
             }