diff mbox series

[5/6] crypto: convert xts_mult_x to use xts_uint128 type

Message ID 20181009125541.24455-6-berrange@redhat.com
State New
Headers show
Series crypto: improve performance of XTS cipher mode | expand

Commit Message

Daniel P. Berrangé Oct. 9, 2018, 12:55 p.m. UTC
Using 64-bit arithmetic increases the performance for xts-aes-128
when built with gcrypt:

  Encrypt: 320 MB/s -> 460 MB/s
  Decrypt: 325 MB/s -> 485 MB/s

Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
---
 crypto/xts.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

Comments

Alberto Garcia Oct. 9, 2018, 1:52 p.m. UTC | #1
On Tue 09 Oct 2018 02:55:40 PM CEST, Daniel P. Berrangé wrote:
> -static void xts_mult_x(uint8_t *I)
> +static void xts_mult_x(xts_uint128 *I)
>  {
> -    int x;
> -    uint8_t t, tt;
> +    uint64_t tt;
>  
> -    for (x = t = 0; x < 16; x++) {
> -        tt = I[x] >> 7;
> -        I[x] = ((I[x] << 1) | t) & 0xFF;
> -        t = tt;
> -    }
> -    if (tt) {
> -        I[0] ^= 0x87;
> +    tt = I->a >> 63;
> +    I->a = I->a << 1;
> +
> +    if (I->b >> 63) {
> +        I->a ^= 0x87;
>      }
> +    I->b = (I->b << 1) | tt;
>  }

Does this work fine on big-endian CPUs?

Berto
Daniel P. Berrangé Oct. 9, 2018, 1:55 p.m. UTC | #2
On Tue, Oct 09, 2018 at 03:52:53PM +0200, Alberto Garcia wrote:
> On Tue 09 Oct 2018 02:55:40 PM CEST, Daniel P. Berrangé wrote:
> > -static void xts_mult_x(uint8_t *I)
> > +static void xts_mult_x(xts_uint128 *I)
> >  {
> > -    int x;
> > -    uint8_t t, tt;
> > +    uint64_t tt;
> >  
> > -    for (x = t = 0; x < 16; x++) {
> > -        tt = I[x] >> 7;
> > -        I[x] = ((I[x] << 1) | t) & 0xFF;
> > -        t = tt;
> > -    }
> > -    if (tt) {
> > -        I[0] ^= 0x87;
> > +    tt = I->a >> 63;
> > +    I->a = I->a << 1;
> > +
> > +    if (I->b >> 63) {
> > +        I->a ^= 0x87;
> >      }
> > +    I->b = (I->b << 1) | tt;
> >  }
> 
> > Does this work fine on big-endian CPUs?

Hmm, that's a good question. I'd expect tests/test-crypto-xts to crash
and burn if it doesn't, so I guess I'll need to find somewhere to validate
that.

Regards,
Daniel
Alberto Garcia Oct. 9, 2018, 2:25 p.m. UTC | #3
On Tue 09 Oct 2018 03:55:34 PM CEST, Daniel P. Berrangé wrote:
> On Tue, Oct 09, 2018 at 03:52:53PM +0200, Alberto Garcia wrote:
>> On Tue 09 Oct 2018 02:55:40 PM CEST, Daniel P. Berrangé wrote:
>> > -static void xts_mult_x(uint8_t *I)
>> > +static void xts_mult_x(xts_uint128 *I)
>> >  {
>> > -    int x;
>> > -    uint8_t t, tt;
>> > +    uint64_t tt;
>> >  
>> > -    for (x = t = 0; x < 16; x++) {
>> > -        tt = I[x] >> 7;
>> > -        I[x] = ((I[x] << 1) | t) & 0xFF;
>> > -        t = tt;
>> > -    }
>> > -    if (tt) {
>> > -        I[0] ^= 0x87;
>> > +    tt = I->a >> 63;
>> > +    I->a = I->a << 1;
>> > +
>> > +    if (I->b >> 63) {
>> > +        I->a ^= 0x87;
>> >      }
>> > +    I->b = (I->b << 1) | tt;
>> >  }
>> 
>> Does this work fine on big-endian CPUs?
>
> Hmm, that's a good question. I'd expect tests/test-crypto-xts to crash
> and burn if it doesn't, so I guess I'll need to find somewhere to validate
> that.

I just tried in s390x:

/crypto/xts/t-1-key-32-ptx-32: **
ERROR:/home/berto/qemu/tests/test-crypto-xts.c:386:test_xts: assertion failed: (memcmp(out, data->CTX, data->PTLEN) == 0)
Aborted

Berto
diff mbox series

Patch

diff --git a/crypto/xts.c b/crypto/xts.c
index f109c8a3ee..bba3280746 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -37,19 +37,17 @@  typedef struct {
         (D)->b = (S1)->b ^ (S2)->b;             \
     } while (0)
 
-static void xts_mult_x(uint8_t *I)
+static void xts_mult_x(xts_uint128 *I)
 {
-    int x;
-    uint8_t t, tt;
+    uint64_t tt;
 
-    for (x = t = 0; x < 16; x++) {
-        tt = I[x] >> 7;
-        I[x] = ((I[x] << 1) | t) & 0xFF;
-        t = tt;
-    }
-    if (tt) {
-        I[0] ^= 0x87;
+    tt = I->a >> 63;
+    I->a = I->a << 1;
+
+    if (I->b >> 63) {
+        I->a ^= 0x87;
     }
+    I->b = (I->b << 1) | tt;
 }
 
 
@@ -77,7 +75,7 @@  static void xts_tweak_encdec(const void *ctx,
     xts_uint128_xor(dst, dst, iv);
 
     /* LFSR the tweak */
-    xts_mult_x((uint8_t *)iv);
+    xts_mult_x(iv);
 }
 
 
@@ -124,7 +122,7 @@  void xts_decrypt(const void *datactx,
     if (mo > 0) {
         xts_uint128 S, D;
         memcpy(&CC, &T, XTS_BLOCK_SIZE);
-        xts_mult_x((uint8_t *)&CC);
+        xts_mult_x(&CC);
 
         /* PP = tweak decrypt block m-1 */
         memcpy(&S, src, XTS_BLOCK_SIZE);