diff mbox

[U-Boot,v2] Implement generalised RSA public exponents for verified boot

Message ID 1401116213-24139-1-git-send-email-michael@smart-africa.com
State Changes Requested
Delegated to: Simon Glass
Headers show

Commit Message

Michael van der Westhuizen May 26, 2014, 2:56 p.m. UTC
From: Michael van der Westhuizen <michael@smart-africa.com>

Remove the verified boot limitation that only allows a single
RSA public exponent of 65537 (F4).  This allows use with existing
PKI infrastructure, and has been tested with HSM-based PKI.

Change the configuration OF tree format to store the RSA public
exponent as a 64 bit integer and implement backward compatibility
for verified boot configuration trees without this extra field.

Parameterise vboot_test.sh to test different public exponents and
do a drive-by fix to get vboot_test.sh working again.

Mathematics and other hard work by Andrew Bott.

Tested with the following public exponents: 3, 5, 17, 257, 39981,
50457, 65537 and 4294967297.

Signed-off-by: Andrew Bott <Andrew.Bott@ipaccess.com>
Signed-off-by: Andrew Wishart <Andrew.Wishart@ipaccess.com>
Signed-off-by: Neil Piercy <Neil.Piercy@ipaccess.com>
Signed-off-by: Michael van der Westhuizen <michael@smart-africa.com>
---
 Changes for v2:
  - None.  Resend to address line wrapping issues.

 doc/uImage.FIT/signature.txt |  4 +-
 include/rsa.h                |  1 +
 lib/rsa/rsa-sign.c           | 56 +++++++++++++++++++++++++++-
 lib/rsa/rsa-verify.c         | 88 +++++++++++++++++++++++++++++++++++++++++---
 test/vboot/vboot_test.sh     | 11 +++++-
 5 files changed, 151 insertions(+), 9 deletions(-)

Comments

Simon Glass May 30, 2014, 7:04 p.m. UTC | #1
Hi Michael,

On 26 May 2014 08:56,  <michael@smart-africa.com> wrote:
> From: Michael van der Westhuizen <michael@smart-africa.com>
>
> Remove the verified boot limitation that only allows a single
> RSA public exponent of 65537 (F4).  This allows use with existing
> PKI infrastructure, and has been tested with HSM-based PKI.
>
> Change the configuration OF tree format to store the RSA public
> exponent as a 64 bit integer and implement backward compatibility
> for verified boot configuration trees without this extra field.
>
> Parameterise vboot_test.sh to test different public exponents and
> do a drive-by fix to get vboot_test.sh working again.
>
> Mathematics and other hard work by Andrew Bott.

This looks good. I mostly have some nits below.

>
> Tested with the following public exponents: 3, 5, 17, 257, 39981,
> 50457, 65537 and 4294967297.
>
> Signed-off-by: Andrew Bott <Andrew.Bott@ipaccess.com>
> Signed-off-by: Andrew Wishart <Andrew.Wishart@ipaccess.com>
> Signed-off-by: Neil Piercy <Neil.Piercy@ipaccess.com>
> Signed-off-by: Michael van der Westhuizen <michael@smart-africa.com>
> ---
>  Changes for v2:
>   - None.  Resend to address line wrapping issues.
>
>  doc/uImage.FIT/signature.txt |  4 +-
>  include/rsa.h                |  1 +
>  lib/rsa/rsa-sign.c           | 56 +++++++++++++++++++++++++++-
>  lib/rsa/rsa-verify.c         | 88 +++++++++++++++++++++++++++++++++++++++++---
>  test/vboot/vboot_test.sh     | 11 +++++-
>  5 files changed, 151 insertions(+), 9 deletions(-)
>
> diff --git a/doc/uImage.FIT/signature.txt b/doc/uImage.FIT/signature.txt
> index 9502037..cc314a3 100644
> --- a/doc/uImage.FIT/signature.txt
> +++ b/doc/uImage.FIT/signature.txt
> @@ -66,7 +66,8 @@ Creating an RSA key and certificate
>  -----------------------------------
>  To create a new public key, size 2048 bits:
>
> -$ openssl genrsa -F4 -out keys/dev.key 2048
> +$ openssl genpkey -algorithm RSA -out keys/dev.key \
> +    -pkeyopt rsa_keygen_bits:2048 -pkeyopt rsa_keygen_pubexp:65537
>
>  To create a certificate for this:
>
> @@ -159,6 +160,7 @@ For RSA the following are mandatory:
>
>  - rsa,num-bits: Number of key bits (e.g. 2048)
>  - rsa,modulus: Modulus (N) as a big-endian multi-word integer
> +- rsa,exponent: Public exponent (E) as a 64 bit unsigned integer
>  - rsa,r-squared: (2^num-bits)^2 as a big-endian multi-word integer
>  - rsa,n0-inverse: -1 / modulus[0] mod 2^32
>
> diff --git a/include/rsa.h b/include/rsa.h
> index a5680ab..b84d82a 100644
> --- a/include/rsa.h
> +++ b/include/rsa.h
> @@ -27,6 +27,7 @@ struct rsa_public_key {
>         uint32_t n0inv;         /* -1 / modulus[0] mod 2^32 */
>         uint32_t *modulus;      /* modulus as little endian array */
>         uint32_t *rr;           /* R^2 as little endian array */
> +       uint64_t exponent;      /* public exponent */
>  };
>
>  #if IMAGE_ENABLE_SIGN
> diff --git a/lib/rsa/rsa-sign.c b/lib/rsa/rsa-sign.c
> index ca8c120..f20ae28 100644
> --- a/lib/rsa/rsa-sign.c
> +++ b/lib/rsa/rsa-sign.c
> @@ -261,9 +261,56 @@ err_priv:
>  }
>
>  /*
> + * rsa_get_e(): - Get the public exponent from an RSA public key
> + */
> +static int rsa_get_e(RSA *key, uint64_t *e)

rsa_get_exponent() might be better. Also s/e/exponent/ or perhaps exp
(although I see I made the same mistake in the following function).

> +{
> +       int ret;
> +       BIGNUM *bn_te;
> +       uint64_t te;
> +
> +       ret = -1;

-EINVAL

> +       bn_te = NULL;
> +
> +       if (!e)
> +               goto cleanup;
> +
> +       if (BN_num_bits(key->e) > 64)
> +               goto cleanup;
> +
> +       *e = BN_get_word(key->e);
> +
> +       if (BN_num_bits(key->e) < 33) {
> +               ret = 0;
> +               goto cleanup;
> +       }
> +
> +       bn_te = BN_dup(key->e);
> +       if (!bn_te)
> +               goto cleanup;
> +
> +       if (!BN_rshift(bn_te, bn_te, 32))
> +               goto cleanup;
> +
> +       if (!BN_mask_bits(bn_te, 32))
> +               goto cleanup;
> +
> +       te = BN_get_word(bn_te);
> +       te <<= 32;
> +       *e |= te;
> +       ret = 0;
> +
> +cleanup:
> +       if (bn_te)
> +               BN_free(bn_te);
> +
> +       return ret;
> +}
> +
> +/*
>   * rsa_get_params(): - Get the important parameters of an RSA public key
>   */
> -int rsa_get_params(RSA *key, uint32_t *n0_invp, BIGNUM **modulusp,
> +int rsa_get_params(RSA *key, uint64_t *e, uint32_t *n0_invp, BIGNUM **modulusp,
>                    BIGNUM **r_squaredp)

If you don't mind, you could change 'e' to 'exp' or 'exponent' in the
same patch.

>  {
>         BIGNUM *big1, *big2, *big32, *big2_32;
> @@ -286,6 +333,9 @@ int rsa_get_params(RSA *key, uint32_t *n0_invp, BIGNUM **modulusp,
>                 return -ENOMEM;
>         }
>
> +       if (0 != rsa_get_e(key, e))
> +               ret = -1;
> +
>         if (!BN_copy(n, key->n) || !BN_set_word(big1, 1L) ||
>             !BN_set_word(big2, 2L) || !BN_set_word(big32, 32L))
>                 ret = -1;
> @@ -386,6 +436,7 @@ static int fdt_add_bignum(void *blob, int noffset, const char *prop_name,
>  int rsa_add_verify_data(struct image_sign_info *info, void *keydest)
>  {
>         BIGNUM *modulus, *r_squared;
> +       uint64_t e;

And here.

>         uint32_t n0_inv;
>         int parent, node;
>         char name[100];
> @@ -397,7 +448,7 @@ int rsa_add_verify_data(struct image_sign_info *info, void *keydest)
>         ret = rsa_get_pub_key(info->keydir, info->keyname, &rsa);
>         if (ret)
>                 return ret;
> -       ret = rsa_get_params(rsa, &n0_inv, &modulus, &r_squared);
> +       ret = rsa_get_params(rsa, &e, &n0_inv, &modulus, &r_squared);
>         if (ret)
>                 return ret;
>         bits = BN_num_bits(modulus);
> @@ -431,6 +482,7 @@ int rsa_add_verify_data(struct image_sign_info *info, void *keydest)
>                                  info->keyname);
>         ret |= fdt_setprop_u32(keydest, node, "rsa,num-bits", bits);
>         ret |= fdt_setprop_u32(keydest, node, "rsa,n0-inverse", n0_inv);
> +       ret |= fdt_setprop_u64(keydest, node, "rsa,exponent", e);
>         ret |= fdt_add_bignum(keydest, node, "rsa,modulus", modulus, bits);
>         ret |= fdt_add_bignum(keydest, node, "rsa,r-squared", r_squared, bits);
>         ret |= fdt_setprop_string(keydest, node, FIT_ALGO_PROP,
> diff --git a/lib/rsa/rsa-verify.c b/lib/rsa/rsa-verify.c
> index 587da5b..878f92a 100644
> --- a/lib/rsa/rsa-verify.c
> +++ b/lib/rsa/rsa-verify.c
> @@ -26,6 +26,9 @@
>  #define get_unaligned_be32(a) fdt32_to_cpu(*(uint32_t *)a)
>  #define put_unaligned_be32(a, b) (*(uint32_t *)(b) = cpu_to_fdt32(a))
>
> +/* The default public exponent is 65537 */
> +#define RSA_DEFAULT_PUBEXP     0x10001

Then why use hex? Can we decide on either decimal or hex?

> +
>  /**
>   * subtract_modulus() - subtract modulus from the given value
>   *
> @@ -123,6 +126,42 @@ static void montgomery_mul(const struct rsa_public_key *key,
>  }
>
>  /**
> + * num_pub_exponent_bits() - Number of bits in the public exponent
> + *
> + * @key:       RSA key
> + * @k:         Storage for the number of public exponent bits
> + */
> +static int num_public_exponent_bits(const struct rsa_public_key *key, int *k)

s/k/keyp/ or something like that.

Also is this function able to just use ffs() or similar?

> +{
> +       uint64_t e;

s/e/exponent/ or exp

> +       const uint nbits = (sizeof(e) * 8);
> +
> +       *k = 0;

How about a local variable for this instead of using *k everywhere?

> +       e = key->exponent;
> +
> +       if (!e)
> +               return 0;
> +
> +       for (*k = 1; *k < nbits + 1; ++*k)
> +               if (!(e >>= 1))
> +                       return 0;
> +
> +       return -EINVAL;
> +}
> +
> +/**
> + * is_public_exponent_bit_set() - Check if a bit in the public exponent is set
> + *
> + * @key:       RSA key
> + * @pos:       The bit position to check
> + */
> +static int is_public_exponent_bit_set(const struct rsa_public_key *key,
> +               int pos)
> +{
> +       return key->exponent & (1 << pos);

If this is 64-but, then perhaps 1ULL << pos

> +}
> +
> +/**
>   * pow_mod() - in-place public exponentiation
>   *
>   * @key:       RSA key
> @@ -132,6 +171,7 @@ static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
>  {
>         uint32_t *result, *ptr;
>         uint i;
> +       int j, k;
>
>         /* Sanity check for stack size - key->len is in 32-bit words */
>         if (key->len > RSA_MAX_KEY_BITS / 32) {
> @@ -141,18 +181,49 @@ static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
>         }
>
>         uint32_t val[key->len], acc[key->len], tmp[key->len];
> +       uint32_t a_scaled[key->len];
>         result = tmp;  /* Re-use location. */
>
>         /* Convert from big endian byte array to little endian word array. */
>         for (i = 0, ptr = inout + key->len - 1; i < key->len; i++, ptr--)
>                 val[i] = get_unaligned_be32(ptr);
>
> -       montgomery_mul(key, acc, val, key->rr);  /* axx = a * RR / R mod M */
> -       for (i = 0; i < 16; i += 2) {
> -               montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod M */
> -               montgomery_mul(key, acc, tmp, tmp); /* acc = tmp^2 / R mod M */
> +       if (0 != num_public_exponent_bits(key, &k))
> +               return -EINVAL;
> +
> +       if (k < 2) {
> +               debug("Public exponent is too short (%d bits, minimum 2)\n",
> +                     k);
> +               return -EINVAL;
> +       }
> +
> +       if (!is_public_exponent_bit_set(key, k - 1) ||
> +           !is_public_exponent_bit_set(key, 0)) {
> +               debug("Invalid RSA public exponent 0x%llx.\n", key->exponent);

What is invalid about it? Perhaps expand the message?

> +               return -EINVAL;
> +       }
> +
> +       /* the bit at e[k-1] is 1 by definition, so start with: C := M */
> +       montgomery_mul(key, acc, val, key->rr); /* acc = a * RR / R mod n */
> +       /* retain scaled version for intermediate use */

Join these two comments, also the style should be:

/*
 * The bit at ...
 * ...
 */

> +       memcpy(a_scaled, acc, key->len * 4);

What is 4? is it sizeof(uint32_t?). I wonder if we could replace the
line immediately above and remove this memcpy()?

For example, if you did:

       montgomery_mul(key, result, val, key->rr); /* acc = a * RR / R mod n */

> +
> +       for (j = k - 2; j > 0; --j) {
> +               montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod n */
> +
> +               if (is_public_exponent_bit_set(key, j)) {
> +                       /* acc = tmp * val / R mod n */
> +                       montgomery_mul(key, acc, tmp, a_scaled);
> +               } else {
> +                       /* e[j] == 0, copy tmp back to acc for next operation */
> +                       memcpy(acc, tmp, key->len * 4);
> +               }
>         }
> -       montgomery_mul(key, result, acc, val);  /* result = XX * a / R mod M */
> +
> +       /* the bit at e[0] is always 1 */
> +       montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod n */
> +       montgomery_mul(key, acc, tmp, val); /* acc = tmp * a / R mod M */
> +       memcpy(result, acc, key->len * 4);

Do you need this special case, or if bit 0 of e is always 1, perhaps
you can just extend the loop by one more step?

>
>         /* Make sure result < mod; result is at most 1x mod too large. */
>         if (greater_equal_modulus(key, result))
> @@ -229,6 +300,8 @@ static int rsa_verify_with_keynode(struct image_sign_info *info,
>         const void *blob = info->fdt_blob;
>         struct rsa_public_key key;
>         const void *modulus, *rr;
> +       const uint64_t *pe;

s/pe/public_exponent/ or similar?

> +       int length;
>         int ret;
>
>         if (node < 0) {
> @@ -241,6 +314,11 @@ static int rsa_verify_with_keynode(struct image_sign_info *info,
>         }
>         key.len = fdtdec_get_int(blob, node, "rsa,num-bits", 0);
>         key.n0inv = fdtdec_get_int(blob, node, "rsa,n0-inverse", 0);
> +       pe = fdt_getprop(blob, node, "rsa,exponent", &length);
> +       if (!pe || length < sizeof(*pe))
> +               key.exponent = RSA_DEFAULT_PUBEXP;
> +       else
> +               key.exponent = fdt64_to_cpu(*pe);
>         modulus = fdt_getprop(blob, node, "rsa,modulus", NULL);
>         rr = fdt_getprop(blob, node, "rsa,r-squared", NULL);
>         if (!key.len || !modulus || !rr) {
> diff --git a/test/vboot/vboot_test.sh b/test/vboot/vboot_test.sh
> index 3c6efa7..885e869 100755
> --- a/test/vboot/vboot_test.sh
> +++ b/test/vboot/vboot_test.sh
> @@ -14,6 +14,7 @@ set -e
>  run_uboot() {
>         echo -n "Test Verified Boot Run: $1: "
>         ${uboot} -d sandbox-u-boot.dtb >${tmp} -c '
> +sb bind 0 test.fit;
>  sb load host 0 100 test.fit;
>  fdt addr 100;
>  bootm 100;
> @@ -54,8 +55,16 @@ echo ${mkimage} -D "${dtc}"
>  echo "Build keys"
>  mkdir -p ${keys}
>
> +PUBLIC_EXPONENT=${1}
> +
> +if [ x"${PUBLIC_EXPONENT}" = x"" ]; then
> +       PUBLIC_EXPONENT=65537
> +fi

Do we need this, or can we just do:

   if [ -n "${PUBLIC_EXPONENT}" ]; then

> +
>  # Create an RSA key pair
> -openssl genrsa -F4 -out ${keys}/dev.key 2048 2>/dev/null
> +openssl genpkey -algorithm RSA -out ${keys}/dev.key \
> +    -pkeyopt rsa_keygen_bits:2048 \
> +    -pkeyopt rsa_keygen_pubexp:${PUBLIC_EXPONENT} 2>/dev/null
>
>  # Create a certificate containing the public key
>  openssl req -batch -new -x509 -key ${keys}/dev.key -out ${keys}/dev.crt
> --
> 2.0.0.rc0
>

Regards,
Simon
Michael van der Westhuizen May 30, 2014, 8:47 p.m. UTC | #2
Hi Simon,

Thanks for the feedback.

I'll take care of the nits and look into removing some special casing.

On 30 May 2014, at 9:04 PM, Simon Glass <sjg@chromium.org> wrote:

> Hi Michael,
> 
<snip>
>> 
>> /**
>> + * num_pub_exponent_bits() - Number of bits in the public exponent
>> + *
>> + * @key:       RSA key
>> + * @k:         Storage for the number of public exponent bits
>> + */
>> +static int num_public_exponent_bits(const struct rsa_public_key *key, int *k)
> 
> s/k/keyp/ or something like that.
> 
> Also is this function able to just use ffs() or similar?

flsll() yes, but that's not portable to Linux.

> 
<snip>

>> +}
>> +
>> +/**
>>  * pow_mod() - in-place public exponentiation
>>  *
>>  * @key:       RSA key
>> @@ -132,6 +171,7 @@ static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
>> {
>>        uint32_t *result, *ptr;
>>        uint i;
>> +       int j, k;
>> 
>>        /* Sanity check for stack size - key->len is in 32-bit words */
>>        if (key->len > RSA_MAX_KEY_BITS / 32) {
>> @@ -141,18 +181,49 @@ static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
>>        }
>> 
>>        uint32_t val[key->len], acc[key->len], tmp[key->len];
>> +       uint32_t a_scaled[key->len];
>>        result = tmp;  /* Re-use location. */
>> 
>>        /* Convert from big endian byte array to little endian word array. */
>>        for (i = 0, ptr = inout + key->len - 1; i < key->len; i++, ptr--)
>>                val[i] = get_unaligned_be32(ptr);
>> 
>> -       montgomery_mul(key, acc, val, key->rr);  /* axx = a * RR / R mod M */
>> -       for (i = 0; i < 16; i += 2) {
>> -               montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod M */
>> -               montgomery_mul(key, acc, tmp, tmp); /* acc = tmp^2 / R mod M */
>> +       if (0 != num_public_exponent_bits(key, &k))
>> +               return -EINVAL;
>> +
>> +       if (k < 2) {
>> +               debug("Public exponent is too short (%d bits, minimum 2)\n",
>> +                     k);
>> +               return -EINVAL;
>> +       }
>> +
>> +       if (!is_public_exponent_bit_set(key, k - 1) ||
>> +           !is_public_exponent_bit_set(key, 0)) {
>> +               debug("Invalid RSA public exponent 0x%llx.\n", key->exponent);
> 
> What is invalid about it? Perhaps expand the message?
> 
>> +               return -EINVAL;
>> +       }
>> +
>> +       /* the bit at e[k-1] is 1 by definition, so start with: C := M */
>> +       montgomery_mul(key, acc, val, key->rr); /* acc = a * RR / R mod n */
>> +       /* retain scaled version for intermediate use */
> 
> Join these two comments, also the style should be:
> 
> /*
> * The bit at ...
> * ...
> */
> 
>> +       memcpy(a_scaled, acc, key->len * 4);
> 
> What is 4? is it sizeof(uint32_t?). I wonder if we could replace the
> line immediately above and remove this memcpy()?
> 
> For example, if you did:
> 
>       montgomery_mul(key, result, val, key->rr); /* acc = a * RR / R mod n */

Yes, it's sizeof(uint32_t) - that would probably be a good thing to spell out too.

result points to tmp, which is going to be repeatedly overwritten in the loop, but we need a_scaled to stay constant (as the result of the first montgomery_mul) throughout... or did I misunderstand your point?

> 
>> +
>> +       for (j = k - 2; j > 0; --j) {
>> +               montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod n */
>> +
>> +               if (is_public_exponent_bit_set(key, j)) {
>> +                       /* acc = tmp * val / R mod n */
>> +                       montgomery_mul(key, acc, tmp, a_scaled);
>> +               } else {
>> +                       /* e[j] == 0, copy tmp back to acc for next operation */
>> +                       memcpy(acc, tmp, key->len * 4);
>> +               }
>>        }
>> -       montgomery_mul(key, result, acc, val);  /* result = XX * a / R mod M */
>> +
>> +       /* the bit at e[0] is always 1 */
>> +       montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod n */
>> +       montgomery_mul(key, acc, tmp, val); /* acc = tmp * a / R mod M */
>> +       memcpy(result, acc, key->len * 4);


<snip>

Michael
Simon Glass May 30, 2014, 8:50 p.m. UTC | #3
Hi Michael,

On 30 May 2014 14:47, Michael van der Westhuizen
<michael@smart-africa.com> wrote:
> Hi Simon,
>
> Thanks for the feedback.
>
> I'll take care of the nits and look into removing some special casing.
>
> On 30 May 2014, at 9:04 PM, Simon Glass <sjg@chromium.org> wrote:
>
>> Hi Michael,
>>
> <snip>
>>>
>>> /**
>>> + * num_pub_exponent_bits() - Number of bits in the public exponent
>>> + *
>>> + * @key:       RSA key
>>> + * @k:         Storage for the number of public exponent bits
>>> + */
>>> +static int num_public_exponent_bits(const struct rsa_public_key *key, int *k)
>>
>> s/k/keyp/ or something like that.
>>
>> Also is this function able to just use ffs() or similar?
>
> flsll() yes, but that's not portable to Linux.

Does that matter?

>
>>
> <snip>
>
>>> +}
>>> +
>>> +/**
>>>  * pow_mod() - in-place public exponentiation
>>>  *
>>>  * @key:       RSA key
>>> @@ -132,6 +171,7 @@ static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
>>> {
>>>        uint32_t *result, *ptr;
>>>        uint i;
>>> +       int j, k;
>>>
>>>        /* Sanity check for stack size - key->len is in 32-bit words */
>>>        if (key->len > RSA_MAX_KEY_BITS / 32) {
>>> @@ -141,18 +181,49 @@ static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
>>>        }
>>>
>>>        uint32_t val[key->len], acc[key->len], tmp[key->len];
>>> +       uint32_t a_scaled[key->len];
>>>        result = tmp;  /* Re-use location. */
>>>
>>>        /* Convert from big endian byte array to little endian word array. */
>>>        for (i = 0, ptr = inout + key->len - 1; i < key->len; i++, ptr--)
>>>                val[i] = get_unaligned_be32(ptr);
>>>
>>> -       montgomery_mul(key, acc, val, key->rr);  /* axx = a * RR / R mod M */
>>> -       for (i = 0; i < 16; i += 2) {
>>> -               montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod M */
>>> -               montgomery_mul(key, acc, tmp, tmp); /* acc = tmp^2 / R mod M */
>>> +       if (0 != num_public_exponent_bits(key, &k))
>>> +               return -EINVAL;
>>> +
>>> +       if (k < 2) {
>>> +               debug("Public exponent is too short (%d bits, minimum 2)\n",
>>> +                     k);
>>> +               return -EINVAL;
>>> +       }
>>> +
>>> +       if (!is_public_exponent_bit_set(key, k - 1) ||
>>> +           !is_public_exponent_bit_set(key, 0)) {
>>> +               debug("Invalid RSA public exponent 0x%llx.\n", key->exponent);
>>
>> What is invalid about it? Perhaps expand the message?
>>
>>> +               return -EINVAL;
>>> +       }
>>> +
>>> +       /* the bit at e[k-1] is 1 by definition, so start with: C := M */
>>> +       montgomery_mul(key, acc, val, key->rr); /* acc = a * RR / R mod n */
>>> +       /* retain scaled version for intermediate use */
>>
>> Join these two comments, also the style should be:
>>
>> /*
>> * The bit at ...
>> * ...
>> */
>>
>>> +       memcpy(a_scaled, acc, key->len * 4);
>>
>> What is 4? is it sizeof(uint32_t?). I wonder if we could replace the
>> line immediately above and remove this memcpy()?
>>
>> For example, if you did:
>>
>>       montgomery_mul(key, result, val, key->rr); /* acc = a * RR / R mod n */
>
> Yes, it's sizeof(uint32_t) - that would probably be a good thing to spell out too.
>
> result points to tmp, which is going to be repeatedly overwritten in the loop, but we need a_scaled to stay constant (as the result of the first montgomery_mul) throughout... or did I misunderstand your point?

OK I see thanks. Perhaps add a new temporary instead of using memcpy()?

>
>>
>>> +
>>> +       for (j = k - 2; j > 0; --j) {
>>> +               montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod n */
>>> +
>>> +               if (is_public_exponent_bit_set(key, j)) {
>>> +                       /* acc = tmp * val / R mod n */
>>> +                       montgomery_mul(key, acc, tmp, a_scaled);
>>> +               } else {
>>> +                       /* e[j] == 0, copy tmp back to acc for next operation */
>>> +                       memcpy(acc, tmp, key->len * 4);
>>> +               }
>>>        }
>>> -       montgomery_mul(key, result, acc, val);  /* result = XX * a / R mod M */
>>> +
>>> +       /* the bit at e[0] is always 1 */
>>> +       montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod n */
>>> +       montgomery_mul(key, acc, tmp, val); /* acc = tmp * a / R mod M */
>>> +       memcpy(result, acc, key->len * 4);
>
>
> <snip>
>

Regards,
Simon
Michael van der Westhuizen May 30, 2014, 9:03 p.m. UTC | #4
Hi Simon,

On 30 May 2014, at 10:50 PM, Simon Glass <sjg@chromium.org> wrote:

> Hi Michael,
> 
> On 30 May 2014 14:47, Michael van der Westhuizen
> <michael@smart-africa.com> wrote:
>> Hi Simon,
>> 
>> Thanks for the feedback.
>> 
>> I'll take care of the nits and look into removing some special casing.
>> 
>> On 30 May 2014, at 9:04 PM, Simon Glass <sjg@chromium.org> wrote:
>> 
>>> Hi Michael,
>>> 
>> <snip>
>>>> 
>>>> /**
>>>> + * num_pub_exponent_bits() - Number of bits in the public exponent
>>>> + *
>>>> + * @key:       RSA key
>>>> + * @k:         Storage for the number of public exponent bits
>>>> + */
>>>> +static int num_public_exponent_bits(const struct rsa_public_key *key, int *k)
>>> 
>>> s/k/keyp/ or something like that.
>>> 
>>> Also is this function able to just use ffs() or similar?
>> 
>> flsll() yes, but that's not portable to Linux.
> 
> Does that matter?

This code compiles on the host, so unfortunately yes.  That's the same reason I had to work around the lack of handy *_u64 fdt helpers when reading the public exponent.

> 
>> 
>>> 
>> <snip>
>> 
>>>> +}
>>>> +
>>>> +/**
>>>> * pow_mod() - in-place public exponentiation
>>>> *
>>>> * @key:       RSA key
>>>> @@ -132,6 +171,7 @@ static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
>>>> {
>>>>       uint32_t *result, *ptr;
>>>>       uint i;
>>>> +       int j, k;
>>>> 
>>>>       /* Sanity check for stack size - key->len is in 32-bit words */
>>>>       if (key->len > RSA_MAX_KEY_BITS / 32) {
>>>> @@ -141,18 +181,49 @@ static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
>>>>       }
>>>> 
>>>>       uint32_t val[key->len], acc[key->len], tmp[key->len];
>>>> +       uint32_t a_scaled[key->len];
>>>>       result = tmp;  /* Re-use location. */
>>>> 
>>>>       /* Convert from big endian byte array to little endian word array. */
>>>>       for (i = 0, ptr = inout + key->len - 1; i < key->len; i++, ptr--)
>>>>               val[i] = get_unaligned_be32(ptr);
>>>> 
>>>> -       montgomery_mul(key, acc, val, key->rr);  /* axx = a * RR / R mod M */
>>>> -       for (i = 0; i < 16; i += 2) {
>>>> -               montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod M */
>>>> -               montgomery_mul(key, acc, tmp, tmp); /* acc = tmp^2 / R mod M */
>>>> +       if (0 != num_public_exponent_bits(key, &k))
>>>> +               return -EINVAL;
>>>> +
>>>> +       if (k < 2) {
>>>> +               debug("Public exponent is too short (%d bits, minimum 2)\n",
>>>> +                     k);
>>>> +               return -EINVAL;
>>>> +       }
>>>> +
>>>> +       if (!is_public_exponent_bit_set(key, k - 1) ||
>>>> +           !is_public_exponent_bit_set(key, 0)) {
>>>> +               debug("Invalid RSA public exponent 0x%llx.\n", key->exponent);
>>> 
>>> What is invalid about it? Perhaps expand the message?
>>> 
>>>> +               return -EINVAL;
>>>> +       }
>>>> +
>>>> +       /* the bit at e[k-1] is 1 by definition, so start with: C := M */
>>>> +       montgomery_mul(key, acc, val, key->rr); /* acc = a * RR / R mod n */
>>>> +       /* retain scaled version for intermediate use */
>>> 
>>> Join these two comments, also the style should be:
>>> 
>>> /*
>>> * The bit at ...
>>> * ...
>>> */
>>> 
>>>> +       memcpy(a_scaled, acc, key->len * 4);
>>> 
>>> What is 4? is it sizeof(uint32_t?). I wonder if we could replace the
>>> line immediately above and remove this memcpy()?
>>> 
>>> For example, if you did:
>>> 
>>>      montgomery_mul(key, result, val, key->rr); /* acc = a * RR / R mod n */
>> 
>> Yes, it's sizeof(uint32_t) - that would probably be a good thing to spell out too.
>> 
>> result points to tmp, which is going to be repeatedly overwritten in the loop, but we need a_scaled to stay constant (as the result of the first montgomery_mul) throughout... or did I misunderstand your point?
> 
> OK I see thanks. Perhaps add a new temporary instead of using memcpy()?

And set it up using montgomery_mul?  Would that not be more expensive than a memcpy?

Thanks,
Michael
Simon Glass May 30, 2014, 9:11 p.m. UTC | #5
Hi Michael,

On 30 May 2014 15:03, Michael van der Westhuizen
<michael@smart-africa.com> wrote:
> Hi Simon,
>
> On 30 May 2014, at 10:50 PM, Simon Glass <sjg@chromium.org> wrote:
>
> Hi Michael,
>
> On 30 May 2014 14:47, Michael van der Westhuizen
> <michael@smart-africa.com> wrote:
>
> Hi Simon,
>
> Thanks for the feedback.
>
> I'll take care of the nits and look into removing some special casing.
>
> On 30 May 2014, at 9:04 PM, Simon Glass <sjg@chromium.org> wrote:
>
> Hi Michael,
>
> <snip>
>
>
> /**
> + * num_pub_exponent_bits() - Number of bits in the public exponent
> + *
> + * @key:       RSA key
> + * @k:         Storage for the number of public exponent bits
> + */
> +static int num_public_exponent_bits(const struct rsa_public_key *key, int
> *k)
>
>
> s/k/keyp/ or something like that.
>
> Also is this function able to just use ffs() or similar?
>
>
> flsll() yes, but that's not portable to Linux.
>
>
> Does that matter?
>
>
> This code compiles on the host, so unfortunately yes.  That's the same
> reason I had to work around the lack of handy *_u64 fdt helpers when reading
> the public exponent.

OK, although Linux might have replacements. But if not, perhaps add a
comment as to why you need the helper function.

[snip]

> For example, if you did:
>
>      montgomery_mul(key, result, val, key->rr); /* acc = a * RR / R mod n */
>
>
> Yes, it's sizeof(uint32_t) - that would probably be a good thing to spell
> out too.
>
> result points to tmp, which is going to be repeatedly overwritten in the
> loop, but we need a_scaled to stay constant (as the result of the first
> montgomery_mul) throughout... or did I misunderstand your point?
>
>
> OK I see thanks. Perhaps add a new temporary instead of using memcpy()?
>
>
> And set it up using montgomery_mul?  Would that not be more expensive than a
> memcpy?

On the host? Not sure we care about that. I was really just wondering
if an assignment is better than a memcpy() which makes assumptions
about the format.

Regards,
Simon
Michael van der Westhuizen May 30, 2014, 9:20 p.m. UTC | #6
Hi Simon,

On 30 May 2014, at 11:11 PM, Simon Glass <sjg@chromium.org> wrote:

<snip>
>> This code compiles on the host, so unfortunately yes.  That's the same
>> reason I had to work around the lack of handy *_u64 fdt helpers when reading
>> the public exponent.
> 
> OK, although Linux might have replacements. But if not, perhaps add a
> comment as to why you need the helper function.

Will do.

> 
> [snip]
> 
>> For example, if you did:
>> 
>>     montgomery_mul(key, result, val, key->rr); /* acc = a * RR / R mod n */
>> 
>> 
>> Yes, it's sizeof(uint32_t) - that would probably be a good thing to spell
>> out too.
>> 
>> result points to tmp, which is going to be repeatedly overwritten in the
>> loop, but we need a_scaled to stay constant (as the result of the first
>> montgomery_mul) throughout... or did I misunderstand your point?
>> 
>> 
>> OK I see thanks. Perhaps add a new temporary instead of using memcpy()?
>> 
>> 
>> And set it up using montgomery_mul?  Would that not be more expensive than a
>> memcpy?
> 
> On the host? Not sure we care about that. I was really just wondering
> if an assignment is better than a memcpy() which makes assumptions
> about the format.

In theory it should optimise to the same code, but I'll run tests with assignment to make sure it works as expected.

Michael
diff mbox

Patch

diff --git a/doc/uImage.FIT/signature.txt b/doc/uImage.FIT/signature.txt
index 9502037..cc314a3 100644
--- a/doc/uImage.FIT/signature.txt
+++ b/doc/uImage.FIT/signature.txt
@@ -66,7 +66,8 @@  Creating an RSA key and certificate
 -----------------------------------
 To create a new public key, size 2048 bits:
 
-$ openssl genrsa -F4 -out keys/dev.key 2048
+$ openssl genpkey -algorithm RSA -out keys/dev.key \
+    -pkeyopt rsa_keygen_bits:2048 -pkeyopt rsa_keygen_pubexp:65537
 
 To create a certificate for this:
 
@@ -159,6 +160,7 @@  For RSA the following are mandatory:
 
 - rsa,num-bits: Number of key bits (e.g. 2048)
 - rsa,modulus: Modulus (N) as a big-endian multi-word integer
+- rsa,exponent: Public exponent (E) as a 64 bit unsigned integer
 - rsa,r-squared: (2^num-bits)^2 as a big-endian multi-word integer
 - rsa,n0-inverse: -1 / modulus[0] mod 2^32
 
diff --git a/include/rsa.h b/include/rsa.h
index a5680ab..b84d82a 100644
--- a/include/rsa.h
+++ b/include/rsa.h
@@ -27,6 +27,7 @@  struct rsa_public_key {
 	uint32_t n0inv;		/* -1 / modulus[0] mod 2^32 */
 	uint32_t *modulus;	/* modulus as little endian array */
 	uint32_t *rr;		/* R^2 as little endian array */
+	uint64_t exponent;	/* public exponent */
 };
 
 #if IMAGE_ENABLE_SIGN
diff --git a/lib/rsa/rsa-sign.c b/lib/rsa/rsa-sign.c
index ca8c120..f20ae28 100644
--- a/lib/rsa/rsa-sign.c
+++ b/lib/rsa/rsa-sign.c
@@ -261,9 +261,56 @@  err_priv:
 }
 
 /*
+ * rsa_get_e(): - Get the public exponent from an RSA public key
+ */
+static int rsa_get_e(RSA *key, uint64_t *e)
+{
+	int ret;
+	BIGNUM *bn_te;
+	uint64_t te;
+
+	ret = -1;
+	bn_te = NULL;
+
+	if (!e)
+		goto cleanup;
+
+	if (BN_num_bits(key->e) > 64)
+		goto cleanup;
+
+	*e = BN_get_word(key->e);
+
+	if (BN_num_bits(key->e) < 33) {
+		ret = 0;
+		goto cleanup;
+	}
+
+	bn_te = BN_dup(key->e);
+	if (!bn_te)
+		goto cleanup;
+
+	if (!BN_rshift(bn_te, bn_te, 32))
+		goto cleanup;
+
+	if (!BN_mask_bits(bn_te, 32))
+		goto cleanup;
+
+	te = BN_get_word(bn_te);
+	te <<= 32;
+	*e |= te;
+	ret = 0;
+
+cleanup:
+	if (bn_te)
+		BN_free(bn_te);
+
+	return ret;
+}
+
+/*
  * rsa_get_params(): - Get the important parameters of an RSA public key
  */
-int rsa_get_params(RSA *key, uint32_t *n0_invp, BIGNUM **modulusp,
+int rsa_get_params(RSA *key, uint64_t *e, uint32_t *n0_invp, BIGNUM **modulusp,
 		   BIGNUM **r_squaredp)
 {
 	BIGNUM *big1, *big2, *big32, *big2_32;
@@ -286,6 +333,9 @@  int rsa_get_params(RSA *key, uint32_t *n0_invp, BIGNUM **modulusp,
 		return -ENOMEM;
 	}
 
+	if (0 != rsa_get_e(key, e))
+		ret = -1;
+
 	if (!BN_copy(n, key->n) || !BN_set_word(big1, 1L) ||
 	    !BN_set_word(big2, 2L) || !BN_set_word(big32, 32L))
 		ret = -1;
@@ -386,6 +436,7 @@  static int fdt_add_bignum(void *blob, int noffset, const char *prop_name,
 int rsa_add_verify_data(struct image_sign_info *info, void *keydest)
 {
 	BIGNUM *modulus, *r_squared;
+	uint64_t e;
 	uint32_t n0_inv;
 	int parent, node;
 	char name[100];
@@ -397,7 +448,7 @@  int rsa_add_verify_data(struct image_sign_info *info, void *keydest)
 	ret = rsa_get_pub_key(info->keydir, info->keyname, &rsa);
 	if (ret)
 		return ret;
-	ret = rsa_get_params(rsa, &n0_inv, &modulus, &r_squared);
+	ret = rsa_get_params(rsa, &e, &n0_inv, &modulus, &r_squared);
 	if (ret)
 		return ret;
 	bits = BN_num_bits(modulus);
@@ -431,6 +482,7 @@  int rsa_add_verify_data(struct image_sign_info *info, void *keydest)
 				 info->keyname);
 	ret |= fdt_setprop_u32(keydest, node, "rsa,num-bits", bits);
 	ret |= fdt_setprop_u32(keydest, node, "rsa,n0-inverse", n0_inv);
+	ret |= fdt_setprop_u64(keydest, node, "rsa,exponent", e);
 	ret |= fdt_add_bignum(keydest, node, "rsa,modulus", modulus, bits);
 	ret |= fdt_add_bignum(keydest, node, "rsa,r-squared", r_squared, bits);
 	ret |= fdt_setprop_string(keydest, node, FIT_ALGO_PROP,
diff --git a/lib/rsa/rsa-verify.c b/lib/rsa/rsa-verify.c
index 587da5b..878f92a 100644
--- a/lib/rsa/rsa-verify.c
+++ b/lib/rsa/rsa-verify.c
@@ -26,6 +26,9 @@ 
 #define get_unaligned_be32(a) fdt32_to_cpu(*(uint32_t *)a)
 #define put_unaligned_be32(a, b) (*(uint32_t *)(b) = cpu_to_fdt32(a))
 
+/* The default public exponent is 65537 */
+#define RSA_DEFAULT_PUBEXP	0x10001
+
 /**
  * subtract_modulus() - subtract modulus from the given value
  *
@@ -123,6 +126,42 @@  static void montgomery_mul(const struct rsa_public_key *key,
 }
 
 /**
+ * num_pub_exponent_bits() - Number of bits in the public exponent
+ *
+ * @key:	RSA key
+ * @k:		Storage for the number of public exponent bits
+ */
+static int num_public_exponent_bits(const struct rsa_public_key *key, int *k)
+{
+	uint64_t e;
+	const uint nbits = (sizeof(e) * 8);
+
+	*k = 0;
+	e = key->exponent;
+
+	if (!e)
+		return 0;
+
+	for (*k = 1; *k < nbits + 1; ++*k)
+		if (!(e >>= 1))
+			return 0;
+
+	return -EINVAL;
+}
+
+/**
+ * is_public_exponent_bit_set() - Check if a bit in the public exponent is set
+ *
+ * @key:	RSA key
+ * @pos:	The bit position to check
+ */
+static int is_public_exponent_bit_set(const struct rsa_public_key *key,
+		int pos)
+{
+	return key->exponent & (1 << pos);
+}
+
+/**
  * pow_mod() - in-place public exponentiation
  *
  * @key:	RSA key
@@ -132,6 +171,7 @@  static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
 {
 	uint32_t *result, *ptr;
 	uint i;
+	int j, k;
 
 	/* Sanity check for stack size - key->len is in 32-bit words */
 	if (key->len > RSA_MAX_KEY_BITS / 32) {
@@ -141,18 +181,49 @@  static int pow_mod(const struct rsa_public_key *key, uint32_t *inout)
 	}
 
 	uint32_t val[key->len], acc[key->len], tmp[key->len];
+	uint32_t a_scaled[key->len];
 	result = tmp;  /* Re-use location. */
 
 	/* Convert from big endian byte array to little endian word array. */
 	for (i = 0, ptr = inout + key->len - 1; i < key->len; i++, ptr--)
 		val[i] = get_unaligned_be32(ptr);
 
-	montgomery_mul(key, acc, val, key->rr);  /* axx = a * RR / R mod M */
-	for (i = 0; i < 16; i += 2) {
-		montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod M */
-		montgomery_mul(key, acc, tmp, tmp); /* acc = tmp^2 / R mod M */
+	if (0 != num_public_exponent_bits(key, &k))
+		return -EINVAL;
+
+	if (k < 2) {
+		debug("Public exponent is too short (%d bits, minimum 2)\n",
+		      k);
+		return -EINVAL;
+	}
+
+	if (!is_public_exponent_bit_set(key, k - 1) ||
+	    !is_public_exponent_bit_set(key, 0)) {
+		debug("Invalid RSA public exponent 0x%llx.\n", key->exponent);
+		return -EINVAL;
+	}
+
+	/* the bit at e[k-1] is 1 by definition, so start with: C := M */
+	montgomery_mul(key, acc, val, key->rr); /* acc = a * RR / R mod n */
+	/* retain scaled version for intermediate use */
+	memcpy(a_scaled, acc, key->len * 4);
+
+	for (j = k - 2; j > 0; --j) {
+		montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod n */
+
+		if (is_public_exponent_bit_set(key, j)) {
+			/* acc = tmp * val / R mod n */
+			montgomery_mul(key, acc, tmp, a_scaled);
+		} else {
+			/* e[j] == 0, copy tmp back to acc for next operation */
+			memcpy(acc, tmp, key->len * 4);
+		}
 	}
-	montgomery_mul(key, result, acc, val);  /* result = XX * a / R mod M */
+
+	/* the bit at e[0] is always 1 */
+	montgomery_mul(key, tmp, acc, acc); /* tmp = acc^2 / R mod n */
+	montgomery_mul(key, acc, tmp, val); /* acc = tmp * a / R mod M */
+	memcpy(result, acc, key->len * 4);
 
 	/* Make sure result < mod; result is at most 1x mod too large. */
 	if (greater_equal_modulus(key, result))
@@ -229,6 +300,8 @@  static int rsa_verify_with_keynode(struct image_sign_info *info,
 	const void *blob = info->fdt_blob;
 	struct rsa_public_key key;
 	const void *modulus, *rr;
+	const uint64_t *pe;
+	int length;
 	int ret;
 
 	if (node < 0) {
@@ -241,6 +314,11 @@  static int rsa_verify_with_keynode(struct image_sign_info *info,
 	}
 	key.len = fdtdec_get_int(blob, node, "rsa,num-bits", 0);
 	key.n0inv = fdtdec_get_int(blob, node, "rsa,n0-inverse", 0);
+	pe = fdt_getprop(blob, node, "rsa,exponent", &length);
+	if (!pe || length < sizeof(*pe))
+		key.exponent = RSA_DEFAULT_PUBEXP;
+	else
+		key.exponent = fdt64_to_cpu(*pe);
 	modulus = fdt_getprop(blob, node, "rsa,modulus", NULL);
 	rr = fdt_getprop(blob, node, "rsa,r-squared", NULL);
 	if (!key.len || !modulus || !rr) {
diff --git a/test/vboot/vboot_test.sh b/test/vboot/vboot_test.sh
index 3c6efa7..885e869 100755
--- a/test/vboot/vboot_test.sh
+++ b/test/vboot/vboot_test.sh
@@ -14,6 +14,7 @@  set -e
 run_uboot() {
 	echo -n "Test Verified Boot Run: $1: "
 	${uboot} -d sandbox-u-boot.dtb >${tmp} -c '
+sb bind 0 test.fit;
 sb load host 0 100 test.fit;
 fdt addr 100;
 bootm 100;
@@ -54,8 +55,16 @@  echo ${mkimage} -D "${dtc}"
 echo "Build keys"
 mkdir -p ${keys}
 
+PUBLIC_EXPONENT=${1}
+
+if [ x"${PUBLIC_EXPONENT}" = x"" ]; then
+	PUBLIC_EXPONENT=65537
+fi
+
 # Create an RSA key pair
-openssl genrsa -F4 -out ${keys}/dev.key 2048 2>/dev/null
+openssl genpkey -algorithm RSA -out ${keys}/dev.key \
+    -pkeyopt rsa_keygen_bits:2048 \
+    -pkeyopt rsa_keygen_pubexp:${PUBLIC_EXPONENT} 2>/dev/null
 
 # Create a certificate containing the public key
 openssl req -batch -new -x509 -key ${keys}/dev.key -out ${keys}/dev.crt