Message ID | 20191121001121.21854-5-takahiro.akashi@linaro.org |
---|---|
State | Changes Requested |
Delegated to: | Tom Rini |
Headers | show |
Series | rsa: extend rsa_verify() for UEFI secure boot | expand |
On 11/21/19 1:11 AM, AKASHI Takahiro wrote: > In the current implementation of FIT_SIGNATURE, five parameters for > a RSA public key are required while only two of them are essential. > (See rsa-mod-exp.h and uImage.FIT/signature.txt) > This is a result of considering relatively limited computer power > and resources on embedded systems, while such a assumption may not > be quite practical for other use cases. > > In this patch, added is a function, rsa_gen_key_prop(), which will > generate additional parameters for other uses, in particular > UEFI secure boot, on the fly. > > Note: the current code uses some "big number" rouKtines from BearSSL > for the calculation. > > Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org> > --- > include/u-boot/rsa-mod-exp.h | 23 ++ > lib/rsa/Kconfig | 1 + > lib/rsa/Makefile | 1 + > lib/rsa/rsa-keyprop.c | 725 +++++++++++++++++++++++++++++++++++ > 4 files changed, 750 insertions(+) > create mode 100644 lib/rsa/rsa-keyprop.c > > diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h > index 8a428c4b6a1a..1da8af1bb83d 100644 > --- a/include/u-boot/rsa-mod-exp.h > +++ b/include/u-boot/rsa-mod-exp.h > @@ -26,6 +26,29 @@ struct key_prop { > uint32_t exp_len; /* Exponent length in number of uint8_t */ > }; > > +/** > + * rsa_gen_key_prop() - Generate key properties of RSA public key > + * @key: Specifies key data in DER format > + * @keylen: Length of @key > + * @prop: Generated key property > + * > + * This function takes a blob of encoded RSA public key data in DER > + * format, parse it and generate all the relevant properties > + * in key_prop structure. > + * Return a pointer to struct key_prop in @prop on success. > + * > + * Return: 0 on success, negative on error > + */ > +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **proc); > + > +/** > + * rsa_free_key_prop() - Free key properties > + * @prop: Pointer to struct key_prop > + * > + * This function frees all the memories allocated by rsa_gen_key_prop(). > + */ > +void rsa_free_key_prop(struct key_prop *prop); > + > /** > * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw > * > diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig > index 71e4c06bf883..d1d6f6cf64a3 100644 > --- a/lib/rsa/Kconfig > +++ b/lib/rsa/Kconfig > @@ -33,6 +33,7 @@ config RSA_VERIFY > config RSA_VERIFY_WITH_PKEY > bool "Execute RSA verification without key parameters from FDT" > depends on RSA > + imply RSA_PUBLIC_KEY_PARSER Do we really need RSA_PUBLIC_KEY_PARSER whenever we use CONFIG_RSA=y? E.g. on a system without the UEFI sub-system? Otherwise simply let RSA_PUBLIC_KEY_PARSER depend on RSA. > help > The standard RSA-signature verification code (FIT_SIGNATURE) uses > pre-calculated key properties, that are stored in fdt blob, in > diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile > index c07305188e0c..14ed3cb4012b 100644 > --- a/lib/rsa/Makefile > +++ b/lib/rsa/Makefile > @@ -6,4 +6,5 @@ > # Wolfgang Denk, DENX Software Engineering, wd@denx.de. > > obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o > +obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o > obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o > diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c > new file mode 100644 > index 000000000000..9464df009343 > --- /dev/null > +++ b/lib/rsa/rsa-keyprop.c > @@ -0,0 +1,725 @@ > +// SPDX-License-Identifier: GPL-2.0+ and MIT > +/* > + * RSA library - generate parameters for a public key > + * > + * Copyright (c) 2019 Linaro Limited > + * Author: AKASHI Takahiro > + * > + * Big number routines in this file come from BearSSL: > + * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> > + */ > + > +#include <common.h> > +#include <image.h> > +#include <malloc.h> > +#include <asm/byteorder.h> > +#include <crypto/internal/rsa.h> > +#include <u-boot/rsa-mod-exp.h> > + > +/** > + * br_dec16be() - Convert 16-bit big-endian integer to native > + * @src: Pointer to data > + * Return: Native-endian integer > + */ > +static unsigned br_dec16be(const void *src) > +{ > + return be16_to_cpup(src); > +} > + > +/** > + * br_dec32be() - Convert 32-bit big-endian integer to native > + * @src: Pointer to data > + * Return: Native-endian integer > + */ > +static uint32_t br_dec32be(const void *src) > +{ > + return be32_to_cpup(src); > +} > + > +/** > + * br_enc32be() - Convert native 32-bit integer to big-endian > + * @dst: Pointer to buffer to store big-endian integer in > + * @x: Native 32-bit integer > + */ > +static void br_enc32be(void *dst, uint32_t x) > +{ > + __be32 tmp; > + > + tmp = cpu_to_be32(x); > + memcpy(dst, &tmp, sizeof(tmp)); > +} > + > +/* from BearSSL's src/inner.h */ > + > +/* > + * Negate a boolean. > + */ > +static uint32_t NOT(uint32_t ctl) > +{ > + return ctl ^ 1; > +} > + > +/* > + * Multiplexer: returns x if ctl == 1, y if ctl == 0. > + */ > +static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y) > +{ > + return y ^ (-ctl & (x ^ y)); > +} > + > +/* > + * Equality check: returns 1 if x == y, 0 otherwise. > + */ > +static uint32_t EQ(uint32_t x, uint32_t y) > +{ > + uint32_t q; > + > + q = x ^ y; > + return NOT((q | -q) >> 31); > +} > + > +/* > + * Inequality check: returns 1 if x != y, 0 otherwise. > + */ > +static uint32_t NEQ(uint32_t x, uint32_t y) > +{ > + uint32_t q; > + > + q = x ^ y; > + return (q | -q) >> 31; We want to minimize the code size of U-Boot. So, please, review this code and remove all of this bogus. Best regards Heinrich > +} > + > +/* > + * Comparison: returns 1 if x > y, 0 otherwise. > + */ > +static uint32_t GT(uint32_t x, uint32_t y) > +{ > + /* > + * If both x < 2^31 and y < 2^31, then y-x will have its high > + * bit set if x > y, cleared otherwise. > + * > + * If either x >= 2^31 or y >= 2^31 (but not both), then the > + * result is the high bit of x. > + * > + * If both x >= 2^31 and y >= 2^31, then we can virtually > + * subtract 2^31 from both, and we are back to the first case. > + * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already > + * fine. > + */ > + uint32_t z; > + > + z = y - x; > + return (z ^ ((x ^ y) & (x ^ z))) >> 31; > +} > + > +/* > + * Compute the bit length of a 32-bit integer. Returned value is between 0 > + * and 32 (inclusive). > + */ > +static uint32_t BIT_LENGTH(uint32_t x) > +{ > + uint32_t k, c; > + > + k = NEQ(x, 0); > + c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4; > + c = GT(x, 0x00FF); x = MUX(c, x >> 8, x); k += c << 3; > + c = GT(x, 0x000F); x = MUX(c, x >> 4, x); k += c << 2; > + c = GT(x, 0x0003); x = MUX(c, x >> 2, x); k += c << 1; > + k += GT(x, 0x0001); > + return k; > +} > + > +#define GE(x, y) NOT(GT(y, x)) > +#define LT(x, y) GT(y, x) > +#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y)) > + > +/* > + * Integers 'i32' > + * -------------- > + * > + * The 'i32' functions implement computations on big integers using > + * an internal representation as an array of 32-bit integers. For > + * an array x[]: > + * -- x[0] contains the "announced bit length" of the integer > + * -- x[1], x[2]... contain the value in little-endian order (x[1] > + * contains the least significant 32 bits) > + * > + * Multiplications rely on the elementary 32x32->64 multiplication. > + * > + * The announced bit length specifies the number of bits that are > + * significant in the subsequent 32-bit words. Unused bits in the > + * last (most significant) word are set to 0; subsequent words are > + * uninitialized and need not exist at all. > + * > + * The execution time and memory access patterns of all computations > + * depend on the announced bit length, but not on the actual word > + * values. For modular integers, the announced bit length of any integer > + * modulo n is equal to the actual bit length of n; thus, computations > + * on modular integers are "constant-time" (only the modulus length may > + * leak). > + */ > + > +/* > + * Extract one word from an integer. The offset is counted in bits. > + * The word MUST entirely fit within the word elements corresponding > + * to the announced bit length of a[]. > + */ > +static uint32_t br_i32_word(const uint32_t *a, uint32_t off) > +{ > + size_t u; > + unsigned j; > + > + u = (size_t)(off >> 5) + 1; > + j = (unsigned)off & 31; > + if (j == 0) { > + return a[u]; > + } else { > + return (a[u] >> j) | (a[u + 1] << (32 - j)); > + } > +} > + > +/* from BearSSL's src/int/i32_bitlen.c */ > + > +/* > + * Compute the actual bit length of an integer. The argument x should > + * point to the first (least significant) value word of the integer. > + * The len 'xlen' contains the number of 32-bit words to access. > + * > + * CT: value or length of x does not leak. > + */ > +static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen) > +{ > + uint32_t tw, twk; > + > + tw = 0; > + twk = 0; > + while (xlen -- > 0) { > + uint32_t w, c; > + > + c = EQ(tw, 0); > + w = x[xlen]; > + tw = MUX(c, w, tw); > + twk = MUX(c, (uint32_t)xlen, twk); > + } > + return (twk << 5) + BIT_LENGTH(tw); > +} > + > +/* from BearSSL's src/int/i32_decode.c */ > + > +/* > + * Decode an integer from its big-endian unsigned representation. The > + * "true" bit length of the integer is computed, but all words of x[] > + * corresponding to the full 'len' bytes of the source are set. > + * > + * CT: value or length of x does not leak. > + */ > +static void br_i32_decode(uint32_t *x, const void *src, size_t len) > +{ > + const unsigned char *buf; > + size_t u, v; > + > + buf = src; > + u = len; > + v = 1; > + for (;;) { > + if (u < 4) { > + uint32_t w; > + > + if (u < 2) { > + if (u == 0) { > + break; > + } else { > + w = buf[0]; > + } > + } else { > + if (u == 2) { > + w = br_dec16be(buf); > + } else { > + w = ((uint32_t)buf[0] << 16) > + | br_dec16be(buf + 1); > + } > + } > + x[v ++] = w; > + break; > + } else { > + u -= 4; > + x[v ++] = br_dec32be(buf + u); > + } > + } > + x[0] = br_i32_bit_length(x + 1, v - 1); > +} > + > +/* from BearSSL's src/int/i32_encode.c */ > + > +/* > + * Encode an integer into its big-endian unsigned representation. The > + * output length in bytes is provided (parameter 'len'); if the length > + * is too short then the integer is appropriately truncated; if it is > + * too long then the extra bytes are set to 0. > + */ > +static void br_i32_encode(void *dst, size_t len, const uint32_t *x) > +{ > + unsigned char *buf; > + size_t k; > + > + buf = dst; > + > + /* > + * Compute the announced size of x in bytes; extra bytes are > + * filled with zeros. > + */ > + k = (x[0] + 7) >> 3; > + while (len > k) { > + *buf ++ = 0; > + len --; > + } > + > + /* > + * Now we use k as index within x[]. That index starts at 1; > + * we initialize it to the topmost complete word, and process > + * any remaining incomplete word. > + */ > + k = (len + 3) >> 2; > + switch (len & 3) { > + case 3: > + *buf ++ = x[k] >> 16; > + /* fall through */ > + case 2: > + *buf ++ = x[k] >> 8; > + /* fall through */ > + case 1: > + *buf ++ = x[k]; > + k --; > + } > + > + /* > + * Encode all complete words. > + */ > + while (k > 0) { > + br_enc32be(buf, x[k]); > + k --; > + buf += 4; > + } > +} > + > +/* from BearSSL's src/int/i32_ninv32.c */ > + > +/* > + * Compute -(1/x) mod 2^32. If x is even, then this function returns 0. > + */ > +static uint32_t br_i32_ninv32(uint32_t x) > +{ > + uint32_t y; > + > + y = 2 - x; > + y *= 2 - y * x; > + y *= 2 - y * x; > + y *= 2 - y * x; > + y *= 2 - y * x; > + return MUX(x & 1, -y, 0); > +} > + > +/* from BearSSL's src/int/i32_add.c */ > + > +/* > + * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[] > + * is unmodified, but the carry is still computed and returned. The > + * arrays a[] and b[] MUST have the same announced bit length. > + * > + * a[] and b[] MAY be the same array, but partial overlap is not allowed. > + */ > +static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl) > +{ > + uint32_t cc; > + size_t u, m; > + > + cc = 0; > + m = (a[0] + 63) >> 5; > + for (u = 1; u < m; u ++) { > + uint32_t aw, bw, naw; > + > + aw = a[u]; > + bw = b[u]; > + naw = aw + bw + cc; > + > + /* > + * Carry is 1 if naw < aw. Carry is also 1 if naw == aw > + * AND the carry was already 1. > + */ > + cc = (cc & EQ(naw, aw)) | LT(naw, aw); > + a[u] = MUX(ctl, naw, aw); > + } > + return cc; > +} > + > +/* from BearSSL's src/int/i32_sub.c */ > + > +/* > + * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0, > + * then a[] is unmodified, but the carry is still computed and returned. > + * The arrays a[] and b[] MUST have the same announced bit length. > + * > + * a[] and b[] MAY be the same array, but partial overlap is not allowed. > + */ > +static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl) > +{ > + uint32_t cc; > + size_t u, m; > + > + cc = 0; > + m = (a[0] + 63) >> 5; > + for (u = 1; u < m; u ++) { > + uint32_t aw, bw, naw; > + > + aw = a[u]; > + bw = b[u]; > + naw = aw - bw - cc; > + > + /* > + * Carry is 1 if naw > aw. Carry is 1 also if naw == aw > + * AND the carry was already 1. > + */ > + cc = (cc & EQ(naw, aw)) | GT(naw, aw); > + a[u] = MUX(ctl, naw, aw); > + } > + return cc; > +} > + > +/* from BearSSL's src/int/i32_div32.c */ > + > +/* > + * Constant-time division. The dividend hi:lo is divided by the > + * divisor d; the quotient is returned and the remainder is written > + * in *r. If hi == d, then the quotient does not fit on 32 bits; > + * returned value is thus truncated. If hi > d, returned values are > + * indeterminate. > + */ > +static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r) > +{ > + /* TODO: optimize this */ > + uint32_t q; > + uint32_t ch, cf; > + int k; > + > + q = 0; > + ch = EQ(hi, d); > + hi = MUX(ch, 0, hi); > + for (k = 31; k > 0; k --) { > + int j; > + uint32_t w, ctl, hi2, lo2; > + > + j = 32 - k; > + w = (hi << j) | (lo >> k); > + ctl = GE(w, d) | (hi >> k); > + hi2 = (w - d) >> j; > + lo2 = lo - (d << k); > + hi = MUX(ctl, hi2, hi); > + lo = MUX(ctl, lo2, lo); > + q |= ctl << k; > + } > + cf = GE(lo, d) | hi; > + q |= cf; > + *r = MUX(cf, lo - d, lo); > + return q; > +} > + > +/* > + * Wrapper for br_divrem(); the remainder is returned, and the quotient > + * is discarded. > + */ > +static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d) > +{ > + uint32_t r; > + > + br_divrem(hi, lo, d, &r); > + return r; > +} > + > +/* > + * Wrapper for br_divrem(); the quotient is returned, and the remainder > + * is discarded. > + */ > +static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d) > +{ > + uint32_t r; > + > + return br_divrem(hi, lo, d, &r); > +} > + > +/* from BearSSL's src/int/i32_muladd.c */ > + > +/* > + * Multiply x[] by 2^32 and then add integer z, modulo m[]. This > + * function assumes that x[] and m[] have the same announced bit > + * length, and the announced bit length of m[] matches its true > + * bit length. > + * > + * x[] and m[] MUST be distinct arrays. > + * > + * CT: only the common announced bit length of x and m leaks, not > + * the values of x, z or m. > + */ > +static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m) > +{ > + uint32_t m_bitlen; > + size_t u, mlen; > + uint32_t a0, a1, b0, hi, g, q, tb; > + uint32_t chf, clow, under, over; > + uint64_t cc; > + > + /* > + * We can test on the modulus bit length since we accept to > + * leak that length. > + */ > + m_bitlen = m[0]; > + if (m_bitlen == 0) { > + return; > + } > + if (m_bitlen <= 32) { > + x[1] = br_rem(x[1], z, m[1]); > + return; > + } > + mlen = (m_bitlen + 31) >> 5; > + > + /* > + * Principle: we estimate the quotient (x*2^32+z)/m by > + * doing a 64/32 division with the high words. > + * > + * Let: > + * w = 2^32 > + * a = (w*a0 + a1) * w^N + a2 > + * b = b0 * w^N + b2 > + * such that: > + * 0 <= a0 < w > + * 0 <= a1 < w > + * 0 <= a2 < w^N > + * w/2 <= b0 < w > + * 0 <= b2 < w^N > + * a < w*b > + * I.e. the two top words of a are a0:a1, the top word of b is > + * b0, we ensured that b0 is "full" (high bit set), and a is > + * such that the quotient q = a/b fits on one word (0 <= q < w). > + * > + * If a = b*q + r (with 0 <= r < q), we can estimate q by > + * doing an Euclidean division on the top words: > + * a0*w+a1 = b0*u + v (with 0 <= v < w) > + * Then the following holds: > + * 0 <= u <= w > + * u-2 <= q <= u > + */ > + a0 = br_i32_word(x, m_bitlen - 32); > + hi = x[mlen]; > + memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); > + x[1] = z; > + a1 = br_i32_word(x, m_bitlen - 32); > + b0 = br_i32_word(m, m_bitlen - 32); > + > + /* > + * We estimate a divisor q. If the quotient returned by br_div() > + * is g: > + * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF. > + * -- Otherwise: > + * -- if g == 0 then we set q = 0; > + * -- otherwise, we set q = g - 1. > + * The properties described above then ensure that the true > + * quotient is q-1, q or q+1. > + */ > + g = br_div(a0, a1, b0); > + q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1)); > + > + /* > + * We subtract q*m from x (with the extra high word of value 'hi'). > + * Since q may be off by 1 (in either direction), we may have to > + * add or subtract m afterwards. > + * > + * The 'tb' flag will be true (1) at the end of the loop if the > + * result is greater than or equal to the modulus (not counting > + * 'hi' or the carry). > + */ > + cc = 0; > + tb = 1; > + for (u = 1; u <= mlen; u ++) { > + uint32_t mw, zw, xw, nxw; > + uint64_t zl; > + > + mw = m[u]; > + zl = MUL(mw, q) + cc; > + cc = (uint32_t)(zl >> 32); > + zw = (uint32_t)zl; > + xw = x[u]; > + nxw = xw - zw; > + cc += (uint64_t)GT(nxw, xw); > + x[u] = nxw; > + tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); > + } > + > + /* > + * If we underestimated q, then either cc < hi (one extra bit > + * beyond the top array word), or cc == hi and tb is true (no > + * extra bit, but the result is not lower than the modulus). In > + * these cases we must subtract m once. > + * > + * Otherwise, we may have overestimated, which will show as > + * cc > hi (thus a negative result). Correction is adding m once. > + */ > + chf = (uint32_t)(cc >> 32); > + clow = (uint32_t)cc; > + over = chf | GT(clow, hi); > + under = ~over & (tb | (~chf & LT(clow, hi))); > + br_i32_add(x, m, over); > + br_i32_sub(x, m, under); > +} > + > +/* from BearSSL's src/int/i32_reduce.c */ > + > +/* > + * Reduce an integer (a[]) modulo another (m[]). The result is written > + * in x[] and its announced bit length is set to be equal to that of m[]. > + * > + * x[] MUST be distinct from a[] and m[]. > + * > + * CT: only announced bit lengths leak, not values of x, a or m. > + */ > +static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m) > +{ > + uint32_t m_bitlen, a_bitlen; > + size_t mlen, alen, u; > + > + m_bitlen = m[0]; > + mlen = (m_bitlen + 31) >> 5; > + > + x[0] = m_bitlen; > + if (m_bitlen == 0) { > + return; > + } > + > + /* > + * If the source is shorter, then simply copy all words from a[] > + * and zero out the upper words. > + */ > + a_bitlen = a[0]; > + alen = (a_bitlen + 31) >> 5; > + if (a_bitlen < m_bitlen) { > + memcpy(x + 1, a + 1, alen * sizeof *a); > + for (u = alen; u < mlen; u ++) { > + x[u + 1] = 0; > + } > + return; > + } > + > + /* > + * The source length is at least equal to that of the modulus. > + * We must thus copy N-1 words, and input the remaining words > + * one by one. > + */ > + memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a); > + x[mlen] = 0; > + for (u = 1 + alen - mlen; u > 0; u --) { > + br_i32_muladd_small(x, a[u], m); > + } > +} > + > +/** > + * rsa_free_key_prop() - Free key properties > + * @prop: Pointer to struct key_prop > + * > + * This function frees all the memories allocated by rsa_gen_key_prop(). > + */ > +void rsa_free_key_prop(struct key_prop *prop) > +{ > + if (!prop) > + return; > + > + free((void *)prop->modulus); > + free((void *)prop->public_exponent); > + free((void *)prop->rr); > + > + free(prop); > +} > + > +/** > + * rsa_gen_key_prop() - Generate key properties of RSA public key > + * @key: Specifies key data in DER format > + * @keylen: Length of @key > + * @prop: Generated key property > + * > + * This function takes a blob of encoded RSA public key data in DER > + * format, parse it and generate all the relevant properties > + * in key_prop structure. > + * Return a pointer to struct key_prop in @prop on success. > + * > + * Return: 0 on success, negative on error > + */ > +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop) > +{ > + struct rsa_key rsa_key; > + uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL; > + const int max_rsa_size = 4096; > + int rlen, i, ret; > + > + *prop = calloc(sizeof(**prop), 1); > + n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); > + rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); > + rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); > + if (!(*prop) || !n || !rr || !rrtmp) { > + ret = -ENOMEM; > + goto err; > + } > + > + ret = rsa_parse_pub_key(&rsa_key, key, keylen); > + if (ret) > + goto err; > + > + /* modulus */ > + /* removing leading 0's */ > + for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++) > + ; > + (*prop)->num_bits = (rsa_key.n_sz - i) * 8; > + (*prop)->modulus = malloc(rsa_key.n_sz - i); > + if (!(*prop)->modulus) { > + ret = -ENOMEM; > + goto err; > + } > + memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i); > + > + /* exponent */ > + (*prop)->public_exponent = calloc(1, sizeof(uint64_t)); > + if (!(*prop)->public_exponent) { > + ret = -ENOMEM; > + goto err; > + } > + memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t) > + - rsa_key.e_sz, > + rsa_key.e, rsa_key.e_sz); > + (*prop)->exp_len = rsa_key.e_sz; > + > + /* n0 inverse */ > + br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i); > + (*prop)->n0inv = br_i32_ninv32(n[1]); > + > + /* R^2 mod n; R = 2^(num_bits) */ > + rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */ > + rr[0] = 0; > + *(uint8_t *)&rr[0] = (1 << (rlen % 8)); > + for (i = 1; i < (((rlen + 31) >> 5) + 1); i++) > + rr[i] = 0; > + br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1); > + br_i32_reduce(rr, rrtmp, n); > + > + rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */ > + (*prop)->rr = malloc(rlen); > + if (!(*prop)->rr) { > + ret = -ENOMEM; > + goto err; > + } > + br_i32_encode((void *)(*prop)->rr, rlen, rr); > + > + return 0; > + > +err: > + free(n); > + free(rr); > + free(rrtmp); > + rsa_free_key_prop(*prop); > + return ret; > +} >
On 1/8/20 7:07 PM, Heinrich Schuchardt wrote: > > > On 11/21/19 1:11 AM, AKASHI Takahiro wrote: >> In the current implementation of FIT_SIGNATURE, five parameters for >> a RSA public key are required while only two of them are essential. >> (See rsa-mod-exp.h and uImage.FIT/signature.txt) >> This is a result of considering relatively limited computer power >> and resources on embedded systems, while such a assumption may not >> be quite practical for other use cases. >> >> In this patch, added is a function, rsa_gen_key_prop(), which will >> generate additional parameters for other uses, in particular >> UEFI secure boot, on the fly. >> >> Note: the current code uses some "big number" rouKtines from BearSSL >> for the calculation. >> >> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org> >> --- >> include/u-boot/rsa-mod-exp.h | 23 ++ >> lib/rsa/Kconfig | 1 + >> lib/rsa/Makefile | 1 + >> lib/rsa/rsa-keyprop.c | 725 +++++++++++++++++++++++++++++++++++ >> 4 files changed, 750 insertions(+) >> create mode 100644 lib/rsa/rsa-keyprop.c >> >> diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h >> index 8a428c4b6a1a..1da8af1bb83d 100644 >> --- a/include/u-boot/rsa-mod-exp.h >> +++ b/include/u-boot/rsa-mod-exp.h >> @@ -26,6 +26,29 @@ struct key_prop { >> uint32_t exp_len; /* Exponent length in number of uint8_t */ >> }; >> >> +/** >> + * rsa_gen_key_prop() - Generate key properties of RSA public key >> + * @key: Specifies key data in DER format >> + * @keylen: Length of @key >> + * @prop: Generated key property >> + * >> + * This function takes a blob of encoded RSA public key data in DER >> + * format, parse it and generate all the relevant properties >> + * in key_prop structure. >> + * Return a pointer to struct key_prop in @prop on success. >> + * >> + * Return: 0 on success, negative on error >> + */ >> +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct >> key_prop **proc); >> + >> +/** >> + * rsa_free_key_prop() - Free key properties >> + * @prop: Pointer to struct key_prop >> + * >> + * This function frees all the memories allocated by rsa_gen_key_prop(). >> + */ >> +void rsa_free_key_prop(struct key_prop *prop); >> + >> /** >> * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw >> * >> diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig >> index 71e4c06bf883..d1d6f6cf64a3 100644 >> --- a/lib/rsa/Kconfig >> +++ b/lib/rsa/Kconfig >> @@ -33,6 +33,7 @@ config RSA_VERIFY >> config RSA_VERIFY_WITH_PKEY >> bool "Execute RSA verification without key parameters from FDT" >> depends on RSA >> + imply RSA_PUBLIC_KEY_PARSER > > Do we really need RSA_PUBLIC_KEY_PARSER whenever we use CONFIG_RSA=y? > E.g. on a system without the UEFI sub-system? > > Otherwise simply let RSA_PUBLIC_KEY_PARSER depend on RSA. > >> help >> The standard RSA-signature verification code (FIT_SIGNATURE) uses >> pre-calculated key properties, that are stored in fdt blob, in >> diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile >> index c07305188e0c..14ed3cb4012b 100644 >> --- a/lib/rsa/Makefile >> +++ b/lib/rsa/Makefile >> @@ -6,4 +6,5 @@ >> # Wolfgang Denk, DENX Software Engineering, wd@denx.de. >> >> obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o >> +obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o >> obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o >> diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c >> new file mode 100644 >> index 000000000000..9464df009343 >> --- /dev/null >> +++ b/lib/rsa/rsa-keyprop.c >> @@ -0,0 +1,725 @@ >> +// SPDX-License-Identifier: GPL-2.0+ and MIT >> +/* >> + * RSA library - generate parameters for a public key >> + * >> + * Copyright (c) 2019 Linaro Limited >> + * Author: AKASHI Takahiro >> + * >> + * Big number routines in this file come from BearSSL: >> + * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> >> + */ >> + >> +#include <common.h> >> +#include <image.h> >> +#include <malloc.h> >> +#include <asm/byteorder.h> >> +#include <crypto/internal/rsa.h> >> +#include <u-boot/rsa-mod-exp.h> >> + >> +/** >> + * br_dec16be() - Convert 16-bit big-endian integer to native >> + * @src: Pointer to data >> + * Return: Native-endian integer >> + */ >> +static unsigned br_dec16be(const void *src) >> +{ >> + return be16_to_cpup(src); >> +} >> + >> +/** >> + * br_dec32be() - Convert 32-bit big-endian integer to native >> + * @src: Pointer to data >> + * Return: Native-endian integer >> + */ >> +static uint32_t br_dec32be(const void *src) >> +{ >> + return be32_to_cpup(src); >> +} >> + >> +/** >> + * br_enc32be() - Convert native 32-bit integer to big-endian >> + * @dst: Pointer to buffer to store big-endian integer in >> + * @x: Native 32-bit integer >> + */ >> +static void br_enc32be(void *dst, uint32_t x) >> +{ >> + __be32 tmp; >> + >> + tmp = cpu_to_be32(x); >> + memcpy(dst, &tmp, sizeof(tmp)); >> +} >> + >> +/* from BearSSL's src/inner.h */ >> + >> +/* >> + * Negate a boolean. >> + */ >> +static uint32_t NOT(uint32_t ctl) >> +{ >> + return ctl ^ 1; >> +} >> + >> +/* >> + * Multiplexer: returns x if ctl == 1, y if ctl == 0. >> + */ >> +static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y) >> +{ >> + return y ^ (-ctl & (x ^ y)); >> +} >> + >> +/* >> + * Equality check: returns 1 if x == y, 0 otherwise. >> + */ >> +static uint32_t EQ(uint32_t x, uint32_t y) >> +{ >> + uint32_t q; >> + >> + q = x ^ y; >> + return NOT((q | -q) >> 31); >> +} >> + >> +/* >> + * Inequality check: returns 1 if x != y, 0 otherwise. >> + */ >> +static uint32_t NEQ(uint32_t x, uint32_t y) >> +{ >> + uint32_t q; >> + >> + q = x ^ y; >> + return (q | -q) >> 31; > > We want to minimize the code size of U-Boot. > > So, please, review this code and remove all of this bogus. Are timing attacks relevant in any of our UEFI use cases? I thought we only are verifying signatures against public keys that are anyway in reach of an adversery. Best regards Heinrich > >> +} >> + >> +/* >> + * Comparison: returns 1 if x > y, 0 otherwise. >> + */ >> +static uint32_t GT(uint32_t x, uint32_t y) >> +{ >> + /* >> + * If both x < 2^31 and y < 2^31, then y-x will have its high >> + * bit set if x > y, cleared otherwise. >> + * >> + * If either x >= 2^31 or y >= 2^31 (but not both), then the >> + * result is the high bit of x. >> + * >> + * If both x >= 2^31 and y >= 2^31, then we can virtually >> + * subtract 2^31 from both, and we are back to the first case. >> + * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already >> + * fine. >> + */ >> + uint32_t z; >> + >> + z = y - x; >> + return (z ^ ((x ^ y) & (x ^ z))) >> 31; >> +} >> + >> +/* >> + * Compute the bit length of a 32-bit integer. Returned value is >> between 0 >> + * and 32 (inclusive). >> + */ >> +static uint32_t BIT_LENGTH(uint32_t x) >> +{ >> + uint32_t k, c; >> + >> + k = NEQ(x, 0); >> + c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4; >> + c = GT(x, 0x00FF); x = MUX(c, x >> 8, x); k += c << 3; >> + c = GT(x, 0x000F); x = MUX(c, x >> 4, x); k += c << 2; >> + c = GT(x, 0x0003); x = MUX(c, x >> 2, x); k += c << 1; >> + k += GT(x, 0x0001); >> + return k; >> +} >> + >> +#define GE(x, y) NOT(GT(y, x)) >> +#define LT(x, y) GT(y, x) >> +#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y)) >> + >> +/* >> + * Integers 'i32' >> + * -------------- >> + * >> + * The 'i32' functions implement computations on big integers using >> + * an internal representation as an array of 32-bit integers. For >> + * an array x[]: >> + * -- x[0] contains the "announced bit length" of the integer >> + * -- x[1], x[2]... contain the value in little-endian order (x[1] >> + * contains the least significant 32 bits) >> + * >> + * Multiplications rely on the elementary 32x32->64 multiplication. >> + * >> + * The announced bit length specifies the number of bits that are >> + * significant in the subsequent 32-bit words. Unused bits in the >> + * last (most significant) word are set to 0; subsequent words are >> + * uninitialized and need not exist at all. >> + * >> + * The execution time and memory access patterns of all computations >> + * depend on the announced bit length, but not on the actual word >> + * values. For modular integers, the announced bit length of any integer >> + * modulo n is equal to the actual bit length of n; thus, computations >> + * on modular integers are "constant-time" (only the modulus length may >> + * leak). >> + */ >> + >> +/* >> + * Extract one word from an integer. The offset is counted in bits. >> + * The word MUST entirely fit within the word elements corresponding >> + * to the announced bit length of a[]. >> + */ >> +static uint32_t br_i32_word(const uint32_t *a, uint32_t off) >> +{ >> + size_t u; >> + unsigned j; >> + >> + u = (size_t)(off >> 5) + 1; >> + j = (unsigned)off & 31; >> + if (j == 0) { >> + return a[u]; >> + } else { >> + return (a[u] >> j) | (a[u + 1] << (32 - j)); >> + } >> +} >> + >> +/* from BearSSL's src/int/i32_bitlen.c */ >> + >> +/* >> + * Compute the actual bit length of an integer. The argument x should >> + * point to the first (least significant) value word of the integer. >> + * The len 'xlen' contains the number of 32-bit words to access. >> + * >> + * CT: value or length of x does not leak. >> + */ >> +static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen) >> +{ >> + uint32_t tw, twk; >> + >> + tw = 0; >> + twk = 0; >> + while (xlen -- > 0) { >> + uint32_t w, c; >> + >> + c = EQ(tw, 0); >> + w = x[xlen]; >> + tw = MUX(c, w, tw); >> + twk = MUX(c, (uint32_t)xlen, twk); >> + } >> + return (twk << 5) + BIT_LENGTH(tw); >> +} >> + >> +/* from BearSSL's src/int/i32_decode.c */ >> + >> +/* >> + * Decode an integer from its big-endian unsigned representation. The >> + * "true" bit length of the integer is computed, but all words of x[] >> + * corresponding to the full 'len' bytes of the source are set. >> + * >> + * CT: value or length of x does not leak. >> + */ >> +static void br_i32_decode(uint32_t *x, const void *src, size_t len) >> +{ >> + const unsigned char *buf; >> + size_t u, v; >> + >> + buf = src; >> + u = len; >> + v = 1; >> + for (;;) { >> + if (u < 4) { >> + uint32_t w; >> + >> + if (u < 2) { >> + if (u == 0) { >> + break; >> + } else { >> + w = buf[0]; >> + } >> + } else { >> + if (u == 2) { >> + w = br_dec16be(buf); >> + } else { >> + w = ((uint32_t)buf[0] << 16) >> + | br_dec16be(buf + 1); >> + } >> + } >> + x[v ++] = w; >> + break; >> + } else { >> + u -= 4; >> + x[v ++] = br_dec32be(buf + u); >> + } >> + } >> + x[0] = br_i32_bit_length(x + 1, v - 1); >> +} >> + >> +/* from BearSSL's src/int/i32_encode.c */ >> + >> +/* >> + * Encode an integer into its big-endian unsigned representation. The >> + * output length in bytes is provided (parameter 'len'); if the length >> + * is too short then the integer is appropriately truncated; if it is >> + * too long then the extra bytes are set to 0. >> + */ >> +static void br_i32_encode(void *dst, size_t len, const uint32_t *x) >> +{ >> + unsigned char *buf; >> + size_t k; >> + >> + buf = dst; >> + >> + /* >> + * Compute the announced size of x in bytes; extra bytes are >> + * filled with zeros. >> + */ >> + k = (x[0] + 7) >> 3; >> + while (len > k) { >> + *buf ++ = 0; >> + len --; >> + } >> + >> + /* >> + * Now we use k as index within x[]. That index starts at 1; >> + * we initialize it to the topmost complete word, and process >> + * any remaining incomplete word. >> + */ >> + k = (len + 3) >> 2; >> + switch (len & 3) { >> + case 3: >> + *buf ++ = x[k] >> 16; >> + /* fall through */ >> + case 2: >> + *buf ++ = x[k] >> 8; >> + /* fall through */ >> + case 1: >> + *buf ++ = x[k]; >> + k --; >> + } >> + >> + /* >> + * Encode all complete words. >> + */ >> + while (k > 0) { >> + br_enc32be(buf, x[k]); >> + k --; >> + buf += 4; >> + } >> +} >> + >> +/* from BearSSL's src/int/i32_ninv32.c */ >> + >> +/* >> + * Compute -(1/x) mod 2^32. If x is even, then this function returns 0. >> + */ >> +static uint32_t br_i32_ninv32(uint32_t x) >> +{ >> + uint32_t y; >> + >> + y = 2 - x; >> + y *= 2 - y * x; >> + y *= 2 - y * x; >> + y *= 2 - y * x; >> + y *= 2 - y * x; >> + return MUX(x & 1, -y, 0); >> +} >> + >> +/* from BearSSL's src/int/i32_add.c */ >> + >> +/* >> + * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[] >> + * is unmodified, but the carry is still computed and returned. The >> + * arrays a[] and b[] MUST have the same announced bit length. >> + * >> + * a[] and b[] MAY be the same array, but partial overlap is not >> allowed. >> + */ >> +static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl) >> +{ >> + uint32_t cc; >> + size_t u, m; >> + >> + cc = 0; >> + m = (a[0] + 63) >> 5; >> + for (u = 1; u < m; u ++) { >> + uint32_t aw, bw, naw; >> + >> + aw = a[u]; >> + bw = b[u]; >> + naw = aw + bw + cc; >> + >> + /* >> + * Carry is 1 if naw < aw. Carry is also 1 if naw == aw >> + * AND the carry was already 1. >> + */ >> + cc = (cc & EQ(naw, aw)) | LT(naw, aw); >> + a[u] = MUX(ctl, naw, aw); >> + } >> + return cc; >> +} >> + >> +/* from BearSSL's src/int/i32_sub.c */ >> + >> +/* >> + * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0, >> + * then a[] is unmodified, but the carry is still computed and returned. >> + * The arrays a[] and b[] MUST have the same announced bit length. >> + * >> + * a[] and b[] MAY be the same array, but partial overlap is not >> allowed. >> + */ >> +static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl) >> +{ >> + uint32_t cc; >> + size_t u, m; >> + >> + cc = 0; >> + m = (a[0] + 63) >> 5; >> + for (u = 1; u < m; u ++) { >> + uint32_t aw, bw, naw; >> + >> + aw = a[u]; >> + bw = b[u]; >> + naw = aw - bw - cc; >> + >> + /* >> + * Carry is 1 if naw > aw. Carry is 1 also if naw == aw >> + * AND the carry was already 1. >> + */ >> + cc = (cc & EQ(naw, aw)) | GT(naw, aw); >> + a[u] = MUX(ctl, naw, aw); >> + } >> + return cc; >> +} >> + >> +/* from BearSSL's src/int/i32_div32.c */ >> + >> +/* >> + * Constant-time division. The dividend hi:lo is divided by the >> + * divisor d; the quotient is returned and the remainder is written >> + * in *r. If hi == d, then the quotient does not fit on 32 bits; >> + * returned value is thus truncated. If hi > d, returned values are >> + * indeterminate. >> + */ >> +static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, >> uint32_t *r) >> +{ >> + /* TODO: optimize this */ >> + uint32_t q; >> + uint32_t ch, cf; >> + int k; >> + >> + q = 0; >> + ch = EQ(hi, d); >> + hi = MUX(ch, 0, hi); >> + for (k = 31; k > 0; k --) { >> + int j; >> + uint32_t w, ctl, hi2, lo2; >> + >> + j = 32 - k; >> + w = (hi << j) | (lo >> k); >> + ctl = GE(w, d) | (hi >> k); >> + hi2 = (w - d) >> j; >> + lo2 = lo - (d << k); >> + hi = MUX(ctl, hi2, hi); >> + lo = MUX(ctl, lo2, lo); >> + q |= ctl << k; >> + } >> + cf = GE(lo, d) | hi; >> + q |= cf; >> + *r = MUX(cf, lo - d, lo); >> + return q; >> +} >> + >> +/* >> + * Wrapper for br_divrem(); the remainder is returned, and the quotient >> + * is discarded. >> + */ >> +static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d) >> +{ >> + uint32_t r; >> + >> + br_divrem(hi, lo, d, &r); >> + return r; >> +} >> + >> +/* >> + * Wrapper for br_divrem(); the quotient is returned, and the remainder >> + * is discarded. >> + */ >> +static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d) >> +{ >> + uint32_t r; >> + >> + return br_divrem(hi, lo, d, &r); >> +} >> + >> +/* from BearSSL's src/int/i32_muladd.c */ >> + >> +/* >> + * Multiply x[] by 2^32 and then add integer z, modulo m[]. This >> + * function assumes that x[] and m[] have the same announced bit >> + * length, and the announced bit length of m[] matches its true >> + * bit length. >> + * >> + * x[] and m[] MUST be distinct arrays. >> + * >> + * CT: only the common announced bit length of x and m leaks, not >> + * the values of x, z or m. >> + */ >> +static void br_i32_muladd_small(uint32_t *x, uint32_t z, const >> uint32_t *m) >> +{ >> + uint32_t m_bitlen; >> + size_t u, mlen; >> + uint32_t a0, a1, b0, hi, g, q, tb; >> + uint32_t chf, clow, under, over; >> + uint64_t cc; >> + >> + /* >> + * We can test on the modulus bit length since we accept to >> + * leak that length. >> + */ >> + m_bitlen = m[0]; >> + if (m_bitlen == 0) { >> + return; >> + } >> + if (m_bitlen <= 32) { >> + x[1] = br_rem(x[1], z, m[1]); >> + return; >> + } >> + mlen = (m_bitlen + 31) >> 5; >> + >> + /* >> + * Principle: we estimate the quotient (x*2^32+z)/m by >> + * doing a 64/32 division with the high words. >> + * >> + * Let: >> + * w = 2^32 >> + * a = (w*a0 + a1) * w^N + a2 >> + * b = b0 * w^N + b2 >> + * such that: >> + * 0 <= a0 < w >> + * 0 <= a1 < w >> + * 0 <= a2 < w^N >> + * w/2 <= b0 < w >> + * 0 <= b2 < w^N >> + * a < w*b >> + * I.e. the two top words of a are a0:a1, the top word of b is >> + * b0, we ensured that b0 is "full" (high bit set), and a is >> + * such that the quotient q = a/b fits on one word (0 <= q < w). >> + * >> + * If a = b*q + r (with 0 <= r < q), we can estimate q by >> + * doing an Euclidean division on the top words: >> + * a0*w+a1 = b0*u + v (with 0 <= v < w) >> + * Then the following holds: >> + * 0 <= u <= w >> + * u-2 <= q <= u >> + */ >> + a0 = br_i32_word(x, m_bitlen - 32); >> + hi = x[mlen]; >> + memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); >> + x[1] = z; >> + a1 = br_i32_word(x, m_bitlen - 32); >> + b0 = br_i32_word(m, m_bitlen - 32); >> + >> + /* >> + * We estimate a divisor q. If the quotient returned by br_div() >> + * is g: >> + * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF. >> + * -- Otherwise: >> + * -- if g == 0 then we set q = 0; >> + * -- otherwise, we set q = g - 1. >> + * The properties described above then ensure that the true >> + * quotient is q-1, q or q+1. >> + */ >> + g = br_div(a0, a1, b0); >> + q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1)); >> + >> + /* >> + * We subtract q*m from x (with the extra high word of value 'hi'). >> + * Since q may be off by 1 (in either direction), we may have to >> + * add or subtract m afterwards. >> + * >> + * The 'tb' flag will be true (1) at the end of the loop if the >> + * result is greater than or equal to the modulus (not counting >> + * 'hi' or the carry). >> + */ >> + cc = 0; >> + tb = 1; >> + for (u = 1; u <= mlen; u ++) { >> + uint32_t mw, zw, xw, nxw; >> + uint64_t zl; >> + >> + mw = m[u]; >> + zl = MUL(mw, q) + cc; >> + cc = (uint32_t)(zl >> 32); >> + zw = (uint32_t)zl; >> + xw = x[u]; >> + nxw = xw - zw; >> + cc += (uint64_t)GT(nxw, xw); >> + x[u] = nxw; >> + tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); >> + } >> + >> + /* >> + * If we underestimated q, then either cc < hi (one extra bit >> + * beyond the top array word), or cc == hi and tb is true (no >> + * extra bit, but the result is not lower than the modulus). In >> + * these cases we must subtract m once. >> + * >> + * Otherwise, we may have overestimated, which will show as >> + * cc > hi (thus a negative result). Correction is adding m once. >> + */ >> + chf = (uint32_t)(cc >> 32); >> + clow = (uint32_t)cc; >> + over = chf | GT(clow, hi); >> + under = ~over & (tb | (~chf & LT(clow, hi))); >> + br_i32_add(x, m, over); >> + br_i32_sub(x, m, under); >> +} >> + >> +/* from BearSSL's src/int/i32_reduce.c */ >> + >> +/* >> + * Reduce an integer (a[]) modulo another (m[]). The result is written >> + * in x[] and its announced bit length is set to be equal to that of >> m[]. >> + * >> + * x[] MUST be distinct from a[] and m[]. >> + * >> + * CT: only announced bit lengths leak, not values of x, a or m. >> + */ >> +static void br_i32_reduce(uint32_t *x, const uint32_t *a, const >> uint32_t *m) >> +{ >> + uint32_t m_bitlen, a_bitlen; >> + size_t mlen, alen, u; >> + >> + m_bitlen = m[0]; >> + mlen = (m_bitlen + 31) >> 5; >> + >> + x[0] = m_bitlen; >> + if (m_bitlen == 0) { >> + return; >> + } >> + >> + /* >> + * If the source is shorter, then simply copy all words from a[] >> + * and zero out the upper words. >> + */ >> + a_bitlen = a[0]; >> + alen = (a_bitlen + 31) >> 5; >> + if (a_bitlen < m_bitlen) { >> + memcpy(x + 1, a + 1, alen * sizeof *a); >> + for (u = alen; u < mlen; u ++) { >> + x[u + 1] = 0; >> + } >> + return; >> + } >> + >> + /* >> + * The source length is at least equal to that of the modulus. >> + * We must thus copy N-1 words, and input the remaining words >> + * one by one. >> + */ >> + memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a); >> + x[mlen] = 0; >> + for (u = 1 + alen - mlen; u > 0; u --) { >> + br_i32_muladd_small(x, a[u], m); >> + } >> +} >> + >> +/** >> + * rsa_free_key_prop() - Free key properties >> + * @prop: Pointer to struct key_prop >> + * >> + * This function frees all the memories allocated by rsa_gen_key_prop(). >> + */ >> +void rsa_free_key_prop(struct key_prop *prop) >> +{ >> + if (!prop) >> + return; >> + >> + free((void *)prop->modulus); >> + free((void *)prop->public_exponent); >> + free((void *)prop->rr); >> + >> + free(prop); >> +} >> + >> +/** >> + * rsa_gen_key_prop() - Generate key properties of RSA public key >> + * @key: Specifies key data in DER format >> + * @keylen: Length of @key >> + * @prop: Generated key property >> + * >> + * This function takes a blob of encoded RSA public key data in DER >> + * format, parse it and generate all the relevant properties >> + * in key_prop structure. >> + * Return a pointer to struct key_prop in @prop on success. >> + * >> + * Return: 0 on success, negative on error >> + */ >> +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct >> key_prop **prop) >> +{ >> + struct rsa_key rsa_key; >> + uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL; >> + const int max_rsa_size = 4096; >> + int rlen, i, ret; >> + >> + *prop = calloc(sizeof(**prop), 1); >> + n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); >> + rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); >> + rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); >> + if (!(*prop) || !n || !rr || !rrtmp) { >> + ret = -ENOMEM; >> + goto err; >> + } >> + >> + ret = rsa_parse_pub_key(&rsa_key, key, keylen); >> + if (ret) >> + goto err; >> + >> + /* modulus */ >> + /* removing leading 0's */ >> + for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++) >> + ; >> + (*prop)->num_bits = (rsa_key.n_sz - i) * 8; >> + (*prop)->modulus = malloc(rsa_key.n_sz - i); >> + if (!(*prop)->modulus) { >> + ret = -ENOMEM; >> + goto err; >> + } >> + memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i); >> + >> + /* exponent */ >> + (*prop)->public_exponent = calloc(1, sizeof(uint64_t)); >> + if (!(*prop)->public_exponent) { >> + ret = -ENOMEM; >> + goto err; >> + } >> + memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t) >> + - rsa_key.e_sz, >> + rsa_key.e, rsa_key.e_sz); >> + (*prop)->exp_len = rsa_key.e_sz; >> + >> + /* n0 inverse */ >> + br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i); >> + (*prop)->n0inv = br_i32_ninv32(n[1]); >> + >> + /* R^2 mod n; R = 2^(num_bits) */ >> + rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */ >> + rr[0] = 0; >> + *(uint8_t *)&rr[0] = (1 << (rlen % 8)); >> + for (i = 1; i < (((rlen + 31) >> 5) + 1); i++) >> + rr[i] = 0; >> + br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1); >> + br_i32_reduce(rr, rrtmp, n); >> + >> + rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */ >> + (*prop)->rr = malloc(rlen); >> + if (!(*prop)->rr) { >> + ret = -ENOMEM; >> + goto err; >> + } >> + br_i32_encode((void *)(*prop)->rr, rlen, rr); >> + >> + return 0; >> + >> +err: >> + free(n); >> + free(rr); >> + free(rrtmp); >> + rsa_free_key_prop(*prop); >> + return ret; >> +} >>
On Wed, Jan 08, 2020 at 07:07:01PM +0100, Heinrich Schuchardt wrote: > > > On 11/21/19 1:11 AM, AKASHI Takahiro wrote: > >In the current implementation of FIT_SIGNATURE, five parameters for > >a RSA public key are required while only two of them are essential. > >(See rsa-mod-exp.h and uImage.FIT/signature.txt) > >This is a result of considering relatively limited computer power > >and resources on embedded systems, while such a assumption may not > >be quite practical for other use cases. > > > >In this patch, added is a function, rsa_gen_key_prop(), which will > >generate additional parameters for other uses, in particular > >UEFI secure boot, on the fly. > > > >Note: the current code uses some "big number" rouKtines from BearSSL > >for the calculation. > > > >Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org> > >--- > > include/u-boot/rsa-mod-exp.h | 23 ++ > > lib/rsa/Kconfig | 1 + > > lib/rsa/Makefile | 1 + > > lib/rsa/rsa-keyprop.c | 725 +++++++++++++++++++++++++++++++++++ > > 4 files changed, 750 insertions(+) > > create mode 100644 lib/rsa/rsa-keyprop.c > > > >diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h > >index 8a428c4b6a1a..1da8af1bb83d 100644 > >--- a/include/u-boot/rsa-mod-exp.h > >+++ b/include/u-boot/rsa-mod-exp.h > >@@ -26,6 +26,29 @@ struct key_prop { > > uint32_t exp_len; /* Exponent length in number of uint8_t */ > > }; > > > >+/** > >+ * rsa_gen_key_prop() - Generate key properties of RSA public key > >+ * @key: Specifies key data in DER format > >+ * @keylen: Length of @key > >+ * @prop: Generated key property > >+ * > >+ * This function takes a blob of encoded RSA public key data in DER > >+ * format, parse it and generate all the relevant properties > >+ * in key_prop structure. > >+ * Return a pointer to struct key_prop in @prop on success. > >+ * > >+ * Return: 0 on success, negative on error > >+ */ > >+int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **proc); > >+ > >+/** > >+ * rsa_free_key_prop() - Free key properties > >+ * @prop: Pointer to struct key_prop > >+ * > >+ * This function frees all the memories allocated by rsa_gen_key_prop(). > >+ */ > >+void rsa_free_key_prop(struct key_prop *prop); > >+ > > /** > > * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw > > * > >diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig > >index 71e4c06bf883..d1d6f6cf64a3 100644 > >--- a/lib/rsa/Kconfig > >+++ b/lib/rsa/Kconfig > >@@ -33,6 +33,7 @@ config RSA_VERIFY > > config RSA_VERIFY_WITH_PKEY > > bool "Execute RSA verification without key parameters from FDT" > > depends on RSA > >+ imply RSA_PUBLIC_KEY_PARSER > > Do we really need RSA_PUBLIC_KEY_PARSER whenever we use CONFIG_RSA=y? ??? RSA_PUBLIC_KEY_PARSER will be selected only if RSA_VERIFY_WITH_PKEY is enabled. I avoided to use 'select' here because RSA_PUBLIC_KEY_PARSER is also 'selectable.' > E.g. on a system without the UEFI sub-system? > > Otherwise simply let RSA_PUBLIC_KEY_PARSER depend on RSA. Such a dependency sounds odd to me. > > > help > > The standard RSA-signature verification code (FIT_SIGNATURE) uses > > pre-calculated key properties, that are stored in fdt blob, in > >diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile > >index c07305188e0c..14ed3cb4012b 100644 > >--- a/lib/rsa/Makefile > >+++ b/lib/rsa/Makefile > >@@ -6,4 +6,5 @@ > > # Wolfgang Denk, DENX Software Engineering, wd@denx.de. > > > > obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o > >+obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o > > obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o > >diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c > >new file mode 100644 > >index 000000000000..9464df009343 > >--- /dev/null > >+++ b/lib/rsa/rsa-keyprop.c > >@@ -0,0 +1,725 @@ > >+// SPDX-License-Identifier: GPL-2.0+ and MIT > >+/* > >+ * RSA library - generate parameters for a public key > >+ * > >+ * Copyright (c) 2019 Linaro Limited > >+ * Author: AKASHI Takahiro > >+ * > >+ * Big number routines in this file come from BearSSL: > >+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> > >+ */ > >+ > >+#include <common.h> > >+#include <image.h> > >+#include <malloc.h> > >+#include <asm/byteorder.h> > >+#include <crypto/internal/rsa.h> > >+#include <u-boot/rsa-mod-exp.h> > >+ > >+/** > >+ * br_dec16be() - Convert 16-bit big-endian integer to native > >+ * @src: Pointer to data > >+ * Return: Native-endian integer > >+ */ > >+static unsigned br_dec16be(const void *src) > >+{ > >+ return be16_to_cpup(src); > >+} > >+ > >+/** > >+ * br_dec32be() - Convert 32-bit big-endian integer to native > >+ * @src: Pointer to data > >+ * Return: Native-endian integer > >+ */ > >+static uint32_t br_dec32be(const void *src) > >+{ > >+ return be32_to_cpup(src); > >+} > >+ > >+/** > >+ * br_enc32be() - Convert native 32-bit integer to big-endian > >+ * @dst: Pointer to buffer to store big-endian integer in > >+ * @x: Native 32-bit integer > >+ */ > >+static void br_enc32be(void *dst, uint32_t x) > >+{ > >+ __be32 tmp; > >+ > >+ tmp = cpu_to_be32(x); > >+ memcpy(dst, &tmp, sizeof(tmp)); > >+} > >+ > >+/* from BearSSL's src/inner.h */ > >+ > >+/* > >+ * Negate a boolean. > >+ */ > >+static uint32_t NOT(uint32_t ctl) > >+{ > >+ return ctl ^ 1; > >+} > >+ > >+/* > >+ * Multiplexer: returns x if ctl == 1, y if ctl == 0. > >+ */ > >+static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y) > >+{ > >+ return y ^ (-ctl & (x ^ y)); > >+} > >+ > >+/* > >+ * Equality check: returns 1 if x == y, 0 otherwise. > >+ */ > >+static uint32_t EQ(uint32_t x, uint32_t y) > >+{ > >+ uint32_t q; > >+ > >+ q = x ^ y; > >+ return NOT((q | -q) >> 31); > >+} > >+ > >+/* > >+ * Inequality check: returns 1 if x != y, 0 otherwise. > >+ */ > >+static uint32_t NEQ(uint32_t x, uint32_t y) > >+{ > >+ uint32_t q; > >+ > >+ q = x ^ y; > >+ return (q | -q) >> 31; > > We want to minimize the code size of U-Boot. I don't get your point. Please elaborate your concern. > So, please, review this code and remove all of this bogus. Which part of the code do you mention as 'bogus'? Thanks, -Takahiro Akashi > > Best regards > > Heinrich > > >+} > >+ > >+/* > >+ * Comparison: returns 1 if x > y, 0 otherwise. > >+ */ > >+static uint32_t GT(uint32_t x, uint32_t y) > >+{ > >+ /* > >+ * If both x < 2^31 and y < 2^31, then y-x will have its high > >+ * bit set if x > y, cleared otherwise. > >+ * > >+ * If either x >= 2^31 or y >= 2^31 (but not both), then the > >+ * result is the high bit of x. > >+ * > >+ * If both x >= 2^31 and y >= 2^31, then we can virtually > >+ * subtract 2^31 from both, and we are back to the first case. > >+ * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already > >+ * fine. > >+ */ > >+ uint32_t z; > >+ > >+ z = y - x; > >+ return (z ^ ((x ^ y) & (x ^ z))) >> 31; > >+} > >+ > >+/* > >+ * Compute the bit length of a 32-bit integer. Returned value is between 0 > >+ * and 32 (inclusive). > >+ */ > >+static uint32_t BIT_LENGTH(uint32_t x) > >+{ > >+ uint32_t k, c; > >+ > >+ k = NEQ(x, 0); > >+ c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4; > >+ c = GT(x, 0x00FF); x = MUX(c, x >> 8, x); k += c << 3; > >+ c = GT(x, 0x000F); x = MUX(c, x >> 4, x); k += c << 2; > >+ c = GT(x, 0x0003); x = MUX(c, x >> 2, x); k += c << 1; > >+ k += GT(x, 0x0001); > >+ return k; > >+} > >+ > >+#define GE(x, y) NOT(GT(y, x)) > >+#define LT(x, y) GT(y, x) > >+#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y)) > >+ > >+/* > >+ * Integers 'i32' > >+ * -------------- > >+ * > >+ * The 'i32' functions implement computations on big integers using > >+ * an internal representation as an array of 32-bit integers. For > >+ * an array x[]: > >+ * -- x[0] contains the "announced bit length" of the integer > >+ * -- x[1], x[2]... contain the value in little-endian order (x[1] > >+ * contains the least significant 32 bits) > >+ * > >+ * Multiplications rely on the elementary 32x32->64 multiplication. > >+ * > >+ * The announced bit length specifies the number of bits that are > >+ * significant in the subsequent 32-bit words. Unused bits in the > >+ * last (most significant) word are set to 0; subsequent words are > >+ * uninitialized and need not exist at all. > >+ * > >+ * The execution time and memory access patterns of all computations > >+ * depend on the announced bit length, but not on the actual word > >+ * values. For modular integers, the announced bit length of any integer > >+ * modulo n is equal to the actual bit length of n; thus, computations > >+ * on modular integers are "constant-time" (only the modulus length may > >+ * leak). > >+ */ > >+ > >+/* > >+ * Extract one word from an integer. The offset is counted in bits. > >+ * The word MUST entirely fit within the word elements corresponding > >+ * to the announced bit length of a[]. > >+ */ > >+static uint32_t br_i32_word(const uint32_t *a, uint32_t off) > >+{ > >+ size_t u; > >+ unsigned j; > >+ > >+ u = (size_t)(off >> 5) + 1; > >+ j = (unsigned)off & 31; > >+ if (j == 0) { > >+ return a[u]; > >+ } else { > >+ return (a[u] >> j) | (a[u + 1] << (32 - j)); > >+ } > >+} > >+ > >+/* from BearSSL's src/int/i32_bitlen.c */ > >+ > >+/* > >+ * Compute the actual bit length of an integer. The argument x should > >+ * point to the first (least significant) value word of the integer. > >+ * The len 'xlen' contains the number of 32-bit words to access. > >+ * > >+ * CT: value or length of x does not leak. > >+ */ > >+static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen) > >+{ > >+ uint32_t tw, twk; > >+ > >+ tw = 0; > >+ twk = 0; > >+ while (xlen -- > 0) { > >+ uint32_t w, c; > >+ > >+ c = EQ(tw, 0); > >+ w = x[xlen]; > >+ tw = MUX(c, w, tw); > >+ twk = MUX(c, (uint32_t)xlen, twk); > >+ } > >+ return (twk << 5) + BIT_LENGTH(tw); > >+} > >+ > >+/* from BearSSL's src/int/i32_decode.c */ > >+ > >+/* > >+ * Decode an integer from its big-endian unsigned representation. The > >+ * "true" bit length of the integer is computed, but all words of x[] > >+ * corresponding to the full 'len' bytes of the source are set. > >+ * > >+ * CT: value or length of x does not leak. > >+ */ > >+static void br_i32_decode(uint32_t *x, const void *src, size_t len) > >+{ > >+ const unsigned char *buf; > >+ size_t u, v; > >+ > >+ buf = src; > >+ u = len; > >+ v = 1; > >+ for (;;) { > >+ if (u < 4) { > >+ uint32_t w; > >+ > >+ if (u < 2) { > >+ if (u == 0) { > >+ break; > >+ } else { > >+ w = buf[0]; > >+ } > >+ } else { > >+ if (u == 2) { > >+ w = br_dec16be(buf); > >+ } else { > >+ w = ((uint32_t)buf[0] << 16) > >+ | br_dec16be(buf + 1); > >+ } > >+ } > >+ x[v ++] = w; > >+ break; > >+ } else { > >+ u -= 4; > >+ x[v ++] = br_dec32be(buf + u); > >+ } > >+ } > >+ x[0] = br_i32_bit_length(x + 1, v - 1); > >+} > >+ > >+/* from BearSSL's src/int/i32_encode.c */ > >+ > >+/* > >+ * Encode an integer into its big-endian unsigned representation. The > >+ * output length in bytes is provided (parameter 'len'); if the length > >+ * is too short then the integer is appropriately truncated; if it is > >+ * too long then the extra bytes are set to 0. > >+ */ > >+static void br_i32_encode(void *dst, size_t len, const uint32_t *x) > >+{ > >+ unsigned char *buf; > >+ size_t k; > >+ > >+ buf = dst; > >+ > >+ /* > >+ * Compute the announced size of x in bytes; extra bytes are > >+ * filled with zeros. > >+ */ > >+ k = (x[0] + 7) >> 3; > >+ while (len > k) { > >+ *buf ++ = 0; > >+ len --; > >+ } > >+ > >+ /* > >+ * Now we use k as index within x[]. That index starts at 1; > >+ * we initialize it to the topmost complete word, and process > >+ * any remaining incomplete word. > >+ */ > >+ k = (len + 3) >> 2; > >+ switch (len & 3) { > >+ case 3: > >+ *buf ++ = x[k] >> 16; > >+ /* fall through */ > >+ case 2: > >+ *buf ++ = x[k] >> 8; > >+ /* fall through */ > >+ case 1: > >+ *buf ++ = x[k]; > >+ k --; > >+ } > >+ > >+ /* > >+ * Encode all complete words. > >+ */ > >+ while (k > 0) { > >+ br_enc32be(buf, x[k]); > >+ k --; > >+ buf += 4; > >+ } > >+} > >+ > >+/* from BearSSL's src/int/i32_ninv32.c */ > >+ > >+/* > >+ * Compute -(1/x) mod 2^32. If x is even, then this function returns 0. > >+ */ > >+static uint32_t br_i32_ninv32(uint32_t x) > >+{ > >+ uint32_t y; > >+ > >+ y = 2 - x; > >+ y *= 2 - y * x; > >+ y *= 2 - y * x; > >+ y *= 2 - y * x; > >+ y *= 2 - y * x; > >+ return MUX(x & 1, -y, 0); > >+} > >+ > >+/* from BearSSL's src/int/i32_add.c */ > >+ > >+/* > >+ * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[] > >+ * is unmodified, but the carry is still computed and returned. The > >+ * arrays a[] and b[] MUST have the same announced bit length. > >+ * > >+ * a[] and b[] MAY be the same array, but partial overlap is not allowed. > >+ */ > >+static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl) > >+{ > >+ uint32_t cc; > >+ size_t u, m; > >+ > >+ cc = 0; > >+ m = (a[0] + 63) >> 5; > >+ for (u = 1; u < m; u ++) { > >+ uint32_t aw, bw, naw; > >+ > >+ aw = a[u]; > >+ bw = b[u]; > >+ naw = aw + bw + cc; > >+ > >+ /* > >+ * Carry is 1 if naw < aw. Carry is also 1 if naw == aw > >+ * AND the carry was already 1. > >+ */ > >+ cc = (cc & EQ(naw, aw)) | LT(naw, aw); > >+ a[u] = MUX(ctl, naw, aw); > >+ } > >+ return cc; > >+} > >+ > >+/* from BearSSL's src/int/i32_sub.c */ > >+ > >+/* > >+ * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0, > >+ * then a[] is unmodified, but the carry is still computed and returned. > >+ * The arrays a[] and b[] MUST have the same announced bit length. > >+ * > >+ * a[] and b[] MAY be the same array, but partial overlap is not allowed. > >+ */ > >+static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl) > >+{ > >+ uint32_t cc; > >+ size_t u, m; > >+ > >+ cc = 0; > >+ m = (a[0] + 63) >> 5; > >+ for (u = 1; u < m; u ++) { > >+ uint32_t aw, bw, naw; > >+ > >+ aw = a[u]; > >+ bw = b[u]; > >+ naw = aw - bw - cc; > >+ > >+ /* > >+ * Carry is 1 if naw > aw. Carry is 1 also if naw == aw > >+ * AND the carry was already 1. > >+ */ > >+ cc = (cc & EQ(naw, aw)) | GT(naw, aw); > >+ a[u] = MUX(ctl, naw, aw); > >+ } > >+ return cc; > >+} > >+ > >+/* from BearSSL's src/int/i32_div32.c */ > >+ > >+/* > >+ * Constant-time division. The dividend hi:lo is divided by the > >+ * divisor d; the quotient is returned and the remainder is written > >+ * in *r. If hi == d, then the quotient does not fit on 32 bits; > >+ * returned value is thus truncated. If hi > d, returned values are > >+ * indeterminate. > >+ */ > >+static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r) > >+{ > >+ /* TODO: optimize this */ > >+ uint32_t q; > >+ uint32_t ch, cf; > >+ int k; > >+ > >+ q = 0; > >+ ch = EQ(hi, d); > >+ hi = MUX(ch, 0, hi); > >+ for (k = 31; k > 0; k --) { > >+ int j; > >+ uint32_t w, ctl, hi2, lo2; > >+ > >+ j = 32 - k; > >+ w = (hi << j) | (lo >> k); > >+ ctl = GE(w, d) | (hi >> k); > >+ hi2 = (w - d) >> j; > >+ lo2 = lo - (d << k); > >+ hi = MUX(ctl, hi2, hi); > >+ lo = MUX(ctl, lo2, lo); > >+ q |= ctl << k; > >+ } > >+ cf = GE(lo, d) | hi; > >+ q |= cf; > >+ *r = MUX(cf, lo - d, lo); > >+ return q; > >+} > >+ > >+/* > >+ * Wrapper for br_divrem(); the remainder is returned, and the quotient > >+ * is discarded. > >+ */ > >+static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d) > >+{ > >+ uint32_t r; > >+ > >+ br_divrem(hi, lo, d, &r); > >+ return r; > >+} > >+ > >+/* > >+ * Wrapper for br_divrem(); the quotient is returned, and the remainder > >+ * is discarded. > >+ */ > >+static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d) > >+{ > >+ uint32_t r; > >+ > >+ return br_divrem(hi, lo, d, &r); > >+} > >+ > >+/* from BearSSL's src/int/i32_muladd.c */ > >+ > >+/* > >+ * Multiply x[] by 2^32 and then add integer z, modulo m[]. This > >+ * function assumes that x[] and m[] have the same announced bit > >+ * length, and the announced bit length of m[] matches its true > >+ * bit length. > >+ * > >+ * x[] and m[] MUST be distinct arrays. > >+ * > >+ * CT: only the common announced bit length of x and m leaks, not > >+ * the values of x, z or m. > >+ */ > >+static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m) > >+{ > >+ uint32_t m_bitlen; > >+ size_t u, mlen; > >+ uint32_t a0, a1, b0, hi, g, q, tb; > >+ uint32_t chf, clow, under, over; > >+ uint64_t cc; > >+ > >+ /* > >+ * We can test on the modulus bit length since we accept to > >+ * leak that length. > >+ */ > >+ m_bitlen = m[0]; > >+ if (m_bitlen == 0) { > >+ return; > >+ } > >+ if (m_bitlen <= 32) { > >+ x[1] = br_rem(x[1], z, m[1]); > >+ return; > >+ } > >+ mlen = (m_bitlen + 31) >> 5; > >+ > >+ /* > >+ * Principle: we estimate the quotient (x*2^32+z)/m by > >+ * doing a 64/32 division with the high words. > >+ * > >+ * Let: > >+ * w = 2^32 > >+ * a = (w*a0 + a1) * w^N + a2 > >+ * b = b0 * w^N + b2 > >+ * such that: > >+ * 0 <= a0 < w > >+ * 0 <= a1 < w > >+ * 0 <= a2 < w^N > >+ * w/2 <= b0 < w > >+ * 0 <= b2 < w^N > >+ * a < w*b > >+ * I.e. the two top words of a are a0:a1, the top word of b is > >+ * b0, we ensured that b0 is "full" (high bit set), and a is > >+ * such that the quotient q = a/b fits on one word (0 <= q < w). > >+ * > >+ * If a = b*q + r (with 0 <= r < q), we can estimate q by > >+ * doing an Euclidean division on the top words: > >+ * a0*w+a1 = b0*u + v (with 0 <= v < w) > >+ * Then the following holds: > >+ * 0 <= u <= w > >+ * u-2 <= q <= u > >+ */ > >+ a0 = br_i32_word(x, m_bitlen - 32); > >+ hi = x[mlen]; > >+ memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); > >+ x[1] = z; > >+ a1 = br_i32_word(x, m_bitlen - 32); > >+ b0 = br_i32_word(m, m_bitlen - 32); > >+ > >+ /* > >+ * We estimate a divisor q. If the quotient returned by br_div() > >+ * is g: > >+ * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF. > >+ * -- Otherwise: > >+ * -- if g == 0 then we set q = 0; > >+ * -- otherwise, we set q = g - 1. > >+ * The properties described above then ensure that the true > >+ * quotient is q-1, q or q+1. > >+ */ > >+ g = br_div(a0, a1, b0); > >+ q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1)); > >+ > >+ /* > >+ * We subtract q*m from x (with the extra high word of value 'hi'). > >+ * Since q may be off by 1 (in either direction), we may have to > >+ * add or subtract m afterwards. > >+ * > >+ * The 'tb' flag will be true (1) at the end of the loop if the > >+ * result is greater than or equal to the modulus (not counting > >+ * 'hi' or the carry). > >+ */ > >+ cc = 0; > >+ tb = 1; > >+ for (u = 1; u <= mlen; u ++) { > >+ uint32_t mw, zw, xw, nxw; > >+ uint64_t zl; > >+ > >+ mw = m[u]; > >+ zl = MUL(mw, q) + cc; > >+ cc = (uint32_t)(zl >> 32); > >+ zw = (uint32_t)zl; > >+ xw = x[u]; > >+ nxw = xw - zw; > >+ cc += (uint64_t)GT(nxw, xw); > >+ x[u] = nxw; > >+ tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); > >+ } > >+ > >+ /* > >+ * If we underestimated q, then either cc < hi (one extra bit > >+ * beyond the top array word), or cc == hi and tb is true (no > >+ * extra bit, but the result is not lower than the modulus). In > >+ * these cases we must subtract m once. > >+ * > >+ * Otherwise, we may have overestimated, which will show as > >+ * cc > hi (thus a negative result). Correction is adding m once. > >+ */ > >+ chf = (uint32_t)(cc >> 32); > >+ clow = (uint32_t)cc; > >+ over = chf | GT(clow, hi); > >+ under = ~over & (tb | (~chf & LT(clow, hi))); > >+ br_i32_add(x, m, over); > >+ br_i32_sub(x, m, under); > >+} > >+ > >+/* from BearSSL's src/int/i32_reduce.c */ > >+ > >+/* > >+ * Reduce an integer (a[]) modulo another (m[]). The result is written > >+ * in x[] and its announced bit length is set to be equal to that of m[]. > >+ * > >+ * x[] MUST be distinct from a[] and m[]. > >+ * > >+ * CT: only announced bit lengths leak, not values of x, a or m. > >+ */ > >+static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m) > >+{ > >+ uint32_t m_bitlen, a_bitlen; > >+ size_t mlen, alen, u; > >+ > >+ m_bitlen = m[0]; > >+ mlen = (m_bitlen + 31) >> 5; > >+ > >+ x[0] = m_bitlen; > >+ if (m_bitlen == 0) { > >+ return; > >+ } > >+ > >+ /* > >+ * If the source is shorter, then simply copy all words from a[] > >+ * and zero out the upper words. > >+ */ > >+ a_bitlen = a[0]; > >+ alen = (a_bitlen + 31) >> 5; > >+ if (a_bitlen < m_bitlen) { > >+ memcpy(x + 1, a + 1, alen * sizeof *a); > >+ for (u = alen; u < mlen; u ++) { > >+ x[u + 1] = 0; > >+ } > >+ return; > >+ } > >+ > >+ /* > >+ * The source length is at least equal to that of the modulus. > >+ * We must thus copy N-1 words, and input the remaining words > >+ * one by one. > >+ */ > >+ memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a); > >+ x[mlen] = 0; > >+ for (u = 1 + alen - mlen; u > 0; u --) { > >+ br_i32_muladd_small(x, a[u], m); > >+ } > >+} > >+ > >+/** > >+ * rsa_free_key_prop() - Free key properties > >+ * @prop: Pointer to struct key_prop > >+ * > >+ * This function frees all the memories allocated by rsa_gen_key_prop(). > >+ */ > >+void rsa_free_key_prop(struct key_prop *prop) > >+{ > >+ if (!prop) > >+ return; > >+ > >+ free((void *)prop->modulus); > >+ free((void *)prop->public_exponent); > >+ free((void *)prop->rr); > >+ > >+ free(prop); > >+} > >+ > >+/** > >+ * rsa_gen_key_prop() - Generate key properties of RSA public key > >+ * @key: Specifies key data in DER format > >+ * @keylen: Length of @key > >+ * @prop: Generated key property > >+ * > >+ * This function takes a blob of encoded RSA public key data in DER > >+ * format, parse it and generate all the relevant properties > >+ * in key_prop structure. > >+ * Return a pointer to struct key_prop in @prop on success. > >+ * > >+ * Return: 0 on success, negative on error > >+ */ > >+int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop) > >+{ > >+ struct rsa_key rsa_key; > >+ uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL; > >+ const int max_rsa_size = 4096; > >+ int rlen, i, ret; > >+ > >+ *prop = calloc(sizeof(**prop), 1); > >+ n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); > >+ rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); > >+ rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); > >+ if (!(*prop) || !n || !rr || !rrtmp) { > >+ ret = -ENOMEM; > >+ goto err; > >+ } > >+ > >+ ret = rsa_parse_pub_key(&rsa_key, key, keylen); > >+ if (ret) > >+ goto err; > >+ > >+ /* modulus */ > >+ /* removing leading 0's */ > >+ for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++) > >+ ; > >+ (*prop)->num_bits = (rsa_key.n_sz - i) * 8; > >+ (*prop)->modulus = malloc(rsa_key.n_sz - i); > >+ if (!(*prop)->modulus) { > >+ ret = -ENOMEM; > >+ goto err; > >+ } > >+ memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i); > >+ > >+ /* exponent */ > >+ (*prop)->public_exponent = calloc(1, sizeof(uint64_t)); > >+ if (!(*prop)->public_exponent) { > >+ ret = -ENOMEM; > >+ goto err; > >+ } > >+ memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t) > >+ - rsa_key.e_sz, > >+ rsa_key.e, rsa_key.e_sz); > >+ (*prop)->exp_len = rsa_key.e_sz; > >+ > >+ /* n0 inverse */ > >+ br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i); > >+ (*prop)->n0inv = br_i32_ninv32(n[1]); > >+ > >+ /* R^2 mod n; R = 2^(num_bits) */ > >+ rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */ > >+ rr[0] = 0; > >+ *(uint8_t *)&rr[0] = (1 << (rlen % 8)); > >+ for (i = 1; i < (((rlen + 31) >> 5) + 1); i++) > >+ rr[i] = 0; > >+ br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1); > >+ br_i32_reduce(rr, rrtmp, n); > >+ > >+ rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */ > >+ (*prop)->rr = malloc(rlen); > >+ if (!(*prop)->rr) { > >+ ret = -ENOMEM; > >+ goto err; > >+ } > >+ br_i32_encode((void *)(*prop)->rr, rlen, rr); > >+ > >+ return 0; > >+ > >+err: > >+ free(n); > >+ free(rr); > >+ free(rrtmp); > >+ rsa_free_key_prop(*prop); > >+ return ret; > >+} > >
diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h index 8a428c4b6a1a..1da8af1bb83d 100644 --- a/include/u-boot/rsa-mod-exp.h +++ b/include/u-boot/rsa-mod-exp.h @@ -26,6 +26,29 @@ struct key_prop { uint32_t exp_len; /* Exponent length in number of uint8_t */ }; +/** + * rsa_gen_key_prop() - Generate key properties of RSA public key + * @key: Specifies key data in DER format + * @keylen: Length of @key + * @prop: Generated key property + * + * This function takes a blob of encoded RSA public key data in DER + * format, parse it and generate all the relevant properties + * in key_prop structure. + * Return a pointer to struct key_prop in @prop on success. + * + * Return: 0 on success, negative on error + */ +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **proc); + +/** + * rsa_free_key_prop() - Free key properties + * @prop: Pointer to struct key_prop + * + * This function frees all the memories allocated by rsa_gen_key_prop(). + */ +void rsa_free_key_prop(struct key_prop *prop); + /** * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw * diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig index 71e4c06bf883..d1d6f6cf64a3 100644 --- a/lib/rsa/Kconfig +++ b/lib/rsa/Kconfig @@ -33,6 +33,7 @@ config RSA_VERIFY config RSA_VERIFY_WITH_PKEY bool "Execute RSA verification without key parameters from FDT" depends on RSA + imply RSA_PUBLIC_KEY_PARSER help The standard RSA-signature verification code (FIT_SIGNATURE) uses pre-calculated key properties, that are stored in fdt blob, in diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile index c07305188e0c..14ed3cb4012b 100644 --- a/lib/rsa/Makefile +++ b/lib/rsa/Makefile @@ -6,4 +6,5 @@ # Wolfgang Denk, DENX Software Engineering, wd@denx.de. obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o +obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c new file mode 100644 index 000000000000..9464df009343 --- /dev/null +++ b/lib/rsa/rsa-keyprop.c @@ -0,0 +1,725 @@ +// SPDX-License-Identifier: GPL-2.0+ and MIT +/* + * RSA library - generate parameters for a public key + * + * Copyright (c) 2019 Linaro Limited + * Author: AKASHI Takahiro + * + * Big number routines in this file come from BearSSL: + * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org> + */ + +#include <common.h> +#include <image.h> +#include <malloc.h> +#include <asm/byteorder.h> +#include <crypto/internal/rsa.h> +#include <u-boot/rsa-mod-exp.h> + +/** + * br_dec16be() - Convert 16-bit big-endian integer to native + * @src: Pointer to data + * Return: Native-endian integer + */ +static unsigned br_dec16be(const void *src) +{ + return be16_to_cpup(src); +} + +/** + * br_dec32be() - Convert 32-bit big-endian integer to native + * @src: Pointer to data + * Return: Native-endian integer + */ +static uint32_t br_dec32be(const void *src) +{ + return be32_to_cpup(src); +} + +/** + * br_enc32be() - Convert native 32-bit integer to big-endian + * @dst: Pointer to buffer to store big-endian integer in + * @x: Native 32-bit integer + */ +static void br_enc32be(void *dst, uint32_t x) +{ + __be32 tmp; + + tmp = cpu_to_be32(x); + memcpy(dst, &tmp, sizeof(tmp)); +} + +/* from BearSSL's src/inner.h */ + +/* + * Negate a boolean. + */ +static uint32_t NOT(uint32_t ctl) +{ + return ctl ^ 1; +} + +/* + * Multiplexer: returns x if ctl == 1, y if ctl == 0. + */ +static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y) +{ + return y ^ (-ctl & (x ^ y)); +} + +/* + * Equality check: returns 1 if x == y, 0 otherwise. + */ +static uint32_t EQ(uint32_t x, uint32_t y) +{ + uint32_t q; + + q = x ^ y; + return NOT((q | -q) >> 31); +} + +/* + * Inequality check: returns 1 if x != y, 0 otherwise. + */ +static uint32_t NEQ(uint32_t x, uint32_t y) +{ + uint32_t q; + + q = x ^ y; + return (q | -q) >> 31; +} + +/* + * Comparison: returns 1 if x > y, 0 otherwise. + */ +static uint32_t GT(uint32_t x, uint32_t y) +{ + /* + * If both x < 2^31 and y < 2^31, then y-x will have its high + * bit set if x > y, cleared otherwise. + * + * If either x >= 2^31 or y >= 2^31 (but not both), then the + * result is the high bit of x. + * + * If both x >= 2^31 and y >= 2^31, then we can virtually + * subtract 2^31 from both, and we are back to the first case. + * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already + * fine. + */ + uint32_t z; + + z = y - x; + return (z ^ ((x ^ y) & (x ^ z))) >> 31; +} + +/* + * Compute the bit length of a 32-bit integer. Returned value is between 0 + * and 32 (inclusive). + */ +static uint32_t BIT_LENGTH(uint32_t x) +{ + uint32_t k, c; + + k = NEQ(x, 0); + c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4; + c = GT(x, 0x00FF); x = MUX(c, x >> 8, x); k += c << 3; + c = GT(x, 0x000F); x = MUX(c, x >> 4, x); k += c << 2; + c = GT(x, 0x0003); x = MUX(c, x >> 2, x); k += c << 1; + k += GT(x, 0x0001); + return k; +} + +#define GE(x, y) NOT(GT(y, x)) +#define LT(x, y) GT(y, x) +#define MUL(x, y) ((uint64_t)(x) * (uint64_t)(y)) + +/* + * Integers 'i32' + * -------------- + * + * The 'i32' functions implement computations on big integers using + * an internal representation as an array of 32-bit integers. For + * an array x[]: + * -- x[0] contains the "announced bit length" of the integer + * -- x[1], x[2]... contain the value in little-endian order (x[1] + * contains the least significant 32 bits) + * + * Multiplications rely on the elementary 32x32->64 multiplication. + * + * The announced bit length specifies the number of bits that are + * significant in the subsequent 32-bit words. Unused bits in the + * last (most significant) word are set to 0; subsequent words are + * uninitialized and need not exist at all. + * + * The execution time and memory access patterns of all computations + * depend on the announced bit length, but not on the actual word + * values. For modular integers, the announced bit length of any integer + * modulo n is equal to the actual bit length of n; thus, computations + * on modular integers are "constant-time" (only the modulus length may + * leak). + */ + +/* + * Extract one word from an integer. The offset is counted in bits. + * The word MUST entirely fit within the word elements corresponding + * to the announced bit length of a[]. + */ +static uint32_t br_i32_word(const uint32_t *a, uint32_t off) +{ + size_t u; + unsigned j; + + u = (size_t)(off >> 5) + 1; + j = (unsigned)off & 31; + if (j == 0) { + return a[u]; + } else { + return (a[u] >> j) | (a[u + 1] << (32 - j)); + } +} + +/* from BearSSL's src/int/i32_bitlen.c */ + +/* + * Compute the actual bit length of an integer. The argument x should + * point to the first (least significant) value word of the integer. + * The len 'xlen' contains the number of 32-bit words to access. + * + * CT: value or length of x does not leak. + */ +static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen) +{ + uint32_t tw, twk; + + tw = 0; + twk = 0; + while (xlen -- > 0) { + uint32_t w, c; + + c = EQ(tw, 0); + w = x[xlen]; + tw = MUX(c, w, tw); + twk = MUX(c, (uint32_t)xlen, twk); + } + return (twk << 5) + BIT_LENGTH(tw); +} + +/* from BearSSL's src/int/i32_decode.c */ + +/* + * Decode an integer from its big-endian unsigned representation. The + * "true" bit length of the integer is computed, but all words of x[] + * corresponding to the full 'len' bytes of the source are set. + * + * CT: value or length of x does not leak. + */ +static void br_i32_decode(uint32_t *x, const void *src, size_t len) +{ + const unsigned char *buf; + size_t u, v; + + buf = src; + u = len; + v = 1; + for (;;) { + if (u < 4) { + uint32_t w; + + if (u < 2) { + if (u == 0) { + break; + } else { + w = buf[0]; + } + } else { + if (u == 2) { + w = br_dec16be(buf); + } else { + w = ((uint32_t)buf[0] << 16) + | br_dec16be(buf + 1); + } + } + x[v ++] = w; + break; + } else { + u -= 4; + x[v ++] = br_dec32be(buf + u); + } + } + x[0] = br_i32_bit_length(x + 1, v - 1); +} + +/* from BearSSL's src/int/i32_encode.c */ + +/* + * Encode an integer into its big-endian unsigned representation. The + * output length in bytes is provided (parameter 'len'); if the length + * is too short then the integer is appropriately truncated; if it is + * too long then the extra bytes are set to 0. + */ +static void br_i32_encode(void *dst, size_t len, const uint32_t *x) +{ + unsigned char *buf; + size_t k; + + buf = dst; + + /* + * Compute the announced size of x in bytes; extra bytes are + * filled with zeros. + */ + k = (x[0] + 7) >> 3; + while (len > k) { + *buf ++ = 0; + len --; + } + + /* + * Now we use k as index within x[]. That index starts at 1; + * we initialize it to the topmost complete word, and process + * any remaining incomplete word. + */ + k = (len + 3) >> 2; + switch (len & 3) { + case 3: + *buf ++ = x[k] >> 16; + /* fall through */ + case 2: + *buf ++ = x[k] >> 8; + /* fall through */ + case 1: + *buf ++ = x[k]; + k --; + } + + /* + * Encode all complete words. + */ + while (k > 0) { + br_enc32be(buf, x[k]); + k --; + buf += 4; + } +} + +/* from BearSSL's src/int/i32_ninv32.c */ + +/* + * Compute -(1/x) mod 2^32. If x is even, then this function returns 0. + */ +static uint32_t br_i32_ninv32(uint32_t x) +{ + uint32_t y; + + y = 2 - x; + y *= 2 - y * x; + y *= 2 - y * x; + y *= 2 - y * x; + y *= 2 - y * x; + return MUX(x & 1, -y, 0); +} + +/* from BearSSL's src/int/i32_add.c */ + +/* + * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[] + * is unmodified, but the carry is still computed and returned. The + * arrays a[] and b[] MUST have the same announced bit length. + * + * a[] and b[] MAY be the same array, but partial overlap is not allowed. + */ +static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl) +{ + uint32_t cc; + size_t u, m; + + cc = 0; + m = (a[0] + 63) >> 5; + for (u = 1; u < m; u ++) { + uint32_t aw, bw, naw; + + aw = a[u]; + bw = b[u]; + naw = aw + bw + cc; + + /* + * Carry is 1 if naw < aw. Carry is also 1 if naw == aw + * AND the carry was already 1. + */ + cc = (cc & EQ(naw, aw)) | LT(naw, aw); + a[u] = MUX(ctl, naw, aw); + } + return cc; +} + +/* from BearSSL's src/int/i32_sub.c */ + +/* + * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0, + * then a[] is unmodified, but the carry is still computed and returned. + * The arrays a[] and b[] MUST have the same announced bit length. + * + * a[] and b[] MAY be the same array, but partial overlap is not allowed. + */ +static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl) +{ + uint32_t cc; + size_t u, m; + + cc = 0; + m = (a[0] + 63) >> 5; + for (u = 1; u < m; u ++) { + uint32_t aw, bw, naw; + + aw = a[u]; + bw = b[u]; + naw = aw - bw - cc; + + /* + * Carry is 1 if naw > aw. Carry is 1 also if naw == aw + * AND the carry was already 1. + */ + cc = (cc & EQ(naw, aw)) | GT(naw, aw); + a[u] = MUX(ctl, naw, aw); + } + return cc; +} + +/* from BearSSL's src/int/i32_div32.c */ + +/* + * Constant-time division. The dividend hi:lo is divided by the + * divisor d; the quotient is returned and the remainder is written + * in *r. If hi == d, then the quotient does not fit on 32 bits; + * returned value is thus truncated. If hi > d, returned values are + * indeterminate. + */ +static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r) +{ + /* TODO: optimize this */ + uint32_t q; + uint32_t ch, cf; + int k; + + q = 0; + ch = EQ(hi, d); + hi = MUX(ch, 0, hi); + for (k = 31; k > 0; k --) { + int j; + uint32_t w, ctl, hi2, lo2; + + j = 32 - k; + w = (hi << j) | (lo >> k); + ctl = GE(w, d) | (hi >> k); + hi2 = (w - d) >> j; + lo2 = lo - (d << k); + hi = MUX(ctl, hi2, hi); + lo = MUX(ctl, lo2, lo); + q |= ctl << k; + } + cf = GE(lo, d) | hi; + q |= cf; + *r = MUX(cf, lo - d, lo); + return q; +} + +/* + * Wrapper for br_divrem(); the remainder is returned, and the quotient + * is discarded. + */ +static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d) +{ + uint32_t r; + + br_divrem(hi, lo, d, &r); + return r; +} + +/* + * Wrapper for br_divrem(); the quotient is returned, and the remainder + * is discarded. + */ +static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d) +{ + uint32_t r; + + return br_divrem(hi, lo, d, &r); +} + +/* from BearSSL's src/int/i32_muladd.c */ + +/* + * Multiply x[] by 2^32 and then add integer z, modulo m[]. This + * function assumes that x[] and m[] have the same announced bit + * length, and the announced bit length of m[] matches its true + * bit length. + * + * x[] and m[] MUST be distinct arrays. + * + * CT: only the common announced bit length of x and m leaks, not + * the values of x, z or m. + */ +static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m) +{ + uint32_t m_bitlen; + size_t u, mlen; + uint32_t a0, a1, b0, hi, g, q, tb; + uint32_t chf, clow, under, over; + uint64_t cc; + + /* + * We can test on the modulus bit length since we accept to + * leak that length. + */ + m_bitlen = m[0]; + if (m_bitlen == 0) { + return; + } + if (m_bitlen <= 32) { + x[1] = br_rem(x[1], z, m[1]); + return; + } + mlen = (m_bitlen + 31) >> 5; + + /* + * Principle: we estimate the quotient (x*2^32+z)/m by + * doing a 64/32 division with the high words. + * + * Let: + * w = 2^32 + * a = (w*a0 + a1) * w^N + a2 + * b = b0 * w^N + b2 + * such that: + * 0 <= a0 < w + * 0 <= a1 < w + * 0 <= a2 < w^N + * w/2 <= b0 < w + * 0 <= b2 < w^N + * a < w*b + * I.e. the two top words of a are a0:a1, the top word of b is + * b0, we ensured that b0 is "full" (high bit set), and a is + * such that the quotient q = a/b fits on one word (0 <= q < w). + * + * If a = b*q + r (with 0 <= r < q), we can estimate q by + * doing an Euclidean division on the top words: + * a0*w+a1 = b0*u + v (with 0 <= v < w) + * Then the following holds: + * 0 <= u <= w + * u-2 <= q <= u + */ + a0 = br_i32_word(x, m_bitlen - 32); + hi = x[mlen]; + memmove(x + 2, x + 1, (mlen - 1) * sizeof *x); + x[1] = z; + a1 = br_i32_word(x, m_bitlen - 32); + b0 = br_i32_word(m, m_bitlen - 32); + + /* + * We estimate a divisor q. If the quotient returned by br_div() + * is g: + * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF. + * -- Otherwise: + * -- if g == 0 then we set q = 0; + * -- otherwise, we set q = g - 1. + * The properties described above then ensure that the true + * quotient is q-1, q or q+1. + */ + g = br_div(a0, a1, b0); + q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1)); + + /* + * We subtract q*m from x (with the extra high word of value 'hi'). + * Since q may be off by 1 (in either direction), we may have to + * add or subtract m afterwards. + * + * The 'tb' flag will be true (1) at the end of the loop if the + * result is greater than or equal to the modulus (not counting + * 'hi' or the carry). + */ + cc = 0; + tb = 1; + for (u = 1; u <= mlen; u ++) { + uint32_t mw, zw, xw, nxw; + uint64_t zl; + + mw = m[u]; + zl = MUL(mw, q) + cc; + cc = (uint32_t)(zl >> 32); + zw = (uint32_t)zl; + xw = x[u]; + nxw = xw - zw; + cc += (uint64_t)GT(nxw, xw); + x[u] = nxw; + tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw)); + } + + /* + * If we underestimated q, then either cc < hi (one extra bit + * beyond the top array word), or cc == hi and tb is true (no + * extra bit, but the result is not lower than the modulus). In + * these cases we must subtract m once. + * + * Otherwise, we may have overestimated, which will show as + * cc > hi (thus a negative result). Correction is adding m once. + */ + chf = (uint32_t)(cc >> 32); + clow = (uint32_t)cc; + over = chf | GT(clow, hi); + under = ~over & (tb | (~chf & LT(clow, hi))); + br_i32_add(x, m, over); + br_i32_sub(x, m, under); +} + +/* from BearSSL's src/int/i32_reduce.c */ + +/* + * Reduce an integer (a[]) modulo another (m[]). The result is written + * in x[] and its announced bit length is set to be equal to that of m[]. + * + * x[] MUST be distinct from a[] and m[]. + * + * CT: only announced bit lengths leak, not values of x, a or m. + */ +static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m) +{ + uint32_t m_bitlen, a_bitlen; + size_t mlen, alen, u; + + m_bitlen = m[0]; + mlen = (m_bitlen + 31) >> 5; + + x[0] = m_bitlen; + if (m_bitlen == 0) { + return; + } + + /* + * If the source is shorter, then simply copy all words from a[] + * and zero out the upper words. + */ + a_bitlen = a[0]; + alen = (a_bitlen + 31) >> 5; + if (a_bitlen < m_bitlen) { + memcpy(x + 1, a + 1, alen * sizeof *a); + for (u = alen; u < mlen; u ++) { + x[u + 1] = 0; + } + return; + } + + /* + * The source length is at least equal to that of the modulus. + * We must thus copy N-1 words, and input the remaining words + * one by one. + */ + memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a); + x[mlen] = 0; + for (u = 1 + alen - mlen; u > 0; u --) { + br_i32_muladd_small(x, a[u], m); + } +} + +/** + * rsa_free_key_prop() - Free key properties + * @prop: Pointer to struct key_prop + * + * This function frees all the memories allocated by rsa_gen_key_prop(). + */ +void rsa_free_key_prop(struct key_prop *prop) +{ + if (!prop) + return; + + free((void *)prop->modulus); + free((void *)prop->public_exponent); + free((void *)prop->rr); + + free(prop); +} + +/** + * rsa_gen_key_prop() - Generate key properties of RSA public key + * @key: Specifies key data in DER format + * @keylen: Length of @key + * @prop: Generated key property + * + * This function takes a blob of encoded RSA public key data in DER + * format, parse it and generate all the relevant properties + * in key_prop structure. + * Return a pointer to struct key_prop in @prop on success. + * + * Return: 0 on success, negative on error + */ +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop) +{ + struct rsa_key rsa_key; + uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL; + const int max_rsa_size = 4096; + int rlen, i, ret; + + *prop = calloc(sizeof(**prop), 1); + n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); + rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); + rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5)); + if (!(*prop) || !n || !rr || !rrtmp) { + ret = -ENOMEM; + goto err; + } + + ret = rsa_parse_pub_key(&rsa_key, key, keylen); + if (ret) + goto err; + + /* modulus */ + /* removing leading 0's */ + for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++) + ; + (*prop)->num_bits = (rsa_key.n_sz - i) * 8; + (*prop)->modulus = malloc(rsa_key.n_sz - i); + if (!(*prop)->modulus) { + ret = -ENOMEM; + goto err; + } + memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i); + + /* exponent */ + (*prop)->public_exponent = calloc(1, sizeof(uint64_t)); + if (!(*prop)->public_exponent) { + ret = -ENOMEM; + goto err; + } + memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t) + - rsa_key.e_sz, + rsa_key.e, rsa_key.e_sz); + (*prop)->exp_len = rsa_key.e_sz; + + /* n0 inverse */ + br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i); + (*prop)->n0inv = br_i32_ninv32(n[1]); + + /* R^2 mod n; R = 2^(num_bits) */ + rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */ + rr[0] = 0; + *(uint8_t *)&rr[0] = (1 << (rlen % 8)); + for (i = 1; i < (((rlen + 31) >> 5) + 1); i++) + rr[i] = 0; + br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1); + br_i32_reduce(rr, rrtmp, n); + + rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */ + (*prop)->rr = malloc(rlen); + if (!(*prop)->rr) { + ret = -ENOMEM; + goto err; + } + br_i32_encode((void *)(*prop)->rr, rlen, rr); + + return 0; + +err: + free(n); + free(rr); + free(rrtmp); + rsa_free_key_prop(*prop); + return ret; +}
In the current implementation of FIT_SIGNATURE, five parameters for a RSA public key are required while only two of them are essential. (See rsa-mod-exp.h and uImage.FIT/signature.txt) This is a result of considering relatively limited computer power and resources on embedded systems, while such a assumption may not be quite practical for other use cases. In this patch, added is a function, rsa_gen_key_prop(), which will generate additional parameters for other uses, in particular UEFI secure boot, on the fly. Note: the current code uses some "big number" routines from BearSSL for the calculation. Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org> --- include/u-boot/rsa-mod-exp.h | 23 ++ lib/rsa/Kconfig | 1 + lib/rsa/Makefile | 1 + lib/rsa/rsa-keyprop.c | 725 +++++++++++++++++++++++++++++++++++ 4 files changed, 750 insertions(+) create mode 100644 lib/rsa/rsa-keyprop.c