diff mbox series

[U-Boot,v4,4/6] lib: rsa: generate additional parameters for public key

Message ID 20191121001121.21854-5-takahiro.akashi@linaro.org
State Changes Requested
Delegated to: Tom Rini
Headers show
Series rsa: extend rsa_verify() for UEFI secure boot | expand

Commit Message

AKASHI Takahiro Nov. 21, 2019, 12:11 a.m. UTC
In the current implementation of FIT_SIGNATURE, five parameters for
a RSA public key are required while only two of them are essential.
(See rsa-mod-exp.h and uImage.FIT/signature.txt)
This is a result of considering relatively limited computer power
and resources on embedded systems, while such a assumption may not
be quite practical for other use cases.

In this patch, added is a function, rsa_gen_key_prop(), which will
generate additional parameters for other uses, in particular
UEFI secure boot, on the fly.

Note: the current code uses some "big number" routines from BearSSL
for the calculation.

Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
---
 include/u-boot/rsa-mod-exp.h |  23 ++
 lib/rsa/Kconfig              |   1 +
 lib/rsa/Makefile             |   1 +
 lib/rsa/rsa-keyprop.c        | 725 +++++++++++++++++++++++++++++++++++
 4 files changed, 750 insertions(+)
 create mode 100644 lib/rsa/rsa-keyprop.c

Comments

Heinrich Schuchardt Jan. 8, 2020, 6:07 p.m. UTC | #1
On 11/21/19 1:11 AM, AKASHI Takahiro wrote:
> In the current implementation of FIT_SIGNATURE, five parameters for
> a RSA public key are required while only two of them are essential.
> (See rsa-mod-exp.h and uImage.FIT/signature.txt)
> This is a result of considering relatively limited computer power
> and resources on embedded systems, while such a assumption may not
> be quite practical for other use cases.
>
> In this patch, added is a function, rsa_gen_key_prop(), which will
> generate additional parameters for other uses, in particular
> UEFI secure boot, on the fly.
>
> Note: the current code uses some "big number" rouKtines from BearSSL
> for the calculation.
>
> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> ---
>   include/u-boot/rsa-mod-exp.h |  23 ++
>   lib/rsa/Kconfig              |   1 +
>   lib/rsa/Makefile             |   1 +
>   lib/rsa/rsa-keyprop.c        | 725 +++++++++++++++++++++++++++++++++++
>   4 files changed, 750 insertions(+)
>   create mode 100644 lib/rsa/rsa-keyprop.c
>
> diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h
> index 8a428c4b6a1a..1da8af1bb83d 100644
> --- a/include/u-boot/rsa-mod-exp.h
> +++ b/include/u-boot/rsa-mod-exp.h
> @@ -26,6 +26,29 @@ struct key_prop {
>   	uint32_t exp_len;	/* Exponent length in number of uint8_t */
>   };
>
> +/**
> + * rsa_gen_key_prop() - Generate key properties of RSA public key
> + * @key:	Specifies key data in DER format
> + * @keylen:	Length of @key
> + * @prop:	Generated key property
> + *
> + * This function takes a blob of encoded RSA public key data in DER
> + * format, parse it and generate all the relevant properties
> + * in key_prop structure.
> + * Return a pointer to struct key_prop in @prop on success.
> + *
> + * Return:	0 on success, negative on error
> + */
> +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **proc);
> +
> +/**
> + * rsa_free_key_prop() - Free key properties
> + * @prop:	Pointer to struct key_prop
> + *
> + * This function frees all the memories allocated by rsa_gen_key_prop().
> + */
> +void rsa_free_key_prop(struct key_prop *prop);
> +
>   /**
>    * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw
>    *
> diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig
> index 71e4c06bf883..d1d6f6cf64a3 100644
> --- a/lib/rsa/Kconfig
> +++ b/lib/rsa/Kconfig
> @@ -33,6 +33,7 @@ config RSA_VERIFY
>   config RSA_VERIFY_WITH_PKEY
>   	bool "Execute RSA verification without key parameters from FDT"
>   	depends on RSA
> +	imply RSA_PUBLIC_KEY_PARSER

Do we really need RSA_PUBLIC_KEY_PARSER whenever we use CONFIG_RSA=y?
E.g. on a system without the UEFI sub-system?

Otherwise simply let RSA_PUBLIC_KEY_PARSER depend on RSA.

>   	help
>   	  The standard RSA-signature verification code (FIT_SIGNATURE) uses
>   	  pre-calculated key properties, that are stored in fdt blob, in
> diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile
> index c07305188e0c..14ed3cb4012b 100644
> --- a/lib/rsa/Makefile
> +++ b/lib/rsa/Makefile
> @@ -6,4 +6,5 @@
>   # Wolfgang Denk, DENX Software Engineering, wd@denx.de.
>
>   obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o
> +obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o
>   obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o
> diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c
> new file mode 100644
> index 000000000000..9464df009343
> --- /dev/null
> +++ b/lib/rsa/rsa-keyprop.c
> @@ -0,0 +1,725 @@
> +// SPDX-License-Identifier: GPL-2.0+ and MIT
> +/*
> + * RSA library - generate parameters for a public key
> + *
> + * Copyright (c) 2019 Linaro Limited
> + * Author: AKASHI Takahiro
> + *
> + * Big number routines in this file come from BearSSL:
> + * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
> + */
> +
> +#include <common.h>
> +#include <image.h>
> +#include <malloc.h>
> +#include <asm/byteorder.h>
> +#include <crypto/internal/rsa.h>
> +#include <u-boot/rsa-mod-exp.h>
> +
> +/**
> + * br_dec16be() - Convert 16-bit big-endian integer to native
> + * @src:	Pointer to data
> + * Return:	Native-endian integer
> + */
> +static unsigned br_dec16be(const void *src)
> +{
> +	return be16_to_cpup(src);
> +}
> +
> +/**
> + * br_dec32be() - Convert 32-bit big-endian integer to native
> + * @src:	Pointer to data
> + * Return:	Native-endian integer
> + */
> +static uint32_t br_dec32be(const void *src)
> +{
> +	return be32_to_cpup(src);
> +}
> +
> +/**
> + * br_enc32be() - Convert native 32-bit integer to big-endian
> + * @dst:	Pointer to buffer to store big-endian integer in
> + * @x:		Native 32-bit integer
> + */
> +static void br_enc32be(void *dst, uint32_t x)
> +{
> +	__be32 tmp;
> +
> +	tmp = cpu_to_be32(x);
> +	memcpy(dst, &tmp, sizeof(tmp));
> +}
> +
> +/* from BearSSL's src/inner.h */
> +
> +/*
> + * Negate a boolean.
> + */
> +static uint32_t NOT(uint32_t ctl)
> +{
> +	return ctl ^ 1;
> +}
> +
> +/*
> + * Multiplexer: returns x if ctl == 1, y if ctl == 0.
> + */
> +static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y)
> +{
> +	return y ^ (-ctl & (x ^ y));
> +}
> +
> +/*
> + * Equality check: returns 1 if x == y, 0 otherwise.
> + */
> +static uint32_t EQ(uint32_t x, uint32_t y)
> +{
> +	uint32_t q;
> +
> +	q = x ^ y;
> +	return NOT((q | -q) >> 31);
> +}
> +
> +/*
> + * Inequality check: returns 1 if x != y, 0 otherwise.
> + */
> +static uint32_t NEQ(uint32_t x, uint32_t y)
> +{
> +	uint32_t q;
> +
> +	q = x ^ y;
> +	return (q | -q) >> 31;

We want to minimize the code size of U-Boot.

So, please, review this code and remove all of this bogus.

Best regards

Heinrich

> +}
> +
> +/*
> + * Comparison: returns 1 if x > y, 0 otherwise.
> + */
> +static uint32_t GT(uint32_t x, uint32_t y)
> +{
> +	/*
> +	 * If both x < 2^31 and y < 2^31, then y-x will have its high
> +	 * bit set if x > y, cleared otherwise.
> +	 *
> +	 * If either x >= 2^31 or y >= 2^31 (but not both), then the
> +	 * result is the high bit of x.
> +	 *
> +	 * If both x >= 2^31 and y >= 2^31, then we can virtually
> +	 * subtract 2^31 from both, and we are back to the first case.
> +	 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
> +	 * fine.
> +	 */
> +	uint32_t z;
> +
> +	z = y - x;
> +	return (z ^ ((x ^ y) & (x ^ z))) >> 31;
> +}
> +
> +/*
> + * Compute the bit length of a 32-bit integer. Returned value is between 0
> + * and 32 (inclusive).
> + */
> +static uint32_t BIT_LENGTH(uint32_t x)
> +{
> +	uint32_t k, c;
> +
> +	k = NEQ(x, 0);
> +	c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;
> +	c = GT(x, 0x00FF); x = MUX(c, x >>  8, x); k += c << 3;
> +	c = GT(x, 0x000F); x = MUX(c, x >>  4, x); k += c << 2;
> +	c = GT(x, 0x0003); x = MUX(c, x >>  2, x); k += c << 1;
> +	k += GT(x, 0x0001);
> +	return k;
> +}
> +
> +#define GE(x, y)   NOT(GT(y, x))
> +#define LT(x, y)   GT(y, x)
> +#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))
> +
> +/*
> + * Integers 'i32'
> + * --------------
> + *
> + * The 'i32' functions implement computations on big integers using
> + * an internal representation as an array of 32-bit integers. For
> + * an array x[]:
> + *  -- x[0] contains the "announced bit length" of the integer
> + *  -- x[1], x[2]... contain the value in little-endian order (x[1]
> + *     contains the least significant 32 bits)
> + *
> + * Multiplications rely on the elementary 32x32->64 multiplication.
> + *
> + * The announced bit length specifies the number of bits that are
> + * significant in the subsequent 32-bit words. Unused bits in the
> + * last (most significant) word are set to 0; subsequent words are
> + * uninitialized and need not exist at all.
> + *
> + * The execution time and memory access patterns of all computations
> + * depend on the announced bit length, but not on the actual word
> + * values. For modular integers, the announced bit length of any integer
> + * modulo n is equal to the actual bit length of n; thus, computations
> + * on modular integers are "constant-time" (only the modulus length may
> + * leak).
> + */
> +
> +/*
> + * Extract one word from an integer. The offset is counted in bits.
> + * The word MUST entirely fit within the word elements corresponding
> + * to the announced bit length of a[].
> + */
> +static uint32_t br_i32_word(const uint32_t *a, uint32_t off)
> +{
> +	size_t u;
> +	unsigned j;
> +
> +	u = (size_t)(off >> 5) + 1;
> +	j = (unsigned)off & 31;
> +	if (j == 0) {
> +		return a[u];
> +	} else {
> +		return (a[u] >> j) | (a[u + 1] << (32 - j));
> +	}
> +}
> +
> +/* from BearSSL's src/int/i32_bitlen.c */
> +
> +/*
> + * Compute the actual bit length of an integer. The argument x should
> + * point to the first (least significant) value word of the integer.
> + * The len 'xlen' contains the number of 32-bit words to access.
> + *
> + * CT: value or length of x does not leak.
> + */
> +static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen)
> +{
> +	uint32_t tw, twk;
> +
> +	tw = 0;
> +	twk = 0;
> +	while (xlen -- > 0) {
> +		uint32_t w, c;
> +
> +		c = EQ(tw, 0);
> +		w = x[xlen];
> +		tw = MUX(c, w, tw);
> +		twk = MUX(c, (uint32_t)xlen, twk);
> +	}
> +	return (twk << 5) + BIT_LENGTH(tw);
> +}
> +
> +/* from BearSSL's src/int/i32_decode.c */
> +
> +/*
> + * Decode an integer from its big-endian unsigned representation. The
> + * "true" bit length of the integer is computed, but all words of x[]
> + * corresponding to the full 'len' bytes of the source are set.
> + *
> + * CT: value or length of x does not leak.
> + */
> +static void br_i32_decode(uint32_t *x, const void *src, size_t len)
> +{
> +	const unsigned char *buf;
> +	size_t u, v;
> +
> +	buf = src;
> +	u = len;
> +	v = 1;
> +	for (;;) {
> +		if (u < 4) {
> +			uint32_t w;
> +
> +			if (u < 2) {
> +				if (u == 0) {
> +					break;
> +				} else {
> +					w = buf[0];
> +				}
> +			} else {
> +				if (u == 2) {
> +					w = br_dec16be(buf);
> +				} else {
> +					w = ((uint32_t)buf[0] << 16)
> +						| br_dec16be(buf + 1);
> +				}
> +			}
> +			x[v ++] = w;
> +			break;
> +		} else {
> +			u -= 4;
> +			x[v ++] = br_dec32be(buf + u);
> +		}
> +	}
> +	x[0] = br_i32_bit_length(x + 1, v - 1);
> +}
> +
> +/* from BearSSL's src/int/i32_encode.c */
> +
> +/*
> + * Encode an integer into its big-endian unsigned representation. The
> + * output length in bytes is provided (parameter 'len'); if the length
> + * is too short then the integer is appropriately truncated; if it is
> + * too long then the extra bytes are set to 0.
> + */
> +static void br_i32_encode(void *dst, size_t len, const uint32_t *x)
> +{
> +	unsigned char *buf;
> +	size_t k;
> +
> +	buf = dst;
> +
> +	/*
> +	 * Compute the announced size of x in bytes; extra bytes are
> +	 * filled with zeros.
> +	 */
> +	k = (x[0] + 7) >> 3;
> +	while (len > k) {
> +		*buf ++ = 0;
> +		len --;
> +	}
> +
> +	/*
> +	 * Now we use k as index within x[]. That index starts at 1;
> +	 * we initialize it to the topmost complete word, and process
> +	 * any remaining incomplete word.
> +	 */
> +	k = (len + 3) >> 2;
> +	switch (len & 3) {
> +	case 3:
> +		*buf ++ = x[k] >> 16;
> +		/* fall through */
> +	case 2:
> +		*buf ++ = x[k] >> 8;
> +		/* fall through */
> +	case 1:
> +		*buf ++ = x[k];
> +		k --;
> +	}
> +
> +	/*
> +	 * Encode all complete words.
> +	 */
> +	while (k > 0) {
> +		br_enc32be(buf, x[k]);
> +		k --;
> +		buf += 4;
> +	}
> +}
> +
> +/* from BearSSL's src/int/i32_ninv32.c */
> +
> +/*
> + * Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
> + */
> +static uint32_t br_i32_ninv32(uint32_t x)
> +{
> +	uint32_t y;
> +
> +	y = 2 - x;
> +	y *= 2 - y * x;
> +	y *= 2 - y * x;
> +	y *= 2 - y * x;
> +	y *= 2 - y * x;
> +	return MUX(x & 1, -y, 0);
> +}
> +
> +/* from BearSSL's src/int/i32_add.c */
> +
> +/*
> + * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
> + * is unmodified, but the carry is still computed and returned. The
> + * arrays a[] and b[] MUST have the same announced bit length.
> + *
> + * a[] and b[] MAY be the same array, but partial overlap is not allowed.
> + */
> +static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
> +{
> +	uint32_t cc;
> +	size_t u, m;
> +
> +	cc = 0;
> +	m = (a[0] + 63) >> 5;
> +	for (u = 1; u < m; u ++) {
> +		uint32_t aw, bw, naw;
> +
> +		aw = a[u];
> +		bw = b[u];
> +		naw = aw + bw + cc;
> +
> +		/*
> +		 * Carry is 1 if naw < aw. Carry is also 1 if naw == aw
> +		 * AND the carry was already 1.
> +		 */
> +		cc = (cc & EQ(naw, aw)) | LT(naw, aw);
> +		a[u] = MUX(ctl, naw, aw);
> +	}
> +	return cc;
> +}
> +
> +/* from BearSSL's src/int/i32_sub.c */
> +
> +/*
> + * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
> + * then a[] is unmodified, but the carry is still computed and returned.
> + * The arrays a[] and b[] MUST have the same announced bit length.
> + *
> + * a[] and b[] MAY be the same array, but partial overlap is not allowed.
> + */
> +static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
> +{
> +	uint32_t cc;
> +	size_t u, m;
> +
> +	cc = 0;
> +	m = (a[0] + 63) >> 5;
> +	for (u = 1; u < m; u ++) {
> +		uint32_t aw, bw, naw;
> +
> +		aw = a[u];
> +		bw = b[u];
> +		naw = aw - bw - cc;
> +
> +		/*
> +		 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
> +		 * AND the carry was already 1.
> +		 */
> +		cc = (cc & EQ(naw, aw)) | GT(naw, aw);
> +		a[u] = MUX(ctl, naw, aw);
> +	}
> +	return cc;
> +}
> +
> +/* from BearSSL's src/int/i32_div32.c */
> +
> +/*
> + * Constant-time division. The dividend hi:lo is divided by the
> + * divisor d; the quotient is returned and the remainder is written
> + * in *r. If hi == d, then the quotient does not fit on 32 bits;
> + * returned value is thus truncated. If hi > d, returned values are
> + * indeterminate.
> + */
> +static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
> +{
> +	/* TODO: optimize this */
> +	uint32_t q;
> +	uint32_t ch, cf;
> +	int k;
> +
> +	q = 0;
> +	ch = EQ(hi, d);
> +	hi = MUX(ch, 0, hi);
> +	for (k = 31; k > 0; k --) {
> +		int j;
> +		uint32_t w, ctl, hi2, lo2;
> +
> +		j = 32 - k;
> +		w = (hi << j) | (lo >> k);
> +		ctl = GE(w, d) | (hi >> k);
> +		hi2 = (w - d) >> j;
> +		lo2 = lo - (d << k);
> +		hi = MUX(ctl, hi2, hi);
> +		lo = MUX(ctl, lo2, lo);
> +		q |= ctl << k;
> +	}
> +	cf = GE(lo, d) | hi;
> +	q |= cf;
> +	*r = MUX(cf, lo - d, lo);
> +	return q;
> +}
> +
> +/*
> + * Wrapper for br_divrem(); the remainder is returned, and the quotient
> + * is discarded.
> + */
> +static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d)
> +{
> +	uint32_t r;
> +
> +	br_divrem(hi, lo, d, &r);
> +	return r;
> +}
> +
> +/*
> + * Wrapper for br_divrem(); the quotient is returned, and the remainder
> + * is discarded.
> + */
> +static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d)
> +{
> +	uint32_t r;
> +
> +	return br_divrem(hi, lo, d, &r);
> +}
> +
> +/* from BearSSL's src/int/i32_muladd.c */
> +
> +/*
> + * Multiply x[] by 2^32 and then add integer z, modulo m[]. This
> + * function assumes that x[] and m[] have the same announced bit
> + * length, and the announced bit length of m[] matches its true
> + * bit length.
> + *
> + * x[] and m[] MUST be distinct arrays.
> + *
> + * CT: only the common announced bit length of x and m leaks, not
> + * the values of x, z or m.
> + */
> +static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
> +{
> +	uint32_t m_bitlen;
> +	size_t u, mlen;
> +	uint32_t a0, a1, b0, hi, g, q, tb;
> +	uint32_t chf, clow, under, over;
> +	uint64_t cc;
> +
> +	/*
> +	 * We can test on the modulus bit length since we accept to
> +	 * leak that length.
> +	 */
> +	m_bitlen = m[0];
> +	if (m_bitlen == 0) {
> +		return;
> +	}
> +	if (m_bitlen <= 32) {
> +		x[1] = br_rem(x[1], z, m[1]);
> +		return;
> +	}
> +	mlen = (m_bitlen + 31) >> 5;
> +
> +	/*
> +	 * Principle: we estimate the quotient (x*2^32+z)/m by
> +	 * doing a 64/32 division with the high words.
> +	 *
> +	 * Let:
> +	 *   w = 2^32
> +	 *   a = (w*a0 + a1) * w^N + a2
> +	 *   b = b0 * w^N + b2
> +	 * such that:
> +	 *   0 <= a0 < w
> +	 *   0 <= a1 < w
> +	 *   0 <= a2 < w^N
> +	 *   w/2 <= b0 < w
> +	 *   0 <= b2 < w^N
> +	 *   a < w*b
> +	 * I.e. the two top words of a are a0:a1, the top word of b is
> +	 * b0, we ensured that b0 is "full" (high bit set), and a is
> +	 * such that the quotient q = a/b fits on one word (0 <= q < w).
> +	 *
> +	 * If a = b*q + r (with 0 <= r < q), we can estimate q by
> +	 * doing an Euclidean division on the top words:
> +	 *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
> +	 * Then the following holds:
> +	 *   0 <= u <= w
> +	 *   u-2 <= q <= u
> +	 */
> +	a0 = br_i32_word(x, m_bitlen - 32);
> +	hi = x[mlen];
> +	memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
> +	x[1] = z;
> +	a1 = br_i32_word(x, m_bitlen - 32);
> +	b0 = br_i32_word(m, m_bitlen - 32);
> +
> +	/*
> +	 * We estimate a divisor q. If the quotient returned by br_div()
> +	 * is g:
> +	 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
> +	 * -- Otherwise:
> +	 *    -- if g == 0 then we set q = 0;
> +	 *    -- otherwise, we set q = g - 1.
> +	 * The properties described above then ensure that the true
> +	 * quotient is q-1, q or q+1.
> +	 */
> +	g = br_div(a0, a1, b0);
> +	q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
> +
> +	/*
> +	 * We subtract q*m from x (with the extra high word of value 'hi').
> +	 * Since q may be off by 1 (in either direction), we may have to
> +	 * add or subtract m afterwards.
> +	 *
> +	 * The 'tb' flag will be true (1) at the end of the loop if the
> +	 * result is greater than or equal to the modulus (not counting
> +	 * 'hi' or the carry).
> +	 */
> +	cc = 0;
> +	tb = 1;
> +	for (u = 1; u <= mlen; u ++) {
> +		uint32_t mw, zw, xw, nxw;
> +		uint64_t zl;
> +
> +		mw = m[u];
> +		zl = MUL(mw, q) + cc;
> +		cc = (uint32_t)(zl >> 32);
> +		zw = (uint32_t)zl;
> +		xw = x[u];
> +		nxw = xw - zw;
> +		cc += (uint64_t)GT(nxw, xw);
> +		x[u] = nxw;
> +		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
> +	}
> +
> +	/*
> +	 * If we underestimated q, then either cc < hi (one extra bit
> +	 * beyond the top array word), or cc == hi and tb is true (no
> +	 * extra bit, but the result is not lower than the modulus). In
> +	 * these cases we must subtract m once.
> +	 *
> +	 * Otherwise, we may have overestimated, which will show as
> +	 * cc > hi (thus a negative result). Correction is adding m once.
> +	 */
> +	chf = (uint32_t)(cc >> 32);
> +	clow = (uint32_t)cc;
> +	over = chf | GT(clow, hi);
> +	under = ~over & (tb | (~chf & LT(clow, hi)));
> +	br_i32_add(x, m, over);
> +	br_i32_sub(x, m, under);
> +}
> +
> +/* from BearSSL's src/int/i32_reduce.c */
> +
> +/*
> + * Reduce an integer (a[]) modulo another (m[]). The result is written
> + * in x[] and its announced bit length is set to be equal to that of m[].
> + *
> + * x[] MUST be distinct from a[] and m[].
> + *
> + * CT: only announced bit lengths leak, not values of x, a or m.
> + */
> +static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
> +{
> +	uint32_t m_bitlen, a_bitlen;
> +	size_t mlen, alen, u;
> +
> +	m_bitlen = m[0];
> +	mlen = (m_bitlen + 31) >> 5;
> +
> +	x[0] = m_bitlen;
> +	if (m_bitlen == 0) {
> +		return;
> +	}
> +
> +	/*
> +	 * If the source is shorter, then simply copy all words from a[]
> +	 * and zero out the upper words.
> +	 */
> +	a_bitlen = a[0];
> +	alen = (a_bitlen + 31) >> 5;
> +	if (a_bitlen < m_bitlen) {
> +		memcpy(x + 1, a + 1, alen * sizeof *a);
> +		for (u = alen; u < mlen; u ++) {
> +			x[u + 1] = 0;
> +		}
> +		return;
> +	}
> +
> +	/*
> +	 * The source length is at least equal to that of the modulus.
> +	 * We must thus copy N-1 words, and input the remaining words
> +	 * one by one.
> +	 */
> +	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
> +	x[mlen] = 0;
> +	for (u = 1 + alen - mlen; u > 0; u --) {
> +		br_i32_muladd_small(x, a[u], m);
> +	}
> +}
> +
> +/**
> + * rsa_free_key_prop() - Free key properties
> + * @prop:	Pointer to struct key_prop
> + *
> + * This function frees all the memories allocated by rsa_gen_key_prop().
> + */
> +void rsa_free_key_prop(struct key_prop *prop)
> +{
> +	if (!prop)
> +		return;
> +
> +	free((void *)prop->modulus);
> +	free((void *)prop->public_exponent);
> +	free((void *)prop->rr);
> +
> +	free(prop);
> +}
> +
> +/**
> + * rsa_gen_key_prop() - Generate key properties of RSA public key
> + * @key:	Specifies key data in DER format
> + * @keylen:	Length of @key
> + * @prop:	Generated key property
> + *
> + * This function takes a blob of encoded RSA public key data in DER
> + * format, parse it and generate all the relevant properties
> + * in key_prop structure.
> + * Return a pointer to struct key_prop in @prop on success.
> + *
> + * Return:	0 on success, negative on error
> + */
> +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop)
> +{
> +	struct rsa_key rsa_key;
> +	uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL;
> +	const int max_rsa_size = 4096;
> +	int rlen, i, ret;
> +
> +	*prop = calloc(sizeof(**prop), 1);
> +	n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
> +	rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
> +	rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
> +	if (!(*prop) || !n || !rr || !rrtmp) {
> +		ret = -ENOMEM;
> +		goto err;
> +	}
> +
> +	ret = rsa_parse_pub_key(&rsa_key, key, keylen);
> +	if (ret)
> +		goto err;
> +
> +	/* modulus */
> +	/* removing leading 0's */
> +	for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++)
> +		;
> +	(*prop)->num_bits = (rsa_key.n_sz - i) * 8;
> +	(*prop)->modulus = malloc(rsa_key.n_sz - i);
> +	if (!(*prop)->modulus) {
> +		ret = -ENOMEM;
> +		goto err;
> +	}
> +	memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i);
> +
> +	/* exponent */
> +	(*prop)->public_exponent = calloc(1, sizeof(uint64_t));
> +	if (!(*prop)->public_exponent) {
> +		ret = -ENOMEM;
> +		goto err;
> +	}
> +	memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t)
> +						- rsa_key.e_sz,
> +	       rsa_key.e, rsa_key.e_sz);
> +	(*prop)->exp_len = rsa_key.e_sz;
> +
> +	/* n0 inverse */
> +	br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i);
> +	(*prop)->n0inv = br_i32_ninv32(n[1]);
> +
> +	/* R^2 mod n; R = 2^(num_bits) */
> +	rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */
> +	rr[0] = 0;
> +	*(uint8_t *)&rr[0] = (1 << (rlen % 8));
> +	for (i = 1; i < (((rlen + 31) >> 5) + 1); i++)
> +		rr[i] = 0;
> +	br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1);
> +	br_i32_reduce(rr, rrtmp, n);
> +
> +	rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */
> +	(*prop)->rr = malloc(rlen);
> +	if (!(*prop)->rr) {
> +		ret = -ENOMEM;
> +		goto err;
> +	}
> +	br_i32_encode((void *)(*prop)->rr, rlen, rr);
> +
> +	return 0;
> +
> +err:
> +	free(n);
> +	free(rr);
> +	free(rrtmp);
> +	rsa_free_key_prop(*prop);
> +	return ret;
> +}
>
Heinrich Schuchardt Jan. 8, 2020, 6:16 p.m. UTC | #2
On 1/8/20 7:07 PM, Heinrich Schuchardt wrote:
>
>
> On 11/21/19 1:11 AM, AKASHI Takahiro wrote:
>> In the current implementation of FIT_SIGNATURE, five parameters for
>> a RSA public key are required while only two of them are essential.
>> (See rsa-mod-exp.h and uImage.FIT/signature.txt)
>> This is a result of considering relatively limited computer power
>> and resources on embedded systems, while such a assumption may not
>> be quite practical for other use cases.
>>
>> In this patch, added is a function, rsa_gen_key_prop(), which will
>> generate additional parameters for other uses, in particular
>> UEFI secure boot, on the fly.
>>
>> Note: the current code uses some "big number" rouKtines from BearSSL
>> for the calculation.
>>
>> Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
>> ---
>>   include/u-boot/rsa-mod-exp.h |  23 ++
>>   lib/rsa/Kconfig              |   1 +
>>   lib/rsa/Makefile             |   1 +
>>   lib/rsa/rsa-keyprop.c        | 725 +++++++++++++++++++++++++++++++++++
>>   4 files changed, 750 insertions(+)
>>   create mode 100644 lib/rsa/rsa-keyprop.c
>>
>> diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h
>> index 8a428c4b6a1a..1da8af1bb83d 100644
>> --- a/include/u-boot/rsa-mod-exp.h
>> +++ b/include/u-boot/rsa-mod-exp.h
>> @@ -26,6 +26,29 @@ struct key_prop {
>>       uint32_t exp_len;    /* Exponent length in number of uint8_t */
>>   };
>>
>> +/**
>> + * rsa_gen_key_prop() - Generate key properties of RSA public key
>> + * @key:    Specifies key data in DER format
>> + * @keylen:    Length of @key
>> + * @prop:    Generated key property
>> + *
>> + * This function takes a blob of encoded RSA public key data in DER
>> + * format, parse it and generate all the relevant properties
>> + * in key_prop structure.
>> + * Return a pointer to struct key_prop in @prop on success.
>> + *
>> + * Return:    0 on success, negative on error
>> + */
>> +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct
>> key_prop **proc);
>> +
>> +/**
>> + * rsa_free_key_prop() - Free key properties
>> + * @prop:    Pointer to struct key_prop
>> + *
>> + * This function frees all the memories allocated by rsa_gen_key_prop().
>> + */
>> +void rsa_free_key_prop(struct key_prop *prop);
>> +
>>   /**
>>    * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw
>>    *
>> diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig
>> index 71e4c06bf883..d1d6f6cf64a3 100644
>> --- a/lib/rsa/Kconfig
>> +++ b/lib/rsa/Kconfig
>> @@ -33,6 +33,7 @@ config RSA_VERIFY
>>   config RSA_VERIFY_WITH_PKEY
>>       bool "Execute RSA verification without key parameters from FDT"
>>       depends on RSA
>> +    imply RSA_PUBLIC_KEY_PARSER
>
> Do we really need RSA_PUBLIC_KEY_PARSER whenever we use CONFIG_RSA=y?
> E.g. on a system without the UEFI sub-system?
>
> Otherwise simply let RSA_PUBLIC_KEY_PARSER depend on RSA.
>
>>       help
>>         The standard RSA-signature verification code (FIT_SIGNATURE) uses
>>         pre-calculated key properties, that are stored in fdt blob, in
>> diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile
>> index c07305188e0c..14ed3cb4012b 100644
>> --- a/lib/rsa/Makefile
>> +++ b/lib/rsa/Makefile
>> @@ -6,4 +6,5 @@
>>   # Wolfgang Denk, DENX Software Engineering, wd@denx.de.
>>
>>   obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o
>> +obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o
>>   obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o
>> diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c
>> new file mode 100644
>> index 000000000000..9464df009343
>> --- /dev/null
>> +++ b/lib/rsa/rsa-keyprop.c
>> @@ -0,0 +1,725 @@
>> +// SPDX-License-Identifier: GPL-2.0+ and MIT
>> +/*
>> + * RSA library - generate parameters for a public key
>> + *
>> + * Copyright (c) 2019 Linaro Limited
>> + * Author: AKASHI Takahiro
>> + *
>> + * Big number routines in this file come from BearSSL:
>> + * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
>> + */
>> +
>> +#include <common.h>
>> +#include <image.h>
>> +#include <malloc.h>
>> +#include <asm/byteorder.h>
>> +#include <crypto/internal/rsa.h>
>> +#include <u-boot/rsa-mod-exp.h>
>> +
>> +/**
>> + * br_dec16be() - Convert 16-bit big-endian integer to native
>> + * @src:    Pointer to data
>> + * Return:    Native-endian integer
>> + */
>> +static unsigned br_dec16be(const void *src)
>> +{
>> +    return be16_to_cpup(src);
>> +}
>> +
>> +/**
>> + * br_dec32be() - Convert 32-bit big-endian integer to native
>> + * @src:    Pointer to data
>> + * Return:    Native-endian integer
>> + */
>> +static uint32_t br_dec32be(const void *src)
>> +{
>> +    return be32_to_cpup(src);
>> +}
>> +
>> +/**
>> + * br_enc32be() - Convert native 32-bit integer to big-endian
>> + * @dst:    Pointer to buffer to store big-endian integer in
>> + * @x:        Native 32-bit integer
>> + */
>> +static void br_enc32be(void *dst, uint32_t x)
>> +{
>> +    __be32 tmp;
>> +
>> +    tmp = cpu_to_be32(x);
>> +    memcpy(dst, &tmp, sizeof(tmp));
>> +}
>> +
>> +/* from BearSSL's src/inner.h */
>> +
>> +/*
>> + * Negate a boolean.
>> + */
>> +static uint32_t NOT(uint32_t ctl)
>> +{
>> +    return ctl ^ 1;
>> +}
>> +
>> +/*
>> + * Multiplexer: returns x if ctl == 1, y if ctl == 0.
>> + */
>> +static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y)
>> +{
>> +    return y ^ (-ctl & (x ^ y));
>> +}
>> +
>> +/*
>> + * Equality check: returns 1 if x == y, 0 otherwise.
>> + */
>> +static uint32_t EQ(uint32_t x, uint32_t y)
>> +{
>> +    uint32_t q;
>> +
>> +    q = x ^ y;
>> +    return NOT((q | -q) >> 31);
>> +}
>> +
>> +/*
>> + * Inequality check: returns 1 if x != y, 0 otherwise.
>> + */
>> +static uint32_t NEQ(uint32_t x, uint32_t y)
>> +{
>> +    uint32_t q;
>> +
>> +    q = x ^ y;
>> +    return (q | -q) >> 31;
>
> We want to minimize the code size of U-Boot.
>
> So, please, review this code and remove all of this bogus.

Are timing attacks relevant in any of our UEFI use cases?

I thought we only are verifying signatures against public keys that are
anyway in reach of an adversery.

Best regards

Heinrich

>
>> +}
>> +
>> +/*
>> + * Comparison: returns 1 if x > y, 0 otherwise.
>> + */
>> +static uint32_t GT(uint32_t x, uint32_t y)
>> +{
>> +    /*
>> +     * If both x < 2^31 and y < 2^31, then y-x will have its high
>> +     * bit set if x > y, cleared otherwise.
>> +     *
>> +     * If either x >= 2^31 or y >= 2^31 (but not both), then the
>> +     * result is the high bit of x.
>> +     *
>> +     * If both x >= 2^31 and y >= 2^31, then we can virtually
>> +     * subtract 2^31 from both, and we are back to the first case.
>> +     * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
>> +     * fine.
>> +     */
>> +    uint32_t z;
>> +
>> +    z = y - x;
>> +    return (z ^ ((x ^ y) & (x ^ z))) >> 31;
>> +}
>> +
>> +/*
>> + * Compute the bit length of a 32-bit integer. Returned value is
>> between 0
>> + * and 32 (inclusive).
>> + */
>> +static uint32_t BIT_LENGTH(uint32_t x)
>> +{
>> +    uint32_t k, c;
>> +
>> +    k = NEQ(x, 0);
>> +    c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;
>> +    c = GT(x, 0x00FF); x = MUX(c, x >>  8, x); k += c << 3;
>> +    c = GT(x, 0x000F); x = MUX(c, x >>  4, x); k += c << 2;
>> +    c = GT(x, 0x0003); x = MUX(c, x >>  2, x); k += c << 1;
>> +    k += GT(x, 0x0001);
>> +    return k;
>> +}
>> +
>> +#define GE(x, y)   NOT(GT(y, x))
>> +#define LT(x, y)   GT(y, x)
>> +#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))
>> +
>> +/*
>> + * Integers 'i32'
>> + * --------------
>> + *
>> + * The 'i32' functions implement computations on big integers using
>> + * an internal representation as an array of 32-bit integers. For
>> + * an array x[]:
>> + *  -- x[0] contains the "announced bit length" of the integer
>> + *  -- x[1], x[2]... contain the value in little-endian order (x[1]
>> + *     contains the least significant 32 bits)
>> + *
>> + * Multiplications rely on the elementary 32x32->64 multiplication.
>> + *
>> + * The announced bit length specifies the number of bits that are
>> + * significant in the subsequent 32-bit words. Unused bits in the
>> + * last (most significant) word are set to 0; subsequent words are
>> + * uninitialized and need not exist at all.
>> + *
>> + * The execution time and memory access patterns of all computations
>> + * depend on the announced bit length, but not on the actual word
>> + * values. For modular integers, the announced bit length of any integer
>> + * modulo n is equal to the actual bit length of n; thus, computations
>> + * on modular integers are "constant-time" (only the modulus length may
>> + * leak).
>> + */
>> +
>> +/*
>> + * Extract one word from an integer. The offset is counted in bits.
>> + * The word MUST entirely fit within the word elements corresponding
>> + * to the announced bit length of a[].
>> + */
>> +static uint32_t br_i32_word(const uint32_t *a, uint32_t off)
>> +{
>> +    size_t u;
>> +    unsigned j;
>> +
>> +    u = (size_t)(off >> 5) + 1;
>> +    j = (unsigned)off & 31;
>> +    if (j == 0) {
>> +        return a[u];
>> +    } else {
>> +        return (a[u] >> j) | (a[u + 1] << (32 - j));
>> +    }
>> +}
>> +
>> +/* from BearSSL's src/int/i32_bitlen.c */
>> +
>> +/*
>> + * Compute the actual bit length of an integer. The argument x should
>> + * point to the first (least significant) value word of the integer.
>> + * The len 'xlen' contains the number of 32-bit words to access.
>> + *
>> + * CT: value or length of x does not leak.
>> + */
>> +static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen)
>> +{
>> +    uint32_t tw, twk;
>> +
>> +    tw = 0;
>> +    twk = 0;
>> +    while (xlen -- > 0) {
>> +        uint32_t w, c;
>> +
>> +        c = EQ(tw, 0);
>> +        w = x[xlen];
>> +        tw = MUX(c, w, tw);
>> +        twk = MUX(c, (uint32_t)xlen, twk);
>> +    }
>> +    return (twk << 5) + BIT_LENGTH(tw);
>> +}
>> +
>> +/* from BearSSL's src/int/i32_decode.c */
>> +
>> +/*
>> + * Decode an integer from its big-endian unsigned representation. The
>> + * "true" bit length of the integer is computed, but all words of x[]
>> + * corresponding to the full 'len' bytes of the source are set.
>> + *
>> + * CT: value or length of x does not leak.
>> + */
>> +static void br_i32_decode(uint32_t *x, const void *src, size_t len)
>> +{
>> +    const unsigned char *buf;
>> +    size_t u, v;
>> +
>> +    buf = src;
>> +    u = len;
>> +    v = 1;
>> +    for (;;) {
>> +        if (u < 4) {
>> +            uint32_t w;
>> +
>> +            if (u < 2) {
>> +                if (u == 0) {
>> +                    break;
>> +                } else {
>> +                    w = buf[0];
>> +                }
>> +            } else {
>> +                if (u == 2) {
>> +                    w = br_dec16be(buf);
>> +                } else {
>> +                    w = ((uint32_t)buf[0] << 16)
>> +                        | br_dec16be(buf + 1);
>> +                }
>> +            }
>> +            x[v ++] = w;
>> +            break;
>> +        } else {
>> +            u -= 4;
>> +            x[v ++] = br_dec32be(buf + u);
>> +        }
>> +    }
>> +    x[0] = br_i32_bit_length(x + 1, v - 1);
>> +}
>> +
>> +/* from BearSSL's src/int/i32_encode.c */
>> +
>> +/*
>> + * Encode an integer into its big-endian unsigned representation. The
>> + * output length in bytes is provided (parameter 'len'); if the length
>> + * is too short then the integer is appropriately truncated; if it is
>> + * too long then the extra bytes are set to 0.
>> + */
>> +static void br_i32_encode(void *dst, size_t len, const uint32_t *x)
>> +{
>> +    unsigned char *buf;
>> +    size_t k;
>> +
>> +    buf = dst;
>> +
>> +    /*
>> +     * Compute the announced size of x in bytes; extra bytes are
>> +     * filled with zeros.
>> +     */
>> +    k = (x[0] + 7) >> 3;
>> +    while (len > k) {
>> +        *buf ++ = 0;
>> +        len --;
>> +    }
>> +
>> +    /*
>> +     * Now we use k as index within x[]. That index starts at 1;
>> +     * we initialize it to the topmost complete word, and process
>> +     * any remaining incomplete word.
>> +     */
>> +    k = (len + 3) >> 2;
>> +    switch (len & 3) {
>> +    case 3:
>> +        *buf ++ = x[k] >> 16;
>> +        /* fall through */
>> +    case 2:
>> +        *buf ++ = x[k] >> 8;
>> +        /* fall through */
>> +    case 1:
>> +        *buf ++ = x[k];
>> +        k --;
>> +    }
>> +
>> +    /*
>> +     * Encode all complete words.
>> +     */
>> +    while (k > 0) {
>> +        br_enc32be(buf, x[k]);
>> +        k --;
>> +        buf += 4;
>> +    }
>> +}
>> +
>> +/* from BearSSL's src/int/i32_ninv32.c */
>> +
>> +/*
>> + * Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
>> + */
>> +static uint32_t br_i32_ninv32(uint32_t x)
>> +{
>> +    uint32_t y;
>> +
>> +    y = 2 - x;
>> +    y *= 2 - y * x;
>> +    y *= 2 - y * x;
>> +    y *= 2 - y * x;
>> +    y *= 2 - y * x;
>> +    return MUX(x & 1, -y, 0);
>> +}
>> +
>> +/* from BearSSL's src/int/i32_add.c */
>> +
>> +/*
>> + * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
>> + * is unmodified, but the carry is still computed and returned. The
>> + * arrays a[] and b[] MUST have the same announced bit length.
>> + *
>> + * a[] and b[] MAY be the same array, but partial overlap is not
>> allowed.
>> + */
>> +static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
>> +{
>> +    uint32_t cc;
>> +    size_t u, m;
>> +
>> +    cc = 0;
>> +    m = (a[0] + 63) >> 5;
>> +    for (u = 1; u < m; u ++) {
>> +        uint32_t aw, bw, naw;
>> +
>> +        aw = a[u];
>> +        bw = b[u];
>> +        naw = aw + bw + cc;
>> +
>> +        /*
>> +         * Carry is 1 if naw < aw. Carry is also 1 if naw == aw
>> +         * AND the carry was already 1.
>> +         */
>> +        cc = (cc & EQ(naw, aw)) | LT(naw, aw);
>> +        a[u] = MUX(ctl, naw, aw);
>> +    }
>> +    return cc;
>> +}
>> +
>> +/* from BearSSL's src/int/i32_sub.c */
>> +
>> +/*
>> + * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
>> + * then a[] is unmodified, but the carry is still computed and returned.
>> + * The arrays a[] and b[] MUST have the same announced bit length.
>> + *
>> + * a[] and b[] MAY be the same array, but partial overlap is not
>> allowed.
>> + */
>> +static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
>> +{
>> +    uint32_t cc;
>> +    size_t u, m;
>> +
>> +    cc = 0;
>> +    m = (a[0] + 63) >> 5;
>> +    for (u = 1; u < m; u ++) {
>> +        uint32_t aw, bw, naw;
>> +
>> +        aw = a[u];
>> +        bw = b[u];
>> +        naw = aw - bw - cc;
>> +
>> +        /*
>> +         * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
>> +         * AND the carry was already 1.
>> +         */
>> +        cc = (cc & EQ(naw, aw)) | GT(naw, aw);
>> +        a[u] = MUX(ctl, naw, aw);
>> +    }
>> +    return cc;
>> +}
>> +
>> +/* from BearSSL's src/int/i32_div32.c */
>> +
>> +/*
>> + * Constant-time division. The dividend hi:lo is divided by the
>> + * divisor d; the quotient is returned and the remainder is written
>> + * in *r. If hi == d, then the quotient does not fit on 32 bits;
>> + * returned value is thus truncated. If hi > d, returned values are
>> + * indeterminate.
>> + */
>> +static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d,
>> uint32_t *r)
>> +{
>> +    /* TODO: optimize this */
>> +    uint32_t q;
>> +    uint32_t ch, cf;
>> +    int k;
>> +
>> +    q = 0;
>> +    ch = EQ(hi, d);
>> +    hi = MUX(ch, 0, hi);
>> +    for (k = 31; k > 0; k --) {
>> +        int j;
>> +        uint32_t w, ctl, hi2, lo2;
>> +
>> +        j = 32 - k;
>> +        w = (hi << j) | (lo >> k);
>> +        ctl = GE(w, d) | (hi >> k);
>> +        hi2 = (w - d) >> j;
>> +        lo2 = lo - (d << k);
>> +        hi = MUX(ctl, hi2, hi);
>> +        lo = MUX(ctl, lo2, lo);
>> +        q |= ctl << k;
>> +    }
>> +    cf = GE(lo, d) | hi;
>> +    q |= cf;
>> +    *r = MUX(cf, lo - d, lo);
>> +    return q;
>> +}
>> +
>> +/*
>> + * Wrapper for br_divrem(); the remainder is returned, and the quotient
>> + * is discarded.
>> + */
>> +static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d)
>> +{
>> +    uint32_t r;
>> +
>> +    br_divrem(hi, lo, d, &r);
>> +    return r;
>> +}
>> +
>> +/*
>> + * Wrapper for br_divrem(); the quotient is returned, and the remainder
>> + * is discarded.
>> + */
>> +static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d)
>> +{
>> +    uint32_t r;
>> +
>> +    return br_divrem(hi, lo, d, &r);
>> +}
>> +
>> +/* from BearSSL's src/int/i32_muladd.c */
>> +
>> +/*
>> + * Multiply x[] by 2^32 and then add integer z, modulo m[]. This
>> + * function assumes that x[] and m[] have the same announced bit
>> + * length, and the announced bit length of m[] matches its true
>> + * bit length.
>> + *
>> + * x[] and m[] MUST be distinct arrays.
>> + *
>> + * CT: only the common announced bit length of x and m leaks, not
>> + * the values of x, z or m.
>> + */
>> +static void br_i32_muladd_small(uint32_t *x, uint32_t z, const
>> uint32_t *m)
>> +{
>> +    uint32_t m_bitlen;
>> +    size_t u, mlen;
>> +    uint32_t a0, a1, b0, hi, g, q, tb;
>> +    uint32_t chf, clow, under, over;
>> +    uint64_t cc;
>> +
>> +    /*
>> +     * We can test on the modulus bit length since we accept to
>> +     * leak that length.
>> +     */
>> +    m_bitlen = m[0];
>> +    if (m_bitlen == 0) {
>> +        return;
>> +    }
>> +    if (m_bitlen <= 32) {
>> +        x[1] = br_rem(x[1], z, m[1]);
>> +        return;
>> +    }
>> +    mlen = (m_bitlen + 31) >> 5;
>> +
>> +    /*
>> +     * Principle: we estimate the quotient (x*2^32+z)/m by
>> +     * doing a 64/32 division with the high words.
>> +     *
>> +     * Let:
>> +     *   w = 2^32
>> +     *   a = (w*a0 + a1) * w^N + a2
>> +     *   b = b0 * w^N + b2
>> +     * such that:
>> +     *   0 <= a0 < w
>> +     *   0 <= a1 < w
>> +     *   0 <= a2 < w^N
>> +     *   w/2 <= b0 < w
>> +     *   0 <= b2 < w^N
>> +     *   a < w*b
>> +     * I.e. the two top words of a are a0:a1, the top word of b is
>> +     * b0, we ensured that b0 is "full" (high bit set), and a is
>> +     * such that the quotient q = a/b fits on one word (0 <= q < w).
>> +     *
>> +     * If a = b*q + r (with 0 <= r < q), we can estimate q by
>> +     * doing an Euclidean division on the top words:
>> +     *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
>> +     * Then the following holds:
>> +     *   0 <= u <= w
>> +     *   u-2 <= q <= u
>> +     */
>> +    a0 = br_i32_word(x, m_bitlen - 32);
>> +    hi = x[mlen];
>> +    memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
>> +    x[1] = z;
>> +    a1 = br_i32_word(x, m_bitlen - 32);
>> +    b0 = br_i32_word(m, m_bitlen - 32);
>> +
>> +    /*
>> +     * We estimate a divisor q. If the quotient returned by br_div()
>> +     * is g:
>> +     * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
>> +     * -- Otherwise:
>> +     *    -- if g == 0 then we set q = 0;
>> +     *    -- otherwise, we set q = g - 1.
>> +     * The properties described above then ensure that the true
>> +     * quotient is q-1, q or q+1.
>> +     */
>> +    g = br_div(a0, a1, b0);
>> +    q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
>> +
>> +    /*
>> +     * We subtract q*m from x (with the extra high word of value 'hi').
>> +     * Since q may be off by 1 (in either direction), we may have to
>> +     * add or subtract m afterwards.
>> +     *
>> +     * The 'tb' flag will be true (1) at the end of the loop if the
>> +     * result is greater than or equal to the modulus (not counting
>> +     * 'hi' or the carry).
>> +     */
>> +    cc = 0;
>> +    tb = 1;
>> +    for (u = 1; u <= mlen; u ++) {
>> +        uint32_t mw, zw, xw, nxw;
>> +        uint64_t zl;
>> +
>> +        mw = m[u];
>> +        zl = MUL(mw, q) + cc;
>> +        cc = (uint32_t)(zl >> 32);
>> +        zw = (uint32_t)zl;
>> +        xw = x[u];
>> +        nxw = xw - zw;
>> +        cc += (uint64_t)GT(nxw, xw);
>> +        x[u] = nxw;
>> +        tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
>> +    }
>> +
>> +    /*
>> +     * If we underestimated q, then either cc < hi (one extra bit
>> +     * beyond the top array word), or cc == hi and tb is true (no
>> +     * extra bit, but the result is not lower than the modulus). In
>> +     * these cases we must subtract m once.
>> +     *
>> +     * Otherwise, we may have overestimated, which will show as
>> +     * cc > hi (thus a negative result). Correction is adding m once.
>> +     */
>> +    chf = (uint32_t)(cc >> 32);
>> +    clow = (uint32_t)cc;
>> +    over = chf | GT(clow, hi);
>> +    under = ~over & (tb | (~chf & LT(clow, hi)));
>> +    br_i32_add(x, m, over);
>> +    br_i32_sub(x, m, under);
>> +}
>> +
>> +/* from BearSSL's src/int/i32_reduce.c */
>> +
>> +/*
>> + * Reduce an integer (a[]) modulo another (m[]). The result is written
>> + * in x[] and its announced bit length is set to be equal to that of
>> m[].
>> + *
>> + * x[] MUST be distinct from a[] and m[].
>> + *
>> + * CT: only announced bit lengths leak, not values of x, a or m.
>> + */
>> +static void br_i32_reduce(uint32_t *x, const uint32_t *a, const
>> uint32_t *m)
>> +{
>> +    uint32_t m_bitlen, a_bitlen;
>> +    size_t mlen, alen, u;
>> +
>> +    m_bitlen = m[0];
>> +    mlen = (m_bitlen + 31) >> 5;
>> +
>> +    x[0] = m_bitlen;
>> +    if (m_bitlen == 0) {
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * If the source is shorter, then simply copy all words from a[]
>> +     * and zero out the upper words.
>> +     */
>> +    a_bitlen = a[0];
>> +    alen = (a_bitlen + 31) >> 5;
>> +    if (a_bitlen < m_bitlen) {
>> +        memcpy(x + 1, a + 1, alen * sizeof *a);
>> +        for (u = alen; u < mlen; u ++) {
>> +            x[u + 1] = 0;
>> +        }
>> +        return;
>> +    }
>> +
>> +    /*
>> +     * The source length is at least equal to that of the modulus.
>> +     * We must thus copy N-1 words, and input the remaining words
>> +     * one by one.
>> +     */
>> +    memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
>> +    x[mlen] = 0;
>> +    for (u = 1 + alen - mlen; u > 0; u --) {
>> +        br_i32_muladd_small(x, a[u], m);
>> +    }
>> +}
>> +
>> +/**
>> + * rsa_free_key_prop() - Free key properties
>> + * @prop:    Pointer to struct key_prop
>> + *
>> + * This function frees all the memories allocated by rsa_gen_key_prop().
>> + */
>> +void rsa_free_key_prop(struct key_prop *prop)
>> +{
>> +    if (!prop)
>> +        return;
>> +
>> +    free((void *)prop->modulus);
>> +    free((void *)prop->public_exponent);
>> +    free((void *)prop->rr);
>> +
>> +    free(prop);
>> +}
>> +
>> +/**
>> + * rsa_gen_key_prop() - Generate key properties of RSA public key
>> + * @key:    Specifies key data in DER format
>> + * @keylen:    Length of @key
>> + * @prop:    Generated key property
>> + *
>> + * This function takes a blob of encoded RSA public key data in DER
>> + * format, parse it and generate all the relevant properties
>> + * in key_prop structure.
>> + * Return a pointer to struct key_prop in @prop on success.
>> + *
>> + * Return:    0 on success, negative on error
>> + */
>> +int rsa_gen_key_prop(const void *key, uint32_t keylen, struct
>> key_prop **prop)
>> +{
>> +    struct rsa_key rsa_key;
>> +    uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL;
>> +    const int max_rsa_size = 4096;
>> +    int rlen, i, ret;
>> +
>> +    *prop = calloc(sizeof(**prop), 1);
>> +    n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
>> +    rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
>> +    rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
>> +    if (!(*prop) || !n || !rr || !rrtmp) {
>> +        ret = -ENOMEM;
>> +        goto err;
>> +    }
>> +
>> +    ret = rsa_parse_pub_key(&rsa_key, key, keylen);
>> +    if (ret)
>> +        goto err;
>> +
>> +    /* modulus */
>> +    /* removing leading 0's */
>> +    for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++)
>> +        ;
>> +    (*prop)->num_bits = (rsa_key.n_sz - i) * 8;
>> +    (*prop)->modulus = malloc(rsa_key.n_sz - i);
>> +    if (!(*prop)->modulus) {
>> +        ret = -ENOMEM;
>> +        goto err;
>> +    }
>> +    memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i);
>> +
>> +    /* exponent */
>> +    (*prop)->public_exponent = calloc(1, sizeof(uint64_t));
>> +    if (!(*prop)->public_exponent) {
>> +        ret = -ENOMEM;
>> +        goto err;
>> +    }
>> +    memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t)
>> +                        - rsa_key.e_sz,
>> +           rsa_key.e, rsa_key.e_sz);
>> +    (*prop)->exp_len = rsa_key.e_sz;
>> +
>> +    /* n0 inverse */
>> +    br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i);
>> +    (*prop)->n0inv = br_i32_ninv32(n[1]);
>> +
>> +    /* R^2 mod n; R = 2^(num_bits) */
>> +    rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */
>> +    rr[0] = 0;
>> +    *(uint8_t *)&rr[0] = (1 << (rlen % 8));
>> +    for (i = 1; i < (((rlen + 31) >> 5) + 1); i++)
>> +        rr[i] = 0;
>> +    br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1);
>> +    br_i32_reduce(rr, rrtmp, n);
>> +
>> +    rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */
>> +    (*prop)->rr = malloc(rlen);
>> +    if (!(*prop)->rr) {
>> +        ret = -ENOMEM;
>> +        goto err;
>> +    }
>> +    br_i32_encode((void *)(*prop)->rr, rlen, rr);
>> +
>> +    return 0;
>> +
>> +err:
>> +    free(n);
>> +    free(rr);
>> +    free(rrtmp);
>> +    rsa_free_key_prop(*prop);
>> +    return ret;
>> +}
>>
AKASHI Takahiro Jan. 14, 2020, 7:15 a.m. UTC | #3
On Wed, Jan 08, 2020 at 07:07:01PM +0100, Heinrich Schuchardt wrote:
> 
> 
> On 11/21/19 1:11 AM, AKASHI Takahiro wrote:
> >In the current implementation of FIT_SIGNATURE, five parameters for
> >a RSA public key are required while only two of them are essential.
> >(See rsa-mod-exp.h and uImage.FIT/signature.txt)
> >This is a result of considering relatively limited computer power
> >and resources on embedded systems, while such a assumption may not
> >be quite practical for other use cases.
> >
> >In this patch, added is a function, rsa_gen_key_prop(), which will
> >generate additional parameters for other uses, in particular
> >UEFI secure boot, on the fly.
> >
> >Note: the current code uses some "big number" rouKtines from BearSSL
> >for the calculation.
> >
> >Signed-off-by: AKASHI Takahiro <takahiro.akashi@linaro.org>
> >---
> >  include/u-boot/rsa-mod-exp.h |  23 ++
> >  lib/rsa/Kconfig              |   1 +
> >  lib/rsa/Makefile             |   1 +
> >  lib/rsa/rsa-keyprop.c        | 725 +++++++++++++++++++++++++++++++++++
> >  4 files changed, 750 insertions(+)
> >  create mode 100644 lib/rsa/rsa-keyprop.c
> >
> >diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h
> >index 8a428c4b6a1a..1da8af1bb83d 100644
> >--- a/include/u-boot/rsa-mod-exp.h
> >+++ b/include/u-boot/rsa-mod-exp.h
> >@@ -26,6 +26,29 @@ struct key_prop {
> >  	uint32_t exp_len;	/* Exponent length in number of uint8_t */
> >  };
> >
> >+/**
> >+ * rsa_gen_key_prop() - Generate key properties of RSA public key
> >+ * @key:	Specifies key data in DER format
> >+ * @keylen:	Length of @key
> >+ * @prop:	Generated key property
> >+ *
> >+ * This function takes a blob of encoded RSA public key data in DER
> >+ * format, parse it and generate all the relevant properties
> >+ * in key_prop structure.
> >+ * Return a pointer to struct key_prop in @prop on success.
> >+ *
> >+ * Return:	0 on success, negative on error
> >+ */
> >+int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **proc);
> >+
> >+/**
> >+ * rsa_free_key_prop() - Free key properties
> >+ * @prop:	Pointer to struct key_prop
> >+ *
> >+ * This function frees all the memories allocated by rsa_gen_key_prop().
> >+ */
> >+void rsa_free_key_prop(struct key_prop *prop);
> >+
> >  /**
> >   * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw
> >   *
> >diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig
> >index 71e4c06bf883..d1d6f6cf64a3 100644
> >--- a/lib/rsa/Kconfig
> >+++ b/lib/rsa/Kconfig
> >@@ -33,6 +33,7 @@ config RSA_VERIFY
> >  config RSA_VERIFY_WITH_PKEY
> >  	bool "Execute RSA verification without key parameters from FDT"
> >  	depends on RSA
> >+	imply RSA_PUBLIC_KEY_PARSER
> 
> Do we really need RSA_PUBLIC_KEY_PARSER whenever we use CONFIG_RSA=y?

???
RSA_PUBLIC_KEY_PARSER will be selected only if RSA_VERIFY_WITH_PKEY
is enabled. I avoided to use 'select' here because RSA_PUBLIC_KEY_PARSER
is also 'selectable.'

> E.g. on a system without the UEFI sub-system?
> 
> Otherwise simply let RSA_PUBLIC_KEY_PARSER depend on RSA.

Such a dependency sounds odd to me.

> 
> >  	help
> >  	  The standard RSA-signature verification code (FIT_SIGNATURE) uses
> >  	  pre-calculated key properties, that are stored in fdt blob, in
> >diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile
> >index c07305188e0c..14ed3cb4012b 100644
> >--- a/lib/rsa/Makefile
> >+++ b/lib/rsa/Makefile
> >@@ -6,4 +6,5 @@
> >  # Wolfgang Denk, DENX Software Engineering, wd@denx.de.
> >
> >  obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o
> >+obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o
> >  obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o
> >diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c
> >new file mode 100644
> >index 000000000000..9464df009343
> >--- /dev/null
> >+++ b/lib/rsa/rsa-keyprop.c
> >@@ -0,0 +1,725 @@
> >+// SPDX-License-Identifier: GPL-2.0+ and MIT
> >+/*
> >+ * RSA library - generate parameters for a public key
> >+ *
> >+ * Copyright (c) 2019 Linaro Limited
> >+ * Author: AKASHI Takahiro
> >+ *
> >+ * Big number routines in this file come from BearSSL:
> >+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
> >+ */
> >+
> >+#include <common.h>
> >+#include <image.h>
> >+#include <malloc.h>
> >+#include <asm/byteorder.h>
> >+#include <crypto/internal/rsa.h>
> >+#include <u-boot/rsa-mod-exp.h>
> >+
> >+/**
> >+ * br_dec16be() - Convert 16-bit big-endian integer to native
> >+ * @src:	Pointer to data
> >+ * Return:	Native-endian integer
> >+ */
> >+static unsigned br_dec16be(const void *src)
> >+{
> >+	return be16_to_cpup(src);
> >+}
> >+
> >+/**
> >+ * br_dec32be() - Convert 32-bit big-endian integer to native
> >+ * @src:	Pointer to data
> >+ * Return:	Native-endian integer
> >+ */
> >+static uint32_t br_dec32be(const void *src)
> >+{
> >+	return be32_to_cpup(src);
> >+}
> >+
> >+/**
> >+ * br_enc32be() - Convert native 32-bit integer to big-endian
> >+ * @dst:	Pointer to buffer to store big-endian integer in
> >+ * @x:		Native 32-bit integer
> >+ */
> >+static void br_enc32be(void *dst, uint32_t x)
> >+{
> >+	__be32 tmp;
> >+
> >+	tmp = cpu_to_be32(x);
> >+	memcpy(dst, &tmp, sizeof(tmp));
> >+}
> >+
> >+/* from BearSSL's src/inner.h */
> >+
> >+/*
> >+ * Negate a boolean.
> >+ */
> >+static uint32_t NOT(uint32_t ctl)
> >+{
> >+	return ctl ^ 1;
> >+}
> >+
> >+/*
> >+ * Multiplexer: returns x if ctl == 1, y if ctl == 0.
> >+ */
> >+static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y)
> >+{
> >+	return y ^ (-ctl & (x ^ y));
> >+}
> >+
> >+/*
> >+ * Equality check: returns 1 if x == y, 0 otherwise.
> >+ */
> >+static uint32_t EQ(uint32_t x, uint32_t y)
> >+{
> >+	uint32_t q;
> >+
> >+	q = x ^ y;
> >+	return NOT((q | -q) >> 31);
> >+}
> >+
> >+/*
> >+ * Inequality check: returns 1 if x != y, 0 otherwise.
> >+ */
> >+static uint32_t NEQ(uint32_t x, uint32_t y)
> >+{
> >+	uint32_t q;
> >+
> >+	q = x ^ y;
> >+	return (q | -q) >> 31;
> 
> We want to minimize the code size of U-Boot.

I don't get your point. Please elaborate your concern.

> So, please, review this code and remove all of this bogus.

Which part of the code do you mention as 'bogus'?

Thanks,
-Takahiro Akashi

> 
> Best regards
> 
> Heinrich
> 
> >+}
> >+
> >+/*
> >+ * Comparison: returns 1 if x > y, 0 otherwise.
> >+ */
> >+static uint32_t GT(uint32_t x, uint32_t y)
> >+{
> >+	/*
> >+	 * If both x < 2^31 and y < 2^31, then y-x will have its high
> >+	 * bit set if x > y, cleared otherwise.
> >+	 *
> >+	 * If either x >= 2^31 or y >= 2^31 (but not both), then the
> >+	 * result is the high bit of x.
> >+	 *
> >+	 * If both x >= 2^31 and y >= 2^31, then we can virtually
> >+	 * subtract 2^31 from both, and we are back to the first case.
> >+	 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
> >+	 * fine.
> >+	 */
> >+	uint32_t z;
> >+
> >+	z = y - x;
> >+	return (z ^ ((x ^ y) & (x ^ z))) >> 31;
> >+}
> >+
> >+/*
> >+ * Compute the bit length of a 32-bit integer. Returned value is between 0
> >+ * and 32 (inclusive).
> >+ */
> >+static uint32_t BIT_LENGTH(uint32_t x)
> >+{
> >+	uint32_t k, c;
> >+
> >+	k = NEQ(x, 0);
> >+	c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;
> >+	c = GT(x, 0x00FF); x = MUX(c, x >>  8, x); k += c << 3;
> >+	c = GT(x, 0x000F); x = MUX(c, x >>  4, x); k += c << 2;
> >+	c = GT(x, 0x0003); x = MUX(c, x >>  2, x); k += c << 1;
> >+	k += GT(x, 0x0001);
> >+	return k;
> >+}
> >+
> >+#define GE(x, y)   NOT(GT(y, x))
> >+#define LT(x, y)   GT(y, x)
> >+#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))
> >+
> >+/*
> >+ * Integers 'i32'
> >+ * --------------
> >+ *
> >+ * The 'i32' functions implement computations on big integers using
> >+ * an internal representation as an array of 32-bit integers. For
> >+ * an array x[]:
> >+ *  -- x[0] contains the "announced bit length" of the integer
> >+ *  -- x[1], x[2]... contain the value in little-endian order (x[1]
> >+ *     contains the least significant 32 bits)
> >+ *
> >+ * Multiplications rely on the elementary 32x32->64 multiplication.
> >+ *
> >+ * The announced bit length specifies the number of bits that are
> >+ * significant in the subsequent 32-bit words. Unused bits in the
> >+ * last (most significant) word are set to 0; subsequent words are
> >+ * uninitialized and need not exist at all.
> >+ *
> >+ * The execution time and memory access patterns of all computations
> >+ * depend on the announced bit length, but not on the actual word
> >+ * values. For modular integers, the announced bit length of any integer
> >+ * modulo n is equal to the actual bit length of n; thus, computations
> >+ * on modular integers are "constant-time" (only the modulus length may
> >+ * leak).
> >+ */
> >+
> >+/*
> >+ * Extract one word from an integer. The offset is counted in bits.
> >+ * The word MUST entirely fit within the word elements corresponding
> >+ * to the announced bit length of a[].
> >+ */
> >+static uint32_t br_i32_word(const uint32_t *a, uint32_t off)
> >+{
> >+	size_t u;
> >+	unsigned j;
> >+
> >+	u = (size_t)(off >> 5) + 1;
> >+	j = (unsigned)off & 31;
> >+	if (j == 0) {
> >+		return a[u];
> >+	} else {
> >+		return (a[u] >> j) | (a[u + 1] << (32 - j));
> >+	}
> >+}
> >+
> >+/* from BearSSL's src/int/i32_bitlen.c */
> >+
> >+/*
> >+ * Compute the actual bit length of an integer. The argument x should
> >+ * point to the first (least significant) value word of the integer.
> >+ * The len 'xlen' contains the number of 32-bit words to access.
> >+ *
> >+ * CT: value or length of x does not leak.
> >+ */
> >+static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen)
> >+{
> >+	uint32_t tw, twk;
> >+
> >+	tw = 0;
> >+	twk = 0;
> >+	while (xlen -- > 0) {
> >+		uint32_t w, c;
> >+
> >+		c = EQ(tw, 0);
> >+		w = x[xlen];
> >+		tw = MUX(c, w, tw);
> >+		twk = MUX(c, (uint32_t)xlen, twk);
> >+	}
> >+	return (twk << 5) + BIT_LENGTH(tw);
> >+}
> >+
> >+/* from BearSSL's src/int/i32_decode.c */
> >+
> >+/*
> >+ * Decode an integer from its big-endian unsigned representation. The
> >+ * "true" bit length of the integer is computed, but all words of x[]
> >+ * corresponding to the full 'len' bytes of the source are set.
> >+ *
> >+ * CT: value or length of x does not leak.
> >+ */
> >+static void br_i32_decode(uint32_t *x, const void *src, size_t len)
> >+{
> >+	const unsigned char *buf;
> >+	size_t u, v;
> >+
> >+	buf = src;
> >+	u = len;
> >+	v = 1;
> >+	for (;;) {
> >+		if (u < 4) {
> >+			uint32_t w;
> >+
> >+			if (u < 2) {
> >+				if (u == 0) {
> >+					break;
> >+				} else {
> >+					w = buf[0];
> >+				}
> >+			} else {
> >+				if (u == 2) {
> >+					w = br_dec16be(buf);
> >+				} else {
> >+					w = ((uint32_t)buf[0] << 16)
> >+						| br_dec16be(buf + 1);
> >+				}
> >+			}
> >+			x[v ++] = w;
> >+			break;
> >+		} else {
> >+			u -= 4;
> >+			x[v ++] = br_dec32be(buf + u);
> >+		}
> >+	}
> >+	x[0] = br_i32_bit_length(x + 1, v - 1);
> >+}
> >+
> >+/* from BearSSL's src/int/i32_encode.c */
> >+
> >+/*
> >+ * Encode an integer into its big-endian unsigned representation. The
> >+ * output length in bytes is provided (parameter 'len'); if the length
> >+ * is too short then the integer is appropriately truncated; if it is
> >+ * too long then the extra bytes are set to 0.
> >+ */
> >+static void br_i32_encode(void *dst, size_t len, const uint32_t *x)
> >+{
> >+	unsigned char *buf;
> >+	size_t k;
> >+
> >+	buf = dst;
> >+
> >+	/*
> >+	 * Compute the announced size of x in bytes; extra bytes are
> >+	 * filled with zeros.
> >+	 */
> >+	k = (x[0] + 7) >> 3;
> >+	while (len > k) {
> >+		*buf ++ = 0;
> >+		len --;
> >+	}
> >+
> >+	/*
> >+	 * Now we use k as index within x[]. That index starts at 1;
> >+	 * we initialize it to the topmost complete word, and process
> >+	 * any remaining incomplete word.
> >+	 */
> >+	k = (len + 3) >> 2;
> >+	switch (len & 3) {
> >+	case 3:
> >+		*buf ++ = x[k] >> 16;
> >+		/* fall through */
> >+	case 2:
> >+		*buf ++ = x[k] >> 8;
> >+		/* fall through */
> >+	case 1:
> >+		*buf ++ = x[k];
> >+		k --;
> >+	}
> >+
> >+	/*
> >+	 * Encode all complete words.
> >+	 */
> >+	while (k > 0) {
> >+		br_enc32be(buf, x[k]);
> >+		k --;
> >+		buf += 4;
> >+	}
> >+}
> >+
> >+/* from BearSSL's src/int/i32_ninv32.c */
> >+
> >+/*
> >+ * Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
> >+ */
> >+static uint32_t br_i32_ninv32(uint32_t x)
> >+{
> >+	uint32_t y;
> >+
> >+	y = 2 - x;
> >+	y *= 2 - y * x;
> >+	y *= 2 - y * x;
> >+	y *= 2 - y * x;
> >+	y *= 2 - y * x;
> >+	return MUX(x & 1, -y, 0);
> >+}
> >+
> >+/* from BearSSL's src/int/i32_add.c */
> >+
> >+/*
> >+ * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
> >+ * is unmodified, but the carry is still computed and returned. The
> >+ * arrays a[] and b[] MUST have the same announced bit length.
> >+ *
> >+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
> >+ */
> >+static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
> >+{
> >+	uint32_t cc;
> >+	size_t u, m;
> >+
> >+	cc = 0;
> >+	m = (a[0] + 63) >> 5;
> >+	for (u = 1; u < m; u ++) {
> >+		uint32_t aw, bw, naw;
> >+
> >+		aw = a[u];
> >+		bw = b[u];
> >+		naw = aw + bw + cc;
> >+
> >+		/*
> >+		 * Carry is 1 if naw < aw. Carry is also 1 if naw == aw
> >+		 * AND the carry was already 1.
> >+		 */
> >+		cc = (cc & EQ(naw, aw)) | LT(naw, aw);
> >+		a[u] = MUX(ctl, naw, aw);
> >+	}
> >+	return cc;
> >+}
> >+
> >+/* from BearSSL's src/int/i32_sub.c */
> >+
> >+/*
> >+ * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
> >+ * then a[] is unmodified, but the carry is still computed and returned.
> >+ * The arrays a[] and b[] MUST have the same announced bit length.
> >+ *
> >+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
> >+ */
> >+static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
> >+{
> >+	uint32_t cc;
> >+	size_t u, m;
> >+
> >+	cc = 0;
> >+	m = (a[0] + 63) >> 5;
> >+	for (u = 1; u < m; u ++) {
> >+		uint32_t aw, bw, naw;
> >+
> >+		aw = a[u];
> >+		bw = b[u];
> >+		naw = aw - bw - cc;
> >+
> >+		/*
> >+		 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
> >+		 * AND the carry was already 1.
> >+		 */
> >+		cc = (cc & EQ(naw, aw)) | GT(naw, aw);
> >+		a[u] = MUX(ctl, naw, aw);
> >+	}
> >+	return cc;
> >+}
> >+
> >+/* from BearSSL's src/int/i32_div32.c */
> >+
> >+/*
> >+ * Constant-time division. The dividend hi:lo is divided by the
> >+ * divisor d; the quotient is returned and the remainder is written
> >+ * in *r. If hi == d, then the quotient does not fit on 32 bits;
> >+ * returned value is thus truncated. If hi > d, returned values are
> >+ * indeterminate.
> >+ */
> >+static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
> >+{
> >+	/* TODO: optimize this */
> >+	uint32_t q;
> >+	uint32_t ch, cf;
> >+	int k;
> >+
> >+	q = 0;
> >+	ch = EQ(hi, d);
> >+	hi = MUX(ch, 0, hi);
> >+	for (k = 31; k > 0; k --) {
> >+		int j;
> >+		uint32_t w, ctl, hi2, lo2;
> >+
> >+		j = 32 - k;
> >+		w = (hi << j) | (lo >> k);
> >+		ctl = GE(w, d) | (hi >> k);
> >+		hi2 = (w - d) >> j;
> >+		lo2 = lo - (d << k);
> >+		hi = MUX(ctl, hi2, hi);
> >+		lo = MUX(ctl, lo2, lo);
> >+		q |= ctl << k;
> >+	}
> >+	cf = GE(lo, d) | hi;
> >+	q |= cf;
> >+	*r = MUX(cf, lo - d, lo);
> >+	return q;
> >+}
> >+
> >+/*
> >+ * Wrapper for br_divrem(); the remainder is returned, and the quotient
> >+ * is discarded.
> >+ */
> >+static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d)
> >+{
> >+	uint32_t r;
> >+
> >+	br_divrem(hi, lo, d, &r);
> >+	return r;
> >+}
> >+
> >+/*
> >+ * Wrapper for br_divrem(); the quotient is returned, and the remainder
> >+ * is discarded.
> >+ */
> >+static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d)
> >+{
> >+	uint32_t r;
> >+
> >+	return br_divrem(hi, lo, d, &r);
> >+}
> >+
> >+/* from BearSSL's src/int/i32_muladd.c */
> >+
> >+/*
> >+ * Multiply x[] by 2^32 and then add integer z, modulo m[]. This
> >+ * function assumes that x[] and m[] have the same announced bit
> >+ * length, and the announced bit length of m[] matches its true
> >+ * bit length.
> >+ *
> >+ * x[] and m[] MUST be distinct arrays.
> >+ *
> >+ * CT: only the common announced bit length of x and m leaks, not
> >+ * the values of x, z or m.
> >+ */
> >+static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
> >+{
> >+	uint32_t m_bitlen;
> >+	size_t u, mlen;
> >+	uint32_t a0, a1, b0, hi, g, q, tb;
> >+	uint32_t chf, clow, under, over;
> >+	uint64_t cc;
> >+
> >+	/*
> >+	 * We can test on the modulus bit length since we accept to
> >+	 * leak that length.
> >+	 */
> >+	m_bitlen = m[0];
> >+	if (m_bitlen == 0) {
> >+		return;
> >+	}
> >+	if (m_bitlen <= 32) {
> >+		x[1] = br_rem(x[1], z, m[1]);
> >+		return;
> >+	}
> >+	mlen = (m_bitlen + 31) >> 5;
> >+
> >+	/*
> >+	 * Principle: we estimate the quotient (x*2^32+z)/m by
> >+	 * doing a 64/32 division with the high words.
> >+	 *
> >+	 * Let:
> >+	 *   w = 2^32
> >+	 *   a = (w*a0 + a1) * w^N + a2
> >+	 *   b = b0 * w^N + b2
> >+	 * such that:
> >+	 *   0 <= a0 < w
> >+	 *   0 <= a1 < w
> >+	 *   0 <= a2 < w^N
> >+	 *   w/2 <= b0 < w
> >+	 *   0 <= b2 < w^N
> >+	 *   a < w*b
> >+	 * I.e. the two top words of a are a0:a1, the top word of b is
> >+	 * b0, we ensured that b0 is "full" (high bit set), and a is
> >+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
> >+	 *
> >+	 * If a = b*q + r (with 0 <= r < q), we can estimate q by
> >+	 * doing an Euclidean division on the top words:
> >+	 *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
> >+	 * Then the following holds:
> >+	 *   0 <= u <= w
> >+	 *   u-2 <= q <= u
> >+	 */
> >+	a0 = br_i32_word(x, m_bitlen - 32);
> >+	hi = x[mlen];
> >+	memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
> >+	x[1] = z;
> >+	a1 = br_i32_word(x, m_bitlen - 32);
> >+	b0 = br_i32_word(m, m_bitlen - 32);
> >+
> >+	/*
> >+	 * We estimate a divisor q. If the quotient returned by br_div()
> >+	 * is g:
> >+	 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
> >+	 * -- Otherwise:
> >+	 *    -- if g == 0 then we set q = 0;
> >+	 *    -- otherwise, we set q = g - 1.
> >+	 * The properties described above then ensure that the true
> >+	 * quotient is q-1, q or q+1.
> >+	 */
> >+	g = br_div(a0, a1, b0);
> >+	q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
> >+
> >+	/*
> >+	 * We subtract q*m from x (with the extra high word of value 'hi').
> >+	 * Since q may be off by 1 (in either direction), we may have to
> >+	 * add or subtract m afterwards.
> >+	 *
> >+	 * The 'tb' flag will be true (1) at the end of the loop if the
> >+	 * result is greater than or equal to the modulus (not counting
> >+	 * 'hi' or the carry).
> >+	 */
> >+	cc = 0;
> >+	tb = 1;
> >+	for (u = 1; u <= mlen; u ++) {
> >+		uint32_t mw, zw, xw, nxw;
> >+		uint64_t zl;
> >+
> >+		mw = m[u];
> >+		zl = MUL(mw, q) + cc;
> >+		cc = (uint32_t)(zl >> 32);
> >+		zw = (uint32_t)zl;
> >+		xw = x[u];
> >+		nxw = xw - zw;
> >+		cc += (uint64_t)GT(nxw, xw);
> >+		x[u] = nxw;
> >+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
> >+	}
> >+
> >+	/*
> >+	 * If we underestimated q, then either cc < hi (one extra bit
> >+	 * beyond the top array word), or cc == hi and tb is true (no
> >+	 * extra bit, but the result is not lower than the modulus). In
> >+	 * these cases we must subtract m once.
> >+	 *
> >+	 * Otherwise, we may have overestimated, which will show as
> >+	 * cc > hi (thus a negative result). Correction is adding m once.
> >+	 */
> >+	chf = (uint32_t)(cc >> 32);
> >+	clow = (uint32_t)cc;
> >+	over = chf | GT(clow, hi);
> >+	under = ~over & (tb | (~chf & LT(clow, hi)));
> >+	br_i32_add(x, m, over);
> >+	br_i32_sub(x, m, under);
> >+}
> >+
> >+/* from BearSSL's src/int/i32_reduce.c */
> >+
> >+/*
> >+ * Reduce an integer (a[]) modulo another (m[]). The result is written
> >+ * in x[] and its announced bit length is set to be equal to that of m[].
> >+ *
> >+ * x[] MUST be distinct from a[] and m[].
> >+ *
> >+ * CT: only announced bit lengths leak, not values of x, a or m.
> >+ */
> >+static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
> >+{
> >+	uint32_t m_bitlen, a_bitlen;
> >+	size_t mlen, alen, u;
> >+
> >+	m_bitlen = m[0];
> >+	mlen = (m_bitlen + 31) >> 5;
> >+
> >+	x[0] = m_bitlen;
> >+	if (m_bitlen == 0) {
> >+		return;
> >+	}
> >+
> >+	/*
> >+	 * If the source is shorter, then simply copy all words from a[]
> >+	 * and zero out the upper words.
> >+	 */
> >+	a_bitlen = a[0];
> >+	alen = (a_bitlen + 31) >> 5;
> >+	if (a_bitlen < m_bitlen) {
> >+		memcpy(x + 1, a + 1, alen * sizeof *a);
> >+		for (u = alen; u < mlen; u ++) {
> >+			x[u + 1] = 0;
> >+		}
> >+		return;
> >+	}
> >+
> >+	/*
> >+	 * The source length is at least equal to that of the modulus.
> >+	 * We must thus copy N-1 words, and input the remaining words
> >+	 * one by one.
> >+	 */
> >+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
> >+	x[mlen] = 0;
> >+	for (u = 1 + alen - mlen; u > 0; u --) {
> >+		br_i32_muladd_small(x, a[u], m);
> >+	}
> >+}
> >+
> >+/**
> >+ * rsa_free_key_prop() - Free key properties
> >+ * @prop:	Pointer to struct key_prop
> >+ *
> >+ * This function frees all the memories allocated by rsa_gen_key_prop().
> >+ */
> >+void rsa_free_key_prop(struct key_prop *prop)
> >+{
> >+	if (!prop)
> >+		return;
> >+
> >+	free((void *)prop->modulus);
> >+	free((void *)prop->public_exponent);
> >+	free((void *)prop->rr);
> >+
> >+	free(prop);
> >+}
> >+
> >+/**
> >+ * rsa_gen_key_prop() - Generate key properties of RSA public key
> >+ * @key:	Specifies key data in DER format
> >+ * @keylen:	Length of @key
> >+ * @prop:	Generated key property
> >+ *
> >+ * This function takes a blob of encoded RSA public key data in DER
> >+ * format, parse it and generate all the relevant properties
> >+ * in key_prop structure.
> >+ * Return a pointer to struct key_prop in @prop on success.
> >+ *
> >+ * Return:	0 on success, negative on error
> >+ */
> >+int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop)
> >+{
> >+	struct rsa_key rsa_key;
> >+	uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL;
> >+	const int max_rsa_size = 4096;
> >+	int rlen, i, ret;
> >+
> >+	*prop = calloc(sizeof(**prop), 1);
> >+	n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
> >+	rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
> >+	rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
> >+	if (!(*prop) || !n || !rr || !rrtmp) {
> >+		ret = -ENOMEM;
> >+		goto err;
> >+	}
> >+
> >+	ret = rsa_parse_pub_key(&rsa_key, key, keylen);
> >+	if (ret)
> >+		goto err;
> >+
> >+	/* modulus */
> >+	/* removing leading 0's */
> >+	for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++)
> >+		;
> >+	(*prop)->num_bits = (rsa_key.n_sz - i) * 8;
> >+	(*prop)->modulus = malloc(rsa_key.n_sz - i);
> >+	if (!(*prop)->modulus) {
> >+		ret = -ENOMEM;
> >+		goto err;
> >+	}
> >+	memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i);
> >+
> >+	/* exponent */
> >+	(*prop)->public_exponent = calloc(1, sizeof(uint64_t));
> >+	if (!(*prop)->public_exponent) {
> >+		ret = -ENOMEM;
> >+		goto err;
> >+	}
> >+	memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t)
> >+						- rsa_key.e_sz,
> >+	       rsa_key.e, rsa_key.e_sz);
> >+	(*prop)->exp_len = rsa_key.e_sz;
> >+
> >+	/* n0 inverse */
> >+	br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i);
> >+	(*prop)->n0inv = br_i32_ninv32(n[1]);
> >+
> >+	/* R^2 mod n; R = 2^(num_bits) */
> >+	rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */
> >+	rr[0] = 0;
> >+	*(uint8_t *)&rr[0] = (1 << (rlen % 8));
> >+	for (i = 1; i < (((rlen + 31) >> 5) + 1); i++)
> >+		rr[i] = 0;
> >+	br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1);
> >+	br_i32_reduce(rr, rrtmp, n);
> >+
> >+	rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */
> >+	(*prop)->rr = malloc(rlen);
> >+	if (!(*prop)->rr) {
> >+		ret = -ENOMEM;
> >+		goto err;
> >+	}
> >+	br_i32_encode((void *)(*prop)->rr, rlen, rr);
> >+
> >+	return 0;
> >+
> >+err:
> >+	free(n);
> >+	free(rr);
> >+	free(rrtmp);
> >+	rsa_free_key_prop(*prop);
> >+	return ret;
> >+}
> >
diff mbox series

Patch

diff --git a/include/u-boot/rsa-mod-exp.h b/include/u-boot/rsa-mod-exp.h
index 8a428c4b6a1a..1da8af1bb83d 100644
--- a/include/u-boot/rsa-mod-exp.h
+++ b/include/u-boot/rsa-mod-exp.h
@@ -26,6 +26,29 @@  struct key_prop {
 	uint32_t exp_len;	/* Exponent length in number of uint8_t */
 };
 
+/**
+ * rsa_gen_key_prop() - Generate key properties of RSA public key
+ * @key:	Specifies key data in DER format
+ * @keylen:	Length of @key
+ * @prop:	Generated key property
+ *
+ * This function takes a blob of encoded RSA public key data in DER
+ * format, parse it and generate all the relevant properties
+ * in key_prop structure.
+ * Return a pointer to struct key_prop in @prop on success.
+ *
+ * Return:	0 on success, negative on error
+ */
+int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **proc);
+
+/**
+ * rsa_free_key_prop() - Free key properties
+ * @prop:	Pointer to struct key_prop
+ *
+ * This function frees all the memories allocated by rsa_gen_key_prop().
+ */
+void rsa_free_key_prop(struct key_prop *prop);
+
 /**
  * rsa_mod_exp_sw() - Perform RSA Modular Exponentiation in sw
  *
diff --git a/lib/rsa/Kconfig b/lib/rsa/Kconfig
index 71e4c06bf883..d1d6f6cf64a3 100644
--- a/lib/rsa/Kconfig
+++ b/lib/rsa/Kconfig
@@ -33,6 +33,7 @@  config RSA_VERIFY
 config RSA_VERIFY_WITH_PKEY
 	bool "Execute RSA verification without key parameters from FDT"
 	depends on RSA
+	imply RSA_PUBLIC_KEY_PARSER
 	help
 	  The standard RSA-signature verification code (FIT_SIGNATURE) uses
 	  pre-calculated key properties, that are stored in fdt blob, in
diff --git a/lib/rsa/Makefile b/lib/rsa/Makefile
index c07305188e0c..14ed3cb4012b 100644
--- a/lib/rsa/Makefile
+++ b/lib/rsa/Makefile
@@ -6,4 +6,5 @@ 
 # Wolfgang Denk, DENX Software Engineering, wd@denx.de.
 
 obj-$(CONFIG_$(SPL_)RSA_VERIFY) += rsa-verify.o rsa-checksum.o
+obj-$(CONFIG_RSA_VERIFY_WITH_PKEY) += rsa-keyprop.o
 obj-$(CONFIG_RSA_SOFTWARE_EXP) += rsa-mod-exp.o
diff --git a/lib/rsa/rsa-keyprop.c b/lib/rsa/rsa-keyprop.c
new file mode 100644
index 000000000000..9464df009343
--- /dev/null
+++ b/lib/rsa/rsa-keyprop.c
@@ -0,0 +1,725 @@ 
+// SPDX-License-Identifier: GPL-2.0+ and MIT
+/*
+ * RSA library - generate parameters for a public key
+ *
+ * Copyright (c) 2019 Linaro Limited
+ * Author: AKASHI Takahiro
+ *
+ * Big number routines in this file come from BearSSL:
+ * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
+ */
+
+#include <common.h>
+#include <image.h>
+#include <malloc.h>
+#include <asm/byteorder.h>
+#include <crypto/internal/rsa.h>
+#include <u-boot/rsa-mod-exp.h>
+
+/**
+ * br_dec16be() - Convert 16-bit big-endian integer to native
+ * @src:	Pointer to data
+ * Return:	Native-endian integer
+ */
+static unsigned br_dec16be(const void *src)
+{
+	return be16_to_cpup(src);
+}
+
+/**
+ * br_dec32be() - Convert 32-bit big-endian integer to native
+ * @src:	Pointer to data
+ * Return:	Native-endian integer
+ */
+static uint32_t br_dec32be(const void *src)
+{
+	return be32_to_cpup(src);
+}
+
+/**
+ * br_enc32be() - Convert native 32-bit integer to big-endian
+ * @dst:	Pointer to buffer to store big-endian integer in
+ * @x:		Native 32-bit integer
+ */
+static void br_enc32be(void *dst, uint32_t x)
+{
+	__be32 tmp;
+
+	tmp = cpu_to_be32(x);
+	memcpy(dst, &tmp, sizeof(tmp));
+}
+
+/* from BearSSL's src/inner.h */
+
+/*
+ * Negate a boolean.
+ */
+static uint32_t NOT(uint32_t ctl)
+{
+	return ctl ^ 1;
+}
+
+/*
+ * Multiplexer: returns x if ctl == 1, y if ctl == 0.
+ */
+static uint32_t MUX(uint32_t ctl, uint32_t x, uint32_t y)
+{
+	return y ^ (-ctl & (x ^ y));
+}
+
+/*
+ * Equality check: returns 1 if x == y, 0 otherwise.
+ */
+static uint32_t EQ(uint32_t x, uint32_t y)
+{
+	uint32_t q;
+
+	q = x ^ y;
+	return NOT((q | -q) >> 31);
+}
+
+/*
+ * Inequality check: returns 1 if x != y, 0 otherwise.
+ */
+static uint32_t NEQ(uint32_t x, uint32_t y)
+{
+	uint32_t q;
+
+	q = x ^ y;
+	return (q | -q) >> 31;
+}
+
+/*
+ * Comparison: returns 1 if x > y, 0 otherwise.
+ */
+static uint32_t GT(uint32_t x, uint32_t y)
+{
+	/*
+	 * If both x < 2^31 and y < 2^31, then y-x will have its high
+	 * bit set if x > y, cleared otherwise.
+	 *
+	 * If either x >= 2^31 or y >= 2^31 (but not both), then the
+	 * result is the high bit of x.
+	 *
+	 * If both x >= 2^31 and y >= 2^31, then we can virtually
+	 * subtract 2^31 from both, and we are back to the first case.
+	 * Since (y-2^31)-(x-2^31) = y-x, the subtraction is already
+	 * fine.
+	 */
+	uint32_t z;
+
+	z = y - x;
+	return (z ^ ((x ^ y) & (x ^ z))) >> 31;
+}
+
+/*
+ * Compute the bit length of a 32-bit integer. Returned value is between 0
+ * and 32 (inclusive).
+ */
+static uint32_t BIT_LENGTH(uint32_t x)
+{
+	uint32_t k, c;
+
+	k = NEQ(x, 0);
+	c = GT(x, 0xFFFF); x = MUX(c, x >> 16, x); k += c << 4;
+	c = GT(x, 0x00FF); x = MUX(c, x >>  8, x); k += c << 3;
+	c = GT(x, 0x000F); x = MUX(c, x >>  4, x); k += c << 2;
+	c = GT(x, 0x0003); x = MUX(c, x >>  2, x); k += c << 1;
+	k += GT(x, 0x0001);
+	return k;
+}
+
+#define GE(x, y)   NOT(GT(y, x))
+#define LT(x, y)   GT(y, x)
+#define MUL(x, y)   ((uint64_t)(x) * (uint64_t)(y))
+
+/*
+ * Integers 'i32'
+ * --------------
+ *
+ * The 'i32' functions implement computations on big integers using
+ * an internal representation as an array of 32-bit integers. For
+ * an array x[]:
+ *  -- x[0] contains the "announced bit length" of the integer
+ *  -- x[1], x[2]... contain the value in little-endian order (x[1]
+ *     contains the least significant 32 bits)
+ *
+ * Multiplications rely on the elementary 32x32->64 multiplication.
+ *
+ * The announced bit length specifies the number of bits that are
+ * significant in the subsequent 32-bit words. Unused bits in the
+ * last (most significant) word are set to 0; subsequent words are
+ * uninitialized and need not exist at all.
+ *
+ * The execution time and memory access patterns of all computations
+ * depend on the announced bit length, but not on the actual word
+ * values. For modular integers, the announced bit length of any integer
+ * modulo n is equal to the actual bit length of n; thus, computations
+ * on modular integers are "constant-time" (only the modulus length may
+ * leak).
+ */
+
+/*
+ * Extract one word from an integer. The offset is counted in bits.
+ * The word MUST entirely fit within the word elements corresponding
+ * to the announced bit length of a[].
+ */
+static uint32_t br_i32_word(const uint32_t *a, uint32_t off)
+{
+	size_t u;
+	unsigned j;
+
+	u = (size_t)(off >> 5) + 1;
+	j = (unsigned)off & 31;
+	if (j == 0) {
+		return a[u];
+	} else {
+		return (a[u] >> j) | (a[u + 1] << (32 - j));
+	}
+}
+
+/* from BearSSL's src/int/i32_bitlen.c */
+
+/*
+ * Compute the actual bit length of an integer. The argument x should
+ * point to the first (least significant) value word of the integer.
+ * The len 'xlen' contains the number of 32-bit words to access.
+ *
+ * CT: value or length of x does not leak.
+ */
+static uint32_t br_i32_bit_length(uint32_t *x, size_t xlen)
+{
+	uint32_t tw, twk;
+
+	tw = 0;
+	twk = 0;
+	while (xlen -- > 0) {
+		uint32_t w, c;
+
+		c = EQ(tw, 0);
+		w = x[xlen];
+		tw = MUX(c, w, tw);
+		twk = MUX(c, (uint32_t)xlen, twk);
+	}
+	return (twk << 5) + BIT_LENGTH(tw);
+}
+
+/* from BearSSL's src/int/i32_decode.c */
+
+/*
+ * Decode an integer from its big-endian unsigned representation. The
+ * "true" bit length of the integer is computed, but all words of x[]
+ * corresponding to the full 'len' bytes of the source are set.
+ *
+ * CT: value or length of x does not leak.
+ */
+static void br_i32_decode(uint32_t *x, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	size_t u, v;
+
+	buf = src;
+	u = len;
+	v = 1;
+	for (;;) {
+		if (u < 4) {
+			uint32_t w;
+
+			if (u < 2) {
+				if (u == 0) {
+					break;
+				} else {
+					w = buf[0];
+				}
+			} else {
+				if (u == 2) {
+					w = br_dec16be(buf);
+				} else {
+					w = ((uint32_t)buf[0] << 16)
+						| br_dec16be(buf + 1);
+				}
+			}
+			x[v ++] = w;
+			break;
+		} else {
+			u -= 4;
+			x[v ++] = br_dec32be(buf + u);
+		}
+	}
+	x[0] = br_i32_bit_length(x + 1, v - 1);
+}
+
+/* from BearSSL's src/int/i32_encode.c */
+
+/*
+ * Encode an integer into its big-endian unsigned representation. The
+ * output length in bytes is provided (parameter 'len'); if the length
+ * is too short then the integer is appropriately truncated; if it is
+ * too long then the extra bytes are set to 0.
+ */
+static void br_i32_encode(void *dst, size_t len, const uint32_t *x)
+{
+	unsigned char *buf;
+	size_t k;
+
+	buf = dst;
+
+	/*
+	 * Compute the announced size of x in bytes; extra bytes are
+	 * filled with zeros.
+	 */
+	k = (x[0] + 7) >> 3;
+	while (len > k) {
+		*buf ++ = 0;
+		len --;
+	}
+
+	/*
+	 * Now we use k as index within x[]. That index starts at 1;
+	 * we initialize it to the topmost complete word, and process
+	 * any remaining incomplete word.
+	 */
+	k = (len + 3) >> 2;
+	switch (len & 3) {
+	case 3:
+		*buf ++ = x[k] >> 16;
+		/* fall through */
+	case 2:
+		*buf ++ = x[k] >> 8;
+		/* fall through */
+	case 1:
+		*buf ++ = x[k];
+		k --;
+	}
+
+	/*
+	 * Encode all complete words.
+	 */
+	while (k > 0) {
+		br_enc32be(buf, x[k]);
+		k --;
+		buf += 4;
+	}
+}
+
+/* from BearSSL's src/int/i32_ninv32.c */
+
+/*
+ * Compute -(1/x) mod 2^32. If x is even, then this function returns 0.
+ */
+static uint32_t br_i32_ninv32(uint32_t x)
+{
+	uint32_t y;
+
+	y = 2 - x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	y *= 2 - y * x;
+	return MUX(x & 1, -y, 0);
+}
+
+/* from BearSSL's src/int/i32_add.c */
+
+/*
+ * Add b[] to a[] and return the carry (0 or 1). If ctl is 0, then a[]
+ * is unmodified, but the carry is still computed and returned. The
+ * arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+static uint32_t br_i32_add(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5;
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw + bw + cc;
+
+		/*
+		 * Carry is 1 if naw < aw. Carry is also 1 if naw == aw
+		 * AND the carry was already 1.
+		 */
+		cc = (cc & EQ(naw, aw)) | LT(naw, aw);
+		a[u] = MUX(ctl, naw, aw);
+	}
+	return cc;
+}
+
+/* from BearSSL's src/int/i32_sub.c */
+
+/*
+ * Subtract b[] from a[] and return the carry (0 or 1). If ctl is 0,
+ * then a[] is unmodified, but the carry is still computed and returned.
+ * The arrays a[] and b[] MUST have the same announced bit length.
+ *
+ * a[] and b[] MAY be the same array, but partial overlap is not allowed.
+ */
+static uint32_t br_i32_sub(uint32_t *a, const uint32_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 63) >> 5;
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw - bw - cc;
+
+		/*
+		 * Carry is 1 if naw > aw. Carry is 1 also if naw == aw
+		 * AND the carry was already 1.
+		 */
+		cc = (cc & EQ(naw, aw)) | GT(naw, aw);
+		a[u] = MUX(ctl, naw, aw);
+	}
+	return cc;
+}
+
+/* from BearSSL's src/int/i32_div32.c */
+
+/*
+ * Constant-time division. The dividend hi:lo is divided by the
+ * divisor d; the quotient is returned and the remainder is written
+ * in *r. If hi == d, then the quotient does not fit on 32 bits;
+ * returned value is thus truncated. If hi > d, returned values are
+ * indeterminate.
+ */
+static uint32_t br_divrem(uint32_t hi, uint32_t lo, uint32_t d, uint32_t *r)
+{
+	/* TODO: optimize this */
+	uint32_t q;
+	uint32_t ch, cf;
+	int k;
+
+	q = 0;
+	ch = EQ(hi, d);
+	hi = MUX(ch, 0, hi);
+	for (k = 31; k > 0; k --) {
+		int j;
+		uint32_t w, ctl, hi2, lo2;
+
+		j = 32 - k;
+		w = (hi << j) | (lo >> k);
+		ctl = GE(w, d) | (hi >> k);
+		hi2 = (w - d) >> j;
+		lo2 = lo - (d << k);
+		hi = MUX(ctl, hi2, hi);
+		lo = MUX(ctl, lo2, lo);
+		q |= ctl << k;
+	}
+	cf = GE(lo, d) | hi;
+	q |= cf;
+	*r = MUX(cf, lo - d, lo);
+	return q;
+}
+
+/*
+ * Wrapper for br_divrem(); the remainder is returned, and the quotient
+ * is discarded.
+ */
+static uint32_t br_rem(uint32_t hi, uint32_t lo, uint32_t d)
+{
+	uint32_t r;
+
+	br_divrem(hi, lo, d, &r);
+	return r;
+}
+
+/*
+ * Wrapper for br_divrem(); the quotient is returned, and the remainder
+ * is discarded.
+ */
+static uint32_t br_div(uint32_t hi, uint32_t lo, uint32_t d)
+{
+	uint32_t r;
+
+	return br_divrem(hi, lo, d, &r);
+}
+
+/* from BearSSL's src/int/i32_muladd.c */
+
+/*
+ * Multiply x[] by 2^32 and then add integer z, modulo m[]. This
+ * function assumes that x[] and m[] have the same announced bit
+ * length, and the announced bit length of m[] matches its true
+ * bit length.
+ *
+ * x[] and m[] MUST be distinct arrays.
+ *
+ * CT: only the common announced bit length of x and m leaks, not
+ * the values of x, z or m.
+ */
+static void br_i32_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
+{
+	uint32_t m_bitlen;
+	size_t u, mlen;
+	uint32_t a0, a1, b0, hi, g, q, tb;
+	uint32_t chf, clow, under, over;
+	uint64_t cc;
+
+	/*
+	 * We can test on the modulus bit length since we accept to
+	 * leak that length.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return;
+	}
+	if (m_bitlen <= 32) {
+		x[1] = br_rem(x[1], z, m[1]);
+		return;
+	}
+	mlen = (m_bitlen + 31) >> 5;
+
+	/*
+	 * Principle: we estimate the quotient (x*2^32+z)/m by
+	 * doing a 64/32 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^32
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < q), we can estimate q by
+	 * doing an Euclidean division on the top words:
+	 *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	a0 = br_i32_word(x, m_bitlen - 32);
+	hi = x[mlen];
+	memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+	x[1] = z;
+	a1 = br_i32_word(x, m_bitlen - 32);
+	b0 = br_i32_word(m, m_bitlen - 32);
+
+	/*
+	 * We estimate a divisor q. If the quotient returned by br_div()
+	 * is g:
+	 * -- If a0 == b0 then g == 0; we want q = 0xFFFFFFFF.
+	 * -- Otherwise:
+	 *    -- if g == 0 then we set q = 0;
+	 *    -- otherwise, we set q = g - 1.
+	 * The properties described above then ensure that the true
+	 * quotient is q-1, q or q+1.
+	 */
+	g = br_div(a0, a1, b0);
+	q = MUX(EQ(a0, b0), 0xFFFFFFFF, MUX(EQ(g, 0), 0, g - 1));
+
+	/*
+	 * We subtract q*m from x (with the extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1;
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zw, xw, nxw;
+		uint64_t zl;
+
+		mw = m[u];
+		zl = MUL(mw, q) + cc;
+		cc = (uint32_t)(zl >> 32);
+		zw = (uint32_t)zl;
+		xw = x[u];
+		nxw = xw - zw;
+		cc += (uint64_t)GT(nxw, xw);
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus). In
+	 * these cases we must subtract m once.
+	 *
+	 * Otherwise, we may have overestimated, which will show as
+	 * cc > hi (thus a negative result). Correction is adding m once.
+	 */
+	chf = (uint32_t)(cc >> 32);
+	clow = (uint32_t)cc;
+	over = chf | GT(clow, hi);
+	under = ~over & (tb | (~chf & LT(clow, hi)));
+	br_i32_add(x, m, over);
+	br_i32_sub(x, m, under);
+}
+
+/* from BearSSL's src/int/i32_reduce.c */
+
+/*
+ * Reduce an integer (a[]) modulo another (m[]). The result is written
+ * in x[] and its announced bit length is set to be equal to that of m[].
+ *
+ * x[] MUST be distinct from a[] and m[].
+ *
+ * CT: only announced bit lengths leak, not values of x, a or m.
+ */
+static void br_i32_reduce(uint32_t *x, const uint32_t *a, const uint32_t *m)
+{
+	uint32_t m_bitlen, a_bitlen;
+	size_t mlen, alen, u;
+
+	m_bitlen = m[0];
+	mlen = (m_bitlen + 31) >> 5;
+
+	x[0] = m_bitlen;
+	if (m_bitlen == 0) {
+		return;
+	}
+
+	/*
+	 * If the source is shorter, then simply copy all words from a[]
+	 * and zero out the upper words.
+	 */
+	a_bitlen = a[0];
+	alen = (a_bitlen + 31) >> 5;
+	if (a_bitlen < m_bitlen) {
+		memcpy(x + 1, a + 1, alen * sizeof *a);
+		for (u = alen; u < mlen; u ++) {
+			x[u + 1] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * The source length is at least equal to that of the modulus.
+	 * We must thus copy N-1 words, and input the remaining words
+	 * one by one.
+	 */
+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
+	x[mlen] = 0;
+	for (u = 1 + alen - mlen; u > 0; u --) {
+		br_i32_muladd_small(x, a[u], m);
+	}
+}
+
+/**
+ * rsa_free_key_prop() - Free key properties
+ * @prop:	Pointer to struct key_prop
+ *
+ * This function frees all the memories allocated by rsa_gen_key_prop().
+ */
+void rsa_free_key_prop(struct key_prop *prop)
+{
+	if (!prop)
+		return;
+
+	free((void *)prop->modulus);
+	free((void *)prop->public_exponent);
+	free((void *)prop->rr);
+
+	free(prop);
+}
+
+/**
+ * rsa_gen_key_prop() - Generate key properties of RSA public key
+ * @key:	Specifies key data in DER format
+ * @keylen:	Length of @key
+ * @prop:	Generated key property
+ *
+ * This function takes a blob of encoded RSA public key data in DER
+ * format, parse it and generate all the relevant properties
+ * in key_prop structure.
+ * Return a pointer to struct key_prop in @prop on success.
+ *
+ * Return:	0 on success, negative on error
+ */
+int rsa_gen_key_prop(const void *key, uint32_t keylen, struct key_prop **prop)
+{
+	struct rsa_key rsa_key;
+	uint32_t *n = NULL, *rr = NULL, *rrtmp = NULL;
+	const int max_rsa_size = 4096;
+	int rlen, i, ret;
+
+	*prop = calloc(sizeof(**prop), 1);
+	n = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
+	rr = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
+	rrtmp = calloc(sizeof(uint32_t), 1 + (max_rsa_size >> 5));
+	if (!(*prop) || !n || !rr || !rrtmp) {
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	ret = rsa_parse_pub_key(&rsa_key, key, keylen);
+	if (ret)
+		goto err;
+
+	/* modulus */
+	/* removing leading 0's */
+	for (i = 0; i < rsa_key.n_sz && !rsa_key.n[i]; i++)
+		;
+	(*prop)->num_bits = (rsa_key.n_sz - i) * 8;
+	(*prop)->modulus = malloc(rsa_key.n_sz - i);
+	if (!(*prop)->modulus) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	memcpy((void *)(*prop)->modulus, &rsa_key.n[i], rsa_key.n_sz - i);
+
+	/* exponent */
+	(*prop)->public_exponent = calloc(1, sizeof(uint64_t));
+	if (!(*prop)->public_exponent) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	memcpy((void *)(*prop)->public_exponent + sizeof(uint64_t)
+						- rsa_key.e_sz,
+	       rsa_key.e, rsa_key.e_sz);
+	(*prop)->exp_len = rsa_key.e_sz;
+
+	/* n0 inverse */
+	br_i32_decode(n, &rsa_key.n[i], rsa_key.n_sz - i);
+	(*prop)->n0inv = br_i32_ninv32(n[1]);
+
+	/* R^2 mod n; R = 2^(num_bits) */
+	rlen = (*prop)->num_bits * 2; /* #bits of R^2 = (2^num_bits)^2 */
+	rr[0] = 0;
+	*(uint8_t *)&rr[0] = (1 << (rlen % 8));
+	for (i = 1; i < (((rlen + 31) >> 5) + 1); i++)
+		rr[i] = 0;
+	br_i32_decode(rrtmp, rr, ((rlen + 7) >> 3) + 1);
+	br_i32_reduce(rr, rrtmp, n);
+
+	rlen = ((*prop)->num_bits + 7) >> 3; /* #bytes of R^2 mod n */
+	(*prop)->rr = malloc(rlen);
+	if (!(*prop)->rr) {
+		ret = -ENOMEM;
+		goto err;
+	}
+	br_i32_encode((void *)(*prop)->rr, rlen, rr);
+
+	return 0;
+
+err:
+	free(n);
+	free(rr);
+	free(rrtmp);
+	rsa_free_key_prop(*prop);
+	return ret;
+}