Message ID | 20210413080755.73572-11-joel@jms.id.au |
---|---|
State | New |
Headers | show |
Series | Use HACE to | expand |
Hi Joel, On 4/13/2021 5:07 AM, Joel Stanley wrote: > The HACE supports MD5, SHA1 and SHA2 family hash functions. This driver > uses it in a polling mode to perform hash calculations over buffers > placed in DRAM. > > Co-developed-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> > Signed-off-by: Joel Stanley <joel@jms.id.au> > --- > drivers/crypto/Kconfig | 16 +++ > drivers/crypto/Makefile | 1 + > drivers/crypto/aspeed_hace.c | 250 +++++++++++++++++++++++++++++++++++ > 3 files changed, 267 insertions(+) > create mode 100644 drivers/crypto/aspeed_hace.c > > diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig > index 1ea116be7503..f78e41e0e9e7 100644 > --- a/drivers/crypto/Kconfig > +++ b/drivers/crypto/Kconfig > @@ -2,4 +2,20 @@ menu "Hardware crypto devices" > > source drivers/crypto/fsl/Kconfig > > +config ASPEED_HACE > + bool "ASPEED Hash and Crypto Engine" > + select SHA_HW_ACCEL > + select SHA_PROG_HW_ACCEL > + depends on ASPEED_AST2600 > + help > + Select this option to enable a driver for using the SHA engine in > + the ASPEED BMC SoCs. > + > + Enabling this allows the use of SHA operations in hardware without requiring the > + SHA software implementations, saving code size. > + > + Due to hardware limitations it cannot be used with a FIT placed in SPI > + FLASH. Data can only be hashed if it is in SDRAM, making this relevant > + for MMC and network boot only. > + > endmenu > diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile > index efbd1d3fca05..ac93b1295954 100644 > --- a/drivers/crypto/Makefile > +++ b/drivers/crypto/Makefile > @@ -4,5 +4,6 @@ > # http://www.samsung.com > > obj-$(CONFIG_EXYNOS_ACE_SHA) += ace_sha.o > +obj-$(CONFIG_ASPEED_HACE) += aspeed_hace.o > obj-y += rsa_mod_exp/ > obj-y += fsl/ > diff --git a/drivers/crypto/aspeed_hace.c b/drivers/crypto/aspeed_hace.c > new file mode 100644 > index 000000000000..473d4d7391b7 > --- /dev/null > +++ b/drivers/crypto/aspeed_hace.c > @@ -0,0 +1,250 @@ > +/* > + * (C) Copyright ASPEED Technology Inc. > + * Copyright 2021 IBM Corp. > + * > + * SPDX-License-Identifier: GPL-2.0-or-later > + */ > + > +#include <common.h> > +#include <clk.h> > + > +#include <log.h> > +#include <asm/io.h> > +#include <malloc.h> > +#include <hash.h> > + > +#include <dm/device.h> > +#include <dm/fdtaddr.h> > + > +#include <linux/bitops.h> > +#include <linux/delay.h> > +#include <linux/kernel.h> > +#include <linux/iopoll.h> > + > +#define ASPEED_HACE_STS 0x1C > +#define HACE_RSA_ISR BIT(13) > +#define HACE_CRYPTO_ISR BIT(12) > +#define HACE_HASH_ISR BIT(9) > +#define HACE_RSA_BUSY BIT(2) > +#define HACE_CRYPTO_BUSY BIT(1) > +#define HACE_HASH_BUSY BIT(0) > +#define ASPEED_HACE_HASH_SRC 0x20 > +#define ASPEED_HACE_HASH_DIGEST_BUFF 0x24 > +#define ASPEED_HACE_HASH_KEY_BUFF 0x28 > +#define ASPEED_HACE_HASH_DATA_LEN 0x2C > +#define HACE_SG_LAST BIT(31) > +#define ASPEED_HACE_HASH_CMD 0x30 > +#define HACE_SHA_BE_EN BIT(3) > +#define HACE_MD5_LE_EN BIT(2) > +#define HACE_ALGO_MD5 0 > +#define HACE_ALGO_SHA1 BIT(5) > +#define HACE_ALGO_SHA224 BIT(6) > +#define HACE_ALGO_SHA256 (BIT(4) | BIT(6)) > +#define HACE_ALGO_SHA512 (BIT(5) | BIT(6)) > +#define HACE_ALGO_SHA384 (BIT(5) | BIT(6) | BIT(10)) > +#define HACE_SG_EN BIT(18) > + > +#define ASPEED_MAX_SG 32 > + > +struct aspeed_sg { > + u32 len; > + u32 addr; > +}; > + > +struct aspeed_hash_ctx { > + u32 method; > + u32 digest_size; > + u32 len; > + u32 count; > + struct aspeed_sg list[ASPEED_MAX_SG] __attribute__((aligned(8))); > +}; > + > +struct aspeed_hace { > + struct clk clk; > +}; > + > +static phys_addr_t base; > + > +static int aspeed_hace_wait_completion(u32 reg, u32 flag, int timeout_us) > +{ > + u32 val; > + > + return readl_poll_timeout(reg, val, (val & flag) == flag, timeout_us); > +} > + > +static int digest_object(const void *src, unsigned int length, void *digest, > + u32 method) > +{ > + if (!((u32)src & BIT(31))) { > + debug("HACE src out of bounds: can only copy from SDRAM\n"); > + return -EINVAL; > + } > + > + if ((u32)digest & 0x7) { > + debug("HACE dest alignment incorrect: %p\n", digest); > + return -EINVAL; > + } > + > + writel((u32)src, base + ASPEED_HACE_HASH_SRC); > + writel((u32)digest, base + ASPEED_HACE_HASH_DIGEST_BUFF); > + writel(length, base + ASPEED_HACE_HASH_DATA_LEN); > + writel(HACE_SHA_BE_EN | method, base + ASPEED_HACE_HASH_CMD); > + > + /* SHA512 hashing appears to have a througput of about 12MB/s */ > + return aspeed_hace_wait_completion(base + ASPEED_HACE_STS, > + HACE_HASH_ISR, > + 1000 + (length >> 3)); In some of my previous testing with the un-cleaned patchset (https://github.com/klauskiwi/u-boot/tree/hace_sg_work), the Qemu implementation (Cedric's Aspeed-6.0 branch) worked fine, but on hardware I was getting errors until I explicitly cleared the HACE_HASH_ISR before attemptinga new command.. It makes sense since the readl_poll_timeout() would return immediately, without completing the command, if the HASH_ISR bit is set. > +} > + > +void hw_sha1(const unsigned char *pbuf, unsigned int buf_len, > + unsigned char *pout, unsigned int chunk_size) > +{ > + int rc; > + > + rc = digest_object(pbuf, buf_len, pout, HACE_ALGO_SHA1); > + if (rc) > + debug("HACE failure: %d\n", rc); > +} > + > +void hw_sha256(const unsigned char *pbuf, unsigned int buf_len, > + unsigned char *pout, unsigned int chunk_size) > +{ > + int rc; > + > + rc = digest_object(pbuf, buf_len, pout, HACE_ALGO_SHA256); > + if (rc) > + debug("HACE failure: %d\n", rc); > +} > + > +void hw_sha512(const unsigned char *pbuf, unsigned int buf_len, > + unsigned char *pout, unsigned int chunk_size) > +{ > + int rc; > + > + rc = digest_object(pbuf, buf_len, pout, HACE_ALGO_SHA512); > + if (rc) > + debug("HACE failure: %d\n", rc); > +} > + > +#if IS_ENABLED(CONFIG_SHA_PROG_HW_ACCEL) > +int hw_sha_init(struct hash_algo *algo, void **ctxp) > +{ > + struct aspeed_hash_ctx *ctx; > + u32 method; > + > + if (!strcmp(algo->name, "sha1")) { > + method = HACE_ALGO_SHA1; > + } > + else if (!strcmp(algo->name, "sha256")) { > + method = HACE_ALGO_SHA256; > + } > + else if (!strcmp(algo->name, "sha512")) { > + method = HACE_ALGO_SHA512; > + } > + else { > + return -ENOTSUPP; > + } > + > + ctx = calloc(1, sizeof(*ctx)); > + > + if (ctx == NULL) { > + debug("Cannot allocate memory for context\n"); > + return -ENOMEM; > + } > + ctx->method = method | HACE_SG_EN; > + ctx->digest_size = algo->digest_size; > + *ctxp = ctx; > + > + return 0; > +} > + > +int hw_sha_update(struct hash_algo *algo, void *hash_ctx, const void *buf, > + unsigned int size, int is_last) > +{ > + struct aspeed_hash_ctx *ctx = hash_ctx; > + struct aspeed_sg *sg = &ctx->list[ctx->count]; > + > + if (ctx->count >= ARRAY_SIZE(ctx->list)) { > + debug("HACE error: Reached maximum number of hash segments\n"); > + free(ctx); > + return -EINVAL; > + } > + > + sg->addr = (u32)buf; > + sg->len = size; > + if (is_last) > + sg->len |= HACE_SG_LAST; > + > + ctx->count++; > + ctx->len += size; > + > + return 0; > +} > + > +int hw_sha_finish(struct hash_algo *algo, void *hash_ctx, void *dest_buf, int size) > +{ > + struct aspeed_hash_ctx *ctx = hash_ctx; > + int rc; > + > + if (size < ctx->digest_size) { > + debug("HACE error: insufficient size on destination buffer\n"); > + free(ctx); > + return -EINVAL; > + } > + > + rc = digest_object(ctx->list, ctx->len, dest_buf, ctx->method); > + if (rc) > + debug("HACE Scatter-Gather failure\n"); > + > + free(ctx); > + > + return rc; > +} > +#endif > + > +static int aspeed_hace_probe(struct udevice *dev) > +{ > + struct aspeed_hace *hace = dev_get_priv(dev); > + int ret; > + > + ret = clk_get_by_index(dev, 0, &hace->clk); > + if (ret < 0) { > + debug("Can't get clock for %s: %d\n", dev->name, ret); > + return ret; > + } > + > + ret = clk_enable(&hace->clk); > + if (ret) { > + debug("Failed to enable fsi clock (%d)\n", ret); > + return ret; > + } > + > + /* As the crypto code does not pass us any driver state */ > + base = devfdt_get_addr(dev); > + > + return ret; > +} > + > +static int aspeed_hace_remove(struct udevice *dev) > +{ > + struct aspeed_hace *hace = dev_get_priv(dev); > + > + clk_disable(&hace->clk); > + > + return 0; > +} > + > +static const struct udevice_id aspeed_hace_ids[] = { > + { .compatible = "aspeed,ast2600-hace" }, > + { } > +}; > + > +U_BOOT_DRIVER(aspeed_hace) = { > + .name = "aspeed_hace", > + .id = UCLASS_MISC, > + .of_match = aspeed_hace_ids, > + .probe = aspeed_hace_probe, > + .remove = aspeed_hace_remove, > + .priv_auto_alloc_size = sizeof(struct aspeed_hace), > + .flags = DM_FLAG_PRE_RELOC, > +}; > I've tested your patchset with Cedric's Aspeed-6.0 but looks like the probe function is never called. Reading through the code a bit more, looks like you need to explicitly probe this device somewhere in board_init_r (that is, after sdram was initialized), since functions like dm_scan_fdt() and dm_extended_scan_fdt() will only scan subnodes of the top level, and the clocks node. This is what I get (with some added printfs of mine): qemu-system-arm: warning: Aspeed iBT has no chardev backend qemu-system-arm: warning: nic ftgmac100.0 has no peer qemu-system-arm: warning: nic ftgmac100.1 has no peer qemu-system-arm: warning: nic ftgmac100.2 has no peer qemu-system-arm: warning: nic ftgmac100.3 has no peer aspeed_timer_ctrl_pulse_enable: Timer does not support pulse mode aspeed_timer_ctrl_pulse_enable: Timer does not support pulse mode aspeed_timer_ctrl_pulse_enable: Timer does not support pulse mode aspeed_timer_ctrl_pulse_enable: Timer does not support pulse mode aspeed_ast2600_scu_write: SCU is locked! aspeed_ast2600_scu_write: SCU is locked! aspeed_smc_write: not implemented: 0x18 U-Boot SPL 2019.04 (Apr 13 2021 - 17:57:20 +0000) aspeed_soc.io: unimplemented device read (size 4, offset 0x0f500c) aspeed_soc.io: unimplemented device write (size 4, offset 0x0f500c, value 0x00000000) Trying to boot from MMC1 SD: CMD8 in a wrong state ## Checking hash(es) for Image uboot ... sha512,rsa4096:autogenerated-uboot-4096-key digest_object: ASPEED_HACE_STS='0xe59ff03c' digest_object: SCU080h='0xf7ff7f8a' digest_object: writing '0x90200104' to ASPEED_HACE_HASH_SRC digest_object: writing '0x902ffa50' to ASPEED_HACE_HASH_DIGEST_BUFF digest_object: writing '0x0005ccd4' to ASPEED_HACE_HASH_DATA_LEN digest_object: writing '0x00040068' to ASPEED_HACE_HASH_CMD HACE Scatter-Gather failure rsa_verify: Error in checksum calculation - Failed to verify required signature 'key-autogenerated-uboot-4096-key' error! Unable to verify required signature for '' hash node in 'uboot' image node mmc_load_image_raw_sector: mmc block read error Trying to boot from UART CCQEMU: Terminated
On 4/13/2021 5:41 PM, Klaus Heinrich Kiwi wrote: > I've tested your patchset with Cedric's Aspeed-6.0 but looks > like the probe function is never called. Reading through the code > a bit more, looks like you need to explicitly probe this device > somewhere in board_init_r (that is, after sdram was initialized), > since functions like dm_scan_fdt() and dm_extended_scan_fdt() will > only scan subnodes of the top level, and the clocks node. > > This is what I get (with some added printfs of mine): I've played around a bit more, and got it to work on Qemu with the following changes: * Added a board-specific spl_board_init() initializing the HACE driver at the SPL's board_init_r() timeframe. Enabled that on the defconfig file. * Because the driver model is using some pre-sdram malloc pool space, the changes above were causing the probing of the sdram itself to fail. Corrected by increasing the pre-sdram malloc pool to 0x1000. From 4ddc9fe2727a54f2d8c088d1c7046f4a3ab56314 Mon Sep 17 00:00:00 2001 From: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> Date: Wed, 14 Apr 2021 16:29:15 -0300 Subject: [PATCH] fixup: Enable HACE probing on SPL, expand malloc pool Signed-off-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> --- arch/arm/mach-aspeed/ast2600/spl.c | 15 +++++++++++++++ configs/ast2600_openbmc_spl_emmc_defconfig | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-aspeed/ast2600/spl.c b/arch/arm/mach-aspeed/ast2600/spl.c index 527e14f8e3..2b1e8477eb 100644 --- a/arch/arm/mach-aspeed/ast2600/spl.c +++ b/arch/arm/mach-aspeed/ast2600/spl.c @@ -86,3 +86,18 @@ struct image_header *spl_get_load_buffer(ssize_t offset, size_t size) { return (struct image_header *)(CONFIG_SYS_LOAD_ADDR); } + +#if CONFIG_IS_ENABLED(BOARD_INIT) +void spl_board_init(void) +{ +#ifdef CONFIG_ASPEED_HACE + struct udevice *dev; + + if (uclass_get_device_by_driver(UCLASS_MISC, DM_GET_DRIVER(aspeed_hace), + &dev)) + debug("Warning: HACE initialization failure\n"); +#endif + + return; +} +#endif /* CONFIG_IS_ENABLED(BOARD_INIT) */ \ No newline at end of file diff --git a/configs/ast2600_openbmc_spl_emmc_defconfig b/configs/ast2600_openbmc_spl_emmc_defconfig index 05069084cb..e856df2398 100644 --- a/configs/ast2600_openbmc_spl_emmc_defconfig +++ b/configs/ast2600_openbmc_spl_emmc_defconfig @@ -24,7 +24,7 @@ CONFIG_ASPEED_KERNEL_FIT_DRAM_BASE=0x83000000 CONFIG_TARGET_EVB_AST2600A1=y CONFIG_SPL_LIBCOMMON_SUPPORT=y CONFIG_SPL_LIBGENERIC_SUPPORT=y -CONFIG_SYS_MALLOC_F_LEN=0x800 +CONFIG_SYS_MALLOC_F_LEN=0x1000 CONFIG_SPL_MMC_SUPPORT=y CONFIG_SPL_SERIAL_SUPPORT=y CONFIG_SPL_DRIVERS_MISC_SUPPORT=y @@ -48,6 +48,7 @@ CONFIG_SYS_CONSOLE_ENV_OVERWRITE=y CONFIG_DISPLAY_BOARDINFO_LATE=y CONFIG_ARCH_EARLY_INIT_R=y CONFIG_BOARD_EARLY_INIT_F=y +CONFIG_SPL_BOARD_INIT=y # CONFIG_SPL_LEGACY_IMAGE_SUPPORT is not set CONFIG_SPL_SYS_MALLOC_SIMPLE=y CONFIG_SPL_STACK_R=y
On Wed, 14 Apr 2021 at 20:28, Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> wrote: > > > > On 4/13/2021 5:41 PM, Klaus Heinrich Kiwi wrote: > > I've tested your patchset with Cedric's Aspeed-6.0 but looks > > like the probe function is never called. Reading through the code > > a bit more, looks like you need to explicitly probe this device > > somewhere in board_init_r (that is, after sdram was initialized), > > since functions like dm_scan_fdt() and dm_extended_scan_fdt() will > > only scan subnodes of the top level, and the clocks node. > > > > This is what I get (with some added printfs of mine): > > I've played around a bit more, and got it to work on Qemu with the following changes: > > * Added a board-specific spl_board_init() initializing the HACE driver at > the SPL's board_init_r() timeframe. Enabled that on the defconfig file. > * Because the driver model is using some pre-sdram malloc pool space, > the changes above were causing the probing of the sdram itself to fail. > Corrected by increasing the pre-sdram malloc pool to 0x1000. Thanks. I had added something similar when debugging this yesterday. I had tested the changes, but must have mixed up which images were being loaded into qemu. > However, when I tried to test it on a Rainier, it failed: > U-Boot SPL 2019.04 (Apr 14 2021 - 19:31:59 +0000) > already initialized, Trying to boot from MMC1 > ## Checking hash(es) for Image uboot ... sha512,rsa4096:autogenerated-uboot-4096-key- Failed to verify required signature 'key-autogenera' > error! > Unable to verify required signature for '' hash node in 'uboot' image node > mmc_load_image_raw_sector: mmc block read error > Trying to boot from UART > CCCCC �P > > (and yes, I had since disabled my debugging printf's). I wonder if the HASH_ISR > may need to be explicitly cleared, although I'd expect it to work for the first > command at least. > > Another interesting thing is that the SPL tries to boot from UART, but neither > my fitImages, Legacy images or even RAW images are working.. Not sure if we need > some special handling of those images before feeding them to the spl ymodem loader? I wasn't able to get the SPL to load any images - raw binaries or FIT - from eMMC either. Something is going wrong, but I am unsure what it is. I will continue to debug. Cheers, Joel
On 4/14/2021 11:32 PM, Joel Stanley wrote: >> Another interesting thing is that the SPL tries to boot from UART, but neither >> my fitImages, Legacy images or even RAW images are working.. Not sure if we need >> some special handling of those images before feeding them to the spl ymodem loader? > I wasn't able to get the SPL to load any images - raw binaries or FIT > - from eMMC either. Something is going wrong, but I am unsure what it > is. I will continue to debug. I was able to make it work on real hardware (rainier100) with the following changes (in addition to the other fixups already mentioned in this thread): From a2a2819455ec5de689fd0702cce20bfb18ec11b1 Mon Sep 17 00:00:00 2001 From: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> Date: Thu, 15 Apr 2021 15:16:37 -0300 Subject: [PATCH] HACE fixups: * The AST2600 workbook mentions that the list structure passed to ASPEED_HACE_HASH_SRC needs to be 8-byte aligned. Normally, glibc's malloc() would always align memory to (at least) 8-bytes, but that's the case with u-boot's pre-sdram malloc() implementation. So we need to explicitly align the context to 8-bytes with malign(). * The __atribute__ ((align 8)) doesn't have an effect in struct elements. Remove it. * Since the struct aspeed_hash_ctx->list element is what we need to make sure is aligned to 9-bytes, move that to the first element of the array, and call-out the fact that this needs to be aligned in the declaration. * Clear HACE_HASH_ISR before issuing new command Signed-off-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> --- drivers/crypto/aspeed_hace.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/aspeed_hace.c b/drivers/crypto/aspeed_hace.c index 473d4d7391..0551fe6c83 100644 --- a/drivers/crypto/aspeed_hace.c +++ b/drivers/crypto/aspeed_hace.c @@ -51,12 +51,19 @@ struct aspeed_sg { u32 addr; }; + +/* + * Note: element 'list' below needs to be 8-byte aligned, + * keep it as the first element so that we can always + * guarantee that when allocating the struct (that should + * also be 8-byte aligned) + */ struct aspeed_hash_ctx { + struct aspeed_sg list[ASPEED_MAX_SG]; u32 method; u32 digest_size; u32 len; u32 count; - struct aspeed_sg list[ASPEED_MAX_SG] __attribute__((aligned(8))); }; struct aspeed_hace { @@ -85,6 +92,9 @@ static int digest_object(const void *src, unsigned int length, void *digest, return -EINVAL; } + /* clear any pending interrupt */ + writel(HACE_HASH_ISR, base + ASPEED_HACE_STS); + writel((u32)src, base + ASPEED_HACE_HASH_SRC); writel((u32)digest, base + ASPEED_HACE_HASH_DIGEST_BUFF); writel(length, base + ASPEED_HACE_HASH_DATA_LEN); @@ -145,12 +155,13 @@ int hw_sha_init(struct hash_algo *algo, void **ctxp) return -ENOTSUPP; } - ctx = calloc(1, sizeof(*ctx)); + ctx = memalign(8, sizeof(struct aspeed_hash_ctx)); if (ctx == NULL) { debug("Cannot allocate memory for context\n"); return -ENOMEM; } + ctx->len = ctx->count = 0; ctx->method = method | HACE_SG_EN; ctx->digest_size = algo->digest_size; *ctxp = ctx;
On Thu, 15 Apr 2021 at 21:38, Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> wrote: > > > > On 4/14/2021 11:32 PM, Joel Stanley wrote: > >> Another interesting thing is that the SPL tries to boot from UART, but neither > >> my fitImages, Legacy images or even RAW images are working.. Not sure if we need > >> some special handling of those images before feeding them to the spl ymodem loader? > > I wasn't able to get the SPL to load any images - raw binaries or FIT > > - from eMMC either. Something is going wrong, but I am unsure what it > > is. I will continue to debug. > > I was able to make it work on real hardware (rainier100) with the following changes > (in addition to the other fixups already mentioned in this thread): > > From a2a2819455ec5de689fd0702cce20bfb18ec11b1 Mon Sep 17 00:00:00 2001 > From: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> > Date: Thu, 15 Apr 2021 15:16:37 -0300 > Subject: [PATCH] HACE fixups: > > * The AST2600 workbook mentions that the list structure passed to > ASPEED_HACE_HASH_SRC needs to be 8-byte aligned. Normally, glibc's > malloc() would always align memory to (at least) 8-bytes, but that's > the case with u-boot's pre-sdram malloc() implementation. So we need > to explicitly align the context to 8-bytes with malign(). We're not using the HACE engine pre-SDRAM, so we should be ok. common/dlmalloc.c: 8-byte alignment is guaranteed by normal malloc calls, so don't bother calling memalign with an argument of 8 or less. Were you able to observe any allocations that were not aligned? > > * The __atribute__ ((align 8)) doesn't have an effect in struct > elements. Remove it. Agreed. > > * Since the struct aspeed_hash_ctx->list element is what we need to > make sure is aligned to 9-bytes, move that to the first element of > the array, and call-out the fact that this needs to be aligned in > the declaration. 9 bytes? Did you mean 8? Regardless, the array in the structure should be aligned as there will be no padding earlier in the struct. I have added a runtime check for this and have not hit that in my testing so far. > > * Clear HACE_HASH_ISR before issuing new command This makes sense. I will send out a new series for review. I've tested it thoroughly in qemu, and have tested the u-boot -> Linux path on hardware. Cheers, Joel > > Signed-off-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> > --- > drivers/crypto/aspeed_hace.c | 15 +++++++++++++-- > 1 file changed, 13 insertions(+), 2 deletions(-) > > diff --git a/drivers/crypto/aspeed_hace.c b/drivers/crypto/aspeed_hace.c > index 473d4d7391..0551fe6c83 100644 > --- a/drivers/crypto/aspeed_hace.c > +++ b/drivers/crypto/aspeed_hace.c > @@ -51,12 +51,19 @@ struct aspeed_sg { > u32 addr; > }; > > + > +/* > + * Note: element 'list' below needs to be 8-byte aligned, > + * keep it as the first element so that we can always > + * guarantee that when allocating the struct (that should > + * also be 8-byte aligned) > + */ > struct aspeed_hash_ctx { > + struct aspeed_sg list[ASPEED_MAX_SG]; > u32 method; > u32 digest_size; > u32 len; > u32 count; > - struct aspeed_sg list[ASPEED_MAX_SG] __attribute__((aligned(8))); > }; > > struct aspeed_hace { > @@ -85,6 +92,9 @@ static int digest_object(const void *src, unsigned int length, void *digest, > return -EINVAL; > } > > + /* clear any pending interrupt */ > + writel(HACE_HASH_ISR, base + ASPEED_HACE_STS); > + > writel((u32)src, base + ASPEED_HACE_HASH_SRC); > writel((u32)digest, base + ASPEED_HACE_HASH_DIGEST_BUFF); > writel(length, base + ASPEED_HACE_HASH_DATA_LEN); > @@ -145,12 +155,13 @@ int hw_sha_init(struct hash_algo *algo, void **ctxp) > return -ENOTSUPP; > } > > - ctx = calloc(1, sizeof(*ctx)); > + ctx = memalign(8, sizeof(struct aspeed_hash_ctx)); > > if (ctx == NULL) { > debug("Cannot allocate memory for context\n"); > return -ENOMEM; > } > + ctx->len = ctx->count = 0; > ctx->method = method | HACE_SG_EN; > ctx->digest_size = algo->digest_size; > *ctxp = ctx; > -- > 2.25.1 > > > > > > -- > Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com>
Hi Joel, On 4/19/2021 9:49 AM, Joel Stanley wrote: > On Thu, 15 Apr 2021 at 21:38, Klaus Heinrich Kiwi >> >> * The AST2600 workbook mentions that the list structure passed to >> ASPEED_HACE_HASH_SRC needs to be 8-byte aligned. Normally, glibc's >> malloc() would always align memory to (at least) 8-bytes, but that's >> the case with u-boot's pre-sdram malloc() implementation. So we need >> to explicitly align the context to 8-bytes with malign(). > > We're not using the HACE engine pre-SDRAM, so we should be ok. > > common/dlmalloc.c: > > 8-byte alignment is guaranteed by normal malloc calls, so don't > bother calling memalign with an argument of 8 or less. > > Were you able to observe any allocations that were not aligned? yes, I thought I had. And by pre-sdram I guess I was trying to say pre-relocation. But that might have been an artifact of trying to probe/initialize hace in the SPL, so I might have mixed things. I was sure that Qemu and Hardware were behaving differently, since the same image would work on qemu but fail on hardware. Perhaps we should put a warning on Qemu for unaligned access instead of just masking. >> >> * The __atribute__ ((align 8)) doesn't have an effect in struct >> elements. Remove it. > > Agreed. > >> >> * Since the struct aspeed_hash_ctx->list element is what we need to >> make sure is aligned to 9-bytes, move that to the first element of >> the array, and call-out the fact that this needs to be aligned in >> the declaration. > > 9 bytes? Did you mean 8? > I did. A typo. > Regardless, the array in the structure should be aligned as there will > be no padding earlier in the struct. I have added a runtime check for > this and have not hit that in my testing so far. > My change involved moving the array to the first element exactly to not risk changing the u32 elements before it to something else and breaking alignment. >> >> * Clear HACE_HASH_ISR before issuing new command > > This makes sense. > > I will send out a new series for review. I've tested it thoroughly in > qemu, and have tested the u-boot -> Linux path on hardware. > > Cheers, > > Joel Sounds good. I can give it a spin on rain100 and check if it's all good.
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 1ea116be7503..f78e41e0e9e7 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -2,4 +2,20 @@ menu "Hardware crypto devices" source drivers/crypto/fsl/Kconfig +config ASPEED_HACE + bool "ASPEED Hash and Crypto Engine" + select SHA_HW_ACCEL + select SHA_PROG_HW_ACCEL + depends on ASPEED_AST2600 + help + Select this option to enable a driver for using the SHA engine in + the ASPEED BMC SoCs. + + Enabling this allows the use of SHA operations in hardware without requiring the + SHA software implementations, saving code size. + + Due to hardware limitations it cannot be used with a FIT placed in SPI + FLASH. Data can only be hashed if it is in SDRAM, making this relevant + for MMC and network boot only. + endmenu diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index efbd1d3fca05..ac93b1295954 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -4,5 +4,6 @@ # http://www.samsung.com obj-$(CONFIG_EXYNOS_ACE_SHA) += ace_sha.o +obj-$(CONFIG_ASPEED_HACE) += aspeed_hace.o obj-y += rsa_mod_exp/ obj-y += fsl/ diff --git a/drivers/crypto/aspeed_hace.c b/drivers/crypto/aspeed_hace.c new file mode 100644 index 000000000000..473d4d7391b7 --- /dev/null +++ b/drivers/crypto/aspeed_hace.c @@ -0,0 +1,250 @@ +/* + * (C) Copyright ASPEED Technology Inc. + * Copyright 2021 IBM Corp. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <common.h> +#include <clk.h> + +#include <log.h> +#include <asm/io.h> +#include <malloc.h> +#include <hash.h> + +#include <dm/device.h> +#include <dm/fdtaddr.h> + +#include <linux/bitops.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/iopoll.h> + +#define ASPEED_HACE_STS 0x1C +#define HACE_RSA_ISR BIT(13) +#define HACE_CRYPTO_ISR BIT(12) +#define HACE_HASH_ISR BIT(9) +#define HACE_RSA_BUSY BIT(2) +#define HACE_CRYPTO_BUSY BIT(1) +#define HACE_HASH_BUSY BIT(0) +#define ASPEED_HACE_HASH_SRC 0x20 +#define ASPEED_HACE_HASH_DIGEST_BUFF 0x24 +#define ASPEED_HACE_HASH_KEY_BUFF 0x28 +#define ASPEED_HACE_HASH_DATA_LEN 0x2C +#define HACE_SG_LAST BIT(31) +#define ASPEED_HACE_HASH_CMD 0x30 +#define HACE_SHA_BE_EN BIT(3) +#define HACE_MD5_LE_EN BIT(2) +#define HACE_ALGO_MD5 0 +#define HACE_ALGO_SHA1 BIT(5) +#define HACE_ALGO_SHA224 BIT(6) +#define HACE_ALGO_SHA256 (BIT(4) | BIT(6)) +#define HACE_ALGO_SHA512 (BIT(5) | BIT(6)) +#define HACE_ALGO_SHA384 (BIT(5) | BIT(6) | BIT(10)) +#define HACE_SG_EN BIT(18) + +#define ASPEED_MAX_SG 32 + +struct aspeed_sg { + u32 len; + u32 addr; +}; + +struct aspeed_hash_ctx { + u32 method; + u32 digest_size; + u32 len; + u32 count; + struct aspeed_sg list[ASPEED_MAX_SG] __attribute__((aligned(8))); +}; + +struct aspeed_hace { + struct clk clk; +}; + +static phys_addr_t base; + +static int aspeed_hace_wait_completion(u32 reg, u32 flag, int timeout_us) +{ + u32 val; + + return readl_poll_timeout(reg, val, (val & flag) == flag, timeout_us); +} + +static int digest_object(const void *src, unsigned int length, void *digest, + u32 method) +{ + if (!((u32)src & BIT(31))) { + debug("HACE src out of bounds: can only copy from SDRAM\n"); + return -EINVAL; + } + + if ((u32)digest & 0x7) { + debug("HACE dest alignment incorrect: %p\n", digest); + return -EINVAL; + } + + writel((u32)src, base + ASPEED_HACE_HASH_SRC); + writel((u32)digest, base + ASPEED_HACE_HASH_DIGEST_BUFF); + writel(length, base + ASPEED_HACE_HASH_DATA_LEN); + writel(HACE_SHA_BE_EN | method, base + ASPEED_HACE_HASH_CMD); + + /* SHA512 hashing appears to have a througput of about 12MB/s */ + return aspeed_hace_wait_completion(base + ASPEED_HACE_STS, + HACE_HASH_ISR, + 1000 + (length >> 3)); +} + +void hw_sha1(const unsigned char *pbuf, unsigned int buf_len, + unsigned char *pout, unsigned int chunk_size) +{ + int rc; + + rc = digest_object(pbuf, buf_len, pout, HACE_ALGO_SHA1); + if (rc) + debug("HACE failure: %d\n", rc); +} + +void hw_sha256(const unsigned char *pbuf, unsigned int buf_len, + unsigned char *pout, unsigned int chunk_size) +{ + int rc; + + rc = digest_object(pbuf, buf_len, pout, HACE_ALGO_SHA256); + if (rc) + debug("HACE failure: %d\n", rc); +} + +void hw_sha512(const unsigned char *pbuf, unsigned int buf_len, + unsigned char *pout, unsigned int chunk_size) +{ + int rc; + + rc = digest_object(pbuf, buf_len, pout, HACE_ALGO_SHA512); + if (rc) + debug("HACE failure: %d\n", rc); +} + +#if IS_ENABLED(CONFIG_SHA_PROG_HW_ACCEL) +int hw_sha_init(struct hash_algo *algo, void **ctxp) +{ + struct aspeed_hash_ctx *ctx; + u32 method; + + if (!strcmp(algo->name, "sha1")) { + method = HACE_ALGO_SHA1; + } + else if (!strcmp(algo->name, "sha256")) { + method = HACE_ALGO_SHA256; + } + else if (!strcmp(algo->name, "sha512")) { + method = HACE_ALGO_SHA512; + } + else { + return -ENOTSUPP; + } + + ctx = calloc(1, sizeof(*ctx)); + + if (ctx == NULL) { + debug("Cannot allocate memory for context\n"); + return -ENOMEM; + } + ctx->method = method | HACE_SG_EN; + ctx->digest_size = algo->digest_size; + *ctxp = ctx; + + return 0; +} + +int hw_sha_update(struct hash_algo *algo, void *hash_ctx, const void *buf, + unsigned int size, int is_last) +{ + struct aspeed_hash_ctx *ctx = hash_ctx; + struct aspeed_sg *sg = &ctx->list[ctx->count]; + + if (ctx->count >= ARRAY_SIZE(ctx->list)) { + debug("HACE error: Reached maximum number of hash segments\n"); + free(ctx); + return -EINVAL; + } + + sg->addr = (u32)buf; + sg->len = size; + if (is_last) + sg->len |= HACE_SG_LAST; + + ctx->count++; + ctx->len += size; + + return 0; +} + +int hw_sha_finish(struct hash_algo *algo, void *hash_ctx, void *dest_buf, int size) +{ + struct aspeed_hash_ctx *ctx = hash_ctx; + int rc; + + if (size < ctx->digest_size) { + debug("HACE error: insufficient size on destination buffer\n"); + free(ctx); + return -EINVAL; + } + + rc = digest_object(ctx->list, ctx->len, dest_buf, ctx->method); + if (rc) + debug("HACE Scatter-Gather failure\n"); + + free(ctx); + + return rc; +} +#endif + +static int aspeed_hace_probe(struct udevice *dev) +{ + struct aspeed_hace *hace = dev_get_priv(dev); + int ret; + + ret = clk_get_by_index(dev, 0, &hace->clk); + if (ret < 0) { + debug("Can't get clock for %s: %d\n", dev->name, ret); + return ret; + } + + ret = clk_enable(&hace->clk); + if (ret) { + debug("Failed to enable fsi clock (%d)\n", ret); + return ret; + } + + /* As the crypto code does not pass us any driver state */ + base = devfdt_get_addr(dev); + + return ret; +} + +static int aspeed_hace_remove(struct udevice *dev) +{ + struct aspeed_hace *hace = dev_get_priv(dev); + + clk_disable(&hace->clk); + + return 0; +} + +static const struct udevice_id aspeed_hace_ids[] = { + { .compatible = "aspeed,ast2600-hace" }, + { } +}; + +U_BOOT_DRIVER(aspeed_hace) = { + .name = "aspeed_hace", + .id = UCLASS_MISC, + .of_match = aspeed_hace_ids, + .probe = aspeed_hace_probe, + .remove = aspeed_hace_remove, + .priv_auto_alloc_size = sizeof(struct aspeed_hace), + .flags = DM_FLAG_PRE_RELOC, +};
The HACE supports MD5, SHA1 and SHA2 family hash functions. This driver uses it in a polling mode to perform hash calculations over buffers placed in DRAM. Co-developed-by: Klaus Heinrich Kiwi <klaus@linux.vnet.ibm.com> Signed-off-by: Joel Stanley <joel@jms.id.au> --- drivers/crypto/Kconfig | 16 +++ drivers/crypto/Makefile | 1 + drivers/crypto/aspeed_hace.c | 250 +++++++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+) create mode 100644 drivers/crypto/aspeed_hace.c