diff mbox series

[v2,2/2] adu: Add arugments for block size

Message ID 20181211050526.18536-2-alistair@popple.id.au
State Superseded
Headers show
Series [v2,1/2] optcmd: Add parser for 8-bit power of 2 integers | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied
snowpatch_ozlabs/build-multiarch success Test build-multiarch on branch master

Commit Message

Alistair Popple Dec. 11, 2018, 5:05 a.m. UTC
Not all memory can be read with the default ADU block size of 8
bytes. Specifically cache-inhibited access to some MMIO regions such
as PCIe BAR spaces requires 4 byte accesses to avoid check stopping
the machine.

This patch adds an argument to the put/getmem commands to allow a
specific block size to be selected.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---

Changes since v1:
	- Fixed up comments
	- Removed debug code

libpdbg/adu.c     | 140 +++++++++++++++++++++++++++++++++++++-----------------
 libpdbg/libpdbg.h |  18 ++++---
 libpdbg/target.h  |   4 +-
 src/mem.c         |  14 +++---
 src/pdbgproxy.c   |   4 +-
 src/thread.c      |   2 +-
 6 files changed, 121 insertions(+), 61 deletions(-)

--
2.11.0

Comments

Amitay Isaacs Dec. 12, 2018, 6:30 a.m. UTC | #1
On Tue, 2018-12-11 at 16:05 +1100, Alistair Popple wrote:
> Not all memory can be read with the default ADU block size of 8
> bytes. Specifically cache-inhibited access to some MMIO regions such
> as PCIe BAR spaces requires 4 byte accesses to avoid check stopping
> the machine.
> 
> This patch adds an argument to the put/getmem commands to allow a
> specific block size to be selected.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
> 
> Changes since v1:
> 	- Fixed up comments
> 	- Removed debug code
> 
> libpdbg/adu.c     | 140 +++++++++++++++++++++++++++++++++++++------
> -----------
>  libpdbg/libpdbg.h |  18 ++++---
>  libpdbg/target.h  |   4 +-
>  src/mem.c         |  14 +++---
>  src/pdbgproxy.c   |   4 +-
>  src/thread.c      |   2 +-
>  6 files changed, 121 insertions(+), 61 deletions(-)
> 
> diff --git a/libpdbg/adu.c b/libpdbg/adu.c
> index b67a43e..5396213 100644
> --- a/libpdbg/adu.c
> +++ b/libpdbg/adu.c
> @@ -84,20 +84,48 @@
>  #define FBC_ALTD_DATA_DONE	PPC_BIT(3)
>  #define FBC_ALTD_PBINIT_MISSING PPC_BIT(18)
> 
> +/* There are more general implementations of this with a loop and
> more
> + * performant implementations using GCC builtins which aren't
> + * portable. Given we only need a limited domain this is quick, easy
> + * and portable. */
> +uint8_t blog2(uint8_t x)
> +{
> +	switch(x) {
> +	case 1:
> +		return 0;
> +	case 2:
> +		return 1;
> +	case 4:
> +		return 2;
> +	case 8:
> +		return 3;
> +	case 16:
> +		return 4;
> +	case 32:
> +		return 5;
> +	case 64:
> +		return 6;
> +	default:
> +		assert(0);
> +	}
> +}
> +
>  int adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
> -	       uint8_t *output, uint64_t size)
> +	       uint8_t *output, uint64_t size, uint8_t block_size)
>  {
> -	return __adu_getmem(adu_target, start_addr, output, size,
> false);
> +	return __adu_getmem(adu_target, start_addr, output,
> +			    size, false, block_size);
>  }
> 
>  int adu_getmem_ci(struct pdbg_target *adu_target, uint64_t
> start_addr,
> -		  uint8_t *output, uint64_t size)
> +		  uint8_t *output, uint64_t size, uint8_t block_size)
>  {
> -	return __adu_getmem(adu_target, start_addr, output, size,
> true);
> +	return __adu_getmem(adu_target, start_addr, output,
> +			    size, true, block_size);
>  }
> 
>  int __adu_getmem(struct pdbg_target *adu_target, uint64_t
> start_addr,
> -		 uint8_t *output, uint64_t size, bool ci)
> +		 uint8_t *output, uint64_t size, bool ci, uint8_t
> block_size)
>  {
>  	struct adu *adu;
>  	uint8_t *output0;
> @@ -109,33 +137,34 @@ int __adu_getmem(struct pdbg_target
> *adu_target, uint64_t start_addr,
> 
>  	output0 = output;
> 
> -	/* Align start address to 8-byte boundary */
> -	addr0 = 8 * (start_addr / 8);
> +	/* Align start address to block_sized boundary */
> +	addr0 = block_size * (start_addr / block_size);
> 
> -	/* We read data in 8-byte aligned chunks */
> -	for (addr = addr0; addr < start_addr + size; addr += 8) {
> +	/* We read data in block_sized aligned chunks */
> +	for (addr = addr0; addr < start_addr + size; addr +=
> block_size) {
>  		uint64_t data;
> 
> -		if (adu->getmem(adu, addr, &data, ci))
> +		if (adu->getmem(adu, addr, &data, ci, block_size))
>  			return -1;
> 
> -		/* ADU returns data in big-endian form in the register
> */
> +		/* ADU returns data in big-endian form in the register.
> */
>  		data = __builtin_bswap64(data);
> +		data >>= (addr & 0x7ull)*8;

The above line will be better if written based block_size (as
discussed).

> 
>  		if (addr < start_addr) {
>  			size_t offset = start_addr - addr;
> -			size_t n = (size <= 8-offset ? size : 8-
> offset);
> +			size_t n = (size <= block_size-offset ? size :
> block_size-offset);
> 
>  			memcpy(output, ((uint8_t *) &data) + offset,
> n);
>  			output += n;
> -		} else if (addr + 8 > start_addr + size) {
> +		} else if (addr + block_size > start_addr + size) {
>  			uint64_t offset = start_addr + size - addr;
> 
>  			memcpy(output, &data, offset);
>  			output += offset;
>  		} else {
> -			memcpy(output, &data, 8);
> -			output += 8;
> +			memcpy(output, &data, block_size);
> +			output += block_size;
>  		}
> 
>  		pdbg_progress_tick(output - output0, size);
> @@ -147,19 +176,19 @@ int __adu_getmem(struct pdbg_target
> *adu_target, uint64_t start_addr,
>  }
> 
>  int adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
> -	       uint8_t *output, uint64_t size)
> +	       uint8_t *output, uint64_t size, uint8_t block_size)
>  {
> -	return __adu_putmem(adu_target, start_addr, output, size,
> false);
> +	return __adu_putmem(adu_target, start_addr, output, size,
> false, block_size);
>  }
> 
>  int adu_putmem_ci(struct pdbg_target *adu_target, uint64_t
> start_addr,
> -		  uint8_t *output, uint64_t size)
> +		  uint8_t *output, uint64_t size, uint8_t block_size)
>  {
> -	return __adu_putmem(adu_target, start_addr, output, size,
> true);
> +	return __adu_putmem(adu_target, start_addr, output, size, true,
> block_size);
>  }
> 
>  int __adu_putmem(struct pdbg_target *adu_target, uint64_t
> start_addr,
> -		 uint8_t *input, uint64_t size, bool ci)
> +		 uint8_t *input, uint64_t size, bool ci, uint8_t
> block_size)
>  {
>  	struct adu *adu;
>  	int rc = 0, tsize;
> @@ -169,20 +198,25 @@ int __adu_putmem(struct pdbg_target
> *adu_target, uint64_t start_addr,
>  	adu = target_to_adu(adu_target);
>  	end_addr = start_addr + size;
>  	for (addr = start_addr; addr < end_addr; addr += tsize, input
> += tsize) {
> -		if ((addr % 8) || (addr + 8 > end_addr)) {
> -			/* If the address is not 64-bit aligned we
> -			 * copy in a byte at a time until it is. */
> +		if ((addr % block_size) || (addr + block_size >
> end_addr)) {
> +			/* If the address is not aligned to block_size
> +			 * we copy the data in one byte at a time
> +			 * until it is aligned. */
>  			tsize = 1;
> 
> -			/* Copy the input data in with correct
> alignment */
> +			/* Copy the input data in with correct
> +			 * alignment. Bytes need to aligned to the
> +			 * correct byte offset in the data register
> +			 * regardless of address. */
>  			data = ((uint64_t) *input) << 8*(8 - (addr % 8)
> - 1);
>  		} else {
> -			tsize = 8;
> -			memcpy(&data, input, sizeof(data));
> +			tsize = block_size;
> +			memcpy(&data, input, block_size);
>  			data = __builtin_bswap64(data);
> +			data >>= (addr & 7ull)*8;

Same thing here too.

>  		}
> 
> -		adu->putmem(adu, addr, data, tsize, ci);
> +		adu->putmem(adu, addr, data, tsize, ci, block_size);
>  		pdbg_progress_tick(addr - start_addr, size);
>  	}
> 
> @@ -234,7 +268,8 @@ static int adu_reset(struct adu *adu)
>  	return 0;
>  }
> 
> -static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> *data, int ci)
> +static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> *data,
> +			 int ci, uint8_t block_size)

It might be better to add a new commands instead of modifying the
existing getmem/putmem behaviour.

>  {
>  	uint64_t ctrl_reg, cmd_reg, val;
>  	int rc = 0;
> @@ -242,12 +277,15 @@ static int p8_adu_getmem(struct adu *adu,
> uint64_t addr, uint64_t *data, int ci)
>  	CHECK_ERR(adu_lock(adu));
> 
>  	ctrl_reg = P8_TTYPE_TREAD;
> -	if (ci)
> +	if (ci) {
>  		/* Do cache inhibited access */
>  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> P8_TTYPE_CI_PARTIAL_READ);
> -	else
> +		block_size = (blog2(block_size) + 1) << 1;
> +	} else {
>  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> P8_TTYPE_DMA_PARTIAL_READ);
> -	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, 8);
> +		block_size = 0;
> +	}
> +	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);
> 
>  	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target,
> P8_ALTD_CMD_REG, &cmd_reg));
>  	cmd_reg |= FBC_ALTD_START_OP;
> @@ -292,19 +330,23 @@ out:
> 
>  }
> 
> -int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int
> size, int ci)
> +int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int
> size,
> +		  int ci, uint8_t block_size)
>  {
>  	int rc = 0;
>  	uint64_t cmd_reg, ctrl_reg, val;
>  	CHECK_ERR(adu_lock(adu));
> 
>  	ctrl_reg = P8_TTYPE_TWRITE;
> -	if (ci)
> +	if (ci) {
>  		/* Do cache inhibited access */
>  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> P8_TTYPE_CI_PARTIAL_WRITE);
> -	else
> +		block_size = (blog2(block_size) + 1) << 1;
> +	} else {
>  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> P8_TTYPE_DMA_PARTIAL_WRITE);
> -	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, size);
> +		block_size <<= 1;
> +	}
> +	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);
> 
>  	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target,
> P8_ALTD_CMD_REG, &cmd_reg));
>  	cmd_reg |= FBC_ALTD_START_OP;
> @@ -349,19 +391,25 @@ out:
>  	return rc;
>  }
> 
> -static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> *data, int ci)
> +static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> *data,
> +			 int ci, uint8_t block_size)
>  {
>  	uint64_t ctrl_reg, cmd_reg, val;
> 
>  	cmd_reg = P9_TTYPE_TREAD;
> -	if (ci)
> +	if (ci) {
>  		/* Do cache inhibited access */
>  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> P9_TTYPE_CI_PARTIAL_READ);
> -	else
> +		block_size = (blog2(block_size) + 1) << 1;
> +	} else {
>  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> P9_TTYPE_DMA_PARTIAL_READ);
> 
> -	/* For a read size is apparently always 0 */
> -	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, 0);
> +		/* For normal reads the size is ignored as HW always
> +		 * returns a cache line */
> +		block_size = 0;
> +	}
> +
> +	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
>   	cmd_reg |= FBC_ALTD_START_OP;
>  	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
>  	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg,
> DROP_PRIORITY_LOW);
> @@ -400,7 +448,8 @@ retry:
>  	return 0;
>  }
> 
> -static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t
> data, int size, int ci)
> +static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t
> data, int size,
> +			 int ci, uint8_t block_size)
>  {
>  	uint64_t ctrl_reg, cmd_reg, val;
> 
> @@ -409,12 +458,15 @@ static int p9_adu_putmem(struct adu *adu,
> uint64_t addr, uint64_t data, int size
>  	size <<= 1;
> 
>  	cmd_reg = P9_TTYPE_TWRITE;
> -	if (ci)
> +	if (ci) {
>  		/* Do cache inhibited access */
>  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> P9_TTYPE_CI_PARTIAL_WRITE);
> -	else
> +		block_size = (blog2(block_size) + 1) << 1;
> +	} else {
>  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> P9_TTYPE_DMA_PARTIAL_WRITE);
> -	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, size);
> +		block_size <<= 1;
> +	}
> +	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
>   	cmd_reg |= FBC_ALTD_START_OP;
>  	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
>  	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg,
> DROP_PRIORITY_LOW);
> diff --git a/libpdbg/libpdbg.h b/libpdbg/libpdbg.h
> index 301c2c8..9ac8804 100644
> --- a/libpdbg/libpdbg.h
> +++ b/libpdbg/libpdbg.h
> @@ -204,12 +204,18 @@ int htm_status(struct pdbg_target *target);
>  int htm_dump(struct pdbg_target *target, char *filename);
>  int htm_record(struct pdbg_target *target, char *filename);
> 
> -int adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *ouput, uint64_t size);
> -int adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *input, uint64_t size);
> -int adu_getmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t
> *ouput, uint64_t size);
> -int adu_putmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t
> *input, uint64_t size);
> -int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *ouput, uint64_t size, bool ci);
> -int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *input, uint64_t size, bool ci);
> +int adu_getmem(struct pdbg_target *target, uint64_t addr,
> +	       uint8_t *ouput, uint64_t size, uint8_t block_size);
> +int adu_putmem(struct pdbg_target *target, uint64_t addr,
> +	       uint8_t *input, uint64_t size, uint8_t block_size);
> +int adu_getmem_ci(struct pdbg_target *target, uint64_t addr,
> +		  uint8_t *ouput, uint64_t size, uint8_t block_size);
> +int adu_putmem_ci(struct pdbg_target *target, uint64_t addr,
> +		  uint8_t *input, uint64_t size, uint8_t block_size);
> +int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *ouput,
> +		 uint64_t size, bool ci, uint8_t block_size);
> +int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *input,
> +		 uint64_t size, bool ci, uint8_t block_size);
> 
>  int opb_read(struct pdbg_target *target, uint32_t addr, uint32_t
> *data);
>  int opb_write(struct pdbg_target *target, uint32_t addr, uint32_t
> data);
> diff --git a/libpdbg/target.h b/libpdbg/target.h
> index 7cc855d..16ae304 100644
> --- a/libpdbg/target.h
> +++ b/libpdbg/target.h
> @@ -107,8 +107,8 @@ struct htm {
> 
>  struct adu {
>  	struct pdbg_target target;
> -	int (*getmem)(struct adu *, uint64_t, uint64_t *, int);
> -	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int);
> +	int (*getmem)(struct adu *, uint64_t, uint64_t *, int,
> uint8_t);
> +	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int,
> uint8_t);
>  };
>  #define target_to_adu(x) container_of(x, struct adu, target)
> 
> diff --git a/src/mem.c b/src/mem.c
> index ce099c2..b578f95 100644
> --- a/src/mem.c
> +++ b/src/mem.c
> @@ -39,8 +39,10 @@ struct mem_flags {
>  };
> 
>  #define MEM_CI_FLAG ("--ci", ci, parse_flag_noarg, false)
> +#define BLOCK_SIZE (parse_number8_pow2, "8")
> 
> -static int getmem(uint64_t addr, uint64_t size, struct mem_flags
> flags)
> +static int getmem(uint64_t addr, uint64_t size,
> +		  uint8_t block_size, struct mem_flags flags)
>  {
>  	struct pdbg_target *target;
>  	uint8_t *buf;
> @@ -59,7 +61,7 @@ static int getmem(uint64_t addr, uint64_t size,
> struct mem_flags flags)
> 
>  		pdbg_set_progress_tick(progress_tick);
>  		progress_init();
> -		if (!__adu_getmem(target, addr, buf, size, flags.ci)) {
> +		if (!__adu_getmem(target, addr, buf, size, flags.ci,
> block_size)) {
>  			if (write(STDOUT_FILENO, buf, size) < 0)
>  				PR_ERROR("Unable to write stdout.\n");
>  			else
> @@ -74,10 +76,10 @@ static int getmem(uint64_t addr, uint64_t size,
> struct mem_flags flags)
>  	return rc;
> 
>  }
> -OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA),
> +OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA,
> BLOCK_SIZE),
>  			     mem_flags, (MEM_CI_FLAG));
> 
> -static int putmem(uint64_t addr, struct mem_flags flags)
> +static int putmem(uint64_t addr, uint8_t block_size, struct
> mem_flags flags)
>  {
>  	uint8_t *buf;
>  	int read_size, rc = 0;
> @@ -98,7 +100,7 @@ static int putmem(uint64_t addr, struct mem_flags
> flags)
>  		if (read_size <= 0)
>  			break;
> 
> -		if (__adu_putmem(adu_target, addr, buf, read_size,
> flags.ci)) {
> +		if (__adu_putmem(adu_target, addr, buf, read_size,
> flags.ci, block_size)) {
>  			rc = 0;
>  			printf("Unable to write memory.\n");
>  			break;
> @@ -111,5 +113,5 @@ static int putmem(uint64_t addr, struct mem_flags
> flags)
>  	free(buf);
>  	return rc;
>  }
> -OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS),
> +OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS, BLOCK_SIZE),
>  			     mem_flags, (MEM_CI_FLAG));
> diff --git a/src/pdbgproxy.c b/src/pdbgproxy.c
> index dedea7a..877cd4c 100644
> --- a/src/pdbgproxy.c
> +++ b/src/pdbgproxy.c
> @@ -223,7 +223,7 @@ static void get_mem(uint64_t *stack, void *priv)
> 
>  	linear_map = get_real_addr(addr);
>  	if (linear_map != -1UL) {
> -		if (adu_getmem(adu_target, linear_map, (uint8_t *)
> data, len)) {
> +	if (adu_getmem(adu_target, linear_map, (uint8_t *) data, len,
> 8)) {
>  			PR_ERROR("Unable to read memory\n");
>  			err = 1;
>  		}
> @@ -293,7 +293,7 @@ static void put_mem(uint64_t *stack, void *priv)
> 
>  	PR_INFO("put_mem 0x%016" PRIx64 " = 0x%016" PRIx64 "\n", addr,
> stack[2]);
> 
> -	if (adu_putmem(adu_target, addr, data, len)) {
> +	if (adu_putmem(adu_target, addr, data, len, 8)) {
>  		PR_ERROR("Unable to write memory\n");
>  		err = 3;
>  	}
> diff --git a/src/thread.c b/src/thread.c
> index 1fd448d..2879c19 100644
> --- a/src/thread.c
> +++ b/src/thread.c
> @@ -35,7 +35,7 @@ static bool is_real_address(struct thread_regs
> *regs, uint64_t addr)
> 
>  static int load8(struct pdbg_target *target, uint64_t addr, uint64_t
> *value)
>  {
> -	if (adu_getmem(target, addr, (uint8_t *)value, 8)) {
> +	if (adu_getmem(target, addr, (uint8_t *)value, 8, 8)) {
>  		pdbg_log(PDBG_ERROR, "Unable to read memory
> address=%016" PRIx64 ".\n", addr);
>  		return 0;
>  	}
> --
> 2.11.0

Amitay.
Alistair Popple Dec. 14, 2018, 1 a.m. UTC | #2
On Wednesday, 12 December 2018 5:30:11 PM AEDT Amitay Isaacs wrote:
> On Tue, 2018-12-11 at 16:05 +1100, Alistair Popple wrote:
> > Not all memory can be read with the default ADU block size of 8
> > bytes. Specifically cache-inhibited access to some MMIO regions such
> > as PCIe BAR spaces requires 4 byte accesses to avoid check stopping
> > the machine.
> > 
> > This patch adds an argument to the put/getmem commands to allow a
> > specific block size to be selected.
> > 
> > Signed-off-by: Alistair Popple <alistair@popple.id.au>
> > ---
> > 
> > Changes since v1:
> > 	- Fixed up comments
> > 	- Removed debug code
> > 
> > libpdbg/adu.c     | 140 +++++++++++++++++++++++++++++++++++++------
> > -----------
> > 
> >  libpdbg/libpdbg.h |  18 ++++---
> >  libpdbg/target.h  |   4 +-
> >  src/mem.c         |  14 +++---
> >  src/pdbgproxy.c   |   4 +-
> >  src/thread.c      |   2 +-
> >  6 files changed, 121 insertions(+), 61 deletions(-)
> > 
> > diff --git a/libpdbg/adu.c b/libpdbg/adu.c
> > index b67a43e..5396213 100644
> > --- a/libpdbg/adu.c
> > +++ b/libpdbg/adu.c
> > @@ -84,20 +84,48 @@
> > 
> >  #define FBC_ALTD_DATA_DONE	PPC_BIT(3)
> >  #define FBC_ALTD_PBINIT_MISSING PPC_BIT(18)
> > 
> > +/* There are more general implementations of this with a loop and
> > more
> > + * performant implementations using GCC builtins which aren't
> > + * portable. Given we only need a limited domain this is quick, easy
> > + * and portable. */
> > +uint8_t blog2(uint8_t x)
> > +{
> > +	switch(x) {
> > +	case 1:
> > +		return 0;
> > +	case 2:
> > +		return 1;
> > +	case 4:
> > +		return 2;
> > +	case 8:
> > +		return 3;
> > +	case 16:
> > +		return 4;
> > +	case 32:
> > +		return 5;
> > +	case 64:
> > +		return 6;
> > +	default:
> > +		assert(0);
> > +	}
> > +}
> > +
> > 
> >  int adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
> > 
> > -	       uint8_t *output, uint64_t size)
> > +	       uint8_t *output, uint64_t size, uint8_t block_size)
> > 
> >  {
> > 
> > -	return __adu_getmem(adu_target, start_addr, output, size,
> > false);
> > +	return __adu_getmem(adu_target, start_addr, output,
> > +			    size, false, block_size);
> > 
> >  }
> >  
> >  int adu_getmem_ci(struct pdbg_target *adu_target, uint64_t
> > 
> > start_addr,
> > -		  uint8_t *output, uint64_t size)
> > +		  uint8_t *output, uint64_t size, uint8_t block_size)
> > 
> >  {
> > 
> > -	return __adu_getmem(adu_target, start_addr, output, size,
> > true);
> > +	return __adu_getmem(adu_target, start_addr, output,
> > +			    size, true, block_size);
> > 
> >  }
> >  
> >  int __adu_getmem(struct pdbg_target *adu_target, uint64_t
> > 
> > start_addr,
> > -		 uint8_t *output, uint64_t size, bool ci)
> > +		 uint8_t *output, uint64_t size, bool ci, uint8_t
> > block_size)
> > 
> >  {
> >  
> >  	struct adu *adu;
> >  	uint8_t *output0;
> > 
> > @@ -109,33 +137,34 @@ int __adu_getmem(struct pdbg_target
> > *adu_target, uint64_t start_addr,
> > 
> >  	output0 = output;
> > 
> > -	/* Align start address to 8-byte boundary */
> > -	addr0 = 8 * (start_addr / 8);
> > +	/* Align start address to block_sized boundary */
> > +	addr0 = block_size * (start_addr / block_size);
> > 
> > -	/* We read data in 8-byte aligned chunks */
> > -	for (addr = addr0; addr < start_addr + size; addr += 8) {
> > +	/* We read data in block_sized aligned chunks */
> > +	for (addr = addr0; addr < start_addr + size; addr +=
> > block_size) {
> > 
> >  		uint64_t data;
> > 
> > -		if (adu->getmem(adu, addr, &data, ci))
> > +		if (adu->getmem(adu, addr, &data, ci, block_size))
> > 
> >  			return -1;
> > 
> > -		/* ADU returns data in big-endian form in the register
> > */
> > +		/* ADU returns data in big-endian form in the register.
> > */
> > 
> >  		data = __builtin_bswap64(data);
> > 
> > +		data >>= (addr & 0x7ull)*8;
> 
> The above line will be better if written based block_size (as
> discussed).

Actually I don't think that will work. The alignment in the hardware data 
register is static and does not depend on block size. Take for example writing 
0xdeadbeef with a block size of 4. If we want to write this to address 0x4 we 
need data == 0xdeadbeef but if writing to address 0x0 we need data == 
0xdeadbeef00000000.

> >  		if (addr < start_addr) {
> >  		
> >  			size_t offset = start_addr - addr;
> > 
> > -			size_t n = (size <= 8-offset ? size : 8-
> > offset);
> > +			size_t n = (size <= block_size-offset ? size :
> > block_size-offset);
> > 
> >  			memcpy(output, ((uint8_t *) &data) + offset,
> > 
> > n);
> > 
> >  			output += n;
> > 
> > -		} else if (addr + 8 > start_addr + size) {
> > +		} else if (addr + block_size > start_addr + size) {
> > 
> >  			uint64_t offset = start_addr + size - addr;
> >  			
> >  			memcpy(output, &data, offset);
> >  			output += offset;
> >  		
> >  		} else {
> > 
> > -			memcpy(output, &data, 8);
> > -			output += 8;
> > +			memcpy(output, &data, block_size);
> > +			output += block_size;
> > 
> >  		}
> >  		
> >  		pdbg_progress_tick(output - output0, size);
> > 
> > @@ -147,19 +176,19 @@ int __adu_getmem(struct pdbg_target
> > *adu_target, uint64_t start_addr,
> > 
> >  }
> >  
> >  int adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
> > 
> > -	       uint8_t *output, uint64_t size)
> > +	       uint8_t *output, uint64_t size, uint8_t block_size)
> > 
> >  {
> > 
> > -	return __adu_putmem(adu_target, start_addr, output, size,
> > false);
> > +	return __adu_putmem(adu_target, start_addr, output, size,
> > false, block_size);
> > 
> >  }
> >  
> >  int adu_putmem_ci(struct pdbg_target *adu_target, uint64_t
> > 
> > start_addr,
> > -		  uint8_t *output, uint64_t size)
> > +		  uint8_t *output, uint64_t size, uint8_t block_size)
> > 
> >  {
> > 
> > -	return __adu_putmem(adu_target, start_addr, output, size,
> > true);
> > +	return __adu_putmem(adu_target, start_addr, output, size, true,
> > block_size);
> > 
> >  }
> >  
> >  int __adu_putmem(struct pdbg_target *adu_target, uint64_t
> > 
> > start_addr,
> > -		 uint8_t *input, uint64_t size, bool ci)
> > +		 uint8_t *input, uint64_t size, bool ci, uint8_t
> > block_size)
> > 
> >  {
> >  
> >  	struct adu *adu;
> >  	int rc = 0, tsize;
> > 
> > @@ -169,20 +198,25 @@ int __adu_putmem(struct pdbg_target
> > *adu_target, uint64_t start_addr,
> > 
> >  	adu = target_to_adu(adu_target);
> >  	end_addr = start_addr + size;
> >  	for (addr = start_addr; addr < end_addr; addr += tsize, input
> > 
> > += tsize) {
> > -		if ((addr % 8) || (addr + 8 > end_addr)) {
> > -			/* If the address is not 64-bit aligned we
> > -			 * copy in a byte at a time until it is. */
> > +		if ((addr % block_size) || (addr + block_size >
> > end_addr)) {
> > +			/* If the address is not aligned to block_size
> > +			 * we copy the data in one byte at a time
> > +			 * until it is aligned. */
> > 
> >  			tsize = 1;
> > 
> > -			/* Copy the input data in with correct
> > alignment */
> > +			/* Copy the input data in with correct
> > +			 * alignment. Bytes need to aligned to the
> > +			 * correct byte offset in the data register
> > +			 * regardless of address. */
> > 
> >  			data = ((uint64_t) *input) << 8*(8 - (addr % 8)
> > 
> > - 1);
> > 
> >  		} else {
> > 
> > -			tsize = 8;
> > -			memcpy(&data, input, sizeof(data));
> > +			tsize = block_size;
> > +			memcpy(&data, input, block_size);
> > 
> >  			data = __builtin_bswap64(data);
> > 
> > +			data >>= (addr & 7ull)*8;
> 
> Same thing here too.
> 
> >  		}
> > 
> > -		adu->putmem(adu, addr, data, tsize, ci);
> > +		adu->putmem(adu, addr, data, tsize, ci, block_size);
> > 
> >  		pdbg_progress_tick(addr - start_addr, size);
> >  	
> >  	}
> > 
> > @@ -234,7 +268,8 @@ static int adu_reset(struct adu *adu)
> > 
> >  	return 0;
> >  
> >  }
> > 
> > -static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> > *data, int ci)
> > +static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> > *data,
> > +			 int ci, uint8_t block_size)
> 
> It might be better to add a new commands instead of modifying the
> existing getmem/putmem behaviour.

Yeah, as discussed offline I am going to refactoring these callbacks so the 
abstraction is a little less leaky. However p8_adu_getmem, etc. are not 
exposed API functions so in the mean time I'd like to merge this so the 
downstream project which depends on this functionality can make progress as I 
don't think it will hinder the refactoring.

- Alistair

> >  {
> >  
> >  	uint64_t ctrl_reg, cmd_reg, val;
> >  	int rc = 0;
> > 
> > @@ -242,12 +277,15 @@ static int p8_adu_getmem(struct adu *adu,
> > uint64_t addr, uint64_t *data, int ci)
> > 
> >  	CHECK_ERR(adu_lock(adu));
> >  	
> >  	ctrl_reg = P8_TTYPE_TREAD;
> > 
> > -	if (ci)
> > +	if (ci) {
> > 
> >  		/* Do cache inhibited access */
> >  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> > 
> > P8_TTYPE_CI_PARTIAL_READ);
> > -	else
> > +		block_size = (blog2(block_size) + 1) << 1;
> > +	} else {
> > 
> >  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> > 
> > P8_TTYPE_DMA_PARTIAL_READ);
> > -	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, 8);
> > +		block_size = 0;
> > +	}
> > +	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);
> > 
> >  	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target,
> > 
> > P8_ALTD_CMD_REG, &cmd_reg));
> > 
> >  	cmd_reg |= FBC_ALTD_START_OP;
> > 
> > @@ -292,19 +330,23 @@ out:
> >  }
> > 
> > -int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int
> > size, int ci)
> > +int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int
> > size,
> > +		  int ci, uint8_t block_size)
> > 
> >  {
> >  
> >  	int rc = 0;
> >  	uint64_t cmd_reg, ctrl_reg, val;
> >  	CHECK_ERR(adu_lock(adu));
> >  	
> >  	ctrl_reg = P8_TTYPE_TWRITE;
> > 
> > -	if (ci)
> > +	if (ci) {
> > 
> >  		/* Do cache inhibited access */
> >  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> > 
> > P8_TTYPE_CI_PARTIAL_WRITE);
> > -	else
> > +		block_size = (blog2(block_size) + 1) << 1;
> > +	} else {
> > 
> >  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> > 
> > P8_TTYPE_DMA_PARTIAL_WRITE);
> > -	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, size);
> > +		block_size <<= 1;
> > +	}
> > +	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);
> > 
> >  	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target,
> > 
> > P8_ALTD_CMD_REG, &cmd_reg));
> > 
> >  	cmd_reg |= FBC_ALTD_START_OP;
> > 
> > @@ -349,19 +391,25 @@ out:
> >  	return rc;
> >  
> >  }
> > 
> > -static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> > *data, int ci)
> > +static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> > *data,
> > +			 int ci, uint8_t block_size)
> > 
> >  {
> >  
> >  	uint64_t ctrl_reg, cmd_reg, val;
> >  	
> >  	cmd_reg = P9_TTYPE_TREAD;
> > 
> > -	if (ci)
> > +	if (ci) {
> > 
> >  		/* Do cache inhibited access */
> >  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> > 
> > P9_TTYPE_CI_PARTIAL_READ);
> > -	else
> > +		block_size = (blog2(block_size) + 1) << 1;
> > +	} else {
> > 
> >  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> > 
> > P9_TTYPE_DMA_PARTIAL_READ);
> > 
> > -	/* For a read size is apparently always 0 */
> > -	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, 0);
> > +		/* For normal reads the size is ignored as HW always
> > +		 * returns a cache line */
> > +		block_size = 0;
> > +	}
> > +
> > +	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
> > 
> >   	cmd_reg |= FBC_ALTD_START_OP;
> >  	
> >  	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
> >  	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg,
> > 
> > DROP_PRIORITY_LOW);
> > 
> > @@ -400,7 +448,8 @@ retry:
> >  	return 0;
> >  
> >  }
> > 
> > -static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t
> > data, int size, int ci)
> > +static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t
> > data, int size,
> > +			 int ci, uint8_t block_size)
> > 
> >  {
> >  
> >  	uint64_t ctrl_reg, cmd_reg, val;
> > 
> > @@ -409,12 +458,15 @@ static int p9_adu_putmem(struct adu *adu,
> > uint64_t addr, uint64_t data, int size
> > 
> >  	size <<= 1;
> >  	
> >  	cmd_reg = P9_TTYPE_TWRITE;
> > 
> > -	if (ci)
> > +	if (ci) {
> > 
> >  		/* Do cache inhibited access */
> >  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> > 
> > P9_TTYPE_CI_PARTIAL_WRITE);
> > -	else
> > +		block_size = (blog2(block_size) + 1) << 1;
> > +	} else {
> > 
> >  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> > 
> > P9_TTYPE_DMA_PARTIAL_WRITE);
> > -	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, size);
> > +		block_size <<= 1;
> > +	}
> > +	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
> > 
> >   	cmd_reg |= FBC_ALTD_START_OP;
> >  	
> >  	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
> >  	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg,
> > 
> > DROP_PRIORITY_LOW);
> > diff --git a/libpdbg/libpdbg.h b/libpdbg/libpdbg.h
> > index 301c2c8..9ac8804 100644
> > --- a/libpdbg/libpdbg.h
> > +++ b/libpdbg/libpdbg.h
> > @@ -204,12 +204,18 @@ int htm_status(struct pdbg_target *target);
> > 
> >  int htm_dump(struct pdbg_target *target, char *filename);
> >  int htm_record(struct pdbg_target *target, char *filename);
> > 
> > -int adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > *ouput, uint64_t size);
> > -int adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > *input, uint64_t size);
> > -int adu_getmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t
> > *ouput, uint64_t size);
> > -int adu_putmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t
> > *input, uint64_t size);
> > -int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > *ouput, uint64_t size, bool ci);
> > -int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > *input, uint64_t size, bool ci);
> > +int adu_getmem(struct pdbg_target *target, uint64_t addr,
> > +	       uint8_t *ouput, uint64_t size, uint8_t block_size);
> > +int adu_putmem(struct pdbg_target *target, uint64_t addr,
> > +	       uint8_t *input, uint64_t size, uint8_t block_size);
> > +int adu_getmem_ci(struct pdbg_target *target, uint64_t addr,
> > +		  uint8_t *ouput, uint64_t size, uint8_t block_size);
> > +int adu_putmem_ci(struct pdbg_target *target, uint64_t addr,
> > +		  uint8_t *input, uint64_t size, uint8_t block_size);
> > +int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > *ouput,
> > +		 uint64_t size, bool ci, uint8_t block_size);
> > +int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > *input,
> > +		 uint64_t size, bool ci, uint8_t block_size);
> > 
> >  int opb_read(struct pdbg_target *target, uint32_t addr, uint32_t
> > 
> > *data);
> > 
> >  int opb_write(struct pdbg_target *target, uint32_t addr, uint32_t
> > 
> > data);
> > diff --git a/libpdbg/target.h b/libpdbg/target.h
> > index 7cc855d..16ae304 100644
> > --- a/libpdbg/target.h
> > +++ b/libpdbg/target.h
> > @@ -107,8 +107,8 @@ struct htm {
> > 
> >  struct adu {
> >  
> >  	struct pdbg_target target;
> > 
> > -	int (*getmem)(struct adu *, uint64_t, uint64_t *, int);
> > -	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int);
> > +	int (*getmem)(struct adu *, uint64_t, uint64_t *, int,
> > uint8_t);
> > +	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int,
> > uint8_t);
> > 
> >  };
> >  #define target_to_adu(x) container_of(x, struct adu, target)
> > 
> > diff --git a/src/mem.c b/src/mem.c
> > index ce099c2..b578f95 100644
> > --- a/src/mem.c
> > +++ b/src/mem.c
> > @@ -39,8 +39,10 @@ struct mem_flags {
> > 
> >  };
> >  
> >  #define MEM_CI_FLAG ("--ci", ci, parse_flag_noarg, false)
> > 
> > +#define BLOCK_SIZE (parse_number8_pow2, "8")
> > 
> > -static int getmem(uint64_t addr, uint64_t size, struct mem_flags
> > flags)
> > +static int getmem(uint64_t addr, uint64_t size,
> > +		  uint8_t block_size, struct mem_flags flags)
> > 
> >  {
> >  
> >  	struct pdbg_target *target;
> >  	uint8_t *buf;
> > 
> > @@ -59,7 +61,7 @@ static int getmem(uint64_t addr, uint64_t size,
> > struct mem_flags flags)
> > 
> >  		pdbg_set_progress_tick(progress_tick);
> >  		progress_init();
> > 
> > -		if (!__adu_getmem(target, addr, buf, size, flags.ci)) {
> > +		if (!__adu_getmem(target, addr, buf, size, flags.ci,
> > block_size)) {
> > 
> >  			if (write(STDOUT_FILENO, buf, size) < 0)
> >  			
> >  				PR_ERROR("Unable to write stdout.\n");
> >  			
> >  			else
> > 
> > @@ -74,10 +76,10 @@ static int getmem(uint64_t addr, uint64_t size,
> > struct mem_flags flags)
> > 
> >  	return rc;
> >  
> >  }
> > 
> > -OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA),
> > +OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA,
> > BLOCK_SIZE),
> > 
> >  			     mem_flags, (MEM_CI_FLAG));
> > 
> > -static int putmem(uint64_t addr, struct mem_flags flags)
> > +static int putmem(uint64_t addr, uint8_t block_size, struct
> > mem_flags flags)
> > 
> >  {
> >  
> >  	uint8_t *buf;
> >  	int read_size, rc = 0;
> > 
> > @@ -98,7 +100,7 @@ static int putmem(uint64_t addr, struct mem_flags
> > flags)
> > 
> >  		if (read_size <= 0)
> >  		
> >  			break;
> > 
> > -		if (__adu_putmem(adu_target, addr, buf, read_size,
> > flags.ci)) {
> > +		if (__adu_putmem(adu_target, addr, buf, read_size,
> > flags.ci, block_size)) {
> > 
> >  			rc = 0;
> >  			printf("Unable to write memory.\n");
> >  			break;
> > 
> > @@ -111,5 +113,5 @@ static int putmem(uint64_t addr, struct mem_flags
> > flags)
> > 
> >  	free(buf);
> >  	return rc;
> >  
> >  }
> > 
> > -OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS),
> > +OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS, BLOCK_SIZE),
> > 
> >  			     mem_flags, (MEM_CI_FLAG));
> > 
> > diff --git a/src/pdbgproxy.c b/src/pdbgproxy.c
> > index dedea7a..877cd4c 100644
> > --- a/src/pdbgproxy.c
> > +++ b/src/pdbgproxy.c
> > @@ -223,7 +223,7 @@ static void get_mem(uint64_t *stack, void *priv)
> > 
> >  	linear_map = get_real_addr(addr);
> >  	if (linear_map != -1UL) {
> > 
> > -		if (adu_getmem(adu_target, linear_map, (uint8_t *)
> > data, len)) {
> > +	if (adu_getmem(adu_target, linear_map, (uint8_t *) data, len,
> > 8)) {
> > 
> >  			PR_ERROR("Unable to read memory\n");
> >  			err = 1;
> >  		
> >  		}
> > 
> > @@ -293,7 +293,7 @@ static void put_mem(uint64_t *stack, void *priv)
> > 
> >  	PR_INFO("put_mem 0x%016" PRIx64 " = 0x%016" PRIx64 "\n", addr,
> > 
> > stack[2]);
> > 
> > -	if (adu_putmem(adu_target, addr, data, len)) {
> > +	if (adu_putmem(adu_target, addr, data, len, 8)) {
> > 
> >  		PR_ERROR("Unable to write memory\n");
> >  		err = 3;
> >  	
> >  	}
> > 
> > diff --git a/src/thread.c b/src/thread.c
> > index 1fd448d..2879c19 100644
> > --- a/src/thread.c
> > +++ b/src/thread.c
> > @@ -35,7 +35,7 @@ static bool is_real_address(struct thread_regs
> > *regs, uint64_t addr)
> > 
> >  static int load8(struct pdbg_target *target, uint64_t addr, uint64_t
> > 
> > *value)
> > 
> >  {
> > 
> > -	if (adu_getmem(target, addr, (uint8_t *)value, 8)) {
> > +	if (adu_getmem(target, addr, (uint8_t *)value, 8, 8)) {
> > 
> >  		pdbg_log(PDBG_ERROR, "Unable to read memory
> > 
> > address=%016" PRIx64 ".\n", addr);
> > 
> >  		return 0;
> >  	
> >  	}
> > 
> > --
> > 2.11.0
> 
> Amitay.
Alistair Popple Dec. 14, 2018, 6:04 a.m. UTC | #3
On Friday, 14 December 2018 12:00:09 PM AEDT Alistair Popple wrote:
> On Wednesday, 12 December 2018 5:30:11 PM AEDT Amitay Isaacs wrote:
> > On Tue, 2018-12-11 at 16:05 +1100, Alistair Popple wrote:
> > > Not all memory can be read with the default ADU block size of 8
> > > bytes. Specifically cache-inhibited access to some MMIO regions such
> > > as PCIe BAR spaces requires 4 byte accesses to avoid check stopping
> > > the machine.
> > > 
> > > This patch adds an argument to the put/getmem commands to allow a
> > > specific block size to be selected.
> > > 
> > > Signed-off-by: Alistair Popple <alistair@popple.id.au>
> > > ---
> > > 
> > > Changes since v1:
> > > 	- Fixed up comments
> > > 	- Removed debug code
> > > 
> > > libpdbg/adu.c     | 140 +++++++++++++++++++++++++++++++++++++------
> > > -----------
> > > 
> > >  libpdbg/libpdbg.h |  18 ++++---
> > >  libpdbg/target.h  |   4 +-
> > >  src/mem.c         |  14 +++---
> > >  src/pdbgproxy.c   |   4 +-
> > >  src/thread.c      |   2 +-
> > >  6 files changed, 121 insertions(+), 61 deletions(-)
> > > 
> > > diff --git a/libpdbg/adu.c b/libpdbg/adu.c
> > > index b67a43e..5396213 100644
> > > --- a/libpdbg/adu.c
> > > +++ b/libpdbg/adu.c
> > > @@ -84,20 +84,48 @@
> > > 
> > >  #define FBC_ALTD_DATA_DONE	PPC_BIT(3)
> > >  #define FBC_ALTD_PBINIT_MISSING PPC_BIT(18)
> > > 
> > > +/* There are more general implementations of this with a loop and
> > > more
> > > + * performant implementations using GCC builtins which aren't
> > > + * portable. Given we only need a limited domain this is quick, easy
> > > + * and portable. */
> > > +uint8_t blog2(uint8_t x)
> > > +{
> > > +	switch(x) {
> > > +	case 1:
> > > +		return 0;
> > > +	case 2:
> > > +		return 1;
> > > +	case 4:
> > > +		return 2;
> > > +	case 8:
> > > +		return 3;
> > > +	case 16:
> > > +		return 4;
> > > +	case 32:
> > > +		return 5;
> > > +	case 64:
> > > +		return 6;
> > > +	default:
> > > +		assert(0);
> > > +	}
> > > +}
> > > +
> > > 
> > >  int adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
> > > 
> > > -	       uint8_t *output, uint64_t size)
> > > +	       uint8_t *output, uint64_t size, uint8_t block_size)
> > > 
> > >  {
> > > 
> > > -	return __adu_getmem(adu_target, start_addr, output, size,
> > > false);
> > > +	return __adu_getmem(adu_target, start_addr, output,
> > > +			    size, false, block_size);
> > > 
> > >  }
> > >  
> > >  int adu_getmem_ci(struct pdbg_target *adu_target, uint64_t
> > > 
> > > start_addr,
> > > -		  uint8_t *output, uint64_t size)
> > > +		  uint8_t *output, uint64_t size, uint8_t block_size)
> > > 
> > >  {
> > > 
> > > -	return __adu_getmem(adu_target, start_addr, output, size,
> > > true);
> > > +	return __adu_getmem(adu_target, start_addr, output,
> > > +			    size, true, block_size);
> > > 
> > >  }
> > >  
> > >  int __adu_getmem(struct pdbg_target *adu_target, uint64_t
> > > 
> > > start_addr,
> > > -		 uint8_t *output, uint64_t size, bool ci)
> > > +		 uint8_t *output, uint64_t size, bool ci, uint8_t
> > > block_size)
> > > 
> > >  {
> > >  
> > >  	struct adu *adu;
> > >  	uint8_t *output0;
> > > 
> > > @@ -109,33 +137,34 @@ int __adu_getmem(struct pdbg_target
> > > *adu_target, uint64_t start_addr,
> > > 
> > >  	output0 = output;
> > > 
> > > -	/* Align start address to 8-byte boundary */
> > > -	addr0 = 8 * (start_addr / 8);
> > > +	/* Align start address to block_sized boundary */
> > > +	addr0 = block_size * (start_addr / block_size);
> > > 
> > > -	/* We read data in 8-byte aligned chunks */
> > > -	for (addr = addr0; addr < start_addr + size; addr += 8) {
> > > +	/* We read data in block_sized aligned chunks */
> > > +	for (addr = addr0; addr < start_addr + size; addr +=
> > > block_size) {
> > > 
> > >  		uint64_t data;
> > > 
> > > -		if (adu->getmem(adu, addr, &data, ci))
> > > +		if (adu->getmem(adu, addr, &data, ci, block_size))
> > > 
> > >  			return -1;
> > > 
> > > -		/* ADU returns data in big-endian form in the register
> > > */
> > > +		/* ADU returns data in big-endian form in the register.
> > > */
> > > 
> > >  		data = __builtin_bswap64(data);
> > > 
> > > +		data >>= (addr & 0x7ull)*8;
> > 
> > The above line will be better if written based block_size (as
> > discussed).
> 
> Actually I don't think that will work. The alignment in the hardware data
> register is static and does not depend on block size. Take for example
> writing 0xdeadbeef with a block size of 4. If we want to write this to
> address 0x4 we need data == 0xdeadbeef but if writing to address 0x0 we
> need data == 0xdeadbeef00000000.
> 
> > >  		if (addr < start_addr) {
> > >  		
> > >  			size_t offset = start_addr - addr;
> > > 
> > > -			size_t n = (size <= 8-offset ? size : 8-
> > > offset);
> > > +			size_t n = (size <= block_size-offset ? size :
> > > block_size-offset);
> > > 
> > >  			memcpy(output, ((uint8_t *) &data) + offset,
> > > 
> > > n);
> > > 
> > >  			output += n;
> > > 
> > > -		} else if (addr + 8 > start_addr + size) {
> > > +		} else if (addr + block_size > start_addr + size) {
> > > 
> > >  			uint64_t offset = start_addr + size - addr;
> > >  			
> > >  			memcpy(output, &data, offset);
> > >  			output += offset;
> > >  		
> > >  		} else {
> > > 
> > > -			memcpy(output, &data, 8);
> > > -			output += 8;
> > > +			memcpy(output, &data, block_size);
> > > +			output += block_size;
> > > 
> > >  		}
> > >  		
> > >  		pdbg_progress_tick(output - output0, size);
> > > 
> > > @@ -147,19 +176,19 @@ int __adu_getmem(struct pdbg_target
> > > *adu_target, uint64_t start_addr,
> > > 
> > >  }
> > >  
> > >  int adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
> > > 
> > > -	       uint8_t *output, uint64_t size)
> > > +	       uint8_t *output, uint64_t size, uint8_t block_size)
> > > 
> > >  {
> > > 
> > > -	return __adu_putmem(adu_target, start_addr, output, size,
> > > false);
> > > +	return __adu_putmem(adu_target, start_addr, output, size,
> > > false, block_size);
> > > 
> > >  }
> > >  
> > >  int adu_putmem_ci(struct pdbg_target *adu_target, uint64_t
> > > 
> > > start_addr,
> > > -		  uint8_t *output, uint64_t size)
> > > +		  uint8_t *output, uint64_t size, uint8_t block_size)
> > > 
> > >  {
> > > 
> > > -	return __adu_putmem(adu_target, start_addr, output, size,
> > > true);
> > > +	return __adu_putmem(adu_target, start_addr, output, size, true,
> > > block_size);
> > > 
> > >  }
> > >  
> > >  int __adu_putmem(struct pdbg_target *adu_target, uint64_t
> > > 
> > > start_addr,
> > > -		 uint8_t *input, uint64_t size, bool ci)
> > > +		 uint8_t *input, uint64_t size, bool ci, uint8_t
> > > block_size)
> > > 
> > >  {
> > >  
> > >  	struct adu *adu;
> > >  	int rc = 0, tsize;
> > > 
> > > @@ -169,20 +198,25 @@ int __adu_putmem(struct pdbg_target
> > > *adu_target, uint64_t start_addr,
> > > 
> > >  	adu = target_to_adu(adu_target);
> > >  	end_addr = start_addr + size;
> > >  	for (addr = start_addr; addr < end_addr; addr += tsize, input
> > > 
> > > += tsize) {
> > > -		if ((addr % 8) || (addr + 8 > end_addr)) {
> > > -			/* If the address is not 64-bit aligned we
> > > -			 * copy in a byte at a time until it is. */
> > > +		if ((addr % block_size) || (addr + block_size >
> > > end_addr)) {
> > > +			/* If the address is not aligned to block_size
> > > +			 * we copy the data in one byte at a time
> > > +			 * until it is aligned. */
> > > 
> > >  			tsize = 1;
> > > 
> > > -			/* Copy the input data in with correct
> > > alignment */
> > > +			/* Copy the input data in with correct
> > > +			 * alignment. Bytes need to aligned to the
> > > +			 * correct byte offset in the data register
> > > +			 * regardless of address. */
> > > 
> > >  			data = ((uint64_t) *input) << 8*(8 - (addr % 8)
> > > 
> > > - 1);
> > > 
> > >  		} else {
> > > 
> > > -			tsize = 8;
> > > -			memcpy(&data, input, sizeof(data));
> > > +			tsize = block_size;
> > > +			memcpy(&data, input, block_size);
> > > 
> > >  			data = __builtin_bswap64(data);
> > > 
> > > +			data >>= (addr & 7ull)*8;
> > 
> > Same thing here too.
> > 
> > >  		}
> > > 
> > > -		adu->putmem(adu, addr, data, tsize, ci);
> > > +		adu->putmem(adu, addr, data, tsize, ci, block_size);
> > > 
> > >  		pdbg_progress_tick(addr - start_addr, size);
> > >  	
> > >  	}
> > > 
> > > @@ -234,7 +268,8 @@ static int adu_reset(struct adu *adu)
> > > 
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > -static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> > > *data, int ci)
> > > +static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> > > *data,
> > > +			 int ci, uint8_t block_size)
> > 
> > It might be better to add a new commands instead of modifying the
> > existing getmem/putmem behaviour.
> 
> Yeah, as discussed offline I am going to refactoring these callbacks so the
> abstraction is a little less leaky. However p8_adu_getmem, etc. are not
> exposed API functions so in the mean time I'd like to merge this so the
> downstream project which depends on this functionality can make progress as
> I don't think it will hinder the refactoring.

Having looked at the code today I think I see what you mean. We can still add 
block_size to the above commands but I agree we should avoid changing the 
external API functions (ie. adu_getmem, etc.). Will do a v3 which adds new API 
functions for block_size.

- Alistair

> - Alistair
> 
> > >  {
> > >  
> > >  	uint64_t ctrl_reg, cmd_reg, val;
> > >  	int rc = 0;
> > > 
> > > @@ -242,12 +277,15 @@ static int p8_adu_getmem(struct adu *adu,
> > > uint64_t addr, uint64_t *data, int ci)
> > > 
> > >  	CHECK_ERR(adu_lock(adu));
> > >  	
> > >  	ctrl_reg = P8_TTYPE_TREAD;
> > > 
> > > -	if (ci)
> > > +	if (ci) {
> > > 
> > >  		/* Do cache inhibited access */
> > >  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> > > 
> > > P8_TTYPE_CI_PARTIAL_READ);
> > > -	else
> > > +		block_size = (blog2(block_size) + 1) << 1;
> > > +	} else {
> > > 
> > >  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> > > 
> > > P8_TTYPE_DMA_PARTIAL_READ);
> > > -	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, 8);
> > > +		block_size = 0;
> > > +	}
> > > +	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);
> > > 
> > >  	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target,
> > > 
> > > P8_ALTD_CMD_REG, &cmd_reg));
> > > 
> > >  	cmd_reg |= FBC_ALTD_START_OP;
> > > 
> > > @@ -292,19 +330,23 @@ out:
> > >  }
> > > 
> > > -int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int
> > > size, int ci)
> > > +int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int
> > > size,
> > > +		  int ci, uint8_t block_size)
> > > 
> > >  {
> > >  
> > >  	int rc = 0;
> > >  	uint64_t cmd_reg, ctrl_reg, val;
> > >  	CHECK_ERR(adu_lock(adu));
> > >  	
> > >  	ctrl_reg = P8_TTYPE_TWRITE;
> > > 
> > > -	if (ci)
> > > +	if (ci) {
> > > 
> > >  		/* Do cache inhibited access */
> > >  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> > > 
> > > P8_TTYPE_CI_PARTIAL_WRITE);
> > > -	else
> > > +		block_size = (blog2(block_size) + 1) << 1;
> > > +	} else {
> > > 
> > >  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> > > 
> > > P8_TTYPE_DMA_PARTIAL_WRITE);
> > > -	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, size);
> > > +		block_size <<= 1;
> > > +	}
> > > +	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);
> > > 
> > >  	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target,
> > > 
> > > P8_ALTD_CMD_REG, &cmd_reg));
> > > 
> > >  	cmd_reg |= FBC_ALTD_START_OP;
> > > 
> > > @@ -349,19 +391,25 @@ out:
> > >  	return rc;
> > >  
> > >  }
> > > 
> > > -static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> > > *data, int ci)
> > > +static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> > > *data,
> > > +			 int ci, uint8_t block_size)
> > > 
> > >  {
> > >  
> > >  	uint64_t ctrl_reg, cmd_reg, val;
> > >  	
> > >  	cmd_reg = P9_TTYPE_TREAD;
> > > 
> > > -	if (ci)
> > > +	if (ci) {
> > > 
> > >  		/* Do cache inhibited access */
> > >  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> > > 
> > > P9_TTYPE_CI_PARTIAL_READ);
> > > -	else
> > > +		block_size = (blog2(block_size) + 1) << 1;
> > > +	} else {
> > > 
> > >  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> > > 
> > > P9_TTYPE_DMA_PARTIAL_READ);
> > > 
> > > -	/* For a read size is apparently always 0 */
> > > -	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, 0);
> > > +		/* For normal reads the size is ignored as HW always
> > > +		 * returns a cache line */
> > > +		block_size = 0;
> > > +	}
> > > +
> > > +	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
> > > 
> > >   	cmd_reg |= FBC_ALTD_START_OP;
> > >  	
> > >  	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
> > >  	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg,
> > > 
> > > DROP_PRIORITY_LOW);
> > > 
> > > @@ -400,7 +448,8 @@ retry:
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > -static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t
> > > data, int size, int ci)
> > > +static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t
> > > data, int size,
> > > +			 int ci, uint8_t block_size)
> > > 
> > >  {
> > >  
> > >  	uint64_t ctrl_reg, cmd_reg, val;
> > > 
> > > @@ -409,12 +458,15 @@ static int p9_adu_putmem(struct adu *adu,
> > > uint64_t addr, uint64_t data, int size
> > > 
> > >  	size <<= 1;
> > >  	
> > >  	cmd_reg = P9_TTYPE_TWRITE;
> > > 
> > > -	if (ci)
> > > +	if (ci) {
> > > 
> > >  		/* Do cache inhibited access */
> > >  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> > > 
> > > P9_TTYPE_CI_PARTIAL_WRITE);
> > > -	else
> > > +		block_size = (blog2(block_size) + 1) << 1;
> > > +	} else {
> > > 
> > >  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> > > 
> > > P9_TTYPE_DMA_PARTIAL_WRITE);
> > > -	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, size);
> > > +		block_size <<= 1;
> > > +	}
> > > +	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
> > > 
> > >   	cmd_reg |= FBC_ALTD_START_OP;
> > >  	
> > >  	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
> > >  	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg,
> > > 
> > > DROP_PRIORITY_LOW);
> > > diff --git a/libpdbg/libpdbg.h b/libpdbg/libpdbg.h
> > > index 301c2c8..9ac8804 100644
> > > --- a/libpdbg/libpdbg.h
> > > +++ b/libpdbg/libpdbg.h
> > > @@ -204,12 +204,18 @@ int htm_status(struct pdbg_target *target);
> > > 
> > >  int htm_dump(struct pdbg_target *target, char *filename);
> > >  int htm_record(struct pdbg_target *target, char *filename);
> > > 
> > > -int adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > > *ouput, uint64_t size);
> > > -int adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > > *input, uint64_t size);
> > > -int adu_getmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t
> > > *ouput, uint64_t size);
> > > -int adu_putmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t
> > > *input, uint64_t size);
> > > -int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > > *ouput, uint64_t size, bool ci);
> > > -int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > > *input, uint64_t size, bool ci);
> > > +int adu_getmem(struct pdbg_target *target, uint64_t addr,
> > > +	       uint8_t *ouput, uint64_t size, uint8_t block_size);
> > > +int adu_putmem(struct pdbg_target *target, uint64_t addr,
> > > +	       uint8_t *input, uint64_t size, uint8_t block_size);
> > > +int adu_getmem_ci(struct pdbg_target *target, uint64_t addr,
> > > +		  uint8_t *ouput, uint64_t size, uint8_t block_size);
> > > +int adu_putmem_ci(struct pdbg_target *target, uint64_t addr,
> > > +		  uint8_t *input, uint64_t size, uint8_t block_size);
> > > +int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > > *ouput,
> > > +		 uint64_t size, bool ci, uint8_t block_size);
> > > +int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> > > *input,
> > > +		 uint64_t size, bool ci, uint8_t block_size);
> > > 
> > >  int opb_read(struct pdbg_target *target, uint32_t addr, uint32_t
> > > 
> > > *data);
> > > 
> > >  int opb_write(struct pdbg_target *target, uint32_t addr, uint32_t
> > > 
> > > data);
> > > diff --git a/libpdbg/target.h b/libpdbg/target.h
> > > index 7cc855d..16ae304 100644
> > > --- a/libpdbg/target.h
> > > +++ b/libpdbg/target.h
> > > @@ -107,8 +107,8 @@ struct htm {
> > > 
> > >  struct adu {
> > >  
> > >  	struct pdbg_target target;
> > > 
> > > -	int (*getmem)(struct adu *, uint64_t, uint64_t *, int);
> > > -	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int);
> > > +	int (*getmem)(struct adu *, uint64_t, uint64_t *, int,
> > > uint8_t);
> > > +	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int,
> > > uint8_t);
> > > 
> > >  };
> > >  #define target_to_adu(x) container_of(x, struct adu, target)
> > > 
> > > diff --git a/src/mem.c b/src/mem.c
> > > index ce099c2..b578f95 100644
> > > --- a/src/mem.c
> > > +++ b/src/mem.c
> > > @@ -39,8 +39,10 @@ struct mem_flags {
> > > 
> > >  };
> > >  
> > >  #define MEM_CI_FLAG ("--ci", ci, parse_flag_noarg, false)
> > > 
> > > +#define BLOCK_SIZE (parse_number8_pow2, "8")
> > > 
> > > -static int getmem(uint64_t addr, uint64_t size, struct mem_flags
> > > flags)
> > > +static int getmem(uint64_t addr, uint64_t size,
> > > +		  uint8_t block_size, struct mem_flags flags)
> > > 
> > >  {
> > >  
> > >  	struct pdbg_target *target;
> > >  	uint8_t *buf;
> > > 
> > > @@ -59,7 +61,7 @@ static int getmem(uint64_t addr, uint64_t size,
> > > struct mem_flags flags)
> > > 
> > >  		pdbg_set_progress_tick(progress_tick);
> > >  		progress_init();
> > > 
> > > -		if (!__adu_getmem(target, addr, buf, size, flags.ci)) {
> > > +		if (!__adu_getmem(target, addr, buf, size, flags.ci,
> > > block_size)) {
> > > 
> > >  			if (write(STDOUT_FILENO, buf, size) < 0)
> > >  			
> > >  				PR_ERROR("Unable to write stdout.\n");
> > >  			
> > >  			else
> > > 
> > > @@ -74,10 +76,10 @@ static int getmem(uint64_t addr, uint64_t size,
> > > struct mem_flags flags)
> > > 
> > >  	return rc;
> > >  
> > >  }
> > > 
> > > -OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA),
> > > +OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA,
> > > BLOCK_SIZE),
> > > 
> > >  			     mem_flags, (MEM_CI_FLAG));
> > > 
> > > -static int putmem(uint64_t addr, struct mem_flags flags)
> > > +static int putmem(uint64_t addr, uint8_t block_size, struct
> > > mem_flags flags)
> > > 
> > >  {
> > >  
> > >  	uint8_t *buf;
> > >  	int read_size, rc = 0;
> > > 
> > > @@ -98,7 +100,7 @@ static int putmem(uint64_t addr, struct mem_flags
> > > flags)
> > > 
> > >  		if (read_size <= 0)
> > >  		
> > >  			break;
> > > 
> > > -		if (__adu_putmem(adu_target, addr, buf, read_size,
> > > flags.ci)) {
> > > +		if (__adu_putmem(adu_target, addr, buf, read_size,
> > > flags.ci, block_size)) {
> > > 
> > >  			rc = 0;
> > >  			printf("Unable to write memory.\n");
> > >  			break;
> > > 
> > > @@ -111,5 +113,5 @@ static int putmem(uint64_t addr, struct mem_flags
> > > flags)
> > > 
> > >  	free(buf);
> > >  	return rc;
> > >  
> > >  }
> > > 
> > > -OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS),
> > > +OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS, BLOCK_SIZE),
> > > 
> > >  			     mem_flags, (MEM_CI_FLAG));
> > > 
> > > diff --git a/src/pdbgproxy.c b/src/pdbgproxy.c
> > > index dedea7a..877cd4c 100644
> > > --- a/src/pdbgproxy.c
> > > +++ b/src/pdbgproxy.c
> > > @@ -223,7 +223,7 @@ static void get_mem(uint64_t *stack, void *priv)
> > > 
> > >  	linear_map = get_real_addr(addr);
> > >  	if (linear_map != -1UL) {
> > > 
> > > -		if (adu_getmem(adu_target, linear_map, (uint8_t *)
> > > data, len)) {
> > > +	if (adu_getmem(adu_target, linear_map, (uint8_t *) data, len,
> > > 8)) {
> > > 
> > >  			PR_ERROR("Unable to read memory\n");
> > >  			err = 1;
> > >  		
> > >  		}
> > > 
> > > @@ -293,7 +293,7 @@ static void put_mem(uint64_t *stack, void *priv)
> > > 
> > >  	PR_INFO("put_mem 0x%016" PRIx64 " = 0x%016" PRIx64 "\n", addr,
> > > 
> > > stack[2]);
> > > 
> > > -	if (adu_putmem(adu_target, addr, data, len)) {
> > > +	if (adu_putmem(adu_target, addr, data, len, 8)) {
> > > 
> > >  		PR_ERROR("Unable to write memory\n");
> > >  		err = 3;
> > >  	
> > >  	}
> > > 
> > > diff --git a/src/thread.c b/src/thread.c
> > > index 1fd448d..2879c19 100644
> > > --- a/src/thread.c
> > > +++ b/src/thread.c
> > > @@ -35,7 +35,7 @@ static bool is_real_address(struct thread_regs
> > > *regs, uint64_t addr)
> > > 
> > >  static int load8(struct pdbg_target *target, uint64_t addr, uint64_t
> > > 
> > > *value)
> > > 
> > >  {
> > > 
> > > -	if (adu_getmem(target, addr, (uint8_t *)value, 8)) {
> > > +	if (adu_getmem(target, addr, (uint8_t *)value, 8, 8)) {
> > > 
> > >  		pdbg_log(PDBG_ERROR, "Unable to read memory
> > > 
> > > address=%016" PRIx64 ".\n", addr);
> > > 
> > >  		return 0;
> > >  	
> > >  	}
> > > 
> > > --
> > > 2.11.0
> > 
> > Amitay.
diff mbox series

Patch

diff --git a/libpdbg/adu.c b/libpdbg/adu.c
index b67a43e..5396213 100644
--- a/libpdbg/adu.c
+++ b/libpdbg/adu.c
@@ -84,20 +84,48 @@ 
 #define FBC_ALTD_DATA_DONE	PPC_BIT(3)
 #define FBC_ALTD_PBINIT_MISSING PPC_BIT(18)

+/* There are more general implementations of this with a loop and more
+ * performant implementations using GCC builtins which aren't
+ * portable. Given we only need a limited domain this is quick, easy
+ * and portable. */
+uint8_t blog2(uint8_t x)
+{
+	switch(x) {
+	case 1:
+		return 0;
+	case 2:
+		return 1;
+	case 4:
+		return 2;
+	case 8:
+		return 3;
+	case 16:
+		return 4;
+	case 32:
+		return 5;
+	case 64:
+		return 6;
+	default:
+		assert(0);
+	}
+}
+
 int adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
-	       uint8_t *output, uint64_t size)
+	       uint8_t *output, uint64_t size, uint8_t block_size)
 {
-	return __adu_getmem(adu_target, start_addr, output, size, false);
+	return __adu_getmem(adu_target, start_addr, output,
+			    size, false, block_size);
 }

 int adu_getmem_ci(struct pdbg_target *adu_target, uint64_t start_addr,
-		  uint8_t *output, uint64_t size)
+		  uint8_t *output, uint64_t size, uint8_t block_size)
 {
-	return __adu_getmem(adu_target, start_addr, output, size, true);
+	return __adu_getmem(adu_target, start_addr, output,
+			    size, true, block_size);
 }

 int __adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
-		 uint8_t *output, uint64_t size, bool ci)
+		 uint8_t *output, uint64_t size, bool ci, uint8_t block_size)
 {
 	struct adu *adu;
 	uint8_t *output0;
@@ -109,33 +137,34 @@  int __adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,

 	output0 = output;

-	/* Align start address to 8-byte boundary */
-	addr0 = 8 * (start_addr / 8);
+	/* Align start address to block_sized boundary */
+	addr0 = block_size * (start_addr / block_size);

-	/* We read data in 8-byte aligned chunks */
-	for (addr = addr0; addr < start_addr + size; addr += 8) {
+	/* We read data in block_sized aligned chunks */
+	for (addr = addr0; addr < start_addr + size; addr += block_size) {
 		uint64_t data;

-		if (adu->getmem(adu, addr, &data, ci))
+		if (adu->getmem(adu, addr, &data, ci, block_size))
 			return -1;

-		/* ADU returns data in big-endian form in the register */
+		/* ADU returns data in big-endian form in the register. */
 		data = __builtin_bswap64(data);
+		data >>= (addr & 0x7ull)*8;

 		if (addr < start_addr) {
 			size_t offset = start_addr - addr;
-			size_t n = (size <= 8-offset ? size : 8-offset);
+			size_t n = (size <= block_size-offset ? size : block_size-offset);

 			memcpy(output, ((uint8_t *) &data) + offset, n);
 			output += n;
-		} else if (addr + 8 > start_addr + size) {
+		} else if (addr + block_size > start_addr + size) {
 			uint64_t offset = start_addr + size - addr;

 			memcpy(output, &data, offset);
 			output += offset;
 		} else {
-			memcpy(output, &data, 8);
-			output += 8;
+			memcpy(output, &data, block_size);
+			output += block_size;
 		}

 		pdbg_progress_tick(output - output0, size);
@@ -147,19 +176,19 @@  int __adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
 }

 int adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
-	       uint8_t *output, uint64_t size)
+	       uint8_t *output, uint64_t size, uint8_t block_size)
 {
-	return __adu_putmem(adu_target, start_addr, output, size, false);
+	return __adu_putmem(adu_target, start_addr, output, size, false, block_size);
 }

 int adu_putmem_ci(struct pdbg_target *adu_target, uint64_t start_addr,
-		  uint8_t *output, uint64_t size)
+		  uint8_t *output, uint64_t size, uint8_t block_size)
 {
-	return __adu_putmem(adu_target, start_addr, output, size, true);
+	return __adu_putmem(adu_target, start_addr, output, size, true, block_size);
 }

 int __adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
-		 uint8_t *input, uint64_t size, bool ci)
+		 uint8_t *input, uint64_t size, bool ci, uint8_t block_size)
 {
 	struct adu *adu;
 	int rc = 0, tsize;
@@ -169,20 +198,25 @@  int __adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
 	adu = target_to_adu(adu_target);
 	end_addr = start_addr + size;
 	for (addr = start_addr; addr < end_addr; addr += tsize, input += tsize) {
-		if ((addr % 8) || (addr + 8 > end_addr)) {
-			/* If the address is not 64-bit aligned we
-			 * copy in a byte at a time until it is. */
+		if ((addr % block_size) || (addr + block_size > end_addr)) {
+			/* If the address is not aligned to block_size
+			 * we copy the data in one byte at a time
+			 * until it is aligned. */
 			tsize = 1;

-			/* Copy the input data in with correct alignment */
+			/* Copy the input data in with correct
+			 * alignment. Bytes need to aligned to the
+			 * correct byte offset in the data register
+			 * regardless of address. */
 			data = ((uint64_t) *input) << 8*(8 - (addr % 8) - 1);
 		} else {
-			tsize = 8;
-			memcpy(&data, input, sizeof(data));
+			tsize = block_size;
+			memcpy(&data, input, block_size);
 			data = __builtin_bswap64(data);
+			data >>= (addr & 7ull)*8;
 		}

-		adu->putmem(adu, addr, data, tsize, ci);
+		adu->putmem(adu, addr, data, tsize, ci, block_size);
 		pdbg_progress_tick(addr - start_addr, size);
 	}

@@ -234,7 +268,8 @@  static int adu_reset(struct adu *adu)
 	return 0;
 }

-static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data, int ci)
+static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data,
+			 int ci, uint8_t block_size)
 {
 	uint64_t ctrl_reg, cmd_reg, val;
 	int rc = 0;
@@ -242,12 +277,15 @@  static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data, int ci)
 	CHECK_ERR(adu_lock(adu));

 	ctrl_reg = P8_TTYPE_TREAD;
-	if (ci)
+	if (ci) {
 		/* Do cache inhibited access */
 		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg, P8_TTYPE_CI_PARTIAL_READ);
-	else
+		block_size = (blog2(block_size) + 1) << 1;
+	} else {
 		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg, P8_TTYPE_DMA_PARTIAL_READ);
-	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, 8);
+		block_size = 0;
+	}
+	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);

 	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target, P8_ALTD_CMD_REG, &cmd_reg));
 	cmd_reg |= FBC_ALTD_START_OP;
@@ -292,19 +330,23 @@  out:

 }

-int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size, int ci)
+int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size,
+		  int ci, uint8_t block_size)
 {
 	int rc = 0;
 	uint64_t cmd_reg, ctrl_reg, val;
 	CHECK_ERR(adu_lock(adu));

 	ctrl_reg = P8_TTYPE_TWRITE;
-	if (ci)
+	if (ci) {
 		/* Do cache inhibited access */
 		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg, P8_TTYPE_CI_PARTIAL_WRITE);
-	else
+		block_size = (blog2(block_size) + 1) << 1;
+	} else {
 		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg, P8_TTYPE_DMA_PARTIAL_WRITE);
-	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, size);
+		block_size <<= 1;
+	}
+	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);

 	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target, P8_ALTD_CMD_REG, &cmd_reg));
 	cmd_reg |= FBC_ALTD_START_OP;
@@ -349,19 +391,25 @@  out:
 	return rc;
 }

-static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data, int ci)
+static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data,
+			 int ci, uint8_t block_size)
 {
 	uint64_t ctrl_reg, cmd_reg, val;

 	cmd_reg = P9_TTYPE_TREAD;
-	if (ci)
+	if (ci) {
 		/* Do cache inhibited access */
 		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg, P9_TTYPE_CI_PARTIAL_READ);
-	else
+		block_size = (blog2(block_size) + 1) << 1;
+	} else {
 		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg, P9_TTYPE_DMA_PARTIAL_READ);

-	/* For a read size is apparently always 0 */
-	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, 0);
+		/* For normal reads the size is ignored as HW always
+		 * returns a cache line */
+		block_size = 0;
+	}
+
+	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
  	cmd_reg |= FBC_ALTD_START_OP;
 	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
 	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg, DROP_PRIORITY_LOW);
@@ -400,7 +448,8 @@  retry:
 	return 0;
 }

-static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size, int ci)
+static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size,
+			 int ci, uint8_t block_size)
 {
 	uint64_t ctrl_reg, cmd_reg, val;

@@ -409,12 +458,15 @@  static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size
 	size <<= 1;

 	cmd_reg = P9_TTYPE_TWRITE;
-	if (ci)
+	if (ci) {
 		/* Do cache inhibited access */
 		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg, P9_TTYPE_CI_PARTIAL_WRITE);
-	else
+		block_size = (blog2(block_size) + 1) << 1;
+	} else {
 		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg, P9_TTYPE_DMA_PARTIAL_WRITE);
-	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, size);
+		block_size <<= 1;
+	}
+	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
  	cmd_reg |= FBC_ALTD_START_OP;
 	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
 	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg, DROP_PRIORITY_LOW);
diff --git a/libpdbg/libpdbg.h b/libpdbg/libpdbg.h
index 301c2c8..9ac8804 100644
--- a/libpdbg/libpdbg.h
+++ b/libpdbg/libpdbg.h
@@ -204,12 +204,18 @@  int htm_status(struct pdbg_target *target);
 int htm_dump(struct pdbg_target *target, char *filename);
 int htm_record(struct pdbg_target *target, char *filename);

-int adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t *ouput, uint64_t size);
-int adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t *input, uint64_t size);
-int adu_getmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t *ouput, uint64_t size);
-int adu_putmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t *input, uint64_t size);
-int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t *ouput, uint64_t size, bool ci);
-int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t *input, uint64_t size, bool ci);
+int adu_getmem(struct pdbg_target *target, uint64_t addr,
+	       uint8_t *ouput, uint64_t size, uint8_t block_size);
+int adu_putmem(struct pdbg_target *target, uint64_t addr,
+	       uint8_t *input, uint64_t size, uint8_t block_size);
+int adu_getmem_ci(struct pdbg_target *target, uint64_t addr,
+		  uint8_t *ouput, uint64_t size, uint8_t block_size);
+int adu_putmem_ci(struct pdbg_target *target, uint64_t addr,
+		  uint8_t *input, uint64_t size, uint8_t block_size);
+int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t *ouput,
+		 uint64_t size, bool ci, uint8_t block_size);
+int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t *input,
+		 uint64_t size, bool ci, uint8_t block_size);

 int opb_read(struct pdbg_target *target, uint32_t addr, uint32_t *data);
 int opb_write(struct pdbg_target *target, uint32_t addr, uint32_t data);
diff --git a/libpdbg/target.h b/libpdbg/target.h
index 7cc855d..16ae304 100644
--- a/libpdbg/target.h
+++ b/libpdbg/target.h
@@ -107,8 +107,8 @@  struct htm {

 struct adu {
 	struct pdbg_target target;
-	int (*getmem)(struct adu *, uint64_t, uint64_t *, int);
-	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int);
+	int (*getmem)(struct adu *, uint64_t, uint64_t *, int, uint8_t);
+	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int, uint8_t);
 };
 #define target_to_adu(x) container_of(x, struct adu, target)

diff --git a/src/mem.c b/src/mem.c
index ce099c2..b578f95 100644
--- a/src/mem.c
+++ b/src/mem.c
@@ -39,8 +39,10 @@  struct mem_flags {
 };

 #define MEM_CI_FLAG ("--ci", ci, parse_flag_noarg, false)
+#define BLOCK_SIZE (parse_number8_pow2, "8")

-static int getmem(uint64_t addr, uint64_t size, struct mem_flags flags)
+static int getmem(uint64_t addr, uint64_t size,
+		  uint8_t block_size, struct mem_flags flags)
 {
 	struct pdbg_target *target;
 	uint8_t *buf;
@@ -59,7 +61,7 @@  static int getmem(uint64_t addr, uint64_t size, struct mem_flags flags)

 		pdbg_set_progress_tick(progress_tick);
 		progress_init();
-		if (!__adu_getmem(target, addr, buf, size, flags.ci)) {
+		if (!__adu_getmem(target, addr, buf, size, flags.ci, block_size)) {
 			if (write(STDOUT_FILENO, buf, size) < 0)
 				PR_ERROR("Unable to write stdout.\n");
 			else
@@ -74,10 +76,10 @@  static int getmem(uint64_t addr, uint64_t size, struct mem_flags flags)
 	return rc;

 }
-OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA),
+OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA, BLOCK_SIZE),
 			     mem_flags, (MEM_CI_FLAG));

-static int putmem(uint64_t addr, struct mem_flags flags)
+static int putmem(uint64_t addr, uint8_t block_size, struct mem_flags flags)
 {
 	uint8_t *buf;
 	int read_size, rc = 0;
@@ -98,7 +100,7 @@  static int putmem(uint64_t addr, struct mem_flags flags)
 		if (read_size <= 0)
 			break;

-		if (__adu_putmem(adu_target, addr, buf, read_size, flags.ci)) {
+		if (__adu_putmem(adu_target, addr, buf, read_size, flags.ci, block_size)) {
 			rc = 0;
 			printf("Unable to write memory.\n");
 			break;
@@ -111,5 +113,5 @@  static int putmem(uint64_t addr, struct mem_flags flags)
 	free(buf);
 	return rc;
 }
-OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS),
+OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS, BLOCK_SIZE),
 			     mem_flags, (MEM_CI_FLAG));
diff --git a/src/pdbgproxy.c b/src/pdbgproxy.c
index dedea7a..877cd4c 100644
--- a/src/pdbgproxy.c
+++ b/src/pdbgproxy.c
@@ -223,7 +223,7 @@  static void get_mem(uint64_t *stack, void *priv)

 	linear_map = get_real_addr(addr);
 	if (linear_map != -1UL) {
-		if (adu_getmem(adu_target, linear_map, (uint8_t *) data, len)) {
+	if (adu_getmem(adu_target, linear_map, (uint8_t *) data, len, 8)) {
 			PR_ERROR("Unable to read memory\n");
 			err = 1;
 		}
@@ -293,7 +293,7 @@  static void put_mem(uint64_t *stack, void *priv)

 	PR_INFO("put_mem 0x%016" PRIx64 " = 0x%016" PRIx64 "\n", addr, stack[2]);

-	if (adu_putmem(adu_target, addr, data, len)) {
+	if (adu_putmem(adu_target, addr, data, len, 8)) {
 		PR_ERROR("Unable to write memory\n");
 		err = 3;
 	}
diff --git a/src/thread.c b/src/thread.c
index 1fd448d..2879c19 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -35,7 +35,7 @@  static bool is_real_address(struct thread_regs *regs, uint64_t addr)

 static int load8(struct pdbg_target *target, uint64_t addr, uint64_t *value)
 {
-	if (adu_getmem(target, addr, (uint8_t *)value, 8)) {
+	if (adu_getmem(target, addr, (uint8_t *)value, 8, 8)) {
 		pdbg_log(PDBG_ERROR, "Unable to read memory address=%016" PRIx64 ".\n", addr);
 		return 0;
 	}