diff mbox series

[RESEND,v4] adu: Add arugments for block size

Message ID 20181218013336.17653-1-alistair@popple.id.au
State Accepted
Headers show
Series [RESEND,v4] adu: Add arugments for block size | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success master/apply_patch Successfully applied
snowpatch_ozlabs/build-multiarch fail Test build-multiarch on branch master

Commit Message

Alistair Popple Dec. 18, 2018, 1:33 a.m. UTC
Not all memory can be read with the default ADU block size of 8
bytes. Specifically cache-inhibited access to some MMIO regions such
as PCIe BAR spaces requires 4 byte accesses to avoid check stopping
the machine.

This patch adds library functions to read/write IO memory which takes
a block size argument and two new put/getmemio commands to allow a
specific block size to be specified from the command line.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
---

Changes since v1:
	- Fixed up comments
	- Removed debug code

Changes since v2:
	- Added new adu_getmem_io and adu_putmem_io API
	  calls instead of changing existing API.

Changes since v3:
	- Added getmemio and putmemio command line commands instead of
          adding block size to existing commands.

 libpdbg/adu.c     | 181 +++++++++++++++++++++++++++++++++++++++---------------
 libpdbg/libpdbg.h |  22 +++++--
 libpdbg/target.h  |   4 +-
 src/main.c        |  12 ++--
 src/mem.c         |  63 +++++++++++++++----
 5 files changed, 207 insertions(+), 75 deletions(-)

--
2.11.0

Comments

Amitay Isaacs Dec. 18, 2018, 2:35 a.m. UTC | #1
Reviewed-by: Amitay Isaacs <amitay@ozlabs.org>

Minor fixup required to command usage mentioned below.

On Tue, 2018-12-18 at 12:33 +1100, Alistair Popple wrote:
> Not all memory can be read with the default ADU block size of 8
> bytes. Specifically cache-inhibited access to some MMIO regions such
> as PCIe BAR spaces requires 4 byte accesses to avoid check stopping
> the machine.
> 
> This patch adds library functions to read/write IO memory which takes
> a block size argument and two new put/getmemio commands to allow a
> specific block size to be specified from the command line.
> 
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
> ---
> 
> Changes since v1:
> 	- Fixed up comments
> 	- Removed debug code
> 
> Changes since v2:
> 	- Added new adu_getmem_io and adu_putmem_io API
> 	  calls instead of changing existing API.
> 
> Changes since v3:
> 	- Added getmemio and putmemio command line commands instead of
>           adding block size to existing commands.
> 
>  libpdbg/adu.c     | 181 +++++++++++++++++++++++++++++++++++++++-----
> ----------
>  libpdbg/libpdbg.h |  22 +++++--
>  libpdbg/target.h  |   4 +-
>  src/main.c        |  12 ++--
>  src/mem.c         |  63 +++++++++++++++----
>  5 files changed, 207 insertions(+), 75 deletions(-)
> 
> diff --git a/libpdbg/adu.c b/libpdbg/adu.c
> index b67a43e..5cb1373 100644
> --- a/libpdbg/adu.c
> +++ b/libpdbg/adu.c
> @@ -84,20 +84,34 @@
>  #define FBC_ALTD_DATA_DONE	PPC_BIT(3)
>  #define FBC_ALTD_PBINIT_MISSING PPC_BIT(18)
> 
> -int adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
> -	       uint8_t *output, uint64_t size)
> -{
> -	return __adu_getmem(adu_target, start_addr, output, size,
> false);
> -}
> -
> -int adu_getmem_ci(struct pdbg_target *adu_target, uint64_t
> start_addr,
> -		  uint8_t *output, uint64_t size)
> +/* There are more general implementations of this with a loop and
> more
> + * performant implementations using GCC builtins which aren't
> + * portable. Given we only need a limited domain this is quick, easy
> + * and portable. */
> +uint8_t blog2(uint8_t x)
>  {
> -	return __adu_getmem(adu_target, start_addr, output, size,
> true);
> +	switch(x) {
> +	case 1:
> +		return 0;
> +	case 2:
> +		return 1;
> +	case 4:
> +		return 2;
> +	case 8:
> +		return 3;
> +	case 16:
> +		return 4;
> +	case 32:
> +		return 5;
> +	case 64:
> +		return 6;
> +	default:
> +		assert(0);
> +	}
>  }
> 
> -int __adu_getmem(struct pdbg_target *adu_target, uint64_t
> start_addr,
> -		 uint8_t *output, uint64_t size, bool ci)
> +static int __adu_getmem_blocksize(struct pdbg_target *adu_target,
> uint64_t start_addr,
> +				  uint8_t *output, uint64_t size,
> uint8_t block_size, bool ci)
>  {
>  	struct adu *adu;
>  	uint8_t *output0;
> @@ -109,33 +123,34 @@ int __adu_getmem(struct pdbg_target
> *adu_target, uint64_t start_addr,
> 
>  	output0 = output;
> 
> -	/* Align start address to 8-byte boundary */
> -	addr0 = 8 * (start_addr / 8);
> +	/* Align start address to block_sized boundary */
> +	addr0 = block_size * (start_addr / block_size);
> 
> -	/* We read data in 8-byte aligned chunks */
> -	for (addr = addr0; addr < start_addr + size; addr += 8) {
> +	/* We read data in block_sized aligned chunks */
> +	for (addr = addr0; addr < start_addr + size; addr +=
> block_size) {
>  		uint64_t data;
> 
> -		if (adu->getmem(adu, addr, &data, ci))
> +		if (adu->getmem(adu, addr, &data, ci, block_size))
>  			return -1;
> 
> -		/* ADU returns data in big-endian form in the register
> */
> +		/* ADU returns data in big-endian form in the register.
> */
>  		data = __builtin_bswap64(data);
> +		data >>= (addr & 0x7ull)*8;
> 
>  		if (addr < start_addr) {
>  			size_t offset = start_addr - addr;
> -			size_t n = (size <= 8-offset ? size : 8-
> offset);
> +			size_t n = (size <= block_size-offset ? size :
> block_size-offset);
> 
>  			memcpy(output, ((uint8_t *) &data) + offset,
> n);
>  			output += n;
> -		} else if (addr + 8 > start_addr + size) {
> +		} else if (addr + block_size > start_addr + size) {
>  			uint64_t offset = start_addr + size - addr;
> 
>  			memcpy(output, &data, offset);
>  			output += offset;
>  		} else {
> -			memcpy(output, &data, 8);
> -			output += 8;
> +			memcpy(output, &data, block_size);
> +			output += block_size;
>  		}
> 
>  		pdbg_progress_tick(output - output0, size);
> @@ -146,20 +161,37 @@ int __adu_getmem(struct pdbg_target
> *adu_target, uint64_t start_addr,
>  	return rc;
>  }
> 
> -int adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
> +int adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
>  	       uint8_t *output, uint64_t size)
>  {
> -	return __adu_putmem(adu_target, start_addr, output, size,
> false);
> +	return __adu_getmem_blocksize(adu_target, start_addr, output,
> +				      size, 8, false);
>  }
> 
> -int adu_putmem_ci(struct pdbg_target *adu_target, uint64_t
> start_addr,
> +int adu_getmem_ci(struct pdbg_target *adu_target, uint64_t
> start_addr,
>  		  uint8_t *output, uint64_t size)
>  {
> -	return __adu_putmem(adu_target, start_addr, output, size,
> true);
> +	return __adu_getmem_blocksize(adu_target, start_addr, output,
> +			    size, 8, true);
>  }
> 
> -int __adu_putmem(struct pdbg_target *adu_target, uint64_t
> start_addr,
> -		 uint8_t *input, uint64_t size, bool ci)
> +int adu_getmem_io(struct pdbg_target *adu_target, uint64_t
> start_addr,
> +		  uint8_t *output, uint64_t size, uint8_t blocksize)
> +{
> +	/* There is no equivalent for cachable memory as blocksize
> +	 * does not apply to cachable reads */
> +	return __adu_getmem_blocksize(adu_target, start_addr, output,
> +				      size, blocksize, true);
> +}
> +
> +int __adu_getmem(struct pdbg_target *adu_target, uint64_t
> start_addr,
> +		 uint8_t *output, uint64_t size, bool ci)
> +{
> +	return __adu_getmem_blocksize(adu_target, start_addr, output,
> +				      size, 8, ci);
> +}
> +static int __adu_putmem_blocksize(struct pdbg_target *adu_target,
> uint64_t start_addr,
> +				  uint8_t *input, uint64_t size,
> uint8_t block_size, bool ci)
>  {
>  	struct adu *adu;
>  	int rc = 0, tsize;
> @@ -169,20 +201,25 @@ int __adu_putmem(struct pdbg_target
> *adu_target, uint64_t start_addr,
>  	adu = target_to_adu(adu_target);
>  	end_addr = start_addr + size;
>  	for (addr = start_addr; addr < end_addr; addr += tsize, input
> += tsize) {
> -		if ((addr % 8) || (addr + 8 > end_addr)) {
> -			/* If the address is not 64-bit aligned we
> -			 * copy in a byte at a time until it is. */
> +		if ((addr % block_size) || (addr + block_size >
> end_addr)) {
> +			/* If the address is not aligned to block_size
> +			 * we copy the data in one byte at a time
> +			 * until it is aligned. */
>  			tsize = 1;
> 
> -			/* Copy the input data in with correct
> alignment */
> +			/* Copy the input data in with correct
> +			 * alignment. Bytes need to aligned to the
> +			 * correct byte offset in the data register
> +			 * regardless of address. */
>  			data = ((uint64_t) *input) << 8*(8 - (addr % 8)
> - 1);
>  		} else {
> -			tsize = 8;
> -			memcpy(&data, input, sizeof(data));
> +			tsize = block_size;
> +			memcpy(&data, input, block_size);
>  			data = __builtin_bswap64(data);
> +			data >>= (addr & 7ull)*8;
>  		}
> 
> -		adu->putmem(adu, addr, data, tsize, ci);
> +		adu->putmem(adu, addr, data, tsize, ci, block_size);
>  		pdbg_progress_tick(addr - start_addr, size);
>  	}
> 
> @@ -191,6 +228,30 @@ int __adu_putmem(struct pdbg_target *adu_target,
> uint64_t start_addr,
>  	return rc;
>  }
> 
> +int adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
> +	       uint8_t *input, uint64_t size)
> +{
> +	return __adu_putmem_blocksize(adu_target, start_addr, input,
> size, 8, false);
> +}
> +
> +int adu_putmem_ci(struct pdbg_target *adu_target, uint64_t
> start_addr,
> +		  uint8_t *input, uint64_t size)
> +{
> +	return __adu_putmem_blocksize(adu_target, start_addr, input,
> size, 8, true);
> +}
> +
> +int adu_putmem_io(struct pdbg_target *adu_target, uint64_t
> start_addr,
> +		  uint8_t *input, uint64_t size, uint8_t block_size)
> +{
> +	return __adu_putmem_blocksize(adu_target, start_addr, input,
> size, block_size, true);
> +}
> +
> +int __adu_putmem(struct pdbg_target *adu_target, uint64_t
> start_addr,
> +		 uint8_t *input, uint64_t size, bool ci)
> +{
> +	return __adu_putmem_blocksize(adu_target, start_addr, input,
> size, 8, ci);
> +}
> +
>  static int adu_lock(struct adu *adu)
>  {
>  	uint64_t val;
> @@ -234,7 +295,8 @@ static int adu_reset(struct adu *adu)
>  	return 0;
>  }
> 
> -static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> *data, int ci)
> +static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> *data,
> +			 int ci, uint8_t block_size)
>  {
>  	uint64_t ctrl_reg, cmd_reg, val;
>  	int rc = 0;
> @@ -242,12 +304,15 @@ static int p8_adu_getmem(struct adu *adu,
> uint64_t addr, uint64_t *data, int ci)
>  	CHECK_ERR(adu_lock(adu));
> 
>  	ctrl_reg = P8_TTYPE_TREAD;
> -	if (ci)
> +	if (ci) {
>  		/* Do cache inhibited access */
>  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> P8_TTYPE_CI_PARTIAL_READ);
> -	else
> +		block_size = (blog2(block_size) + 1) << 1;
> +	} else {
>  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> P8_TTYPE_DMA_PARTIAL_READ);
> -	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, 8);
> +		block_size = 0;
> +	}
> +	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);
> 
>  	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target,
> P8_ALTD_CMD_REG, &cmd_reg));
>  	cmd_reg |= FBC_ALTD_START_OP;
> @@ -292,19 +357,23 @@ out:
> 
>  }
> 
> -int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int
> size, int ci)
> +int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int
> size,
> +		  int ci, uint8_t block_size)
>  {
>  	int rc = 0;
>  	uint64_t cmd_reg, ctrl_reg, val;
>  	CHECK_ERR(adu_lock(adu));
> 
>  	ctrl_reg = P8_TTYPE_TWRITE;
> -	if (ci)
> +	if (ci) {
>  		/* Do cache inhibited access */
>  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> P8_TTYPE_CI_PARTIAL_WRITE);
> -	else
> +		block_size = (blog2(block_size) + 1) << 1;
> +	} else {
>  		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg,
> P8_TTYPE_DMA_PARTIAL_WRITE);
> -	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, size);
> +		block_size <<= 1;
> +	}
> +	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);
> 
>  	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target,
> P8_ALTD_CMD_REG, &cmd_reg));
>  	cmd_reg |= FBC_ALTD_START_OP;
> @@ -349,19 +418,25 @@ out:
>  	return rc;
>  }
> 
> -static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> *data, int ci)
> +static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t
> *data,
> +			 int ci, uint8_t block_size)
>  {
>  	uint64_t ctrl_reg, cmd_reg, val;
> 
>  	cmd_reg = P9_TTYPE_TREAD;
> -	if (ci)
> +	if (ci) {
>  		/* Do cache inhibited access */
>  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> P9_TTYPE_CI_PARTIAL_READ);
> -	else
> +		block_size = (blog2(block_size) + 1) << 1;
> +	} else {
>  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> P9_TTYPE_DMA_PARTIAL_READ);
> 
> -	/* For a read size is apparently always 0 */
> -	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, 0);
> +		/* For normal reads the size is ignored as HW always
> +		 * returns a cache line */
> +		block_size = 0;
> +	}
> +
> +	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
>   	cmd_reg |= FBC_ALTD_START_OP;
>  	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
>  	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg,
> DROP_PRIORITY_LOW);
> @@ -400,7 +475,8 @@ retry:
>  	return 0;
>  }
> 
> -static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t
> data, int size, int ci)
> +static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t
> data, int size,
> +			 int ci, uint8_t block_size)
>  {
>  	uint64_t ctrl_reg, cmd_reg, val;
> 
> @@ -409,12 +485,15 @@ static int p9_adu_putmem(struct adu *adu,
> uint64_t addr, uint64_t data, int size
>  	size <<= 1;
> 
>  	cmd_reg = P9_TTYPE_TWRITE;
> -	if (ci)
> +	if (ci) {
>  		/* Do cache inhibited access */
>  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> P9_TTYPE_CI_PARTIAL_WRITE);
> -	else
> +		block_size = (blog2(block_size) + 1) << 1;
> +	} else {
>  		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg,
> P9_TTYPE_DMA_PARTIAL_WRITE);
> -	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, size);
> +		block_size <<= 1;
> +	}
> +	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
>   	cmd_reg |= FBC_ALTD_START_OP;
>  	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
>  	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg,
> DROP_PRIORITY_LOW);
> diff --git a/libpdbg/libpdbg.h b/libpdbg/libpdbg.h
> index 301c2c8..ee8437f 100644
> --- a/libpdbg/libpdbg.h
> +++ b/libpdbg/libpdbg.h
> @@ -204,12 +204,22 @@ int htm_status(struct pdbg_target *target);
>  int htm_dump(struct pdbg_target *target, char *filename);
>  int htm_record(struct pdbg_target *target, char *filename);
> 
> -int adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *ouput, uint64_t size);
> -int adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *input, uint64_t size);
> -int adu_getmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t
> *ouput, uint64_t size);
> -int adu_putmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t
> *input, uint64_t size);
> -int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *ouput, uint64_t size, bool ci);
> -int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *input, uint64_t size, bool ci);
> +int adu_getmem(struct pdbg_target *target, uint64_t addr,
> +	       uint8_t *ouput, uint64_t size);
> +int adu_putmem(struct pdbg_target *target, uint64_t addr,
> +	       uint8_t *input, uint64_t size);
> +int adu_getmem_ci(struct pdbg_target *target, uint64_t addr,
> +		  uint8_t *ouput, uint64_t size);
> +int adu_putmem_ci(struct pdbg_target *target, uint64_t addr,
> +		  uint8_t *input, uint64_t size);
> +int adu_getmem_io(struct pdbg_target *adu_target, uint64_t
> start_addr,
> +		  uint8_t *output, uint64_t size, uint8_t blocksize);
> +int adu_putmem_io(struct pdbg_target *adu_target, uint64_t
> start_addr,
> +		  uint8_t *input, uint64_t size, uint8_t block_size);
> +int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *ouput,
> +		 uint64_t size, bool ci);
> +int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t
> *input,
> +		 uint64_t size, bool ci);
> 
>  int opb_read(struct pdbg_target *target, uint32_t addr, uint32_t
> *data);
>  int opb_write(struct pdbg_target *target, uint32_t addr, uint32_t
> data);
> diff --git a/libpdbg/target.h b/libpdbg/target.h
> index 7cc855d..16ae304 100644
> --- a/libpdbg/target.h
> +++ b/libpdbg/target.h
> @@ -107,8 +107,8 @@ struct htm {
> 
>  struct adu {
>  	struct pdbg_target target;
> -	int (*getmem)(struct adu *, uint64_t, uint64_t *, int);
> -	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int);
> +	int (*getmem)(struct adu *, uint64_t, uint64_t *, int,
> uint8_t);
> +	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int,
> uint8_t);
>  };
>  #define target_to_adu(x) container_of(x, struct adu, target)
> 
> diff --git a/src/main.c b/src/main.c
> index 1f68fa8..eb8c9ec 100644
> --- a/src/main.c
> +++ b/src/main.c
> @@ -91,8 +91,9 @@ extern struct optcmd_cmd
>  	optcmd_getnia, optcmd_putnia, optcmd_getmsr, optcmd_putmsr,
>  	optcmd_getring, optcmd_start, optcmd_stop, optcmd_step,
>  	optcmd_threadstatus, optcmd_sreset, optcmd_regs, optcmd_probe,
> -	optcmd_getmem, optcmd_putmem, optcmd_getxer, optcmd_putxer,
> -	optcmd_getcr, optcmd_putcr, optcmd_gdbserver;
> +	optcmd_getmem, optcmd_putmem, optcmd_getmemio, optcmd_putmemio,
> +	optcmd_getxer, optcmd_putxer, optcmd_getcr, optcmd_putcr,
> +	optcmd_gdbserver;
> 
>  static struct optcmd_cmd *cmds[] = {
>  	&optcmd_getscom, &optcmd_putscom, &optcmd_getcfam,
> &optcmd_putcfam,
> @@ -100,8 +101,9 @@ static struct optcmd_cmd *cmds[] = {
>  	&optcmd_getnia, &optcmd_putnia, &optcmd_getmsr, &optcmd_putmsr,
>  	&optcmd_getring, &optcmd_start, &optcmd_stop, &optcmd_step,
>  	&optcmd_threadstatus, &optcmd_sreset, &optcmd_regs,
> &optcmd_probe,
> -	&optcmd_getmem, &optcmd_putmem, &optcmd_getxer, &optcmd_putxer,
> -	&optcmd_getcr, &optcmd_putcr, &optcmd_gdbserver,
> +	&optcmd_getmem, &optcmd_putmem, &optcmd_getmemio,
> &optcmd_putmemio,
> +	&optcmd_getxer, &optcmd_putxer, &optcmd_getcr, &optcmd_putcr,
> +	&optcmd_gdbserver,
>  };
> 
>  /* Purely for printing usage text. We could integrate printing
> argument and flag
> @@ -136,7 +138,9 @@ static struct action actions[] = {
>  	{ "getscom", "<address>", "Read system scom" },
>  	{ "putscom", "<address> <value> [<mask>]", "Write system scom"
> },
>  	{ "getmem",  "<address> <count>", "Read system memory" },
> +	{ "getmemio", "<address> <count> [<block size>]", "Read memory
> cache inhibited with specified transfer size" },
>  	{ "putmem",  "<address>", "Write to system memory" },
> +	{ "putmemio", "<address> [<block size>]", "Write system memory
> cache inhibited with specified transfer size" },

block size is not optional argument, so should not be in [].

>  	{ "threadstatus", "", "Print the status of a thread" },
>  	{ "sreset",  "", "Reset" },
>  	{ "regs",  "[--backtrace]", "State (optionally display
> backtrace)" },
> diff --git a/src/mem.c b/src/mem.c
> index ce099c2..7bec152 100644
> --- a/src/mem.c
> +++ b/src/mem.c
> @@ -39,8 +39,9 @@ struct mem_flags {
>  };
> 
>  #define MEM_CI_FLAG ("--ci", ci, parse_flag_noarg, false)
> +#define BLOCK_SIZE (parse_number8_pow2, "8")
> 
> -static int getmem(uint64_t addr, uint64_t size, struct mem_flags
> flags)
> +static int _getmem(uint64_t addr, uint64_t size, uint8_t block_size)
>  {
>  	struct pdbg_target *target;
>  	uint8_t *buf;
> @@ -59,14 +60,19 @@ static int getmem(uint64_t addr, uint64_t size,
> struct mem_flags flags)
> 
>  		pdbg_set_progress_tick(progress_tick);
>  		progress_init();
> -		if (!__adu_getmem(target, addr, buf, size, flags.ci)) {
> -			if (write(STDOUT_FILENO, buf, size) < 0)
> -				PR_ERROR("Unable to write stdout.\n");
> -			else
> -				rc++;
> -		} else
> +		if (block_size)
> +			rc = adu_getmem_io(target, addr, buf, size,
> block_size);
> +		else
> +			rc = adu_getmem(target, addr, buf, size);
> +
> +		if (rc)
>  			PR_ERROR("Unable to read memory.\n");
> -			/* We only ever care about getting memory from
> a single processor */
> +
> +		if (write(STDOUT_FILENO, buf, size) < 0)
> +			PR_ERROR("Unable to write stdout.\n");
> +		else
> +				rc++;
> +
>  		progress_end();
>  		break;
>  	}
> @@ -74,10 +80,24 @@ static int getmem(uint64_t addr, uint64_t size,
> struct mem_flags flags)
>  	return rc;
> 
>  }
> +
> +static int getmem(uint64_t addr, uint64_t size, struct mem_flags
> flags)
> +{
> +	if (flags.ci)
> +		return _getmem(addr, size, 8);
> +	else
> +		return _getmem(addr, size, 0);
> +}
>  OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA),
>  			     mem_flags, (MEM_CI_FLAG));
> 
> -static int putmem(uint64_t addr, struct mem_flags flags)
> +static int getmemio(uint64_t addr, uint64_t size, uint8_t
> block_size)
> +{
> +	return _getmem(addr, size, block_size);
> +}
> +OPTCMD_DEFINE_CMD_WITH_ARGS(getmemio, getmemio, (ADDRESS, DATA,
> BLOCK_SIZE));
> +
> +static int _putmem(uint64_t addr, uint8_t block_size)
>  {
>  	uint8_t *buf;
>  	int read_size, rc = 0;
> @@ -98,11 +118,17 @@ static int putmem(uint64_t addr, struct
> mem_flags flags)
>  		if (read_size <= 0)
>  			break;
> 
> -		if (__adu_putmem(adu_target, addr, buf, read_size,
> flags.ci)) {
> +		if (block_size)
> +			rc = adu_putmem_io(adu_target, addr, buf,
> read_size, block_size);
> +		else
> +			rc = adu_putmem(adu_target, addr, buf,
> read_size);
> +
> +		if (rc) {
>  			rc = 0;
>  			printf("Unable to write memory.\n");
>  			break;
>  		}
> +
>  		rc += read_size;
>  	} while (read_size > 0);
>  	progress_end();
> @@ -111,5 +137,18 @@ static int putmem(uint64_t addr, struct
> mem_flags flags)
>  	free(buf);
>  	return rc;
>  }
> -OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS),
> -			     mem_flags, (MEM_CI_FLAG));
> +
> +static int putmem(uint64_t addr, struct mem_flags flags)
> +{
> +	if (flags.ci)
> +		return _putmem(addr, 8);
> +	else
> +		return _putmem(addr, 0);
> +}
> +OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS), mem_flags,
> (MEM_CI_FLAG));
> +
> +static int putmemio(uint64_t addr, uint8_t block_size)
> +{
> +	return _putmem(addr, block_size);
> +}
> +OPTCMD_DEFINE_CMD_WITH_ARGS(putmemio, putmemio, (ADDRESS,
> BLOCK_SIZE));
> --
> 2.11.0

Amitay.
diff mbox series

Patch

diff --git a/libpdbg/adu.c b/libpdbg/adu.c
index b67a43e..5cb1373 100644
--- a/libpdbg/adu.c
+++ b/libpdbg/adu.c
@@ -84,20 +84,34 @@ 
 #define FBC_ALTD_DATA_DONE	PPC_BIT(3)
 #define FBC_ALTD_PBINIT_MISSING PPC_BIT(18)

-int adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
-	       uint8_t *output, uint64_t size)
-{
-	return __adu_getmem(adu_target, start_addr, output, size, false);
-}
-
-int adu_getmem_ci(struct pdbg_target *adu_target, uint64_t start_addr,
-		  uint8_t *output, uint64_t size)
+/* There are more general implementations of this with a loop and more
+ * performant implementations using GCC builtins which aren't
+ * portable. Given we only need a limited domain this is quick, easy
+ * and portable. */
+uint8_t blog2(uint8_t x)
 {
-	return __adu_getmem(adu_target, start_addr, output, size, true);
+	switch(x) {
+	case 1:
+		return 0;
+	case 2:
+		return 1;
+	case 4:
+		return 2;
+	case 8:
+		return 3;
+	case 16:
+		return 4;
+	case 32:
+		return 5;
+	case 64:
+		return 6;
+	default:
+		assert(0);
+	}
 }

-int __adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
-		 uint8_t *output, uint64_t size, bool ci)
+static int __adu_getmem_blocksize(struct pdbg_target *adu_target, uint64_t start_addr,
+				  uint8_t *output, uint64_t size, uint8_t block_size, bool ci)
 {
 	struct adu *adu;
 	uint8_t *output0;
@@ -109,33 +123,34 @@  int __adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,

 	output0 = output;

-	/* Align start address to 8-byte boundary */
-	addr0 = 8 * (start_addr / 8);
+	/* Align start address to block_sized boundary */
+	addr0 = block_size * (start_addr / block_size);

-	/* We read data in 8-byte aligned chunks */
-	for (addr = addr0; addr < start_addr + size; addr += 8) {
+	/* We read data in block_sized aligned chunks */
+	for (addr = addr0; addr < start_addr + size; addr += block_size) {
 		uint64_t data;

-		if (adu->getmem(adu, addr, &data, ci))
+		if (adu->getmem(adu, addr, &data, ci, block_size))
 			return -1;

-		/* ADU returns data in big-endian form in the register */
+		/* ADU returns data in big-endian form in the register. */
 		data = __builtin_bswap64(data);
+		data >>= (addr & 0x7ull)*8;

 		if (addr < start_addr) {
 			size_t offset = start_addr - addr;
-			size_t n = (size <= 8-offset ? size : 8-offset);
+			size_t n = (size <= block_size-offset ? size : block_size-offset);

 			memcpy(output, ((uint8_t *) &data) + offset, n);
 			output += n;
-		} else if (addr + 8 > start_addr + size) {
+		} else if (addr + block_size > start_addr + size) {
 			uint64_t offset = start_addr + size - addr;

 			memcpy(output, &data, offset);
 			output += offset;
 		} else {
-			memcpy(output, &data, 8);
-			output += 8;
+			memcpy(output, &data, block_size);
+			output += block_size;
 		}

 		pdbg_progress_tick(output - output0, size);
@@ -146,20 +161,37 @@  int __adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
 	return rc;
 }

-int adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
+int adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
 	       uint8_t *output, uint64_t size)
 {
-	return __adu_putmem(adu_target, start_addr, output, size, false);
+	return __adu_getmem_blocksize(adu_target, start_addr, output,
+				      size, 8, false);
 }

-int adu_putmem_ci(struct pdbg_target *adu_target, uint64_t start_addr,
+int adu_getmem_ci(struct pdbg_target *adu_target, uint64_t start_addr,
 		  uint8_t *output, uint64_t size)
 {
-	return __adu_putmem(adu_target, start_addr, output, size, true);
+	return __adu_getmem_blocksize(adu_target, start_addr, output,
+			    size, 8, true);
 }

-int __adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
-		 uint8_t *input, uint64_t size, bool ci)
+int adu_getmem_io(struct pdbg_target *adu_target, uint64_t start_addr,
+		  uint8_t *output, uint64_t size, uint8_t blocksize)
+{
+	/* There is no equivalent for cachable memory as blocksize
+	 * does not apply to cachable reads */
+	return __adu_getmem_blocksize(adu_target, start_addr, output,
+				      size, blocksize, true);
+}
+
+int __adu_getmem(struct pdbg_target *adu_target, uint64_t start_addr,
+		 uint8_t *output, uint64_t size, bool ci)
+{
+	return __adu_getmem_blocksize(adu_target, start_addr, output,
+				      size, 8, ci);
+}
+static int __adu_putmem_blocksize(struct pdbg_target *adu_target, uint64_t start_addr,
+				  uint8_t *input, uint64_t size, uint8_t block_size, bool ci)
 {
 	struct adu *adu;
 	int rc = 0, tsize;
@@ -169,20 +201,25 @@  int __adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
 	adu = target_to_adu(adu_target);
 	end_addr = start_addr + size;
 	for (addr = start_addr; addr < end_addr; addr += tsize, input += tsize) {
-		if ((addr % 8) || (addr + 8 > end_addr)) {
-			/* If the address is not 64-bit aligned we
-			 * copy in a byte at a time until it is. */
+		if ((addr % block_size) || (addr + block_size > end_addr)) {
+			/* If the address is not aligned to block_size
+			 * we copy the data in one byte at a time
+			 * until it is aligned. */
 			tsize = 1;

-			/* Copy the input data in with correct alignment */
+			/* Copy the input data in with correct
+			 * alignment. Bytes need to aligned to the
+			 * correct byte offset in the data register
+			 * regardless of address. */
 			data = ((uint64_t) *input) << 8*(8 - (addr % 8) - 1);
 		} else {
-			tsize = 8;
-			memcpy(&data, input, sizeof(data));
+			tsize = block_size;
+			memcpy(&data, input, block_size);
 			data = __builtin_bswap64(data);
+			data >>= (addr & 7ull)*8;
 		}

-		adu->putmem(adu, addr, data, tsize, ci);
+		adu->putmem(adu, addr, data, tsize, ci, block_size);
 		pdbg_progress_tick(addr - start_addr, size);
 	}

@@ -191,6 +228,30 @@  int __adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
 	return rc;
 }

+int adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
+	       uint8_t *input, uint64_t size)
+{
+	return __adu_putmem_blocksize(adu_target, start_addr, input, size, 8, false);
+}
+
+int adu_putmem_ci(struct pdbg_target *adu_target, uint64_t start_addr,
+		  uint8_t *input, uint64_t size)
+{
+	return __adu_putmem_blocksize(adu_target, start_addr, input, size, 8, true);
+}
+
+int adu_putmem_io(struct pdbg_target *adu_target, uint64_t start_addr,
+		  uint8_t *input, uint64_t size, uint8_t block_size)
+{
+	return __adu_putmem_blocksize(adu_target, start_addr, input, size, block_size, true);
+}
+
+int __adu_putmem(struct pdbg_target *adu_target, uint64_t start_addr,
+		 uint8_t *input, uint64_t size, bool ci)
+{
+	return __adu_putmem_blocksize(adu_target, start_addr, input, size, 8, ci);
+}
+
 static int adu_lock(struct adu *adu)
 {
 	uint64_t val;
@@ -234,7 +295,8 @@  static int adu_reset(struct adu *adu)
 	return 0;
 }

-static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data, int ci)
+static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data,
+			 int ci, uint8_t block_size)
 {
 	uint64_t ctrl_reg, cmd_reg, val;
 	int rc = 0;
@@ -242,12 +304,15 @@  static int p8_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data, int ci)
 	CHECK_ERR(adu_lock(adu));

 	ctrl_reg = P8_TTYPE_TREAD;
-	if (ci)
+	if (ci) {
 		/* Do cache inhibited access */
 		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg, P8_TTYPE_CI_PARTIAL_READ);
-	else
+		block_size = (blog2(block_size) + 1) << 1;
+	} else {
 		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg, P8_TTYPE_DMA_PARTIAL_READ);
-	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, 8);
+		block_size = 0;
+	}
+	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);

 	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target, P8_ALTD_CMD_REG, &cmd_reg));
 	cmd_reg |= FBC_ALTD_START_OP;
@@ -292,19 +357,23 @@  out:

 }

-int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size, int ci)
+int p8_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size,
+		  int ci, uint8_t block_size)
 {
 	int rc = 0;
 	uint64_t cmd_reg, ctrl_reg, val;
 	CHECK_ERR(adu_lock(adu));

 	ctrl_reg = P8_TTYPE_TWRITE;
-	if (ci)
+	if (ci) {
 		/* Do cache inhibited access */
 		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg, P8_TTYPE_CI_PARTIAL_WRITE);
-	else
+		block_size = (blog2(block_size) + 1) << 1;
+	} else {
 		ctrl_reg = SETFIELD(P8_FBC_ALTD_TTYPE, ctrl_reg, P8_TTYPE_DMA_PARTIAL_WRITE);
-	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, size);
+		block_size <<= 1;
+	}
+	ctrl_reg = SETFIELD(P8_FBC_ALTD_TSIZE, ctrl_reg, block_size);

 	CHECK_ERR_GOTO(out, rc = pib_read(&adu->target, P8_ALTD_CMD_REG, &cmd_reg));
 	cmd_reg |= FBC_ALTD_START_OP;
@@ -349,19 +418,25 @@  out:
 	return rc;
 }

-static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data, int ci)
+static int p9_adu_getmem(struct adu *adu, uint64_t addr, uint64_t *data,
+			 int ci, uint8_t block_size)
 {
 	uint64_t ctrl_reg, cmd_reg, val;

 	cmd_reg = P9_TTYPE_TREAD;
-	if (ci)
+	if (ci) {
 		/* Do cache inhibited access */
 		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg, P9_TTYPE_CI_PARTIAL_READ);
-	else
+		block_size = (blog2(block_size) + 1) << 1;
+	} else {
 		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg, P9_TTYPE_DMA_PARTIAL_READ);

-	/* For a read size is apparently always 0 */
-	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, 0);
+		/* For normal reads the size is ignored as HW always
+		 * returns a cache line */
+		block_size = 0;
+	}
+
+	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
  	cmd_reg |= FBC_ALTD_START_OP;
 	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
 	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg, DROP_PRIORITY_LOW);
@@ -400,7 +475,8 @@  retry:
 	return 0;
 }

-static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size, int ci)
+static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size,
+			 int ci, uint8_t block_size)
 {
 	uint64_t ctrl_reg, cmd_reg, val;

@@ -409,12 +485,15 @@  static int p9_adu_putmem(struct adu *adu, uint64_t addr, uint64_t data, int size
 	size <<= 1;

 	cmd_reg = P9_TTYPE_TWRITE;
-	if (ci)
+	if (ci) {
 		/* Do cache inhibited access */
 		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg, P9_TTYPE_CI_PARTIAL_WRITE);
-	else
+		block_size = (blog2(block_size) + 1) << 1;
+	} else {
 		cmd_reg = SETFIELD(P9_FBC_ALTD_TTYPE, cmd_reg, P9_TTYPE_DMA_PARTIAL_WRITE);
-	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, size);
+		block_size <<= 1;
+	}
+	cmd_reg = SETFIELD(P9_FBC_ALTD_TSIZE, cmd_reg, block_size);
  	cmd_reg |= FBC_ALTD_START_OP;
 	cmd_reg = SETFIELD(FBC_ALTD_SCOPE, cmd_reg, SCOPE_REMOTE);
 	cmd_reg = SETFIELD(FBC_ALTD_DROP_PRIORITY, cmd_reg, DROP_PRIORITY_LOW);
diff --git a/libpdbg/libpdbg.h b/libpdbg/libpdbg.h
index 301c2c8..ee8437f 100644
--- a/libpdbg/libpdbg.h
+++ b/libpdbg/libpdbg.h
@@ -204,12 +204,22 @@  int htm_status(struct pdbg_target *target);
 int htm_dump(struct pdbg_target *target, char *filename);
 int htm_record(struct pdbg_target *target, char *filename);

-int adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t *ouput, uint64_t size);
-int adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t *input, uint64_t size);
-int adu_getmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t *ouput, uint64_t size);
-int adu_putmem_ci(struct pdbg_target *target, uint64_t addr, uint8_t *input, uint64_t size);
-int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t *ouput, uint64_t size, bool ci);
-int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t *input, uint64_t size, bool ci);
+int adu_getmem(struct pdbg_target *target, uint64_t addr,
+	       uint8_t *ouput, uint64_t size);
+int adu_putmem(struct pdbg_target *target, uint64_t addr,
+	       uint8_t *input, uint64_t size);
+int adu_getmem_ci(struct pdbg_target *target, uint64_t addr,
+		  uint8_t *ouput, uint64_t size);
+int adu_putmem_ci(struct pdbg_target *target, uint64_t addr,
+		  uint8_t *input, uint64_t size);
+int adu_getmem_io(struct pdbg_target *adu_target, uint64_t start_addr,
+		  uint8_t *output, uint64_t size, uint8_t blocksize);
+int adu_putmem_io(struct pdbg_target *adu_target, uint64_t start_addr,
+		  uint8_t *input, uint64_t size, uint8_t block_size);
+int __adu_getmem(struct pdbg_target *target, uint64_t addr, uint8_t *ouput,
+		 uint64_t size, bool ci);
+int __adu_putmem(struct pdbg_target *target, uint64_t addr, uint8_t *input,
+		 uint64_t size, bool ci);

 int opb_read(struct pdbg_target *target, uint32_t addr, uint32_t *data);
 int opb_write(struct pdbg_target *target, uint32_t addr, uint32_t data);
diff --git a/libpdbg/target.h b/libpdbg/target.h
index 7cc855d..16ae304 100644
--- a/libpdbg/target.h
+++ b/libpdbg/target.h
@@ -107,8 +107,8 @@  struct htm {

 struct adu {
 	struct pdbg_target target;
-	int (*getmem)(struct adu *, uint64_t, uint64_t *, int);
-	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int);
+	int (*getmem)(struct adu *, uint64_t, uint64_t *, int, uint8_t);
+	int (*putmem)(struct adu *, uint64_t, uint64_t, int, int, uint8_t);
 };
 #define target_to_adu(x) container_of(x, struct adu, target)

diff --git a/src/main.c b/src/main.c
index 1f68fa8..eb8c9ec 100644
--- a/src/main.c
+++ b/src/main.c
@@ -91,8 +91,9 @@  extern struct optcmd_cmd
 	optcmd_getnia, optcmd_putnia, optcmd_getmsr, optcmd_putmsr,
 	optcmd_getring, optcmd_start, optcmd_stop, optcmd_step,
 	optcmd_threadstatus, optcmd_sreset, optcmd_regs, optcmd_probe,
-	optcmd_getmem, optcmd_putmem, optcmd_getxer, optcmd_putxer,
-	optcmd_getcr, optcmd_putcr, optcmd_gdbserver;
+	optcmd_getmem, optcmd_putmem, optcmd_getmemio, optcmd_putmemio,
+	optcmd_getxer, optcmd_putxer, optcmd_getcr, optcmd_putcr,
+	optcmd_gdbserver;

 static struct optcmd_cmd *cmds[] = {
 	&optcmd_getscom, &optcmd_putscom, &optcmd_getcfam, &optcmd_putcfam,
@@ -100,8 +101,9 @@  static struct optcmd_cmd *cmds[] = {
 	&optcmd_getnia, &optcmd_putnia, &optcmd_getmsr, &optcmd_putmsr,
 	&optcmd_getring, &optcmd_start, &optcmd_stop, &optcmd_step,
 	&optcmd_threadstatus, &optcmd_sreset, &optcmd_regs, &optcmd_probe,
-	&optcmd_getmem, &optcmd_putmem, &optcmd_getxer, &optcmd_putxer,
-	&optcmd_getcr, &optcmd_putcr, &optcmd_gdbserver,
+	&optcmd_getmem, &optcmd_putmem, &optcmd_getmemio, &optcmd_putmemio,
+	&optcmd_getxer, &optcmd_putxer, &optcmd_getcr, &optcmd_putcr,
+	&optcmd_gdbserver,
 };

 /* Purely for printing usage text. We could integrate printing argument and flag
@@ -136,7 +138,9 @@  static struct action actions[] = {
 	{ "getscom", "<address>", "Read system scom" },
 	{ "putscom", "<address> <value> [<mask>]", "Write system scom" },
 	{ "getmem",  "<address> <count>", "Read system memory" },
+	{ "getmemio", "<address> <count> [<block size>]", "Read memory cache inhibited with specified transfer size" },
 	{ "putmem",  "<address>", "Write to system memory" },
+	{ "putmemio", "<address> [<block size>]", "Write system memory cache inhibited with specified transfer size" },
 	{ "threadstatus", "", "Print the status of a thread" },
 	{ "sreset",  "", "Reset" },
 	{ "regs",  "[--backtrace]", "State (optionally display backtrace)" },
diff --git a/src/mem.c b/src/mem.c
index ce099c2..7bec152 100644
--- a/src/mem.c
+++ b/src/mem.c
@@ -39,8 +39,9 @@  struct mem_flags {
 };

 #define MEM_CI_FLAG ("--ci", ci, parse_flag_noarg, false)
+#define BLOCK_SIZE (parse_number8_pow2, "8")

-static int getmem(uint64_t addr, uint64_t size, struct mem_flags flags)
+static int _getmem(uint64_t addr, uint64_t size, uint8_t block_size)
 {
 	struct pdbg_target *target;
 	uint8_t *buf;
@@ -59,14 +60,19 @@  static int getmem(uint64_t addr, uint64_t size, struct mem_flags flags)

 		pdbg_set_progress_tick(progress_tick);
 		progress_init();
-		if (!__adu_getmem(target, addr, buf, size, flags.ci)) {
-			if (write(STDOUT_FILENO, buf, size) < 0)
-				PR_ERROR("Unable to write stdout.\n");
-			else
-				rc++;
-		} else
+		if (block_size)
+			rc = adu_getmem_io(target, addr, buf, size, block_size);
+		else
+			rc = adu_getmem(target, addr, buf, size);
+
+		if (rc)
 			PR_ERROR("Unable to read memory.\n");
-			/* We only ever care about getting memory from a single processor */
+
+		if (write(STDOUT_FILENO, buf, size) < 0)
+			PR_ERROR("Unable to write stdout.\n");
+		else
+				rc++;
+
 		progress_end();
 		break;
 	}
@@ -74,10 +80,24 @@  static int getmem(uint64_t addr, uint64_t size, struct mem_flags flags)
 	return rc;

 }
+
+static int getmem(uint64_t addr, uint64_t size, struct mem_flags flags)
+{
+	if (flags.ci)
+		return _getmem(addr, size, 8);
+	else
+		return _getmem(addr, size, 0);
+}
 OPTCMD_DEFINE_CMD_WITH_FLAGS(getmem, getmem, (ADDRESS, DATA),
 			     mem_flags, (MEM_CI_FLAG));

-static int putmem(uint64_t addr, struct mem_flags flags)
+static int getmemio(uint64_t addr, uint64_t size, uint8_t block_size)
+{
+	return _getmem(addr, size, block_size);
+}
+OPTCMD_DEFINE_CMD_WITH_ARGS(getmemio, getmemio, (ADDRESS, DATA, BLOCK_SIZE));
+
+static int _putmem(uint64_t addr, uint8_t block_size)
 {
 	uint8_t *buf;
 	int read_size, rc = 0;
@@ -98,11 +118,17 @@  static int putmem(uint64_t addr, struct mem_flags flags)
 		if (read_size <= 0)
 			break;

-		if (__adu_putmem(adu_target, addr, buf, read_size, flags.ci)) {
+		if (block_size)
+			rc = adu_putmem_io(adu_target, addr, buf, read_size, block_size);
+		else
+			rc = adu_putmem(adu_target, addr, buf, read_size);
+
+		if (rc) {
 			rc = 0;
 			printf("Unable to write memory.\n");
 			break;
 		}
+
 		rc += read_size;
 	} while (read_size > 0);
 	progress_end();
@@ -111,5 +137,18 @@  static int putmem(uint64_t addr, struct mem_flags flags)
 	free(buf);
 	return rc;
 }
-OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS),
-			     mem_flags, (MEM_CI_FLAG));
+
+static int putmem(uint64_t addr, struct mem_flags flags)
+{
+	if (flags.ci)
+		return _putmem(addr, 8);
+	else
+		return _putmem(addr, 0);
+}
+OPTCMD_DEFINE_CMD_WITH_FLAGS(putmem, putmem, (ADDRESS), mem_flags, (MEM_CI_FLAG));
+
+static int putmemio(uint64_t addr, uint8_t block_size)
+{
+	return _putmem(addr, block_size);
+}
+OPTCMD_DEFINE_CMD_WITH_ARGS(putmemio, putmemio, (ADDRESS, BLOCK_SIZE));