[1/2] i2c: ismt: 16-byte align the DMA buffer address

Message ID 20170818160128.21228-2-radu.rendec@gmail.com
State Superseded
Headers show

Commit Message

Radu Rendec Aug. 18, 2017, 4:01 p.m.
Use only a portion of the data buffer for DMA transfers, which is always
16-byte aligned. This makes the DMA buffer address 16-byte aligned and
compensates for spurious hardware parity errors that may appear when the
DMA buffer address is not 16-byte aligned.

The data buffer is enlarged in order to accommodate any possible 16-byte
alignment offset and changes the DMA code to only use a portion of the
data buffer, which is 16-byte aligned.

The symptom of the hardware issue is the same as the one addressed in
v3.12-rc2-5-gbf41691 and manifests by transfers failing with EIO, with
bit 9 being set in the ERRSTS register.

Signed-off-by: Radu Rendec <radu.rendec@gmail.com>
---
 drivers/i2c/busses/i2c-ismt.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

Comments

Wolfram Sang Aug. 27, 2017, 2:37 p.m. | #1
On Fri, Aug 18, 2017 at 05:01:27PM +0100, Radu Rendec wrote:
> Use only a portion of the data buffer for DMA transfers, which is always
> 16-byte aligned. This makes the DMA buffer address 16-byte aligned and
> compensates for spurious hardware parity errors that may appear when the
> DMA buffer address is not 16-byte aligned.
> 
> The data buffer is enlarged in order to accommodate any possible 16-byte
> alignment offset and changes the DMA code to only use a portion of the
> data buffer, which is 16-byte aligned.
> 
> The symptom of the hardware issue is the same as the one addressed in
> v3.12-rc2-5-gbf41691 and manifests by transfers failing with EIO, with
> bit 9 being set in the ERRSTS register.
> 
> Signed-off-by: Radu Rendec <radu.rendec@gmail.com>

Please use scripts/get_maintainers.pl as cc-cmd of git-send-email. This
would have added Seth and Neil (driver maintainers) to CC automatically.
I have done so now.

> ---
>  drivers/i2c/busses/i2c-ismt.c | 37 +++++++++++++++++++------------------
>  1 file changed, 19 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
> index e98e44e..ccce0ca 100644
> --- a/drivers/i2c/busses/i2c-ismt.c
> +++ b/drivers/i2c/busses/i2c-ismt.c
> @@ -172,7 +172,7 @@ struct ismt_priv {
>  	dma_addr_t io_rng_dma;			/* descriptor HW base addr */
>  	u8 head;				/* ring buffer head pointer */
>  	struct completion cmp;			/* interrupt completion */
> -	u8 dma_buffer[I2C_SMBUS_BLOCK_MAX + 1];	/* temp R/W data buffer */
> +	u8 buffer[I2C_SMBUS_BLOCK_MAX + 16];	/* temp R/W data buffer */

The IIO subsystems uses ____cacheline_aligned. I have never looked into
this closely, but it probably is cleaner than working with ALIGN on an
oversized buffer? Dunno why ____cacheline_aligned has four underscores
at the beginning, though...

>  };
>  
>  /**
> @@ -320,7 +320,7 @@ static int ismt_process_desc(const struct ismt_desc *desc,
>  			     struct ismt_priv *priv, int size,
>  			     char read_write)
>  {
> -	u8 *dma_buffer = priv->dma_buffer;
> +	u8 *dma_buffer = (u8*)ALIGN((unsigned long)&priv->buffer[0], 16);

The fixed value here might not work on all (future?) generations?


>  
>  	dev_dbg(&priv->pci_dev->dev, "Processing completed descriptor\n");
>  	__ismt_desc_dump(&priv->pci_dev->dev, desc);
> @@ -388,11 +388,12 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  	struct ismt_desc *desc;
>  	struct ismt_priv *priv = i2c_get_adapdata(adap);
>  	struct device *dev = &priv->pci_dev->dev;
> +	u8 *dma_buffer = (u8*)ALIGN((unsigned long)&priv->buffer[0], 16);
>  
>  	desc = &priv->hw[priv->head];
>  
>  	/* Initialize the DMA buffer */
> -	memset(priv->dma_buffer, 0, sizeof(priv->dma_buffer));
> +	memset(priv->buffer, 0, sizeof(priv->buffer));
>  
>  	/* Initialize the descriptor */
>  	memset(desc, 0, sizeof(struct ismt_desc));
> @@ -441,8 +442,8 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  			desc->wr_len_cmd = 2;
>  			dma_size = 2;
>  			dma_direction = DMA_TO_DEVICE;
> -			priv->dma_buffer[0] = command;
> -			priv->dma_buffer[1] = data->byte;
> +			dma_buffer[0] = command;
> +			dma_buffer[1] = data->byte;
>  		} else {
>  			/* Read Byte */
>  			dev_dbg(dev, "I2C_SMBUS_BYTE_DATA:  READ\n");
> @@ -461,9 +462,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  			desc->wr_len_cmd = 3;
>  			dma_size = 3;
>  			dma_direction = DMA_TO_DEVICE;
> -			priv->dma_buffer[0] = command;
> -			priv->dma_buffer[1] = data->word & 0xff;
> -			priv->dma_buffer[2] = data->word >> 8;
> +			dma_buffer[0] = command;
> +			dma_buffer[1] = data->word & 0xff;
> +			dma_buffer[2] = data->word >> 8;
>  		} else {
>  			/* Read Word */
>  			dev_dbg(dev, "I2C_SMBUS_WORD_DATA:  READ\n");
> @@ -481,9 +482,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  		desc->rd_len = 2;
>  		dma_size = 3;
>  		dma_direction = DMA_BIDIRECTIONAL;
> -		priv->dma_buffer[0] = command;
> -		priv->dma_buffer[1] = data->word & 0xff;
> -		priv->dma_buffer[2] = data->word >> 8;
> +		dma_buffer[0] = command;
> +		dma_buffer[1] = data->word & 0xff;
> +		dma_buffer[2] = data->word >> 8;
>  		break;
>  
>  	case I2C_SMBUS_BLOCK_DATA:
> @@ -494,8 +495,8 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  			dma_direction = DMA_TO_DEVICE;
>  			desc->wr_len_cmd = dma_size;
>  			desc->control |= ISMT_DESC_BLK;
> -			priv->dma_buffer[0] = command;
> -			memcpy(&priv->dma_buffer[1], &data->block[1], dma_size - 1);
> +			dma_buffer[0] = command;
> +			memcpy(&dma_buffer[1], &data->block[1], dma_size - 1);
>  		} else {
>  			/* Block Read */
>  			dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA:  READ\n");
> @@ -522,8 +523,8 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  			dma_direction = DMA_TO_DEVICE;
>  			desc->wr_len_cmd = dma_size;
>  			desc->control |= ISMT_DESC_I2C;
> -			priv->dma_buffer[0] = command;
> -			memcpy(&priv->dma_buffer[1], &data->block[1], dma_size - 1);
> +			dma_buffer[0] = command;
> +			memcpy(&dma_buffer[1], &data->block[1], dma_size - 1);
>  		} else {
>  			/* i2c Block Read */
>  			dev_dbg(dev, "I2C_SMBUS_I2C_BLOCK_DATA:  READ\n");
> @@ -552,18 +553,18 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  	if (dma_size != 0) {
>  		dev_dbg(dev, " dev=%p\n", dev);
>  		dev_dbg(dev, " data=%p\n", data);
> -		dev_dbg(dev, " dma_buffer=%p\n", priv->dma_buffer);
> +		dev_dbg(dev, " dma_buffer=%p\n", dma_buffer);
>  		dev_dbg(dev, " dma_size=%d\n", dma_size);
>  		dev_dbg(dev, " dma_direction=%d\n", dma_direction);
>  
>  		dma_addr = dma_map_single(dev,
> -				      priv->dma_buffer,
> +				      dma_buffer,
>  				      dma_size,
>  				      dma_direction);
>  
>  		if (dma_mapping_error(dev, dma_addr)) {
>  			dev_err(dev, "Error in mapping dma buffer %p\n",
> -				priv->dma_buffer);
> +				dma_buffer);
>  			return -EIO;
>  		}
>  
> -- 
> 2.9.5
>
Radu Rendec Aug. 29, 2017, 4:37 p.m. | #2
On Sun, 2017-08-27 at 16:37 +0200, Wolfram Sang wrote:
> Please use scripts/get_maintainers.pl as cc-cmd of git-send-email. This
> would have added Seth and Neil (driver maintainers) to CC automatically.
> I have done so now.

My bad; sorry about that and thanks for adding Seth and Neil.

> The IIO subsystems uses ____cacheline_aligned. I have never looked into
> this closely, but it probably is cleaner than working with ALIGN on an
> oversized buffer? Dunno why ____cacheline_aligned has four underscores
> at the beginning, though...

I haven't looked into that either; it seems that ____cacheline_aligned
is just __attribute__((__aligned__(SMP_CACHE_BYTES))) unless already
defined by the architecture (see include/linux/cache.h).

I believe using this on a (whole) structure only makes sense with
statically allocated structures and instructs the compiler to allocate
the structure at an aligned address. My guess is that it has no effect
on dynamically allocated structures (such as struct ismt_priv).

If you were thinking about using ____cacheline_aligned just on the
dma_buffer field, then it would align the field with respect to the
beginning of the structure (i.e. insert the required padding), but the
field would still be misaligned if the structure itself is misaligned.

> @@ -320,7 +320,7 @@ static int ismt_process_desc(const struct
> > ismt_desc *desc,
> >  			     struct ismt_priv *priv, int size,
> >  			     char read_write)
> >  {
> > -	u8 *dma_buffer = priv->dma_buffer;
> > +	u8 *dma_buffer = (u8*)ALIGN((unsigned long)&priv->buffer[0], 16);
> 
> The fixed value here might not work on all (future?) generations?

Good question. In theory this alignment should not be necessary at all.
There is no such requirement in the public datasheet, which is probably
why it was not implemented in the driver in the first place.

This is related to some hardware errata and the problem probably occurs
only with some specific hardware revisions. Hard to say what happens
with future generations. But at least it should not make any difference
to the other (existing) generations.

In any case, it's probably better to use a macro (e.g. #define
ISMT_DMA_ALIGN 16) rather than hardcoding the "16" value.

Radu
Neil Horman Jan. 4, 2018, 1:46 p.m. | #3
On Fri, Aug 18, 2017 at 05:01:27PM +0100, Radu Rendec wrote:
> Use only a portion of the data buffer for DMA transfers, which is always
> 16-byte aligned. This makes the DMA buffer address 16-byte aligned and
> compensates for spurious hardware parity errors that may appear when the
> DMA buffer address is not 16-byte aligned.
> 
> The data buffer is enlarged in order to accommodate any possible 16-byte
> alignment offset and changes the DMA code to only use a portion of the
> data buffer, which is 16-byte aligned.
> 
> The symptom of the hardware issue is the same as the one addressed in
> v3.12-rc2-5-gbf41691 and manifests by transfers failing with EIO, with
> bit 9 being set in the ERRSTS register.
> 
> Signed-off-by: Radu Rendec <radu.rendec@gmail.com>
Why not just use the alligned attribute here for buffer? 

https://gcc.gnu.org/onlinedocs/gcc-3.3/gcc/Type-Attributes.html

That would save you over allocation when possible and keep you from needing to
create a private aligned variable.

Neil

> ---
>  drivers/i2c/busses/i2c-ismt.c | 37 +++++++++++++++++++------------------
>  1 file changed, 19 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
> index e98e44e..ccce0ca 100644
> --- a/drivers/i2c/busses/i2c-ismt.c
> +++ b/drivers/i2c/busses/i2c-ismt.c
> @@ -172,7 +172,7 @@ struct ismt_priv {
>  	dma_addr_t io_rng_dma;			/* descriptor HW base addr */
>  	u8 head;				/* ring buffer head pointer */
>  	struct completion cmp;			/* interrupt completion */
> -	u8 dma_buffer[I2C_SMBUS_BLOCK_MAX + 1];	/* temp R/W data buffer */
> +	u8 buffer[I2C_SMBUS_BLOCK_MAX + 16];	/* temp R/W data buffer */
>  };
>  
>  /**
> @@ -320,7 +320,7 @@ static int ismt_process_desc(const struct ismt_desc *desc,
>  			     struct ismt_priv *priv, int size,
>  			     char read_write)
>  {
> -	u8 *dma_buffer = priv->dma_buffer;
> +	u8 *dma_buffer = (u8*)ALIGN((unsigned long)&priv->buffer[0], 16);
>  
>  	dev_dbg(&priv->pci_dev->dev, "Processing completed descriptor\n");
>  	__ismt_desc_dump(&priv->pci_dev->dev, desc);
> @@ -388,11 +388,12 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  	struct ismt_desc *desc;
>  	struct ismt_priv *priv = i2c_get_adapdata(adap);
>  	struct device *dev = &priv->pci_dev->dev;
> +	u8 *dma_buffer = (u8*)ALIGN((unsigned long)&priv->buffer[0], 16);
>  
>  	desc = &priv->hw[priv->head];
>  
>  	/* Initialize the DMA buffer */
> -	memset(priv->dma_buffer, 0, sizeof(priv->dma_buffer));
> +	memset(priv->buffer, 0, sizeof(priv->buffer));
>  
>  	/* Initialize the descriptor */
>  	memset(desc, 0, sizeof(struct ismt_desc));
> @@ -441,8 +442,8 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  			desc->wr_len_cmd = 2;
>  			dma_size = 2;
>  			dma_direction = DMA_TO_DEVICE;
> -			priv->dma_buffer[0] = command;
> -			priv->dma_buffer[1] = data->byte;
> +			dma_buffer[0] = command;
> +			dma_buffer[1] = data->byte;
>  		} else {
>  			/* Read Byte */
>  			dev_dbg(dev, "I2C_SMBUS_BYTE_DATA:  READ\n");
> @@ -461,9 +462,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  			desc->wr_len_cmd = 3;
>  			dma_size = 3;
>  			dma_direction = DMA_TO_DEVICE;
> -			priv->dma_buffer[0] = command;
> -			priv->dma_buffer[1] = data->word & 0xff;
> -			priv->dma_buffer[2] = data->word >> 8;
> +			dma_buffer[0] = command;
> +			dma_buffer[1] = data->word & 0xff;
> +			dma_buffer[2] = data->word >> 8;
>  		} else {
>  			/* Read Word */
>  			dev_dbg(dev, "I2C_SMBUS_WORD_DATA:  READ\n");
> @@ -481,9 +482,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  		desc->rd_len = 2;
>  		dma_size = 3;
>  		dma_direction = DMA_BIDIRECTIONAL;
> -		priv->dma_buffer[0] = command;
> -		priv->dma_buffer[1] = data->word & 0xff;
> -		priv->dma_buffer[2] = data->word >> 8;
> +		dma_buffer[0] = command;
> +		dma_buffer[1] = data->word & 0xff;
> +		dma_buffer[2] = data->word >> 8;
>  		break;
>  
>  	case I2C_SMBUS_BLOCK_DATA:
> @@ -494,8 +495,8 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  			dma_direction = DMA_TO_DEVICE;
>  			desc->wr_len_cmd = dma_size;
>  			desc->control |= ISMT_DESC_BLK;
> -			priv->dma_buffer[0] = command;
> -			memcpy(&priv->dma_buffer[1], &data->block[1], dma_size - 1);
> +			dma_buffer[0] = command;
> +			memcpy(&dma_buffer[1], &data->block[1], dma_size - 1);
>  		} else {
>  			/* Block Read */
>  			dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA:  READ\n");
> @@ -522,8 +523,8 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  			dma_direction = DMA_TO_DEVICE;
>  			desc->wr_len_cmd = dma_size;
>  			desc->control |= ISMT_DESC_I2C;
> -			priv->dma_buffer[0] = command;
> -			memcpy(&priv->dma_buffer[1], &data->block[1], dma_size - 1);
> +			dma_buffer[0] = command;
> +			memcpy(&dma_buffer[1], &data->block[1], dma_size - 1);
>  		} else {
>  			/* i2c Block Read */
>  			dev_dbg(dev, "I2C_SMBUS_I2C_BLOCK_DATA:  READ\n");
> @@ -552,18 +553,18 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
>  	if (dma_size != 0) {
>  		dev_dbg(dev, " dev=%p\n", dev);
>  		dev_dbg(dev, " data=%p\n", data);
> -		dev_dbg(dev, " dma_buffer=%p\n", priv->dma_buffer);
> +		dev_dbg(dev, " dma_buffer=%p\n", dma_buffer);
>  		dev_dbg(dev, " dma_size=%d\n", dma_size);
>  		dev_dbg(dev, " dma_direction=%d\n", dma_direction);
>  
>  		dma_addr = dma_map_single(dev,
> -				      priv->dma_buffer,
> +				      dma_buffer,
>  				      dma_size,
>  				      dma_direction);
>  
>  		if (dma_mapping_error(dev, dma_addr)) {
>  			dev_err(dev, "Error in mapping dma buffer %p\n",
> -				priv->dma_buffer);
> +				dma_buffer);
>  			return -EIO;
>  		}
>
Radu Rendec Jan. 4, 2018, 3:42 p.m. | #4
On Thu, 2018-01-04 at 08:46 -0500, Neil Horman wrote:
> On Fri, Aug 18, 2017 at 05:01:27PM +0100, Radu Rendec wrote:
> > Use only a portion of the data buffer for DMA transfers, which is always
> > 16-byte aligned. This makes the DMA buffer address 16-byte aligned and
> > compensates for spurious hardware parity errors that may appear when the
> > DMA buffer address is not 16-byte aligned.
> > 
> > The data buffer is enlarged in order to accommodate any possible 16-byte
> > alignment offset and changes the DMA code to only use a portion of the
> > data buffer, which is 16-byte aligned.
> > 
> > The symptom of the hardware issue is the same as the one addressed in
> > v3.12-rc2-5-gbf41691 and manifests by transfers failing with EIO, with
> > bit 9 being set in the ERRSTS register.
> > 
> > Signed-off-by: Radu Rendec <radu.rendec@gmail.com>
> 
> Why not just use the alligned attribute here for buffer? 
> 
> https://gcc.gnu.org/onlinedocs/gcc-3.3/gcc/Type-Attributes.html
> 
> That would save you over allocation when possible and keep you from needing to
> create a private aligned variable.

First of all, thanks for reviewing this!

I believe the aligned() attribute cannot be used here because the whole
ismt_priv structure is dynamically allocated (the allocation is done at
the beginning of the ismt_probe() function).

Even with a statically allocated structure, aligning the whole
structure does not guarantee the alignment of the dma_buffer field,
because the field offset within the structure can change (e.g. if other
fields are added, removed, moved around etc.).

On the other hand, I realized I could have used the PTR_ALIGN macro to
calculate the aligned address. That would probably make the code less
ugly and more intuitive. So I would at least resubmit the patch using
PTR_ALIGN instead of ALIGN and explicit casts - if you agree to the
current solution, of course.

Thanks,
Radu
Neil Horman Jan. 4, 2018, 4:22 p.m. | #5
On Thu, Jan 04, 2018 at 03:42:09PM +0000, Radu Rendec wrote:
> On Thu, 2018-01-04 at 08:46 -0500, Neil Horman wrote:
> > On Fri, Aug 18, 2017 at 05:01:27PM +0100, Radu Rendec wrote:
> > > Use only a portion of the data buffer for DMA transfers, which is always
> > > 16-byte aligned. This makes the DMA buffer address 16-byte aligned and
> > > compensates for spurious hardware parity errors that may appear when the
> > > DMA buffer address is not 16-byte aligned.
> > > 
> > > The data buffer is enlarged in order to accommodate any possible 16-byte
> > > alignment offset and changes the DMA code to only use a portion of the
> > > data buffer, which is 16-byte aligned.
> > > 
> > > The symptom of the hardware issue is the same as the one addressed in
> > > v3.12-rc2-5-gbf41691 and manifests by transfers failing with EIO, with
> > > bit 9 being set in the ERRSTS register.
> > > 
> > > Signed-off-by: Radu Rendec <radu.rendec@gmail.com>
> > 
> > Why not just use the alligned attribute here for buffer? 
> > 
> > https://gcc.gnu.org/onlinedocs/gcc-3.3/gcc/Type-Attributes.html
> > 
> > That would save you over allocation when possible and keep you from needing to
> > create a private aligned variable.
> 
> First of all, thanks for reviewing this!
> 
No problem!

> I believe the aligned() attribute cannot be used here because the whole
> ismt_priv structure is dynamically allocated (the allocation is done at
> the beginning of the ismt_probe() function).
> 
> Even with a statically allocated structure, aligning the whole
> structure does not guarantee the alignment of the dma_buffer field,
> because the field offset within the structure can change (e.g. if other
> fields are added, removed, moved around etc.).
> 
> On the other hand, I realized I could have used the PTR_ALIGN macro to
> calculate the aligned address. That would probably make the code less
> ugly and more intuitive. So I would at least resubmit the patch using
> PTR_ALIGN instead of ALIGN and explicit casts - if you agree to the
> current solution, of course.
> 
> Thanks,
> Radu
> 
> 

For reference, you can do it like this:

#include <stdlib.h>
#include <stdio.h>


struct buffer_type { char b[3]; };
struct ubuffer_type { char b[3]; } __attribute__((aligned(16)));

struct s1 {
        char foo;
        struct buffer_type bar;
};

struct s2 {
        char foo;
        struct ubuffer_type bar;
};

int main(int argc, char **argv)
{
        struct s1 *p1;
        struct s2 *p2;

        p1 = malloc(sizeof(struct s1));
        p2 = malloc(sizeof(struct s2));

        printf("addr of p1->bar = %p\n", &p1->bar);
        printf("addr of p2->bar = %p\n", &p2->bar);

        free(p1);
        free(p2);

        return 0;
}



Though as I look at that, you're probably right, using PTR_ALIGN would probably
be prettier.  I'd be ok with that approach

Best
Neil
Radu Rendec Jan. 4, 2018, 5:01 p.m. | #6
On Thu, 2018-01-04 at 11:22 -0500, Neil Horman wrote:
> On Thu, Jan 04, 2018 at 03:42:09PM +0000, Radu Rendec wrote:
> > I believe the aligned() attribute cannot be used here because the whole
> > ismt_priv structure is dynamically allocated (the allocation is done at
> > the beginning of the ismt_probe() function).
> > 
> > Even with a statically allocated structure, aligning the whole
> > structure does not guarantee the alignment of the dma_buffer field,
> > because the field offset within the structure can change (e.g. if other
> > fields are added, removed, moved around etc.).
> 
> For reference, you can do it like this:
> 
> #include <stdlib.h>
> #include <stdio.h>
> 
> 
> struct buffer_type { char b[3]; };
> struct ubuffer_type { char b[3]; } __attribute__((aligned(16)));
> 
> struct s1 {
>         char foo;
>         struct buffer_type bar;
> };
> 
> struct s2 {
>         char foo;
>         struct ubuffer_type bar;
> };
> 
> int main(int argc, char **argv)
> {
>         struct s1 *p1;
>         struct s2 *p2;
> 
>         p1 = malloc(sizeof(struct s1));
>         p2 = malloc(sizeof(struct s2));
> 
>         printf("addr of p1->bar = %p\n", &p1->bar);
>         printf("addr of p2->bar = %p\n", &p2->bar);
> 
>         free(p1);
>         free(p2);
> 
>         return 0;
> }

Hmm... I just tried this on my workstation (x86, 32bit):

[rrendec@rrendec ~]$ gcc -o align align.c
[rrendec@rrendec ~]$ ./align 
addr of p1->bar = 0x9358009
addr of p2->bar = 0x9358028
[rrendec@rrendec ~]$ ./align 
addr of p1->bar = 0x856b009
addr of p2->bar = 0x856b028

The most significant 4 digits are always different (probably due to
address randomization), but the least significant 3 are consistently
0x009 and 0x028, neither of which is 16 byte aligned.

But I think this is normal, because malloc() gets just a size parameter
and has no knowledge of the intended data type of the memory area that
it allocates. It's probably the same with kmalloc() and friends.

> Though as I look at that, you're probably right, using PTR_ALIGN would probably
> be prettier.  I'd be ok with that approach

That's great. I'll prepare an updated patch with PTR_ALIGN and post it
shortly.

Thanks,
Radu
Neil Horman Jan. 5, 2018, 1:47 a.m. | #7
On Thu, Jan 04, 2018 at 05:01:16PM +0000, Radu Rendec wrote:
> On Thu, 2018-01-04 at 11:22 -0500, Neil Horman wrote:
> > On Thu, Jan 04, 2018 at 03:42:09PM +0000, Radu Rendec wrote:
> > > I believe the aligned() attribute cannot be used here because the whole
> > > ismt_priv structure is dynamically allocated (the allocation is done at
> > > the beginning of the ismt_probe() function).
> > > 
> > > Even with a statically allocated structure, aligning the whole
> > > structure does not guarantee the alignment of the dma_buffer field,
> > > because the field offset within the structure can change (e.g. if other
> > > fields are added, removed, moved around etc.).
> > 
> > For reference, you can do it like this:
> > 
> > #include <stdlib.h>
> > #include <stdio.h>
> > 
> > 
> > struct buffer_type { char b[3]; };
> > struct ubuffer_type { char b[3]; } __attribute__((aligned(16)));
> > 
> > struct s1 {
> >         char foo;
> >         struct buffer_type bar;
> > };
> > 
> > struct s2 {
> >         char foo;
> >         struct ubuffer_type bar;
> > };
> > 
> > int main(int argc, char **argv)
> > {
> >         struct s1 *p1;
> >         struct s2 *p2;
> > 
> >         p1 = malloc(sizeof(struct s1));
> >         p2 = malloc(sizeof(struct s2));
> > 
> >         printf("addr of p1->bar = %p\n", &p1->bar);
> >         printf("addr of p2->bar = %p\n", &p2->bar);
> > 
> >         free(p1);
> >         free(p2);
> > 
> >         return 0;
> > }
> 
> Hmm... I just tried this on my workstation (x86, 32bit):
> 
> [rrendec@rrendec ~]$ gcc -o align align.c
> [rrendec@rrendec ~]$ ./align 
> addr of p1->bar = 0x9358009
> addr of p2->bar = 0x9358028
> [rrendec@rrendec ~]$ ./align 
> addr of p1->bar = 0x856b009
> addr of p2->bar = 0x856b028
> 
> The most significant 4 digits are always different (probably due to
> address randomization), but the least significant 3 are consistently
> 0x009 and 0x028, neither of which is 16 byte aligned.
> 
> But I think this is normal, because malloc() gets just a size parameter
> and has no knowledge of the intended data type of the memory area that
> it allocates. It's probably the same with kmalloc() and friends.
> 
Yeah, I was under the impression that malloc provided memory in blocks aligned
to a maximum arch specific alignment, and then gcc could just pad the struct
size to guarantee a more granular alignment, but on doing some more reading,
alignment support only applies to bss and data storage, at least for the gcc
linker.  PTR_ALIGN is the way to go here.

> > Though as I look at that, you're probably right, using PTR_ALIGN would probably
> > be prettier.  I'd be ok with that approach
> 
> That's great. I'll prepare an updated patch with PTR_ALIGN and post it
> shortly.
> 
Thanks!
Neil

> Thanks,
> Radu
> 
>
Radu Rendec Jan. 15, 2018, 12:33 p.m. | #8
On Thu, 2018-01-04 at 20:47 -0500, Neil Horman wrote:
> On Thu, Jan 04, 2018 at 05:01:16PM +0000, Radu Rendec wrote:
> > On Thu, 2018-01-04 at 11:22 -0500, Neil Horman wrote:
> > > On Thu, Jan 04, 2018 at 03:42:09PM +0000, Radu Rendec wrote:
> > > > I believe the aligned() attribute cannot be used here because the whole
> > > > ismt_priv structure is dynamically allocated (the allocation is done at
> > > > the beginning of the ismt_probe() function).
> > > > 
> > > > Even with a statically allocated structure, aligning the whole
> > > > structure does not guarantee the alignment of the dma_buffer field,
> > > > because the field offset within the structure can change (e.g. if other
> > > > fields are added, removed, moved around etc.).
> > > 
> > > For reference, you can do it like this:
> > > 
> > > #include <stdlib.h>
> > > #include <stdio.h>
> > > 
> > > 
> > > struct buffer_type { char b[3]; };
> > > struct ubuffer_type { char b[3]; } __attribute__((aligned(16)));
> > > 
> > > struct s1 {
> > >         char foo;
> > >         struct buffer_type bar;
> > > };
> > > 
> > > struct s2 {
> > >         char foo;
> > >         struct ubuffer_type bar;
> > > };
> > > 
> > > int main(int argc, char **argv)
> > > {
> > >         struct s1 *p1;
> > >         struct s2 *p2;
> > > 
> > >         p1 = malloc(sizeof(struct s1));
> > >         p2 = malloc(sizeof(struct s2));
> > > 
> > >         printf("addr of p1->bar = %p\n", &p1->bar);
> > >         printf("addr of p2->bar = %p\n", &p2->bar);
> > > 
> > >         free(p1);
> > >         free(p2);
> > > 
> > >         return 0;
> > > }
> > 
> > Hmm... I just tried this on my workstation (x86, 32bit):
> > 
> > [rrendec@rrendec ~]$ gcc -o align align.c
> > [rrendec@rrendec ~]$ ./align 
> > addr of p1->bar = 0x9358009
> > addr of p2->bar = 0x9358028
> > [rrendec@rrendec ~]$ ./align 
> > addr of p1->bar = 0x856b009
> > addr of p2->bar = 0x856b028
> > 
> > The most significant 4 digits are always different (probably due to
> > address randomization), but the least significant 3 are consistently
> > 0x009 and 0x028, neither of which is 16 byte aligned.
> > 
> > But I think this is normal, because malloc() gets just a size parameter
> > and has no knowledge of the intended data type of the memory area that
> > it allocates. It's probably the same with kmalloc() and friends.
> > 
> 
> Yeah, I was under the impression that malloc provided memory in blocks aligned
> to a maximum arch specific alignment, and then gcc could just pad the struct
> size to guarantee a more granular alignment, but on doing some more reading,
> alignment support only applies to bss and data storage, at least for the gcc
> linker.  PTR_ALIGN is the way to go here.
> 
> > > Though as I look at that, you're probably right, using PTR_ALIGN would probably
> > > be prettier.  I'd be ok with that approach
> > 

Glad that we sorted this out.

> > That's great. I'll prepare an updated patch with PTR_ALIGN and post it
> > shortly.
> > 
> 
> Thanks!

I sent the updated patch with PTR_ALIGN on Jan/4. Have you had a chance
to look at it? Is there anything that can be further improved?

Thanks,
Radu
Neil Horman Jan. 15, 2018, 6:53 p.m. | #9
On Mon, Jan 15, 2018 at 12:33:20PM +0000, Radu Rendec wrote:
> On Thu, 2018-01-04 at 20:47 -0500, Neil Horman wrote:
> > On Thu, Jan 04, 2018 at 05:01:16PM +0000, Radu Rendec wrote:
> > > On Thu, 2018-01-04 at 11:22 -0500, Neil Horman wrote:
> > > > On Thu, Jan 04, 2018 at 03:42:09PM +0000, Radu Rendec wrote:
> > > > > I believe the aligned() attribute cannot be used here because the whole
> > > > > ismt_priv structure is dynamically allocated (the allocation is done at
> > > > > the beginning of the ismt_probe() function).
> > > > > 
> > > > > Even with a statically allocated structure, aligning the whole
> > > > > structure does not guarantee the alignment of the dma_buffer field,
> > > > > because the field offset within the structure can change (e.g. if other
> > > > > fields are added, removed, moved around etc.).
> > > > 
> > > > For reference, you can do it like this:
> > > > 
> > > > #include <stdlib.h>
> > > > #include <stdio.h>
> > > > 
> > > > 
> > > > struct buffer_type { char b[3]; };
> > > > struct ubuffer_type { char b[3]; } __attribute__((aligned(16)));
> > > > 
> > > > struct s1 {
> > > >         char foo;
> > > >         struct buffer_type bar;
> > > > };
> > > > 
> > > > struct s2 {
> > > >         char foo;
> > > >         struct ubuffer_type bar;
> > > > };
> > > > 
> > > > int main(int argc, char **argv)
> > > > {
> > > >         struct s1 *p1;
> > > >         struct s2 *p2;
> > > > 
> > > >         p1 = malloc(sizeof(struct s1));
> > > >         p2 = malloc(sizeof(struct s2));
> > > > 
> > > >         printf("addr of p1->bar = %p\n", &p1->bar);
> > > >         printf("addr of p2->bar = %p\n", &p2->bar);
> > > > 
> > > >         free(p1);
> > > >         free(p2);
> > > > 
> > > >         return 0;
> > > > }
> > > 
> > > Hmm... I just tried this on my workstation (x86, 32bit):
> > > 
> > > [rrendec@rrendec ~]$ gcc -o align align.c
> > > [rrendec@rrendec ~]$ ./align 
> > > addr of p1->bar = 0x9358009
> > > addr of p2->bar = 0x9358028
> > > [rrendec@rrendec ~]$ ./align 
> > > addr of p1->bar = 0x856b009
> > > addr of p2->bar = 0x856b028
> > > 
> > > The most significant 4 digits are always different (probably due to
> > > address randomization), but the least significant 3 are consistently
> > > 0x009 and 0x028, neither of which is 16 byte aligned.
> > > 
> > > But I think this is normal, because malloc() gets just a size parameter
> > > and has no knowledge of the intended data type of the memory area that
> > > it allocates. It's probably the same with kmalloc() and friends.
> > > 
> > 
> > Yeah, I was under the impression that malloc provided memory in blocks aligned
> > to a maximum arch specific alignment, and then gcc could just pad the struct
> > size to guarantee a more granular alignment, but on doing some more reading,
> > alignment support only applies to bss and data storage, at least for the gcc
> > linker.  PTR_ALIGN is the way to go here.
> > 
> > > > Though as I look at that, you're probably right, using PTR_ALIGN would probably
> > > > be prettier.  I'd be ok with that approach
> > > 
> 
> Glad that we sorted this out.
> 
> > > That's great. I'll prepare an updated patch with PTR_ALIGN and post it
> > > shortly.
> > > 
> > 
> > Thanks!
> 
> I sent the updated patch with PTR_ALIGN on Jan/4. Have you had a chance
> to look at it? Is there anything that can be further improved?
> 
hmm, I didn't see it, and I can't seem to find it on the i2c list archives here:
https://marc.info/?l=linux-i2c&r=1&b=201801&w=2

Can you point it out or resend it please?

Best
Neil

> Thanks,
> Radu
> 
>
Wolfram Sang Jan. 15, 2018, 7:02 p.m. | #10
> > I sent the updated patch with PTR_ALIGN on Jan/4. Have you had a chance
> > to look at it? Is there anything that can be further improved?
> > 
> hmm, I didn't see it, and I can't seem to find it on the i2c list archives here:
> https://marc.info/?l=linux-i2c&r=1&b=201801&w=2
> 
> Can you point it out or resend it please?

http://patchwork.ozlabs.org/patch/855763/

You can also download it as mbox from there.
Neil Horman Jan. 15, 2018, 7:53 p.m. | #11
On Mon, Jan 15, 2018 at 08:02:26PM +0100, Wolfram Sang wrote:
> 
> > > I sent the updated patch with PTR_ALIGN on Jan/4. Have you had a chance
> > > to look at it? Is there anything that can be further improved?
> > > 
> > hmm, I didn't see it, and I can't seem to find it on the i2c list archives here:
> > https://marc.info/?l=linux-i2c&r=1&b=201801&w=2
> > 
> > Can you point it out or resend it please?
> 
> http://patchwork.ozlabs.org/patch/855763/
> 
> You can also download it as mbox from there.
> 

Thanks, No need, its got the changes we agreed on.  

Acked-by: Neil Horman <nhorman@tuxdriver.com>

Patch

diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
index e98e44e..ccce0ca 100644
--- a/drivers/i2c/busses/i2c-ismt.c
+++ b/drivers/i2c/busses/i2c-ismt.c
@@ -172,7 +172,7 @@  struct ismt_priv {
 	dma_addr_t io_rng_dma;			/* descriptor HW base addr */
 	u8 head;				/* ring buffer head pointer */
 	struct completion cmp;			/* interrupt completion */
-	u8 dma_buffer[I2C_SMBUS_BLOCK_MAX + 1];	/* temp R/W data buffer */
+	u8 buffer[I2C_SMBUS_BLOCK_MAX + 16];	/* temp R/W data buffer */
 };
 
 /**
@@ -320,7 +320,7 @@  static int ismt_process_desc(const struct ismt_desc *desc,
 			     struct ismt_priv *priv, int size,
 			     char read_write)
 {
-	u8 *dma_buffer = priv->dma_buffer;
+	u8 *dma_buffer = (u8*)ALIGN((unsigned long)&priv->buffer[0], 16);
 
 	dev_dbg(&priv->pci_dev->dev, "Processing completed descriptor\n");
 	__ismt_desc_dump(&priv->pci_dev->dev, desc);
@@ -388,11 +388,12 @@  static int ismt_access(struct i2c_adapter *adap, u16 addr,
 	struct ismt_desc *desc;
 	struct ismt_priv *priv = i2c_get_adapdata(adap);
 	struct device *dev = &priv->pci_dev->dev;
+	u8 *dma_buffer = (u8*)ALIGN((unsigned long)&priv->buffer[0], 16);
 
 	desc = &priv->hw[priv->head];
 
 	/* Initialize the DMA buffer */
-	memset(priv->dma_buffer, 0, sizeof(priv->dma_buffer));
+	memset(priv->buffer, 0, sizeof(priv->buffer));
 
 	/* Initialize the descriptor */
 	memset(desc, 0, sizeof(struct ismt_desc));
@@ -441,8 +442,8 @@  static int ismt_access(struct i2c_adapter *adap, u16 addr,
 			desc->wr_len_cmd = 2;
 			dma_size = 2;
 			dma_direction = DMA_TO_DEVICE;
-			priv->dma_buffer[0] = command;
-			priv->dma_buffer[1] = data->byte;
+			dma_buffer[0] = command;
+			dma_buffer[1] = data->byte;
 		} else {
 			/* Read Byte */
 			dev_dbg(dev, "I2C_SMBUS_BYTE_DATA:  READ\n");
@@ -461,9 +462,9 @@  static int ismt_access(struct i2c_adapter *adap, u16 addr,
 			desc->wr_len_cmd = 3;
 			dma_size = 3;
 			dma_direction = DMA_TO_DEVICE;
-			priv->dma_buffer[0] = command;
-			priv->dma_buffer[1] = data->word & 0xff;
-			priv->dma_buffer[2] = data->word >> 8;
+			dma_buffer[0] = command;
+			dma_buffer[1] = data->word & 0xff;
+			dma_buffer[2] = data->word >> 8;
 		} else {
 			/* Read Word */
 			dev_dbg(dev, "I2C_SMBUS_WORD_DATA:  READ\n");
@@ -481,9 +482,9 @@  static int ismt_access(struct i2c_adapter *adap, u16 addr,
 		desc->rd_len = 2;
 		dma_size = 3;
 		dma_direction = DMA_BIDIRECTIONAL;
-		priv->dma_buffer[0] = command;
-		priv->dma_buffer[1] = data->word & 0xff;
-		priv->dma_buffer[2] = data->word >> 8;
+		dma_buffer[0] = command;
+		dma_buffer[1] = data->word & 0xff;
+		dma_buffer[2] = data->word >> 8;
 		break;
 
 	case I2C_SMBUS_BLOCK_DATA:
@@ -494,8 +495,8 @@  static int ismt_access(struct i2c_adapter *adap, u16 addr,
 			dma_direction = DMA_TO_DEVICE;
 			desc->wr_len_cmd = dma_size;
 			desc->control |= ISMT_DESC_BLK;
-			priv->dma_buffer[0] = command;
-			memcpy(&priv->dma_buffer[1], &data->block[1], dma_size - 1);
+			dma_buffer[0] = command;
+			memcpy(&dma_buffer[1], &data->block[1], dma_size - 1);
 		} else {
 			/* Block Read */
 			dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA:  READ\n");
@@ -522,8 +523,8 @@  static int ismt_access(struct i2c_adapter *adap, u16 addr,
 			dma_direction = DMA_TO_DEVICE;
 			desc->wr_len_cmd = dma_size;
 			desc->control |= ISMT_DESC_I2C;
-			priv->dma_buffer[0] = command;
-			memcpy(&priv->dma_buffer[1], &data->block[1], dma_size - 1);
+			dma_buffer[0] = command;
+			memcpy(&dma_buffer[1], &data->block[1], dma_size - 1);
 		} else {
 			/* i2c Block Read */
 			dev_dbg(dev, "I2C_SMBUS_I2C_BLOCK_DATA:  READ\n");
@@ -552,18 +553,18 @@  static int ismt_access(struct i2c_adapter *adap, u16 addr,
 	if (dma_size != 0) {
 		dev_dbg(dev, " dev=%p\n", dev);
 		dev_dbg(dev, " data=%p\n", data);
-		dev_dbg(dev, " dma_buffer=%p\n", priv->dma_buffer);
+		dev_dbg(dev, " dma_buffer=%p\n", dma_buffer);
 		dev_dbg(dev, " dma_size=%d\n", dma_size);
 		dev_dbg(dev, " dma_direction=%d\n", dma_direction);
 
 		dma_addr = dma_map_single(dev,
-				      priv->dma_buffer,
+				      dma_buffer,
 				      dma_size,
 				      dma_direction);
 
 		if (dma_mapping_error(dev, dma_addr)) {
 			dev_err(dev, "Error in mapping dma buffer %p\n",
-				priv->dma_buffer);
+				dma_buffer);
 			return -EIO;
 		}