diff mbox

[2/2] drivers/vfio: Support EEH error injection

Message ID 1426055651-22925-2-git-send-email-gwshan@linux.vnet.ibm.com (mailing list archive)
State Superseded
Headers show

Commit Message

Gavin Shan March 11, 2015, 6:34 a.m. UTC
The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is represented by
(struct vfio_eeh_pe_err), to the indicated PE for testing purpose.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
---
 Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
 drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
 include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
 3 files changed, 80 insertions(+), 15 deletions(-)

Comments

David Gibson March 12, 2015, 12:57 a.m. UTC | #1
On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
> to inject the specified EEH error, which is represented by
> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
>  3 files changed, 80 insertions(+), 15 deletions(-)
> 
> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
> index 96978ec..2e7f736 100644
> --- a/Documentation/vfio.txt
> +++ b/Documentation/vfio.txt
> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
>  
>  The code flow from the example above should be slightly changed:
>  
> -	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
> +	struct vfio_eeh_pe_op *pe_op;
> +	struct vfio_eeh_pe_err *pe_err;
> +
> +	pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
> +	pe_err = (void *)pe_op + sizeof(*pe_op);
> +	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);

Surely that argsz can't be correct for most of the operations.  The
extended structure should only be there for the error inject ioctl,
yes?
Gavin Shan March 12, 2015, 3:16 a.m. UTC | #2
On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
>On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
>> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> to inject the specified EEH error, which is represented by
>> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> 
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
>>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
>>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
>>  3 files changed, 80 insertions(+), 15 deletions(-)
>> 
>> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> index 96978ec..2e7f736 100644
>> --- a/Documentation/vfio.txt
>> +++ b/Documentation/vfio.txt
>> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
>>  
>>  The code flow from the example above should be slightly changed:
>>  
>> -	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
>> +	struct vfio_eeh_pe_op *pe_op;
>> +	struct vfio_eeh_pe_err *pe_err;
>> +
>> +	pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
>> +	pe_err = (void *)pe_op + sizeof(*pe_op);
>> +	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>
>Surely that argsz can't be correct for most of the operations.  The
>extended structure should only be there for the error inject ioctl,
>yes?
>

argsz isn't appropriate for most cases because kernel has the check
"expected_argsz < passed_argsz", not "expected_argsz == passed_argsz".
However, I'll fix it as follows to avoid confusion after collecting
more comments:

	struct vfio_eeh_pe_op *pe_op;
	struct vfio_eeh_pe_err *pe_err;

	/* For all cases except error injection */
	pe_op = malloc(sizeof(*pe_op));
	pe_op->argsz = sizeof(*pe_op);

	/* For error injection case here */
	pe_op = realloc(sizeof(*pe_op) + sizeof(*pe_err));
	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
	pe_err = (void *)pe_op + sizeof(*pe_op);

Thanks,
Gavin

>-- 
>David Gibson			| I'll have my music baroque, and my code
>david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
>				| _way_ _around_!
>http://www.ozlabs.org/~dgibson
David Gibson March 12, 2015, 4:21 a.m. UTC | #3
On Thu, Mar 12, 2015 at 02:16:42PM +1100, Gavin Shan wrote:
> On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
> >On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
> >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
> >> to inject the specified EEH error, which is represented by
> >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
> >> 
> >> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> >> ---
> >>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
> >>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
> >>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
> >>  3 files changed, 80 insertions(+), 15 deletions(-)
> >> 
> >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
> >> index 96978ec..2e7f736 100644
> >> --- a/Documentation/vfio.txt
> >> +++ b/Documentation/vfio.txt
> >> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
> >>  
> >>  The code flow from the example above should be slightly changed:
> >>  
> >> -	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
> >> +	struct vfio_eeh_pe_op *pe_op;
> >> +	struct vfio_eeh_pe_err *pe_err;
> >> +
> >> +	pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
> >> +	pe_err = (void *)pe_op + sizeof(*pe_op);
> >> +	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
> >
> >Surely that argsz can't be correct for most of the operations.  The
> >extended structure should only be there for the error inject ioctl,
> >yes?
> >
> 
> argsz isn't appropriate for most cases because kernel has the check
> "expected_argsz < passed_argsz", not "expected_argsz ==
> passed_argsz".

It works for now, but if any of those calls was extended with more
data, it would break horribly.  By setting the argsz greater than
necessary, you're effectively passing uninitialized data to the
ioctl().  At the moment, the ioctl() ignores it, but the whole point
of the argsz value is that in the future, it might not.

> However, I'll fix it as follows to avoid confusion after collecting
> more comments:
> 
> 	struct vfio_eeh_pe_op *pe_op;
> 	struct vfio_eeh_pe_err *pe_err;
> 
> 	/* For all cases except error injection */
> 	pe_op = malloc(sizeof(*pe_op));
> 	pe_op->argsz = sizeof(*pe_op);
> 
> 	/* For error injection case here */
> 	pe_op = realloc(sizeof(*pe_op) + sizeof(*pe_err));
> 	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
> 	pe_err = (void *)pe_op + sizeof(*pe_op);
> 
> Thanks,
> Gavin
> 
> 
>
Gavin Shan March 12, 2015, 5:01 a.m. UTC | #4
On Thu, Mar 12, 2015 at 03:21:29PM +1100, David Gibson wrote:
>On Thu, Mar 12, 2015 at 02:16:42PM +1100, Gavin Shan wrote:
>> On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
>> >On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
>> >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> >> to inject the specified EEH error, which is represented by
>> >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> >> 
>> >> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> >> ---
>> >>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
>> >>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
>> >>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
>> >>  3 files changed, 80 insertions(+), 15 deletions(-)
>> >> 
>> >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> >> index 96978ec..2e7f736 100644
>> >> --- a/Documentation/vfio.txt
>> >> +++ b/Documentation/vfio.txt
>> >> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
>> >>  
>> >>  The code flow from the example above should be slightly changed:
>> >>  
>> >> -	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
>> >> +	struct vfio_eeh_pe_op *pe_op;
>> >> +	struct vfio_eeh_pe_err *pe_err;
>> >> +
>> >> +	pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
>> >> +	pe_err = (void *)pe_op + sizeof(*pe_op);
>> >> +	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>> >
>> >Surely that argsz can't be correct for most of the operations.  The
>> >extended structure should only be there for the error inject ioctl,
>> >yes?
>> >
>> 
>> argsz isn't appropriate for most cases because kernel has the check
>> "expected_argsz < passed_argsz", not "expected_argsz ==
>> passed_argsz".
>
>It works for now, but if any of those calls was extended with more
>data, it would break horribly.  By setting the argsz greater than
>necessary, you're effectively passing uninitialized data to the
>ioctl().  At the moment, the ioctl() ignores it, but the whole point
>of the argsz value is that in the future, it might not.
>

Thank you for more explanation. I agree that it's worthy to pass precise
argument size. I'll fix it as below in next revision:

>> However, I'll fix it as follows to avoid confusion after collecting
>> more comments:
>> 
>> 	struct vfio_eeh_pe_op *pe_op;
>> 	struct vfio_eeh_pe_err *pe_err;
>> 
>> 	/* For all cases except error injection */
>> 	pe_op = malloc(sizeof(*pe_op));
>> 	pe_op->argsz = sizeof(*pe_op);
>> 
>> 	/* For error injection case here */
>> 	pe_op = realloc(sizeof(*pe_op) + sizeof(*pe_err));
>> 	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>> 	pe_err = (void *)pe_op + sizeof(*pe_op);
>> 

Thanks,
Gavin

>
>-- 
>David Gibson			| I'll have my music baroque, and my code
>david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
>				| _way_ _around_!
>http://www.ozlabs.org/~dgibson
Alex Williamson March 13, 2015, 8:28 p.m. UTC | #5
On Thu, 2015-03-12 at 15:21 +1100, David Gibson wrote:
> On Thu, Mar 12, 2015 at 02:16:42PM +1100, Gavin Shan wrote:
> > On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
> > >On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
> > >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
> > >> to inject the specified EEH error, which is represented by
> > >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
> > >> 
> > >> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> > >> ---
> > >>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
> > >>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
> > >>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
> > >>  3 files changed, 80 insertions(+), 15 deletions(-)
> > >> 
> > >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
> > >> index 96978ec..2e7f736 100644
> > >> --- a/Documentation/vfio.txt
> > >> +++ b/Documentation/vfio.txt
> > >> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
> > >>  
> > >>  The code flow from the example above should be slightly changed:
> > >>  
> > >> -	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
> > >> +	struct vfio_eeh_pe_op *pe_op;
> > >> +	struct vfio_eeh_pe_err *pe_err;
> > >> +
> > >> +	pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
> > >> +	pe_err = (void *)pe_op + sizeof(*pe_op);
> > >> +	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
> > >
> > >Surely that argsz can't be correct for most of the operations.  The
> > >extended structure should only be there for the error inject ioctl,
> > >yes?
> > >
> > 
> > argsz isn't appropriate for most cases because kernel has the check
> > "expected_argsz < passed_argsz", not "expected_argsz ==
> > passed_argsz".
> 
> It works for now, but if any of those calls was extended with more
> data, it would break horribly.  By setting the argsz greater than
> necessary, you're effectively passing uninitialized data to the
> ioctl().  At the moment, the ioctl() ignores it, but the whole point
> of the argsz value is that in the future, it might not.

argsz tells us how much data the user is passing, we're always going to
need to figure out what the extra data is, so I don't really see the
point of this objection.  In fact, it might make use of this interface
quite a bit easier if vfio_eeh_pe_op ended with a union including
vfio_eeh_pe_err.  op == VFIO_EEH_PE_INJECT_ERR defines that the user has
passed vfio_eeh_pe_err in the union, other ops may add new unions later.
Thanks,

Alex

> > However, I'll fix it as follows to avoid confusion after collecting
> > more comments:
> > 
> > 	struct vfio_eeh_pe_op *pe_op;
> > 	struct vfio_eeh_pe_err *pe_err;
> > 
> > 	/* For all cases except error injection */
> > 	pe_op = malloc(sizeof(*pe_op));
> > 	pe_op->argsz = sizeof(*pe_op);
> > 
> > 	/* For error injection case here */
> > 	pe_op = realloc(sizeof(*pe_op) + sizeof(*pe_err));
> > 	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
> > 	pe_err = (void *)pe_op + sizeof(*pe_op);
> > 
> > Thanks,
> > Gavin
> > 
> > 
> > 
>
Alex Williamson March 13, 2015, 8:35 p.m. UTC | #6
On Wed, 2015-03-11 at 17:34 +1100, Gavin Shan wrote:
> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
> to inject the specified EEH error, which is represented by
> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
> 
> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> ---
>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
>  3 files changed, 80 insertions(+), 15 deletions(-)
> 
> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
[snip]
> @@ -490,6 +499,29 @@ struct vfio_eeh_pe_op {
>  #define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
>  #define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
> +#define VFIO_EEH_PE_INJECT_ERR		9	/* Inject EEH error          */
> +#define  VFIO_EEH_ERR_TYPE_32		0	/* 32-bits EEH error type    */
> +#define  VFIO_EEH_ERR_TYPE_64		1	/* 64-bits EEH error type    */
> +#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR		0	/* Memory load  */
> +#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA		1
> +#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR		2	/* IO load      */
> +#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA		3
> +#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR		4	/* Config load  */
> +#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA		5
> +#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR		6	/* Memory store */
> +#define  VFIO_EEH_ERR_FUNC_ST_MEM_DATA		7
> +#define  VFIO_EEH_ERR_FUNC_ST_IO_ADDR		8	/* IO store     */
> +#define  VFIO_EEH_ERR_FUNC_ST_IO_DATA		9
> +#define  VFIO_EEH_ERR_FUNC_ST_CFG_ADDR		10	/* Config store */
> +#define  VFIO_EEH_ERR_FUNC_ST_CFG_DATA		11
> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_ADDR		12	/* DMA read     */
> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_DATA		13
> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_MASTER	14
> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_TARGET	15
> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_ADDR		16	/* DMA write    */
> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_DATA		17
> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_MASTER	18
> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_TARGET	19

This data duplication from patch 1/2 is kind of concerning.  In one case
we're adding to arch/powerpc/include/asm/eeh.h, which is a kernel
internal interface and entirely changeable, in the other we're matching
those current definitions in uapi, which needs to be stable.  Are these
indexes part of a spec that we can rely on them being stable or do we
need some sort of translation layer to go from the vfio uapi defined
value to the kernel internal version?  Thanks,

Alex
Gavin Shan March 15, 2015, 10:49 p.m. UTC | #7
On Fri, Mar 13, 2015 at 02:28:09PM -0600, Alex Williamson wrote:
>On Thu, 2015-03-12 at 15:21 +1100, David Gibson wrote:
>> On Thu, Mar 12, 2015 at 02:16:42PM +1100, Gavin Shan wrote:
>> > On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
>> > >On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
>> > >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> > >> to inject the specified EEH error, which is represented by
>> > >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> > >> 
>> > >> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> > >> ---
>> > >>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
>> > >>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
>> > >>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
>> > >>  3 files changed, 80 insertions(+), 15 deletions(-)
>> > >> 
>> > >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
>> > >> index 96978ec..2e7f736 100644
>> > >> --- a/Documentation/vfio.txt
>> > >> +++ b/Documentation/vfio.txt
>> > >> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
>> > >>  
>> > >>  The code flow from the example above should be slightly changed:
>> > >>  
>> > >> -	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
>> > >> +	struct vfio_eeh_pe_op *pe_op;
>> > >> +	struct vfio_eeh_pe_err *pe_err;
>> > >> +
>> > >> +	pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
>> > >> +	pe_err = (void *)pe_op + sizeof(*pe_op);
>> > >> +	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>> > >
>> > >Surely that argsz can't be correct for most of the operations.  The
>> > >extended structure should only be there for the error inject ioctl,
>> > >yes?
>> > >
>> > 
>> > argsz isn't appropriate for most cases because kernel has the check
>> > "expected_argsz < passed_argsz", not "expected_argsz ==
>> > passed_argsz".
>> 
>> It works for now, but if any of those calls was extended with more
>> data, it would break horribly.  By setting the argsz greater than
>> necessary, you're effectively passing uninitialized data to the
>> ioctl().  At the moment, the ioctl() ignores it, but the whole point
>> of the argsz value is that in the future, it might not.
>
>argsz tells us how much data the user is passing, we're always going to
>need to figure out what the extra data is, so I don't really see the
>point of this objection.  In fact, it might make use of this interface
>quite a bit easier if vfio_eeh_pe_op ended with a union including
>vfio_eeh_pe_err.  op == VFIO_EEH_PE_INJECT_ERR defines that the user has
>passed vfio_eeh_pe_err in the union, other ops may add new unions later.
>Thanks,
>

Ok. I'll have following data struct in next revision:

struct vfio_eeh_pe_err {
	__u32 type;
	__u32 func;
	__u64 addr;
	__u64 mask;
};

struct vfio_eeh_pe_op {
	__u32 argsz;
        __u32 flags;
        __u32 op;
	union {
		struct vfio_eeh_pe_err err;
	};
};

Thanks,
Gavin

>Alex
>
>> > However, I'll fix it as follows to avoid confusion after collecting
>> > more comments:
>> > 
>> > 	struct vfio_eeh_pe_op *pe_op;
>> > 	struct vfio_eeh_pe_err *pe_err;
>> > 
>> > 	/* For all cases except error injection */
>> > 	pe_op = malloc(sizeof(*pe_op));
>> > 	pe_op->argsz = sizeof(*pe_op);
>> > 
>> > 	/* For error injection case here */
>> > 	pe_op = realloc(sizeof(*pe_op) + sizeof(*pe_err));
>> > 	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
>> > 	pe_err = (void *)pe_op + sizeof(*pe_op);
>> > 
>> > Thanks,
>> > Gavin
>> > 
>> > 
>> > 
>> 
>
>
>
Gavin Shan March 15, 2015, 10:55 p.m. UTC | #8
On Fri, Mar 13, 2015 at 02:35:18PM -0600, Alex Williamson wrote:
>On Wed, 2015-03-11 at 17:34 +1100, Gavin Shan wrote:
>> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
>> to inject the specified EEH error, which is represented by
>> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
>> 
>> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
>> ---
>>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
>>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
>>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
>>  3 files changed, 80 insertions(+), 15 deletions(-)
>> 
>> diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
>[snip]
>> @@ -490,6 +499,29 @@ struct vfio_eeh_pe_op {
>>  #define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
>>  #define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
>>  #define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
>> +#define VFIO_EEH_PE_INJECT_ERR		9	/* Inject EEH error          */
>> +#define  VFIO_EEH_ERR_TYPE_32		0	/* 32-bits EEH error type    */
>> +#define  VFIO_EEH_ERR_TYPE_64		1	/* 64-bits EEH error type    */
>> +#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR		0	/* Memory load  */
>> +#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA		1
>> +#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR		2	/* IO load      */
>> +#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA		3
>> +#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR		4	/* Config load  */
>> +#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA		5
>> +#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR		6	/* Memory store */
>> +#define  VFIO_EEH_ERR_FUNC_ST_MEM_DATA		7
>> +#define  VFIO_EEH_ERR_FUNC_ST_IO_ADDR		8	/* IO store     */
>> +#define  VFIO_EEH_ERR_FUNC_ST_IO_DATA		9
>> +#define  VFIO_EEH_ERR_FUNC_ST_CFG_ADDR		10	/* Config store */
>> +#define  VFIO_EEH_ERR_FUNC_ST_CFG_DATA		11
>> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_ADDR		12	/* DMA read     */
>> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_DATA		13
>> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_MASTER	14
>> +#define  VFIO_EEH_ERR_FUNC_DMA_RD_TARGET	15
>> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_ADDR		16	/* DMA write    */
>> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_DATA		17
>> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_MASTER	18
>> +#define  VFIO_EEH_ERR_FUNC_DMA_WR_TARGET	19
>
>This data duplication from patch 1/2 is kind of concerning.  In one case
>we're adding to arch/powerpc/include/asm/eeh.h, which is a kernel
>internal interface and entirely changeable, in the other we're matching
>those current definitions in uapi, which needs to be stable.  Are these
>indexes part of a spec that we can rely on them being stable or do we
>need some sort of translation layer to go from the vfio uapi defined
>value to the kernel internal version?  Thanks,
>

All those constants are defined by PAPR specification, and those constants
defined here or by PATCH[1/2] aren't expected to be changed.

Thanks,
Gavin

>Alex
>
>
David Gibson March 16, 2015, 1:01 a.m. UTC | #9
On Fri, Mar 13, 2015 at 02:28:09PM -0600, Alex Williamson wrote:
> On Thu, 2015-03-12 at 15:21 +1100, David Gibson wrote:
> > On Thu, Mar 12, 2015 at 02:16:42PM +1100, Gavin Shan wrote:
> > > On Thu, Mar 12, 2015 at 11:57:21AM +1100, David Gibson wrote:
> > > >On Wed, Mar 11, 2015 at 05:34:11PM +1100, Gavin Shan wrote:
> > > >> The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
> > > >> to inject the specified EEH error, which is represented by
> > > >> (struct vfio_eeh_pe_err), to the indicated PE for testing purpose.
> > > >> 
> > > >> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
> > > >> ---
> > > >>  Documentation/vfio.txt        | 47 ++++++++++++++++++++++++++++++-------------
> > > >>  drivers/vfio/vfio_spapr_eeh.c | 14 +++++++++++++
> > > >>  include/uapi/linux/vfio.h     | 34 ++++++++++++++++++++++++++++++-
> > > >>  3 files changed, 80 insertions(+), 15 deletions(-)
> > > >> 
> > > >> diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
> > > >> index 96978ec..2e7f736 100644
> > > >> --- a/Documentation/vfio.txt
> > > >> +++ b/Documentation/vfio.txt
> > > >> @@ -328,7 +328,13 @@ So 4 additional ioctls have been added:
> > > >>  
> > > >>  The code flow from the example above should be slightly changed:
> > > >>  
> > > >> -	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
> > > >> +	struct vfio_eeh_pe_op *pe_op;
> > > >> +	struct vfio_eeh_pe_err *pe_err;
> > > >> +
> > > >> +	pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
> > > >> +	pe_err = (void *)pe_op + sizeof(*pe_op);
> > > >> +	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
> > > >
> > > >Surely that argsz can't be correct for most of the operations.  The
> > > >extended structure should only be there for the error inject ioctl,
> > > >yes?
> > > >
> > > 
> > > argsz isn't appropriate for most cases because kernel has the check
> > > "expected_argsz < passed_argsz", not "expected_argsz ==
> > > passed_argsz".
> > 
> > It works for now, but if any of those calls was extended with more
> > data, it would break horribly.  By setting the argsz greater than
> > necessary, you're effectively passing uninitialized data to the
> > ioctl().  At the moment, the ioctl() ignores it, but the whole point
> > of the argsz value is that in the future, it might not.
> 
> argsz tells us how much data the user is passing, we're always going to
> need to figure out what the extra data is, so I don't really see the
> point of this objection.  In fact, it might make use of this interface
> quite a bit easier if vfio_eeh_pe_op ended with a union including
> vfio_eeh_pe_err.  op == VFIO_EEH_PE_INJECT_ERR defines that the user has
> passed vfio_eeh_pe_err in the union, other ops may add new unions later.

*thinks*

Yeah, I missed the fact that all these ioctls are using a common
arguments structure.  In which case, yes, absolutely the error field
should be added to that structure rather than bolted on afterwards.

A union is the obvious way to do that, but I think we should make sure
to include a padding field in that union as well, so we've got some
room to move if we both need to add other union options and add common
things after the union in future.
>
diff mbox

Patch

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978ec..2e7f736 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -328,7 +328,13 @@  So 4 additional ioctls have been added:
 
 The code flow from the example above should be slightly changed:
 
-	struct vfio_eeh_pe_op pe_op = { .argsz = sizeof(pe_op), .flags = 0 };
+	struct vfio_eeh_pe_op *pe_op;
+	struct vfio_eeh_pe_err *pe_err;
+
+	pe_op = malloc(sizeof(*pe_op) + sizeof(*pe_err));
+	pe_err = (void *)pe_op + sizeof(*pe_op);
+	pe_op->argsz = sizeof(*pe_op) + sizeof(*pe_err);
+	pe_op->flags = 0;
 
 	.....
 	/* Add the group to the container */
@@ -367,8 +373,8 @@  The code flow from the example above should be slightly changed:
 	ioctl(container, VFIO_CHECK_EXTENSION, VFIO_EEH);
 
 	/* Enable the EEH functionality on the device */
-	pe_op.op = VFIO_EEH_PE_ENABLE;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op->op = VFIO_EEH_PE_ENABLE;
+	ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
 	/* You're suggested to create additional data struct to represent
 	 * PE, and put child devices belonging to same IOMMU group to the
@@ -376,8 +382,8 @@  The code flow from the example above should be slightly changed:
 	 */
 
 	/* Check the PE's state and make sure it's in functional state */
-	pe_op.op = VFIO_EEH_PE_GET_STATE;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op->op = VFIO_EEH_PE_GET_STATE;
+	ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
 	/* Save device state using pci_save_state().
 	 * EEH should be enabled on the specified device.
@@ -385,11 +391,24 @@  The code flow from the example above should be slightly changed:
 
 	....
 
+	/* Inject EEH error, which is expected to be caused by 32-bits
+	 * config load.
+	 */
+	pe_err->type = VFIO_EEH_ERR_TYPE_32;
+	pe_err->func = VFIO_EEH_ERR_FUNC_LD_CFG_ADDR;
+	pe_err->addr = 0ul;
+	pe_err->mask = 0ul;
+	pe_op->op = VFIO_EEH_PE_INJECT_ERR;
+	ioctl(container, VFIO_EEH_PE_OP, pe_op);
+
+	....
+
 	/* When 0xFF's returned from reading PCI config space or IO BARs
 	 * of the PCI device. Check the PE's state to see if that has been
 	 * frozen.
 	 */
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op->op = VFIO_EEH_PE_GET_STATE;
+	ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
 	/* Waiting for pending PCI transactions to be completed and don't
 	 * produce any more PCI traffic from/to the affected PE until
@@ -400,22 +419,22 @@  The code flow from the example above should be slightly changed:
 	 * standard part of PCI config space, AER registers are dumped
 	 * as logs for further analysis.
 	 */
-	pe_op.op = VFIO_EEH_PE_UNFREEZE_IO;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op->op = VFIO_EEH_PE_UNFREEZE_IO;
+	ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
 	/*
 	 * Issue PE reset: hot or fundamental reset. Usually, hot reset
 	 * is enough. However, the firmware of some PCI adapters would
 	 * require fundamental reset.
 	 */
-	pe_op.op = VFIO_EEH_PE_RESET_HOT;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
-	pe_op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op->op = VFIO_EEH_PE_RESET_HOT;
+	ioctl(container, VFIO_EEH_PE_OP, pe_op);
+	pe_op->op = VFIO_EEH_PE_RESET_DEACTIVATE;
+	ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
 	/* Configure the PCI bridges for the affected PE */
-	pe_op.op = VFIO_EEH_PE_CONFIGURE;
-	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+	pe_op->op = VFIO_EEH_PE_CONFIGURE;
+	ioctl(container, VFIO_EEH_PE_OP, pe_op);
 
 	/* Restored state we saved at initialization time. pci_restore_state()
 	 * is good enough as an example.
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db..4f1ebc1 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,20 @@  long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 		case VFIO_EEH_PE_CONFIGURE:
 			ret = eeh_pe_configure(pe);
 			break;
+		case VFIO_EEH_PE_INJECT_ERR: {
+			struct vfio_eeh_pe_err err;
+
+			if (op.argsz < minsz + sizeof(err))
+				return -EINVAL;
+			if (copy_from_user(&err,
+					   (void __user *)(arg + minsz),
+					   sizeof(err)))
+				return -EFAULT;
+
+			ret = eeh_pe_inject_err(pe, err.type, err.func,
+						err.addr, err.mask);
+			break;
+		}
 		default:
 			ret = -EINVAL;
 		}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 82889c3..0b32422 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -468,12 +468,21 @@  struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+	__u32 type;
+	__u32 func;
+	__u64 addr;
+	__u64 mask;
+};
+
 struct vfio_eeh_pe_op {
 	__u32 argsz;
 	__u32 flags;
 	__u32 op;
+	__u8  data[];
 };
 
 #define VFIO_EEH_PE_DISABLE		0	/* Disable EEH functionality */
@@ -490,6 +499,29 @@  struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
+#define VFIO_EEH_PE_INJECT_ERR		9	/* Inject EEH error          */
+#define  VFIO_EEH_ERR_TYPE_32		0	/* 32-bits EEH error type    */
+#define  VFIO_EEH_ERR_TYPE_64		1	/* 64-bits EEH error type    */
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_ADDR		0	/* Memory load  */
+#define  VFIO_EEH_ERR_FUNC_LD_MEM_DATA		1
+#define  VFIO_EEH_ERR_FUNC_LD_IO_ADDR		2	/* IO load      */
+#define  VFIO_EEH_ERR_FUNC_LD_IO_DATA		3
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_ADDR		4	/* Config load  */
+#define  VFIO_EEH_ERR_FUNC_LD_CFG_DATA		5
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_ADDR		6	/* Memory store */
+#define  VFIO_EEH_ERR_FUNC_ST_MEM_DATA		7
+#define  VFIO_EEH_ERR_FUNC_ST_IO_ADDR		8	/* IO store     */
+#define  VFIO_EEH_ERR_FUNC_ST_IO_DATA		9
+#define  VFIO_EEH_ERR_FUNC_ST_CFG_ADDR		10	/* Config store */
+#define  VFIO_EEH_ERR_FUNC_ST_CFG_DATA		11
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_ADDR		12	/* DMA read     */
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_DATA		13
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_MASTER	14
+#define  VFIO_EEH_ERR_FUNC_DMA_RD_TARGET	15
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_ADDR		16	/* DMA write    */
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_DATA		17
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_MASTER	18
+#define  VFIO_EEH_ERR_FUNC_DMA_WR_TARGET	19
 
 #define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)