diff mbox series

[4/9] s390x: refactor error handling for SSCH and RSCH

Message ID 20170830163609.50260-5-pasic@linux.vnet.ibm.com
State New
Headers show
Series | expand

Commit Message

Halil Pasic Aug. 30, 2017, 4:36 p.m. UTC
Simplify the error handling of the SSCH and RSCH handler avoiding
arbitrary and cryptic error codes being mapped to what a subchannel is
supposed to do.  Let the code detecting the condition tell how it's to be
handled in a less ambiguous way.  It's best to handle SSCH and RSCH in
one go as the emulation of the two shares a lot of code.

Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
Acked-by: Pierre Morel<pmorel@linux.vnet.ibm.com>

---
Notes:
Funny, we had a different swich-case for SSCH and RSCH. For
virtual it did not matter, but for passtrough it could. In practice
-EINVAL from the kernel would have been mapped to cc 2 in case of
RSCH and to cc 1 in case of SSHC which is absurd. Same goes for
-EBUSY from kernel which is correctly mapped to cc 2 in case of
SSCH, but for RSCH it gets mapped to cc 1 which is also absurd.
---
 hw/s390x/css.c              | 86 ++++++++++++++-------------------------------
 hw/s390x/s390-ccw.c         |  8 ++---
 hw/vfio/ccw.c               | 32 +++++++++++++----
 include/hw/s390x/css.h      | 30 ++++++++++++----
 include/hw/s390x/s390-ccw.h |  2 +-
 target/s390x/ioinst.c       | 61 +++++++++-----------------------
 6 files changed, 97 insertions(+), 122 deletions(-)

Comments

Cornelia Huck Aug. 31, 2017, 9:55 a.m. UTC | #1
On Wed, 30 Aug 2017 18:36:04 +0200
Halil Pasic <pasic@linux.vnet.ibm.com> wrote:

> Simplify the error handling of the SSCH and RSCH handler avoiding
> arbitrary and cryptic error codes being mapped to what a subchannel is
> supposed to do.  Let the code detecting the condition tell how it's to be
> handled in a less ambiguous way.  It's best to handle SSCH and RSCH in
> one go as the emulation of the two shares a lot of code.

So determining the return code at the point in time where we can
instead of trying to map to return codes and back again makes quite a
bit of sense, but I'll have to look at the rest of this. For one thing,
would a better split to introduce the cc-reporting infrastructure first
and then convert the different I/O functions?

> 
> Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
> Acked-by: Pierre Morel<pmorel@linux.vnet.ibm.com>
> 
> ---
> Notes:
> Funny, we had a different swich-case for SSCH and RSCH. For
> virtual it did not matter, but for passtrough it could. In practice
> -EINVAL from the kernel would have been mapped to cc 2 in case of
> RSCH and to cc 1 in case of SSHC which is absurd. Same goes for
> -EBUSY from kernel which is correctly mapped to cc 2 in case of
> SSCH, but for RSCH it gets mapped to cc 1 which is also absurd.
> ---
>  hw/s390x/css.c              | 86 ++++++++++++++-------------------------------
>  hw/s390x/s390-ccw.c         |  8 ++---
>  hw/vfio/ccw.c               | 32 +++++++++++++----
>  include/hw/s390x/css.h      | 30 ++++++++++++----
>  include/hw/s390x/s390-ccw.h |  2 +-
>  target/s390x/ioinst.c       | 61 +++++++++-----------------------
>  6 files changed, 97 insertions(+), 122 deletions(-)
> 
> diff --git a/hw/s390x/css.c b/hw/s390x/css.c
> index bc47bf5b20..1102642c10 100644
> --- a/hw/s390x/css.c
> +++ b/hw/s390x/css.c
> @@ -1015,12 +1015,11 @@ static void sch_handle_start_func_virtual(SubchDev *sch)
>  
>  }
>  
> -static int sch_handle_start_func_passthrough(SubchDev *sch)
> +static void sch_handle_start_func_passthrough(SubchDev *sch)
>  {
>  
>      PMCW *p = &sch->curr_status.pmcw;
>      SCSW *s = &sch->curr_status.scsw;
> -    int ret;
>  
>      ORB *orb = &sch->orb;
>      if (!(s->ctrl & SCSW_ACTL_SUSP)) {
> @@ -1034,28 +1033,10 @@ static int sch_handle_start_func_passthrough(SubchDev *sch)
>       */
>      if (!(orb->ctrl0 & ORB_CTRL0_MASK_PFCH) ||
>          !(orb->ctrl0 & ORB_CTRL0_MASK_C64)) {
> -        return -ENODEV;
> +        sch->iret.cc = 3;

Same as already commented: I don't think cc 3 is a good match.

>      }
>  
> -    ret = s390_ccw_cmd_request(orb, s, sch->driver_data);
> -    switch (ret) {
> -    /* Currently we don't update control block and just return the cc code. */
> -    case 0:
> -        break;
> -    case -EBUSY:
> -        break;
> -    case -ENODEV:
> -        break;
> -    case -EFAULT:
> -         break;
> -    case -EACCES:
> -        /* Let's reflect an inaccessible host device by cc 3. */
> -    default:
> -        /* Let's make all other return codes map to cc 3.  */
> -        ret = -ENODEV;
> -    };
> -
> -    return ret;
> +    s390_ccw_cmd_request(sch);

As you change the handling anyway: Don't change this logic in prior
patches?

>  }
>  
>  /*
> @@ -1064,7 +1045,7 @@ static int sch_handle_start_func_passthrough(SubchDev *sch)
>   * read/writes) asynchronous later on if we start supporting more than
>   * our current very simple devices.
>   */
> -int do_subchannel_work_virtual(SubchDev *sch)
> +void do_subchannel_work_virtual(SubchDev *sch)
>  {
>  
>      SCSW *s = &sch->curr_status.scsw;
> @@ -1078,41 +1059,35 @@ int do_subchannel_work_virtual(SubchDev *sch)
>          sch_handle_start_func_virtual(sch);
>      } else {
>          /* Cannot happen. */
> -        return -ENODEV;
> +        sch->iret.cc = 3;

See comment for the last patch.

>      }
>      css_inject_io_interrupt(sch);
> -    return 0;
>  }
>  
> -int do_subchannel_work_passthrough(SubchDev *sch)
> +void do_subchannel_work_passthrough(SubchDev *sch)
>  {
> -    int ret;
>      SCSW *s = &sch->curr_status.scsw;
>  
>      if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) {
>          /* TODO: Clear handling */
>          sch_handle_clear_func(sch);
> -        ret = 0;
>      } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) {
>          /* TODO: Halt handling */
>          sch_handle_halt_func(sch);
> -        ret = 0;
>      } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
> -        ret = sch_handle_start_func_passthrough(sch);
> +        sch_handle_start_func_passthrough(sch);
>      } else {
>          /* Cannot happen. */
> -        return -ENODEV;
> +        sch->iret.cc = 3;

ftcl == 0 should have been rejected already by the function handlers
here as well, shouldn't it?

>      }
> -
> -    return ret;
>  }
>  
> -static int do_subchannel_work(SubchDev *sch)
> +static void do_subchannel_work(SubchDev *sch)
>  {
>      if (sch->do_subchannel_work) {
> -        return sch->do_subchannel_work(sch);
> +        sch->do_subchannel_work(sch);
>      } else {
> -        return -ENODEV;
> +        sch->iret.cc = 3;

See my comment for a prior patch.

>      }
>  }
>  

(...)

> diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
> index 5c5fe6b202..d093181a9e 100644
> --- a/include/hw/s390x/css.h
> +++ b/include/hw/s390x/css.h
> @@ -94,13 +94,31 @@ struct SubchDev {
>      /* transport-provided data: */
>      int (*ccw_cb) (SubchDev *, CCW1);
>      void (*disable_cb)(SubchDev *);
> -    int (*do_subchannel_work) (SubchDev *);
> +    void (*do_subchannel_work) (SubchDev *);
>      SenseId id;
>      void *driver_data;
> +    /* io instructions conclude according to iret */
> +    struct {
> +        /*
> +         * General semantic of cc codes of IO instructions is (brief):
> +         * 0 -- produced expected result
> +         * 1 -- produced alternate result
> +         * 2 -- ineffective, because busy with previously initiated function
> +         * 3 -- ineffective, not operational

I'm not sure you can summarize this that way in all cases.

Also, what does "ineffective" mean? If I get a cc 1 for, say, ssch, I
don't expect something either as the subchannel was already status
pending.

> +         */
> +        uint32_t cc:4;
> +        bool pgm_chk:1;

This looks weird?

> +        uint32_t irq_code;
> +    } iret;
>  };
>  
>  extern const VMStateDescription vmstate_subch_dev;

(...)

> @@ -238,33 +236,17 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb)
>      }
>      trace_ioinst_sch_id("ssch", cssid, ssid, schid);
>      sch = css_find_subch(m, cssid, ssid, schid);
> -    if (sch && css_subch_visible(sch)) {
> -        ret = css_do_ssch(sch, &orb);
> +    if (!sch || !css_subch_visible(sch)) {
> +        setcc(cpu, 3);
> +        return;
>      }
> -    switch (ret) {
> -    case -ENODEV:
> -        cc = 3;
> -        break;
> -    case -EBUSY:
> -        cc = 2;
> -        break;
> -    case -EFAULT:
> -        /*
> -         * TODO:
> -         * I'm wondering whether there is something better
> -         * to do for us here (like setting some device or
> -         * subchannel status).
> -         */

You removed the TODO :(

There still might be a better way to reflect this...

> -        program_interrupt(env, PGM_ADDRESSING, 4);
> +    css_subch_clear_iret(sch);
> +    css_do_ssch(sch, &orb);
> +    if (sch->iret.pgm_chk) {
> +        program_interrupt(env, sch->iret.irq_code, 4);
>          return;
> -    case 0:
> -        cc = 0;
> -        break;
> -    default:
> -        cc = 1;
> -        break;
>      }
> -    setcc(cpu, cc);
> +    setcc(cpu, sch->iret.cc);
>  }
>  
>  void ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb)
Halil Pasic Sept. 5, 2017, 3:55 p.m. UTC | #2
On 08/31/2017 11:55 AM, Cornelia Huck wrote:
> On Wed, 30 Aug 2017 18:36:04 +0200
> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> 
>> Simplify the error handling of the SSCH and RSCH handler avoiding
>> arbitrary and cryptic error codes being mapped to what a subchannel is
>> supposed to do.  Let the code detecting the condition tell how it's to be
>> handled in a less ambiguous way.  It's best to handle SSCH and RSCH in
>> one go as the emulation of the two shares a lot of code.
> 
> So determining the return code at the point in time where we can
> instead of trying to map to return codes and back again makes quite a
> bit of sense, but I'll have to look at the rest of this.


Looging forward to.

> For one thing,
> would a better split to introduce the cc-reporting infrastructure first
> and then convert the different I/O functions?
> 

Speaks nothing against it, although I don't see anything wrong with
not introducing unused infrastructure (that is introducing
the infrastructure and it's first user together). As you prefer.

>>
>> Signed-off-by: Halil Pasic <pasic@linux.vnet.ibm.com>
>> Acked-by: Pierre Morel<pmorel@linux.vnet.ibm.com>
>>
>> ---
>> Notes:
>> Funny, we had a different swich-case for SSCH and RSCH. For
>> virtual it did not matter, but for passtrough it could. In practice
>> -EINVAL from the kernel would have been mapped to cc 2 in case of
>> RSCH and to cc 1 in case of SSHC which is absurd. Same goes for
>> -EBUSY from kernel which is correctly mapped to cc 2 in case of
>> SSCH, but for RSCH it gets mapped to cc 1 which is also absurd.
>> ---
>>  hw/s390x/css.c              | 86 ++++++++++++++-------------------------------
>>  hw/s390x/s390-ccw.c         |  8 ++---
>>  hw/vfio/ccw.c               | 32 +++++++++++++----
>>  include/hw/s390x/css.h      | 30 ++++++++++++----
>>  include/hw/s390x/s390-ccw.h |  2 +-
>>  target/s390x/ioinst.c       | 61 +++++++++-----------------------
>>  6 files changed, 97 insertions(+), 122 deletions(-)
>>
>> diff --git a/hw/s390x/css.c b/hw/s390x/css.c
>> index bc47bf5b20..1102642c10 100644
>> --- a/hw/s390x/css.c
>> +++ b/hw/s390x/css.c
>> @@ -1015,12 +1015,11 @@ static void sch_handle_start_func_virtual(SubchDev *sch)
>>  
>>  }
>>  
>> -static int sch_handle_start_func_passthrough(SubchDev *sch)
>> +static void sch_handle_start_func_passthrough(SubchDev *sch)
>>  {
>>  
>>      PMCW *p = &sch->curr_status.pmcw;
>>      SCSW *s = &sch->curr_status.scsw;
>> -    int ret;
>>  
>>      ORB *orb = &sch->orb;
>>      if (!(s->ctrl & SCSW_ACTL_SUSP)) {
>> @@ -1034,28 +1033,10 @@ static int sch_handle_start_func_passthrough(SubchDev *sch)
>>       */
>>      if (!(orb->ctrl0 & ORB_CTRL0_MASK_PFCH) ||
>>          !(orb->ctrl0 & ORB_CTRL0_MASK_C64)) {
>> -        return -ENODEV;
>> +        sch->iret.cc = 3;
> 
> Same as already commented: I don't think cc 3 is a good match.
> 

Yeah, this is just dumb conversion.

>>      }
>>  
>> -    ret = s390_ccw_cmd_request(orb, s, sch->driver_data);
>> -    switch (ret) {
>> -    /* Currently we don't update control block and just return the cc code. */
>> -    case 0:
>> -        break;
>> -    case -EBUSY:
>> -        break;
>> -    case -ENODEV:
>> -        break;
>> -    case -EFAULT:
>> -         break;
>> -    case -EACCES:
>> -        /* Let's reflect an inaccessible host device by cc 3. */
>> -    default:
>> -        /* Let's make all other return codes map to cc 3.  */
>> -        ret = -ENODEV;
>> -    };
>> -
>> -    return ret;
>> +    s390_ccw_cmd_request(sch);
> 
> As you change the handling anyway: Don't change this logic in prior
> patches?

I preserve the logic as altered by the previous patches (at least,
that is the intention). This is the mapping around part which is going
away if we push the handling down.

> 
>>  }
>>  
>>  /*
>> @@ -1064,7 +1045,7 @@ static int sch_handle_start_func_passthrough(SubchDev *sch)
>>   * read/writes) asynchronous later on if we start supporting more than
>>   * our current very simple devices.
>>   */
>> -int do_subchannel_work_virtual(SubchDev *sch)
>> +void do_subchannel_work_virtual(SubchDev *sch)
>>  {
>>  
>>      SCSW *s = &sch->curr_status.scsw;
>> @@ -1078,41 +1059,35 @@ int do_subchannel_work_virtual(SubchDev *sch)
>>          sch_handle_start_func_virtual(sch);
>>      } else {
>>          /* Cannot happen. */
>> -        return -ENODEV;
>> +        sch->iret.cc = 3;
> 
> See comment for the last patch.
> 

Again just a conversion. If we don't do it in the previous patch
it has to be done differently in this patch.

>>      }
>>      css_inject_io_interrupt(sch);
>> -    return 0;
>>  }
>>  
>> -int do_subchannel_work_passthrough(SubchDev *sch)
>> +void do_subchannel_work_passthrough(SubchDev *sch)
>>  {
>> -    int ret;
>>      SCSW *s = &sch->curr_status.scsw;
>>  
>>      if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) {
>>          /* TODO: Clear handling */
>>          sch_handle_clear_func(sch);
>> -        ret = 0;
>>      } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) {
>>          /* TODO: Halt handling */
>>          sch_handle_halt_func(sch);
>> -        ret = 0;
>>      } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
>> -        ret = sch_handle_start_func_passthrough(sch);
>> +        sch_handle_start_func_passthrough(sch);
>>      } else {
>>          /* Cannot happen. */
>> -        return -ENODEV;
>> +        sch->iret.cc = 3;
> 
> ftcl == 0 should have been rejected already by the function handlers
> here as well, shouldn't it?

Which function handlers. Basically the instruction handlers set fctl
and call do_subchannel_work to get the indicated work done.

> 
>>      }
>> -
>> -    return ret;
>>  }
>>  
>> -static int do_subchannel_work(SubchDev *sch)
>> +static void do_subchannel_work(SubchDev *sch)
>>  {
>>      if (sch->do_subchannel_work) {
>> -        return sch->do_subchannel_work(sch);
>> +        sch->do_subchannel_work(sch);
>>      } else {
>> -        return -ENODEV;
>> +        sch->iret.cc = 3;
> 
> See my comment for a prior patch.
>

Nod.

 
>>      }
>>  }
>>  
> 
> (...)
> 
>> diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
>> index 5c5fe6b202..d093181a9e 100644
>> --- a/include/hw/s390x/css.h
>> +++ b/include/hw/s390x/css.h
>> @@ -94,13 +94,31 @@ struct SubchDev {
>>      /* transport-provided data: */
>>      int (*ccw_cb) (SubchDev *, CCW1);
>>      void (*disable_cb)(SubchDev *);
>> -    int (*do_subchannel_work) (SubchDev *);
>> +    void (*do_subchannel_work) (SubchDev *);
>>      SenseId id;
>>      void *driver_data;
>> +    /* io instructions conclude according to iret */
>> +    struct {
>> +        /*
>> +         * General semantic of cc codes of IO instructions is (brief):
>> +         * 0 -- produced expected result
>> +         * 1 -- produced alternate result
>> +         * 2 -- ineffective, because busy with previously initiated function
>> +         * 3 -- ineffective, not operational
> 
> I'm not sure you can summarize this that way in all cases.
> 
> Also, what does "ineffective" mean? If I get a cc 1 for, say, ssch, I
> don't expect something either as the subchannel was already status
> pending.

You are right cc 1 would have been better off with
 'produced alternate result or status pending'

I've tried to make this shorter (from PoP 14-2):
"""
Condition Code 0: Instruction execution produced
the expected or most probable result. (See “Deferred
Condition Code (CC)” on page 9 for a description of
conditions that can be encountered subsequent to
the presentation of condition code 0 that result in a
nonzero deferred condition code.)
Condition Code 1: Instruction execution produced
the alternate or second-most-probable result, or sta-
tus conditions were present that may or may not have
prevented the expected result.
Condition Code 2: Instruction execution was inef-
fective because the designated subchannel or chan-
nel-subsystem facility was busy with a previously
initiated function.
Condition Code 3: Instruction execution was inef-
fective because the designated element was not
operational or because some condition precluded ini-
tiation of the normal function.
"""

Well ineffective means not-effective ;).

> 
>> +         */
>> +        uint32_t cc:4;
>> +        bool pgm_chk:1;
> 
> This looks weird?>

What do you mean? Any suggestions how to do it better?
 
>> +        uint32_t irq_code;
>> +    } iret;
>>  };
>>  
>>  extern const VMStateDescription vmstate_subch_dev;
> 
> (...)
> 
>> @@ -238,33 +236,17 @@ void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb)
>>      }
>>      trace_ioinst_sch_id("ssch", cssid, ssid, schid);
>>      sch = css_find_subch(m, cssid, ssid, schid);
>> -    if (sch && css_subch_visible(sch)) {
>> -        ret = css_do_ssch(sch, &orb);
>> +    if (!sch || !css_subch_visible(sch)) {
>> +        setcc(cpu, 3);
>> +        return;
>>      }
>> -    switch (ret) {
>> -    case -ENODEV:
>> -        cc = 3;
>> -        break;
>> -    case -EBUSY:
>> -        cc = 2;
>> -        break;
>> -    case -EFAULT:
>> -        /*
>> -         * TODO:
>> -         * I'm wondering whether there is something better
>> -         * to do for us here (like setting some device or
>> -         * subchannel status).
>> -         */
> 
> You removed the TODO :(
> 
> There still might be a better way to reflect this...
> 

OK I should have pushed it down to 
@@ -71,10 +72,27 @@ again:
             goto again;
         }
         error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno);
-        return -errno;
+        ret = -errno;
+    } else {
+        ret = region->ret_code;
+    }
+    switch (-ret) {
+    /* Currently we don't update control block and just return the cc code. */
+    case 0:
+        sch->iret.cc = 0;
+        break;
+    case EBUSY:
+        sch->iret.cc = 2;
+        break;
+    case EFAULT:

Like this:

+        /*
+         * TODO:
+         * I'm wondering whether there is something better
+         * to do for us here (like setting some device or
+         * subchannel status).
+         */
+        sch->iret.pgm_chk = true;
+        sch->iret.irq_code = PGM_ADDRESSING;
+        break;
+    case ENODEV:
+    case EACCES:
+    default:
+        sch->iret.cc = 3;
     }
-
-    return region->ret_code;
 }

 static void vfio_ccw_reset(DeviceState *dev)

(deleted in your mail, file hw/vfio/ccw.c).

But I guess, I was afraid of somebody blaming me for this
comment (such TODOs in production code are a bit strange -- what
does it mean: we did not bother to figure it out?).

>> -        program_interrupt(env, PGM_ADDRESSING, 4);
>> +    css_subch_clear_iret(sch);
>> +    css_do_ssch(sch, &orb);
>> +    if (sch->iret.pgm_chk) {
>> +        program_interrupt(env, sch->iret.irq_code, 4);
>>          return;
>> -    case 0:
>> -        cc = 0;
>> -        break;
>> -    default:
>> -        cc = 1;
>> -        break;
>>      }
>> -    setcc(cpu, cc);
>> +    setcc(cpu, sch->iret.cc);
>>  }
>>  
>>  void ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb)
> 

Looking forward to the rest of the feedback.

Halil
Cornelia Huck Sept. 5, 2017, 4:25 p.m. UTC | #3
On Tue, 5 Sep 2017 17:55:17 +0200
Halil Pasic <pasic@linux.vnet.ibm.com> wrote:

> On 08/31/2017 11:55 AM, Cornelia Huck wrote:
> > On Wed, 30 Aug 2017 18:36:04 +0200
> > Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> >   
> >> Simplify the error handling of the SSCH and RSCH handler avoiding
> >> arbitrary and cryptic error codes being mapped to what a subchannel is
> >> supposed to do.  Let the code detecting the condition tell how it's to be
> >> handled in a less ambiguous way.  It's best to handle SSCH and RSCH in
> >> one go as the emulation of the two shares a lot of code.  
> > 
> > So determining the return code at the point in time where we can
> > instead of trying to map to return codes and back again makes quite a
> > bit of sense, but I'll have to look at the rest of this.  
> 
> 
> Looging forward to.

Once I manage to find some quiet time for thinking :(


> >> -    ret = s390_ccw_cmd_request(orb, s, sch->driver_data);
> >> -    switch (ret) {
> >> -    /* Currently we don't update control block and just return the cc code. */
> >> -    case 0:
> >> -        break;
> >> -    case -EBUSY:
> >> -        break;
> >> -    case -ENODEV:
> >> -        break;
> >> -    case -EFAULT:
> >> -         break;
> >> -    case -EACCES:
> >> -        /* Let's reflect an inaccessible host device by cc 3. */
> >> -    default:
> >> -        /* Let's make all other return codes map to cc 3.  */
> >> -        ret = -ENODEV;
> >> -    };
> >> -
> >> -    return ret;
> >> +    s390_ccw_cmd_request(sch);  
> > 
> > As you change the handling anyway: Don't change this logic in prior
> > patches?  
> 
> I preserve the logic as altered by the previous patches (at least,
> that is the intention). This is the mapping around part which is going
> away if we push the handling down.

My point is that you touch the code path twice. But we disagreed on the
original change anyway :)

> >> -int do_subchannel_work_passthrough(SubchDev *sch)
> >> +void do_subchannel_work_passthrough(SubchDev *sch)
> >>  {
> >> -    int ret;
> >>      SCSW *s = &sch->curr_status.scsw;
> >>  
> >>      if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) {
> >>          /* TODO: Clear handling */
> >>          sch_handle_clear_func(sch);
> >> -        ret = 0;
> >>      } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) {
> >>          /* TODO: Halt handling */
> >>          sch_handle_halt_func(sch);
> >> -        ret = 0;
> >>      } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
> >> -        ret = sch_handle_start_func_passthrough(sch);
> >> +        sch_handle_start_func_passthrough(sch);
> >>      } else {
> >>          /* Cannot happen. */
> >> -        return -ENODEV;
> >> +        sch->iret.cc = 3;  
> > 
> > ftcl == 0 should have been rejected already by the function handlers
> > here as well, shouldn't it?  
> 
> Which function handlers. Basically the instruction handlers set fctl
> and call do_subchannel_work to get the indicated work done.

Or set. My point is that we can't get here with fctl == 0. So an assert
sounds better to me.


> >> diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
> >> index 5c5fe6b202..d093181a9e 100644
> >> --- a/include/hw/s390x/css.h
> >> +++ b/include/hw/s390x/css.h
> >> @@ -94,13 +94,31 @@ struct SubchDev {
> >>      /* transport-provided data: */
> >>      int (*ccw_cb) (SubchDev *, CCW1);
> >>      void (*disable_cb)(SubchDev *);
> >> -    int (*do_subchannel_work) (SubchDev *);
> >> +    void (*do_subchannel_work) (SubchDev *);
> >>      SenseId id;
> >>      void *driver_data;
> >> +    /* io instructions conclude according to iret */
> >> +    struct {
> >> +        /*
> >> +         * General semantic of cc codes of IO instructions is (brief):
> >> +         * 0 -- produced expected result
> >> +         * 1 -- produced alternate result
> >> +         * 2 -- ineffective, because busy with previously initiated function
> >> +         * 3 -- ineffective, not operational  
> > 
> > I'm not sure you can summarize this that way in all cases.
> > 
> > Also, what does "ineffective" mean? If I get a cc 1 for, say, ssch, I
> > don't expect something either as the subchannel was already status
> > pending.  
> 
> You are right cc 1 would have been better off with
>  'produced alternate result or status pending'
> 
> I've tried to make this shorter (from PoP 14-2):
> """
> Condition Code 0: Instruction execution produced
> the expected or most probable result. (See “Deferred
> Condition Code (CC)” on page 9 for a description of
> conditions that can be encountered subsequent to
> the presentation of condition code 0 that result in a
> nonzero deferred condition code.)
> Condition Code 1: Instruction execution produced
> the alternate or second-most-probable result, or sta-
> tus conditions were present that may or may not have
> prevented the expected result.
> Condition Code 2: Instruction execution was inef-
> fective because the designated subchannel or chan-
> nel-subsystem facility was busy with a previously
> initiated function.
> Condition Code 3: Instruction execution was inef-
> fective because the designated element was not
> operational or because some condition precluded ini-
> tiation of the normal function.
> """
> 
> Well ineffective means not-effective ;).

Yes :)

I think the summary in the PoP is already a bit on the over-generalized
side, and condensing it further makes it rather ineffective ;) at
explaining what happens. Frankly, I'd just drop this and rely on
interested parties referring to the PoP.

> 
> >   
> >> +         */
> >> +        uint32_t cc:4;
> >> +        bool pgm_chk:1;  
> > 
> > This looks weird?>  
> 
> What do you mean? Any suggestions how to do it better?

Taking one bit from a bool looks odd. I'd either use a bool normally or
take one bit from an uint8_t.

>  
> >> +        uint32_t irq_code;
> >> +    } iret;
> >>  };
> >>  
> >>  extern const VMStateDescription vmstate_subch_dev;  

> But I guess, I was afraid of somebody blaming me for this
> comment (such TODOs in production code are a bit strange -- what
> does it mean: we did not bother to figure it out?).

For one, the TODO is preexisting... and we really should remember to
figure out if there's something better rather than just drop the
comment.

(And I sure hope nobody is using vfio-ccw in production yet...)
Halil Pasic Sept. 5, 2017, 10:30 p.m. UTC | #4
On 09/05/2017 06:25 PM, Cornelia Huck wrote:
> On Tue, 5 Sep 2017 17:55:17 +0200
> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> 
>> On 08/31/2017 11:55 AM, Cornelia Huck wrote:
>>> On Wed, 30 Aug 2017 18:36:04 +0200
>>> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
>>>   
>>>> Simplify the error handling of the SSCH and RSCH handler avoiding
>>>> arbitrary and cryptic error codes being mapped to what a subchannel is
>>>> supposed to do.  Let the code detecting the condition tell how it's to be
>>>> handled in a less ambiguous way.  It's best to handle SSCH and RSCH in
>>>> one go as the emulation of the two shares a lot of code.  
>>>
>>> So determining the return code at the point in time where we can
>>> instead of trying to map to return codes and back again makes quite a
>>> bit of sense, but I'll have to look at the rest of this.  
>>
>>
>> Looging forward to.
> 
> Once I manage to find some quiet time for thinking :(

I think it's the best if you ignore the rest until v2.

Since we agreed that cc 3 is not the way to go and that the basic
idea is sane, IMHO it does not make much sense to do a thorough
review of this any more.

Not diverting valuable maintainer resources from my indirect data 
access patch set is also a point (IDA is something I have to deliver, while
this is just for fun).
 
> 
>>>> -    ret = s390_ccw_cmd_request(orb, s, sch->driver_data);
>>>> -    switch (ret) {
>>>> -    /* Currently we don't update control block and just return the cc code. */
>>>> -    case 0:
>>>> -        break;
>>>> -    case -EBUSY:
>>>> -        break;
>>>> -    case -ENODEV:
>>>> -        break;
>>>> -    case -EFAULT:
>>>> -         break;
>>>> -    case -EACCES:
>>>> -        /* Let's reflect an inaccessible host device by cc 3. */
>>>> -    default:
>>>> -        /* Let's make all other return codes map to cc 3.  */
>>>> -        ret = -ENODEV;
>>>> -    };
>>>> -
>>>> -    return ret;
>>>> +    s390_ccw_cmd_request(sch);  
>>>
>>> As you change the handling anyway: Don't change this logic in prior
>>> patches?  
>>
>> I preserve the logic as altered by the previous patches (at least,
>> that is the intention). This is the mapping around part which is going
>> away if we push the handling down.
> 
> My point is that you touch the code path twice. But we disagreed on the
> original change anyway :)

Nod.

> 
>>>> -int do_subchannel_work_passthrough(SubchDev *sch)
>>>> +void do_subchannel_work_passthrough(SubchDev *sch)
>>>>  {
>>>> -    int ret;
>>>>      SCSW *s = &sch->curr_status.scsw;
>>>> 
>>>>      if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) {
>>>>          /* TODO: Clear handling */
>>>>          sch_handle_clear_func(sch);
>>>> -        ret = 0;
>>>>      } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) {
>>>>          /* TODO: Halt handling */
>>>>          sch_handle_halt_func(sch);
>>>> -        ret = 0;
>>>>      } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
>>>> -        ret = sch_handle_start_func_passthrough(sch);
>>>> +        sch_handle_start_func_passthrough(sch);
>>>>      } else {
>>>>          /* Cannot happen. */
>>>> -        return -ENODEV;
>>>> +        sch->iret.cc = 3;  
>>>
>>> ftcl == 0 should have been rejected already by the function handlers
>>> here as well, shouldn't it?  
>>
>> Which function handlers. Basically the instruction handlers set fctl
>> and call do_subchannel_work to get the indicated work done.
> 
> Or set. My point is that we can't get here with fctl == 0. So an assert
> sounds better to me.
> 
Yeah, I agree assert is the way to go here.

> 
>>>> diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
>>>> index 5c5fe6b202..d093181a9e 100644
>>>> --- a/include/hw/s390x/css.h
>>>> +++ b/include/hw/s390x/css.h
>>>> @@ -94,13 +94,31 @@ struct SubchDev {
>>>>      /* transport-provided data: */
>>>>      int (*ccw_cb) (SubchDev *, CCW1);
>>>>      void (*disable_cb)(SubchDev *);
>>>> -    int (*do_subchannel_work) (SubchDev *);
>>>> +    void (*do_subchannel_work) (SubchDev *);
>>>>      SenseId id;
>>>>      void *driver_data;
>>>> +    /* io instructions conclude according to iret */
>>>> +    struct {
>>>> +        /*
>>>> +         * General semantic of cc codes of IO instructions is (brief):
>>>> +         * 0 -- produced expected result
>>>> +         * 1 -- produced alternate result
>>>> +         * 2 -- ineffective, because busy with previously initiated function
>>>> +         * 3 -- ineffective, not operational  
>>>
>>> I'm not sure you can summarize this that way in all cases.
>>>
>>> Also, what does "ineffective" mean? If I get a cc 1 for, say, ssch, I
>>> don't expect something either as the subchannel was already status
>>> pending.  
>>
>> You are right cc 1 would have been better off with
>>  'produced alternate result or status pending'
>>
>> I've tried to make this shorter (from PoP 14-2):
>> """
>> Condition Code 0: Instruction execution produced
>> the expected or most probable result. (See “Deferred
>> Condition Code (CC)” on page 9 for a description of
>> conditions that can be encountered subsequent to
>> the presentation of condition code 0 that result in a
>> nonzero deferred condition code.)
>> Condition Code 1: Instruction execution produced
>> the alternate or second-most-probable result, or sta-
>> tus conditions were present that may or may not have
>> prevented the expected result.
>> Condition Code 2: Instruction execution was inef-
>> fective because the designated subchannel or chan-
>> nel-subsystem facility was busy with a previously
>> initiated function.
>> Condition Code 3: Instruction execution was inef-
>> fective because the designated element was not
>> operational or because some condition precluded ini-
>> tiation of the normal function.
>> """
>>
>> Well ineffective means not-effective ;).
> 
> Yes :)
> 
> I think the summary in the PoP is already a bit on the over-generalized
> side, and condensing it further makes it rather ineffective ;) at
> explaining what happens. Frankly, I'd just drop this and rely on
> interested parties referring to the PoP.
> 

That's what I did in the first place, but then Janosch complained.
I will meditate on this (having some sort of explanation is helpful
and I think cc 0 2 and 3 are actually quite OK).

>>
>>>   
>>>> +         */
>>>> +        uint32_t cc:4;
>>>> +        bool pgm_chk:1;  
>>>
>>> This looks weird?>  
>>
>> What do you mean? Any suggestions how to do it better?
> 
> Taking one bit from a bool looks odd. I'd either use a bool normally or
> take one bit from an uint8_t.
> 

I have to think about this.

>>  
>>>> +        uint32_t irq_code;
>>>> +    } iret;
>>>>  };
>>>>  
>>>>  extern const VMStateDescription vmstate_subch_dev;  
> 
>> But I guess, I was afraid of somebody blaming me for this
>> comment (such TODOs in production code are a bit strange -- what
>> does it mean: we did not bother to figure it out?).
> 
> For one, the TODO is preexisting... and we really should remember to
> figure out if there's something better rather than just drop the
> comment.
> 
> (And I sure hope nobody is using vfio-ccw in production yet...)
>
Since blame says the TODO has been around since 2017-05-17
let me have a critical look at it.

At a first glance I would say addressing exception for SSCH
is not what we want: the only possibility I see for address
exception for SSCH is due to the ORB address. But if that's
the case we will never reach the code in question. So I suppose
we can do better.

Adding Ren. @Ren: Do you agree with my analysis. If you do,
I could come up with a proposal what to do -- after some reading.

Regards,
Halil
Dong Jia Shi Sept. 6, 2017, 4:31 a.m. UTC | #5
* Halil Pasic <pasic@linux.vnet.ibm.com> [2017-09-06 00:30:58 +0200]:

[...]

> > 
> >> But I guess, I was afraid of somebody blaming me for this
> >> comment (such TODOs in production code are a bit strange -- what
> >> does it mean: we did not bother to figure it out?).
> > 
> > For one, the TODO is preexisting... and we really should remember to
> > figure out if there's something better rather than just drop the
> > comment.
> > 
> > (And I sure hope nobody is using vfio-ccw in production yet...)
> >
> Since blame says the TODO has been around since 2017-05-17
> let me have a critical look at it.
> 
> At a first glance I would say addressing exception for SSCH
> is not what we want: the only possibility I see for address
> exception for SSCH is due to the ORB address. But if that's
> the case we will never reach the code in question.
Agree.

> So I suppose we can do better.
As the comment said, I'm (still) in the state of 'wondering'.

> 
> Adding Ren. @Ren: Do you agree with my analysis. If you do,
> I could come up with a proposal what to do -- after some reading.
If you have a better idea, and time, why not? ;)

> 
> Regards,
> Halil
Halil Pasic Sept. 6, 2017, 12:25 p.m. UTC | #6
On 09/06/2017 06:31 AM, Dong Jia Shi wrote:
> * Halil Pasic <pasic@linux.vnet.ibm.com> [2017-09-06 00:30:58 +0200]:
> 
> [...]
> 
>>>
>>>> But I guess, I was afraid of somebody blaming me for this
>>>> comment (such TODOs in production code are a bit strange -- what
>>>> does it mean: we did not bother to figure it out?).
>>>
>>> For one, the TODO is preexisting... and we really should remember to
>>> figure out if there's something better rather than just drop the
>>> comment.
>>>
>>> (And I sure hope nobody is using vfio-ccw in production yet...)
>>>
>> Since blame says the TODO has been around since 2017-05-17
>> let me have a critical look at it.
>>
>> At a first glance I would say addressing exception for SSCH
>> is not what we want: the only possibility I see for address
>> exception for SSCH is due to the ORB address. But if that's
>> the case we will never reach the code in question.
> Agree.
> 
>> So I suppose we can do better.
> As the comment said, I'm (still) in the state of 'wondering'.
> 
>>
>> Adding Ren. @Ren: Do you agree with my analysis. If you do,
>> I could come up with a proposal what to do -- after some reading.
> If you have a better idea, and time, why not? ;)
> 

We have basically two possibilities/options which ask for different
handling:
1) EFAULT is due to a bug in the vfio-ccw implementation
(can be QEMU or kernel).
2) EFAULT is due to buggy channel program.

Option 2) is basically to be handled with a channel-program check and
setting primary secondary and alert status. For reference see PoP page
15-59 ("Designation of Storage Area").  An exception may be an invalid
channel program address in the ORB. There the channel-program check ain't
explicitly stated (although) I would expect one. It may be implied by the
things on page 15-59 though.

Option 1) is however very similar to other we have figured out that the
implementation is broken situations and should be handled consequently.
The current state of the discussion is with a unit exception.

Does that make sense?

Now, Dong Jia, I need your help to figure out do we have option 1 or
option 2 here? After quick look at the kernel code, it appears to me that
I've seen both option 1 and option 2 (I'm afraid) -- but my assessment
was really very superficial.

I would expect option 2 to be handled differently (kernel provides the
SCSW) though.

Regards,
Halil
Cornelia Huck Sept. 6, 2017, 2:20 p.m. UTC | #7
On Wed, 6 Sep 2017 14:25:13 +0200
Halil Pasic <pasic@linux.vnet.ibm.com> wrote:

> We have basically two possibilities/options which ask for different
> handling:
> 1) EFAULT is due to a bug in the vfio-ccw implementation
> (can be QEMU or kernel).
> 2) EFAULT is due to buggy channel program.
> 
> Option 2) is basically to be handled with a channel-program check and
> setting primary secondary and alert status. For reference see PoP page
> 15-59 ("Designation of Storage Area").  An exception may be an invalid
> channel program address in the ORB. There the channel-program check ain't
> explicitly stated (although) I would expect one. It may be implied by the
> things on page 15-59 though.
> 
> Option 1) is however very similar to other we have figured out that the
> implementation is broken situations and should be handled consequently.
> The current state of the discussion is with a unit exception.
> 
> Does that make sense?

I think the situation is slightly different here, though. For the orb
flags, we reject something out of hand because we have not implemented
it, and for that, unit exception sounds like a good fit. Processing
errors, however, are more similar to errors in the hardware, and as
such can probably be reported via something like equipment check.

> 
> Now, Dong Jia, I need your help to figure out do we have option 1 or
> option 2 here? After quick look at the kernel code, it appears to me that
> I've seen both option 1 and option 2 (I'm afraid) -- but my assessment
> was really very superficial.
> 
> I would expect option 2 to be handled differently (kernel provides the
> SCSW) though.
> 
> Regards,
> Halil
>
Halil Pasic Sept. 6, 2017, 2:43 p.m. UTC | #8
On 09/06/2017 04:20 PM, Cornelia Huck wrote:
> On Wed, 6 Sep 2017 14:25:13 +0200
> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> 
>> We have basically two possibilities/options which ask for different
>> handling:
>> 1) EFAULT is due to a bug in the vfio-ccw implementation
>> (can be QEMU or kernel).
>> 2) EFAULT is due to buggy channel program.
>>
>> Option 2) is basically to be handled with a channel-program check and
>> setting primary secondary and alert status. For reference see PoP page
>> 15-59 ("Designation of Storage Area").  An exception may be an invalid
>> channel program address in the ORB. There the channel-program check ain't
>> explicitly stated (although) I would expect one. It may be implied by the
>> things on page 15-59 though.
>>
>> Option 1) is however very similar to other we have figured out that the
>> implementation is broken situations and should be handled consequently.
>> The current state of the discussion is with a unit exception.
>>
>> Does that make sense?
> 
> I think the situation is slightly different here, though. For the orb
> flags, we reject something out of hand because we have not implemented
> it, and for that, unit exception sounds like a good fit. Processing
> errors, however, are more similar to errors in the hardware, and as
> such can probably be reported via something like equipment check.
> 

Noted. Let's see what Dong Jia has to say, before we continuing a
discussion on something (option 1) what may be irrelevant anyway.

>>
>> Now, Dong Jia, I need your help to figure out do we have option 1 or
>> option 2 here? After quick look at the kernel code, it appears to me that
>> I've seen both option 1 and option 2 (I'm afraid) -- but my assessment
>> was really very superficial.
>>
>> I would expect option 2 to be handled differently (kernel provides the
>> SCSW) though.
>>
>> Regards,
>> Halil
>>
> 
>
Dong Jia Shi Sept. 7, 2017, 8:58 a.m. UTC | #9
* Halil Pasic <pasic@linux.vnet.ibm.com> [2017-09-06 16:43:42 +0200]:

> 
> 
> On 09/06/2017 04:20 PM, Cornelia Huck wrote:
> > On Wed, 6 Sep 2017 14:25:13 +0200
> > Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> > 
> >> We have basically two possibilities/options which ask for different
> >> handling:
> >> 1) EFAULT is due to a bug in the vfio-ccw implementation
> >> (can be QEMU or kernel).
> >> 2) EFAULT is due to buggy channel program.
> >>
> >> Option 2) is basically to be handled with a channel-program check and
> >> setting primary secondary and alert status. For reference see PoP page
> >> 15-59 ("Designation of Storage Area").  An exception may be an invalid
> >> channel program address in the ORB. There the channel-program check ain't
> >> explicitly stated (although) I would expect one. It may be implied by the
> >> things on page 15-59 though.
> >>
> >> Option 1) is however very similar to other we have figured out that the
> >> implementation is broken situations and should be handled consequently.
> >> The current state of the discussion is with a unit exception.
> >>
> >> Does that make sense?
> > 
> > I think the situation is slightly different here, though. For the orb
> > flags, we reject something out of hand because we have not implemented
> > it, and for that, unit exception sounds like a good fit. Processing
> > errors, however, are more similar to errors in the hardware, and as
> > such can probably be reported via something like equipment check.
> > 
> 
> Noted. Let's see what Dong Jia has to say, before we continuing a
> discussion on something (option 1) what may be irrelevant anyway.
> 
> >>
> >> Now, Dong Jia, I need your help to figure out do we have option 1 or
> >> option 2 here? After quick look at the kernel code, it appears to me that
> >> I've seen both option 1 and option 2 (I'm afraid) -- but my assessment
> >> was really very superficial.
There are three cases (all in the kernel) that generate a -EFAULT ret
code:
a. vfio_ccw_mdev_write: copy_from_user() fails.
  This is option 1.

b. ccwchain_fetch_tic
  It's mostly likely that the vfio-ccw driver processed the ccw chains
  wrongly. (Actually I can not think of any other reason.)
  This is option 1.

c. ccwchain_fetch_idal
  When we find that an IDAW contents an invalid address
  This is option 2.

> >>
> >> I would expect option 2 to be handled differently (kernel provides the
> >> SCSW) though.
> >>
> >> Regards,
> >> Halil
> >>
> > 
> >
Halil Pasic Sept. 7, 2017, 10:15 a.m. UTC | #10
On 09/07/2017 10:58 AM, Dong Jia Shi wrote:
> * Halil Pasic <pasic@linux.vnet.ibm.com> [2017-09-06 16:43:42 +0200]:
> 
>>
>>
>> On 09/06/2017 04:20 PM, Cornelia Huck wrote:
>>> On Wed, 6 Sep 2017 14:25:13 +0200
>>> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
>>>
>>>> We have basically two possibilities/options which ask for different
>>>> handling:
>>>> 1) EFAULT is due to a bug in the vfio-ccw implementation
>>>> (can be QEMU or kernel).
>>>> 2) EFAULT is due to buggy channel program.
>>>>
>>>> Option 2) is basically to be handled with a channel-program check and
>>>> setting primary secondary and alert status. For reference see PoP page
>>>> 15-59 ("Designation of Storage Area").  An exception may be an invalid
>>>> channel program address in the ORB. There the channel-program check ain't
>>>> explicitly stated (although) I would expect one. It may be implied by the
>>>> things on page 15-59 though.
>>>>
>>>> Option 1) is however very similar to other we have figured out that the
>>>> implementation is broken situations and should be handled consequently.
>>>> The current state of the discussion is with a unit exception.
>>>>
>>>> Does that make sense?
>>>
>>> I think the situation is slightly different here, though. For the orb
>>> flags, we reject something out of hand because we have not implemented
>>> it, and for that, unit exception sounds like a good fit. Processing
>>> errors, however, are more similar to errors in the hardware, and as
>>> such can probably be reported via something like equipment check.
>>>
>>
>> Noted. Let's see what Dong Jia has to say, before we continuing a
>> discussion on something (option 1) what may be irrelevant anyway.
>>
>>>>
>>>> Now, Dong Jia, I need your help to figure out do we have option 1 or
>>>> option 2 here? After quick look at the kernel code, it appears to me that
>>>> I've seen both option 1 and option 2 (I'm afraid) -- but my assessment
>>>> was really very superficial.
> There are three cases (all in the kernel) that generate a -EFAULT ret
> code:
> a. vfio_ccw_mdev_write: copy_from_user() fails.
>   This is option 1.
> 
> b. ccwchain_fetch_tic
>   It's mostly likely that the vfio-ccw driver processed the ccw chains
>   wrongly. (Actually I can not think of any other reason.)
>   This is option 1.
> 
> c. ccwchain_fetch_idal
>   When we find that an IDAW contents an invalid address
>   This is option 2.
> 

So my fear was justified. If we can't tell if its option 1 or 2, and
currently we can't, I would say we shall trust our infrastructure and
blame the channel program: that boils down to channel-program check.
That's my assessment with the kernel driver being as-is.

If we are willing to change the vfio-ccw kernel driver, then generating
a response to option 2 conditions in the kernel (basically an SCSW) is
IMHO better. For instance we can do SCSW.cpa and SCWS.count properly.
AFAIR we are not allowed to present conditions that logically did not
happen (yet) -- for instance if we have per-fetched a bad CCW address
but the given ccw did not become active yet.

If we handle option 2 via a different channel (SCWS instead of return
code) then we could happily do the proper handling for option 1 here.

So Dong Jia the call is again yours to make!

Regards,
Halil
Cornelia Huck Sept. 7, 2017, 10:24 a.m. UTC | #11
On Thu, 7 Sep 2017 16:58:31 +0800
Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> wrote:

> * Halil Pasic <pasic@linux.vnet.ibm.com> [2017-09-06 16:43:42 +0200]:
> 
> > 
> > 
> > On 09/06/2017 04:20 PM, Cornelia Huck wrote:  
> > > On Wed, 6 Sep 2017 14:25:13 +0200
> > > Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> > >   
> > >> We have basically two possibilities/options which ask for different
> > >> handling:
> > >> 1) EFAULT is due to a bug in the vfio-ccw implementation
> > >> (can be QEMU or kernel).
> > >> 2) EFAULT is due to buggy channel program.
> > >>
> > >> Option 2) is basically to be handled with a channel-program check and
> > >> setting primary secondary and alert status. For reference see PoP page
> > >> 15-59 ("Designation of Storage Area").  An exception may be an invalid
> > >> channel program address in the ORB. There the channel-program check ain't
> > >> explicitly stated (although) I would expect one. It may be implied by the
> > >> things on page 15-59 though.
> > >>
> > >> Option 1) is however very similar to other we have figured out that the
> > >> implementation is broken situations and should be handled consequently.
> > >> The current state of the discussion is with a unit exception.
> > >>
> > >> Does that make sense?  
> > > 
> > > I think the situation is slightly different here, though. For the orb
> > > flags, we reject something out of hand because we have not implemented
> > > it, and for that, unit exception sounds like a good fit. Processing
> > > errors, however, are more similar to errors in the hardware, and as
> > > such can probably be reported via something like equipment check.
> > >   
> > 
> > Noted. Let's see what Dong Jia has to say, before we continuing a
> > discussion on something (option 1) what may be irrelevant anyway.
> >   
> > >>
> > >> Now, Dong Jia, I need your help to figure out do we have option 1 or
> > >> option 2 here? After quick look at the kernel code, it appears to me that
> > >> I've seen both option 1 and option 2 (I'm afraid) -- but my assessment
> > >> was really very superficial.  
> There are three cases (all in the kernel) that generate a -EFAULT ret
> code:
> a. vfio_ccw_mdev_write: copy_from_user() fails.
>   This is option 1.
> 
> b. ccwchain_fetch_tic
>   It's mostly likely that the vfio-ccw driver processed the ccw chains
>   wrongly. (Actually I can not think of any other reason.)

Me neither, I'd consider hitting this a bug in the implementation.

>   This is option 1.
> 
> c. ccwchain_fetch_idal
>   When we find that an IDAW contents an invalid address
>   This is option 2.
> 
> > >>
> > >> I would expect option 2 to be handled differently (kernel provides the
> > >> SCSW) though.

So we would do an equipment check for the first two ("equipment", i.e.
the software, is malfunctioning) and use a more appropriate way for the
malformed idaw?
Halil Pasic Sept. 7, 2017, 11:32 a.m. UTC | #12
On 09/07/2017 12:24 PM, Cornelia Huck wrote:
> On Thu, 7 Sep 2017 16:58:31 +0800
> Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> wrote:
> 
>> * Halil Pasic <pasic@linux.vnet.ibm.com> [2017-09-06 16:43:42 +0200]:
>>
>>>
>>>
>>> On 09/06/2017 04:20 PM, Cornelia Huck wrote:  
>>>> On Wed, 6 Sep 2017 14:25:13 +0200
>>>> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
>>>>   
>>>>> We have basically two possibilities/options which ask for different
>>>>> handling:
>>>>> 1) EFAULT is due to a bug in the vfio-ccw implementation
>>>>> (can be QEMU or kernel).
>>>>> 2) EFAULT is due to buggy channel program.
>>>>>
>>>>> Option 2) is basically to be handled with a channel-program check and
>>>>> setting primary secondary and alert status. For reference see PoP page
>>>>> 15-59 ("Designation of Storage Area").  An exception may be an invalid
>>>>> channel program address in the ORB. There the channel-program check ain't
>>>>> explicitly stated (although) I would expect one. It may be implied by the
>>>>> things on page 15-59 though.
>>>>>
>>>>> Option 1) is however very similar to other we have figured out that the
>>>>> implementation is broken situations and should be handled consequently.
>>>>> The current state of the discussion is with a unit exception.
>>>>>
>>>>> Does that make sense?  
>>>>
>>>> I think the situation is slightly different here, though. For the orb
>>>> flags, we reject something out of hand because we have not implemented
>>>> it, and for that, unit exception sounds like a good fit. Processing
>>>> errors, however, are more similar to errors in the hardware, and as
>>>> such can probably be reported via something like equipment check.
>>>>   
>>>
>>> Noted. Let's see what Dong Jia has to say, before we continuing a
>>> discussion on something (option 1) what may be irrelevant anyway.
>>>   
>>>>>
>>>>> Now, Dong Jia, I need your help to figure out do we have option 1 or
>>>>> option 2 here? After quick look at the kernel code, it appears to me that
>>>>> I've seen both option 1 and option 2 (I'm afraid) -- but my assessment
>>>>> was really very superficial.  
>> There are three cases (all in the kernel) that generate a -EFAULT ret
>> code:
>> a. vfio_ccw_mdev_write: copy_from_user() fails.
>>   This is option 1.
>>
>> b. ccwchain_fetch_tic
>>   It's mostly likely that the vfio-ccw driver processed the ccw chains
>>   wrongly. (Actually I can not think of any other reason.)
> 
> Me neither, I'd consider hitting this a bug in the implementation.

Nod.

> 
>>   This is option 1.
>>
>> c. ccwchain_fetch_idal
>>   When we find that an IDAW contents an invalid address
>>   This is option 2.
>>
>>>>>
>>>>> I would expect option 2 to be handled differently (kernel provides the
>>>>> SCSW) though.
> 
> So we would do an equipment check for the first two ("equipment", i.e.
> the software, is malfunctioning) and use a more appropriate way for the
> malformed idaw?
> 

You have probably missed my previous email where I state something very
similar (MID  <2aa8cf98-c331-fe5a-0a7e-1a553c6c5054@linux.vnet.ibm.com>).
There I say: if we change the kernel code, yes, if we don't I prefer a
program check.


Halil
Cornelia Huck Sept. 7, 2017, 11:41 a.m. UTC | #13
On Thu, 7 Sep 2017 13:32:41 +0200
Halil Pasic <pasic@linux.vnet.ibm.com> wrote:

> On 09/07/2017 12:24 PM, Cornelia Huck wrote:

> > So we would do an equipment check for the first two ("equipment", i.e.
> > the software, is malfunctioning) and use a more appropriate way for the
> > malformed idaw?
> >   
> 
> You have probably missed my previous email where I state something very
> similar (MID  <2aa8cf98-c331-fe5a-0a7e-1a553c6c5054@linux.vnet.ibm.com>).
> There I say: if we change the kernel code, yes, if we don't I prefer a
> program check.

Not missed, they crossed in mid-air.

But I agree, the decision is Dong Jia's.
Dong Jia Shi Sept. 8, 2017, 3:41 a.m. UTC | #14
* Cornelia Huck <cohuck@redhat.com> [2017-09-07 13:41:34 +0200]:

> On Thu, 7 Sep 2017 13:32:41 +0200
> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> 
> > On 09/07/2017 12:24 PM, Cornelia Huck wrote:
> 
> > > So we would do an equipment check for the first two ("equipment", i.e.
> > > the software, is malfunctioning) and use a more appropriate way for the
> > > malformed idaw?
> > >   
> > 
> > You have probably missed my previous email where I state something very
> > similar (MID  <2aa8cf98-c331-fe5a-0a7e-1a553c6c5054@linux.vnet.ibm.com>).
> > There I say: if we change the kernel code, yes, if we don't I prefer a
> > program check.
> 
> Not missed, they crossed in mid-air.
> 
> But I agree, the decision is Dong Jia's.
> 
Let' me summarize here, in case I misunderstand things. Now we have
two ways to choose:

A. Kernel: no change.
   Qemu  : handle -EFAULT as option 2 by generating a program check.

B. Kernel: return -EFAULT
           +
           update the IRB area in the I/O region for option 1 to present
           a unit check SCSW (with proper sense byte ECW), and for option
           2 to present a program check.
   Qemu  : handle -EFAULT according to the information that the IRB area
           provided.

I think the difficult part is in the Qemu side. For either A or B, in
Qemu, we'd need to return a cc0 to indicate the channel program is
accepted successfully by the device, and then update the virtual IRB and
inject an I/O interrupt to notify the guest.

The question is, now we need to map -EFAULT to cc0? This is too odd...
And we need to find a proper place to do the interrupt injection. I
guess it could be in the sch_handle_start_func_passthrough().

If we do not like such modification in the Qemu side, then A is not the
way to go.

And for B, we need to update the IRB area of the I/O region in the three
cases listed in the former mail, and:
1. We need to set the ret_code as 0 for them, so that Qemu could map it
   to cc0.
2. We need to signal Qemu to fetch the IRB we generated with the checks.
All of these are doable I think.

Any comment for the above thoughts?
Halil Pasic Sept. 8, 2017, 9:21 a.m. UTC | #15
On 09/08/2017 05:41 AM, Dong Jia Shi wrote:
> Let' me summarize here, in case I misunderstand things. Now we have
> two ways to choose:
> 
> A. Kernel: no change.
>    Qemu  : handle -EFAULT as option 2 by generating a program check.
> 
> B. Kernel: return -EFAULT
>            +
>            update the IRB area in the I/O region for option 1 to present
>            a unit check SCSW (with proper sense byte ECW), and for option
>            2 to present a program check.
>    Qemu  : handle -EFAULT according to the information that the IRB area
>            provided.

This is not what I was trying to say. You got my message regarding A, but
B was supposed to be understood like this.

Keep the current handling for option 1, that is return -EFAULT. For option
2 do what the spec says, execute the program until the bad address and then
generate a program-check (SCSW) once the bad stuff has it's turn. Thus
the only change in QEMU would be handling -EFAULT with an unit check (because
now it's just option 1).

Halil
Cornelia Huck Sept. 8, 2017, 9:59 a.m. UTC | #16
On Fri, 8 Sep 2017 11:21:57 +0200
Halil Pasic <pasic@linux.vnet.ibm.com> wrote:

> On 09/08/2017 05:41 AM, Dong Jia Shi wrote:
> > Let' me summarize here, in case I misunderstand things. Now we have
> > two ways to choose:
> > 
> > A. Kernel: no change.
> >    Qemu  : handle -EFAULT as option 2 by generating a program check.
> > 
> > B. Kernel: return -EFAULT
> >            +
> >            update the IRB area in the I/O region for option 1 to present
> >            a unit check SCSW (with proper sense byte ECW), and for option
> >            2 to present a program check.
> >    Qemu  : handle -EFAULT according to the information that the IRB area
> >            provided.  
> 
> This is not what I was trying to say. You got my message regarding A, but
> B was supposed to be understood like this.
> 
> Keep the current handling for option 1, that is return -EFAULT. For option
> 2 do what the spec says, execute the program until the bad address and then
> generate a program-check (SCSW) once the bad stuff has it's turn. Thus
> the only change in QEMU would be handling -EFAULT with an unit check (because
> now it's just option 1).

That makes sense to me.
Cornelia Huck Sept. 8, 2017, 10:02 a.m. UTC | #17
On Fri, 8 Sep 2017 11:41:00 +0800
Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> wrote:

> I think the difficult part is in the Qemu side. For either A or B, in
> Qemu, we'd need to return a cc0 to indicate the channel program is
> accepted successfully by the device, and then update the virtual IRB and
> inject an I/O interrupt to notify the guest.
> 
> The question is, now we need to map -EFAULT to cc0? This is too odd...

It's not odd at all, if you consider these as two stages:

- Initial acceptance of the command, set cc 0 to indicate you got it.
- Execution of the start function. This can then fail, of course.

> And we need to find a proper place to do the interrupt injection. I
> guess it could be in the sch_handle_start_func_passthrough().

Probably, yes.

> 
> If we do not like such modification in the Qemu side, then A is not the
> way to go.
> 
> And for B, we need to update the IRB area of the I/O region in the three
> cases listed in the former mail, and:
> 1. We need to set the ret_code as 0 for them, so that Qemu could map it
>    to cc0.
> 2. We need to signal Qemu to fetch the IRB we generated with the checks.
> All of these are doable I think.
> 
> Any comment for the above thoughts?
>
Dong Jia Shi Sept. 25, 2017, 7:14 a.m. UTC | #18
* Cornelia Huck <cohuck@redhat.com> [2017-09-08 12:02:38 +0200]:

> On Fri, 8 Sep 2017 11:41:00 +0800
> Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com> wrote:
> 
> > I think the difficult part is in the Qemu side. For either A or B, in
> > Qemu, we'd need to return a cc0 to indicate the channel program is
> > accepted successfully by the device, and then update the virtual IRB and
> > inject an I/O interrupt to notify the guest.
> > 
> > The question is, now we need to map -EFAULT to cc0? This is too odd...
> 
> It's not odd at all, if you consider these as two stages:
> 
> - Initial acceptance of the command, set cc 0 to indicate you got it.
> - Execution of the start function. This can then fail, of course.
Ok. I got the idea!

> 
> > And we need to find a proper place to do the interrupt injection. I
> > guess it could be in the sch_handle_start_func_passthrough().
> 
> Probably, yes.
> 
> > 
> > If we do not like such modification in the Qemu side, then A is not the
> > way to go.
> > 
> > And for B, we need to update the IRB area of the I/O region in the three
> > cases listed in the former mail, and:
> > 1. We need to set the ret_code as 0 for them, so that Qemu could map it
> >    to cc0.
> > 2. We need to signal Qemu to fetch the IRB we generated with the checks.
> > All of these are doable I think.
> > 
> > Any comment for the above thoughts?
> > 
>
Dong Jia Shi Sept. 25, 2017, 7:31 a.m. UTC | #19
* Cornelia Huck <cohuck@redhat.com> [2017-09-08 11:59:50 +0200]:

> On Fri, 8 Sep 2017 11:21:57 +0200
> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> 
> > On 09/08/2017 05:41 AM, Dong Jia Shi wrote:
> > > Let' me summarize here, in case I misunderstand things. Now we have
> > > two ways to choose:
> > > 
> > > A. Kernel: no change.
> > >    Qemu  : handle -EFAULT as option 2 by generating a program check.
> > > 
> > > B. Kernel: return -EFAULT
> > >            +
> > >            update the IRB area in the I/O region for option 1 to present
> > >            a unit check SCSW (with proper sense byte ECW), and for option
> > >            2 to present a program check.
> > >    Qemu  : handle -EFAULT according to the information that the IRB area
> > >            provided.  
> > 
> > This is not what I was trying to say. You got my message regarding A, but
> > B was supposed to be understood like this.
> > 
> > Keep the current handling for option 1, that is return -EFAULT. For option
> > 2 do what the spec says, execute the program until the bad address and then
> > generate a program-check (SCSW) once the bad stuff has it's turn. Thus
> > the only change in QEMU would be handling -EFAULT with an unit check (because
> > now it's just option 1).
Let me adding some context information here by copying some words from the
previous mail in this thread:
The only option 2 case in the kernel is ccwchain_fetch_idal() finding a
bad idaw_iova.

What you propose to do for this case is (correct me if I get it wrong):
In ccwchain_fetch_idal(), we do not return -EFAULT, instead we return 0,
and issuing the incompletely translated channel program with the bad
address to the physical device. And QEMU will eventually get the SCSW
with the program-check from the physical device I/O result, and inject
it to guest for further handling.

Is this understanding right? If so, I'm fine with that, and I can
provide the fix in the kernel.

> 
> That makes sense to me.
>
Halil Pasic Sept. 25, 2017, 10:57 a.m. UTC | #20
On 09/25/2017 09:31 AM, Dong Jia Shi wrote:
> * Cornelia Huck <cohuck@redhat.com> [2017-09-08 11:59:50 +0200]:
> 
>> On Fri, 8 Sep 2017 11:21:57 +0200
>> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
>>
>>> On 09/08/2017 05:41 AM, Dong Jia Shi wrote:
>>>> Let' me summarize here, in case I misunderstand things. Now we have
>>>> two ways to choose:
>>>>
>>>> A. Kernel: no change.
>>>>    Qemu  : handle -EFAULT as option 2 by generating a program check.
>>>>
>>>> B. Kernel: return -EFAULT
>>>>            +
>>>>            update the IRB area in the I/O region for option 1 to present
>>>>            a unit check SCSW (with proper sense byte ECW), and for option
>>>>            2 to present a program check.
>>>>    Qemu  : handle -EFAULT according to the information that the IRB area
>>>>            provided.  
>>>
>>> This is not what I was trying to say. You got my message regarding A, but
>>> B was supposed to be understood like this.
>>>
>>> Keep the current handling for option 1, that is return -EFAULT. For option
>>> 2 do what the spec says, execute the program until the bad address and then
>>> generate a program-check (SCSW) once the bad stuff has it's turn. Thus
>>> the only change in QEMU would be handling -EFAULT with an unit check (because
>>> now it's just option 1).
> Let me adding some context information here by copying some words from the
> previous mail in this thread:
> The only option 2 case in the kernel is ccwchain_fetch_idal() finding a
> bad idaw_iova.
> 
> What you propose to do for this case is (correct me if I get it wrong):
> In ccwchain_fetch_idal(), we do not return -EFAULT, instead we return 0,
> and issuing the incompletely translated channel program with the bad
> address to the physical device. And QEMU will eventually get the SCSW
> with the program-check from the physical device I/O result, and inject
> it to guest for further handling.
> 

I guess that would be the cleanest. I would also be fine with not making
the physical device program-check (issuing a shortened channel program,
and doing the program check in software) but that's probably more
complicated to implement.

> Is this understanding right? If so, I'm fine with that, and I can
> provide the fix in the kernel.
> 

That would be nice.

>>
>> That makes sense to me.
>>
>
Dong Jia Shi Sept. 27, 2017, 7:55 a.m. UTC | #21
* Halil Pasic <pasic@linux.vnet.ibm.com> [2017-09-25 12:57:31 +0200]:

[restored Cc:]

> 
> 
> On 09/25/2017 09:31 AM, Dong Jia Shi wrote:
> > * Cornelia Huck <cohuck@redhat.com> [2017-09-08 11:59:50 +0200]:
> > 
> >> On Fri, 8 Sep 2017 11:21:57 +0200
> >> Halil Pasic <pasic@linux.vnet.ibm.com> wrote:
> >>
> >>> On 09/08/2017 05:41 AM, Dong Jia Shi wrote:
> >>>> Let' me summarize here, in case I misunderstand things. Now we have
> >>>> two ways to choose:
> >>>>
> >>>> A. Kernel: no change.
> >>>>    Qemu  : handle -EFAULT as option 2 by generating a program check.
> >>>>
> >>>> B. Kernel: return -EFAULT
> >>>>            +
> >>>>            update the IRB area in the I/O region for option 1 to present
> >>>>            a unit check SCSW (with proper sense byte ECW), and for option
> >>>>            2 to present a program check.
> >>>>    Qemu  : handle -EFAULT according to the information that the IRB area
> >>>>            provided.  
> >>>
> >>> This is not what I was trying to say. You got my message regarding A, but
> >>> B was supposed to be understood like this.
> >>>
> >>> Keep the current handling for option 1, that is return -EFAULT. For option
> >>> 2 do what the spec says, execute the program until the bad address and then
> >>> generate a program-check (SCSW) once the bad stuff has it's turn. Thus
> >>> the only change in QEMU would be handling -EFAULT with an unit check (because
> >>> now it's just option 1).
> > Let me adding some context information here by copying some words from the
> > previous mail in this thread:
> > The only option 2 case in the kernel is ccwchain_fetch_idal() finding a
> > bad idaw_iova.
> > 
> > What you propose to do for this case is (correct me if I get it wrong):
> > In ccwchain_fetch_idal(), we do not return -EFAULT, instead we return 0,
> > and issuing the incompletely translated channel program with the bad
> > address to the physical device. And QEMU will eventually get the SCSW
> > with the program-check from the physical device I/O result, and inject
> > it to guest for further handling.
> > 
> 
> I guess that would be the cleanest. I would also be fine with not making
> the physical device program-check (issuing a shortened channel program,
> and doing the program check in software) but that's probably more
> complicated to implement.
That's far more complicated. I will try the simple approach.

> > Is this understanding right? If so, I'm fine with that, and I can
> > provide the fix in the kernel.
> > 
> 
> That would be nice.
Ok.
diff mbox series

Patch

diff --git a/hw/s390x/css.c b/hw/s390x/css.c
index bc47bf5b20..1102642c10 100644
--- a/hw/s390x/css.c
+++ b/hw/s390x/css.c
@@ -1015,12 +1015,11 @@  static void sch_handle_start_func_virtual(SubchDev *sch)
 
 }
 
-static int sch_handle_start_func_passthrough(SubchDev *sch)
+static void sch_handle_start_func_passthrough(SubchDev *sch)
 {
 
     PMCW *p = &sch->curr_status.pmcw;
     SCSW *s = &sch->curr_status.scsw;
-    int ret;
 
     ORB *orb = &sch->orb;
     if (!(s->ctrl & SCSW_ACTL_SUSP)) {
@@ -1034,28 +1033,10 @@  static int sch_handle_start_func_passthrough(SubchDev *sch)
      */
     if (!(orb->ctrl0 & ORB_CTRL0_MASK_PFCH) ||
         !(orb->ctrl0 & ORB_CTRL0_MASK_C64)) {
-        return -ENODEV;
+        sch->iret.cc = 3;
     }
 
-    ret = s390_ccw_cmd_request(orb, s, sch->driver_data);
-    switch (ret) {
-    /* Currently we don't update control block and just return the cc code. */
-    case 0:
-        break;
-    case -EBUSY:
-        break;
-    case -ENODEV:
-        break;
-    case -EFAULT:
-         break;
-    case -EACCES:
-        /* Let's reflect an inaccessible host device by cc 3. */
-    default:
-        /* Let's make all other return codes map to cc 3.  */
-        ret = -ENODEV;
-    };
-
-    return ret;
+    s390_ccw_cmd_request(sch);
 }
 
 /*
@@ -1064,7 +1045,7 @@  static int sch_handle_start_func_passthrough(SubchDev *sch)
  * read/writes) asynchronous later on if we start supporting more than
  * our current very simple devices.
  */
-int do_subchannel_work_virtual(SubchDev *sch)
+void do_subchannel_work_virtual(SubchDev *sch)
 {
 
     SCSW *s = &sch->curr_status.scsw;
@@ -1078,41 +1059,35 @@  int do_subchannel_work_virtual(SubchDev *sch)
         sch_handle_start_func_virtual(sch);
     } else {
         /* Cannot happen. */
-        return -ENODEV;
+        sch->iret.cc = 3;
     }
     css_inject_io_interrupt(sch);
-    return 0;
 }
 
-int do_subchannel_work_passthrough(SubchDev *sch)
+void do_subchannel_work_passthrough(SubchDev *sch)
 {
-    int ret;
     SCSW *s = &sch->curr_status.scsw;
 
     if (s->ctrl & SCSW_FCTL_CLEAR_FUNC) {
         /* TODO: Clear handling */
         sch_handle_clear_func(sch);
-        ret = 0;
     } else if (s->ctrl & SCSW_FCTL_HALT_FUNC) {
         /* TODO: Halt handling */
         sch_handle_halt_func(sch);
-        ret = 0;
     } else if (s->ctrl & SCSW_FCTL_START_FUNC) {
-        ret = sch_handle_start_func_passthrough(sch);
+        sch_handle_start_func_passthrough(sch);
     } else {
         /* Cannot happen. */
-        return -ENODEV;
+        sch->iret.cc = 3;
     }
-
-    return ret;
 }
 
-static int do_subchannel_work(SubchDev *sch)
+static void do_subchannel_work(SubchDev *sch)
 {
     if (sch->do_subchannel_work) {
-        return sch->do_subchannel_work(sch);
+        sch->do_subchannel_work(sch);
     } else {
-        return -ENODEV;
+        sch->iret.cc = 3;
     }
 }
 
@@ -1400,27 +1375,26 @@  static void css_update_chnmon(SubchDev *sch)
     }
 }
 
-int css_do_ssch(SubchDev *sch, ORB *orb)
+void css_do_ssch(SubchDev *sch, ORB *orb)
 {
     SCSW *s = &sch->curr_status.scsw;
     PMCW *p = &sch->curr_status.pmcw;
-    int ret;
 
     if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
-        ret = -ENODEV;
-        goto out;
+        sch->iret.cc = 3;
+        return;
     }
 
     if (s->ctrl & SCSW_STCTL_STATUS_PEND) {
-        ret = -EINPROGRESS;
-        goto out;
+        sch->iret.cc = 1;
+        return;
     }
 
     if (s->ctrl & (SCSW_FCTL_START_FUNC |
                    SCSW_FCTL_HALT_FUNC |
                    SCSW_FCTL_CLEAR_FUNC)) {
-        ret = -EBUSY;
-        goto out;
+        sch->iret.cc = 2;
+        return;
     }
 
     /* If monitoring is active, update counter. */
@@ -1433,10 +1407,7 @@  int css_do_ssch(SubchDev *sch, ORB *orb)
     s->ctrl |= (SCSW_FCTL_START_FUNC | SCSW_ACTL_START_PEND);
     s->flags &= ~SCSW_FLAGS_MASK_PNO;
 
-    ret = do_subchannel_work(sch);
-
-out:
-    return ret;
+    do_subchannel_work(sch);
 }
 
 static void copy_irb_to_guest(IRB *dest, const IRB *src, PMCW *pmcw,
@@ -1683,27 +1654,26 @@  void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo)
     }
 }
 
-int css_do_rsch(SubchDev *sch)
+void css_do_rsch(SubchDev *sch)
 {
     SCSW *s = &sch->curr_status.scsw;
     PMCW *p = &sch->curr_status.pmcw;
-    int ret;
 
     if (~(p->flags) & (PMCW_FLAGS_MASK_DNV | PMCW_FLAGS_MASK_ENA)) {
-        ret = -ENODEV;
-        goto out;
+        sch->iret.cc = 3;
+        return;
     }
 
     if (s->ctrl & SCSW_STCTL_STATUS_PEND) {
-        ret = -EINPROGRESS;
-        goto out;
+        sch->iret.cc = 1;
+        return;
     }
 
     if (((s->ctrl & SCSW_CTRL_MASK_FCTL) != SCSW_FCTL_START_FUNC) ||
         (s->ctrl & SCSW_ACTL_RESUME_PEND) ||
         (!(s->ctrl & SCSW_ACTL_SUSP))) {
-        ret = -EINVAL;
-        goto out;
+        sch->iret.cc = 2;
+        return;
     }
 
     /* If monitoring is active, update counter. */
@@ -1713,10 +1683,6 @@  int css_do_rsch(SubchDev *sch)
 
     s->ctrl |= SCSW_ACTL_RESUME_PEND;
     do_subchannel_work(sch);
-    ret = 0;
-
-out:
-    return ret;
 }
 
 int css_do_rchp(uint8_t cssid, uint8_t chpid)
diff --git a/hw/s390x/s390-ccw.c b/hw/s390x/s390-ccw.c
index 2b0741741c..fa0947894d 100644
--- a/hw/s390x/s390-ccw.c
+++ b/hw/s390x/s390-ccw.c
@@ -18,14 +18,14 @@ 
 #include "hw/s390x/css-bridge.h"
 #include "hw/s390x/s390-ccw.h"
 
-int s390_ccw_cmd_request(ORB *orb, SCSW *scsw, void *data)
+void s390_ccw_cmd_request(SubchDev *sch)
 {
-    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(data);
+    S390CCWDeviceClass *cdc = S390_CCW_DEVICE_GET_CLASS(sch->driver_data);
 
     if (cdc->handle_request) {
-        return cdc->handle_request(orb, scsw, data);
+        cdc->handle_request(sch);
     } else {
-        return -ENODEV;
+        sch->iret.cc = 3;
     }
 }
 
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index a8baadf57a..b2827ce987 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -23,6 +23,7 @@ 
 #include "hw/s390x/s390-ccw.h"
 #include "hw/s390x/ccw-device.h"
 #include "qemu/error-report.h"
+#include "cpu.h"
 
 #define TYPE_VFIO_CCW "vfio-ccw"
 typedef struct VFIOCCWDevice {
@@ -47,9 +48,9 @@  struct VFIODeviceOps vfio_ccw_ops = {
     .vfio_compute_needs_reset = vfio_ccw_compute_needs_reset,
 };
 
-static int vfio_ccw_handle_request(ORB *orb, SCSW *scsw, void *data)
+static void vfio_ccw_handle_request(SubchDev *sch)
 {
-    S390CCWDevice *cdev = data;
+    S390CCWDevice *cdev = sch->driver_data;
     VFIOCCWDevice *vcdev = DO_UPCAST(VFIOCCWDevice, cdev, cdev);
     struct ccw_io_region *region = vcdev->io_region;
     int ret;
@@ -60,8 +61,8 @@  static int vfio_ccw_handle_request(ORB *orb, SCSW *scsw, void *data)
 
     memset(region, 0, sizeof(*region));
 
-    memcpy(region->orb_area, orb, sizeof(ORB));
-    memcpy(region->scsw_area, scsw, sizeof(SCSW));
+    memcpy(region->orb_area, &sch->orb, sizeof(ORB));
+    memcpy(region->scsw_area, &sch->curr_status.scsw, sizeof(SCSW));
 
 again:
     ret = pwrite(vcdev->vdev.fd, region,
@@ -71,10 +72,27 @@  again:
             goto again;
         }
         error_report("vfio-ccw: wirte I/O region failed with errno=%d", errno);
-        return -errno;
+        ret = -errno;
+    } else {
+        ret = region->ret_code;
+    }
+    switch (-ret) {
+    /* Currently we don't update control block and just return the cc code. */
+    case 0:
+        sch->iret.cc = 0;
+        break;
+    case EBUSY:
+        sch->iret.cc = 2;
+        break;
+    case EFAULT:
+        sch->iret.pgm_chk = true;
+        sch->iret.irq_code = PGM_ADDRESSING;
+        break;
+    case ENODEV:
+    case EACCES:
+    default:
+        sch->iret.cc = 3;
     }
-
-    return region->ret_code;
 }
 
 static void vfio_ccw_reset(DeviceState *dev)
diff --git a/include/hw/s390x/css.h b/include/hw/s390x/css.h
index 5c5fe6b202..d093181a9e 100644
--- a/include/hw/s390x/css.h
+++ b/include/hw/s390x/css.h
@@ -94,13 +94,31 @@  struct SubchDev {
     /* transport-provided data: */
     int (*ccw_cb) (SubchDev *, CCW1);
     void (*disable_cb)(SubchDev *);
-    int (*do_subchannel_work) (SubchDev *);
+    void (*do_subchannel_work) (SubchDev *);
     SenseId id;
     void *driver_data;
+    /* io instructions conclude according to iret */
+    struct {
+        /*
+         * General semantic of cc codes of IO instructions is (brief):
+         * 0 -- produced expected result
+         * 1 -- produced alternate result
+         * 2 -- ineffective, because busy with previously initiated function
+         * 3 -- ineffective, not operational
+         */
+        uint32_t cc:4;
+        bool pgm_chk:1;
+        uint32_t irq_code;
+    } iret;
 };
 
 extern const VMStateDescription vmstate_subch_dev;
 
+static inline void css_subch_clear_iret(SubchDev *sch)
+{
+    memset(&sch->iret, 0, sizeof(sch->iret));
+}
+
 /*
  * Identify a device within the channel subsystem.
  * Note that this can be used to identify either the subchannel or
@@ -156,9 +174,9 @@  void css_generate_sch_crws(uint8_t cssid, uint8_t ssid, uint16_t schid,
 void css_generate_chp_crws(uint8_t cssid, uint8_t chpid);
 void css_generate_css_crws(uint8_t cssid);
 void css_clear_sei_pending(void);
-int s390_ccw_cmd_request(ORB *orb, SCSW *scsw, void *data);
-int do_subchannel_work_virtual(SubchDev *sub);
-int do_subchannel_work_passthrough(SubchDev *sub);
+void s390_ccw_cmd_request(SubchDev *sch);
+void do_subchannel_work_virtual(SubchDev *sub);
+void do_subchannel_work_passthrough(SubchDev *sub);
 
 typedef enum {
     CSS_IO_ADAPTER_VIRTIO = 0,
@@ -189,7 +207,7 @@  int css_do_msch(SubchDev *sch, const SCHIB *schib);
 int css_do_xsch(SubchDev *sch);
 int css_do_csch(SubchDev *sch);
 int css_do_hsch(SubchDev *sch);
-int css_do_ssch(SubchDev *sch, ORB *orb);
+void css_do_ssch(SubchDev *sch, ORB *orb);
 int css_do_tsch_get_irb(SubchDev *sch, IRB *irb, int *irb_len);
 void css_do_tsch_update_subch(SubchDev *sch);
 int css_do_stcrw(CRW *crw);
@@ -200,7 +218,7 @@  int css_collect_chp_desc(int m, uint8_t cssid, uint8_t f_chpid, uint8_t l_chpid,
 void css_do_schm(uint8_t mbk, int update, int dct, uint64_t mbo);
 int css_enable_mcsse(void);
 int css_enable_mss(void);
-int css_do_rsch(SubchDev *sch);
+void css_do_rsch(SubchDev *sch);
 int css_do_rchp(uint8_t cssid, uint8_t chpid);
 bool css_present(uint8_t cssid);
 #endif
diff --git a/include/hw/s390x/s390-ccw.h b/include/hw/s390x/s390-ccw.h
index 9f45cf1347..d2159c9ed7 100644
--- a/include/hw/s390x/s390-ccw.h
+++ b/include/hw/s390x/s390-ccw.h
@@ -33,7 +33,7 @@  typedef struct S390CCWDeviceClass {
     CCWDeviceClass parent_class;
     void (*realize)(S390CCWDevice *dev, char *sysfsdev, Error **errp);
     void (*unrealize)(S390CCWDevice *dev, Error **errp);
-    int (*handle_request) (ORB *, SCSW *, void *);
+    void (*handle_request) (SubchDev *sch);
 } S390CCWDeviceClass;
 
 #endif
diff --git a/target/s390x/ioinst.c b/target/s390x/ioinst.c
index 51fbea620d..e8044068c2 100644
--- a/target/s390x/ioinst.c
+++ b/target/s390x/ioinst.c
@@ -217,8 +217,6 @@  void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb)
     SubchDev *sch;
     ORB orig_orb, orb;
     uint64_t addr;
-    int ret = -ENODEV;
-    int cc;
     CPUS390XState *env = &cpu->env;
     uint8_t ar;
 
@@ -238,33 +236,17 @@  void ioinst_handle_ssch(S390CPU *cpu, uint64_t reg1, uint32_t ipb)
     }
     trace_ioinst_sch_id("ssch", cssid, ssid, schid);
     sch = css_find_subch(m, cssid, ssid, schid);
-    if (sch && css_subch_visible(sch)) {
-        ret = css_do_ssch(sch, &orb);
+    if (!sch || !css_subch_visible(sch)) {
+        setcc(cpu, 3);
+        return;
     }
-    switch (ret) {
-    case -ENODEV:
-        cc = 3;
-        break;
-    case -EBUSY:
-        cc = 2;
-        break;
-    case -EFAULT:
-        /*
-         * TODO:
-         * I'm wondering whether there is something better
-         * to do for us here (like setting some device or
-         * subchannel status).
-         */
-        program_interrupt(env, PGM_ADDRESSING, 4);
+    css_subch_clear_iret(sch);
+    css_do_ssch(sch, &orb);
+    if (sch->iret.pgm_chk) {
+        program_interrupt(env, sch->iret.irq_code, 4);
         return;
-    case 0:
-        cc = 0;
-        break;
-    default:
-        cc = 1;
-        break;
     }
-    setcc(cpu, cc);
+    setcc(cpu, sch->iret.cc);
 }
 
 void ioinst_handle_stcrw(S390CPU *cpu, uint32_t ipb)
@@ -767,8 +749,6 @@  void ioinst_handle_rsch(S390CPU *cpu, uint64_t reg1)
 {
     int cssid, ssid, schid, m;
     SubchDev *sch;
-    int ret = -ENODEV;
-    int cc;
 
     if (ioinst_disassemble_sch_ident(reg1, &m, &cssid, &ssid, &schid)) {
         program_interrupt(&cpu->env, PGM_OPERAND, 4);
@@ -776,24 +756,17 @@  void ioinst_handle_rsch(S390CPU *cpu, uint64_t reg1)
     }
     trace_ioinst_sch_id("rsch", cssid, ssid, schid);
     sch = css_find_subch(m, cssid, ssid, schid);
-    if (sch && css_subch_visible(sch)) {
-        ret = css_do_rsch(sch);
+    if (!sch || !css_subch_visible(sch)) {
+        setcc(cpu, 3);
+        return;
     }
-    switch (ret) {
-    case -ENODEV:
-        cc = 3;
-        break;
-    case -EINVAL:
-        cc = 2;
-        break;
-    case 0:
-        cc = 0;
-        break;
-    default:
-        cc = 1;
-        break;
+    css_subch_clear_iret(sch);
+    css_do_rsch(sch);
+    if (sch->iret.pgm_chk) {
+        program_interrupt(&cpu->env, sch->iret.irq_code, 4);
+        return;
     }
-    setcc(cpu, cc);
+    setcc(cpu, sch->iret.cc);
 }
 
 #define RCHP_REG1_RES(_reg) (_reg & 0x00000000ff00ff00)