diff mbox

[v4] target-s390x: Implement stfl and stfle

Message ID 20170227101817.11748-1-mmarek@suse.com
State New
Headers show

Commit Message

Michal Marek Feb. 27, 2017, 10:18 a.m. UTC
Indicate the actual features in the STFL implementation and implement
STFLE.

Signed-off-by: Michal Marek <mmarek@suse.com>
---
v4:
 - Remove redundant buffer clearing in do_stfle()
 - Always store whole doublewords in STFLE
 - Use s390_cpu_virt_mem_write() to store the result
 - Raise a specification exception if the STFLE address is not aligned
 - Use the LowCore offset instead of hardcoding the STFL store address
v3:
 - Initialize the buffer in do_stfle()
v2:
 - STFLE is not a privileged instruction, go through the MMU to store the
   result
 - annotate the stfl helper with TCG_CALL_NO_RWG
 - Use a large enough buffer to hold the feature bitmap
 - Fix coding style of the stfle helper
---
 target/s390x/cpu_features.c |  6 ++++--
 target/s390x/cpu_features.h |  2 +-
 target/s390x/helper.h       |  2 ++
 target/s390x/insn-data.def  |  2 ++
 target/s390x/misc_helper.c  | 38 ++++++++++++++++++++++++++++++++++++++
 target/s390x/translate.c    | 18 ++++++++++--------
 6 files changed, 57 insertions(+), 11 deletions(-)

Comments

Richard Henderson Feb. 28, 2017, 10:11 p.m. UTC | #1
On 02/27/2017 09:18 PM, Michal Marek wrote:
> Indicate the actual features in the STFL implementation and implement
> STFLE.
>
> Signed-off-by: Michal Marek <mmarek@suse.com>
> ---
> v4:
>  - Remove redundant buffer clearing in do_stfle()
>  - Always store whole doublewords in STFLE
>  - Use s390_cpu_virt_mem_write() to store the result
>  - Raise a specification exception if the STFLE address is not aligned
>  - Use the LowCore offset instead of hardcoding the STFL store address
> v3:
>  - Initialize the buffer in do_stfle()
> v2:
>  - STFLE is not a privileged instruction, go through the MMU to store the
>    result
>  - annotate the stfl helper with TCG_CALL_NO_RWG
>  - Use a large enough buffer to hold the feature bitmap
>  - Fix coding style of the stfle helper
> ---
>  target/s390x/cpu_features.c |  6 ++++--
>  target/s390x/cpu_features.h |  2 +-
>  target/s390x/helper.h       |  2 ++
>  target/s390x/insn-data.def  |  2 ++
>  target/s390x/misc_helper.c  | 38 ++++++++++++++++++++++++++++++++++++++
>  target/s390x/translate.c    | 18 ++++++++++--------
>  6 files changed, 57 insertions(+), 11 deletions(-)
>
> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
> index 42fd9d792bc8..d77c560380c4 100644
> --- a/target/s390x/cpu_features.c
> +++ b/target/s390x/cpu_features.c
> @@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap)
>      }
>  }
>
> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>                            uint8_t *data)
>  {
>      S390Feat feat;
> -    int bit_nr;
> +    int bit_nr, res = 0;
>
>      if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) {
>          /* z/Architecture is always active if around */
> @@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>              bit_nr = s390_features[feat].bit;
>              /* big endian on uint8_t array */
>              data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
> +            res = MAX(res, bit_nr / 8 + 1);
>          }
>          feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>      }
> +    return res;
>  }
>
>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
> diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
> index d66912178680..e3c41be08060 100644
> --- a/target/s390x/cpu_features.h
> +++ b/target/s390x/cpu_features.h
> @@ -56,7 +56,7 @@ typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1];
>  const S390FeatDef *s390_feat_def(S390Feat feat);
>  S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit);
>  void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap);
> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
>                            uint8_t *data);
>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
>                            uint8_t *data);
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index 9102071d0aa4..f24b50ea48ab 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -95,6 +95,8 @@ DEF_HELPER_FLAGS_1(stckc, TCG_CALL_NO_RWG, i64, env)
>  DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
>  DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
>  DEF_HELPER_4(stsi, i32, env, i64, i64, i64)
> +DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
> +DEF_HELPER_4(stfle, i64, env, i64, i32, i64)
>  DEF_HELPER_FLAGS_4(lctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>  DEF_HELPER_FLAGS_4(lctlg, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>  DEF_HELPER_FLAGS_4(stctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 075ff597c3de..be830a42ed8d 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -899,6 +899,8 @@
>      C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64, stidp, 0)
>  /* STORE CPU TIMER */
>      C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64, stpt, 0)
> +/* STORE FACILITY LIST EXTENDED */
> +    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
>  /* STORE FACILITY LIST */
>      C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
>  /* STORE PREFIX */
> diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
> index c9604ea9c728..2645ff8b1840 100644
> --- a/target/s390x/misc_helper.c
> +++ b/target/s390x/misc_helper.c
> @@ -500,6 +500,44 @@ uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0,
>      return cc;
>  }
>
> +static int do_stfle(CPUS390XState *env, uint64_t addr, uint32_t ar, int len)
> +{
> +    S390CPU *cpu = s390_env_get_cpu(env);
> +    /* 256 doublewords as per STFLE documentation */
> +    uint8_t data[256 * 8] = { 0 };
> +    int res;
> +
> +    res = s390_fill_feat_block(cpu->model->features, S390_FEAT_TYPE_STFL, data);
> +    res = ROUND_UP(res, 8);
> +    s390_cpu_virt_mem_write(cpu, addr, ar, data, MIN(res, len));

This does not do what you think it does.  Or indeed, I suspect what the 
original author thinks it does.  I suspect it works for KVM only, and no one 
has actually tried the non-KVM code path.

Primarily, it does not raise an exception for error.  Secondarily, I have no 
idea what the "AR" argument is supposed to be, or how that's supposed to 
interact with the rest of the virtual memory system.

Your writes need to look a lot more like fast_memmove in mem_helper.c, except 
that of course only the destination needs translation.

Of course, in practice we could reduce this to just one cpu_stl_data for STFL 
and one or two cpu_stq_data for STFLE.  So a full fast_memmove loop is overkill.

As it happens, this reminds me that I wrote a version of this several years ago 
but never submitted it upstream.  I'll dig it out.

> +    need = do_stfle(env, addr, ar, len * 8) / 8;

I'm not keen on the scattered divide by 8.  Can we not standardize on bit 
number please, and divide by 64 or 32 when we need?

> +    if (need <= len) {
> +        env->cc_op = 0;
> +    } else {
> +        env->cc_op = 3;
> +    }

Better as env->cc_op = (need <= len ? 0 : 3);


r~
Thomas Huth March 1, 2017, 8 a.m. UTC | #2
On 28.02.2017 23:11, Richard Henderson wrote:
> On 02/27/2017 09:18 PM, Michal Marek wrote:
>> Indicate the actual features in the STFL implementation and implement
>> STFLE.
>>
>> Signed-off-by: Michal Marek <mmarek@suse.com>
>> ---
>> v4:
>>  - Remove redundant buffer clearing in do_stfle()
>>  - Always store whole doublewords in STFLE
>>  - Use s390_cpu_virt_mem_write() to store the result
>>  - Raise a specification exception if the STFLE address is not aligned
>>  - Use the LowCore offset instead of hardcoding the STFL store address
>> v3:
>>  - Initialize the buffer in do_stfle()
>> v2:
>>  - STFLE is not a privileged instruction, go through the MMU to store the
>>    result
>>  - annotate the stfl helper with TCG_CALL_NO_RWG
>>  - Use a large enough buffer to hold the feature bitmap
>>  - Fix coding style of the stfle helper
>> ---
>>  target/s390x/cpu_features.c |  6 ++++--
>>  target/s390x/cpu_features.h |  2 +-
>>  target/s390x/helper.h       |  2 ++
>>  target/s390x/insn-data.def  |  2 ++
>>  target/s390x/misc_helper.c  | 38 ++++++++++++++++++++++++++++++++++++++
>>  target/s390x/translate.c    | 18 ++++++++++--------
>>  6 files changed, 57 insertions(+), 11 deletions(-)
>>
>> diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
>> index 42fd9d792bc8..d77c560380c4 100644
>> --- a/target/s390x/cpu_features.c
>> +++ b/target/s390x/cpu_features.c
>> @@ -286,11 +286,11 @@ void s390_init_feat_bitmap(const S390FeatInit
>> init, S390FeatBitmap bitmap)
>>      }
>>  }
>>
>> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType
>> type,
>> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType
>> type,
>>                            uint8_t *data)
>>  {
>>      S390Feat feat;
>> -    int bit_nr;
>> +    int bit_nr, res = 0;
>>
>>      if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH,
>> features)) {
>>          /* z/Architecture is always active if around */
>> @@ -303,9 +303,11 @@ void s390_fill_feat_block(const S390FeatBitmap
>> features, S390FeatType type,
>>              bit_nr = s390_features[feat].bit;
>>              /* big endian on uint8_t array */
>>              data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
>> +            res = MAX(res, bit_nr / 8 + 1);
>>          }
>>          feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
>>      }
>> +    return res;
>>  }
>>
>>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType
>> type,
>> diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
>> index d66912178680..e3c41be08060 100644
>> --- a/target/s390x/cpu_features.h
>> +++ b/target/s390x/cpu_features.h
>> @@ -56,7 +56,7 @@ typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1];
>>  const S390FeatDef *s390_feat_def(S390Feat feat);
>>  S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit);
>>  void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap
>> bitmap);
>> -void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType
>> type,
>> +int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType
>> type,
>>                            uint8_t *data);
>>  void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType
>> type,
>>                            uint8_t *data);
>> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
>> index 9102071d0aa4..f24b50ea48ab 100644
>> --- a/target/s390x/helper.h
>> +++ b/target/s390x/helper.h
>> @@ -95,6 +95,8 @@ DEF_HELPER_FLAGS_1(stckc, TCG_CALL_NO_RWG, i64, env)
>>  DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
>>  DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
>>  DEF_HELPER_4(stsi, i32, env, i64, i64, i64)
>> +DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
>> +DEF_HELPER_4(stfle, i64, env, i64, i32, i64)
>>  DEF_HELPER_FLAGS_4(lctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>>  DEF_HELPER_FLAGS_4(lctlg, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>>  DEF_HELPER_FLAGS_4(stctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
>> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
>> index 075ff597c3de..be830a42ed8d 100644
>> --- a/target/s390x/insn-data.def
>> +++ b/target/s390x/insn-data.def
>> @@ -899,6 +899,8 @@
>>      C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64, stidp, 0)
>>  /* STORE CPU TIMER */
>>      C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64, stpt, 0)
>> +/* STORE FACILITY LIST EXTENDED */
>> +    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
>>  /* STORE FACILITY LIST */
>>      C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
>>  /* STORE PREFIX */
>> diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
>> index c9604ea9c728..2645ff8b1840 100644
>> --- a/target/s390x/misc_helper.c
>> +++ b/target/s390x/misc_helper.c
>> @@ -500,6 +500,44 @@ uint32_t HELPER(stsi)(CPUS390XState *env,
>> uint64_t a0,
>>      return cc;
>>  }
>>
>> +static int do_stfle(CPUS390XState *env, uint64_t addr, uint32_t ar,
>> int len)
>> +{
>> +    S390CPU *cpu = s390_env_get_cpu(env);
>> +    /* 256 doublewords as per STFLE documentation */
>> +    uint8_t data[256 * 8] = { 0 };
>> +    int res;
>> +
>> +    res = s390_fill_feat_block(cpu->model->features,
>> S390_FEAT_TYPE_STFL, data);
>> +    res = ROUND_UP(res, 8);
>> +    s390_cpu_virt_mem_write(cpu, addr, ar, data, MIN(res, len));
> 
> This does not do what you think it does.  Or indeed, I suspect what the
> original author thinks it does.  I suspect it works for KVM only, and no
> one has actually tried the non-KVM code path.

It should work fine TCG, too. At least it used to work fine with TCG
when I put that stuff together two years ago. The function only uses the
KVM_S390_MEM_OP if KVM is enabled, and uses mmu_translate() internally
to walk the MMU pages otherwise.

> Primarily, it does not raise an exception for error.

Protection and page faults should be generated properly via
trigger_access_exception() there ... or did I miss something?

> Secondarily, I
> have no idea what the "AR" argument is supposed to be, or how that's
> supposed to interact with the rest of the virtual memory system.

AR = Access Register ... they are needed when the CPU runs in access
register mode (i.e. the AS PSW bits are 01). AFAIK we don't emulate this
in QEMU yet, but the KVM_S390_MEM_OP ioctl already supports it.

> Your writes need to look a lot more like fast_memmove in mem_helper.c,
> except that of course only the destination needs translation.

I still think that s390_cpu_virt_mem_write() should be fine here, too.

> Of course, in practice we could reduce this to just one cpu_stl_data for
> STFL and one or two cpu_stq_data for STFLE.

I think STFLE can store more than two 64-bit words, can't it?

 Thomas
Richard Henderson March 1, 2017, 7:30 p.m. UTC | #3
On 03/01/2017 07:00 PM, Thomas Huth wrote:
>> Primarily, it does not raise an exception for error.
>
> Protection and page faults should be generated properly via
> trigger_access_exception() there ... or did I miss something?

So why does s390_cpu_virt_mem_rw document that it returns non-zero if there was 
an error?

I see you do raise an exception on the TCG path (via translate_pages), but not 
the KVM path.  That is very confusing.

> AR = Access Register ... they are needed when the CPU runs in access
> register mode (i.e. the AS PSW bits are 01). AFAIK we don't emulate this
> in QEMU yet, but the KVM_S390_MEM_OP ioctl already supports it.

Hmm.  I guess I don't know how to read that then.

>> Your writes need to look a lot more like fast_memmove in mem_helper.c,
>> except that of course only the destination needs translation.
>
> I still think that s390_cpu_virt_mem_write() should be fine here, too.

Ideally you'd interact with the TCG TLB in some way so that you didn't have to 
manage your own translation and your own exceptions.  That's what fast_memmove 
does.

>
>> Of course, in practice we could reduce this to just one cpu_stl_data for
>> STFL and one or two cpu_stq_data for STFLE.
>
> I think STFLE can store more than two 64-bit words, can't it?

Technically, yes.  But there are less than 128 bits defined.  Certainly much 
less than the 4k bits that Michal prepares for.


r~
Thomas Huth March 1, 2017, 8:20 p.m. UTC | #4
On 01.03.2017 20:30, Richard Henderson wrote:
> On 03/01/2017 07:00 PM, Thomas Huth wrote:
>>> Primarily, it does not raise an exception for error.
>>
>> Protection and page faults should be generated properly via
>> trigger_access_exception() there ... or did I miss something?
> 
> So why does s390_cpu_virt_mem_rw document that it returns non-zero if
> there was an error?

The non-zero return code is needed for the callers to decide whether to
continue or not (this is used in non-TCG code, too, so we need something
that is independent from TCG magic here). See the usage of
s390_cpu_virt_mem_read() / ..._write() in target/s390x/ioinst.c for example.

> I see you do raise an exception on the TCG path (via translate_pages),
> but not the KVM path.  That is very confusing.

The KVM_S390_MEM_OP ioctl can raise an exception, too. I konw, it's
confusing, but that ioctl was necessary since you need to hold a special
lock in KVM while walking the page tables.

>> AR = Access Register ... they are needed when the CPU runs in access
>> register mode (i.e. the AS PSW bits are 01). AFAIK we don't emulate this
>> in QEMU yet, but the KVM_S390_MEM_OP ioctl already supports it.
> 
> Hmm.  I guess I don't know how to read that then.

The access-register mode is described in the "Access-Register
Translation" sub-chapter in Chapter 5 ("Program Execution") of the POP.
Make sure to have some good painkillers ready first, though ... that's
IMHO really one of the ugliest parts of the architecture ;-)

>>> Your writes need to look a lot more like fast_memmove in mem_helper.c,
>>> except that of course only the destination needs translation.
>>
>> I still think that s390_cpu_virt_mem_write() should be fine here, too.
> 
> Ideally you'd interact with the TCG TLB in some way so that you didn't
> have to manage your own translation and your own exceptions.  That's
> what fast_memmove does.

Well, all the code from s390_cpu_virt_mem_rw() can also run with KVM -
in case the KVM_S390_MEM_OP is not available (which also has just been
introduced two years ago). So all the code that can be called by
s390_cpu_virt_mem_rw() has to work also without TCG.

But I agree, since the original problem here (TCG emulation of stfle) is
not related to KVM, it's maybe better to use a function a la
fast_memmove there instead.

>>> Of course, in practice we could reduce this to just one cpu_stl_data for
>>> STFL and one or two cpu_stq_data for STFLE.
>>
>> I think STFLE can store more than two 64-bit words, can't it?
> 
> Technically, yes.  But there are less than 128 bits defined.  Certainly
> much less than the 4k bits that Michal prepares for.

True. But maybe we should then also have an assert() or something
similar here in case the bits exceed the 128 limit one day...?

 Thomas
Michal Marek March 2, 2017, 10:53 a.m. UTC | #5
Dne 28.2.2017 v 23:11 Richard Henderson napsal(a):
> On 02/27/2017 09:18 PM, Michal Marek wrote:
>> +static int do_stfle(CPUS390XState *env, uint64_t addr, uint32_t ar,
>> int len)
>> +{
>> +    S390CPU *cpu = s390_env_get_cpu(env);
>> +    /* 256 doublewords as per STFLE documentation */
>> +    uint8_t data[256 * 8] = { 0 };
>> +    int res;
>> +
>> +    res = s390_fill_feat_block(cpu->model->features,
>> S390_FEAT_TYPE_STFL, data);
>> +    res = ROUND_UP(res, 8);
>> +    s390_cpu_virt_mem_write(cpu, addr, ar, data, MIN(res, len));
> 
> This does not do what you think it does.  Or indeed, I suspect what the
> original author thinks it does.  I suspect it works for KVM only, and no
> one has actually tried the non-KVM code path.
> 
> Primarily, it does not raise an exception for error.  Secondarily, I
> have no idea what the "AR" argument is supposed to be, or how that's
> supposed to interact with the rest of the virtual memory system.
> 
> Your writes need to look a lot more like fast_memmove in mem_helper.c,
> except that of course only the destination needs translation.
> 
> Of course, in practice we could reduce this to just one cpu_stl_data for
> STFL and one or two cpu_stq_data for STFLE.  So a full fast_memmove loop
> is overkill.
> 
> As it happens, this reminds me that I wrote a version of this several
> years ago but never submitted it upstream.  I'll dig it out.

So, does it make sense for me to post a v5 again not using
s390_cpu_virt_mem_write(), or are you going to refresh your stfle
implementation?

Thanks,
Michal
David Hildenbrand March 2, 2017, 1:09 p.m. UTC | #6
>>> Of course, in practice we could reduce this to just one cpu_stl_data for
>>> STFL and one or two cpu_stq_data for STFLE.
>>
>> I think STFLE can store more than two 64-bit words, can't it?
> 
> Technically, yes.  But there are less than 128 bits defined.  Certainly much 
> less than the 4k bits that Michal prepares for.
> 

The architectural limit is 2k bytes (yes I said bytes).

Check out "struct kvm_s390_vm_cpu_machine - fac_list" /
KVM_S390_VM_CPU_MACHINE in
arch/s390/include/uapi/asm/kvm.h. Here we prepared for that.

Also note preparations for new stfl bits for future HW:

cd1836f583d7 (KVM: s390: instruction-execution-protection support)
-> bit 130
a679c547d19d (KVM: s390: gaccess: add ESOP2 handling)
-> bit 131

However, for now 128 bit should be more then enough, as TCG still misses
loads of features. CPU model still isn't properly wired up for TCG yet.
David Hildenbrand March 2, 2017, 1:12 p.m. UTC | #7
>> As it happens, this reminds me that I wrote a version of this several
>> years ago but never submitted it upstream.  I'll dig it out.
> 
> So, does it make sense for me to post a v5 again not using
> s390_cpu_virt_mem_write(), or are you going to refresh your stfle
> implementation?
> 
> Thanks,
> Michal
> 

A new implementation should definitely take care of the CPU model.
diff mbox

Patch

diff --git a/target/s390x/cpu_features.c b/target/s390x/cpu_features.c
index 42fd9d792bc8..d77c560380c4 100644
--- a/target/s390x/cpu_features.c
+++ b/target/s390x/cpu_features.c
@@ -286,11 +286,11 @@  void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap)
     }
 }
 
-void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
+int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
                           uint8_t *data)
 {
     S390Feat feat;
-    int bit_nr;
+    int bit_nr, res = 0;
 
     if (type == S390_FEAT_TYPE_STFL && test_bit(S390_FEAT_ZARCH, features)) {
         /* z/Architecture is always active if around */
@@ -303,9 +303,11 @@  void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
             bit_nr = s390_features[feat].bit;
             /* big endian on uint8_t array */
             data[bit_nr / 8] |= 0x80 >> (bit_nr % 8);
+            res = MAX(res, bit_nr / 8 + 1);
         }
         feat = find_next_bit(features, S390_FEAT_MAX, feat + 1);
     }
+    return res;
 }
 
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
diff --git a/target/s390x/cpu_features.h b/target/s390x/cpu_features.h
index d66912178680..e3c41be08060 100644
--- a/target/s390x/cpu_features.h
+++ b/target/s390x/cpu_features.h
@@ -56,7 +56,7 @@  typedef uint64_t S390FeatInit[S390_FEAT_MAX / 64 + 1];
 const S390FeatDef *s390_feat_def(S390Feat feat);
 S390Feat s390_feat_by_type_and_bit(S390FeatType type, int bit);
 void s390_init_feat_bitmap(const S390FeatInit init, S390FeatBitmap bitmap);
-void s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
+int s390_fill_feat_block(const S390FeatBitmap features, S390FeatType type,
                           uint8_t *data);
 void s390_add_from_feat_block(S390FeatBitmap features, S390FeatType type,
                           uint8_t *data);
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 9102071d0aa4..f24b50ea48ab 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -95,6 +95,8 @@  DEF_HELPER_FLAGS_1(stckc, TCG_CALL_NO_RWG, i64, env)
 DEF_HELPER_FLAGS_2(spt, TCG_CALL_NO_RWG, void, env, i64)
 DEF_HELPER_FLAGS_1(stpt, TCG_CALL_NO_RWG, i64, env)
 DEF_HELPER_4(stsi, i32, env, i64, i64, i64)
+DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
+DEF_HELPER_4(stfle, i64, env, i64, i32, i64)
 DEF_HELPER_FLAGS_4(lctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
 DEF_HELPER_FLAGS_4(lctlg, TCG_CALL_NO_WG, void, env, i32, i64, i32)
 DEF_HELPER_FLAGS_4(stctl, TCG_CALL_NO_WG, void, env, i32, i64, i32)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 075ff597c3de..be830a42ed8d 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -899,6 +899,8 @@ 
     C(0xb202, STIDP,   S,     Z,   la2, 0, new, m1_64, stidp, 0)
 /* STORE CPU TIMER */
     C(0xb209, STPT,    S,     Z,   la2, 0, new, m1_64, stpt, 0)
+/* STORE FACILITY LIST EXTENDED */
+    C(0xb2b0, STFLE,   S,  SFLE,   0, a2, 0, 0, stfle, 0)
 /* STORE FACILITY LIST */
     C(0xb2b1, STFL,    S,     Z,   0, 0, 0, 0, stfl, 0)
 /* STORE PREFIX */
diff --git a/target/s390x/misc_helper.c b/target/s390x/misc_helper.c
index c9604ea9c728..2645ff8b1840 100644
--- a/target/s390x/misc_helper.c
+++ b/target/s390x/misc_helper.c
@@ -500,6 +500,44 @@  uint32_t HELPER(stsi)(CPUS390XState *env, uint64_t a0,
     return cc;
 }
 
+static int do_stfle(CPUS390XState *env, uint64_t addr, uint32_t ar, int len)
+{
+    S390CPU *cpu = s390_env_get_cpu(env);
+    /* 256 doublewords as per STFLE documentation */
+    uint8_t data[256 * 8] = { 0 };
+    int res;
+
+    res = s390_fill_feat_block(cpu->model->features, S390_FEAT_TYPE_STFL, data);
+    res = ROUND_UP(res, 8);
+    s390_cpu_virt_mem_write(cpu, addr, ar, data, MIN(res, len));
+
+    return res;
+}
+
+uint64_t HELPER(stfle)(CPUS390XState *env, uint64_t addr, uint32_t ar,
+                       uint64_t r0)
+{
+    int need, len = r0 & 0xff;
+
+    if (addr % 8) {
+        program_interrupt(env, PGM_SPECIFICATION, 4);
+        return r0;
+    }
+    need = do_stfle(env, addr, ar, len * 8) / 8;
+    if (need <= len) {
+        env->cc_op = 0;
+    } else {
+        env->cc_op = 3;
+    }
+
+    return (r0 & ~0xffLL) | (need - 1);
+}
+
+void HELPER(stfl)(CPUS390XState *env)
+{
+    do_stfle(env, offsetof(LowCore, stfl_fac_list), 0, 4);
+}
+
 uint32_t HELPER(sigp)(CPUS390XState *env, uint64_t order_code, uint32_t r1,
                       uint64_t cpu_addr)
 {
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 01c62176bf70..d18a2ffb447c 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3628,15 +3628,17 @@  static ExitStatus op_spt(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_stfl(DisasContext *s, DisasOps *o)
 {
-    TCGv_i64 f, a;
-    /* We really ought to have more complete indication of facilities
-       that we implement.  Address this when STFLE is implemented.  */
     check_privileged(s);
-    f = tcg_const_i64(0xc0000000);
-    a = tcg_const_i64(200);
-    tcg_gen_qemu_st32(f, a, get_mem_index(s));
-    tcg_temp_free_i64(f);
-    tcg_temp_free_i64(a);
+    gen_helper_stfl(cpu_env);
+    return NO_EXIT;
+}
+
+static ExitStatus op_stfle(DisasContext *s, DisasOps *o)
+{
+    TCGv_i32 b2 = tcg_const_i32(get_field(s->fields, b2));
+    potential_page_fault(s);
+    gen_helper_stfle(regs[0], cpu_env, o->in2, b2, regs[0]);
+    set_cc_static(s);
     return NO_EXIT;
 }