diff mbox series

[v22,03/20] target/s390x/cpu topology: handle STSI(15) and build the SYSIB

Message ID 20230901155812.2696560-4-nsg@linux.ibm.com
State New
Headers show
Series s390x: CPU Topology | expand

Commit Message

Nina Schoetterl-Glausch Sept. 1, 2023, 3:57 p.m. UTC
From: Pierre Morel <pmorel@linux.ibm.com>

On interception of STSI(15.1.x) the System Information Block
(SYSIB) is built from the list of pre-ordered topology entries.

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
Signed-off-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
---
 MAINTAINERS                      |   1 +
 qapi/machine-target.json         |  14 ++
 include/hw/s390x/cpu-topology.h  |  25 +++
 include/hw/s390x/sclp.h          |   1 +
 target/s390x/cpu.h               |  76 ++++++++
 hw/s390x/cpu-topology.c          |   2 +
 target/s390x/kvm/kvm.c           |   5 +-
 target/s390x/kvm/stsi-topology.c | 296 +++++++++++++++++++++++++++++++
 target/s390x/kvm/meson.build     |   3 +-
 9 files changed, 421 insertions(+), 2 deletions(-)
 create mode 100644 target/s390x/kvm/stsi-topology.c

Comments

Nina Schoetterl-Glausch Sept. 4, 2023, 6:23 p.m. UTC | #1
On Fri, 2023-09-01 at 17:57 +0200, Nina Schoetterl-Glausch wrote:
> From: Pierre Morel <pmorel@linux.ibm.com>
> 
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> Signed-off-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>

Reviewed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>

> ---
>  MAINTAINERS                      |   1 +
>  qapi/machine-target.json         |  14 ++
>  include/hw/s390x/cpu-topology.h  |  25 +++
>  include/hw/s390x/sclp.h          |   1 +
>  target/s390x/cpu.h               |  76 ++++++++
>  hw/s390x/cpu-topology.c          |   2 +
>  target/s390x/kvm/kvm.c           |   5 +-
>  target/s390x/kvm/stsi-topology.c | 296 +++++++++++++++++++++++++++++++
>  target/s390x/kvm/meson.build     |   3 +-
>  9 files changed, 421 insertions(+), 2 deletions(-)
>  create mode 100644 target/s390x/kvm/stsi-topology.c

[...]

> +/*
> + * CPU Topology List provided by STSI with fc=15 provides a list
> + * of two different Topology List Entries (TLE) types to specify
> + * the topology hierarchy.
> + *
> + * - Container Topology List Entry
> + *   Defines a container to contain other Topology List Entries
> + *   of any type, nested containers or CPU.
> + * - CPU Topology List Entry
> + *   Specifies the CPUs position, type, entitlement and polarization
> + *   of the CPUs contained in the last Container TLE.
> + *
> + * There can be theoretically up to five levels of containers, QEMU
> + * uses only three levels, the drawer's, book's and socket's level.
> + *
> + * A container with a nesting level (NL) greater than 1 can only
> + * contain another container of nesting level NL-1.
> + *
> + * A container of nesting level 1 (socket), contains as many CPU TLE
> + * as needed to describe the position and qualities of all CPUs inside
> + * the container.
> + * The qualities of a CPU are polarization, entitlement and type.
> + *
> + * The CPU TLE defines the position of the CPUs of identical qualities
> + * using a 64bits mask which first bit has its offset defined by
> + * the CPU address orgin field of the CPU TLE like in:

s/orgin/origin/

> + * CPU address = origin * 64 + bit position within the mask
> + *
> + */

[...]

> diff --git a/target/s390x/kvm/stsi-topology.c b/target/s390x/kvm/stsi-topology.c
> new file mode 100644
> index 0000000000..cb78040ea5
> --- /dev/null
> +++ b/target/s390x/kvm/stsi-topology.c

[...]

> +/**
> + * setup_stsi:
> + * sysib: pointer to a SysIB to be filled with SysIB_151x data
> + * level: Nested level specified by the guest

No @ in front of the arguments here.

> + *
> + * Setup the SYSIB for STSI 15.1, the header as well as the description
> + * of the topology.
> + */
> +static int setup_stsi(S390TopologyList *topology_list, SysIB_151x *sysib,
> +                      int level)

[...]
Thomas Huth Sept. 5, 2023, 1:26 p.m. UTC | #2
On 01/09/2023 17.57, Nina Schoetterl-Glausch wrote:
> From: Pierre Morel <pmorel@linux.ibm.com>
> 
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> Signed-off-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> ---
>   MAINTAINERS                      |   1 +
>   qapi/machine-target.json         |  14 ++
>   include/hw/s390x/cpu-topology.h  |  25 +++
>   include/hw/s390x/sclp.h          |   1 +
>   target/s390x/cpu.h               |  76 ++++++++
>   hw/s390x/cpu-topology.c          |   2 +
>   target/s390x/kvm/kvm.c           |   5 +-
>   target/s390x/kvm/stsi-topology.c | 296 +++++++++++++++++++++++++++++++
>   target/s390x/kvm/meson.build     |   3 +-
>   9 files changed, 421 insertions(+), 2 deletions(-)
>   create mode 100644 target/s390x/kvm/stsi-topology.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index b10b83583f..692ce9f121 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1700,6 +1700,7 @@ M: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
>   S: Supported
>   F: include/hw/s390x/cpu-topology.h
>   F: hw/s390x/cpu-topology.c
> +F: target/s390x/kvm/stsi-topology.c
>   
>   X86 Machines
>   ------------
> diff --git a/qapi/machine-target.json b/qapi/machine-target.json
> index f0a6b72414..275234a20f 100644
> --- a/qapi/machine-target.json
> +++ b/qapi/machine-target.json
> @@ -361,3 +361,17 @@
>                      'TARGET_MIPS',
>                      'TARGET_LOONGARCH64',
>                      'TARGET_RISCV' ] } }
> +
> +##
> +# @CpuS390Polarization:
> +#
> +# An enumeration of cpu polarization that can be assumed by a virtual
> +# S390 CPU
> +#
> +# Since: 8.2
> +##
> +{ 'enum': 'CpuS390Polarization',
> +  'prefix': 'S390_CPU_POLARIZATION',
> +  'data': [ 'horizontal', 'vertical' ],
> +    'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
> +}
> diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
> index 97b0af2795..fc15acf297 100644
> --- a/include/hw/s390x/cpu-topology.h
> +++ b/include/hw/s390x/cpu-topology.h
> @@ -15,10 +15,35 @@
>   #include "hw/boards.h"
>   #include "qapi/qapi-types-machine-target.h"
>   
> +#define S390_TOPOLOGY_CPU_IFL   0x03
> +
> +typedef union s390_topology_id {
> +    uint64_t id;
> +    struct {
> +        uint8_t _reserved0;
> +        uint8_t drawer;
> +        uint8_t book;
> +        uint8_t socket;
> +        uint8_t type;
> +        uint8_t inv_polarization;

What sense does it make to store the polarization in an inverted way? ... I 
don't get that ... could you please at least add a comment somewhere for the 
rationale?

> +        uint8_t not_dedicated;
> +        uint8_t origin;
> +    };
> +} s390_topology_id;
> +
> +typedef struct S390TopologyEntry {
> +    QTAILQ_ENTRY(S390TopologyEntry) next;
> +    s390_topology_id id;
> +    uint64_t mask;
> +} S390TopologyEntry;
> +
>   typedef struct S390Topology {
>       uint8_t *cores_per_socket;
> +    CpuS390Polarization polarization;
>   } S390Topology;
>   
> +typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList;
> +
>   #ifdef CONFIG_KVM
>   bool s390_has_topology(void);
>   void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp);
> diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
> index cf1f2efae2..c49051e17e 100644
> --- a/include/hw/s390x/sclp.h
> +++ b/include/hw/s390x/sclp.h
> @@ -112,6 +112,7 @@ typedef struct CPUEntry {
>   } QEMU_PACKED CPUEntry;
>   
>   #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
> +#define SCLP_READ_SCP_INFO_MNEST                2
>   typedef struct ReadInfo {
>       SCCBHeader h;
>       uint16_t rnmax;
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index 7ebd5e05b6..b8a0c02714 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -569,6 +569,29 @@ typedef struct SysIB_322 {
>   } SysIB_322;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>   
> +/*
> + * Topology Magnitude fields (MAG) indicates the maximum number of
> + * topology list entries (TLE) at the corresponding nesting level.
> + */
> +#define S390_TOPOLOGY_MAG  6
> +#define S390_TOPOLOGY_MAG6 0
> +#define S390_TOPOLOGY_MAG5 1
> +#define S390_TOPOLOGY_MAG4 2
> +#define S390_TOPOLOGY_MAG3 3
> +#define S390_TOPOLOGY_MAG2 4
> +#define S390_TOPOLOGY_MAG1 5
> +/* Configuration topology */
> +typedef struct SysIB_151x {
> +    uint8_t  reserved0[2];
> +    uint16_t length;
> +    uint8_t  mag[S390_TOPOLOGY_MAG];
> +    uint8_t  reserved1;
> +    uint8_t  mnest;
> +    uint32_t reserved2;
> +    char tle[];
> +} SysIB_151x;
> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
> +
>   typedef union SysIB {
>       SysIB_111 sysib_111;
>       SysIB_121 sysib_121;
> @@ -576,9 +599,62 @@ typedef union SysIB {
>       SysIB_221 sysib_221;
>       SysIB_222 sysib_222;
>       SysIB_322 sysib_322;
> +    SysIB_151x sysib_151x;
>   } SysIB;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>   
> +/*
> + * CPU Topology List provided by STSI with fc=15 provides a list
> + * of two different Topology List Entries (TLE) types to specify
> + * the topology hierarchy.
> + *
> + * - Container Topology List Entry
> + *   Defines a container to contain other Topology List Entries
> + *   of any type, nested containers or CPU.
> + * - CPU Topology List Entry
> + *   Specifies the CPUs position, type, entitlement and polarization
> + *   of the CPUs contained in the last Container TLE.
> + *
> + * There can be theoretically up to five levels of containers, QEMU
> + * uses only three levels, the drawer's, book's and socket's level.
> + *
> + * A container with a nesting level (NL) greater than 1 can only
> + * contain another container of nesting level NL-1.
> + *
> + * A container of nesting level 1 (socket), contains as many CPU TLE
> + * as needed to describe the position and qualities of all CPUs inside
> + * the container.
> + * The qualities of a CPU are polarization, entitlement and type.
> + *
> + * The CPU TLE defines the position of the CPUs of identical qualities
> + * using a 64bits mask which first bit has its offset defined by
> + * the CPU address orgin field of the CPU TLE like in:
> + * CPU address = origin * 64 + bit position within the mask
> + *

Remove the empty line at the end?

> + */
> +/* Container type Topology List Entry */
> +typedef struct SYSIBContainerListEntry {
> +        uint8_t nl;
> +        uint8_t reserved[6];
> +        uint8_t id;
> +} SYSIBContainerListEntry;
> +QEMU_BUILD_BUG_ON(sizeof(SYSIBContainerListEntry) != 8);
> +
> +/* CPU type Topology List Entry */
> +typedef struct SysIBCPUListEntry {
> +        uint8_t nl;
> +        uint8_t reserved0[3];
> +#define SYSIB_TLE_POLARITY_MASK 0x03
> +#define SYSIB_TLE_DEDICATED     0x04
> +        uint8_t flags;
> +        uint8_t type;
> +        uint16_t origin;
> +        uint64_t mask;
> +} SysIBCPUListEntry;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBCPUListEntry) != 16);
> +
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, uintptr_t ra);
> +
>   /* MMU defines */
>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>   #define ASCE_SUBSPACE         0x200       /* subspace group control           */
> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
> index 06b60ebce4..5971804680 100644
> --- a/hw/s390x/cpu-topology.c
> +++ b/hw/s390x/cpu-topology.c
> @@ -28,10 +28,12 @@
>    * s390_topology is used to keep the topology information.
>    * .cores_per_socket: tracks information on the count of cores
>    *                    per socket.
> + * .polarization: tracks machine polarization.
>    */
>   S390Topology s390_topology = {
>       /* will be initialized after the CPU model is realized */
>       .cores_per_socket = NULL,
> +    .polarization = S390_CPU_POLARIZATION_HORIZONTAL,
>   };
>   
>   /**
> diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
> index 852fbd0df7..56b31b8aae 100644
> --- a/target/s390x/kvm/kvm.c
> +++ b/target/s390x/kvm/kvm.c
> @@ -1911,9 +1911,12 @@ static int handle_stsi(S390CPU *cpu)
>           if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
>               return 0;
>           }
> -        /* Only sysib 3.2.2 needs post-handling for now. */
>           insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar);
>           return 0;
> +    case 15:
> +        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr,
> +                           run->s390_stsi.ar, RA_IGNORED);
> +        return 0;
>       default:
>           return 0;
>       }
> diff --git a/target/s390x/kvm/stsi-topology.c b/target/s390x/kvm/stsi-topology.c
> new file mode 100644
> index 0000000000..cb78040ea5
> --- /dev/null
> +++ b/target/s390x/kvm/stsi-topology.c
> @@ -0,0 +1,296 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * QEMU S390x CPU Topology
> + *
> + * Copyright IBM Corp. 2022, 2023
> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
> + *
> + */
> +#include "qemu/osdep.h"
> +#include "cpu.h"
> +#include "hw/s390x/sclp.h"
> +#include "hw/s390x/cpu-topology.h"
> +
> +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_LOW != 1);
> +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_MEDIUM != 2);
> +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_HIGH != 3);
> +
> +/**
> + * fill_container:
> + * @p: The address of the container TLE to fill
> + * @level: The level of nesting for this container
> + * @id: The container receives a unique ID inside its own container
> + *
> + * Returns the next free TLE entry.
> + */
> +static char *fill_container(char *p, int level, int id)
> +{
> +    SYSIBContainerListEntry *tle = (SYSIBContainerListEntry *)p;
> +
> +    tle->nl = level;
> +    tle->id = id;
> +    return p + sizeof(*tle);
> +}
> +
> +/**
> + * fill_tle_cpu:
> + * @p: The address of the CPU TLE to fill
> + * @entry: a pointer to the S390TopologyEntry defining this
> + *         CPU container.
> + *
> + * Returns the next free TLE entry.
> + */
> +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
> +{
> +    SysIBCPUListEntry *tle = (SysIBCPUListEntry *)p;
> +    s390_topology_id topology_id = entry->id;
> +
> +    tle->nl = 0;
> +    tle->flags = 3 - topology_id.inv_polarization;

Can you avoid the magic number 3 here?

Also, you seem to simply revert the "3 - entitlement" from
s390_topology_from_cpu() ... so that again raises the question: Why do
the inversion at all?

> +    if (!topology_id.not_dedicated) {
> +        tle->flags |= SYSIB_TLE_DEDICATED;
> +    }
> +    tle->type = topology_id.type;
> +    tle->origin = cpu_to_be16(topology_id.origin * 64);
> +    tle->mask = cpu_to_be64(entry->mask);
> +    return p + sizeof(*tle);
> +}
> +
> +/*
> + * Macro to check that the size of data after increment
> + * will not get bigger than the size of the SysIB.
> + */
> +#define SYSIB_GUARD(data, x) do {       \
> +        data += x;                      \
> +        if (data > sizeof(SysIB)) {     \
> +            return 0;                   \
> +        }                               \
> +    } while (0)
> +
> +/**
> + * stsi_topology_fill_sysib:
> + * @p: A pointer to the position of the first TLE
> + * @level: The nested level wanted by the guest
> + *
> + * Fill the SYSIB with the topology information as described in
> + * the PoP, nesting containers as appropriate, with the maximum
> + * nesting limited by @level.
> + *
> + * Return value:
> + * On success: the size of the SysIB_15x after being filled with TLE.
> + * On error: 0 in the case we would overrun the end of the SysIB.
> + */
> +static int stsi_topology_fill_sysib(S390TopologyList *topology_list,
> +                                    char *p, int level)
> +{
> +    S390TopologyEntry *entry;
> +    int last_drawer = -1;
> +    int last_book = -1;
> +    int last_socket = -1;
> +    int drawer_id = 0;
> +    int book_id = 0;
> +    int socket_id = 0;
> +    int n = sizeof(SysIB_151x);
> +
> +    QTAILQ_FOREACH(entry, topology_list, next) {
> +        bool drawer_change = last_drawer != entry->id.drawer;
> +        bool book_change = drawer_change || last_book != entry->id.book;
> +        bool socket_change = book_change || last_socket != entry->id.socket;
> +
> +        if (level > 3 && drawer_change) {
> +            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
> +            p = fill_container(p, 3, drawer_id++);
> +            book_id = 0;
> +        }
> +        if (level > 2 && book_change) {
> +            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
> +            p = fill_container(p, 2, book_id++);
> +            socket_id = 0;
> +        }
> +        if (socket_change) {
> +            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
> +            p = fill_container(p, 1, socket_id++);
> +        }
> +
> +        SYSIB_GUARD(n, sizeof(SysIBCPUListEntry));
> +        p = fill_tle_cpu(p, entry);
> +        last_drawer = entry->id.drawer;
> +        last_book = entry->id.book;
> +        last_socket = entry->id.socket;
> +    }
> +
> +    return n;
> +}
> +
> +/**
> + * setup_stsi:
> + * sysib: pointer to a SysIB to be filled with SysIB_151x data
> + * level: Nested level specified by the guest
> + *
> + * Setup the SYSIB for STSI 15.1, the header as well as the description
> + * of the topology.
> + */
> +static int setup_stsi(S390TopologyList *topology_list, SysIB_151x *sysib,
> +                      int level)
> +{
> +    sysib->mnest = level;
> +    switch (level) {
> +    case 4:
> +        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    case 3:
> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
> +                                         current_machine->smp.books;
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    case 2:
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
> +                                         current_machine->smp.books *
> +                                         current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    }
> +
> +    return stsi_topology_fill_sysib(topology_list, sysib->tle, level);
> +}
> +
> +/**
> + * s390_topology_add_cpu_to_entry:
> + * @entry: Topology entry to setup
> + * @cpu: the S390CPU to add
> + *
> + * Set the core bit inside the topology mask.
> + */
> +static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
> +                                           S390CPU *cpu)
> +{
> +    set_bit(63 - (cpu->env.core_id % 64), &entry->mask);
> +}
> +
> +/**
> + * s390_topology_from_cpu:
> + * @cpu: S390CPU to calculate the topology id
> + *
> + * Initialize the topology id from the CPU environment.
> + */
> +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
> +{
> +    s390_topology_id topology_id = {0};
> +
> +    topology_id.drawer = cpu->env.drawer_id;
> +    topology_id.book = cpu->env.book_id;
> +    topology_id.socket = cpu->env.socket_id;
> +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
> +    topology_id.not_dedicated = !cpu->env.dedicated;
> +
> +    topology_id.inv_polarization = 3;

Magic number 3 again.

> +    if (s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL) {
> +        topology_id.inv_polarization -= cpu->env.entitlement;
> +    }
> +
> +    topology_id.origin = cpu->env.core_id / 64;
> +
> +    return topology_id;
> +}
> +
> +/**
> + * s390_topology_fill_list_sorted:
> + *
> + * Loop over all CPU and insert it at the right place
> + * inside the TLE entry list.
> + * Fill the S390Topology list with entries according to the order
> + * specified by the PoP.
> + */
> +static void s390_topology_fill_list_sorted(S390TopologyList *topology_list)
> +{
> +    CPUState *cs;
> +    S390TopologyEntry sentinel;
> +
> +    QTAILQ_INIT(topology_list);
> +
> +    sentinel.id.id = cpu_to_be64(UINT64_MAX);
> +    QTAILQ_INSERT_HEAD(topology_list, &sentinel, next);
> +
> +    CPU_FOREACH(cs) {
> +        s390_topology_id id = s390_topology_from_cpu(S390_CPU(cs));
> +        S390TopologyEntry *entry, *tmp;
> +
> +        QTAILQ_FOREACH(tmp, topology_list, next) {
> +            if (id.id == tmp->id.id) {
> +                entry = tmp;
> +                break;
> +            } else if (be64_to_cpu(id.id) < be64_to_cpu(tmp->id.id)) {
> +                entry = g_malloc0(sizeof(*entry));

Maybe nicer to use g_new0 here instead?

> +                entry->id.id = id.id;

Should this get a cpu_to_be64() ?

> +                QTAILQ_INSERT_BEFORE(tmp, entry, next);
> +                break;
> +            }
> +        }
> +        s390_topology_add_cpu_to_entry(entry, S390_CPU(cs));
> +    }
> +
> +    QTAILQ_REMOVE(topology_list, &sentinel, next);
> +}

  Thomas
Nina Schoetterl-Glausch Sept. 5, 2023, 3:25 p.m. UTC | #3
On Tue, 2023-09-05 at 15:26 +0200, Thomas Huth wrote:
> On 01/09/2023 17.57, Nina Schoetterl-Glausch wrote:
> > From: Pierre Morel <pmorel@linux.ibm.com>
> > 
> > On interception of STSI(15.1.x) the System Information Block
> > (SYSIB) is built from the list of pre-ordered topology entries.
> > 
> > Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> > Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> > Signed-off-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> > ---
> >   MAINTAINERS                      |   1 +
> >   qapi/machine-target.json         |  14 ++
> >   include/hw/s390x/cpu-topology.h  |  25 +++
> >   include/hw/s390x/sclp.h          |   1 +
> >   target/s390x/cpu.h               |  76 ++++++++
> >   hw/s390x/cpu-topology.c          |   2 +
> >   target/s390x/kvm/kvm.c           |   5 +-
> >   target/s390x/kvm/stsi-topology.c | 296 +++++++++++++++++++++++++++++++
> >   target/s390x/kvm/meson.build     |   3 +-
> >   9 files changed, 421 insertions(+), 2 deletions(-)
> >   create mode 100644 target/s390x/kvm/stsi-topology.c

[...]

> > diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
> > index 97b0af2795..fc15acf297 100644
> > --- a/include/hw/s390x/cpu-topology.h
> > +++ b/include/hw/s390x/cpu-topology.h
> > @@ -15,10 +15,35 @@
> >   #include "hw/boards.h"
> >   #include "qapi/qapi-types-machine-target.h"
> >   
> > +#define S390_TOPOLOGY_CPU_IFL   0x03
> > +
> > +typedef union s390_topology_id {
> > +    uint64_t id;
> > +    struct {
> > +        uint8_t _reserved0;
> > +        uint8_t drawer;
> > +        uint8_t book;
> > +        uint8_t socket;
> > +        uint8_t type;
> > +        uint8_t inv_polarization;
> 
> What sense does it make to store the polarization in an inverted way? ... I 
> don't get that ... could you please at least add a comment somewhere for the 
> rationale?
> 

It inverts the ordering with regard to polarization, as required by
the PoP. The dedication is inverted for the same reason: dedicated
CPUs show up before non-dedicated ones, so the id must have a lower
value.
I will add a comment.

> > +        uint8_t not_dedicated;
> > +        uint8_t origin;
> > +    };
> > +} s390_topology_id;

[...]

> > + * fill_tle_cpu:
> > + * @p: The address of the CPU TLE to fill
> > + * @entry: a pointer to the S390TopologyEntry defining this
> > + *         CPU container.
> > + *
> > + * Returns the next free TLE entry.
> > + */
> > +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
> > +{
> > +    SysIBCPUListEntry *tle = (SysIBCPUListEntry *)p;
> > +    s390_topology_id topology_id = entry->id;
> > +
> > +    tle->nl = 0;
> > +    tle->flags = 3 - topology_id.inv_polarization;
> 
> Can you avoid the magic number 3 here?

Hmm, any number larger than 2 will do.
I could also use an int8_t and just negate, but relying on the
reinterpretation of two's complement is also magical.
I guess S390_CPU_ENTITLEMENT_HIGH makes the most sense.

[...]

> > +/**
> > + * s390_topology_fill_list_sorted:
> > + *
> > + * Loop over all CPU and insert it at the right place
> > + * inside the TLE entry list.
> > + * Fill the S390Topology list with entries according to the order
> > + * specified by the PoP.
> > + */
> > +static void s390_topology_fill_list_sorted(S390TopologyList *topology_list)
> > +{
> > +    CPUState *cs;
> > +    S390TopologyEntry sentinel;
> > +
> > +    QTAILQ_INIT(topology_list);
> > +
> > +    sentinel.id.id = cpu_to_be64(UINT64_MAX);
> > +    QTAILQ_INSERT_HEAD(topology_list, &sentinel, next);
> > +
> > +    CPU_FOREACH(cs) {
> > +        s390_topology_id id = s390_topology_from_cpu(S390_CPU(cs));
> > +        S390TopologyEntry *entry, *tmp;
> > +
> > +        QTAILQ_FOREACH(tmp, topology_list, next) {
> > +            if (id.id == tmp->id.id) {
> > +                entry = tmp;
> > +                break;

I think I'll add a comment here.

/*
 * Earlier bytes have higher order -> big endian.
 * E.g. an entry with higher drawer number should be later in the list,
 * no matter the later fields (book, socket, etc)
 */


> > +            } else if (be64_to_cpu(id.id) < be64_to_cpu(tmp->id.id)) {
> > +                entry = g_malloc0(sizeof(*entry));
> 
> Maybe nicer to use g_new0 here instead?

I don't think it makes much of a difference.

> 
> > +                entry->id.id = id.id;
> 
> Should this get a cpu_to_be64() ?

No, there is no interpretation of the value here, just a copy.
> 
> > +                QTAILQ_INSERT_BEFORE(tmp, entry, next);
> > +                break;
> > +            }
> > +        }
> > +        s390_topology_add_cpu_to_entry(entry, S390_CPU(cs));
> > +    }
> > +
> > +    QTAILQ_REMOVE(topology_list, &sentinel, next);
> > +}
> 
>   Thomas
> 
>
Thomas Huth Sept. 6, 2023, 8:21 a.m. UTC | #4
On 05/09/2023 17.25, Nina Schoetterl-Glausch wrote:
> On Tue, 2023-09-05 at 15:26 +0200, Thomas Huth wrote:
>> On 01/09/2023 17.57, Nina Schoetterl-Glausch wrote:
>>> From: Pierre Morel <pmorel@linux.ibm.com>
>>>
>>> On interception of STSI(15.1.x) the System Information Block
>>> (SYSIB) is built from the list of pre-ordered topology entries.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>> Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
>>> Signed-off-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
>>> ---
...
>>> +/**
>>> + * s390_topology_fill_list_sorted:
>>> + *
>>> + * Loop over all CPU and insert it at the right place
>>> + * inside the TLE entry list.
>>> + * Fill the S390Topology list with entries according to the order
>>> + * specified by the PoP.
>>> + */
>>> +static void s390_topology_fill_list_sorted(S390TopologyList *topology_list)
>>> +{
>>> +    CPUState *cs;
>>> +    S390TopologyEntry sentinel;
>>> +
>>> +    QTAILQ_INIT(topology_list);
>>> +
>>> +    sentinel.id.id = cpu_to_be64(UINT64_MAX);

Since you don't do swapping for entry->id.id below, why do you do it here?

>>> +    QTAILQ_INSERT_HEAD(topology_list, &sentinel, next);
>>> +
>>> +    CPU_FOREACH(cs) {
>>> +        s390_topology_id id = s390_topology_from_cpu(S390_CPU(cs));
>>> +        S390TopologyEntry *entry, *tmp;
>>> +
>>> +        QTAILQ_FOREACH(tmp, topology_list, next) {
>>> +            if (id.id == tmp->id.id) {
>>> +                entry = tmp;
>>> +                break;
> 
> I think I'll add a comment here.
> 
> /*
>   * Earlier bytes have higher order -> big endian.
>   * E.g. an entry with higher drawer number should be later in the list,
>   * no matter the later fields (book, socket, etc)
>   */

Ugh, so this swapping is not due to real endianness issues, but just due to
ordering? ... that's very ugly! I'd prefer to be more verbose and compare
book by book, drawer by drawer, etc. instead. Or is this function so
performance-critical that we must save every possible CPU cycle here?

  Thomas


> 
>>> +            } else if (be64_to_cpu(id.id) < be64_to_cpu(tmp->id.id)) {
>>> +                entry = g_malloc0(sizeof(*entry));
>>
>> Maybe nicer to use g_new0 here instead?
> 
> I don't think it makes much of a difference.
> 
>>
>>> +                entry->id.id = id.id;
>>
>> Should this get a cpu_to_be64() ?
> 
> No, there is no interpretation of the value here, just a copy.
>>
>>> +                QTAILQ_INSERT_BEFORE(tmp, entry, next);
>>> +                break;
>>> +            }
>>> +        }
>>> +        s390_topology_add_cpu_to_entry(entry, S390_CPU(cs));
>>> +    }
>>> +
>>> +    QTAILQ_REMOVE(topology_list, &sentinel, next);
>>> +}
>>
>>    Thomas
>>
>>
>
Nina Schoetterl-Glausch Sept. 6, 2023, 12:12 p.m. UTC | #5
On Wed, 2023-09-06 at 10:21 +0200, Thomas Huth wrote:
> On 05/09/2023 17.25, Nina Schoetterl-Glausch wrote:
> > On Tue, 2023-09-05 at 15:26 +0200, Thomas Huth wrote:
> > > On 01/09/2023 17.57, Nina Schoetterl-Glausch wrote:
> > > > From: Pierre Morel <pmorel@linux.ibm.com>
> > > > 
> > > > On interception of STSI(15.1.x) the System Information Block
> > > > (SYSIB) is built from the list of pre-ordered topology entries.
> > > > 
> > > > Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> > > > Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> > > > Signed-off-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> > > > ---
> ...
> > > > +/**
> > > > + * s390_topology_fill_list_sorted:
> > > > + *
> > > > + * Loop over all CPU and insert it at the right place
> > > > + * inside the TLE entry list.
> > > > + * Fill the S390Topology list with entries according to the order
> > > > + * specified by the PoP.
> > > > + */
> > > > +static void s390_topology_fill_list_sorted(S390TopologyList *topology_list)
> > > > +{
> > > > +    CPUState *cs;
> > > > +    S390TopologyEntry sentinel;
> > > > +
> > > > +    QTAILQ_INIT(topology_list);
> > > > +
> > > > +    sentinel.id.id = cpu_to_be64(UINT64_MAX);
> 
> Since you don't do swapping for entry->id.id below, why do you do it here?

Because an integer in cpu endianness is converted to the big-endian
storage format. So then there is a cpu -> big -> cpu round trip with
the comparison below and the value is the max.
Of course this is entirely cosmetic, since UINT64_MAX is all ones.

> > > > +    QTAILQ_INSERT_HEAD(topology_list, &sentinel, next);
> > > > +
> > > > +    CPU_FOREACH(cs) {
> > > > +        s390_topology_id id = s390_topology_from_cpu(S390_CPU(cs));
> > > > +        S390TopologyEntry *entry, *tmp;
> > > > +
> > > > +        QTAILQ_FOREACH(tmp, topology_list, next) {
> > > > +            if (id.id == tmp->id.id) {
> > > > +                entry = tmp;
> > > > +                break;
> > 
> > I think I'll add a comment here.
> > 
> > /*
> >   * Earlier bytes have higher order -> big endian.
> >   * E.g. an entry with higher drawer number should be later in the list,
> >   * no matter the later fields (book, socket, etc)
> >   */
> 
> Ugh, so this swapping is not due to real endianness issues, but just due to

Yeah.

> ordering? ... that's very ugly! I'd prefer to be more verbose and compare 

I kinda didn't like the verbosity of it, since I then need to copy
paste the whole thing because I also need an equality check.
I considered implementing <=, then a == b as a <= b && b <= a, which
seems fine on second thought, so I'll do that. 
And maybe help the compiler by putting __attribute__((pure)) on there.

> book by book, drawer by drawer, etc. instread. Or is this function that 
> performance critical that we must save every possible CPU cycle here?

No.

> 
>   Thomas
> 
> 
> > 
> > > > +            } else if (be64_to_cpu(id.id) < be64_to_cpu(tmp->id.id)) {
> > > > +                entry = g_malloc0(sizeof(*entry));
> > > 
> > > Maybe nicer to use g_new0 here instead?
> > 
> > I don't think it makes much of a difference.
> > 
> > > 
> > > > +                entry->id.id = id.id;
> > > 
> > > Should this get a cpu_to_be64() ?
> > 
> > No, there is no interpretation of the value here, just a copy.
> > > 
> > > > +                QTAILQ_INSERT_BEFORE(tmp, entry, next);
> > > > +                break;
> > > > +            }
> > > > +        }
> > > > +        s390_topology_add_cpu_to_entry(entry, S390_CPU(cs));
> > > > +    }
> > > > +
> > > > +    QTAILQ_REMOVE(topology_list, &sentinel, next);
> > > > +}
> > > 
> > >    Thomas
> > > 
> > > 
> > 
>
Cédric Le Goater Sept. 6, 2023, 2:53 p.m. UTC | #6
Hello Nina,

On 9/1/23 17:57, Nina Schoetterl-Glausch wrote:
> From: Pierre Morel <pmorel@linux.ibm.com>
> 
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> Co-developed-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> Signed-off-by: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
> ---
>   MAINTAINERS                      |   1 +
>   qapi/machine-target.json         |  14 ++
>   include/hw/s390x/cpu-topology.h  |  25 +++
>   include/hw/s390x/sclp.h          |   1 +
>   target/s390x/cpu.h               |  76 ++++++++
>   hw/s390x/cpu-topology.c          |   2 +
>   target/s390x/kvm/kvm.c           |   5 +-
>   target/s390x/kvm/stsi-topology.c | 296 +++++++++++++++++++++++++++++++
>   target/s390x/kvm/meson.build     |   3 +-
>   9 files changed, 421 insertions(+), 2 deletions(-)
>   create mode 100644 target/s390x/kvm/stsi-topology.c
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index b10b83583f..692ce9f121 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -1700,6 +1700,7 @@ M: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
>   S: Supported
>   F: include/hw/s390x/cpu-topology.h
>   F: hw/s390x/cpu-topology.c
> +F: target/s390x/kvm/stsi-topology.c
>   
>   X86 Machines
>   ------------
> diff --git a/qapi/machine-target.json b/qapi/machine-target.json
> index f0a6b72414..275234a20f 100644
> --- a/qapi/machine-target.json
> +++ b/qapi/machine-target.json
> @@ -361,3 +361,17 @@
>                      'TARGET_MIPS',
>                      'TARGET_LOONGARCH64',
>                      'TARGET_RISCV' ] } }
> +
> +##
> +# @CpuS390Polarization:
> +#
> +# An enumeration of cpu polarization that can be assumed by a virtual
> +# S390 CPU
> +#
> +# Since: 8.2
> +##
> +{ 'enum': 'CpuS390Polarization',
> +  'prefix': 'S390_CPU_POLARIZATION',
> +  'data': [ 'horizontal', 'vertical' ],

Since :

> +    'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
> +}
> diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
> index 97b0af2795..fc15acf297 100644
> --- a/include/hw/s390x/cpu-topology.h
> +++ b/include/hw/s390x/cpu-topology.h
> @@ -15,10 +15,35 @@
>   #include "hw/boards.h"
>   #include "qapi/qapi-types-machine-target.h"
>   
> +#define S390_TOPOLOGY_CPU_IFL   0x03
> +
> +typedef union s390_topology_id {
> +    uint64_t id;
> +    struct {
> +        uint8_t _reserved0;
> +        uint8_t drawer;
> +        uint8_t book;
> +        uint8_t socket;
> +        uint8_t type;
> +        uint8_t inv_polarization;
> +        uint8_t not_dedicated;
> +        uint8_t origin;
> +    };
> +} s390_topology_id;
> +
> +typedef struct S390TopologyEntry {
> +    QTAILQ_ENTRY(S390TopologyEntry) next;
> +    s390_topology_id id;
> +    uint64_t mask;
> +} S390TopologyEntry;
> +
>   typedef struct S390Topology {
>       uint8_t *cores_per_socket;
> +    CpuS390Polarization polarization;

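This won't compile for non-s390x builds: CpuS390Polarization is only
generated when both TARGET_S390X and CONFIG_KVM are defined (see the
'if' condition quoted above).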
>   } S390Topology;
>   
> +typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList;
> +
>   #ifdef CONFIG_KVM
>   bool s390_has_topology(void);
>   void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp);
> diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
> index cf1f2efae2..c49051e17e 100644
> --- a/include/hw/s390x/sclp.h
> +++ b/include/hw/s390x/sclp.h
> @@ -112,6 +112,7 @@ typedef struct CPUEntry {
>   } QEMU_PACKED CPUEntry;
>   
>   #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
> +#define SCLP_READ_SCP_INFO_MNEST                2
>   typedef struct ReadInfo {
>       SCCBHeader h;
>       uint16_t rnmax;
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index 7ebd5e05b6..b8a0c02714 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -569,6 +569,29 @@ typedef struct SysIB_322 {
>   } SysIB_322;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>   
> +/*
> + * Topology Magnitude fields (MAG) indicate the maximum number of
> + * topology list entries (TLE) at the corresponding nesting level.
> + */
> +#define S390_TOPOLOGY_MAG  6
> +#define S390_TOPOLOGY_MAG6 0
> +#define S390_TOPOLOGY_MAG5 1
> +#define S390_TOPOLOGY_MAG4 2
> +#define S390_TOPOLOGY_MAG3 3
> +#define S390_TOPOLOGY_MAG2 4
> +#define S390_TOPOLOGY_MAG1 5
> +/* Configuration topology */
> +typedef struct SysIB_151x {
> +    uint8_t  reserved0[2];
> +    uint16_t length;
> +    uint8_t  mag[S390_TOPOLOGY_MAG];
> +    uint8_t  reserved1;
> +    uint8_t  mnest;
> +    uint32_t reserved2;
> +    char tle[];
> +} SysIB_151x;
> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
> +
>   typedef union SysIB {
>       SysIB_111 sysib_111;
>       SysIB_121 sysib_121;
> @@ -576,9 +599,62 @@ typedef union SysIB {
>       SysIB_221 sysib_221;
>       SysIB_222 sysib_222;
>       SysIB_322 sysib_322;
> +    SysIB_151x sysib_151x;
>   } SysIB;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>   
> +/*
> + * CPU Topology List provided by STSI with fc=15 provides a list
> + * of two different Topology List Entries (TLE) types to specify
> + * the topology hierarchy.
> + *
> + * - Container Topology List Entry
> + *   Defines a container to contain other Topology List Entries
> + *   of any type, nested containers or CPU.
> + * - CPU Topology List Entry
> + *   Specifies the CPUs position, type, entitlement and polarization
> + *   of the CPUs contained in the last Container TLE.
> + *
> + * There can be theoretically up to five levels of containers, QEMU
> + * uses only three levels, the drawer's, book's and socket's level.
> + *
> + * A container with a nesting level (NL) greater than 1 can only
> + * contain another container of nesting level NL-1.
> + *
> + * A container of nesting level 1 (socket), contains as many CPU TLE
> + * as needed to describe the position and qualities of all CPUs inside
> + * the container.
> + * The qualities of a CPU are polarization, entitlement and type.
> + *
> + * The CPU TLE defines the position of the CPUs of identical qualities
> + * using a 64-bit mask whose first bit has its offset defined by
> + * the CPU address origin field of the CPU TLE, as in:
> + * CPU address = origin * 64 + bit position within the mask
> + *
> + */
> +/* Container type Topology List Entry */
> +typedef struct SYSIBContainerListEntry {
> +        uint8_t nl;
> +        uint8_t reserved[6];
> +        uint8_t id;
> +} SYSIBContainerListEntry;
> +QEMU_BUILD_BUG_ON(sizeof(SYSIBContainerListEntry) != 8);
> +
> +/* CPU type Topology List Entry */
> +typedef struct SysIBCPUListEntry {
> +        uint8_t nl;
> +        uint8_t reserved0[3];
> +#define SYSIB_TLE_POLARITY_MASK 0x03
> +#define SYSIB_TLE_DEDICATED     0x04
> +        uint8_t flags;
> +        uint8_t type;
> +        uint16_t origin;
> +        uint64_t mask;
> +} SysIBCPUListEntry;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBCPUListEntry) != 16);
> +
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, uintptr_t ra);
> +
>   /* MMU defines */
>   #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
>   #define ASCE_SUBSPACE         0x200       /* subspace group control           */
> diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
> index 06b60ebce4..5971804680 100644
> --- a/hw/s390x/cpu-topology.c
> +++ b/hw/s390x/cpu-topology.c
> @@ -28,10 +28,12 @@
>    * s390_topology is used to keep the topology information.
>    * .cores_per_socket: tracks information on the count of cores
>    *                    per socket.
> + * .polarization: tracks machine polarization.
>    */
>   S390Topology s390_topology = {
>       /* will be initialized after the CPU model is realized */
>       .cores_per_socket = NULL,
> +    .polarization = S390_CPU_POLARIZATION_HORIZONTAL,
>   };
>   
>   /**
> diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
> index 852fbd0df7..56b31b8aae 100644
> --- a/target/s390x/kvm/kvm.c
> +++ b/target/s390x/kvm/kvm.c
> @@ -1911,9 +1911,12 @@ static int handle_stsi(S390CPU *cpu)
>           if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
>               return 0;
>           }
> -        /* Only sysib 3.2.2 needs post-handling for now. */
>           insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar);
>           return 0;
> +    case 15:
> +        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr,
> +                           run->s390_stsi.ar, RA_IGNORED);
> +        return 0;
>       default:
>           return 0;
>       }
> diff --git a/target/s390x/kvm/stsi-topology.c b/target/s390x/kvm/stsi-topology.c
> new file mode 100644
> index 0000000000..cb78040ea5
> --- /dev/null
> +++ b/target/s390x/kvm/stsi-topology.c
> @@ -0,0 +1,296 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +/*
> + * QEMU S390x CPU Topology
> + *
> + * Copyright IBM Corp. 2022, 2023
> + * Author(s): Pierre Morel <pmorel@linux.ibm.com>
> + *
> + */
> +#include "qemu/osdep.h"
> +#include "cpu.h"
> +#include "hw/s390x/sclp.h"
> +#include "hw/s390x/cpu-topology.h"
> +
> +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_LOW != 1);
> +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_MEDIUM != 2);
> +QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_HIGH != 3);
> +
> +/**
> + * fill_container:
> + * @p: The address of the container TLE to fill
> + * @level: The level of nesting for this container
> + * @id: The container receives a unique ID inside its own container
> + *
> + * Returns the next free TLE entry.
> + */
> +static char *fill_container(char *p, int level, int id)
> +{
> +    SYSIBContainerListEntry *tle = (SYSIBContainerListEntry *)p;
> +
> +    tle->nl = level;
> +    tle->id = id;
> +    return p + sizeof(*tle);
> +}
> +
> +/**
> + * fill_tle_cpu:
> + * @p: The address of the CPU TLE to fill
> + * @entry: a pointer to the S390TopologyEntry defining this
> + *         CPU container.
> + *
> + * Returns the next free TLE entry.
> + */
> +static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
> +{
> +    SysIBCPUListEntry *tle = (SysIBCPUListEntry *)p;
> +    s390_topology_id topology_id = entry->id;
> +
> +    tle->nl = 0;
> +    tle->flags = 3 - topology_id.inv_polarization;
> +    if (!topology_id.not_dedicated) {
> +        tle->flags |= SYSIB_TLE_DEDICATED;
> +    }
> +    tle->type = topology_id.type;
> +    tle->origin = cpu_to_be16(topology_id.origin * 64);
> +    tle->mask = cpu_to_be64(entry->mask);
> +    return p + sizeof(*tle);
> +}
> +
> +/*
> + * Macro to check that the size of data after increment
> + * will not get bigger than the size of the SysIB.
> + */
> +#define SYSIB_GUARD(data, x) do {       \
> +        data += x;                      \
> +        if (data > sizeof(SysIB)) {     \
> +            return 0;                   \
> +        }                               \
> +    } while (0)
> +
> +/**
> + * stsi_topology_fill_sysib:
> + * @p: A pointer to the position of the first TLE
> + * @level: The nested level wanted by the guest
> + *
> + * Fill the SYSIB with the topology information as described in
> + * the PoP, nesting containers as appropriate, with the maximum
> + * nesting limited by @level.
> + *
> + * Return value:
> + * On success: the size of the SysIB_15x after being filled with TLE.
> + * On error: 0 in the case we would overrun the end of the SysIB.
> + */
> +static int stsi_topology_fill_sysib(S390TopologyList *topology_list,
> +                                    char *p, int level)
> +{
> +    S390TopologyEntry *entry;
> +    int last_drawer = -1;
> +    int last_book = -1;
> +    int last_socket = -1;
> +    int drawer_id = 0;
> +    int book_id = 0;
> +    int socket_id = 0;
> +    int n = sizeof(SysIB_151x);
> +
> +    QTAILQ_FOREACH(entry, topology_list, next) {
> +        bool drawer_change = last_drawer != entry->id.drawer;
> +        bool book_change = drawer_change || last_book != entry->id.book;
> +        bool socket_change = book_change || last_socket != entry->id.socket;
> +
> +        if (level > 3 && drawer_change) {
> +            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
> +            p = fill_container(p, 3, drawer_id++);
> +            book_id = 0;
> +        }
> +        if (level > 2 && book_change) {
> +            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
> +            p = fill_container(p, 2, book_id++);
> +            socket_id = 0;
> +        }
> +        if (socket_change) {
> +            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
> +            p = fill_container(p, 1, socket_id++);
> +        }
> +
> +        SYSIB_GUARD(n, sizeof(SysIBCPUListEntry));
> +        p = fill_tle_cpu(p, entry);
> +        last_drawer = entry->id.drawer;
> +        last_book = entry->id.book;
> +        last_socket = entry->id.socket;
> +    }
> +
> +    return n;
> +}
> +
> +/**
> + * setup_stsi:
> + * sysib: pointer to a SysIB to be filled with SysIB_151x data
> + * level: Nested level specified by the guest
> + *
> + * Setup the SYSIB for STSI 15.1, the header as well as the description
> + * of the topology.
> + */
> +static int setup_stsi(S390TopologyList *topology_list, SysIB_151x *sysib,
> +                      int level)
> +{
> +    sysib->mnest = level;
> +    switch (level) {
> +    case 4:
> +        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    case 3:
> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
> +                                         current_machine->smp.books;
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    case 2:
> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
> +                                         current_machine->smp.books *
> +                                         current_machine->smp.sockets;
> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> +        break;
> +    }
> +
> +    return stsi_topology_fill_sysib(topology_list, sysib->tle, level);
> +}
> +
> +/**
> + * s390_topology_add_cpu_to_entry:
> + * @entry: Topology entry to setup
> + * @cpu: the S390CPU to add
> + *
> + * Set the core bit inside the topology mask.
> + */
> +static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
> +                                           S390CPU *cpu)
> +{
> +    set_bit(63 - (cpu->env.core_id % 64), &entry->mask);
> +}
> +
> +/**
> + * s390_topology_from_cpu:
> + * @cpu: S390CPU to calculate the topology id
> + *
> + * Initialize the topology id from the CPU environment.
> + */
> +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
> +{
> +    s390_topology_id topology_id = {0};
> +
> +    topology_id.drawer = cpu->env.drawer_id;
> +    topology_id.book = cpu->env.book_id;
> +    topology_id.socket = cpu->env.socket_id;
> +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
> +    topology_id.not_dedicated = !cpu->env.dedicated;
> +
> +    topology_id.inv_polarization = 3;
> +    if (s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL) {
> +        topology_id.inv_polarization -= cpu->env.entitlement;
> +    }
> +
> +    topology_id.origin = cpu->env.core_id / 64;
> +
> +    return topology_id;
> +}
> +
> +/**
> + * s390_topology_fill_list_sorted:
> + *
> + * Loop over all CPU and insert it at the right place
> + * inside the TLE entry list.
> + * Fill the S390Topology list with entries according to the order
> + * specified by the PoP.
> + */
> +static void s390_topology_fill_list_sorted(S390TopologyList *topology_list)
> +{
> +    CPUState *cs;
> +    S390TopologyEntry sentinel;
> +
> +    QTAILQ_INIT(topology_list);
> +
> +    sentinel.id.id = cpu_to_be64(UINT64_MAX);
> +    QTAILQ_INSERT_HEAD(topology_list, &sentinel, next);
> +
> +    CPU_FOREACH(cs) {
> +        s390_topology_id id = s390_topology_from_cpu(S390_CPU(cs));
> +        S390TopologyEntry *entry, *tmp;



Please add :

   S390TopologyEntry *entry = NULL, *tmp;

It fixes a compile breakage in :

   ../target/s390x/kvm/stsi-topology.c: In function ‘insert_stsi_15_1_x’:
   ../include/qemu/bitops.h:41:9: error: ‘entry’ may be used uninitialized in this function [-Werror=maybe-uninitialized]
        *p  |= mask;
        ~~~~^~~~~~~
   ../target/s390x/kvm/stsi-topology.c:220:28: note: ‘entry’ was declared here
            S390TopologyEntry *entry, *tmp;
                               ^~~~~
   cc1: all warnings being treated as errors


Thanks,

C.

> +
> +        QTAILQ_FOREACH(tmp, topology_list, next) {
> +            if (id.id == tmp->id.id) {
> +                entry = tmp;
> +                break;
> +            } else if (be64_to_cpu(id.id) < be64_to_cpu(tmp->id.id)) {
> +                entry = g_malloc0(sizeof(*entry));
> +                entry->id.id = id.id;
> +                QTAILQ_INSERT_BEFORE(tmp, entry, next);
> +                break;
> +            }
> +        }
> +        s390_topology_add_cpu_to_entry(entry, S390_CPU(cs));
> +    }
> +
> +    QTAILQ_REMOVE(topology_list, &sentinel, next);
> +}
> +
> +/**
> + * s390_topology_empty_list:
> + *
> + * Clear all entries in the S390Topology list.
> + */
> +static void s390_topology_empty_list(S390TopologyList *topology_list)
> +{
> +    S390TopologyEntry *entry = NULL;
> +    S390TopologyEntry *tmp = NULL;
> +
> +    QTAILQ_FOREACH_SAFE(entry, topology_list, next, tmp) {
> +        QTAILQ_REMOVE(topology_list, entry, next);
> +        g_free(entry);
> +    }
> +}
> +
> +/**
> + * insert_stsi_15_1_x:
> + * cpu: the CPU doing the call for which we set CC
> + * sel2: the selector 2, containing the nested level
> + * addr: Guest logical address of the guest SysIB
> + * ar: the access register number
> + *
> + * Emulate STSI 15.1.x, that is, perform all necessary checks and
> + * fill the SYSIB.
> + * In case the topology description is too long to fit into the SYSIB,
> + * set CC=3 and abort without writing the SYSIB.
> + */
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, uintptr_t ra)
> +{
> +    S390TopologyList topology_list;
> +    SysIB sysib = {0};
> +    int length;
> +
> +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
> +        setcc(cpu, 3);
> +        return;
> +    }
> +
> +    s390_topology_fill_list_sorted(&topology_list);
> +
> +    length = setup_stsi(&topology_list, &sysib.sysib_151x, sel2);
> +
> +    if (!length) {
> +        s390_topology_empty_list(&topology_list);
> +        setcc(cpu, 3);
> +        return;
> +    }
> +
> +    sysib.sysib_151x.length = cpu_to_be16(length);
> +    if (!s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, length)) {
> +        setcc(cpu, 0);
> +    } else {
> +        s390_cpu_virt_mem_handle_exc(cpu, ra);
> +    }
> +
> +    s390_topology_empty_list(&topology_list);
> +}
> diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
> index d6aca590ae..588a9aa737 100644
> --- a/target/s390x/kvm/meson.build
> +++ b/target/s390x/kvm/meson.build
> @@ -1,7 +1,8 @@
>   
>   s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
>     'pv.c',
> -  'kvm.c'
> +  'kvm.c',
> +  'stsi-topology.c'
>   ), if_false: files(
>     'stubs.c'
>   ))
diff mbox series

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index b10b83583f..692ce9f121 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1700,6 +1700,7 @@  M: Nina Schoetterl-Glausch <nsg@linux.ibm.com>
 S: Supported
 F: include/hw/s390x/cpu-topology.h
 F: hw/s390x/cpu-topology.c
+F: target/s390x/kvm/stsi-topology.c
 
 X86 Machines
 ------------
diff --git a/qapi/machine-target.json b/qapi/machine-target.json
index f0a6b72414..275234a20f 100644
--- a/qapi/machine-target.json
+++ b/qapi/machine-target.json
@@ -361,3 +361,17 @@ 
                    'TARGET_MIPS',
                    'TARGET_LOONGARCH64',
                    'TARGET_RISCV' ] } }
+
+##
+# @CpuS390Polarization:
+#
+# An enumeration of cpu polarization that can be assumed by a virtual
+# S390 CPU
+#
+# Since: 8.2
+##
+{ 'enum': 'CpuS390Polarization',
+  'prefix': 'S390_CPU_POLARIZATION',
+  'data': [ 'horizontal', 'vertical' ],
+    'if': { 'all': [ 'TARGET_S390X' , 'CONFIG_KVM' ] }
+}
diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 97b0af2795..fc15acf297 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -15,10 +15,35 @@ 
 #include "hw/boards.h"
 #include "qapi/qapi-types-machine-target.h"
 
+#define S390_TOPOLOGY_CPU_IFL   0x03
+
+typedef union s390_topology_id {
+    uint64_t id;
+    struct {
+        uint8_t _reserved0;
+        uint8_t drawer;
+        uint8_t book;
+        uint8_t socket;
+        uint8_t type;
+        uint8_t inv_polarization;
+        uint8_t not_dedicated;
+        uint8_t origin;
+    };
+} s390_topology_id;
+
+typedef struct S390TopologyEntry {
+    QTAILQ_ENTRY(S390TopologyEntry) next;
+    s390_topology_id id;
+    uint64_t mask;
+} S390TopologyEntry;
+
 typedef struct S390Topology {
     uint8_t *cores_per_socket;
+    CpuS390Polarization polarization;
 } S390Topology;
 
+typedef QTAILQ_HEAD(, S390TopologyEntry) S390TopologyList;
+
 #ifdef CONFIG_KVM
 bool s390_has_topology(void);
 void s390_topology_setup_cpu(MachineState *ms, S390CPU *cpu, Error **errp);
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index cf1f2efae2..c49051e17e 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -112,6 +112,7 @@  typedef struct CPUEntry {
 } QEMU_PACKED CPUEntry;
 
 #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
+#define SCLP_READ_SCP_INFO_MNEST                2
 typedef struct ReadInfo {
     SCCBHeader h;
     uint16_t rnmax;
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index 7ebd5e05b6..b8a0c02714 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -569,6 +569,29 @@  typedef struct SysIB_322 {
 } SysIB_322;
 QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
 
+/*
+ * Topology Magnitude fields (MAG) indicate the maximum number of
+ * topology list entries (TLE) at the corresponding nesting level.
+ */
+#define S390_TOPOLOGY_MAG  6
+#define S390_TOPOLOGY_MAG6 0
+#define S390_TOPOLOGY_MAG5 1
+#define S390_TOPOLOGY_MAG4 2
+#define S390_TOPOLOGY_MAG3 3
+#define S390_TOPOLOGY_MAG2 4
+#define S390_TOPOLOGY_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+    uint8_t  reserved0[2];
+    uint16_t length;
+    uint8_t  mag[S390_TOPOLOGY_MAG];
+    uint8_t  reserved1;
+    uint8_t  mnest;
+    uint32_t reserved2;
+    char tle[];
+} SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
 typedef union SysIB {
     SysIB_111 sysib_111;
     SysIB_121 sysib_121;
@@ -576,9 +599,62 @@  typedef union SysIB {
     SysIB_221 sysib_221;
     SysIB_222 sysib_222;
     SysIB_322 sysib_322;
+    SysIB_151x sysib_151x;
 } SysIB;
 QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 
+/*
+ * CPU Topology List provided by STSI with fc=15 provides a list
+ * of two different Topology List Entries (TLE) types to specify
+ * the topology hierarchy.
+ *
+ * - Container Topology List Entry
+ *   Defines a container to contain other Topology List Entries
+ *   of any type, nested containers or CPU.
+ * - CPU Topology List Entry
+ *   Specifies the CPUs position, type, entitlement and polarization
+ *   of the CPUs contained in the last Container TLE.
+ *
+ * There can be theoretically up to five levels of containers, QEMU
+ * uses only three levels, the drawer's, book's and socket's level.
+ *
+ * A container with a nesting level (NL) greater than 1 can only
+ * contain another container of nesting level NL-1.
+ *
+ * A container of nesting level 1 (socket), contains as many CPU TLE
+ * as needed to describe the position and qualities of all CPUs inside
+ * the container.
+ * The qualities of a CPU are polarization, entitlement and type.
+ *
+ * The CPU TLE defines the position of the CPUs of identical qualities
+ * using a 64-bit mask whose first bit has its offset defined by
+ * the CPU address origin field of the CPU TLE, as in:
+ * CPU address = origin * 64 + bit position within the mask
+ *
+ */
+/* Container type Topology List Entry */
+typedef struct SYSIBContainerListEntry {
+        uint8_t nl;
+        uint8_t reserved[6];
+        uint8_t id;
+} SYSIBContainerListEntry;
+QEMU_BUILD_BUG_ON(sizeof(SYSIBContainerListEntry) != 8);
+
+/* CPU type Topology List Entry */
+typedef struct SysIBCPUListEntry {
+        uint8_t nl;
+        uint8_t reserved0[3];
+#define SYSIB_TLE_POLARITY_MASK 0x03
+#define SYSIB_TLE_DEDICATED     0x04
+        uint8_t flags;
+        uint8_t type;
+        uint16_t origin;
+        uint64_t mask;
+} SysIBCPUListEntry;
+QEMU_BUILD_BUG_ON(sizeof(SysIBCPUListEntry) != 16);
+
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar, uintptr_t ra);
+
 /* MMU defines */
 #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
 #define ASCE_SUBSPACE         0x200       /* subspace group control           */
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 06b60ebce4..5971804680 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -28,10 +28,12 @@ 
  * s390_topology is used to keep the topology information.
  * .cores_per_socket: tracks information on the count of cores
  *                    per socket.
+ * .polarization: tracks machine polarization.
  */
 S390Topology s390_topology = {
     /* will be initialized after the CPU model is realized */
     .cores_per_socket = NULL,
+    .polarization = S390_CPU_POLARIZATION_HORIZONTAL,
 };
 
 /**
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 852fbd0df7..56b31b8aae 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -1911,9 +1911,12 @@  static int handle_stsi(S390CPU *cpu)
         if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
             return 0;
         }
-        /* Only sysib 3.2.2 needs post-handling for now. */
         insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar);
         return 0;
+    case 15:
+        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr,
+                           run->s390_stsi.ar, RA_IGNORED);
+        return 0;
     default:
         return 0;
     }
diff --git a/target/s390x/kvm/stsi-topology.c b/target/s390x/kvm/stsi-topology.c
new file mode 100644
index 0000000000..cb78040ea5
--- /dev/null
+++ b/target/s390x/kvm/stsi-topology.c
@@ -0,0 +1,296 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * QEMU S390x CPU Topology
+ *
+ * Copyright IBM Corp. 2022, 2023
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/s390x/sclp.h"
+#include "hw/s390x/cpu-topology.h"
+
+/*
+ * s390_topology_from_cpu() stores 3 - entitlement for vertical polarization
+ * and fill_tle_cpu() computes 3 - that value again, so the numeric value of
+ * the entitlement ends up unchanged in the polarity bits of the CPU TLE
+ * flags. Pin the enum values this arithmetic relies on at build time.
+ */
+QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_LOW != 1);
+QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_MEDIUM != 2);
+QEMU_BUILD_BUG_ON(S390_CPU_ENTITLEMENT_HIGH != 3);
+
+/**
+ * fill_container:
+ * @p: Location at which the container TLE is written
+ * @level: Nesting level stored in the entry
+ * @id: ID stored in the entry; unique inside the enclosing container
+ *
+ * Write one container TLE at @p.
+ *
+ * Returns the address directly behind the entry just written, i.e. the
+ * next free TLE position.
+ */
+static char *fill_container(char *p, int level, int id)
+{
+    SYSIBContainerListEntry *container = (SYSIBContainerListEntry *)p;
+
+    container->id = id;
+    container->nl = level;
+
+    return (char *)(container + 1);
+}
+
+/**
+ * fill_tle_cpu:
+ * @p: Location at which the CPU TLE is written
+ * @entry: the S390TopologyEntry providing the topology id and the CPU
+ *         mask for this TLE
+ *
+ * Write one CPU TLE at @p: nesting level 0, the flags (3 minus the
+ * inverted polarization, plus SYSIB_TLE_DEDICATED unless the id is marked
+ * not dedicated), the CPU type, and origin and mask converted to
+ * big-endian. The origin is stored as a multiple of 64.
+ *
+ * Returns the address directly behind the entry just written, i.e. the
+ * next free TLE position.
+ */
+static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
+{
+    SysIBCPUListEntry *cpu_tle = (SysIBCPUListEntry *)p;
+    s390_topology_id id = entry->id;
+    uint8_t flags = 3 - id.inv_polarization;
+
+    if (!id.not_dedicated) {
+        flags |= SYSIB_TLE_DEDICATED;
+    }
+
+    cpu_tle->nl = 0;
+    cpu_tle->flags = flags;
+    cpu_tle->type = id.type;
+    cpu_tle->origin = cpu_to_be16(id.origin * 64);
+    cpu_tle->mask = cpu_to_be64(entry->mask);
+
+    return (char *)(cpu_tle + 1);
+}
+
+/*
+ * Add @x to the running SysIB size @data and check that the result still
+ * fits into a SysIB.
+ *
+ * Beware: if the new size exceeds sizeof(SysIB) this expands to
+ * "return 0", i.e. it returns from the *calling* function. It may therefore
+ * only be used inside functions that return an integer and use 0 to signal
+ * this error. @data must be a modifiable lvalue and is evaluated twice.
+ */
+#define SYSIB_GUARD(data, x) do {       \
+        (data) += (x);                  \
+        if ((data) > sizeof(SysIB)) {   \
+            return 0;                   \
+        }                               \
+    } while (0)
+
+/**
+ * stsi_topology_fill_sysib:
+ * @topology_list: The list of topology entries to describe; it is walked
+ *                 in list order
+ * @p: A pointer to the position of the first TLE
+ * @level: The nested level wanted by the guest
+ *
+ * Fill the SYSIB with the topology information as described in
+ * the PoP, nesting containers as appropriate, with the maximum
+ * nesting limited by @level.
+ *
+ * For every list entry one CPU TLE is written. It is preceded by container
+ * TLEs whenever the drawer, book or socket differs from the one of the
+ * previous entry; a change of drawer also counts as a change of book and
+ * socket, and a change of book also counts as a change of socket.
+ * Drawer containers are only written for @level > 3 and book containers
+ * only for @level > 2; socket containers are always written.
+ * Container IDs are running counters starting at 0.
+ *
+ * The size accounting starts at sizeof(SysIB_151x). Before each TLE is
+ * written SYSIB_GUARD() adds its size and makes this function return 0
+ * if the total would exceed sizeof(SysIB).
+ *
+ * Return value:
+ * On success: the size of the SysIB_151x after being filled with TLE.
+ * On error: 0 in the case we would overrun the end of the SysIB.
+ */
+static int stsi_topology_fill_sysib(S390TopologyList *topology_list,
+                                    char *p, int level)
+{
+    S390TopologyEntry *entry;
+    int last_drawer = -1;
+    int last_book = -1;
+    int last_socket = -1;
+    int drawer_id = 0;
+    int book_id = 0;
+    int socket_id = 0;
+    int n = sizeof(SysIB_151x);
+
+    QTAILQ_FOREACH(entry, topology_list, next) {
+        bool drawer_change = last_drawer != entry->id.drawer;
+        bool book_change = drawer_change || last_book != entry->id.book;
+        bool socket_change = book_change || last_socket != entry->id.socket;
+
+        if (level > 3 && drawer_change) {
+            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
+            p = fill_container(p, 3, drawer_id++);
+            book_id = 0; /* book IDs restart inside each drawer container */
+        }
+        if (level > 2 && book_change) {
+            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
+            p = fill_container(p, 2, book_id++);
+            socket_id = 0; /* socket IDs restart inside each book container */
+        }
+        if (socket_change) {
+            SYSIB_GUARD(n, sizeof(SYSIBContainerListEntry));
+            p = fill_container(p, 1, socket_id++);
+        }
+
+        SYSIB_GUARD(n, sizeof(SysIBCPUListEntry));
+        p = fill_tle_cpu(p, entry);
+        last_drawer = entry->id.drawer;
+        last_book = entry->id.book;
+        last_socket = entry->id.socket;
+    }
+
+    return n;
+}
+
+/**
+ * setup_stsi:
+ * @topology_list: the topology entries to describe in the SYSIB
+ * @sysib: pointer to the SysIB_151x to be filled
+ * @level: nesting level specified by the guest
+ *
+ * Set up the SYSIB for STSI 15.1: first the header (mnest and the mag[]
+ * fields), then the description of the topology.
+ *
+ * For @level 4 every mag[] field holds the count of one topology level.
+ * For lower levels the counts of the levels that are not reported are
+ * multiplied into the highest reported one. For any @level other than
+ * 2, 3 or 4 the mag[] fields are left untouched.
+ *
+ * Returns the result of stsi_topology_fill_sysib(): the size of the
+ * filled SYSIB, or 0 if the topology does not fit.
+ */
+static int setup_stsi(S390TopologyList *topology_list, SysIB_151x *sysib,
+                      int level)
+{
+    sysib->mnest = level;
+
+    if (level == 4) {
+        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
+        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+    } else if (level == 3) {
+        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
+                                         current_machine->smp.books;
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+    } else if (level == 2) {
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
+                                         current_machine->smp.books *
+                                         current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+    }
+
+    return stsi_topology_fill_sysib(topology_list, sysib->tle, level);
+}
+
+/**
+ * s390_topology_add_cpu_to_entry:
+ * @entry: Topology entry whose mask is updated
+ * @cpu: the S390CPU to add
+ *
+ * Mark the CPU's core in the entry mask: the bit number handed to
+ * set_bit() is 63 minus the core id modulo 64.
+ */
+static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
+                                           S390CPU *cpu)
+{
+    int bit = 63 - (cpu->env.core_id % 64);
+
+    set_bit(bit, &entry->mask);
+}
+
+/**
+ * s390_topology_from_cpu:
+ * @cpu: S390CPU for which the topology id is calculated
+ *
+ * Build the topology id from the CPU environment: drawer, book and socket
+ * are copied, the origin is the core id divided by 64, the type is always
+ * S390_TOPOLOGY_CPU_IFL and not_dedicated is the negated dedicated flag.
+ * The inverted polarization is 3, reduced by the CPU's entitlement only
+ * if the machine polarization is vertical.
+ *
+ * Returns the topology id by value.
+ */
+static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
+{
+    s390_topology_id id = {0};
+
+    id.drawer = cpu->env.drawer_id;
+    id.book = cpu->env.book_id;
+    id.socket = cpu->env.socket_id;
+    id.origin = cpu->env.core_id / 64;
+    id.type = S390_TOPOLOGY_CPU_IFL;
+    id.not_dedicated = !cpu->env.dedicated;
+
+    if (s390_topology.polarization == S390_CPU_POLARIZATION_VERTICAL) {
+        id.inv_polarization = 3 - cpu->env.entitlement;
+    } else {
+        id.inv_polarization = 3;
+    }
+
+    return id;
+}
+
+/**
+ * s390_topology_fill_list_sorted:
+ * @topology_list: the list head to initialize and fill
+ *
+ * Loop over all CPUs and add each one to the list entry carrying the same
+ * topology id, creating that entry at its sorted position if it does not
+ * exist yet.
+ * Entries are kept in ascending order of the topology id read as a
+ * big-endian 64-bit value, which is the order specified by the PoP.
+ *
+ * The entries are allocated with g_malloc0(); the caller has to release
+ * them, see s390_topology_empty_list().
+ */
+static void s390_topology_fill_list_sorted(S390TopologyList *topology_list)
+{
+    CPUState *cs;
+    /* Zero everything so no field of the on-stack sentinel is indeterminate */
+    S390TopologyEntry sentinel = { .mask = 0 };
+
+    QTAILQ_INIT(topology_list);
+
+    /*
+     * The sentinel carries the largest possible id, so the search below
+     * always stops at some entry. It lives on the stack and is unlinked
+     * again before returning.
+     */
+    sentinel.id.id = cpu_to_be64(UINT64_MAX);
+    QTAILQ_INSERT_HEAD(topology_list, &sentinel, next);
+
+    CPU_FOREACH(cs) {
+        s390_topology_id id = s390_topology_from_cpu(S390_CPU(cs));
+        S390TopologyEntry *entry = NULL, *tmp;
+
+        QTAILQ_FOREACH(tmp, topology_list, next) {
+            if (id.id == tmp->id.id) {
+                entry = tmp;
+                break;
+            } else if (be64_to_cpu(id.id) < be64_to_cpu(tmp->id.id)) {
+                entry = g_malloc0(sizeof(*entry));
+                entry->id.id = id.id;
+                QTAILQ_INSERT_BEFORE(tmp, entry, next);
+                break;
+            }
+        }
+        /* The sentinel guarantees a hit; never pass on an unset pointer */
+        g_assert(entry);
+        s390_topology_add_cpu_to_entry(entry, S390_CPU(cs));
+    }
+
+    QTAILQ_REMOVE(topology_list, &sentinel, next);
+}
+
+/**
+ * s390_topology_empty_list:
+ * @topology_list: the list to empty
+ *
+ * Unlink every entry from the S390Topology list and release it with
+ * g_free().
+ */
+static void s390_topology_empty_list(S390TopologyList *topology_list)
+{
+    S390TopologyEntry *cur, *following;
+
+    QTAILQ_FOREACH_SAFE(cur, topology_list, next, following) {
+        QTAILQ_REMOVE(topology_list, cur, next);
+        g_free(cur);
+    }
+}
+
+/**
+ * insert_stsi_15_1_x:
+ * @cpu: the CPU doing the call for which we set CC
+ * @sel2: the selector 2, containing the nested level
+ * @addr: Guest logical address of the guest SysIB
+ * @ar: the access register number
+ * @ra: handed to s390_cpu_virt_mem_handle_exc() if writing the SysIB
+ *      to guest memory fails
+ *
+ * Emulate STSI 15.1.x, that is, perform all necessary checks and
+ * fill the SYSIB.
+ * CC=3 is set without writing the SYSIB if s390_has_topology() is false,
+ * if @sel2 is outside of 2..SCLP_READ_SCP_INFO_MNEST, or if the topology
+ * description is too long to fit into the SYSIB.
+ * CC=0 is set once the SYSIB has been written to guest memory.
+ */
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, uint64_t addr, uint8_t ar,
+                        uintptr_t ra)
+{
+    S390TopologyList topology_list;
+    SysIB sysib = {0};
+    int length;
+
+    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
+        setcc(cpu, 3);
+        return;
+    }
+
+    s390_topology_fill_list_sorted(&topology_list);
+    length = setup_stsi(&topology_list, &sysib.sysib_151x, sel2);
+
+    if (length) {
+        sysib.sysib_151x.length = cpu_to_be16(length);
+        if (s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, length)) {
+            s390_cpu_virt_mem_handle_exc(cpu, ra);
+        } else {
+            setcc(cpu, 0);
+        }
+    } else {
+        /* The topology description does not fit into the SYSIB */
+        setcc(cpu, 3);
+    }
+
+    /* Single cleanup point for the list built above */
+    s390_topology_empty_list(&topology_list);
+}
diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
index d6aca590ae..588a9aa737 100644
--- a/target/s390x/kvm/meson.build
+++ b/target/s390x/kvm/meson.build
@@ -1,7 +1,8 @@ 
 
 s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
   'pv.c',
-  'kvm.c'
+  'kvm.c',
+  'stsi-topology.c'
 ), if_false: files(
   'stubs.c'
 ))