diff mbox series

[v15,03/11] target/s390x/cpu topology: handle STSI(15) and build the SYSIB

Message ID 20230201132051.126868-4-pmorel@linux.ibm.com
State New
Headers show
Series s390x: CPU Topology | expand

Commit Message

Pierre Morel Feb. 1, 2023, 1:20 p.m. UTC
On interception of STSI(15.1.x) the System Information Block
(SYSIB) is built from the list of pre-ordered topology entries.

Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
---
 include/hw/s390x/cpu-topology.h |  22 +++
 include/hw/s390x/sclp.h         |   1 +
 target/s390x/cpu.h              |  72 +++++++
 hw/s390x/cpu-topology.c         |  10 +
 target/s390x/kvm/cpu_topology.c | 335 ++++++++++++++++++++++++++++++++
 target/s390x/kvm/kvm.c          |   5 +-
 target/s390x/kvm/meson.build    |   3 +-
 7 files changed, 446 insertions(+), 2 deletions(-)
 create mode 100644 target/s390x/kvm/cpu_topology.c

Comments

Nina Schoetterl-Glausch Feb. 3, 2023, 5:36 p.m. UTC | #1
On Wed, 2023-02-01 at 14:20 +0100, Pierre Morel wrote:
> > On interception of STSI(15.1.x) the System Information Block
> > (SYSIB) is built from the list of pre-ordered topology entries.
> > 
> > Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> > ---
> >  include/hw/s390x/cpu-topology.h |  22 +++
> >  include/hw/s390x/sclp.h         |   1 +
> >  target/s390x/cpu.h              |  72 +++++++
> >  hw/s390x/cpu-topology.c         |  10 +
> >  target/s390x/kvm/cpu_topology.c | 335 ++++++++++++++++++++++++++++++++
> >  target/s390x/kvm/kvm.c          |   5 +-
> >  target/s390x/kvm/meson.build    |   3 +-
> >  7 files changed, 446 insertions(+), 2 deletions(-)
> >  create mode 100644 target/s390x/kvm/cpu_topology.c
> > 
[...]
> > 
> > diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> > index d654267a71..e1f6925856 100644
> > --- a/target/s390x/cpu.h
> > +++ b/target/s390x/cpu.h
[...]
> > +
> > +/* CPU type Topology List Entry */
> > +typedef struct SysIBTl_cpu {
> > +        uint8_t nl;
> > +        uint8_t reserved0[3];
> > +#define SYSIB_TLE_POLARITY_MASK 0x03
> > +#define SYSIB_TLE_DEDICATED     0x04
> > +        uint8_t entitlement;

I would just call this flags, since it's multiple fields.

> > +        uint8_t type;
> > +        uint16_t origin;
> > +        uint64_t mask;
> > +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
> > +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
> > 
> > +
> > 
[...]
> >  /**
> > diff --git a/target/s390x/kvm/cpu_topology.c b/target/s390x/kvm/cpu_topology.c
> > new file mode 100644
> > index 0000000000..aba141fb66
> > --- /dev/null
> > +++ b/target/s390x/kvm/cpu_topology.c
> > 
[...]
> > +
> > +/*
> > + * Macro to check that the size of data after increment
> > + * will not get bigger than the size of the SysIB.
> > + */
> > +#define SYSIB_GUARD(data, x) do {       \
> > +        data += x;                      \
> > +        if (data  > sizeof(SysIB)) {    \
> > +            return -ENOSPC;             \

I would go with ENOMEM here.

> > +        }                               \
> > +    } while (0)
> > +
> > +/**
> > + * stsi_set_tle:
> > + * @p: A pointer to the position of the first TLE
> > + * @level: The nested level wanted by the guest
> > + *
> > + * Loop inside the s390_topology.list until the sentinelle entry

s/sentinelle/sentinel/

> > + * is found and for each entry:
> > + *   - Check using SYSIB_GUARD() that the size of the SysIB is not
> > + *     reached.
> > + *   - Add all the container TLE needed for the level
> > + *   - Add the CPU TLE.

I'd focus more on *what* the function does instead of *how*.

Fill the SYSIB with the topology information as described in the PoP,
nesting containers as appropriate, with the maximum nesting limited by @level.

Or something similar.

> > + *
> > + * Return value:
> > + * s390_top_set_level returns the size of the SysIB_15x after being

You forgot to rename the function here, right?
How about stsi_fill_topology_sysib or stsi_topology_fill_sysib, instead?

> > + * filled with TLE on success.
> > + * It returns -ENOSPC in the case we would overrun the end of the SysIB.

You would have to change to ENOMEM here than also.

> > + */
> > +static int stsi_set_tle(char *p, int level)
> > +{
> > +    S390TopologyEntry *entry;
> > +    int last_drawer = -1;
> > +    int last_book = -1;
> > +    int last_socket = -1;
> > +    int drawer_id = 0;
> > +    int book_id = 0;
> > +    int socket_id = 0;
> > +    int n = sizeof(SysIB_151x);
> > +
> > +    QTAILQ_FOREACH(entry, &s390_topology.list, next) {
> > +        int current_drawer = entry->id.drawer;
> > +        int current_book = entry->id.book;
> > +        int current_socket = entry->id.socket;

This only saves two characters, so you could just use entry->id. ...

> > +        bool drawer_change = last_drawer != current_drawer;
> > +        bool book_change = drawer_change || last_book != current_book;
> > +        bool socket_change = book_change || last_socket != current_socket;

... but keep it if it would make this line too long.
You could also rename entry, to current or cur, if you want to emphasize that.

> > +
> > +        /* If we reach the guard get out */
> > +        if (entry->id.level5) {
> > +            break;
> > +        }
> > +
> > +        if (level > 3 && drawer_change) {
> > +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
> > +            p = fill_container(p, 3, drawer_id++);
> > +            book_id = 0;
> > +        }
> > +        if (level > 2 && book_change) {
> > +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
> > +            p = fill_container(p, 2, book_id++);
> > +            socket_id = 0;
> > +        }
> > +        if (socket_change) {
> > +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
> > +            p = fill_container(p, 1, socket_id++);
> > +        }
> > +
> > +        SYSIB_GUARD(n, sizeof(SysIBTl_cpu));
> > +        p = fill_tle_cpu(p, entry);
> > +        last_drawer = entry->id.drawer;
> > +        last_book = entry->id.book;
> > +        last_socket = entry->id.socket;
> > +    }
> > +
> > +    return n;
> > +}
> > +
> > +/**
> > + * setup_stsi:
> > + * sysib: pointer to a SysIB to be filled with SysIB_151x data
> > + * level: Nested level specified by the guest
> > + *
> > + * Setup the SysIB_151x header before calling stsi_set_tle with
> > + * a pointer to the first TLE entry.

Same thing here with regards to describing the what.

Setup the SYSIB for STSI 15.1, the header as well as the description
of the topology.

> > + */
> > +static int setup_stsi(SysIB_151x *sysib, int level)
> > +{
> > +    sysib->mnest = level;
> > +    switch (level) {
> > +    case 4:
> > +        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
> > +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
> > +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
> > +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> > +        break;
> > +    case 3:
> > +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
> > +                                         current_machine->smp.books;
> > +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
> > +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> > +        break;
> > +    case 2:
> > +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
> > +                                         current_machine->smp.books *
> > +                                         current_machine->smp.sockets;
> > +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
> > +        break;
> > +    }
> > +
> > +    return stsi_set_tle(sysib->tle, level);
> > +}
> > +
> > +/**
> > + * s390_topology_add_cpu_to_entry:
> > + * @entry: Topology entry to setup
> > + * @cpu: the S390CPU to add
> > + *
> > + * Set the core bit inside the topology mask and
> > + * increments the number of cores for the socket.
> > + */
> > +static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
> > +                                           S390CPU *cpu)
> > +{
> > +    set_bit(63 - (cpu->env.core_id % 64), &entry->mask);
> > +}
> > +
> > +/**
> > + * s390_topology_new_entry:
> > + * @id: s390_topology_id to add
> > + * @cpu: the S390CPU to add
> > + *
> > + * Allocate a new entry and initialize it.
> > + *
> > + * returns the newly allocated entry.
> > + */
> > +static S390TopologyEntry *s390_topology_new_entry(s390_topology_id id,
> > +                                                  S390CPU *cpu)

This is used only once, right?
I think I'd go ahead and inline it into s390_topology_insert, since I had
to go back and check if new_entry calls add_cpu when reading s390_topology_insert.

> > +{
> > +    S390TopologyEntry *entry;
> > +
> > +    entry = g_malloc0(sizeof(S390TopologyEntry));
> > +    entry->id.id = id.id;
> > +    s390_topology_add_cpu_to_entry(entry, cpu);
> > +
> > +    return entry;
> > +}
> > +
> > +/**
> > + * s390_topology_from_cpu:
> > + * @cpu: The S390CPU
> > + *
> > + * Initialize the topology id from the CPU environment.
> > + */
> > +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
> > +{
> > +    s390_topology_id topology_id = {0};
> > +
> > +    topology_id.drawer = cpu->env.drawer_id;
> > +    topology_id.book = cpu->env.book_id;
> > +    topology_id.socket = cpu->env.socket_id;
> > +    topology_id.origin = cpu->env.core_id / 64;
> > +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
> > +    topology_id.dedicated = cpu->env.dedicated;
> > +
> > +    if (s390_topology.polarity == POLARITY_VERTICAL) {
> > +        /*
> > +         * Vertical polarity with dedicated CPU implies
> > +         * vertical high entitlement.
> > +         */
> > +        if (topology_id.dedicated) {
> > +            topology_id.polarity |= POLARITY_VERTICAL_HIGH;
> > +        } else {
> > +            topology_id.polarity |= cpu->env.entitlement;
> > +        }
> > +    }
> > +
> > +    return topology_id;
> > +}
> > +
> > +/**
> > + * s390_topology_insert:
> > + * @cpu: s390CPU insert.
> > + *
> > + * Parse the topology list to find if the entry already
> > + * exist and add the core in it.
> > + * If it does not exist, allocate a new entry and insert
> > + * it in the queue from lower id to greater id.
> > + */
> > +static void s390_topology_insert(S390CPU *cpu)
> > +{
> > +    s390_topology_id id = s390_topology_from_cpu(cpu);
> > +    S390TopologyEntry *entry = NULL;
> > +    S390TopologyEntry *tmp = NULL;
> > +
> > +    QTAILQ_FOREACH(tmp, &s390_topology.list, next) {
> > +        if (id.id == tmp->id.id) {
> > +            s390_topology_add_cpu_to_entry(tmp, cpu);
> > +            return;
> > +        } else if (id.id < tmp->id.id) {
> > +            entry = s390_topology_new_entry(id, cpu);
> > +            QTAILQ_INSERT_BEFORE(tmp, entry, next);
> > +            return;
> > +        }
> > +    }
> > +}
> > +
> > +/**
> > + * s390_order_tle:
> > + *
> > + * Loop over all CPU and insert it at the right place
> > + * inside the TLE entry list.
> > + */

Suggestion:

s390_topology_fill_list_sorted

Fill the S390Topology list with entries according to the order specified
by the PoP.

> > +static void s390_order_tle(void)
> > +{
> > +    CPUState *cs;
> > +
> > +    CPU_FOREACH(cs) {
> > +        s390_topology_insert(S390_CPU(cs));
> > +    }
> > +}
> > +
> > +/**
> > + * s390_free_tle:
> > + *
> > + * Loop over all TLE entries and free them.
> > + * Keep the sentinelle which is the only one with level5 != 0

s/sentinelle/sentinel/

> > + */

Suggestion:
s390_topology_empty_list

Clear all entries in the S390Topology list except the sentinel.

> > +static void s390_free_tle(void)
> > +{
> > +    S390TopologyEntry *entry = NULL;
> > +    S390TopologyEntry *tmp = NULL;
> > +
> > +    QTAILQ_FOREACH_SAFE(entry, &s390_topology.list, next, tmp) {
> > +        if (!entry->id.level5) {
> > +            QTAILQ_REMOVE(&s390_topology.list, entry, next);
> > +            g_free(entry);
> > +        }
> > +    }
> > +}
> > +
> > +/**
> > + * insert_stsi_15_1_x:
> > + * cpu: the CPU doing the call for which we set CC
> > + * sel2: the selector 2, containing the nested level
> > + * addr: Guest logical address of the guest SysIB
> > + * ar: the access register number
> > + *
> > + * Reserve a zeroed SysIB, let setup_stsi to fill it and
> > + * copy the SysIB to the guest memory.
> > + *
> > + * In case of overflow set CC(3) and no copy is done.

Suggestion:

Emulate STSI 15.1.x, that is, perform all necessary checks and fill the SYSIB.
In case the topology description is too long to fit into the SYSIB,
set CC=3 and abort without writing the SYSIB.
 
> > + */
> > +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
> > +{
> > +    SysIB sysib = {0};
> > +    int len;
> > +
> > +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
> > +        setcc(cpu, 3);
> > +        return;
> > +    }
> > +
> > +    s390_order_tle();
> > +
> > +    len = setup_stsi(&sysib.sysib_151x, sel2);
> > +
> > +    if (len < 0) {

I stumbled a bit over this, maybe rename len to r.

> > +        setcc(cpu, 3);
> > +        return;
> > +    }
> > +
> > +    sysib.sysib_151x.length = cpu_to_be16(len);
> > +    s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, len);
> > +    setcc(cpu, 0);
> > +
> > +    s390_free_tle();
> > +}
> > diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
> > index 3ac7ec9acf..5ea358cbb0 100644
> > --- a/target/s390x/kvm/kvm.c
> > +++ b/target/s390x/kvm/kvm.c
> > @@ -1919,9 +1919,12 @@ static int handle_stsi(S390CPU *cpu)
> >          if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
> >              return 0;
> >          }
> > -        /* Only sysib 3.2.2 needs post-handling for now. */
> >          insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar);
> >          return 0;
> > +    case 15:
> > +        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr,
> > +                           run->s390_stsi.ar);
> > +        return 0;
> >      default:
> >          return 0;
> >      }
> > diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
> > index aef52b6686..5daa5c6033 100644
> > --- a/target/s390x/kvm/meson.build
> > +++ b/target/s390x/kvm/meson.build
> > @@ -1,6 +1,7 @@
> >  
> >  s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
> > -  'kvm.c'
> > +  'kvm.c',
> > +  'cpu_topology.c'
> >  ), if_false: files(
> >    'stubs.c'
> >  ))
Pierre Morel Feb. 6, 2023, 10:06 a.m. UTC | #2
On 2/3/23 18:36, Nina Schoetterl-Glausch wrote:
> On Wed, 2023-02-01 at 14:20 +0100, Pierre Morel wrote:
>>> On interception of STSI(15.1.x) the System Information Block
>>> (SYSIB) is built from the list of pre-ordered topology entries.
>>>
>>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>>> ---
>>>   include/hw/s390x/cpu-topology.h |  22 +++
>>>   include/hw/s390x/sclp.h         |   1 +
>>>   target/s390x/cpu.h              |  72 +++++++
>>>   hw/s390x/cpu-topology.c         |  10 +
>>>   target/s390x/kvm/cpu_topology.c | 335 ++++++++++++++++++++++++++++++++
>>>   target/s390x/kvm/kvm.c          |   5 +-
>>>   target/s390x/kvm/meson.build    |   3 +-
>>>   7 files changed, 446 insertions(+), 2 deletions(-)
>>>   create mode 100644 target/s390x/kvm/cpu_topology.c
>>>
> [...]
>>>
>>> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
>>> index d654267a71..e1f6925856 100644
>>> --- a/target/s390x/cpu.h
>>> +++ b/target/s390x/cpu.h
> [...]
>>> +
>>> +/* CPU type Topology List Entry */
>>> +typedef struct SysIBTl_cpu {
>>> +        uint8_t nl;
>>> +        uint8_t reserved0[3];
>>> +#define SYSIB_TLE_POLARITY_MASK 0x03
>>> +#define SYSIB_TLE_DEDICATED     0x04
>>> +        uint8_t entitlement;
> 
> I would just call this flags, since it's multiple fields.

OK

> 
>>> +        uint8_t type;
>>> +        uint16_t origin;
>>> +        uint64_t mask;
>>> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
>>> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
>>>
>>> +
>>>
> [...]
>>>   /**
>>> diff --git a/target/s390x/kvm/cpu_topology.c b/target/s390x/kvm/cpu_topology.c
>>> new file mode 100644
>>> index 0000000000..aba141fb66
>>> --- /dev/null
>>> +++ b/target/s390x/kvm/cpu_topology.c
>>>
> [...]
>>> +
>>> +/*
>>> + * Macro to check that the size of data after increment
>>> + * will not get bigger than the size of the SysIB.
>>> + */
>>> +#define SYSIB_GUARD(data, x) do {       \
>>> +        data += x;                      \
>>> +        if (data  > sizeof(SysIB)) {    \
>>> +            return -ENOSPC;             \
> 
> I would go with ENOMEM here.

Considering your comment of the length in insert_stsi_15_1_x(), I think 
the best would be to return 0.
Because:
- We do not need to report any error reason.
- The value will be forwarded to insert_stsi_15_1_x() as the length of 
the SYSIB to write
- On error we do not write the SYSIB (len = 0)
- In normal case the return value is always non zero and positive.

> 
>>> +        }                               \
>>> +    } while (0)
>>> +
>>> +/**
>>> + * stsi_set_tle:
>>> + * @p: A pointer to the position of the first TLE
>>> + * @level: The nested level wanted by the guest
>>> + *
>>> + * Loop inside the s390_topology.list until the sentinelle entry
> 
> s/sentinelle/sentinel/

OK, thx,

> 
>>> + * is found and for each entry:
>>> + *   - Check using SYSIB_GUARD() that the size of the SysIB is not
>>> + *     reached.
>>> + *   - Add all the container TLE needed for the level
>>> + *   - Add the CPU TLE.
> 
> I'd focus more on *what* the function does instead of *how*.

Right.

> 
> Fill the SYSIB with the topology information as described in the PoP,
> nesting containers as appropriate, with the maximum nesting limited by @level.
> 
> Or something similar.

Thanks, looks good to me .

> 
>>> + *
>>> + * Return value:
>>> + * s390_top_set_level returns the size of the SysIB_15x after being
> 
> You forgot to rename the function here, right?
> How about stsi_fill_topology_sysib or stsi_topology_fill_sysib, instead?


OK with stsi_topology_fill_sysib()

> 
>>> + * filled with TLE on success.
>>> + * It returns -ENOSPC in the case we would overrun the end of the SysIB.
> 
> You would have to change to ENOMEM here than also.

As discussed above, it seems to me that return 0 is even better.

> 
>>> + */
>>> +static int stsi_set_tle(char *p, int level)
>>> +{
>>> +    S390TopologyEntry *entry;
>>> +    int last_drawer = -1;
>>> +    int last_book = -1;
>>> +    int last_socket = -1;
>>> +    int drawer_id = 0;
>>> +    int book_id = 0;
>>> +    int socket_id = 0;
>>> +    int n = sizeof(SysIB_151x);
>>> +
>>> +    QTAILQ_FOREACH(entry, &s390_topology.list, next) {
>>> +        int current_drawer = entry->id.drawer;
>>> +        int current_book = entry->id.book;
>>> +        int current_socket = entry->id.socket;
> 
> This only saves two characters, so you could just use entry->id. ...

OK

> 
>>> +        bool drawer_change = last_drawer != current_drawer;
>>> +        bool book_change = drawer_change || last_book != current_book;
>>> +        bool socket_change = book_change || last_socket != current_socket;
> 
> ... but keep it if it would make this line too long.

it is OK

> You could also rename entry, to current or cur, if you want to emphasize that.
> 
>>> +
>>> +        /* If we reach the guard get out */
>>> +        if (entry->id.level5) {
>>> +            break;
>>> +        }
>>> +
>>> +        if (level > 3 && drawer_change) {
>>> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
>>> +            p = fill_container(p, 3, drawer_id++);
>>> +            book_id = 0;
>>> +        }
>>> +        if (level > 2 && book_change) {
>>> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
>>> +            p = fill_container(p, 2, book_id++);
>>> +            socket_id = 0;
>>> +        }
>>> +        if (socket_change) {
>>> +            SYSIB_GUARD(n, sizeof(SysIBTl_container));
>>> +            p = fill_container(p, 1, socket_id++);
>>> +        }
>>> +
>>> +        SYSIB_GUARD(n, sizeof(SysIBTl_cpu));
>>> +        p = fill_tle_cpu(p, entry);
>>> +        last_drawer = entry->id.drawer;
>>> +        last_book = entry->id.book;
>>> +        last_socket = entry->id.socket;
>>> +    }
>>> +
>>> +    return n;
>>> +}
>>> +
>>> +/**
>>> + * setup_stsi:
>>> + * sysib: pointer to a SysIB to be filled with SysIB_151x data
>>> + * level: Nested level specified by the guest
>>> + *
>>> + * Setup the SysIB_151x header before calling stsi_set_tle with
>>> + * a pointer to the first TLE entry.
> 
> Same thing here with regards to describing the what.
> 
> Setup the SYSIB for STSI 15.1, the header as well as the description
> of the topology.


OK, thx

> 
>>> + */
>>> +static int setup_stsi(SysIB_151x *sysib, int level)
>>> +{
>>> +    sysib->mnest = level;
>>> +    switch (level) {
>>> +    case 4:
>>> +        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
>>> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
>>> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
>>> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
>>> +        break;
>>> +    case 3:
>>> +        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
>>> +                                         current_machine->smp.books;
>>> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
>>> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
>>> +        break;
>>> +    case 2:
>>> +        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
>>> +                                         current_machine->smp.books *
>>> +                                         current_machine->smp.sockets;
>>> +        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
>>> +        break;
>>> +    }
>>> +
>>> +    return stsi_set_tle(sysib->tle, level);
>>> +}
>>> +
>>> +/**
>>> + * s390_topology_add_cpu_to_entry:
>>> + * @entry: Topology entry to setup
>>> + * @cpu: the S390CPU to add
>>> + *
>>> + * Set the core bit inside the topology mask and
>>> + * increments the number of cores for the socket.
>>> + */
>>> +static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
>>> +                                           S390CPU *cpu)
>>> +{
>>> +    set_bit(63 - (cpu->env.core_id % 64), &entry->mask);
>>> +}
>>> +
>>> +/**
>>> + * s390_topology_new_entry:
>>> + * @id: s390_topology_id to add
>>> + * @cpu: the S390CPU to add
>>> + *
>>> + * Allocate a new entry and initialize it.
>>> + *
>>> + * returns the newly allocated entry.
>>> + */
>>> +static S390TopologyEntry *s390_topology_new_entry(s390_topology_id id,
>>> +                                                  S390CPU *cpu)
> 
> This is used only once, right?
> I think I'd go ahead and inline it into s390_topology_insert, since I had
> to go back and check if new_entry calls add_cpu when reading s390_topology_insert.

OK

> 
>>> +{
>>> +    S390TopologyEntry *entry;
>>> +
>>> +    entry = g_malloc0(sizeof(S390TopologyEntry));
>>> +    entry->id.id = id.id;
>>> +    s390_topology_add_cpu_to_entry(entry, cpu);
>>> +
>>> +    return entry;
>>> +}
>>> +
>>> +/**
>>> + * s390_topology_from_cpu:
>>> + * @cpu: The S390CPU
>>> + *
>>> + * Initialize the topology id from the CPU environment.
>>> + */
>>> +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
>>> +{
>>> +    s390_topology_id topology_id = {0};
>>> +
>>> +    topology_id.drawer = cpu->env.drawer_id;
>>> +    topology_id.book = cpu->env.book_id;
>>> +    topology_id.socket = cpu->env.socket_id;
>>> +    topology_id.origin = cpu->env.core_id / 64;
>>> +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
>>> +    topology_id.dedicated = cpu->env.dedicated;
>>> +
>>> +    if (s390_topology.polarity == POLARITY_VERTICAL) {
>>> +        /*
>>> +         * Vertical polarity with dedicated CPU implies
>>> +         * vertical high entitlement.
>>> +         */
>>> +        if (topology_id.dedicated) {
>>> +            topology_id.polarity |= POLARITY_VERTICAL_HIGH;
>>> +        } else {
>>> +            topology_id.polarity |= cpu->env.entitlement;
>>> +        }
>>> +    }
>>> +
>>> +    return topology_id;
>>> +}
>>> +
>>> +/**
>>> + * s390_topology_insert:
>>> + * @cpu: s390CPU insert.
>>> + *
>>> + * Parse the topology list to find if the entry already
>>> + * exist and add the core in it.
>>> + * If it does not exist, allocate a new entry and insert
>>> + * it in the queue from lower id to greater id.
>>> + */
>>> +static void s390_topology_insert(S390CPU *cpu)
>>> +{
>>> +    s390_topology_id id = s390_topology_from_cpu(cpu);
>>> +    S390TopologyEntry *entry = NULL;
>>> +    S390TopologyEntry *tmp = NULL;
>>> +
>>> +    QTAILQ_FOREACH(tmp, &s390_topology.list, next) {
>>> +        if (id.id == tmp->id.id) {
>>> +            s390_topology_add_cpu_to_entry(tmp, cpu);
>>> +            return;
>>> +        } else if (id.id < tmp->id.id) {
>>> +            entry = s390_topology_new_entry(id, cpu);
>>> +            QTAILQ_INSERT_BEFORE(tmp, entry, next);
>>> +            return;
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +/**
>>> + * s390_order_tle:
>>> + *
>>> + * Loop over all CPU and insert it at the right place
>>> + * inside the TLE entry list.
>>> + */
> 
> Suggestion:
> 
> s390_topology_fill_list_sorted
> 
> Fill the S390Topology list with entries according to the order specified
> by the PoP.

OK

> 
>>> +static void s390_order_tle(void)
>>> +{
>>> +    CPUState *cs;
>>> +
>>> +    CPU_FOREACH(cs) {
>>> +        s390_topology_insert(S390_CPU(cs));
>>> +    }
>>> +}
>>> +
>>> +/**
>>> + * s390_free_tle:
>>> + *
>>> + * Loop over all TLE entries and free them.
>>> + * Keep the sentinelle which is the only one with level5 != 0
> 
> s/sentinelle/sentinel/

yes, thx

> 
>>> + */
> 
> Suggestion:
> s390_topology_empty_list
> 
> Clear all entries in the S390Topology list except the sentinel.
> 
>>> +static void s390_free_tle(void)
>>> +{
>>> +    S390TopologyEntry *entry = NULL;
>>> +    S390TopologyEntry *tmp = NULL;
>>> +
>>> +    QTAILQ_FOREACH_SAFE(entry, &s390_topology.list, next, tmp) {
>>> +        if (!entry->id.level5) {
>>> +            QTAILQ_REMOVE(&s390_topology.list, entry, next);
>>> +            g_free(entry);
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +/**
>>> + * insert_stsi_15_1_x:
>>> + * cpu: the CPU doing the call for which we set CC
>>> + * sel2: the selector 2, containing the nested level
>>> + * addr: Guest logical address of the guest SysIB
>>> + * ar: the access register number
>>> + *
>>> + * Reserve a zeroed SysIB, let setup_stsi to fill it and
>>> + * copy the SysIB to the guest memory.
>>> + *
>>> + * In case of overflow set CC(3) and no copy is done.
> 
> Suggestion:
> 
> Emulate STSI 15.1.x, that is, perform all necessary checks and fill the SYSIB.
> In case the topology description is too long to fit into the SYSIB,
> set CC=3 and abort without writing the SYSIB.

OK, better thanks.

>   
>>> + */
>>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
>>> +{
>>> +    SysIB sysib = {0};
>>> +    int len;
>>> +
>>> +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
>>> +        setcc(cpu, 3);
>>> +        return;
>>> +    }
>>> +
>>> +    s390_order_tle();
>>> +
>>> +    len = setup_stsi(&sysib.sysib_151x, sel2);
>>> +
>>> +    if (len < 0) {
> 
> I stumbled a bit over this, maybe rename len to r.

Why ? it is the length used to fill the length field of the SYSIB.

May be it would be clearer if we give back the length to write and 0 on 
error then we have here:

	if (!len) {
		setcc(cpu, 3);
		return;
	}

> 
>>> +        setcc(cpu, 3);
>>> +        return;
>>> +    }
>>> +
>>> +    sysib.sysib_151x.length = cpu_to_be16(len);
>>> +    s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, len);
>>> +    setcc(cpu, 0);
>>> +
>>> +    s390_free_tle();
>>> +}

Thanks for the comments.
If there are only comments and cosmetic changes will I get your RB if I 
change them according to your wishes?

Regards,
Pierre
Nina Schoetterl-Glausch Feb. 6, 2023, 10:32 a.m. UTC | #3
On Mon, 2023-02-06 at 11:06 +0100, Pierre Morel wrote:
> 
> On 2/3/23 18:36, Nina Schoetterl-Glausch wrote:
> > On Wed, 2023-02-01 at 14:20 +0100, Pierre Morel wrote:
> > > > On interception of STSI(15.1.x) the System Information Block
> > > > (SYSIB) is built from the list of pre-ordered topology entries.
> > > > 
> > > > Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> > > > ---
> > > >   include/hw/s390x/cpu-topology.h |  22 +++
> > > >   include/hw/s390x/sclp.h         |   1 +
> > > >   target/s390x/cpu.h              |  72 +++++++
> > > >   hw/s390x/cpu-topology.c         |  10 +
> > > >   target/s390x/kvm/cpu_topology.c | 335 ++++++++++++++++++++++++++++++++
> > > >   target/s390x/kvm/kvm.c          |   5 +-
> > > >   target/s390x/kvm/meson.build    |   3 +-
> > > >   7 files changed, 446 insertions(+), 2 deletions(-)
> > > >   create mode 100644 target/s390x/kvm/cpu_topology.c
> > > > 
[...]
> > 
> > > > + */
> > > > +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
> > > > +{
> > > > +    SysIB sysib = {0};
> > > > +    int len;
> > > > +
> > > > +    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
> > > > +        setcc(cpu, 3);
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    s390_order_tle();
> > > > +
> > > > +    len = setup_stsi(&sysib.sysib_151x, sel2);
> > > > +
> > > > +    if (len < 0) {
> > 
> > I stumbled a bit over this, maybe rename len to r.
> 
> Why ? it is the length used to fill the length field of the SYSIB.

Well a negative length doesn't make sense, that's what confused me for a bit.
It's the error value of course, I proposed renaming it to the more generic r return value,
to signify that the return value isn't exactly the length.

You're solution of using 0 is fine with me, too.
> 
> May be it would be clearer if we give back the length to write and 0 on 
> error then we have here:
> 
> 	if (!len) {
> 		setcc(cpu, 3);
> 		return;
> 	}
> 
> > 
> > > > +        setcc(cpu, 3);
> > > > +        return;
> > > > +    }
> > > > +
> > > > +    sysib.sysib_151x.length = cpu_to_be16(len);
> > > > +    s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, len);
> > > > +    setcc(cpu, 0);
> > > > +
> > > > +    s390_free_tle();
> > > > +}
> 
> Thanks for the comments.
> If there are only comments and cosmetic changes will I get your RB if I 
> change them according to your wishes?

Usually I review the whole series and then comment, this time sent feedback early,
so the review isn't as deep. Probably easiest to give you the R-b for v16.
My impression is that the series is close to final.
> 
> Regards,
> Pierre
>
Thomas Huth Feb. 6, 2023, 11:24 a.m. UTC | #4
On 01/02/2023 14.20, Pierre Morel wrote:
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
...
> diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
> index d3ade40a5a..712fd68123 100644
> --- a/include/hw/s390x/sclp.h
> +++ b/include/hw/s390x/sclp.h
> @@ -112,6 +112,7 @@ typedef struct CPUEntry {
>   } QEMU_PACKED CPUEntry;
>   
>   #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
> +#define SCLP_READ_SCP_INFO_MNEST                2
>   typedef struct ReadInfo {
>       SCCBHeader h;
>       uint16_t rnmax;
> diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
> index d654267a71..e1f6925856 100644
> --- a/target/s390x/cpu.h
> +++ b/target/s390x/cpu.h
> @@ -560,6 +560,25 @@ typedef struct SysIB_322 {
>   } SysIB_322;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
>   
> +#define S390_TOPOLOGY_MAG  6
> +#define S390_TOPOLOGY_MAG6 0
> +#define S390_TOPOLOGY_MAG5 1
> +#define S390_TOPOLOGY_MAG4 2
> +#define S390_TOPOLOGY_MAG3 3
> +#define S390_TOPOLOGY_MAG2 4
> +#define S390_TOPOLOGY_MAG1 5
> +/* Configuration topology */
> +typedef struct SysIB_151x {
> +    uint8_t  reserved0[2];
> +    uint16_t length;
> +    uint8_t  mag[S390_TOPOLOGY_MAG];
> +    uint8_t  reserved1;
> +    uint8_t  mnest;
> +    uint32_t reserved2;
> +    char tle[];
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
> +QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
> +
>   typedef union SysIB {
>       SysIB_111 sysib_111;
>       SysIB_121 sysib_121;
> @@ -567,9 +586,62 @@ typedef union SysIB {
>       SysIB_221 sysib_221;
>       SysIB_222 sysib_222;
>       SysIB_322 sysib_322;
> +    SysIB_151x sysib_151x;
>   } SysIB;
>   QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
>   
> +/*
> + * CPU Topology List provided by STSI with fc=15 provides a list
> + * of two different Topology List Entries (TLE) types to specify
> + * the topology hierarchy.
> + *
> + * - Container Topology List Entry
> + *   Defines a container to contain other Topology List Entries
> + *   of any type, nested containers or CPU.
> + * - CPU Topology List Entry
> + *   Specifies the CPUs position, type, entitlement and polarization
> + *   of the CPUs contained in the last Container TLE.
> + *
> + * There can be theoretically up to five levels of containers, QEMU
> + * uses only three levels, the drawer's, book's and socket's level.
> + *
> + * A container of with a nesting level (NL) greater than 1 can only
> + * contain another container of nesting level NL-1.
> + *
> + * A container of nesting level 1 (socket), contains as many CPU TLE
> + * as needed to describe the position and qualities of all CPUs inside
> + * the container.
> + * The qualities of a CPU are polarization, entitlement and type.
> + *
> + * The CPU TLE defines the position of the CPUs of identical qualities
> + * using a 64bits mask which first bit has its offset defined by
> + * the CPU address orgin field of the CPU TLE like in:
> + * CPU address = origin * 64 + bit position within the mask
> + *
> + */
> +/* Container type Topology List Entry */
> +typedef struct SysIBTl_container {
> +        uint8_t nl;
> +        uint8_t reserved[6];
> +        uint8_t id;
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
> +
> +/* CPU type Topology List Entry */
> +typedef struct SysIBTl_cpu {
> +        uint8_t nl;
> +        uint8_t reserved0[3];
> +#define SYSIB_TLE_POLARITY_MASK 0x03
> +#define SYSIB_TLE_DEDICATED     0x04
> +        uint8_t entitlement;
> +        uint8_t type;
> +        uint16_t origin;
> +        uint64_t mask;
> +} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
> +QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
> +
> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);

__u64 is not a portable type (e.g. fails when doing "make 
vm-build-openbsd"), please replace with uint64_t.

  Thanks,
   Thomas
Pierre Morel Feb. 6, 2023, 12:57 p.m. UTC | #5
On 2/6/23 12:24, Thomas Huth wrote:
> On 01/02/2023 14.20, Pierre Morel wrote:
>> On interception of STSI(15.1.x) the System Information Block
>> (SYSIB) is built from the list of pre-ordered topology entries.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
> ...

...

>> --- a/target/s390x/cpu.h
>> +++ b/target/s390x/cpu.h

...

>> +
>> +void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
> 
> __u64 is not a portable type (e.g. fails when doing "make 
> vm-build-openbsd"), please replace with uint64_t.

done.
Thanks


Regards,
Pierre
Nina Schoetterl-Glausch Feb. 9, 2023, 4:39 p.m. UTC | #6
On Wed, 2023-02-01 at 14:20 +0100, Pierre Morel wrote:
> On interception of STSI(15.1.x) the System Information Block
> (SYSIB) is built from the list of pre-ordered topology entries.
> 
> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
> ---
>  include/hw/s390x/cpu-topology.h |  22 +++
>  include/hw/s390x/sclp.h         |   1 +
>  target/s390x/cpu.h              |  72 +++++++
>  hw/s390x/cpu-topology.c         |  10 +
>  target/s390x/kvm/cpu_topology.c | 335 ++++++++++++++++++++++++++++++++
>  target/s390x/kvm/kvm.c          |   5 +-
>  target/s390x/kvm/meson.build    |   3 +-
>  7 files changed, 446 insertions(+), 2 deletions(-)
>  create mode 100644 target/s390x/kvm/cpu_topology.c
> 
[...]
> +
> +/**
> + * s390_topology_from_cpu:
> + * @cpu: The S390CPU
> + *
> + * Initialize the topology id from the CPU environment.
> + */
> +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
> +{
> +    s390_topology_id topology_id = {0};
> +
> +    topology_id.drawer = cpu->env.drawer_id;
> +    topology_id.book = cpu->env.book_id;
> +    topology_id.socket = cpu->env.socket_id;
> +    topology_id.origin = cpu->env.core_id / 64;
> +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
> +    topology_id.dedicated = cpu->env.dedicated;
> +
> +    if (s390_topology.polarity == POLARITY_VERTICAL) {
> +        /*
> +         * Vertical polarity with dedicated CPU implies
> +         * vertical high entitlement.
> +         */
> +        if (topology_id.dedicated) {
> +            topology_id.polarity |= POLARITY_VERTICAL_HIGH;
> +        } else {
> +            topology_id.polarity |= cpu->env.entitlement;
> +        }

Why |= instead of an assignment?
Anyway, I think you can get rid of this in the next version.
If you define the entitlement via qapi you can just put a little switch
here and convert it to the hardware definition of polarization.
(Or you just do +1, but I think the switch is easier to understand)

> +    }
> +
> +    return topology_id;
> +}
> +
[...]
Pierre Morel Feb. 10, 2023, 2:16 p.m. UTC | #7
On 2/9/23 17:39, Nina Schoetterl-Glausch wrote:
> On Wed, 2023-02-01 at 14:20 +0100, Pierre Morel wrote:
>> On interception of STSI(15.1.x) the System Information Block
>> (SYSIB) is built from the list of pre-ordered topology entries.
>>
>> Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
>> ---
>>   include/hw/s390x/cpu-topology.h |  22 +++
>>   include/hw/s390x/sclp.h         |   1 +
>>   target/s390x/cpu.h              |  72 +++++++
>>   hw/s390x/cpu-topology.c         |  10 +
>>   target/s390x/kvm/cpu_topology.c | 335 ++++++++++++++++++++++++++++++++
>>   target/s390x/kvm/kvm.c          |   5 +-
>>   target/s390x/kvm/meson.build    |   3 +-
>>   7 files changed, 446 insertions(+), 2 deletions(-)
>>   create mode 100644 target/s390x/kvm/cpu_topology.c
>>
> [...]
>> +
>> +/**
>> + * s390_topology_from_cpu:
>> + * @cpu: The S390CPU
>> + *
>> + * Initialize the topology id from the CPU environment.
>> + */
>> +static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
>> +{
>> +    s390_topology_id topology_id = {0};
>> +
>> +    topology_id.drawer = cpu->env.drawer_id;
>> +    topology_id.book = cpu->env.book_id;
>> +    topology_id.socket = cpu->env.socket_id;
>> +    topology_id.origin = cpu->env.core_id / 64;
>> +    topology_id.type = S390_TOPOLOGY_CPU_IFL;
>> +    topology_id.dedicated = cpu->env.dedicated;
>> +
>> +    if (s390_topology.polarity == POLARITY_VERTICAL) {
>> +        /*
>> +         * Vertical polarity with dedicated CPU implies
>> +         * vertical high entitlement.
>> +         */
>> +        if (topology_id.dedicated) {
>> +            topology_id.polarity |= POLARITY_VERTICAL_HIGH;
>> +        } else {
>> +            topology_id.polarity |= cpu->env.entitlement;
>> +        }
> 
> Why |= instead of an assignment?
> Anyway, I think you can get rid of this in the next version.
> If you define the entitlement via qapi you can just put a little switch
> here and convert it to the hardware definition of polarization.
> (Or you just do +1, but I think the switch is easier to understand)

Oh! right thanks, it is a leftover from when dedication and polarity 
were in the same variable.

I change it with the QAPI enum.

Regards,
Pierre
diff mbox series

Patch

diff --git a/include/hw/s390x/cpu-topology.h b/include/hw/s390x/cpu-topology.h
index 9b6f889ad4..1ae7e7c5e3 100644
--- a/include/hw/s390x/cpu-topology.h
+++ b/include/hw/s390x/cpu-topology.h
@@ -24,9 +24,31 @@  enum s390_topology_polarity {
     POLARITY_MAX,
 };
 
+typedef union s390_topology_id {
+    uint64_t id;
+    struct {
+        uint8_t level5;
+        uint8_t drawer;
+        uint8_t book;
+        uint8_t socket;
+        uint8_t dedicated;
+        uint8_t polarity;
+        uint8_t type;
+        uint8_t origin;
+    };
+} s390_topology_id;
+
+typedef struct S390TopologyEntry {
+    QTAILQ_ENTRY(S390TopologyEntry) next;
+    s390_topology_id id;
+    uint64_t mask;
+} S390TopologyEntry;
+
 typedef struct S390Topology {
     uint8_t *cores_per_socket;
+    QTAILQ_HEAD(, S390TopologyEntry) list;
     CpuTopology *smp;
+    uint8_t polarity;
 } S390Topology;
 
 #ifdef CONFIG_KVM
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index d3ade40a5a..712fd68123 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -112,6 +112,7 @@  typedef struct CPUEntry {
 } QEMU_PACKED CPUEntry;
 
 #define SCLP_READ_SCP_INFO_FIXED_CPU_OFFSET     128
+#define SCLP_READ_SCP_INFO_MNEST                2
 typedef struct ReadInfo {
     SCCBHeader h;
     uint16_t rnmax;
diff --git a/target/s390x/cpu.h b/target/s390x/cpu.h
index d654267a71..e1f6925856 100644
--- a/target/s390x/cpu.h
+++ b/target/s390x/cpu.h
@@ -560,6 +560,25 @@  typedef struct SysIB_322 {
 } SysIB_322;
 QEMU_BUILD_BUG_ON(sizeof(SysIB_322) != 4096);
 
+#define S390_TOPOLOGY_MAG  6
+#define S390_TOPOLOGY_MAG6 0
+#define S390_TOPOLOGY_MAG5 1
+#define S390_TOPOLOGY_MAG4 2
+#define S390_TOPOLOGY_MAG3 3
+#define S390_TOPOLOGY_MAG2 4
+#define S390_TOPOLOGY_MAG1 5
+/* Configuration topology */
+typedef struct SysIB_151x {
+    uint8_t  reserved0[2];
+    uint16_t length;
+    uint8_t  mag[S390_TOPOLOGY_MAG];
+    uint8_t  reserved1;
+    uint8_t  mnest;
+    uint32_t reserved2;
+    char tle[];
+} QEMU_PACKED QEMU_ALIGNED(8) SysIB_151x;
+QEMU_BUILD_BUG_ON(sizeof(SysIB_151x) != 16);
+
 typedef union SysIB {
     SysIB_111 sysib_111;
     SysIB_121 sysib_121;
@@ -567,9 +586,62 @@  typedef union SysIB {
     SysIB_221 sysib_221;
     SysIB_222 sysib_222;
     SysIB_322 sysib_322;
+    SysIB_151x sysib_151x;
 } SysIB;
 QEMU_BUILD_BUG_ON(sizeof(SysIB) != 4096);
 
+/*
+ * CPU Topology List provided by STSI with fc=15 provides a list
+ * of two different Topology List Entries (TLE) types to specify
+ * the topology hierarchy.
+ *
+ * - Container Topology List Entry
+ *   Defines a container to contain other Topology List Entries
+ *   of any type, nested containers or CPU.
+ * - CPU Topology List Entry
+ *   Specifies the CPUs position, type, entitlement and polarization
+ *   of the CPUs contained in the last Container TLE.
+ *
+ * There can be theoretically up to five levels of containers, QEMU
+ * uses only three levels, the drawer's, book's and socket's level.
+ *
+ * A container of with a nesting level (NL) greater than 1 can only
+ * contain another container of nesting level NL-1.
+ *
+ * A container of nesting level 1 (socket), contains as many CPU TLE
+ * as needed to describe the position and qualities of all CPUs inside
+ * the container.
+ * The qualities of a CPU are polarization, entitlement and type.
+ *
+ * The CPU TLE defines the position of the CPUs of identical qualities
+ * using a 64bits mask which first bit has its offset defined by
+ * the CPU address orgin field of the CPU TLE like in:
+ * CPU address = origin * 64 + bit position within the mask
+ *
+ */
+/* Container type Topology List Entry */
+typedef struct SysIBTl_container {
+        uint8_t nl;
+        uint8_t reserved[6];
+        uint8_t id;
+} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_container;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_container) != 8);
+
+/* CPU type Topology List Entry */
+typedef struct SysIBTl_cpu {
+        uint8_t nl;
+        uint8_t reserved0[3];
+#define SYSIB_TLE_POLARITY_MASK 0x03
+#define SYSIB_TLE_DEDICATED     0x04
+        uint8_t entitlement;
+        uint8_t type;
+        uint16_t origin;
+        uint64_t mask;
+} QEMU_PACKED QEMU_ALIGNED(8) SysIBTl_cpu;
+QEMU_BUILD_BUG_ON(sizeof(SysIBTl_cpu) != 16);
+
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar);
+
 /* MMU defines */
 #define ASCE_ORIGIN           (~0xfffULL) /* segment table origin             */
 #define ASCE_SUBSPACE         0x200       /* subspace group control           */
diff --git a/hw/s390x/cpu-topology.c b/hw/s390x/cpu-topology.c
index 12df4eca6c..a80a1ebf22 100644
--- a/hw/s390x/cpu-topology.c
+++ b/hw/s390x/cpu-topology.c
@@ -31,6 +31,8 @@  S390Topology s390_topology = {
     /* will be initialized after the cpu model is realized */
     .cores_per_socket = NULL,
     .smp = NULL,
+    .list = QTAILQ_HEAD_INITIALIZER(s390_topology.list),
+    .polarity = POLARITY_HORIZONTAL,
 };
 
 /**
@@ -65,14 +67,22 @@  bool s390_has_topology(void)
  * Allocate an array to keep the count of cores per socket.
  * The index of the array starts at socket 0 from book 0 and
  * drawer 0 up to the maximum allowed by the machine topology.
+ *
+ * Insert a sentinel entry using unused level5 with its maximum value.
+ * This entry will never be free.
  */
 static void s390_topology_init(MachineState *ms)
 {
     CpuTopology *smp = &ms->smp;
+    S390TopologyEntry *entry;
 
     s390_topology.smp = smp;
     s390_topology.cores_per_socket = g_new0(uint8_t, smp->sockets *
                                             smp->books * smp->drawers);
+
+    entry = g_malloc0(sizeof(S390TopologyEntry));
+    entry->id.level5 = 0xff;
+    QTAILQ_INSERT_HEAD(&s390_topology.list, entry, next);
 }
 
 /**
diff --git a/target/s390x/kvm/cpu_topology.c b/target/s390x/kvm/cpu_topology.c
new file mode 100644
index 0000000000..aba141fb66
--- /dev/null
+++ b/target/s390x/kvm/cpu_topology.c
@@ -0,0 +1,335 @@ 
+/*
+ * QEMU S390x CPU Topology
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Pierre Morel <pmorel@linux.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or (at
+ * your option) any later version. See the COPYING file in the top-level
+ * directory.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "hw/s390x/pv.h"
+#include "hw/sysbus.h"
+#include "hw/s390x/sclp.h"
+#include "hw/s390x/cpu-topology.h"
+
+/**
+ * fill_container:
+ * @p: The address of the container TLE to fill
+ * @level: The level of nesting for this container
+ * @id: The container receives a uniq ID inside its own container
+ *
+ * Returns the next free TLE entry.
+ */
+static char *fill_container(char *p, int level, int id)
+{
+    SysIBTl_container *tle = (SysIBTl_container *)p;
+
+    tle->nl = level;
+    tle->id = id;
+    return p + sizeof(*tle);
+}
+
+/**
+ * fill_tle_cpu:
+ * @p: The address of the CPU TLE to fill
+ * @entry: a pointer to the S390TopologyEntry defining this
+ *         CPU container.
+ *
+ * Returns the next free TLE entry.
+ */
+static char *fill_tle_cpu(char *p, S390TopologyEntry *entry)
+{
+    SysIBTl_cpu *tle = (SysIBTl_cpu *)p;
+    s390_topology_id topology_id = entry->id;
+
+    tle->nl = 0;
+    if (topology_id.dedicated) {
+        tle->entitlement = SYSIB_TLE_DEDICATED;
+    }
+    tle->entitlement |= topology_id.polarity;
+    tle->type = topology_id.type;
+    tle->origin = cpu_to_be16(topology_id.origin * 64);
+    tle->mask = cpu_to_be64(entry->mask);
+    return p + sizeof(*tle);
+}
+
+/*
+ * Macro to check that the size of data after increment
+ * will not get bigger than the size of the SysIB.
+ */
+#define SYSIB_GUARD(data, x) do {       \
+        data += x;                      \
+        if (data  > sizeof(SysIB)) {    \
+            return -ENOSPC;             \
+        }                               \
+    } while (0)
+
+/**
+ * stsi_set_tle:
+ * @p: A pointer to the position of the first TLE
+ * @level: The nested level wanted by the guest
+ *
+ * Loop inside the s390_topology.list until the sentinelle entry
+ * is found and for each entry:
+ *   - Check using SYSIB_GUARD() that the size of the SysIB is not
+ *     reached.
+ *   - Add all the container TLE needed for the level
+ *   - Add the CPU TLE.
+ *
+ * Return value:
+ * s390_top_set_level returns the size of the SysIB_15x after being
+ * filled with TLE on success.
+ * It returns -ENOSPC in the case we would overrun the end of the SysIB.
+ */
+static int stsi_set_tle(char *p, int level)
+{
+    S390TopologyEntry *entry;
+    int last_drawer = -1;
+    int last_book = -1;
+    int last_socket = -1;
+    int drawer_id = 0;
+    int book_id = 0;
+    int socket_id = 0;
+    int n = sizeof(SysIB_151x);
+
+    QTAILQ_FOREACH(entry, &s390_topology.list, next) {
+        int current_drawer = entry->id.drawer;
+        int current_book = entry->id.book;
+        int current_socket = entry->id.socket;
+        bool drawer_change = last_drawer != current_drawer;
+        bool book_change = drawer_change || last_book != current_book;
+        bool socket_change = book_change || last_socket != current_socket;
+
+        /* If we reach the guard get out */
+        if (entry->id.level5) {
+            break;
+        }
+
+        if (level > 3 && drawer_change) {
+            SYSIB_GUARD(n, sizeof(SysIBTl_container));
+            p = fill_container(p, 3, drawer_id++);
+            book_id = 0;
+        }
+        if (level > 2 && book_change) {
+            SYSIB_GUARD(n, sizeof(SysIBTl_container));
+            p = fill_container(p, 2, book_id++);
+            socket_id = 0;
+        }
+        if (socket_change) {
+            SYSIB_GUARD(n, sizeof(SysIBTl_container));
+            p = fill_container(p, 1, socket_id++);
+        }
+
+        SYSIB_GUARD(n, sizeof(SysIBTl_cpu));
+        p = fill_tle_cpu(p, entry);
+        last_drawer = entry->id.drawer;
+        last_book = entry->id.book;
+        last_socket = entry->id.socket;
+    }
+
+    return n;
+}
+
+/**
+ * setup_stsi:
+ * sysib: pointer to a SysIB to be filled with SysIB_151x data
+ * level: Nested level specified by the guest
+ *
+ * Setup the SysIB_151x header before calling stsi_set_tle with
+ * a pointer to the first TLE entry.
+ */
+static int setup_stsi(SysIB_151x *sysib, int level)
+{
+    sysib->mnest = level;
+    switch (level) {
+    case 4:
+        sysib->mag[S390_TOPOLOGY_MAG4] = current_machine->smp.drawers;
+        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.books;
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    case 3:
+        sysib->mag[S390_TOPOLOGY_MAG3] = current_machine->smp.drawers *
+                                         current_machine->smp.books;
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    case 2:
+        sysib->mag[S390_TOPOLOGY_MAG2] = current_machine->smp.drawers *
+                                         current_machine->smp.books *
+                                         current_machine->smp.sockets;
+        sysib->mag[S390_TOPOLOGY_MAG1] = current_machine->smp.cores;
+        break;
+    }
+
+    return stsi_set_tle(sysib->tle, level);
+}
+
+/**
+ * s390_topology_add_cpu_to_entry:
+ * @entry: Topology entry to setup
+ * @cpu: the S390CPU to add
+ *
+ * Set the core bit inside the topology mask and
+ * increments the number of cores for the socket.
+ */
+static void s390_topology_add_cpu_to_entry(S390TopologyEntry *entry,
+                                           S390CPU *cpu)
+{
+    set_bit(63 - (cpu->env.core_id % 64), &entry->mask);
+}
+
+/**
+ * s390_topology_new_entry:
+ * @id: s390_topology_id to add
+ * @cpu: the S390CPU to add
+ *
+ * Allocate a new entry and initialize it.
+ *
+ * returns the newly allocated entry.
+ */
+static S390TopologyEntry *s390_topology_new_entry(s390_topology_id id,
+                                                  S390CPU *cpu)
+{
+    S390TopologyEntry *entry;
+
+    entry = g_malloc0(sizeof(S390TopologyEntry));
+    entry->id.id = id.id;
+    s390_topology_add_cpu_to_entry(entry, cpu);
+
+    return entry;
+}
+
+/**
+ * s390_topology_from_cpu:
+ * @cpu: The S390CPU
+ *
+ * Initialize the topology id from the CPU environment.
+ */
+static s390_topology_id s390_topology_from_cpu(S390CPU *cpu)
+{
+    s390_topology_id topology_id = {0};
+
+    topology_id.drawer = cpu->env.drawer_id;
+    topology_id.book = cpu->env.book_id;
+    topology_id.socket = cpu->env.socket_id;
+    topology_id.origin = cpu->env.core_id / 64;
+    topology_id.type = S390_TOPOLOGY_CPU_IFL;
+    topology_id.dedicated = cpu->env.dedicated;
+
+    if (s390_topology.polarity == POLARITY_VERTICAL) {
+        /*
+         * Vertical polarity with dedicated CPU implies
+         * vertical high entitlement.
+         */
+        if (topology_id.dedicated) {
+            topology_id.polarity |= POLARITY_VERTICAL_HIGH;
+        } else {
+            topology_id.polarity |= cpu->env.entitlement;
+        }
+    }
+
+    return topology_id;
+}
+
+/**
+ * s390_topology_insert:
+ * @cpu: s390CPU insert.
+ *
+ * Parse the topology list to find if the entry already
+ * exist and add the core in it.
+ * If it does not exist, allocate a new entry and insert
+ * it in the queue from lower id to greater id.
+ */
+static void s390_topology_insert(S390CPU *cpu)
+{
+    s390_topology_id id = s390_topology_from_cpu(cpu);
+    S390TopologyEntry *entry = NULL;
+    S390TopologyEntry *tmp = NULL;
+
+    QTAILQ_FOREACH(tmp, &s390_topology.list, next) {
+        if (id.id == tmp->id.id) {
+            s390_topology_add_cpu_to_entry(tmp, cpu);
+            return;
+        } else if (id.id < tmp->id.id) {
+            entry = s390_topology_new_entry(id, cpu);
+            QTAILQ_INSERT_BEFORE(tmp, entry, next);
+            return;
+        }
+    }
+}
+
+/**
+ * s390_order_tle:
+ *
+ * Loop over all CPU and insert it at the right place
+ * inside the TLE entry list.
+ */
+static void s390_order_tle(void)
+{
+    CPUState *cs;
+
+    CPU_FOREACH(cs) {
+        s390_topology_insert(S390_CPU(cs));
+    }
+}
+
+/**
+ * s390_free_tle:
+ *
+ * Loop over all TLE entries and free them.
+ * Keep the sentinelle which is the only one with level5 != 0
+ */
+static void s390_free_tle(void)
+{
+    S390TopologyEntry *entry = NULL;
+    S390TopologyEntry *tmp = NULL;
+
+    QTAILQ_FOREACH_SAFE(entry, &s390_topology.list, next, tmp) {
+        if (!entry->id.level5) {
+            QTAILQ_REMOVE(&s390_topology.list, entry, next);
+            g_free(entry);
+        }
+    }
+}
+
+/**
+ * insert_stsi_15_1_x:
+ * cpu: the CPU doing the call for which we set CC
+ * sel2: the selector 2, containing the nested level
+ * addr: Guest logical address of the guest SysIB
+ * ar: the access register number
+ *
+ * Reserve a zeroed SysIB, let setup_stsi to fill it and
+ * copy the SysIB to the guest memory.
+ *
+ * In case of overflow set CC(3) and no copy is done.
+ */
+void insert_stsi_15_1_x(S390CPU *cpu, int sel2, __u64 addr, uint8_t ar)
+{
+    SysIB sysib = {0};
+    int len;
+
+    if (!s390_has_topology() || sel2 < 2 || sel2 > SCLP_READ_SCP_INFO_MNEST) {
+        setcc(cpu, 3);
+        return;
+    }
+
+    s390_order_tle();
+
+    len = setup_stsi(&sysib.sysib_151x, sel2);
+
+    if (len < 0) {
+        setcc(cpu, 3);
+        return;
+    }
+
+    sysib.sysib_151x.length = cpu_to_be16(len);
+    s390_cpu_virt_mem_write(cpu, addr, ar, &sysib, len);
+    setcc(cpu, 0);
+
+    s390_free_tle();
+}
diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c
index 3ac7ec9acf..5ea358cbb0 100644
--- a/target/s390x/kvm/kvm.c
+++ b/target/s390x/kvm/kvm.c
@@ -1919,9 +1919,12 @@  static int handle_stsi(S390CPU *cpu)
         if (run->s390_stsi.sel1 != 2 || run->s390_stsi.sel2 != 2) {
             return 0;
         }
-        /* Only sysib 3.2.2 needs post-handling for now. */
         insert_stsi_3_2_2(cpu, run->s390_stsi.addr, run->s390_stsi.ar);
         return 0;
+    case 15:
+        insert_stsi_15_1_x(cpu, run->s390_stsi.sel2, run->s390_stsi.addr,
+                           run->s390_stsi.ar);
+        return 0;
     default:
         return 0;
     }
diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build
index aef52b6686..5daa5c6033 100644
--- a/target/s390x/kvm/meson.build
+++ b/target/s390x/kvm/meson.build
@@ -1,6 +1,7 @@ 
 
 s390x_ss.add(when: 'CONFIG_KVM', if_true: files(
-  'kvm.c'
+  'kvm.c',
+  'cpu_topology.c'
 ), if_false: files(
   'stubs.c'
 ))