diff mbox

[v5,3/3] sclp-s390: Add memory hotplug SCLPs

Message ID 1403706420-20109-4-git-send-email-mjrosato@linux.vnet.ibm.com
State New
Headers show

Commit Message

Matthew Rosato June 25, 2014, 2:27 p.m. UTC
Add memory information to read SCP info and add handlers for
Read Storage Element Information, Attach Storage Element,
Assign Storage and Unassign Storage.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
---
 hw/s390x/sclp.c    |  259 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 target-s390x/cpu.h |   15 +++
 target-s390x/kvm.c |    5 +
 3 files changed, 273 insertions(+), 6 deletions(-)

Comments

Christian Borntraeger June 26, 2014, 1:14 p.m. UTC | #1
On 25/06/14 16:27, Matthew Rosato wrote:
> Add memory information to read SCP info and add handlers for
> Read Storage Element Information, Attach Storage Element,
> Assign Storage and Unassign Storage.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>

In general this looks fine. I gave it some testing and most of it seems to work.
With lots of standby chunks (-m 10240,maxmem=204800M,slots=16) I get an abort, though:

#0  0x000003fffb9cffd8 in raise () from /lib64/libc.so.6
#1  0x000003fffb9d1b30 in abort () from /lib64/libc.so.6
#2  0x000003fffb9c7376 in __assert_fail_base () from /lib64/libc.so.6
#3  0x000003fffb9c7418 in __assert_fail () from /lib64/libc.so.6
#4  0x000000008001db28 in find_ram_offset (size=<optimized out>) at /home/cborntra/REPOS/qemu/exec.c:1109
#5  ram_block_add (new_block=0x3fd70001ab0) at /home/cborntra/REPOS/qemu/exec.c:1236
#6  0x000000008005ced0 in memory_region_init_ram (mr=mr@entry=0x3fd700019e0, owner=owner@entry=0x0, name=name@entry=0x3fd759fda98 "standby.ram1", size=0) at /home/cborntra/REPOS/qemu/memory.c:1033

Why do we pass size==0?

#7  0x0000000080080cdc in assign_storage (sccb=0x3fd759fca98) at /home/cborntra/REPOS/qemu/hw/s390x/sclp.c:242
#8  sclp_execute (code=<optimized out>, sccb=0x3fd759fca98) at /home/cborntra/REPOS/qemu/hw/s390x/sclp.c:348
#9  sclp_service_call (env=env@entry=0x808dcf98, sccb=2144296960, code=<optimized out>) at /home/cborntra/REPOS/qemu/hw/s390x/sclp.c:395
#10 0x00000000800b127c in kvm_sclp_service_call (run=<optimized out>, ipbh0=35, cpu=0x808d4d00) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:705
#11 handle_b2 (run=<optimized out>, ipa1=<optimized out>, cpu=0x808d4d00) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:777
#12 handle_instruction (run=<optimized out>, cpu=0x808d4d00) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:1001
#13 handle_intercept (cpu=0x808d4d00) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:1060
#14 kvm_arch_handle_exit (cs=cs@entry=0x808d4d00, run=run@entry=0x3fffd081000) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:1177
#15 0x000000008005a696 in kvm_cpu_exec (cpu=cpu@entry=0x808d4d00) at /home/cborntra/REPOS/qemu/kvm-all.c:1784
#16 0x000000008004668a in qemu_kvm_cpu_thread_fn (arg=0x808d4d00) at /home/cborntra/REPOS/qemu/cpus.c:874
#17 0x000003fffce18412 in start_thread () from /lib64/libpthread.so.0
#18 0x000003fffba9e0ae in thread_start () from /lib64/libc.so.6



> ---
>  hw/s390x/sclp.c    |  259 ++++++++++++++++++++++++++++++++++++++++++++++++++--
>  target-s390x/cpu.h |   15 +++
>  target-s390x/kvm.c |    5 +
>  3 files changed, 273 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
> index 769d7c3..b3bf924 100644
> --- a/hw/s390x/sclp.c
> +++ b/hw/s390x/sclp.c
> @@ -16,7 +16,8 @@
>  #include "sysemu/kvm.h"
>  #include "exec/memory.h"
>  #include "sysemu/sysemu.h"
> -
> +#include "exec/address-spaces.h"
> +#include "qemu/config-file.h"
>  #include "hw/s390x/sclp.h"
>  #include "hw/s390x/event-facility.h"
> 
> @@ -33,10 +34,19 @@ static inline SCLPEventFacility *get_event_facility(void)
>  static void read_SCP_info(SCCB *sccb)
>  {
>      ReadInfo *read_info = (ReadInfo *) sccb;
> +    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
>      CPUState *cpu;
> -    int shift = 0;
>      int cpu_count = 0;
>      int i = 0;
> +    int increment_size = 20;
> +    int rnsize, rnmax;
> +    QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory"), NULL);
> +    int slots = qemu_opt_get_number(opts, "slots", 0);
> +    int max_avail_slots = s390_get_memslot_count(kvm_state);
> +
> +    if (slots > max_avail_slots) {
> +        slots = max_avail_slots;
> +    }
> 
>      CPU_FOREACH(cpu) {
>          cpu_count++;
> @@ -54,14 +64,235 @@ static void read_SCP_info(SCCB *sccb)
> 
>      read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO);
> 
> -    while ((ram_size >> (20 + shift)) > 65535) {
> -        shift++;
> +    /*
> +     * The storage increment size is a multiple of 1M and is a power of 2.
> +     * The number of storage increments must be MAX_STORAGE_INCREMENTS or fewer.
> +     */
> +    while ((ram_size >> increment_size) > MAX_STORAGE_INCREMENTS) {
> +        increment_size++;
> +    }
> +    rnmax = ram_size >> increment_size;
> +
> +    /* Memory Hotplug is only supported for the ccw machine type */
> +    if (mhd) {
> +        while ((mhd->standby_mem_size >> increment_size) >
> +               MAX_STORAGE_INCREMENTS) {
> +            increment_size++;
> +        }
> +        assert(increment_size == mhd->increment_size);
> +
> +        mhd->standby_subregion_size = MEM_SECTION_SIZE;
> +        /* Deduct the memory slot already used for core */
> +        if (slots > 0) {
> +            while ((mhd->standby_subregion_size * (slots - 1)
> +                    < mhd->standby_mem_size)) {
> +                mhd->standby_subregion_size = mhd->standby_subregion_size << 1;
> +            }
> +        }
> +        /*
> +         * Initialize mapping of guest standby memory sections indicating which
> +         * are and are not online. Assume all standby memory begins offline.
> +         */
> +        if (mhd->standby_state_map == 0) {
> +            if (mhd->standby_mem_size % mhd->standby_subregion_size) {
> +                mhd->standby_state_map = g_malloc0((mhd->standby_mem_size /
> +                                             mhd->standby_subregion_size + 1) *
> +                                             (mhd->standby_subregion_size /
> +                                             MEM_SECTION_SIZE));
> +            } else {
> +                mhd->standby_state_map = g_malloc0(mhd->standby_mem_size /
> +                                                   MEM_SECTION_SIZE);
> +            }
> +        }
> +        mhd->padded_ram_size = ram_size + mhd->pad_size;
> +        mhd->rzm = 1 << mhd->increment_size;
> +        rnmax = ((ram_size + mhd->standby_mem_size + mhd->pad_size)
> +             >> mhd->increment_size);
> +
> +        read_info->facilities |= cpu_to_be64(SCLP_FC_ASSIGN_ATTACH_READ_STOR);
> +    }
> +
> +    rnsize = 1 << (increment_size - 20);
> +    if (rnsize <= 128) {
> +        read_info->rnsize = rnsize;
> +    } else {
> +        read_info->rnsize = 0;
> +        read_info->rnsize2 = cpu_to_be32(rnsize);
>      }
> -    read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift));
> -    read_info->rnsize = 1 << shift;
> +
> +    if (rnmax < 0x10000) {
> +        read_info->rnmax = cpu_to_be16(rnmax);
> +    } else {
> +        read_info->rnmax = cpu_to_be16(0);
> +        read_info->rnmax2 = cpu_to_be64(rnmax);
> +    }
> +
>      sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
>  }
> 
> +static void read_storage_element0_info(SCCB *sccb)
> +{
> +    int i, assigned;
> +    int subincrement_id = SCLP_STARTING_SUBINCREMENT_ID;
> +    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
> +    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
> +
> +    assert(mhd);
> +
> +    if ((ram_size >> mhd->increment_size) >= 0x10000) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
> +        return;
> +    }
> +
> +    /* Return information regarding core memory */
> +    storage_info->max_id = cpu_to_be16(mhd->standby_mem_size ? 1 : 0);
> +    assigned = ram_size >> mhd->increment_size;
> +    storage_info->assigned = cpu_to_be16(assigned);
> +
> +    for (i = 0; i < assigned; i++) {
> +        storage_info->entries[i] = cpu_to_be32(subincrement_id);
> +        subincrement_id += SCLP_INCREMENT_UNIT;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
> +}
> +
> +static void read_storage_element1_info(SCCB *sccb)
> +{
> +    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
> +    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
> +
> +    assert(mhd);
> +
> +    if ((mhd->standby_mem_size >> mhd->increment_size) >= 0x10000) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
> +        return;
> +    }
> +
> +    /* Return information regarding standby memory */
> +    storage_info->max_id = cpu_to_be16(mhd->standby_mem_size ? 1 : 0);
> +    storage_info->assigned = cpu_to_be16(mhd->standby_mem_size >>
> +                                         mhd->increment_size);
> +    storage_info->standby = cpu_to_be16(mhd->standby_mem_size >>
> +                                        mhd->increment_size);
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_STANDBY_READ_COMPLETION);
> +}
> +
> +static void attach_storage_element(SCCB *sccb, uint16_t element)
> +{
> +    int i, assigned, subincrement_id;
> +    AttachStorageElement *attach_info = (AttachStorageElement *) sccb;
> +    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
> +
> +    assert(mhd);
> +
> +    if (element != 1) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
> +        return;
> +    }
> +
> +    assigned = mhd->standby_mem_size >> mhd->increment_size;
> +    attach_info->assigned = cpu_to_be16(assigned);
> +    subincrement_id = ((ram_size >> mhd->increment_size) << 16)
> +                      + SCLP_STARTING_SUBINCREMENT_ID;
> +    for (i = 0; i < assigned; i++) {
> +        attach_info->entries[i] = cpu_to_be32(subincrement_id);
> +        subincrement_id += SCLP_INCREMENT_UNIT;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
> +static void assign_storage(SCCB *sccb)
> +{
> +    MemoryRegion *mr = NULL;
> +    int this_subregion_size;
> +    AssignStorage *assign_info = (AssignStorage *) sccb;
> +    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
> +    assert(mhd);
> +    ram_addr_t assign_addr = (assign_info->rn - 1) * mhd->rzm;
> +    MemoryRegion *sysmem = get_system_memory();
> +
> +    if ((assign_addr % MEM_SECTION_SIZE == 0) &&
> +        (assign_addr >= mhd->padded_ram_size)) {
> +        /* Re-use existing memory region if found */
> +        mr = memory_region_find(sysmem, assign_addr, 1).mr;
> +        if (!mr) {
> +
> +            MemoryRegion *standby_ram = g_new(MemoryRegion, 1);
> +
> +            /* offset to align to standby_subregion_size for allocation */
> +            ram_addr_t offset = assign_addr -
> +                                (assign_addr - mhd->padded_ram_size)
> +                                % mhd->standby_subregion_size;
> +
> +            /* strlen("standby.ram") + 4 (Max of KVM_MEMORY_SLOTS) +  NULL */
> +            char id[16];
> +            snprintf(id, 16, "standby.ram%d",
> +                     (int)((offset - mhd->padded_ram_size) /
> +                     mhd->standby_subregion_size) + 1);
> +
> +            /* Allocate a subregion of the calculated standby_subregion_size */
> +            if (offset + mhd->standby_subregion_size >
> +                mhd->padded_ram_size + mhd->standby_mem_size) {
> +                this_subregion_size = mhd->padded_ram_size +
> +                  mhd->standby_mem_size - offset;
> +            } else {
> +                this_subregion_size = mhd->standby_subregion_size;
> +            }
> +
> +            memory_region_init_ram(standby_ram, NULL, id, this_subregion_size);
> +            vmstate_register_ram_global(standby_ram);
> +            memory_region_add_subregion(sysmem, offset, standby_ram);
> +        }
> +        /* The specified subregion is no longer in standby */
> +        mhd->standby_state_map[(assign_addr - mhd->padded_ram_size)
> +                               / MEM_SECTION_SIZE] = 1;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
> +static void unassign_storage(SCCB *sccb)
> +{
> +    MemoryRegion *mr = NULL;
> +    AssignStorage *assign_info = (AssignStorage *) sccb;
> +    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
> +    assert(mhd);
> +    ram_addr_t unassign_addr = (assign_info->rn - 1) * mhd->rzm;
> +    MemoryRegion *sysmem = get_system_memory();
> +
> +    /* if the addr is a multiple of 256 MB */
> +    if ((unassign_addr % MEM_SECTION_SIZE == 0) &&
> +        (unassign_addr >= mhd->padded_ram_size)) {
> +        mhd->standby_state_map[(unassign_addr -
> +                           mhd->padded_ram_size) / MEM_SECTION_SIZE] = 0;
> +
> +        /* find the specified memory region and destroy it */
> +        mr = memory_region_find(sysmem, unassign_addr, 1).mr;
> +        if (mr) {
> +            int i;
> +            int is_removable = 1;
> +            ram_addr_t map_offset = (unassign_addr - mhd->padded_ram_size -
> +                                     (unassign_addr - mhd->padded_ram_size)
> +                                     % mhd->standby_subregion_size);
> +            /* Mark all affected subregions as 'standby' once again */
> +            for (i = 0;
> +                 i < (mhd->standby_subregion_size / MEM_SECTION_SIZE);
> +                 i++) {
> +
> +                if (mhd->standby_state_map[i + map_offset / MEM_SECTION_SIZE]) {
> +                    is_removable = 0;
> +                    break;
> +                }
> +            }
> +            if (is_removable) {
> +                memory_region_del_subregion(sysmem, mr);
> +                memory_region_destroy(mr);
> +                g_free(mr);
> +            }
> +        }
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
>  /* Provide information about the CPU */
>  static void sclp_read_cpu_info(SCCB *sccb)
>  {
> @@ -103,6 +334,22 @@ static void sclp_execute(SCCB *sccb, uint32_t code)
>      case SCLP_CMDW_READ_CPU_INFO:
>          sclp_read_cpu_info(sccb);
>          break;
> +    case SCLP_READ_STORAGE_ELEMENT_INFO:
> +        if (code & 0xff00) {
> +            read_storage_element1_info(sccb);
> +        } else {
> +            read_storage_element0_info(sccb);
> +        }
> +        break;
> +    case SCLP_ATTACH_STORAGE_ELEMENT:
> +        attach_storage_element(sccb, (code & 0xff00) >> 8);
> +        break;
> +    case SCLP_ASSIGN_STORAGE:
> +        assign_storage(sccb);
> +        break;
> +    case SCLP_UNASSIGN_STORAGE:
> +        unassign_storage(sccb);
> +        break;
>      default:
>          efc->command_handler(ef, sccb, code);
>          break;
> diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
> index ba0e4b4..1c1681c 100644
> --- a/target-s390x/cpu.h
> +++ b/target-s390x/cpu.h
> @@ -1047,6 +1047,7 @@ static inline void cpu_inject_crw_mchk(S390CPU *cpu)
> 
>  /* from s390-virtio-ccw */
>  #define MEM_SECTION_SIZE             0x10000000UL
> +#define MAX_AVAIL_SLOTS              32
> 
>  /* fpu_helper.c */
>  uint32_t set_cc_nz_f32(float32 v);
> @@ -1070,6 +1071,7 @@ void kvm_s390_enable_css_support(S390CPU *cpu);
>  int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
>                                      int vq, bool assign);
>  int kvm_s390_cpu_restart(S390CPU *cpu);
> +int kvm_s390_get_memslot_count(KVMState *s);
>  void kvm_s390_clear_cmma_callback(void *opaque);
>  #else
>  static inline void kvm_s390_io_interrupt(uint16_t subchannel_id,
> @@ -1097,6 +1099,10 @@ static inline int kvm_s390_cpu_restart(S390CPU *cpu)
>  static inline void kvm_s390_clear_cmma_callback(void *opaque)
>  {
>  }
> +static inline int kvm_s390_get_memslot_count(KVMState *s)
> +{
> +  return MAX_AVAIL_SLOTS;
> +}
>  #endif
> 
>  static inline void cmma_reset(S390CPU *cpu)
> @@ -1115,6 +1121,15 @@ static inline int s390_cpu_restart(S390CPU *cpu)
>      return -ENOSYS;
>  }
> 
> +static inline int s390_get_memslot_count(KVMState *s)
> +{
> +    if (kvm_enabled()) {
> +        return kvm_s390_get_memslot_count(s);
> +    } else {
> +        return MAX_AVAIL_SLOTS;
> +    }
> +}
> +
>  void s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr,
>                         uint32_t io_int_parm, uint32_t io_int_word);
>  void s390_crw_mchk(void);
> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
> index a6e587b..4b05d48 100644
> --- a/target-s390x/kvm.c
> +++ b/target-s390x/kvm.c
> @@ -1283,3 +1283,8 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
>      }
>      return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
>  }
> +
> +int kvm_s390_get_memslot_count(KVMState *s)
> +{
> +    return kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
> +}
>
Matthew Rosato June 26, 2014, 3:30 p.m. UTC | #2
On 06/26/2014 09:14 AM, Christian Borntraeger wrote:
> On 25/06/14 16:27, Matthew Rosato wrote:
>> Add memory information to read SCP info and add handlers for
>> Read Storage Element Information, Attach Storage Element,
>> Assign Storage and Unassign Storage.
>>
>> Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
> 
> In general this looks fine. I gave it some testing and most of it seems to work.
> With lots of standby chunks (-m 10240,maxmem=204800M,slots=16) I get an abort, though:
> 
> #0  0x000003fffb9cffd8 in raise () from /lib64/libc.so.6
> #1  0x000003fffb9d1b30 in abort () from /lib64/libc.so.6
> #2  0x000003fffb9c7376 in __assert_fail_base () from /lib64/libc.so.6
> #3  0x000003fffb9c7418 in __assert_fail () from /lib64/libc.so.6
> #4  0x000000008001db28 in find_ram_offset (size=<optimized out>) at /home/cborntra/REPOS/qemu/exec.c:1109
> #5  ram_block_add (new_block=0x3fd70001ab0) at /home/cborntra/REPOS/qemu/exec.c:1236
> #6  0x000000008005ced0 in memory_region_init_ram (mr=mr@entry=0x3fd700019e0, owner=owner@entry=0x0, name=name@entry=0x3fd759fda98 "standby.ram1", size=0) at /home/cborntra/REPOS/qemu/memory.c:1033
> 
> Why do we pass size==0?
> 
> #7  0x0000000080080cdc in assign_storage (sccb=0x3fd759fca98) at /home/cborntra/REPOS/qemu/hw/s390x/sclp.c:242
> #8  sclp_execute (code=<optimized out>, sccb=0x3fd759fca98) at /home/cborntra/REPOS/qemu/hw/s390x/sclp.c:348
> #9  sclp_service_call (env=env@entry=0x808dcf98, sccb=2144296960, code=<optimized out>) at /home/cborntra/REPOS/qemu/hw/s390x/sclp.c:395
> #10 0x00000000800b127c in kvm_sclp_service_call (run=<optimized out>, ipbh0=35, cpu=0x808d4d00) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:705
> #11 handle_b2 (run=<optimized out>, ipa1=<optimized out>, cpu=0x808d4d00) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:777
> #12 handle_instruction (run=<optimized out>, cpu=0x808d4d00) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:1001
> #13 handle_intercept (cpu=0x808d4d00) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:1060
> #14 kvm_arch_handle_exit (cs=cs@entry=0x808d4d00, run=run@entry=0x3fffd081000) at /home/cborntra/REPOS/qemu/target-s390x/kvm.c:1177
> #15 0x000000008005a696 in kvm_cpu_exec (cpu=cpu@entry=0x808d4d00) at /home/cborntra/REPOS/qemu/kvm-all.c:1784
> #16 0x000000008004668a in qemu_kvm_cpu_thread_fn (arg=0x808d4d00) at /home/cborntra/REPOS/qemu/cpus.c:874
> #17 0x000003fffce18412 in start_thread () from /lib64/libpthread.so.0
> #18 0x000003fffba9e0ae in thread_start () from /lib64/libc.so.6

Looks like this_subregion_size is overflowing...

[..snip..]

>> +static void assign_storage(SCCB *sccb)
>> +{
>> +    MemoryRegion *mr = NULL;
>> +    int this_subregion_size;

This shouldn't be an int; everything else is in 64-bit unsigned terms, so
it's causing truncation with large subregion sizes.  It should be uint64_t
this_subregion_size, which is what memory_region_init_ram expects.

This fixes the problem for me.  I'll do some more testing with that and
include it in the next version -- Thanks!

>> +    AssignStorage *assign_info = (AssignStorage *) sccb;
>> +    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
>> +    assert(mhd);
>> +    ram_addr_t assign_addr = (assign_info->rn - 1) * mhd->rzm;
>> +    MemoryRegion *sysmem = get_system_memory();
>> +
>> +    if ((assign_addr % MEM_SECTION_SIZE == 0) &&
>> +        (assign_addr >= mhd->padded_ram_size)) {
>> +        /* Re-use existing memory region if found */
>> +        mr = memory_region_find(sysmem, assign_addr, 1).mr;
>> +        if (!mr) {
>> +
>> +            MemoryRegion *standby_ram = g_new(MemoryRegion, 1);
>> +
>> +            /* offset to align to standby_subregion_size for allocation */
>> +            ram_addr_t offset = assign_addr -
>> +                                (assign_addr - mhd->padded_ram_size)
>> +                                % mhd->standby_subregion_size;
>> +
>> +            /* strlen("standby.ram") + 4 (Max of KVM_MEMORY_SLOTS) +  NULL */
>> +            char id[16];
>> +            snprintf(id, 16, "standby.ram%d",
>> +                     (int)((offset - mhd->padded_ram_size) /
>> +                     mhd->standby_subregion_size) + 1);
>> +
>> +            /* Allocate a subregion of the calculated standby_subregion_size */
>> +            if (offset + mhd->standby_subregion_size >
>> +                mhd->padded_ram_size + mhd->standby_mem_size) {
>> +                this_subregion_size = mhd->padded_ram_size +
>> +                  mhd->standby_mem_size - offset;
>> +            } else {
>> +                this_subregion_size = mhd->standby_subregion_size;
>> +            }
>> +
>> +            memory_region_init_ram(standby_ram, NULL, id, this_subregion_size);
>> +            vmstate_register_ram_global(standby_ram);
>> +            memory_region_add_subregion(sysmem, offset, standby_ram);
>> +        }
>> +        /* The specified subregion is no longer in standby */
>> +        mhd->standby_state_map[(assign_addr - mhd->padded_ram_size)
>> +                               / MEM_SECTION_SIZE] = 1;
>> +    }
>> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
>> +}
>> +
>> +static void unassign_storage(SCCB *sccb)
>> +{
>> +    MemoryRegion *mr = NULL;
>> +    AssignStorage *assign_info = (AssignStorage *) sccb;
>> +    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
>> +    assert(mhd);
>> +    ram_addr_t unassign_addr = (assign_info->rn - 1) * mhd->rzm;
>> +    MemoryRegion *sysmem = get_system_memory();
>> +
>> +    /* if the addr is a multiple of 256 MB */
>> +    if ((unassign_addr % MEM_SECTION_SIZE == 0) &&
>> +        (unassign_addr >= mhd->padded_ram_size)) {
>> +        mhd->standby_state_map[(unassign_addr -
>> +                           mhd->padded_ram_size) / MEM_SECTION_SIZE] = 0;
>> +
>> +        /* find the specified memory region and destroy it */
>> +        mr = memory_region_find(sysmem, unassign_addr, 1).mr;
>> +        if (mr) {
>> +            int i;
>> +            int is_removable = 1;
>> +            ram_addr_t map_offset = (unassign_addr - mhd->padded_ram_size -
>> +                                     (unassign_addr - mhd->padded_ram_size)
>> +                                     % mhd->standby_subregion_size);
>> +            /* Mark all affected subregions as 'standby' once again */
>> +            for (i = 0;
>> +                 i < (mhd->standby_subregion_size / MEM_SECTION_SIZE);
>> +                 i++) {
>> +
>> +                if (mhd->standby_state_map[i + map_offset / MEM_SECTION_SIZE]) {
>> +                    is_removable = 0;
>> +                    break;
>> +                }
>> +            }
>> +            if (is_removable) {
>> +                memory_region_del_subregion(sysmem, mr);
>> +                memory_region_destroy(mr);
>> +                g_free(mr);
>> +            }
>> +        }
>> +    }
>> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
>> +}
>> +
>>  /* Provide information about the CPU */
>>  static void sclp_read_cpu_info(SCCB *sccb)
>>  {
>> @@ -103,6 +334,22 @@ static void sclp_execute(SCCB *sccb, uint32_t code)
>>      case SCLP_CMDW_READ_CPU_INFO:
>>          sclp_read_cpu_info(sccb);
>>          break;
>> +    case SCLP_READ_STORAGE_ELEMENT_INFO:
>> +        if (code & 0xff00) {
>> +            read_storage_element1_info(sccb);
>> +        } else {
>> +            read_storage_element0_info(sccb);
>> +        }
>> +        break;
>> +    case SCLP_ATTACH_STORAGE_ELEMENT:
>> +        attach_storage_element(sccb, (code & 0xff00) >> 8);
>> +        break;
>> +    case SCLP_ASSIGN_STORAGE:
>> +        assign_storage(sccb);
>> +        break;
>> +    case SCLP_UNASSIGN_STORAGE:
>> +        unassign_storage(sccb);
>> +        break;
>>      default:
>>          efc->command_handler(ef, sccb, code);
>>          break;
>> diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
>> index ba0e4b4..1c1681c 100644
>> --- a/target-s390x/cpu.h
>> +++ b/target-s390x/cpu.h
>> @@ -1047,6 +1047,7 @@ static inline void cpu_inject_crw_mchk(S390CPU *cpu)
>>
>>  /* from s390-virtio-ccw */
>>  #define MEM_SECTION_SIZE             0x10000000UL
>> +#define MAX_AVAIL_SLOTS              32
>>
>>  /* fpu_helper.c */
>>  uint32_t set_cc_nz_f32(float32 v);
>> @@ -1070,6 +1071,7 @@ void kvm_s390_enable_css_support(S390CPU *cpu);
>>  int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
>>                                      int vq, bool assign);
>>  int kvm_s390_cpu_restart(S390CPU *cpu);
>> +int kvm_s390_get_memslot_count(KVMState *s);
>>  void kvm_s390_clear_cmma_callback(void *opaque);
>>  #else
>>  static inline void kvm_s390_io_interrupt(uint16_t subchannel_id,
>> @@ -1097,6 +1099,10 @@ static inline int kvm_s390_cpu_restart(S390CPU *cpu)
>>  static inline void kvm_s390_clear_cmma_callback(void *opaque)
>>  {
>>  }
>> +static inline int kvm_s390_get_memslot_count(KVMState *s)
>> +{
>> +  return MAX_AVAIL_SLOTS;
>> +}
>>  #endif
>>
>>  static inline void cmma_reset(S390CPU *cpu)
>> @@ -1115,6 +1121,15 @@ static inline int s390_cpu_restart(S390CPU *cpu)
>>      return -ENOSYS;
>>  }
>>
>> +static inline int s390_get_memslot_count(KVMState *s)
>> +{
>> +    if (kvm_enabled()) {
>> +        return kvm_s390_get_memslot_count(s);
>> +    } else {
>> +        return MAX_AVAIL_SLOTS;
>> +    }
>> +}
>> +
>>  void s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr,
>>                         uint32_t io_int_parm, uint32_t io_int_word);
>>  void s390_crw_mchk(void);
>> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
>> index a6e587b..4b05d48 100644
>> --- a/target-s390x/kvm.c
>> +++ b/target-s390x/kvm.c
>> @@ -1283,3 +1283,8 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
>>      }
>>      return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
>>  }
>> +
>> +int kvm_s390_get_memslot_count(KVMState *s)
>> +{
>> +    return kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
>> +}
>>
> 
> 
> 
>
diff mbox

Patch

diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index 769d7c3..b3bf924 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -16,7 +16,8 @@ 
 #include "sysemu/kvm.h"
 #include "exec/memory.h"
 #include "sysemu/sysemu.h"
-
+#include "exec/address-spaces.h"
+#include "qemu/config-file.h"
 #include "hw/s390x/sclp.h"
 #include "hw/s390x/event-facility.h"
 
@@ -33,10 +34,19 @@  static inline SCLPEventFacility *get_event_facility(void)
 static void read_SCP_info(SCCB *sccb)
 {
     ReadInfo *read_info = (ReadInfo *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
     CPUState *cpu;
-    int shift = 0;
     int cpu_count = 0;
     int i = 0;
+    int increment_size = 20;
+    int rnsize, rnmax;
+    QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory"), NULL);
+    int slots = qemu_opt_get_number(opts, "slots", 0);
+    int max_avail_slots = s390_get_memslot_count(kvm_state);
+
+    if (slots > max_avail_slots) {
+        slots = max_avail_slots;
+    }
 
     CPU_FOREACH(cpu) {
         cpu_count++;
@@ -54,14 +64,235 @@  static void read_SCP_info(SCCB *sccb)
 
     read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO);
 
-    while ((ram_size >> (20 + shift)) > 65535) {
-        shift++;
+    /*
+     * The storage increment size is a multiple of 1M and is a power of 2.
+     * The number of storage increments must be MAX_STORAGE_INCREMENTS or fewer.
+     */
+    while ((ram_size >> increment_size) > MAX_STORAGE_INCREMENTS) {
+        increment_size++;
+    }
+    rnmax = ram_size >> increment_size;
+
+    /* Memory Hotplug is only supported for the ccw machine type */
+    if (mhd) {
+        while ((mhd->standby_mem_size >> increment_size) >
+               MAX_STORAGE_INCREMENTS) {
+            increment_size++;
+        }
+        assert(increment_size == mhd->increment_size);
+
+        mhd->standby_subregion_size = MEM_SECTION_SIZE;
+        /* Deduct the memory slot already used for core */
+        if (slots > 0) {
+            while ((mhd->standby_subregion_size * (slots - 1)
+                    < mhd->standby_mem_size)) {
+                mhd->standby_subregion_size = mhd->standby_subregion_size << 1;
+            }
+        }
+        /*
+         * Initialize mapping of guest standby memory sections indicating which
+         * are and are not online. Assume all standby memory begins offline.
+         */
+        if (mhd->standby_state_map == 0) {
+            if (mhd->standby_mem_size % mhd->standby_subregion_size) {
+                mhd->standby_state_map = g_malloc0((mhd->standby_mem_size /
+                                             mhd->standby_subregion_size + 1) *
+                                             (mhd->standby_subregion_size /
+                                             MEM_SECTION_SIZE));
+            } else {
+                mhd->standby_state_map = g_malloc0(mhd->standby_mem_size /
+                                                   MEM_SECTION_SIZE);
+            }
+        }
+        mhd->padded_ram_size = ram_size + mhd->pad_size;
+        mhd->rzm = 1 << mhd->increment_size;
+        rnmax = ((ram_size + mhd->standby_mem_size + mhd->pad_size)
+             >> mhd->increment_size);
+
+        read_info->facilities |= cpu_to_be64(SCLP_FC_ASSIGN_ATTACH_READ_STOR);
+    }
+
+    rnsize = 1 << (increment_size - 20);
+    if (rnsize <= 128) {
+        read_info->rnsize = rnsize;
+    } else {
+        read_info->rnsize = 0;
+        read_info->rnsize2 = cpu_to_be32(rnsize);
     }
-    read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift));
-    read_info->rnsize = 1 << shift;
+
+    if (rnmax < 0x10000) {
+        read_info->rnmax = cpu_to_be16(rnmax);
+    } else {
+        read_info->rnmax = cpu_to_be16(0);
+        read_info->rnmax2 = cpu_to_be64(rnmax);
+    }
+
     sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
 }
 
+/* Read Storage Element Information, element 0: describes core memory. */
+static void read_storage_element0_info(SCCB *sccb)
+{
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+    ReadStorageElementInfo *info = (ReadStorageElementInfo *) sccb;
+    int next_id = SCLP_STARTING_SUBINCREMENT_ID;
+    int idx, count;
+
+    assert(mhd);
+    /* The increment count is stored through cpu_to_be16(); keep it < 64K */
+    if ((ram_size >> mhd->increment_size) >= 0x10000) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
+        return;
+    }
+
+    count = ram_size >> mhd->increment_size;
+    info->max_id = cpu_to_be16(mhd->standby_mem_size ? 1 : 0);
+    info->assigned = cpu_to_be16(count);
+
+    /* One entry per core increment; ids advance by SCLP_INCREMENT_UNIT */
+    for (idx = 0; idx < count; idx++, next_id += SCLP_INCREMENT_UNIT) {
+        info->entries[idx] = cpu_to_be32(next_id);
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
+}
+
+/* Read Storage Element Information, element 1: describes standby memory. */
+static void read_storage_element1_info(SCCB *sccb)
+{
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+    ReadStorageElementInfo *info = (ReadStorageElementInfo *) sccb;
+
+    assert(mhd);
+    /* The increment count is stored through cpu_to_be16(); keep it < 64K */
+    if ((mhd->standby_mem_size >> mhd->increment_size) >= 0x10000) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
+        return;
+    }
+
+    /* Every standby increment is reported as both assigned and standby */
+    info->max_id = cpu_to_be16(mhd->standby_mem_size ? 1 : 0);
+    info->assigned = cpu_to_be16(mhd->standby_mem_size >> mhd->increment_size);
+    info->standby = cpu_to_be16(mhd->standby_mem_size >> mhd->increment_size);
+
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_STANDBY_READ_COMPLETION);
+}
+
+/* Attach Storage Element: list the increments of storage element 1. */
+static void attach_storage_element(SCCB *sccb, uint16_t element)
+{
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+    AttachStorageElement *attach_info = (AttachStorageElement *) sccb;
+    int idx, count, next_id;
+
+    assert(mhd);
+    /* Only element 1 is accepted; anything else is an invalid command */
+    if (element != 1) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
+        return;
+    }
+
+    count = mhd->standby_mem_size >> mhd->increment_size;
+    attach_info->assigned = cpu_to_be16(count);
+    next_id = ((ram_size >> mhd->increment_size) << 16)
+              + SCLP_STARTING_SUBINCREMENT_ID;
+    for (idx = 0; idx < count; idx++, next_id += SCLP_INCREMENT_UNIT) {
+        attach_info->entries[idx] = cpu_to_be32(next_id);
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
+/* Assign Storage: bring the standby increment named by sccb's rn online. */
+static void assign_storage(SCCB *sccb)
+{
+    MemoryRegion *mr = NULL;
+    uint64_t this_subregion_size; /* sizes >= 4G truncated to 0 as int */
+    AssignStorage *assign_info = (AssignStorage *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+    assert(mhd);
+    /* rn is a guest-written SCCB field, big-endian like the header */
+    ram_addr_t assign_addr = (be16_to_cpu(assign_info->rn) - 1) * mhd->rzm;
+    MemoryRegion *sysmem = get_system_memory();
+
+    /* Ignore requests that are unaligned or outside the standby range */
+    if ((assign_addr % MEM_SECTION_SIZE == 0) &&
+        (assign_addr >= mhd->padded_ram_size) &&
+        (assign_addr < mhd->padded_ram_size + mhd->standby_mem_size)) {
+        /* Re-use existing memory region if found */
+        mr = memory_region_find(sysmem, assign_addr, 1).mr;
+        if (!mr) {
+            MemoryRegion *standby_ram = g_new(MemoryRegion, 1);
+            /* offset to align to standby_subregion_size for allocation */
+            ram_addr_t offset = assign_addr -
+                                (assign_addr - mhd->padded_ram_size)
+                                % mhd->standby_subregion_size;
+            /* strlen("standby.ram") + 4 (Max of KVM_MEMORY_SLOTS) + NUL */
+            char id[16];
+            snprintf(id, 16, "standby.ram%d",
+                     (int)((offset - mhd->padded_ram_size) /
+                     mhd->standby_subregion_size) + 1);
+
+            /* Clip the last subregion to the end of standby memory */
+            if (offset + mhd->standby_subregion_size >
+                mhd->padded_ram_size + mhd->standby_mem_size) {
+                this_subregion_size = mhd->padded_ram_size +
+                  mhd->standby_mem_size - offset;
+            } else {
+                this_subregion_size = mhd->standby_subregion_size;
+            }
+            memory_region_init_ram(standby_ram, NULL, id, this_subregion_size);
+            vmstate_register_ram_global(standby_ram);
+            memory_region_add_subregion(sysmem, offset, standby_ram);
+        }
+        /* The specified subregion is no longer in standby */
+        mhd->standby_state_map[(assign_addr - mhd->padded_ram_size)
+                               / MEM_SECTION_SIZE] = 1;
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
+/* Unassign Storage: return the increment named by sccb's rn to standby. */
+static void unassign_storage(SCCB *sccb)
+{
+    MemoryRegion *mr = NULL;
+    AssignStorage *assign_info = (AssignStorage *) sccb;
+    sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev();
+    assert(mhd);
+    ram_addr_t unassign_addr = (be16_to_cpu(assign_info->rn) - 1) * mhd->rzm;
+    MemoryRegion *sysmem = get_system_memory();
+
+    /* Only section-aligned (256 MB) addresses inside standby memory */
+    if ((unassign_addr % MEM_SECTION_SIZE == 0) &&
+        (unassign_addr >= mhd->padded_ram_size) &&
+        (unassign_addr < mhd->padded_ram_size + mhd->standby_mem_size)) {
+        mhd->standby_state_map[(unassign_addr -
+                           mhd->padded_ram_size) / MEM_SECTION_SIZE] = 0;
+        /* find the memory region backing this address, if any */
+        mr = memory_region_find(sysmem, unassign_addr, 1).mr;
+        if (mr) {
+            int i;
+            int is_removable = 1;
+            ram_addr_t map_offset = (unassign_addr - mhd->padded_ram_size -
+                                     (unassign_addr - mhd->padded_ram_size)
+                                     % mhd->standby_subregion_size);
+            /* The region can only go once all of its sections are standby */
+            for (i = 0;
+                 i < (mhd->standby_subregion_size / MEM_SECTION_SIZE);
+                 i++) {
+                if (mhd->standby_state_map[i + map_offset / MEM_SECTION_SIZE]) {
+                    is_removable = 0;
+                    break;
+                }
+            }
+            if (is_removable) {
+                memory_region_del_subregion(sysmem, mr);
+                memory_region_destroy(mr);
+                g_free(mr);
+            }
+        }
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
 /* Provide information about the CPU */
 static void sclp_read_cpu_info(SCCB *sccb)
 {
@@ -103,6 +334,22 @@  static void sclp_execute(SCCB *sccb, uint32_t code)
     case SCLP_CMDW_READ_CPU_INFO:
         sclp_read_cpu_info(sccb);
         break;
+    case SCLP_READ_STORAGE_ELEMENT_INFO:
+        if (code & 0xff00) {
+            read_storage_element1_info(sccb);
+        } else {
+            read_storage_element0_info(sccb);
+        }
+        break;
+    case SCLP_ATTACH_STORAGE_ELEMENT:
+        attach_storage_element(sccb, (code & 0xff00) >> 8);
+        break;
+    case SCLP_ASSIGN_STORAGE:
+        assign_storage(sccb);
+        break;
+    case SCLP_UNASSIGN_STORAGE:
+        unassign_storage(sccb);
+        break;
     default:
         efc->command_handler(ef, sccb, code);
         break;
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index ba0e4b4..1c1681c 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -1047,6 +1047,7 @@  static inline void cpu_inject_crw_mchk(S390CPU *cpu)
 
 /* from s390-virtio-ccw */
 #define MEM_SECTION_SIZE             0x10000000UL
+#define MAX_AVAIL_SLOTS              32
 
 /* fpu_helper.c */
 uint32_t set_cc_nz_f32(float32 v);
@@ -1070,6 +1071,7 @@  void kvm_s390_enable_css_support(S390CPU *cpu);
 int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
                                     int vq, bool assign);
 int kvm_s390_cpu_restart(S390CPU *cpu);
+int kvm_s390_get_memslot_count(KVMState *s);
 void kvm_s390_clear_cmma_callback(void *opaque);
 #else
 static inline void kvm_s390_io_interrupt(uint16_t subchannel_id,
@@ -1097,6 +1099,10 @@  static inline int kvm_s390_cpu_restart(S390CPU *cpu)
 static inline void kvm_s390_clear_cmma_callback(void *opaque)
 {
 }
+static inline int kvm_s390_get_memslot_count(KVMState *s)
+{
+  return MAX_AVAIL_SLOTS; /* stub: s is ignored, fixed default returned */
+}
 #endif
 
 static inline void cmma_reset(S390CPU *cpu)
@@ -1115,6 +1121,15 @@  static inline int s390_cpu_restart(S390CPU *cpu)
     return -ENOSYS;
 }
 
+static inline int s390_get_memslot_count(KVMState *s)
+{
+    /* Without KVM, fall back to the fixed MAX_AVAIL_SLOTS default */
+    if (!kvm_enabled()) {
+        return MAX_AVAIL_SLOTS;
+    }
+    return kvm_s390_get_memslot_count(s);
+}
+
 void s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr,
                        uint32_t io_int_parm, uint32_t io_int_word);
 void s390_crw_mchk(void);
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index a6e587b..4b05d48 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -1283,3 +1283,8 @@  int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch,
     }
     return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
 }
+
+int kvm_s390_get_memslot_count(KVMState *s)
+{
+    return kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); /* queried from KVM */
+}