diff mbox

[5/5] sclp-s390: Add memory hotplug SCLPs

Message ID 1387227072-21965-6-git-send-email-mjrosato@linux.vnet.ibm.com
State New
Headers show

Commit Message

Matthew Rosato Dec. 16, 2013, 8:51 p.m. UTC
Add memory information to read SCP info and add handlers for
Read Storage Element Information, Attach Storage Element,
Assign Storage and Unassign Storage.

Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
---
 hw/s390x/sclp.c         |  233 +++++++++++++++++++++++++++++++++++++++++++++--
 include/hw/s390x/sclp.h |    2 +
 2 files changed, 229 insertions(+), 6 deletions(-)

Comments

Alexander Graf Dec. 16, 2013, 9:42 p.m. UTC | #1
On 16.12.2013, at 21:51, Matthew Rosato <mjrosato@linux.vnet.ibm.com> wrote:

> Add memory information to read SCP info and add handlers for
> Read Storage Element Information, Attach Storage Element,
> Assign Storage and Unassign Storage.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
> ---
> hw/s390x/sclp.c         |  233 +++++++++++++++++++++++++++++++++++++++++++++--
> include/hw/s390x/sclp.h |    2 +
> 2 files changed, 229 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
> index cb53d7e..5d27fc3 100644
> --- a/hw/s390x/sclp.c
> +++ b/hw/s390x/sclp.c
> @@ -15,9 +15,15 @@
> #include "cpu.h"
> #include "sysemu/kvm.h"
> #include "exec/memory.h"
> -
> +#include "exec/address-spaces.h"
> #include "hw/s390x/sclp.h"
> 
> +int shift;
> +ram_addr_t standby_subregion_size;
> +ram_addr_t padded_ram_size;
> +ram_addr_t rzm;
> +char *standby_state_map;

Do you really need all these globals?

> +
> static inline S390SCLPDevice *get_event_facility(void)
> {
>     ObjectProperty *op = object_property_find(qdev_get_machine(),
> @@ -31,16 +37,215 @@ static inline S390SCLPDevice *get_event_facility(void)
> static void read_SCP_info(SCCB *sccb)
> {
>     ReadInfo *read_info = (ReadInfo *) sccb;
> -    int shift = 0;
> +    int rnsize, rnmax;
> +    int max_avail_slots = MAX_AVAIL_SLOTS;
> +#ifdef CONFIG_KVM
> +    max_avail_slots = kvm_check_extension(kvm_state, KVM_CAP_NR_MEMSLOTS);

Please don't call kvm specific code from non-kvm specific code. Check out kvm_s390_io_interrupt() as an example how to plumb it in. Please also check kvm_enabled(), as CONFIG_KVM doesn't mean that you actually end up using KVM. It's probably best to completely abstract this away in a helper.

> +#endif
> +
> +    read_info->facilities = 0;
> +
> +    if (standby_mem_size) {

I don't understand why we have 2 code paths - one for standby and one without. Can't we just handle all cases as standby?

> +        /*
> +         * The storage increment size is a multiple of 1M and is a power of 2.
> +         * The number of storage increments must be 1020 or fewer.
> +         */
> +        while ((ram_size >> (20 + shift)) > 1020) {
> +            shift++;
> +        }
> +        while ((standby_mem_size >> (20 + shift)) > 1020) {
> +            shift++;
> +        }
> +
> +        standby_subregion_size = MEM_SECTION_SIZE;
> +        /* Deduct the memory slot already used for core */
> +        while ((standby_subregion_size * (max_avail_slots - 1)
> +               < standby_mem_size)) {
> +            standby_subregion_size = standby_subregion_size << 1;
> +        }
> +        /*
> +         * Initialize mapping of guest standby memory sections indicating which
> +         * are and are not online. Assume all standby memory begins offline.
> +         */
> +        if (standby_mem_size % standby_subregion_size) {
> +            standby_state_map = g_malloc0((standby_mem_size /
> +                                           standby_subregion_size + 1) *
> +                                          (standby_subregion_size /
> +                                           MEM_SECTION_SIZE));
> +        } else {
> +            standby_state_map = g_malloc0(standby_mem_size / MEM_SECTION_SIZE);
> +        }

You don't free this thing when the guest calls read_SCP_info twice.

> +
> +        padded_ram_size = ram_size + pad_size;
> +        rzm = 1 << (20 + shift);
> +
> +    } else {
> +        while ((ram_size >> (20 + shift)) > 65535) {
> +            shift++;
> +        }
> +    }
> 
> -    while ((ram_size >> (20 + shift)) > 65535) {
> -        shift++;
> +    rnsize = 1 << shift;
> +    if (rnsize <= 128) {
> +        read_info->rnsize = rnsize;
> +    } else {
> +        read_info->rnsize = 0;
> +        read_info->rnsize2 = cpu_to_be32(rnsize);
> +    }
> +
> +    rnmax = (ram_size + standby_mem_size + pad_size) >> (20 + shift);
> +    if (rnmax < 0x10000) {
> +        read_info->rnmax = cpu_to_be16(rnmax);
> +    } else {
> +        read_info->rnmax = cpu_to_be16(0);
> +        read_info->rnmax2 = cpu_to_be64(rnmax);
> +    }
> +
> +    if (standby_mem_size) {
> +        read_info->facilities |= cpu_to_be64(SCLP_FC_ASSIGN_ATTACH_READ_STOR);

Any reason to make this conditional?

>     }
> -    read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift));
> -    read_info->rnsize = 1 << shift;
>     sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
> }
> 
> +static void read_storage_element0_info(SCCB *sccb)
> +{
> +    int i, assigned;
> +    int subincrement_id = SCLP_STARTING_SUBINCREMENT_ID;
> +    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
> +
> +    if ((ram_size >> (20 + shift)) >= 0x10000) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
> +        return;
> +    }
> +
> +    storage_info->max_id = cpu_to_be16(standby_mem_size ? 1 : 0);
> +    assigned = ram_size >> (20 + shift);
> +    storage_info->assigned = cpu_to_be16(assigned);
> +
> +    for (i = 0; i < assigned; i++) {
> +        storage_info->entries[i] = cpu_to_be32(subincrement_id);
> +        subincrement_id += SCLP_INCREMENT_UNIT;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
> +}
> +
> +static void read_storage_element1_info(SCCB *sccb)
> +{
> +    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
> +
> +    if ((standby_mem_size >> (20 + shift)) >= 0x10000) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
> +        return;
> +    }
> +
> +    storage_info->max_id = cpu_to_be16(standby_mem_size ? 1 : 0);
> +    storage_info->assigned = cpu_to_be16(standby_mem_size >> (20 + shift));
> +    storage_info->standby = cpu_to_be16(standby_mem_size >> (20 + shift));
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_STANDBY_READ_COMPLETION);
> +}
> +
> +static void attach_storage_element(SCCB *sccb, uint16_t element)
> +{
> +    int i, assigned, subincrement_id;
> +    AttachStorageElement *attach_info = (AttachStorageElement *) sccb;
> +
> +    if (element != 1) {
> +        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
> +        return;
> +    }
> +
> +    assigned = standby_mem_size >> (20 + shift);
> +    attach_info->assigned = cpu_to_be16(assigned);
> +    subincrement_id = ((ram_size >> (20 + shift)) << 16)
> +                      + SCLP_STARTING_SUBINCREMENT_ID;
> +    for (i = 0; i < assigned; i++) {
> +        attach_info->entries[i] = cpu_to_be32(subincrement_id);
> +        subincrement_id += SCLP_INCREMENT_UNIT;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
> +static void assign_storage(SCCB *sccb)
> +{
> +    MemoryRegion *mr = NULL;
> +    int this_subregion_size;
> +    AssignStorage *assign_info = (AssignStorage *) sccb;
> +    ram_addr_t assign_addr = (assign_info->rn - 1) * rzm;
> +    MemoryRegion *sysmem = get_system_memory();
> +
> +    if ((assign_addr % MEM_SECTION_SIZE == 0) &&
> +        (assign_addr >= padded_ram_size)) {
> +        mr = memory_region_find(sysmem, assign_addr, 1).mr;
> +        if (!mr) {
> +
> +            MemoryRegion *standby_ram = g_new(MemoryRegion, 1);
> +
> +            /* offset to align to standby_subregion_size for allocation */
> +            ram_addr_t offset = assign_addr -
> +                                (assign_addr - padded_ram_size)
> +                                % standby_subregion_size;
> +
> +            /* strlen("standby.ram") + 4 (Max of KVM_MEMORY_SLOTS) +  NULL */
> +            char id[16];
> +            snprintf(id, 16, "standby.ram%d", (int)((offset - padded_ram_size)
> +                     / standby_subregion_size) + 1);
> +
> +            /* Allocate a subregion of the calculated standby_subregion_size */
> +            if (offset + standby_subregion_size >
> +                padded_ram_size + standby_mem_size) {
> +                this_subregion_size = padded_ram_size + standby_mem_size
> +                                      - offset;
> +            } else {
> +                this_subregion_size = standby_subregion_size;
> +            }
> +
> +            memory_region_init_ram(standby_ram, NULL, id, this_subregion_size);
> +            vmstate_register_ram_global(standby_ram);
> +            memory_region_add_subregion(sysmem, offset, standby_ram);
> +        }
> +        standby_state_map[(assign_addr - padded_ram_size)
> +                          / MEM_SECTION_SIZE] = 1;
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
> +static void unassign_storage(SCCB *sccb)
> +{
> +    MemoryRegion *mr = NULL;
> +    AssignStorage *assign_info = (AssignStorage *) sccb;
> +    ram_addr_t unassign_addr = (assign_info->rn - 1) * rzm;
> +    MemoryRegion *sysmem = get_system_memory();
> +
> +    /* if the addr is a multiple of 256 MB */
> +    if ((unassign_addr % MEM_SECTION_SIZE == 0) &&
> +        (unassign_addr >= padded_ram_size)) {
> +        standby_state_map[(unassign_addr -
> +                           padded_ram_size) / MEM_SECTION_SIZE] = 0;
> +        mr = memory_region_find(sysmem, unassign_addr, 1).mr;
> +        if (mr) {
> +            int i;
> +            int is_removable = 1;
> +            ram_addr_t map_offset = (unassign_addr - padded_ram_size -
> +                                     (unassign_addr - padded_ram_size)
> +                                     % standby_subregion_size);
> +            for (i = 0;
> +                 i < (standby_subregion_size / MEM_SECTION_SIZE);
> +                 i++) {
> +
> +                if (standby_state_map[i + map_offset / MEM_SECTION_SIZE]) {
> +                    is_removable = 0;
> +                    break;
> +                }
> +            }
> +            if (is_removable) {
> +                memory_region_del_subregion(sysmem, mr);
> +                memory_region_destroy(mr);

no g_free()?

> +            }
> +        }
> +    }
> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
> +}
> +
> static void sclp_execute(SCCB *sccb, uint64_t code)
> {
>     S390SCLPDevice *sdev = get_event_facility();
> @@ -50,6 +255,22 @@ static void sclp_execute(SCCB *sccb, uint64_t code)
>     case SCLP_CMDW_READ_SCP_INFO_FORCED:
>         read_SCP_info(sccb);
>         break;
> +    case SCLP_READ_STORAGE_ELEMENT_INFO:
> +        if (code & 0xff00) {
> +            read_storage_element1_info(sccb);
> +        } else {
> +            read_storage_element0_info(sccb);
> +        }
> +        break;
> +    case SCLP_ATTACH_STORAGE_ELEMENT:
> +        attach_storage_element(sccb, (code & 0xff00) >> 8);
> +        break;
> +    case SCLP_ASSIGN_STORAGE:
> +        assign_storage(sccb);
> +        break;
> +    case SCLP_UNASSIGN_STORAGE:
> +        unassign_storage(sccb);
> +        break;

Do you think it'd be possible to model this whole thing as a device that has its own state? That's where you could keep the bitmap for example. You'd only need some callback mechanism to hook into the SCLP calls, but the PPC guys already have something similar with their hypercalls.

Overall, the code could use _a lot_ more comments.


Alex
Alexander Graf Dec. 16, 2013, 11:18 p.m. UTC | #2
On 16.12.2013, at 22:42, Alexander Graf <agraf@suse.de> wrote:

> 
> On 16.12.2013, at 21:51, Matthew Rosato <mjrosato@linux.vnet.ibm.com> wrote:
> 

[...]

> 
>> +            }
>> +        }
>> +    }
>> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
>> +}
>> +
>> static void sclp_execute(SCCB *sccb, uint64_t code)
>> {
>>    S390SCLPDevice *sdev = get_event_facility();
>> @@ -50,6 +255,22 @@ static void sclp_execute(SCCB *sccb, uint64_t code)
>>    case SCLP_CMDW_READ_SCP_INFO_FORCED:
>>        read_SCP_info(sccb);
>>        break;
>> +    case SCLP_READ_STORAGE_ELEMENT_INFO:
>> +        if (code & 0xff00) {
>> +            read_storage_element1_info(sccb);
>> +        } else {
>> +            read_storage_element0_info(sccb);
>> +        }
>> +        break;
>> +    case SCLP_ATTACH_STORAGE_ELEMENT:
>> +        attach_storage_element(sccb, (code & 0xff00) >> 8);
>> +        break;
>> +    case SCLP_ASSIGN_STORAGE:
>> +        assign_storage(sccb);
>> +        break;
>> +    case SCLP_UNASSIGN_STORAGE:
>> +        unassign_storage(sccb);
>> +        break;
> 
> Do you think it'd be possible to model this whole thing as a device that has its own state? That's where you could keep the bitmap for example. You'd only need some callback mechanism to hook into the SCLP calls, but the PPC guys already have something similar with their hypercalls.

Speaking of state - in the current model the "is standby storage active" bitmap doesn't get migrated, no?


Alex
Matthew Rosato Dec. 17, 2013, 4:06 p.m. UTC | #3
On 12/16/2013 04:42 PM, Alexander Graf wrote:
> 
> On 16.12.2013, at 21:51, Matthew Rosato <mjrosato@linux.vnet.ibm.com> wrote:
> 
>> Add memory information to read SCP info and add handlers for
>> Read Storage Element Information, Attach Storage Element,
>> Assign Storage and Unassign Storage.
>>
>> Signed-off-by: Matthew Rosato <mjrosato@linux.vnet.ibm.com>
>> ---
>> hw/s390x/sclp.c         |  233 +++++++++++++++++++++++++++++++++++++++++++++--
>> include/hw/s390x/sclp.h |    2 +
>> 2 files changed, 229 insertions(+), 6 deletions(-)
>>
>> diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
>> index cb53d7e..5d27fc3 100644
>> --- a/hw/s390x/sclp.c
>> +++ b/hw/s390x/sclp.c
>> @@ -15,9 +15,15 @@
>> #include "cpu.h"
>> #include "sysemu/kvm.h"
>> #include "exec/memory.h"
>> -
>> +#include "exec/address-spaces.h"
>> #include "hw/s390x/sclp.h"
>>
>> +int shift;
>> +ram_addr_t standby_subregion_size;
>> +ram_addr_t padded_ram_size;
>> +ram_addr_t rzm;
>> +char *standby_state_map;
> 
> Do you really need all these globals?
> 

I think this ties into your device comment below - I agree that these
should be encapsulated.

>> +
>> static inline S390SCLPDevice *get_event_facility(void)
>> {
>>     ObjectProperty *op = object_property_find(qdev_get_machine(),
>> @@ -31,16 +37,215 @@ static inline S390SCLPDevice *get_event_facility(void)
>> static void read_SCP_info(SCCB *sccb)
>> {
>>     ReadInfo *read_info = (ReadInfo *) sccb;
>> -    int shift = 0;
>> +    int rnsize, rnmax;
>> +    int max_avail_slots = MAX_AVAIL_SLOTS;
>> +#ifdef CONFIG_KVM
>> +    max_avail_slots = kvm_check_extension(kvm_state, KVM_CAP_NR_MEMSLOTS);
> 
> Please don't call kvm specific code from non-kvm specific code. Check out kvm_s390_io_interrupt() as an example how to plumb it in. Please also check kvm_enabled(), as CONFIG_KVM doesn't mean that you actually end up using KVM. It's probably best to completely abstract this away in a helper.
> 

OK, will do.

>> +#endif
>> +
>> +    read_info->facilities = 0;
>> +
>> +    if (standby_mem_size) {
> 
> I don't understand why we have 2 code paths - one for standby and one without. Can't we just handle all cases as standby?
> 

Yes I think so -  everything should fall-through naturally and we can
just skip the malloc of standby_state_map.

>> +        /*
>> +         * The storage increment size is a multiple of 1M and is a power of 2.
>> +         * The number of storage increments must be 1020 or fewer.
>> +         */
>> +        while ((ram_size >> (20 + shift)) > 1020) {
>> +            shift++;
>> +        }
>> +        while ((standby_mem_size >> (20 + shift)) > 1020) {
>> +            shift++;
>> +        }
>> +
>> +        standby_subregion_size = MEM_SECTION_SIZE;
>> +        /* Deduct the memory slot already used for core */
>> +        while ((standby_subregion_size * (max_avail_slots - 1)
>> +               < standby_mem_size)) {
>> +            standby_subregion_size = standby_subregion_size << 1;
>> +        }
>> +        /*
>> +         * Initialize mapping of guest standby memory sections indicating which
>> +         * are and are not online. Assume all standby memory begins offline.
>> +         */
>> +        if (standby_mem_size % standby_subregion_size) {
>> +            standby_state_map = g_malloc0((standby_mem_size /
>> +                                           standby_subregion_size + 1) *
>> +                                          (standby_subregion_size /
>> +                                           MEM_SECTION_SIZE));
>> +        } else {
>> +            standby_state_map = g_malloc0(standby_mem_size / MEM_SECTION_SIZE);
>> +        }
> 
> You don't free this thing when the guest calls read_SCP_info twice.
> 

Oops.  Acknowledged.

>> +
>> +        padded_ram_size = ram_size + pad_size;
>> +        rzm = 1 << (20 + shift);
>> +
>> +    } else {
>> +        while ((ram_size >> (20 + shift)) > 65535) {
>> +            shift++;
>> +        }
>> +    }
>>
>> -    while ((ram_size >> (20 + shift)) > 65535) {
>> -        shift++;
>> +    rnsize = 1 << shift;
>> +    if (rnsize <= 128) {
>> +        read_info->rnsize = rnsize;
>> +    } else {
>> +        read_info->rnsize = 0;
>> +        read_info->rnsize2 = cpu_to_be32(rnsize);
>> +    }
>> +
>> +    rnmax = (ram_size + standby_mem_size + pad_size) >> (20 + shift);
>> +    if (rnmax < 0x10000) {
>> +        read_info->rnmax = cpu_to_be16(rnmax);
>> +    } else {
>> +        read_info->rnmax = cpu_to_be16(0);
>> +        read_info->rnmax2 = cpu_to_be64(rnmax);
>> +    }
>> +
>> +    if (standby_mem_size) {
>> +        read_info->facilities |= cpu_to_be64(SCLP_FC_ASSIGN_ATTACH_READ_STOR);
> 
> Any reason to make this conditional?
> 

I suppose not -- I'll make this unconditional.

>>     }
>> -    read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift));
>> -    read_info->rnsize = 1 << shift;
>>     sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
>> }

[...]

>> +static void unassign_storage(SCCB *sccb)
>> +{
>> +    MemoryRegion *mr = NULL;
>> +    AssignStorage *assign_info = (AssignStorage *) sccb;
>> +    ram_addr_t unassign_addr = (assign_info->rn - 1) * rzm;
>> +    MemoryRegion *sysmem = get_system_memory();
>> +
>> +    /* if the addr is a multiple of 256 MB */
>> +    if ((unassign_addr % MEM_SECTION_SIZE == 0) &&
>> +        (unassign_addr >= padded_ram_size)) {
>> +        standby_state_map[(unassign_addr -
>> +                           padded_ram_size) / MEM_SECTION_SIZE] = 0;
>> +        mr = memory_region_find(sysmem, unassign_addr, 1).mr;
>> +        if (mr) {
>> +            int i;
>> +            int is_removable = 1;
>> +            ram_addr_t map_offset = (unassign_addr - padded_ram_size -
>> +                                     (unassign_addr - padded_ram_size)
>> +                                     % standby_subregion_size);
>> +            for (i = 0;
>> +                 i < (standby_subregion_size / MEM_SECTION_SIZE);
>> +                 i++) {
>> +
>> +                if (standby_state_map[i + map_offset / MEM_SECTION_SIZE]) {
>> +                    is_removable = 0;
>> +                    break;
>> +                }
>> +            }
>> +            if (is_removable) {
>> +                memory_region_del_subregion(sysmem, mr);
>> +                memory_region_destroy(mr);
> 
> no g_free()?

Oops.  Acknowledged.

> 
>> +            }
>> +        }
>> +    }
>> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
>> +}
>> +
>> static void sclp_execute(SCCB *sccb, uint64_t code)
>> {
>>     S390SCLPDevice *sdev = get_event_facility();
>> @@ -50,6 +255,22 @@ static void sclp_execute(SCCB *sccb, uint64_t code)
>>     case SCLP_CMDW_READ_SCP_INFO_FORCED:
>>         read_SCP_info(sccb);
>>         break;
>> +    case SCLP_READ_STORAGE_ELEMENT_INFO:
>> +        if (code & 0xff00) {
>> +            read_storage_element1_info(sccb);
>> +        } else {
>> +            read_storage_element0_info(sccb);
>> +        }
>> +        break;
>> +    case SCLP_ATTACH_STORAGE_ELEMENT:
>> +        attach_storage_element(sccb, (code & 0xff00) >> 8);
>> +        break;
>> +    case SCLP_ASSIGN_STORAGE:
>> +        assign_storage(sccb);
>> +        break;
>> +    case SCLP_UNASSIGN_STORAGE:
>> +        unassign_storage(sccb);
>> +        break;
> 
> Do you think it'd be possible to model this whole thing as a device that has its own state? That's where you could keep the bitmap for example. You'd only need some callback mechanism to hook into the SCLP calls, but the PPC guys already have something similar with their hypercalls.
> 

OK, let me look into this for the next version.

> Overall, the code could use _a lot_ more comments.

Will do.  Thanks for your comments.

> 
> 
> Alex
> 
> 
> 
>
Matthew Rosato Dec. 17, 2013, 4:09 p.m. UTC | #4
On 12/16/2013 06:18 PM, Alexander Graf wrote:
> 
> On 16.12.2013, at 22:42, Alexander Graf <agraf@suse.de> wrote:
> 
>>
>> On 16.12.2013, at 21:51, Matthew Rosato <mjrosato@linux.vnet.ibm.com> wrote:
>>
> 
> [...]
> 
>>
>>> +            }
>>> +        }
>>> +    }
>>> +    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
>>> +}
>>> +
>>> static void sclp_execute(SCCB *sccb, uint64_t code)
>>> {
>>>    S390SCLPDevice *sdev = get_event_facility();
>>> @@ -50,6 +255,22 @@ static void sclp_execute(SCCB *sccb, uint64_t code)
>>>    case SCLP_CMDW_READ_SCP_INFO_FORCED:
>>>        read_SCP_info(sccb);
>>>        break;
>>> +    case SCLP_READ_STORAGE_ELEMENT_INFO:
>>> +        if (code & 0xff00) {
>>> +            read_storage_element1_info(sccb);
>>> +        } else {
>>> +            read_storage_element0_info(sccb);
>>> +        }
>>> +        break;
>>> +    case SCLP_ATTACH_STORAGE_ELEMENT:
>>> +        attach_storage_element(sccb, (code & 0xff00) >> 8);
>>> +        break;
>>> +    case SCLP_ASSIGN_STORAGE:
>>> +        assign_storage(sccb);
>>> +        break;
>>> +    case SCLP_UNASSIGN_STORAGE:
>>> +        unassign_storage(sccb);
>>> +        break;
>>
>> Do you think it'd be possible to model this whole thing as a device that has its own state? That's where you could keep the bitmap for example. You'd only need some callback mechanism to hook into the SCLP calls, but the PPC guys already have something similar with their hypercalls.
> 
> Speaking of state - in the current model the "is standby storage active" bitmap doesn't get migrated, no?
> 

Yes, you are correct, migration support still needs to be added  - But I
wanted to get this much out for review at least.

> 
> Alex
> 
> 
>
diff mbox

Patch

diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c
index cb53d7e..5d27fc3 100644
--- a/hw/s390x/sclp.c
+++ b/hw/s390x/sclp.c
@@ -15,9 +15,15 @@ 
 #include "cpu.h"
 #include "sysemu/kvm.h"
 #include "exec/memory.h"
-
+#include "exec/address-spaces.h"
 #include "hw/s390x/sclp.h"
 
+int shift;
+ram_addr_t standby_subregion_size;
+ram_addr_t padded_ram_size;
+ram_addr_t rzm;
+char *standby_state_map;
+
 static inline S390SCLPDevice *get_event_facility(void)
 {
     ObjectProperty *op = object_property_find(qdev_get_machine(),
@@ -31,16 +37,215 @@  static inline S390SCLPDevice *get_event_facility(void)
 static void read_SCP_info(SCCB *sccb)
 {
     ReadInfo *read_info = (ReadInfo *) sccb;
-    int shift = 0;
+    int rnsize, rnmax;
+    int max_avail_slots = MAX_AVAIL_SLOTS;
+#ifdef CONFIG_KVM
+    max_avail_slots = kvm_check_extension(kvm_state, KVM_CAP_NR_MEMSLOTS);
+#endif
+
+    read_info->facilities = 0;
+
+    if (standby_mem_size) {
+        /*
+         * The storage increment size is a multiple of 1M and is a power of 2.
+         * The number of storage increments must be 1020 or fewer.
+         */
+        while ((ram_size >> (20 + shift)) > 1020) {
+            shift++;
+        }
+        while ((standby_mem_size >> (20 + shift)) > 1020) {
+            shift++;
+        }
+
+        standby_subregion_size = MEM_SECTION_SIZE;
+        /* Deduct the memory slot already used for core */
+        while ((standby_subregion_size * (max_avail_slots - 1)
+               < standby_mem_size)) {
+            standby_subregion_size = standby_subregion_size << 1;
+        }
+        /*
+         * Initialize mapping of guest standby memory sections indicating which
+         * are and are not online. Assume all standby memory begins offline.
+         */
+        if (standby_mem_size % standby_subregion_size) {
+            standby_state_map = g_malloc0((standby_mem_size /
+                                           standby_subregion_size + 1) *
+                                          (standby_subregion_size /
+                                           MEM_SECTION_SIZE));
+        } else {
+            standby_state_map = g_malloc0(standby_mem_size / MEM_SECTION_SIZE);
+        }
+
+        padded_ram_size = ram_size + pad_size;
+        rzm = 1 << (20 + shift);
+
+    } else {
+        while ((ram_size >> (20 + shift)) > 65535) {
+            shift++;
+        }
+    }
 
-    while ((ram_size >> (20 + shift)) > 65535) {
-        shift++;
+    rnsize = 1 << shift;
+    if (rnsize <= 128) {
+        read_info->rnsize = rnsize;
+    } else {
+        read_info->rnsize = 0;
+        read_info->rnsize2 = cpu_to_be32(rnsize);
+    }
+
+    rnmax = (ram_size + standby_mem_size + pad_size) >> (20 + shift);
+    if (rnmax < 0x10000) {
+        read_info->rnmax = cpu_to_be16(rnmax);
+    } else {
+        read_info->rnmax = cpu_to_be16(0);
+        read_info->rnmax2 = cpu_to_be64(rnmax);
+    }
+
+    if (standby_mem_size) {
+        read_info->facilities |= cpu_to_be64(SCLP_FC_ASSIGN_ATTACH_READ_STOR);
     }
-    read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift));
-    read_info->rnsize = 1 << shift;
     sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
 }
 
+static void read_storage_element0_info(SCCB *sccb)
+{
+    int i, assigned;
+    int subincrement_id = SCLP_STARTING_SUBINCREMENT_ID;
+    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
+
+    if ((ram_size >> (20 + shift)) >= 0x10000) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
+        return;
+    }
+
+    storage_info->max_id = cpu_to_be16(standby_mem_size ? 1 : 0);
+    assigned = ram_size >> (20 + shift);
+    storage_info->assigned = cpu_to_be16(assigned);
+
+    for (i = 0; i < assigned; i++) {
+        storage_info->entries[i] = cpu_to_be32(subincrement_id);
+        subincrement_id += SCLP_INCREMENT_UNIT;
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION);
+}
+
+static void read_storage_element1_info(SCCB *sccb)
+{
+    ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb;
+
+    if ((standby_mem_size >> (20 + shift)) >= 0x10000) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION);
+        return;
+    }
+
+    storage_info->max_id = cpu_to_be16(standby_mem_size ? 1 : 0);
+    storage_info->assigned = cpu_to_be16(standby_mem_size >> (20 + shift));
+    storage_info->standby = cpu_to_be16(standby_mem_size >> (20 + shift));
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_STANDBY_READ_COMPLETION);
+}
+
+static void attach_storage_element(SCCB *sccb, uint16_t element)
+{
+    int i, assigned, subincrement_id;
+    AttachStorageElement *attach_info = (AttachStorageElement *) sccb;
+
+    if (element != 1) {
+        sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND);
+        return;
+    }
+
+    assigned = standby_mem_size >> (20 + shift);
+    attach_info->assigned = cpu_to_be16(assigned);
+    subincrement_id = ((ram_size >> (20 + shift)) << 16)
+                      + SCLP_STARTING_SUBINCREMENT_ID;
+    for (i = 0; i < assigned; i++) {
+        attach_info->entries[i] = cpu_to_be32(subincrement_id);
+        subincrement_id += SCLP_INCREMENT_UNIT;
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
+static void assign_storage(SCCB *sccb)
+{
+    MemoryRegion *mr = NULL;
+    int this_subregion_size;
+    AssignStorage *assign_info = (AssignStorage *) sccb;
+    ram_addr_t assign_addr = (assign_info->rn - 1) * rzm;
+    MemoryRegion *sysmem = get_system_memory();
+
+    if ((assign_addr % MEM_SECTION_SIZE == 0) &&
+        (assign_addr >= padded_ram_size)) {
+        mr = memory_region_find(sysmem, assign_addr, 1).mr;
+        if (!mr) {
+
+            MemoryRegion *standby_ram = g_new(MemoryRegion, 1);
+
+            /* offset to align to standby_subregion_size for allocation */
+            ram_addr_t offset = assign_addr -
+                                (assign_addr - padded_ram_size)
+                                % standby_subregion_size;
+
+            /* strlen("standby.ram") + 4 (Max of KVM_MEMORY_SLOTS) +  NULL */
+            char id[16];
+            snprintf(id, 16, "standby.ram%d", (int)((offset - padded_ram_size)
+                     / standby_subregion_size) + 1);
+
+            /* Allocate a subregion of the calculated standby_subregion_size */
+            if (offset + standby_subregion_size >
+                padded_ram_size + standby_mem_size) {
+                this_subregion_size = padded_ram_size + standby_mem_size
+                                      - offset;
+            } else {
+                this_subregion_size = standby_subregion_size;
+            }
+
+            memory_region_init_ram(standby_ram, NULL, id, this_subregion_size);
+            vmstate_register_ram_global(standby_ram);
+            memory_region_add_subregion(sysmem, offset, standby_ram);
+        }
+        standby_state_map[(assign_addr - padded_ram_size)
+                          / MEM_SECTION_SIZE] = 1;
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
+static void unassign_storage(SCCB *sccb)
+{
+    MemoryRegion *mr = NULL;
+    AssignStorage *assign_info = (AssignStorage *) sccb;
+    ram_addr_t unassign_addr = (assign_info->rn - 1) * rzm;
+    MemoryRegion *sysmem = get_system_memory();
+
+    /* if the addr is a multiple of 256 MB */
+    if ((unassign_addr % MEM_SECTION_SIZE == 0) &&
+        (unassign_addr >= padded_ram_size)) {
+        standby_state_map[(unassign_addr -
+                           padded_ram_size) / MEM_SECTION_SIZE] = 0;
+        mr = memory_region_find(sysmem, unassign_addr, 1).mr;
+        if (mr) {
+            int i;
+            int is_removable = 1;
+            ram_addr_t map_offset = (unassign_addr - padded_ram_size -
+                                     (unassign_addr - padded_ram_size)
+                                     % standby_subregion_size);
+            for (i = 0;
+                 i < (standby_subregion_size / MEM_SECTION_SIZE);
+                 i++) {
+
+                if (standby_state_map[i + map_offset / MEM_SECTION_SIZE]) {
+                    is_removable = 0;
+                    break;
+                }
+            }
+            if (is_removable) {
+                memory_region_del_subregion(sysmem, mr);
+                memory_region_destroy(mr);
+            }
+        }
+    }
+    sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION);
+}
+
 static void sclp_execute(SCCB *sccb, uint64_t code)
 {
     S390SCLPDevice *sdev = get_event_facility();
@@ -50,6 +255,22 @@  static void sclp_execute(SCCB *sccb, uint64_t code)
     case SCLP_CMDW_READ_SCP_INFO_FORCED:
         read_SCP_info(sccb);
         break;
+    case SCLP_READ_STORAGE_ELEMENT_INFO:
+        if (code & 0xff00) {
+            read_storage_element1_info(sccb);
+        } else {
+            read_storage_element0_info(sccb);
+        }
+        break;
+    case SCLP_ATTACH_STORAGE_ELEMENT:
+        attach_storage_element(sccb, (code & 0xff00) >> 8);
+        break;
+    case SCLP_ASSIGN_STORAGE:
+        assign_storage(sccb);
+        break;
+    case SCLP_UNASSIGN_STORAGE:
+        unassign_storage(sccb);
+        break;
     default:
         sdev->sclp_command_handler(sdev->ef, sccb, code);
         break;
diff --git a/include/hw/s390x/sclp.h b/include/hw/s390x/sclp.h
index e80cb23..1211d69 100644
--- a/include/hw/s390x/sclp.h
+++ b/include/hw/s390x/sclp.h
@@ -143,6 +143,8 @@  static inline int sccb_data_len(SCCB *sccb)
      OBJECT_GET_CLASS(S390SCLPDeviceClass, (obj), \
              TYPE_DEVICE_S390_SCLP)
 
+extern ram_addr_t pad_size;
+
 typedef struct SCLPEventFacility SCLPEventFacility;
 
 typedef struct S390SCLPDevice {