diff mbox series

[v8,10/24] MPIPL: Register for OPAL dump

Message ID 20190616171024.22799-11-hegdevasant@linux.vnet.ibm.com
State Superseded
Headers show
Series MPIPL support | expand

Checks

Context Check Description
snowpatch_ozlabs/apply_patch success Successfully applied on branch master (dbf27b6c4af84addb36bd3be34f96580aba9c873)
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot fail Test snowpatch/job/snowpatch-skiboot on branch master
snowpatch_ozlabs/snowpatch_job_snowpatch-skiboot-dco success Signed-off-by present

Commit Message

Vasant Hegde June 16, 2019, 5:10 p.m. UTC
This patch adds support to register for OPAL dump.
  - Calculate memory required to capture OPAL dump
  - Reserve OPAL dump destination memory
  - Add OPAL dump details to MDST and MDDT table

Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
---
 core/Makefile.inc   |   2 +-
 core/init.c         |   6 ++-
 core/opal-dump.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 include/opal-dump.h |   4 ++
 4 files changed, 153 insertions(+), 2 deletions(-)
 create mode 100644 core/opal-dump.c

Comments

Nicholas Piggin June 28, 2019, 1:33 a.m. UTC | #1
Vasant Hegde's on June 17, 2019 3:10 am:
> This patch adds support to register for OPAL dump.
>   - Calculate memory required to capture OPAL dump
>   - Reserve OPAL dump destination memory
>   - Add OPAL dump details to MDST and MDDT table
> 
> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
> ---
>  core/Makefile.inc   |   2 +-
>  core/init.c         |   6 ++-
>  core/opal-dump.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  include/opal-dump.h |   4 ++
>  4 files changed, 153 insertions(+), 2 deletions(-)
>  create mode 100644 core/opal-dump.c
> 
> diff --git a/core/Makefile.inc b/core/Makefile.inc
> index 21c12fb8d..c2c9731db 100644
> --- a/core/Makefile.inc
> +++ b/core/Makefile.inc
> @@ -10,7 +10,7 @@ CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
>  CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
>  CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
>  CORE_OBJS += pci-dt-slot.o direct-controls.o cpufeatures.o
> -CORE_OBJS += flash-firmware-versions.o
> +CORE_OBJS += flash-firmware-versions.o opal-dump.o
>  
>  ifeq ($(SKIBOOT_GCOV),1)
>  CORE_OBJS += gcov-profiling.o
> diff --git a/core/init.c b/core/init.c
> index 3db9df314..03776537e 100644
> --- a/core/init.c
> +++ b/core/init.c
> @@ -1,4 +1,4 @@
> -/* Copyright 2013-2016 IBM Corp.
> +/* Copyright 2013-2019 IBM Corp.
>   *
>   * Licensed under the Apache License, Version 2.0 (the "License");
>   * you may not use this file except in compliance with the License.
> @@ -55,6 +55,7 @@
>  #include <sbe-p9.h>
>  #include <debug_descriptor.h>
>  #include <occ.h>
> +#include <opal-dump.h>
>  
>  enum proc_gen proc_gen;
>  unsigned int pcie_max_link_speed;
> @@ -1173,6 +1174,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>  	if (platform.init)
>  		platform.init();
>  
> +	/* init opal dump */
> +	opal_mpipl_init();
> +
>  	/* Read in NVRAM and set it up */
>  	nvram_init();
>  

So you're reserving memory and adding important OPAL memory to the MPIPL 
ranges just in case, even if the host won't be using it?

It seems nicer if you would just advertise in the dt what the important
OPAL address ranges are, and the host can preserve them if it wants to
do OPAL dumps.

> diff --git a/core/opal-dump.c b/core/opal-dump.c
> new file mode 100644
> index 000000000..dc364fab1
> --- /dev/null
> +++ b/core/opal-dump.c
> @@ -0,0 +1,143 @@
> +/* Copyright 2019 IBM Corp.
> + *
> + * Licensed under the Apache License, Version 2.0 (the "License");
> + * you may not use this file except in compliance with the License.
> + * You may obtain a copy of the License at
> + *
> + *	http://www.apache.org/licenses/LICENSE-2.0
> + *
> + * Unless required by applicable law or agreed to in writing, software
> + * distributed under the License is distributed on an "AS IS" BASIS,
> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
> + * implied.
> + * See the License for the specific language governing permissions and
> + * limitations under the License.
> + */
> +
> +#define pr_fmt(fmt)	"DUMP: " fmt
> +
> +#include <device.h>
> +#include <mem-map.h>
> +#include <mem_region.h>
> +#include <mem_region-malloc.h>
> +#include <opal.h>
> +#include <opal-dump.h>
> +#include <opal-internal.h>
> +#include <skiboot.h>
> +
> +#include <ccan/endian/endian.h>
> +
> +#include "hdata/spira.h"
> +
> +/* Actual address of MDST and MDDT table */
> +#define MDST_TABLE_BASE		(SKIBOOT_BASE + MDST_TABLE_OFF)
> +#define MDDT_TABLE_BASE		(SKIBOOT_BASE + MDDT_TABLE_OFF)
> +
> +static struct spira_ntuple *ntuple_mdst;
> +static struct spira_ntuple *ntuple_mddt;
> +static struct spira_ntuple *ntuple_mdrt;
> +
> +static int opal_mpipl_add_entry(u8 region, u64 src, u64 dest, u64 size)
> +{
> +	int i, max_cnt;
> +	struct mdst_table *mdst;
> +	struct mddt_table *mddt;
> +
> +	max_cnt = MDST_TABLE_SIZE / sizeof(struct mdst_table);
> +	if (ntuple_mdst->act_cnt >= max_cnt) {
> +		prlog(PR_DEBUG, "MDST table is full\n");
> +		return OPAL_RESOURCE;
> +	}
> +
> +	max_cnt = MDDT_TABLE_SIZE / sizeof(struct mddt_table);
> +	if (ntuple_mdst->act_cnt >= max_cnt) {
> +		prlog(PR_DEBUG, "MDDT table is full\n");
> +		return OPAL_RESOURCE;
> +	}
> +
> +	/* Use relocated memory address */
> +	mdst = (void *)(MDST_TABLE_BASE);
> +	mddt = (void *)(MDDT_TABLE_BASE);
> +
> +	/* Check for duplicate entry */
> +	for (i = 0; i < ntuple_mdst->act_cnt; i++) {
> +		if (mdst->addr == (src | HRMOR_BIT)) {
> +			prlog(PR_DEBUG,
> +			      "Duplicate source address : 0x%llx", src);
> +			return OPAL_PARAMETER;
> +		}
> +		mdst++;
> +	}
> +	for (i = 0; i < ntuple_mddt->act_cnt; i++) {
> +		if (mddt->addr == (dest | HRMOR_BIT)) {
> +			prlog(PR_DEBUG,
> +			      "Duplicate destination address : 0x%llx", dest);
> +			return OPAL_PARAMETER;
> +		}
> +		mddt++;
> +	}
> +
> +	/* Add OPAL source address to MDST entry */
> +	mdst->addr = src | HRMOR_BIT;
> +	mdst->data_region = region;
> +	mdst->size = size;
> +	ntuple_mdst->act_cnt++;
> +
> +	/* Add OPAL destination address to MDDT entry */
> +	mddt->addr = dest | HRMOR_BIT;
> +	mddt->data_region = region;
> +	mddt->size = size;
> +	ntuple_mddt->act_cnt++;
> +
> +	prlog(PR_TRACE, "Added new entry. src : 0x%llx, dest : 0x%llx,"
> +	      " size : 0x%llx\n", src, dest, size);
> +	return OPAL_SUCCESS;
> +}
> +
> +/* Register for OPAL dump.  */
> +static void opal_mpipl_register(void)
> +{
> +	u64 opal_dest, opal_size;
> +
> +	/* Get OPAL runtime size */
> +	if (!dt_find_property(opal_node, "opal-runtime-size")) {
> +		prlog(PR_DEBUG, "Could not get OPAL runtime size\n");
> +		return;
> +	}

As an aside, is it best practice to read your own device tree for this
kind of thing? As opposed to just exporting it from the code which set
the dt entry.

> +	opal_size = dt_prop_get_u64(opal_node, "opal-runtime-size");
> +	if (!opal_size) {
> +		prlog(PR_DEBUG, "OPAL runtime size is zero\n");
> +		return;
> +	}
> +
> +	/* Calculate and reserve OPAL dump destination memory */
> +	opal_dest = SKIBOOT_BASE + opal_size;
> +	mem_reserve_fw("ibm,firmware-dump", opal_dest, opal_size);
> +
> +	opal_mpipl_add_entry(DUMP_REGION_OPAL_MEMORY,
> +			     SKIBOOT_BASE, opal_dest, opal_size);
> +}

Thanks,
Nick
Vasant Hegde June 28, 2019, 10:36 a.m. UTC | #2
On 06/28/2019 07:03 AM, Nicholas Piggin wrote:
> Vasant Hegde's on June 17, 2019 3:10 am:
>> This patch adds support to register for OPAL dump.
>>    - Calculate memory required to capture OPAL dump
>>    - Reserve OPAL dump destination memory
>>    - Add OPAL dump details to MDST and MDDT table
>>
>> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
>> ---
>>   core/Makefile.inc   |   2 +-
>>   core/init.c         |   6 ++-
>>   core/opal-dump.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>   include/opal-dump.h |   4 ++
>>   4 files changed, 153 insertions(+), 2 deletions(-)
>>   create mode 100644 core/opal-dump.c
>>
>> diff --git a/core/Makefile.inc b/core/Makefile.inc
>> index 21c12fb8d..c2c9731db 100644
>> --- a/core/Makefile.inc
>> +++ b/core/Makefile.inc
>> @@ -10,7 +10,7 @@ CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
>>   CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
>>   CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
>>   CORE_OBJS += pci-dt-slot.o direct-controls.o cpufeatures.o
>> -CORE_OBJS += flash-firmware-versions.o
>> +CORE_OBJS += flash-firmware-versions.o opal-dump.o
>>   
>>   ifeq ($(SKIBOOT_GCOV),1)
>>   CORE_OBJS += gcov-profiling.o
>> diff --git a/core/init.c b/core/init.c
>> index 3db9df314..03776537e 100644
>> --- a/core/init.c
>> +++ b/core/init.c
>> @@ -1,4 +1,4 @@
>> -/* Copyright 2013-2016 IBM Corp.
>> +/* Copyright 2013-2019 IBM Corp.
>>    *
>>    * Licensed under the Apache License, Version 2.0 (the "License");
>>    * you may not use this file except in compliance with the License.
>> @@ -55,6 +55,7 @@
>>   #include <sbe-p9.h>
>>   #include <debug_descriptor.h>
>>   #include <occ.h>
>> +#include <opal-dump.h>
>>   
>>   enum proc_gen proc_gen;
>>   unsigned int pcie_max_link_speed;
>> @@ -1173,6 +1174,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>>   	if (platform.init)
>>   		platform.init();
>>   
>> +	/* init opal dump */
>> +	opal_mpipl_init();
>> +
>>   	/* Read in NVRAM and set it up */
>>   	nvram_init();
>>   
> 
> So you're reserving memory and adding important OPAL memory to the MPIPL
> ranges just in case, even if the host won't be using it?

Yes.
   - This is to make sure each layer takes care of its own reservation: OPAL
     should take care of memory reservation for OPAL, and the kernel should
     take care of kernel memory reservation.
   - Also, if in the future we support early OPAL crashes, then we can't really
     wait for the kernel to reserve memory.

   - The kernel can always go and capture the OPAL dump irrespective of the
     dump method it is using.


Probably I should call opal_mpipl_init() a bit later in the init path.

> 
> It seems nicer if you would just advertise in the dt what the important
> OPAL address ranges are, and the host can preserve them if it wants to
> do OPAL dumps.

I don't think so.


> 
>> diff --git a/core/opal-dump.c b/core/opal-dump.c
>> new file mode 100644
>> index 000000000..dc364fab1
>> --- /dev/null
>> +++ b/core/opal-dump.c
>> @@ -0,0 +1,143 @@
>> +/* Copyright 2019 IBM Corp.
>> + *
>> + * Licensed under the Apache License, Version 2.0 (the "License");
>> + * you may not use this file except in compliance with the License.
>> + * You may obtain a copy of the License at
>> + *
>> + *	http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
>> + * implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +#define pr_fmt(fmt)	"DUMP: " fmt
>> +
>> +#include <device.h>
>> +#include <mem-map.h>
>> +#include <mem_region.h>
>> +#include <mem_region-malloc.h>
>> +#include <opal.h>
>> +#include <opal-dump.h>
>> +#include <opal-internal.h>
>> +#include <skiboot.h>
>> +
>> +#include <ccan/endian/endian.h>
>> +
>> +#include "hdata/spira.h"
>> +
>> +/* Actual address of MDST and MDDT table */
>> +#define MDST_TABLE_BASE		(SKIBOOT_BASE + MDST_TABLE_OFF)
>> +#define MDDT_TABLE_BASE		(SKIBOOT_BASE + MDDT_TABLE_OFF)
>> +
>> +static struct spira_ntuple *ntuple_mdst;
>> +static struct spira_ntuple *ntuple_mddt;
>> +static struct spira_ntuple *ntuple_mdrt;
>> +
>> +static int opal_mpipl_add_entry(u8 region, u64 src, u64 dest, u64 size)
>> +{
>> +	int i, max_cnt;
>> +	struct mdst_table *mdst;
>> +	struct mddt_table *mddt;
>> +
>> +	max_cnt = MDST_TABLE_SIZE / sizeof(struct mdst_table);
>> +	if (ntuple_mdst->act_cnt >= max_cnt) {
>> +		prlog(PR_DEBUG, "MDST table is full\n");
>> +		return OPAL_RESOURCE;
>> +	}
>> +
>> +	max_cnt = MDDT_TABLE_SIZE / sizeof(struct mddt_table);
>> +	if (ntuple_mdst->act_cnt >= max_cnt) {
>> +		prlog(PR_DEBUG, "MDDT table is full\n");
>> +		return OPAL_RESOURCE;
>> +	}
>> +
>> +	/* Use relocated memory address */
>> +	mdst = (void *)(MDST_TABLE_BASE);
>> +	mddt = (void *)(MDDT_TABLE_BASE);
>> +
>> +	/* Check for duplicate entry */
>> +	for (i = 0; i < ntuple_mdst->act_cnt; i++) {
>> +		if (mdst->addr == (src | HRMOR_BIT)) {
>> +			prlog(PR_DEBUG,
>> +			      "Duplicate source address : 0x%llx", src);
>> +			return OPAL_PARAMETER;
>> +		}
>> +		mdst++;
>> +	}
>> +	for (i = 0; i < ntuple_mddt->act_cnt; i++) {
>> +		if (mddt->addr == (dest | HRMOR_BIT)) {
>> +			prlog(PR_DEBUG,
>> +			      "Duplicate destination address : 0x%llx", dest);
>> +			return OPAL_PARAMETER;
>> +		}
>> +		mddt++;
>> +	}
>> +
>> +	/* Add OPAL source address to MDST entry */
>> +	mdst->addr = src | HRMOR_BIT;
>> +	mdst->data_region = region;
>> +	mdst->size = size;
>> +	ntuple_mdst->act_cnt++;
>> +
>> +	/* Add OPAL destination address to MDDT entry */
>> +	mddt->addr = dest | HRMOR_BIT;
>> +	mddt->data_region = region;
>> +	mddt->size = size;
>> +	ntuple_mddt->act_cnt++;
>> +
>> +	prlog(PR_TRACE, "Added new entry. src : 0x%llx, dest : 0x%llx,"
>> +	      " size : 0x%llx\n", src, dest, size);
>> +	return OPAL_SUCCESS;
>> +}
>> +
>> +/* Register for OPAL dump.  */
>> +static void opal_mpipl_register(void)
>> +{
>> +	u64 opal_dest, opal_size;
>> +
>> +	/* Get OPAL runtime size */
>> +	if (!dt_find_property(opal_node, "opal-runtime-size")) {
>> +		prlog(PR_DEBUG, "Could not get OPAL runtime size\n");
>> +		return;
>> +	}
> 
> As an aside, is it best practice to read your own device tree for this
> kind of thing? As opposed to just exporting it from the code which set
> the dt entry.

I think it's fine to read from the DT (we have been doing that) instead of
creating so many global variables.

-Vasant
Nicholas Piggin June 28, 2019, 11:09 a.m. UTC | #3
Vasant Hegde's on June 28, 2019 8:36 pm:
> On 06/28/2019 07:03 AM, Nicholas Piggin wrote:
>> Vasant Hegde's on June 17, 2019 3:10 am:
>>> This patch adds support to register for OPAL dump.
>>>    - Calculate memory required to capture OPAL dump
>>>    - Reserve OPAL dump destination memory
>>>    - Add OPAL dump details to MDST and MDDT table
>>>
>>> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
>>> ---
>>>   core/Makefile.inc   |   2 +-
>>>   core/init.c         |   6 ++-
>>>   core/opal-dump.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>   include/opal-dump.h |   4 ++
>>>   4 files changed, 153 insertions(+), 2 deletions(-)
>>>   create mode 100644 core/opal-dump.c
>>>
>>> diff --git a/core/Makefile.inc b/core/Makefile.inc
>>> index 21c12fb8d..c2c9731db 100644
>>> --- a/core/Makefile.inc
>>> +++ b/core/Makefile.inc
>>> @@ -10,7 +10,7 @@ CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
>>>   CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
>>>   CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
>>>   CORE_OBJS += pci-dt-slot.o direct-controls.o cpufeatures.o
>>> -CORE_OBJS += flash-firmware-versions.o
>>> +CORE_OBJS += flash-firmware-versions.o opal-dump.o
>>>   
>>>   ifeq ($(SKIBOOT_GCOV),1)
>>>   CORE_OBJS += gcov-profiling.o
>>> diff --git a/core/init.c b/core/init.c
>>> index 3db9df314..03776537e 100644
>>> --- a/core/init.c
>>> +++ b/core/init.c
>>> @@ -1,4 +1,4 @@
>>> -/* Copyright 2013-2016 IBM Corp.
>>> +/* Copyright 2013-2019 IBM Corp.
>>>    *
>>>    * Licensed under the Apache License, Version 2.0 (the "License");
>>>    * you may not use this file except in compliance with the License.
>>> @@ -55,6 +55,7 @@
>>>   #include <sbe-p9.h>
>>>   #include <debug_descriptor.h>
>>>   #include <occ.h>
>>> +#include <opal-dump.h>
>>>   
>>>   enum proc_gen proc_gen;
>>>   unsigned int pcie_max_link_speed;
>>> @@ -1173,6 +1174,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>>>   	if (platform.init)
>>>   		platform.init();
>>>   
>>> +	/* init opal dump */
>>> +	opal_mpipl_init();
>>> +
>>>   	/* Read in NVRAM and set it up */
>>>   	nvram_init();
>>>   
>> 
>> So you're reserving memory and adding important OPAL memory to the MPIPL
>> ranges just in case, even if the host won't be using it?
> 
> Yes.
>    - This is to make sure each layer takes care of its reservation. OPAL should 
> take care of
>      memory reservation for OPAL/ kernel should take care of kernel memory 
> reservation.

Why though? The kernel is what has to activate the MPIPL and then put
together the dump after the reboot. Kernel knows whether or not it will
enable the feature. Why should OPAL take care of it?

>    - Also if future if we support early OPAL crashes then we can't really wait 
> for kernel to
>      reserve memory.

No, but that's not in this series.

>    - Kernel can always go and capture OPAL dump irrespective of dump method its 
> using.

I'm not sure what this means.

> 
> 
> Probably I should call opal_mpipl_init() bit late in the init path.
> 
>> 
>> It seems nicer if you would just advertise in the dt what the important
>> OPAL address ranges are, and the host can preserve them if it wants to
>> do OPAL dumps.
> 
> I don't think so.

Then most systems will just waste double memory on OPAL that they'll
never use.

Thanks,
Nick
Vasant Hegde July 2, 2019, 10:16 a.m. UTC | #4
On 06/28/2019 04:39 PM, Nicholas Piggin wrote:
> Vasant Hegde's on June 28, 2019 8:36 pm:
>> On 06/28/2019 07:03 AM, Nicholas Piggin wrote:
>>> Vasant Hegde's on June 17, 2019 3:10 am:
>>>> This patch adds support to register for OPAL dump.
>>>>     - Calculate memory required to capture OPAL dump
>>>>     - Reserve OPAL dump destination memory
>>>>     - Add OPAL dump details to MDST and MDDT table
>>>>
>>>> Signed-off-by: Vasant Hegde <hegdevasant@linux.vnet.ibm.com>
>>>> ---
>>>>    core/Makefile.inc   |   2 +-
>>>>    core/init.c         |   6 ++-
>>>>    core/opal-dump.c    | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>    include/opal-dump.h |   4 ++
>>>>    4 files changed, 153 insertions(+), 2 deletions(-)
>>>>    create mode 100644 core/opal-dump.c
>>>>
>>>> diff --git a/core/Makefile.inc b/core/Makefile.inc
>>>> index 21c12fb8d..c2c9731db 100644
>>>> --- a/core/Makefile.inc
>>>> +++ b/core/Makefile.inc
>>>> @@ -10,7 +10,7 @@ CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
>>>>    CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
>>>>    CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
>>>>    CORE_OBJS += pci-dt-slot.o direct-controls.o cpufeatures.o
>>>> -CORE_OBJS += flash-firmware-versions.o
>>>> +CORE_OBJS += flash-firmware-versions.o opal-dump.o
>>>>    
>>>>    ifeq ($(SKIBOOT_GCOV),1)
>>>>    CORE_OBJS += gcov-profiling.o
>>>> diff --git a/core/init.c b/core/init.c
>>>> index 3db9df314..03776537e 100644
>>>> --- a/core/init.c
>>>> +++ b/core/init.c
>>>> @@ -1,4 +1,4 @@
>>>> -/* Copyright 2013-2016 IBM Corp.
>>>> +/* Copyright 2013-2019 IBM Corp.
>>>>     *
>>>>     * Licensed under the Apache License, Version 2.0 (the "License");
>>>>     * you may not use this file except in compliance with the License.
>>>> @@ -55,6 +55,7 @@
>>>>    #include <sbe-p9.h>
>>>>    #include <debug_descriptor.h>
>>>>    #include <occ.h>
>>>> +#include <opal-dump.h>
>>>>    
>>>>    enum proc_gen proc_gen;
>>>>    unsigned int pcie_max_link_speed;
>>>> @@ -1173,6 +1174,9 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt)
>>>>    	if (platform.init)
>>>>    		platform.init();
>>>>    
>>>> +	/* init opal dump */
>>>> +	opal_mpipl_init();
>>>> +
>>>>    	/* Read in NVRAM and set it up */
>>>>    	nvram_init();
>>>>    
>>>
>>> So you're reserving memory and adding important OPAL memory to the MPIPL
>>> ranges just in case, even if the host won't be using it?
>>
>> Yes.
>>     - This is to make sure each layer takes care of its reservation. OPAL should
>> take care of
>>       memory reservation for OPAL/ kernel should take care of kernel memory
>> reservation.
> 
> Why though? The kernel is what has to activate the MPIPL and then put
> together the dump after the reboot. Kernel knows whether or not it will
> enable the feature. Why should OPAL take care of it?

That's not the way it works. There is no explicit "activate MPIPL" concept.
The platform advertises MPIPL support during boot. In the crash path it
searches the source and destination tables for valid entries. If there is a
valid entry, it will go and preserve the content.

Here I'm doing OPAL registration so that we can capture the OPAL dump. There is
no need to wait for the kernel to boot to register for MPIPL.


> 
>>     - Also if future if we support early OPAL crashes then we can't really wait
>> for kernel to
>>       reserve memory.
> 
> No, but that's not in this series.
> 
>>     - Kernel can always go and capture OPAL dump irrespective of dump method its
>> using.
> 
> I'm not sure what this means.

What I'm saying is, irrespective of the dump method, the kernel can go and
generate an OPALCORE.

> 
>>
>>
>> Probably I should call opal_mpipl_init() bit late in the init path.
>>
>>>
>>> It seems nicer if you would just advertise in the dt what the important
>>> OPAL address ranges are, and the host can preserve them if it wants to
>>> do OPAL dumps.
>>
>> I don't think so.
> 
> Then most systems will just waste double memory on OPAL that they'll
> never use.

This was discussed during early design as well. Right now we do not have any
better way. If we need a dump then we have to reserve that much memory.
That's the trade-off.
Given that OPAL is less than 100MB, I think we are good.

The bigger problem is on the Linux side. Mahesh/Hari are working on CMA-based
allocation for Linux to reduce the memory reservation.


-Vasant
diff mbox series

Patch

diff --git a/core/Makefile.inc b/core/Makefile.inc
index 21c12fb8d..c2c9731db 100644
--- a/core/Makefile.inc
+++ b/core/Makefile.inc
@@ -10,7 +10,7 @@  CORE_OBJS += console-log.o ipmi.o time-utils.o pel.o pool.o errorlog.o
 CORE_OBJS += timer.o i2c.o rtc.o flash.o sensor.o ipmi-opal.o
 CORE_OBJS += flash-subpartition.o bitmap.o buddy.o pci-quirk.o powercap.o psr.o
 CORE_OBJS += pci-dt-slot.o direct-controls.o cpufeatures.o
-CORE_OBJS += flash-firmware-versions.o
+CORE_OBJS += flash-firmware-versions.o opal-dump.o
 
 ifeq ($(SKIBOOT_GCOV),1)
 CORE_OBJS += gcov-profiling.o
diff --git a/core/init.c b/core/init.c
index 3db9df314..03776537e 100644
--- a/core/init.c
+++ b/core/init.c
@@ -1,4 +1,4 @@ 
-/* Copyright 2013-2016 IBM Corp.
+/* Copyright 2013-2019 IBM Corp.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -55,6 +55,7 @@ 
 #include <sbe-p9.h>
 #include <debug_descriptor.h>
 #include <occ.h>
+#include <opal-dump.h>
 
 enum proc_gen proc_gen;
 unsigned int pcie_max_link_speed;
@@ -1173,6 +1174,9 @@  void __noreturn __nomcount main_cpu_entry(const void *fdt)
 	if (platform.init)
 		platform.init();
 
+	/* init opal dump */
+	opal_mpipl_init();
+
 	/* Read in NVRAM and set it up */
 	nvram_init();
 
diff --git a/core/opal-dump.c b/core/opal-dump.c
new file mode 100644
index 000000000..dc364fab1
--- /dev/null
+++ b/core/opal-dump.c
@@ -0,0 +1,143 @@ 
+/* Copyright 2019 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define pr_fmt(fmt)	"DUMP: " fmt
+
+#include <device.h>
+#include <mem-map.h>
+#include <mem_region.h>
+#include <mem_region-malloc.h>
+#include <opal.h>
+#include <opal-dump.h>
+#include <opal-internal.h>
+#include <skiboot.h>
+
+#include <ccan/endian/endian.h>
+
+#include "hdata/spira.h"
+
+/* Actual address of MDST and MDDT table */
+#define MDST_TABLE_BASE		(SKIBOOT_BASE + MDST_TABLE_OFF)
+#define MDDT_TABLE_BASE		(SKIBOOT_BASE + MDDT_TABLE_OFF)
+
+static struct spira_ntuple *ntuple_mdst;
+static struct spira_ntuple *ntuple_mddt;
+static struct spira_ntuple *ntuple_mdrt;
+
+static int opal_mpipl_add_entry(u8 region, u64 src, u64 dest, u64 size)
+{
+	int i, max_cnt;
+	struct mdst_table *mdst;
+	struct mddt_table *mddt;
+
+	max_cnt = MDST_TABLE_SIZE / sizeof(struct mdst_table);
+	if (ntuple_mdst->act_cnt >= max_cnt) {
+		prlog(PR_DEBUG, "MDST table is full\n");
+		return OPAL_RESOURCE;
+	}
+
+	max_cnt = MDDT_TABLE_SIZE / sizeof(struct mddt_table);
+	if (ntuple_mdst->act_cnt >= max_cnt) {
+		prlog(PR_DEBUG, "MDDT table is full\n");
+		return OPAL_RESOURCE;
+	}
+
+	/* Use relocated memory address */
+	mdst = (void *)(MDST_TABLE_BASE);
+	mddt = (void *)(MDDT_TABLE_BASE);
+
+	/* Check for duplicate entry */
+	for (i = 0; i < ntuple_mdst->act_cnt; i++) {
+		if (mdst->addr == (src | HRMOR_BIT)) {
+			prlog(PR_DEBUG,
+			      "Duplicate source address : 0x%llx", src);
+			return OPAL_PARAMETER;
+		}
+		mdst++;
+	}
+	for (i = 0; i < ntuple_mddt->act_cnt; i++) {
+		if (mddt->addr == (dest | HRMOR_BIT)) {
+			prlog(PR_DEBUG,
+			      "Duplicate destination address : 0x%llx", dest);
+			return OPAL_PARAMETER;
+		}
+		mddt++;
+	}
+
+	/* Add OPAL source address to MDST entry */
+	mdst->addr = src | HRMOR_BIT;
+	mdst->data_region = region;
+	mdst->size = size;
+	ntuple_mdst->act_cnt++;
+
+	/* Add OPAL destination address to MDDT entry */
+	mddt->addr = dest | HRMOR_BIT;
+	mddt->data_region = region;
+	mddt->size = size;
+	ntuple_mddt->act_cnt++;
+
+	prlog(PR_TRACE, "Added new entry. src : 0x%llx, dest : 0x%llx,"
+	      " size : 0x%llx\n", src, dest, size);
+	return OPAL_SUCCESS;
+}
+
+/* Register for OPAL dump.  */
+static void opal_mpipl_register(void)
+{
+	u64 opal_dest, opal_size;
+
+	/* Get OPAL runtime size */
+	if (!dt_find_property(opal_node, "opal-runtime-size")) {
+		prlog(PR_DEBUG, "Could not get OPAL runtime size\n");
+		return;
+	}
+	opal_size = dt_prop_get_u64(opal_node, "opal-runtime-size");
+	if (!opal_size) {
+		prlog(PR_DEBUG, "OPAL runtime size is zero\n");
+		return;
+	}
+
+	/* Calculate and reserve OPAL dump destination memory */
+	opal_dest = SKIBOOT_BASE + opal_size;
+	mem_reserve_fw("ibm,firmware-dump", opal_dest, opal_size);
+
+	opal_mpipl_add_entry(DUMP_REGION_OPAL_MEMORY,
+			     SKIBOOT_BASE, opal_dest, opal_size);
+}
+
+void opal_mpipl_init(void)
+{
+	void *mdst_base = (void *)MDST_TABLE_BASE;
+	void *mddt_base = (void *)MDDT_TABLE_BASE;
+	struct dt_node *dump_node;
+
+	dump_node = dt_find_by_path(opal_node, "dump");
+	if (!dump_node)
+		return;
+
+	/* Get MDST and MDDT ntuple from SPIRAH */
+	ntuple_mdst = &(spirah.ntuples.mdump_src);
+	ntuple_mddt = &(spirah.ntuples.mdump_dst);
+	ntuple_mdrt = &(spirah.ntuples.mdump_res);
+
+	/* Clear MDST and MDDT table */
+	memset(mdst_base, 0, MDST_TABLE_SIZE);
+	ntuple_mdst->act_cnt = 0;
+	memset(mddt_base, 0, MDDT_TABLE_SIZE);
+	ntuple_mddt->act_cnt = 0;
+
+	opal_mpipl_register();
+}
diff --git a/include/opal-dump.h b/include/opal-dump.h
index ccf49f953..0a86dcb1b 100644
--- a/include/opal-dump.h
+++ b/include/opal-dump.h
@@ -33,6 +33,7 @@ 
 
 #define DUMP_REGION_CONSOLE	0x01
 #define DUMP_REGION_HBRT_LOG	0x02
+#define DUMP_REGION_OPAL_MEMORY	0x03
 
 /* Mainstore memory to be captured by FSP SYSDUMP */
 #define DUMP_TYPE_SYSDUMP		0xF5
@@ -120,4 +121,7 @@  struct proc_reg_data {
 	uint64_t reg_val;
 } __packed;
 
+/* init opal dump */
+extern void opal_mpipl_init(void);
+
 #endif	/* __OPAL_DUMP_H */