Patchwork [3/3] support readonly memory feature in qemu

login
register
mail settings
Submitter Liu Sheng
Date Sept. 7, 2012, 8:26 a.m.
Message ID <1347006389-543-4-git-send-email-liusheng@linux.vnet.ibm.com>
Download mbox | patch
Permalink /patch/182342/
State New
Headers show

Comments

Liu Sheng - Sept. 7, 2012, 8:26 a.m.
as readonly memory is support in kvm, this patch supports this feature
in qemu, mainly pluging the memory region with KVM_MEM_READONLY flag
to kvm.

this can be tested by a kernel module from the author of kvm readonly
memory slot:

static int rom_tester_probe(struct pci_dev *dev, const struct pci_device_id *id)
{
	struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
	char buf[6];
	size_t rom_size;
	char * __iomem map;
	int i;

	if (res->flags & (IORESOURCE_ROM_SHADOW | IORESOURCE_ROM_COPY |
	      IORESOURCE_ROM_BIOS_COPY)) {
		dev_printk(KERN_INFO, &dev->dev, "skip ROM COPY.\n");
		return 0;
	}

	if (res->flags &
			(IORESOURCE_ROM_COPY | IORESOURCE_ROM_BIOS_COPY))
	dev_printk(KERN_INFO, &dev->dev, "rom tester\n");

	if (pci_enable_rom(dev)) {
		dev_printk(KERN_INFO, &dev->dev, "do not found Rom\n");
		goto exit;
	}

	map = pci_map_rom(dev, &rom_size);
	if (!map) {
		dev_printk(KERN_INFO, &dev->dev, "map rom fail.\n");
		goto disable_exit;
	}

	dev_printk(KERN_INFO, &dev->dev, "Rom map: %p [size: %lx], phsyc:%llx.\n",
		   map, rom_size, pci_resource_start(dev, PCI_ROM_RESOURCE));

	if (rom_size < 6) {
		printk("map size < 6.\n");
		goto unmap_exit;
	}

	printk("The first 6 bytes:\n");
	for (i = 0; i < 6; i++) {
		buf[i] = map[i];
		printk("%x ", buf[i]);
	}
	printk("\n\n");

	memcpy(map, "KVMKVM", 6);
	if (!memcmp(map, "KVMKVM", 6)) {
		printk("Rom Test: fail.\n");
		goto unmap_exit;
	}

	for (i = 0; i < 6; i++)
		if (buf[i] != ((char *)map)[i]) {
			printk("The %d byte is changed: %x -> %x.\n",
			       i, buf[i], map[i]);
			printk("Rom Test: fail.\n");
			goto unmap_exit;
		}

	printk("Rom Test: Okay.\n");

unmap_exit:
	pci_unmap_rom(dev, map);
disable_exit:
	pci_disable_rom(dev);
exit:
	return 0;
}

static DEFINE_PCI_DEVICE_TABLE(rom_tester_tbl) = {
	{ PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID)},

	{0,}						/* 0 terminated list. */
};
MODULE_DEVICE_TABLE(pci, rom_tester_tbl);

static struct pci_driver rom_tester = {
	.name		= "pci-rom-tester",
	.id_table	= rom_tester_tbl,
	.probe		= rom_tester_probe,
};

static int __init pci_rom_test_init(void)
{
	int rc;

	rc = pci_register_driver(&rom_tester);
	if (rc)
		return rc;

	return 0;
}

static void __exit pci_rom_test_exit(void)
{
	pci_unregister_driver(&rom_tester);
}

module_init(pci_rom_test_init);
module_exit(pci_rom_test_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>");

we test it with the rom of Intel 82540EM Gigabit Ethernet Controller.
0. start qemu:
    qemu-system-x86_64 -enable-kvm -m 1G -smp 2 -hda fedora16.qcow2 \
        -nic nic,model=e1000,vlan=1,macadrr=52:00:12:34:56 \
        -net user,vlan=1
1. unbind the device:
    echo "0000:00:03.0" > /sys/bus/pci/devices/0000\:00\:03.0/driver/unbind
2. install the test kernel module, here we name it write_rom:
    modprobe write_rom
3. print dmesg to verify the result is ok or fail:
    dmesg
4. remove the test kernel module.
    rmmod write_rom
5. rebind the device to its driver, test if the nic still works:
    echo "0000:00:03.0" > /sys/bus/pci/drivers/e1000/bind
    open firefox and try some web page.

when I use kvm without readonly memory slot, in step 2 it reports:
Rom Test: fail. this means we can write to the memory region of a rom,
which is quite not safe for the guest and host.

when I use kvm with readonly memory slot, in step 2 it reports:
    Rom Test: Okay.
and the error message prints out in host console:
    Guest is writing readonly memory, phys_addr: febc0000, len:4, data[0]:K,
    skip it
    Guest is writing readonly memory, phys_addr: febc0000, len:2, data[0]:V,
    skip it
this is what we write "KVMKVM".
this means write operation is not successful, and return back to qemu from kvm.
thus we can make the rom real rom.

Signed-off-by: Liu Sheng <liusheng@linux.vnet.ibm.com>
---
 kvm-all.c |   47 ++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 36 insertions(+), 11 deletions(-)
Jan Kiszka - Sept. 7, 2012, 8:50 a.m.
On 2012-09-07 10:26, Liu Sheng wrote:
> as readonly memory is support in kvm, this patch supports this feature
> in qemu, mainly pluging the memory region with KVM_MEM_READONLY flag
> to kvm.
> 
> this can be tested by a kernel module from the author of kvm readonly
> memory slot:
> 
> static int rom_tester_probe(struct pci_dev *dev, const struct pci_device_id *id)
> {
> 	struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
> 	char buf[6];
> 	size_t rom_size;
> 	char * __iomem map;
> 	int i;
> 
> 	if (res->flags & (IORESOURCE_ROM_SHADOW | IORESOURCE_ROM_COPY |
> 	      IORESOURCE_ROM_BIOS_COPY)) {
> 		dev_printk(KERN_INFO, &dev->dev, "skip ROM COPY.\n");
> 		return 0;
> 	}
> 
> 	if (res->flags &
> 			(IORESOURCE_ROM_COPY | IORESOURCE_ROM_BIOS_COPY))
> 	dev_printk(KERN_INFO, &dev->dev, "rom tester\n");
> 
> 	if (pci_enable_rom(dev)) {
> 		dev_printk(KERN_INFO, &dev->dev, "do not found Rom\n");
> 		goto exit;
> 	}
> 
> 	map = pci_map_rom(dev, &rom_size);
> 	if (!map) {
> 		dev_printk(KERN_INFO, &dev->dev, "map rom fail.\n");
> 		goto disable_exit;
> 	}
> 
> 	dev_printk(KERN_INFO, &dev->dev, "Rom map: %p [size: %lx], phsyc:%llx.\n",
> 		   map, rom_size, pci_resource_start(dev, PCI_ROM_RESOURCE));
> 
> 	if (rom_size < 6) {
> 		printk("map size < 6.\n");
> 		goto unmap_exit;
> 	}
> 
> 	printk("The first 6 bytes:\n");
> 	for (i = 0; i < 6; i++) {
> 		buf[i] = map[i];
> 		printk("%x ", buf[i]);
> 	}
> 	printk("\n\n");
> 
> 	memcpy(map, "KVMKVM", 6);
> 	if (!memcmp(map, "KVMKVM", 6)) {
> 		printk("Rom Test: fail.\n");
> 		goto unmap_exit;
> 	}
> 
> 	for (i = 0; i < 6; i++)
> 		if (buf[i] != ((char *)map)[i]) {
> 			printk("The %d byte is changed: %x -> %x.\n",
> 			       i, buf[i], map[i]);
> 			printk("Rom Test: fail.\n");
> 			goto unmap_exit;
> 		}
> 
> 	printk("Rom Test: Okay.\n");
> 
> unmap_exit:
> 	pci_unmap_rom(dev, map);
> disable_exit:
> 	pci_disable_rom(dev);
> exit:
> 	return 0;
> }
> 
> static DEFINE_PCI_DEVICE_TABLE(rom_tester_tbl) = {
> 	{ PCI_DEVICE(PCI_ANY_ID, PCI_ANY_ID)},
> 
> 	{0,}						/* 0 terminated list. */
> };
> MODULE_DEVICE_TABLE(pci, rom_tester_tbl);
> 
> static struct pci_driver rom_tester = {
> 	.name		= "pci-rom-tester",
> 	.id_table	= rom_tester_tbl,
> 	.probe		= rom_tester_probe,
> };
> 
> static int __init pci_rom_test_init(void)
> {
> 	int rc;
> 
> 	rc = pci_register_driver(&rom_tester);
> 	if (rc)
> 		return rc;
> 
> 	return 0;
> }
> 
> static void __exit pci_rom_test_exit(void)
> {
> 	pci_unregister_driver(&rom_tester);
> }
> 
> module_init(pci_rom_test_init);
> module_exit(pci_rom_test_exit);
> 
> MODULE_LICENSE("GPL");
> MODULE_AUTHOR("Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>");
> 
> we test it with the rom of Intel 82540EM Gigabit Ethernet Controller.
> 0. start qemu:
>     qemu-system-x86_64 -enable-kvm -m 1G -smp 2 -hda fedora16.qcow2 \
>         -nic nic,model=e1000,vlan=1,macadrr=52:00:12:34:56 \
>         -net user,vlan=1
> 1. unbind the device:
>     echo "0000:00:03.0" > /sys/bus/pci/devices/0000\:00\:03.0/driver/unbind
> 2. install the test kernel module, here we name it write_rom:
>     modprobe write_rom
> 3. print dmesg to verify the result is ok or fail:
>     dmesg
> 4. remove the test kernel module.
>     rmmod write_rom
> 5. rebind the device to its driver, test if the nic still works:
>     echo "0000:00:03.0" > /sys/bus/pci/drivers/e1000/bind
>     open firefox and try some web page.
> 
> when I use kvm without readonly memory slot, in step 2 it reports:
> Rom Test: fail. this means we can write to the memory region of a rom,
> which is quite not safe for the guest and host.
> 
> when I use kvm with readonly memory slot, in step 2 it reports:
>     Rom Test: Okay.
> and the error message prints out in host console:
>     Guest is writing readonly memory, phys_addr: febc0000, len:4, data[0]:K,
>     skip it
>     Guest is writing readonly memory, phys_addr: febc0000, len:2, data[0]:V,
>     skip it
> this is what we write "KVMKVM".
> this means write operation is not successful, and return back to qemu from kvm.
> thus we can make the rom real rom.
> 
> Signed-off-by: Liu Sheng <liusheng@linux.vnet.ibm.com>
> ---
>  kvm-all.c |   47 ++++++++++++++++++++++++++++++++++++-----------
>  1 files changed, 36 insertions(+), 11 deletions(-)
> 
> diff --git a/kvm-all.c b/kvm-all.c
> index badf1d8..1b66183 100644
> --- a/kvm-all.c
> +++ b/kvm-all.c
> @@ -97,6 +97,9 @@ struct KVMState
>      QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
>      bool direct_msi;
>  #endif
> +#ifdef KVM_CAP_READONLY_MEM
> +    int readonly_mem;
> +#endif

See comment on patch 2: Make sure that KVM_CAP_READONLY_MEM is always
defined and remove all these #ifdefs.

>  };
>  
>  KVMState *kvm_state;
> @@ -261,9 +264,18 @@ err:
>   * dirty pages logging control
>   */
>  
> -static int kvm_mem_flags(KVMState *s, bool log_dirty)
> +static int kvm_mem_flags(KVMState *s, MemoryRegion *mr)
>  {
> -    return log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0;
> +    int flags;
> +    bool log_dirty = memory_region_is_logging(mr);
> +    bool readonly = mr->readonly;

I suppose you want to add some memory_region_is_readonly(). And both
bools can be inlined into the checks below.

> +    flags = log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0;
> +#ifdef KVM_CAP_READONLY_MEM
> +    if (s->readonly_mem) {
> +        flags |= readonly ? KVM_MEM_READONLY : 0;
> +    }
> +#endif
> +    return flags;
>  }
>  
>  static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
> @@ -274,7 +286,7 @@ static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
>  
>      old_flags = mem->flags;
>  
> -    flags = (mem->flags & ~mask) | kvm_mem_flags(s, log_dirty);
> +    flags = (mem->flags & ~mask) | (log_dirty ? mask : 0);
>      mem->flags = flags;
>  
>      /* If nothing changed effectively, no need to issue ioctl */
> @@ -622,7 +634,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>              mem->memory_size = old.memory_size;
>              mem->start_addr = old.start_addr;
>              mem->ram = old.ram;
> -            mem->flags = kvm_mem_flags(s, log_dirty);
> +            mem->flags = kvm_mem_flags(s, mr);
>  
>              err = kvm_set_user_memory_region(s, mem);
>              if (err) {
> @@ -643,7 +655,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>              mem->memory_size = start_addr - old.start_addr;
>              mem->start_addr = old.start_addr;
>              mem->ram = old.ram;
> -            mem->flags =  kvm_mem_flags(s, log_dirty);
> +            mem->flags =  kvm_mem_flags(s, mr);
>  
>              err = kvm_set_user_memory_region(s, mem);
>              if (err) {
> @@ -667,7 +679,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>              size_delta = mem->start_addr - old.start_addr;
>              mem->memory_size = old.memory_size - size_delta;
>              mem->ram = old.ram + size_delta;
> -            mem->flags = kvm_mem_flags(s, log_dirty);
> +            mem->flags = kvm_mem_flags(s, mr);
>  
>              err = kvm_set_user_memory_region(s, mem);
>              if (err) {
> @@ -689,7 +701,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
>      mem->memory_size = size;
>      mem->start_addr = start_addr;
>      mem->ram = ram;
> -    mem->flags = kvm_mem_flags(s, log_dirty);
> +    mem->flags = kvm_mem_flags(s, mr);
>  
>      err = kvm_set_user_memory_region(s, mem);
>      if (err) {
> @@ -1393,6 +1405,10 @@ int kvm_init(void)
>      s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
>  #endif
>  
> +#ifdef KVM_CAP_READONLY_MEM
> +    s->readonly_mem = kvm_check_extension(s, KVM_CAP_READONLY_MEM);
> +#endif
> +
>      s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);
>  
>      ret = kvm_arch_init(s);
> @@ -1607,10 +1623,19 @@ int kvm_cpu_exec(CPUArchState *env)
>              break;
>          case KVM_EXIT_MMIO:
>              DPRINTF("handle_mmio\n");
> -            cpu_physical_memory_rw(run->mmio.phys_addr,
> -                                   run->mmio.data,
> -                                   run->mmio.len,
> -                                   run->mmio.is_write);
> +            if (run->mmio.write_readonly_mem) {
> +                fprintf(stderr, "Guest is writing readonly memory, "
> +                    "phys_addr: %llx, len:%x, data[0]:%c, skip it.\n",
> +                    run->mmio.phys_addr,
> +                    run->mmio.len,
> +                    run->mmio.data[0]);

This should be DPRINTF.

> +            } else {
> +                cpu_physical_memory_rw(run->mmio.phys_addr,
> +                                       run->mmio.data,
> +                                       run->mmio.len,
> +                                       run->mmio.is_write);
> +            }
> +
>              ret = 0;
>              break;
>          case KVM_EXIT_IRQ_WINDOW_OPEN:
> 

Great to see this feature for KVM finally! I'm just afraid that this
will finally break good old isapc - due to broken Seabios. KVM used to
"unbreak" it as it didn't respect write protections. ;)

Jan
Avi Kivity - Sept. 9, 2012, 3:42 p.m.
On 09/07/2012 11:26 AM, Liu Sheng wrote:
> as readonly memory is support in kvm, this patch supports this feature
> in qemu, mainly pluging the memory region with KVM_MEM_READONLY flag
> to kvm.
> 
> @@ -1607,10 +1623,19 @@ int kvm_cpu_exec(CPUArchState *env)
>              break;
>          case KVM_EXIT_MMIO:
>              DPRINTF("handle_mmio\n");
> -            cpu_physical_memory_rw(run->mmio.phys_addr,
> -                                   run->mmio.data,
> -                                   run->mmio.len,
> -                                   run->mmio.is_write);
> +            if (run->mmio.write_readonly_mem) {
> +                fprintf(stderr, "Guest is writing readonly memory, "
> +                    "phys_addr: %llx, len:%x, data[0]:%c, skip it.\n",
> +                    run->mmio.phys_addr,
> +                    run->mmio.len,
> +                    run->mmio.data[0]);
> +            } else {
> +                cpu_physical_memory_rw(run->mmio.phys_addr,
> +                                       run->mmio.data,
> +                                       run->mmio.len,
> +                                       run->mmio.is_write);
> +            }
> +
>              ret = 0;

I don't think this is needed.  A cpu_physical_memory_rw() to a ROM ought
to be ignored (and btw, it may not be a ROM - it could be a ROM/Device,
in which case it ought not to be ignored).
Avi Kivity - Sept. 9, 2012, 3:45 p.m.
On 09/07/2012 11:50 AM, Jan Kiszka wrote:
> 
>> +            } else {
>> +                cpu_physical_memory_rw(run->mmio.phys_addr,
>> +                                       run->mmio.data,
>> +                                       run->mmio.len,
>> +                                       run->mmio.is_write);
>> +            }
>> +
>>              ret = 0;
>>              break;
>>          case KVM_EXIT_IRQ_WINDOW_OPEN:
>> 
> 
> Great to see this feature for KVM finally! I'm just afraid that this
> will finally break good old isapc - due to broken Seabios. KVM used to
> "unbreak" it as it didn't respect write protections. ;)

Can you describe the breakage?
Jan Kiszka - Sept. 10, 2012, 9:25 a.m.
On 2012-09-09 17:45, Avi Kivity wrote:
> On 09/07/2012 11:50 AM, Jan Kiszka wrote:
>>
>>> +            } else {
>>> +                cpu_physical_memory_rw(run->mmio.phys_addr,
>>> +                                       run->mmio.data,
>>> +                                       run->mmio.len,
>>> +                                       run->mmio.is_write);
>>> +            }
>>> +
>>>              ret = 0;
>>>              break;
>>>          case KVM_EXIT_IRQ_WINDOW_OPEN:
>>>
>>
>> Great to see this feature for KVM finally! I'm just afraid that this
>> will finally break good old isapc - due to broken Seabios. KVM used to
>> "unbreak" it as it didn't respect write protections. ;)
> 
> Can you describe the breakage?

Try "qemu -machine isapc [-enable-kvm]". Seabios is writing to some
read-only marked area. Don't recall where precisely.

Jan
Kevin O'Connor - Sept. 11, 2012, 3:02 a.m.
On Mon, Sep 10, 2012 at 11:25:38AM +0200, Jan Kiszka wrote:
> On 2012-09-09 17:45, Avi Kivity wrote:
> > On 09/07/2012 11:50 AM, Jan Kiszka wrote:
> >>
> >>> +            } else {
> >>> +                cpu_physical_memory_rw(run->mmio.phys_addr,
> >>> +                                       run->mmio.data,
> >>> +                                       run->mmio.len,
> >>> +                                       run->mmio.is_write);
> >>> +            }
> >>> +
> >>>              ret = 0;
> >>>              break;
> >>>          case KVM_EXIT_IRQ_WINDOW_OPEN:
> >>>
> >>
> >> Great to see this feature for KVM finally! I'm just afraid that this
> >> will finally break good old isapc - due to broken Seabios. KVM used to
> >> "unbreak" it as it didn't respect write protections. ;)
> > 
> > Can you describe the breakage?
> 
> Try "qemu -machine isapc [-enable-kvm]". Seabios is writing to some
> read-only marked area. Don't recall where precisely.

On boot, QEMU marks the memory at 0xc0000-0x100000 as read-only.
SeaBIOS then makes the area read-write, performs its init, and then
makes portions of it read-only before launching the OS.

The registers SeaBIOS uses to make the memory read-write are on a PCI
device.  On isapc, this device is not reachable, and thus SeaBIOS
can't make the memory writable.

The easiest way to fix this is to change QEMU to boot with the area
read-write.  There's no real gain in booting with the memory read-only
as the first thing SeaBIOS does is make it read-write.

-Kevin
Jan Kiszka - Sept. 11, 2012, 3:31 p.m.
On 2012-09-11 05:02, Kevin O'Connor wrote:
> On Mon, Sep 10, 2012 at 11:25:38AM +0200, Jan Kiszka wrote:
>> On 2012-09-09 17:45, Avi Kivity wrote:
>>> On 09/07/2012 11:50 AM, Jan Kiszka wrote:
>>>>
>>>>> +            } else {
>>>>> +                cpu_physical_memory_rw(run->mmio.phys_addr,
>>>>> +                                       run->mmio.data,
>>>>> +                                       run->mmio.len,
>>>>> +                                       run->mmio.is_write);
>>>>> +            }
>>>>> +
>>>>>              ret = 0;
>>>>>              break;
>>>>>          case KVM_EXIT_IRQ_WINDOW_OPEN:
>>>>>
>>>>
>>>> Great to see this feature for KVM finally! I'm just afraid that this
>>>> will finally break good old isapc - due to broken Seabios. KVM used to
>>>> "unbreak" it as it didn't respect write protections. ;)
>>>
>>> Can you describe the breakage?
>>
>> Try "qemu -machine isapc [-enable-kvm]". Seabios is writing to some
>> read-only marked area. Don't recall where precisely.
> 
> On boot, QEMU marks the memory at 0xc0000-0x100000 as read-only.

Only the remapped BIOS ROM (0xe0000-0xfffff) is read-only. And that's
where SeaBIOS apparently wants to write to.

> SeaBIOS then makes the area read-write, performs its init, and then
> makes portions of it read-only before launching the OS.

What does it do if there is no PAM? Nothing?

> 
> The registers SeaBIOS uses to make the memory read-write are on a PCI
> device.  On isapc, this device is not reachable, and thus SeaBIOS
> can't make the memory writable.

On isapc, this device and all the PAM does not even exist.

> 
> The easiest way to fix this is to change QEMU to boot with the area
> read-write.  There's no real gain in booting with the memory read-only
> as the first thing SeaBIOS does is make it read-write.

Considering SeaBIOS, that is true. If Seabios depends inherently on
shadow ROMs and as we have no real chipset for isapc to control
shadowing behavior, that will likely be the best option. Can have a look.

Jan
Anthony Liguori - Sept. 11, 2012, 4:15 p.m.
Jan Kiszka <jan.kiszka@siemens.com> writes:

> On 2012-09-11 05:02, Kevin O'Connor wrote:
>> On Mon, Sep 10, 2012 at 11:25:38AM +0200, Jan Kiszka wrote:
>>> On 2012-09-09 17:45, Avi Kivity wrote:
>>>> On 09/07/2012 11:50 AM, Jan Kiszka wrote:
>>>>>
>>>>>> +            } else {
>>>>>> +                cpu_physical_memory_rw(run->mmio.phys_addr,
>>>>>> +                                       run->mmio.data,
>>>>>> +                                       run->mmio.len,
>>>>>> +                                       run->mmio.is_write);
>>>>>> +            }
>>>>>> +
>>>>>>              ret = 0;
>>>>>>              break;
>>>>>>          case KVM_EXIT_IRQ_WINDOW_OPEN:
>>>>>>
>>>>>
>>>>> Great to see this feature for KVM finally! I'm just afraid that this
>>>>> will finally break good old isapc - due to broken Seabios. KVM used to
>>>>> "unbreak" it as it didn't respect write protections. ;)
>>>>
>>>> Can you describe the breakage?
>>>
>>> Try "qemu -machine isapc [-enable-kvm]". Seabios is writing to some
>>> read-only marked area. Don't recall where precisely.
>> 
>> On boot, QEMU marks the memory at 0xc0000-0x100000 as read-only.
>
> Only the remapped BIOS ROM (0xe0000-0xfffff) is read-only. And that's
> where SeaBIOS apparently wants to write to.
>
>> SeaBIOS then makes the area read-write, performs its init, and then
>> makes portions of it read-only before launching the OS.
>
> What does it do if there is no PAM? Nothing?
>
>> 
>> The registers SeaBIOS uses to make the memory read-write are on a PCI
>> device.  On isapc, this device is not reachable, and thus SeaBIOS
>> can't make the memory writable.
>
> On isapc, this device and all the PAM does not even exist.
>
>> 
>> The easiest way to fix this is to change QEMU to boot with the area
>> read-write.  There's no real gain in booting with the memory read-only
>> as the first thing SeaBIOS does is make it read-write.
>
> Considering SeaBIOS, that is true. If Seabios depends inherently on
> shadow ROMs and as we have no real chipset for isapc to control
> shadowing behavior, that will likely be the best option. Can have a
> look.

I've never really understood this.

Why do we need ISAPC?  An ISA-only OS would still be okay on a system
with an i440fx and no PCI devices, no?

I think that makes a lot more sense because then SeaBIOS doesn't have to
deal with the notion of ISAPC.

Regards,

Anthony Liguori


>
> Jan
>
> -- 
> Siemens AG, Corporate Technology, CT RTC ITP SDP-DE
> Corporate Competence Center Embedded Linux
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka - Sept. 11, 2012, 4:33 p.m.
On 2012-09-11 18:15, Anthony Liguori wrote:
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> 
>> On 2012-09-11 05:02, Kevin O'Connor wrote:
>>> On Mon, Sep 10, 2012 at 11:25:38AM +0200, Jan Kiszka wrote:
>>>> On 2012-09-09 17:45, Avi Kivity wrote:
>>>>> On 09/07/2012 11:50 AM, Jan Kiszka wrote:
>>>>>>
>>>>>>> +            } else {
>>>>>>> +                cpu_physical_memory_rw(run->mmio.phys_addr,
>>>>>>> +                                       run->mmio.data,
>>>>>>> +                                       run->mmio.len,
>>>>>>> +                                       run->mmio.is_write);
>>>>>>> +            }
>>>>>>> +
>>>>>>>              ret = 0;
>>>>>>>              break;
>>>>>>>          case KVM_EXIT_IRQ_WINDOW_OPEN:
>>>>>>>
>>>>>>
>>>>>> Great to see this feature for KVM finally! I'm just afraid that this
>>>>>> will finally break good old isapc - due to broken Seabios. KVM used to
>>>>>> "unbreak" it as it didn't respect write protections. ;)
>>>>>
>>>>> Can you describe the breakage?
>>>>
>>>> Try "qemu -machine isapc [-enable-kvm]". Seabios is writing to some
>>>> read-only marked area. Don't recall where precisely.
>>>
>>> On boot, QEMU marks the memory at 0xc0000-0x100000 as read-only.
>>
>> Only the remapped BIOS ROM (0xe0000-0xfffff) is read-only. And that's
>> where SeaBIOS apparently wants to write to.
>>
>>> SeaBIOS then makes the area read-write, performs its init, and then
>>> makes portions of it read-only before launching the OS.
>>
>> What does it do if there is no PAM? Nothing?
>>
>>>
>>> The registers SeaBIOS uses to make the memory read-write are on a PCI
>>> device.  On isapc, this device is not reachable, and thus SeaBIOS
>>> can't make the memory writable.
>>
>> On isapc, this device and all the PAM does not even exist.
>>
>>>
>>> The easiest way to fix this is to change QEMU to boot with the area
>>> read-write.  There's no real gain in booting with the memory read-only
>>> as the first thing SeaBIOS does is make it read-write.
>>
>> Considering SeaBIOS, that is true. If Seabios depends inherently on
>> shadow ROMs and as we have no real chipset for isapc to control
>> shadowing behavior, that will likely be the best option. Can have a
>> look.
> 
> I've never really understood this.
> 
> Why do we need ISAPC?  An ISA-only OS would still be okay on a system
> with an i440fx and no PCI devices, no?

For OSes that were not aware of newer devices, there should be indeed no
difference. But for those that were, the behaviour can be different than
what you want to recreate. I suppose that was the reason for
creating/keeping this variant.

> I think that makes a lot more sense because then SeaBIOS doesn't have to
> deal with the notion of ISAPC.

How much difference does it actually today? Was it really ever written
for such a use case or does it now work by chance?

Jan
Kevin O'Connor - Sept. 12, 2012, 12:01 a.m.
On Tue, Sep 11, 2012 at 11:15:50AM -0500, Anthony Liguori wrote:
> Jan Kiszka <jan.kiszka@siemens.com> writes:
> > On 2012-09-11 05:02, Kevin O'Connor wrote:
> >> The easiest way to fix this is to change QEMU to boot with the area
> >> read-write.  There's no real gain in booting with the memory read-only
> >> as the first thing SeaBIOS does is make it read-write.
> >
> > Considering SeaBIOS, that is true. If Seabios depends inherently on
> > shadow ROMs and as we have no real chipset for isapc to control
> > shadowing behavior, that will likely be the best option. Can have a
> > look.
> 
> I've never really understood this.
> 
> Why do we need ISAPC?  An ISA-only OS would still be okay on a system
> with an i440fx and no PCI devices, no?
> 
> I think that makes a lot more sense because then SeaBIOS doesn't have to
> deal with the notion of ISAPC.

Regardless of whether or not there is a need to support an isapc
machine, I think it would still be preferable to boot with the
0xc0000-0x100000 memory in read-write mode.  The SeaBIOS code to make
that memory read-write without being able to modify any of that ram is
awkward.

-Kevin

Patch

diff --git a/kvm-all.c b/kvm-all.c
index badf1d8..1b66183 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -97,6 +97,9 @@  struct KVMState
     QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
     bool direct_msi;
 #endif
+#ifdef KVM_CAP_READONLY_MEM
+    int readonly_mem;
+#endif
 };
 
 KVMState *kvm_state;
@@ -261,9 +264,18 @@  err:
  * dirty pages logging control
  */
 
-static int kvm_mem_flags(KVMState *s, bool log_dirty)
+static int kvm_mem_flags(KVMState *s, MemoryRegion *mr)
 {
-    return log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+    int flags;
+    bool log_dirty = memory_region_is_logging(mr);
+    bool readonly = mr->readonly;
+    flags = log_dirty ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+#ifdef KVM_CAP_READONLY_MEM
+    if (s->readonly_mem) {
+        flags |= readonly ? KVM_MEM_READONLY : 0;
+    }
+#endif
+    return flags;
 }
 
 static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
@@ -274,7 +286,7 @@  static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
 
     old_flags = mem->flags;
 
-    flags = (mem->flags & ~mask) | kvm_mem_flags(s, log_dirty);
+    flags = (mem->flags & ~mask) | (log_dirty ? mask : 0);
     mem->flags = flags;
 
     /* If nothing changed effectively, no need to issue ioctl */
@@ -622,7 +634,7 @@  static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
             mem->memory_size = old.memory_size;
             mem->start_addr = old.start_addr;
             mem->ram = old.ram;
-            mem->flags = kvm_mem_flags(s, log_dirty);
+            mem->flags = kvm_mem_flags(s, mr);
 
             err = kvm_set_user_memory_region(s, mem);
             if (err) {
@@ -643,7 +655,7 @@  static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
             mem->memory_size = start_addr - old.start_addr;
             mem->start_addr = old.start_addr;
             mem->ram = old.ram;
-            mem->flags =  kvm_mem_flags(s, log_dirty);
+            mem->flags =  kvm_mem_flags(s, mr);
 
             err = kvm_set_user_memory_region(s, mem);
             if (err) {
@@ -667,7 +679,7 @@  static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
             size_delta = mem->start_addr - old.start_addr;
             mem->memory_size = old.memory_size - size_delta;
             mem->ram = old.ram + size_delta;
-            mem->flags = kvm_mem_flags(s, log_dirty);
+            mem->flags = kvm_mem_flags(s, mr);
 
             err = kvm_set_user_memory_region(s, mem);
             if (err) {
@@ -689,7 +701,7 @@  static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
     mem->memory_size = size;
     mem->start_addr = start_addr;
     mem->ram = ram;
-    mem->flags = kvm_mem_flags(s, log_dirty);
+    mem->flags = kvm_mem_flags(s, mr);
 
     err = kvm_set_user_memory_region(s, mem);
     if (err) {
@@ -1393,6 +1405,10 @@  int kvm_init(void)
     s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
 #endif
 
+#ifdef KVM_CAP_READONLY_MEM
+    s->readonly_mem = kvm_check_extension(s, KVM_CAP_READONLY_MEM);
+#endif
+
     s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);
 
     ret = kvm_arch_init(s);
@@ -1607,10 +1623,19 @@  int kvm_cpu_exec(CPUArchState *env)
             break;
         case KVM_EXIT_MMIO:
             DPRINTF("handle_mmio\n");
-            cpu_physical_memory_rw(run->mmio.phys_addr,
-                                   run->mmio.data,
-                                   run->mmio.len,
-                                   run->mmio.is_write);
+            if (run->mmio.write_readonly_mem) {
+                fprintf(stderr, "Guest is writing readonly memory, "
+                    "phys_addr: %llx, len:%x, data[0]:%c, skip it.\n",
+                    run->mmio.phys_addr,
+                    run->mmio.len,
+                    run->mmio.data[0]);
+            } else {
+                cpu_physical_memory_rw(run->mmio.phys_addr,
+                                       run->mmio.data,
+                                       run->mmio.len,
+                                       run->mmio.is_write);
+            }
+
             ret = 0;
             break;
         case KVM_EXIT_IRQ_WINDOW_OPEN: