diff mbox

[v8,3/3] Add optionrom compatible with fw_cfg DMA version

Message ID 1463000807-18015-4-git-send-email-rjones@redhat.com
State New
Headers show

Commit Message

Richard W.M. Jones May 11, 2016, 9:06 p.m. UTC
From: Marc Marí <markmb@redhat.com>

This optionrom is based on linuxboot.S.

Signed-off-by: Marc Marí <markmb@redhat.com>
Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
---
 .gitignore                        |   4 +
 hw/i386/pc.c                      |  10 +-
 hw/nvram/fw_cfg.c                 |   2 +-
 include/hw/nvram/fw_cfg.h         |   1 +
 pc-bios/optionrom/Makefile        |  13 +-
 pc-bios/optionrom/linuxboot_dma.c | 291 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 316 insertions(+), 5 deletions(-)
 create mode 100644 pc-bios/optionrom/linuxboot_dma.c

Comments

Paolo Bonzini May 23, 2016, 3:05 p.m. UTC | #1
On 11/05/2016 23:06, Richard W.M. Jones wrote:
> From: Marc Marí <markmb@redhat.com>
> 
> This optionrom is based on linuxboot.S.
> 
> Signed-off-by: Marc Marí <markmb@redhat.com>
> Signed-off-by: Richard W.M. Jones <rjones@redhat.com>

Hmm, I hadn't noticed that you added -m16.  That breaks even on
not-too-old GCC versions (such as 4.8 on RHEL/CentOS 7). Can you add some
Makefile logic to switch between "-m16" and a "-include code16gcc.h"
that only contains 'asm(".code16gcc");'?

Also, you need to add linuxboot_dma.bin after linuxboot.bin in the
main Makefile.

Thanks,

Paolo

> ---
>  .gitignore                        |   4 +
>  hw/i386/pc.c                      |  10 +-
>  hw/nvram/fw_cfg.c                 |   2 +-
>  include/hw/nvram/fw_cfg.h         |   1 +
>  pc-bios/optionrom/Makefile        |  13 +-
>  pc-bios/optionrom/linuxboot_dma.c | 291 ++++++++++++++++++++++++++++++++++++++
>  6 files changed, 316 insertions(+), 5 deletions(-)
>  create mode 100644 pc-bios/optionrom/linuxboot_dma.c
> 
> diff --git a/.gitignore b/.gitignore
> index 88a80ff..101d1e0 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -94,6 +94,10 @@
>  /pc-bios/optionrom/linuxboot.bin
>  /pc-bios/optionrom/linuxboot.raw
>  /pc-bios/optionrom/linuxboot.img
> +/pc-bios/optionrom/linuxboot_dma.asm
> +/pc-bios/optionrom/linuxboot_dma.bin
> +/pc-bios/optionrom/linuxboot_dma.raw
> +/pc-bios/optionrom/linuxboot_dma.img
>  /pc-bios/optionrom/multiboot.asm
>  /pc-bios/optionrom/multiboot.bin
>  /pc-bios/optionrom/multiboot.raw
> diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> index 99437e0..098d571 100644
> --- a/hw/i386/pc.c
> +++ b/hw/i386/pc.c
> @@ -999,8 +999,13 @@ static void load_linux(PCMachineState *pcms,
>      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
>      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
>  
> -    option_rom[nb_option_roms].name = "linuxboot.bin";
> -    option_rom[nb_option_roms].bootindex = 0;
> +    if (fw_cfg_dma_enabled(fw_cfg)) {
> +        option_rom[nb_option_roms].name = "linuxboot_dma.bin";
> +        option_rom[nb_option_roms].bootindex = 0;
> +    } else {
> +        option_rom[nb_option_roms].name = "linuxboot.bin";
> +        option_rom[nb_option_roms].bootindex = 0;
> +    }
>      nb_option_roms++;
>  }
>  
> @@ -1263,6 +1268,7 @@ void xen_load_linux(PCMachineState *pcms)
>      load_linux(pcms, fw_cfg);
>      for (i = 0; i < nb_option_roms; i++) {
>          assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
> +               !strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
>                 !strcmp(option_rom[i].name, "multiboot.bin"));
>          rom_add_option(option_rom[i].name, option_rom[i].bootindex);
>      }
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index 999f480..114aea8 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -551,7 +551,7 @@ static bool is_version_1(void *opaque, int version_id)
>      return version_id == 1;
>  }
>  
> -static bool fw_cfg_dma_enabled(void *opaque)
> +bool fw_cfg_dma_enabled(void *opaque)
>  {
>      FWCfgState *s = opaque;
>  
> diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
> index d008112..5c27a1f 100644
> --- a/include/hw/nvram/fw_cfg.h
> +++ b/include/hw/nvram/fw_cfg.h
> @@ -182,5 +182,6 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
>                                   hwaddr dma_addr, AddressSpace *dma_as);
>  
>  FWCfgState *fw_cfg_find(void);
> +bool fw_cfg_dma_enabled(void *opaque);
>  
>  #endif
> diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile
> index ce4852a..a51dd6e 100644
> --- a/pc-bios/optionrom/Makefile
> +++ b/pc-bios/optionrom/Makefile
> @@ -13,15 +13,24 @@ CFLAGS := -Wall -Wstrict-prototypes -Werror -fomit-frame-pointer -fno-builtin
>  CFLAGS += -I$(SRC_PATH)
>  CFLAGS += $(call cc-option, $(CFLAGS), -fno-stack-protector)
>  CFLAGS += $(CFLAGS_NOPIE)
> +CFLAGS += -m16
>  QEMU_CFLAGS = $(CFLAGS)
>  
> -build-all: multiboot.bin linuxboot.bin kvmvapic.bin
> +ASFLAGS += -32
> +
> +build-all: multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin
>  
>  # suppress auto-removal of intermediate files
>  .SECONDARY:
>  
> +ifdef CONFIG_WIN32
> +LD_EMULATION = i386pe
> +else
> +LD_EMULATION = elf_i386
> +endif
> +
>  %.img: %.o
> -	$(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -Ttext 0 -e _start -s -o $@ $<,"  Building $(TARGET_DIR)$@")
> +	$(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -m $(LD_EMULATION) -Ttext 0 -e _start -s -o $@ $<,"  Building $(TARGET_DIR)$@")
>  
>  %.raw: %.img
>  	$(call quiet-command,$(OBJCOPY) -O binary -j .text $< $@,"  Building $(TARGET_DIR)$@")
> diff --git a/pc-bios/optionrom/linuxboot_dma.c b/pc-bios/optionrom/linuxboot_dma.c
> new file mode 100644
> index 0000000..8ebc480
> --- /dev/null
> +++ b/pc-bios/optionrom/linuxboot_dma.c
> @@ -0,0 +1,291 @@
> +/*
> + * Linux Boot Option ROM for fw_cfg DMA
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, see <http://www.gnu.org/licenses/>.
> + *
> + * Copyright (c) 2015-2016 Red Hat Inc.
> + *   Authors:
> + *     Marc Marí <markmb@redhat.com>
> + *     Richard W.M. Jones <rjones@redhat.com>
> + */
> +
> +asm(
> +".text\n"
> +".global _start\n"
> +"_start:\n"
> +"   .short 0xaa55\n"
> +"   .byte 0\n" /* size in 512 units, filled in by signrom.py */
> +"   .byte 0xcb\n" /* far return without prefix */
> +"   .org 0x18\n"
> +"   .short 0\n"
> +"   .short _pnph\n"
> +"_pnph:\n"
> +"   .ascii \"$PnP\"\n"
> +"   .byte 0x01\n"
> +"   .byte (_pnph_len / 16)\n"
> +"   .short 0x0000\n"
> +"   .byte 0x00\n"
> +"   .byte 0x00\n"
> +"   .long 0x00000000\n"
> +"   .short _manufacturer\n"
> +"   .short _product\n"
> +"   .long 0x00000000\n"
> +"   .short 0x0000\n"
> +"   .short 0x0000\n"
> +"   .short _bev\n"
> +"   .short 0x0000\n"
> +"   .short 0x0000\n"
> +"   .equ _pnph_len, . - _pnph\n"
> +"_manufacturer:\n"
> +"   .asciz \"QEMU\"\n"
> +"_product:\n"
> +"   .asciz \"Linux loader DMA\"\n"
> +"   .align 4, 0\n"
> +"_bev:\n"
> +"   cli\n"
> +"   cld\n"
> +"   jmp load_kernel\n"
> +);
> +
> +#include "../../include/hw/nvram/fw_cfg_keys.h"
> +
> +/* QEMU_CFG_DMA_CONTROL bits */
> +#define BIOS_CFG_DMA_CTL_ERROR   0x01
> +#define BIOS_CFG_DMA_CTL_READ    0x02
> +#define BIOS_CFG_DMA_CTL_SKIP    0x04
> +#define BIOS_CFG_DMA_CTL_SELECT  0x08
> +
> +#define BIOS_CFG_DMA_ADDR_HIGH 0x514
> +#define BIOS_CFG_DMA_ADDR_LOW  0x518
> +
> +#define uint64_t unsigned long long
> +#define uint32_t unsigned int
> +#define uint16_t unsigned short
> +
> +#define barrier() asm("" : : : "memory")
> +
> +typedef struct FWCfgDmaAccess {
> +    uint32_t control;
> +    uint32_t length;
> +    uint64_t address;
> +} __attribute__((packed)) FWCfgDmaAccess;
> +
> +static inline void outl(uint32_t value, uint16_t port)
> +{
> +    asm("outl %0, %w1" : : "a"(value), "Nd"(port));
> +}
> +
> +static inline void set_es(void *addr)
> +{
> +    uint32_t seg = (uint32_t)addr >> 4;
> +    asm("movl %0, %%es" : : "r"(seg));
> +}
> +
> +#ifdef __clang__
> +#define ADDR32
> +#else
> +#define ADDR32 "addr32 "
> +#endif
> +
> +static inline uint16_t readw_es(uint16_t offset)
> +{
> +    uint16_t val;
> +    asm(ADDR32 "movw %%es:(%1), %0" : "=r"(val) : "r"((uint32_t)offset));
> +    barrier();
> +    return val;
> +}
> +
> +static inline uint32_t readl_es(uint16_t offset)
> +{
> +    uint32_t val;
> +    asm(ADDR32 "movl %%es:(%1), %0" : "=r"(val) : "r"((uint32_t)offset));
> +    barrier();
> +    return val;
> +}
> +
> +static inline void writel_es(uint16_t offset, uint32_t val)
> +{
> +    barrier();
> +    asm(ADDR32 "movl %0, %%es:(%1)" : : "r"(val), "r"((uint32_t)offset));
> +}
> +
> +static inline uint32_t bswap32(uint32_t x)
> +{
> +    return
> +        ((x & 0x000000ffU) << 24) |
> +        ((x & 0x0000ff00U) <<  8) |
> +        ((x & 0x00ff0000U) >>  8) |
> +        ((x & 0xff000000U) >> 24);
> +}
> +
> +static inline uint64_t bswap64(uint64_t x)
> +{
> +    return
> +        ((x & 0x00000000000000ffULL) << 56) |
> +        ((x & 0x000000000000ff00ULL) << 40) |
> +        ((x & 0x0000000000ff0000ULL) << 24) |
> +        ((x & 0x00000000ff000000ULL) <<  8) |
> +        ((x & 0x000000ff00000000ULL) >>  8) |
> +        ((x & 0x0000ff0000000000ULL) >> 24) |
> +        ((x & 0x00ff000000000000ULL) >> 40) |
> +        ((x & 0xff00000000000000ULL) >> 56);
> +}
> +
> +static inline uint64_t cpu_to_be64(uint64_t x)
> +{
> +    return bswap64(x);
> +}
> +
> +static inline uint32_t cpu_to_be32(uint32_t x)
> +{
> +    return bswap32(x);
> +}
> +
> +static inline uint32_t be32_to_cpu(uint32_t x)
> +{
> +    return bswap32(x);
> +}
> +
> +static void bios_cfg_read_entry(void *buf, uint16_t entry, uint32_t len)
> +{
> +    FWCfgDmaAccess access;
> +    uint32_t control = (entry << 16) | BIOS_CFG_DMA_CTL_SELECT
> +                        | BIOS_CFG_DMA_CTL_READ;
> +
> +    access.address = cpu_to_be64((uint64_t)(uint32_t)buf);
> +    access.length = cpu_to_be32(len);
> +    access.control = cpu_to_be32(control);
> +
> +    barrier();
> +
> +    outl(cpu_to_be32((uint32_t)&access), BIOS_CFG_DMA_ADDR_LOW);
> +
> +    while (be32_to_cpu(access.control) & ~BIOS_CFG_DMA_CTL_ERROR) {
> +        barrier();
> +    }
> +}
> +
> +/* Return top of memory using BIOS function E801. */
> +static uint32_t get_e801_addr(void)
> +{
> +    uint16_t ax, bx, cx, dx;
> +    uint32_t ret;
> +
> +    asm("int $0x15\n"
> +        : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx)
> +        : "a"(0xe801), "b"(0), "c"(0), "d"(0));
> +
> +    /* Not SeaBIOS, but in theory a BIOS could return CX=DX=0 in which
> +     * case we need to use the result from AX & BX instead.
> +     */
> +    if (cx == 0 && dx == 0) {
> +        cx = ax;
> +        dx = bx;
> +    }
> +
> +    if (dx) {
> +        /* DX = extended memory above 16M, in 64K units.
> +         * Convert it to bytes and return.
> +         */
> +        ret = ((uint32_t)dx + 256 /* 16M in 64K units */) << 16;
> +    } else {
> +        /* This is a fallback path for machines with <= 16MB of RAM,
> +         * which probably would never be the case, but deal with it
> +         * anyway.
> +         *
> +         * CX = extended memory between 1M and 16M, in kilobytes
> +         * Convert it to bytes and return.
> +         */
> +        ret = ((uint32_t)cx + 1024 /* 1M in K */) << 10;
> +    }
> +
> +    return ret;
> +}
> +
> +void load_kernel(void)
> +{
> +    void *setup_addr;
> +    void *initrd_addr;
> +    void *kernel_addr;
> +    void *cmdline_addr;
> +    uint32_t setup_size;
> +    uint32_t initrd_size;
> +    uint32_t kernel_size;
> +    uint32_t cmdline_size;
> +    uint32_t initrd_end_page, max_allowed_page;
> +    uint32_t segment_addr, stack_addr;
> +
> +    bios_cfg_read_entry(&setup_addr, FW_CFG_SETUP_ADDR, 4);
> +    bios_cfg_read_entry(&setup_size, FW_CFG_SETUP_SIZE, 4);
> +    bios_cfg_read_entry(setup_addr, FW_CFG_SETUP_DATA, setup_size);
> +
> +    set_es(setup_addr);
> +
> +    /* For protocol < 0x203 we don't have initrd_max ... */
> +    if (readw_es(0x206) < 0x203) {
> +        /* ... so we assume initrd_max = 0x37ffffff. */
> +        writel_es(0x22c, 0x37ffffff);
> +    }
> +
> +    bios_cfg_read_entry(&initrd_addr, FW_CFG_INITRD_ADDR, 4);
> +    bios_cfg_read_entry(&initrd_size, FW_CFG_INITRD_SIZE, 4);
> +
> +    initrd_end_page = ((uint32_t)(initrd_addr + initrd_size) & -4096);
> +    max_allowed_page = (readl_es(0x22c) & -4096);
> +
> +    if (initrd_end_page != 0 && max_allowed_page != 0 &&
> +        initrd_end_page != max_allowed_page) {
> +        /* Initrd at the end of memory. Compute better initrd address
> +         * based on e801 data
> +         */
> +        initrd_addr = (void *)((get_e801_addr() - initrd_size) & -4096);
> +        writel_es(0x218, (uint32_t)initrd_addr);
> +
> +    }
> +
> +    bios_cfg_read_entry(initrd_addr, FW_CFG_INITRD_DATA, initrd_size);
> +
> +    bios_cfg_read_entry(&kernel_addr, FW_CFG_KERNEL_ADDR, 4);
> +    bios_cfg_read_entry(&kernel_size, FW_CFG_KERNEL_SIZE, 4);
> +    bios_cfg_read_entry(kernel_addr, FW_CFG_KERNEL_DATA, kernel_size);
> +
> +    bios_cfg_read_entry(&cmdline_addr, FW_CFG_CMDLINE_ADDR, 4);
> +    bios_cfg_read_entry(&cmdline_size, FW_CFG_CMDLINE_SIZE, 4);
> +    bios_cfg_read_entry(cmdline_addr, FW_CFG_CMDLINE_DATA, cmdline_size);
> +
> +    /* Boot linux */
> +    segment_addr = ((uint32_t)setup_addr >> 4);
> +    stack_addr = (uint32_t)(cmdline_addr - setup_addr - 16);
> +
> +    /* As we are changing critical registers, we cannot leave freedom to the
> +     * compiler.
> +     */
> +    asm("movw %%ax, %%ds\n"
> +        "movw %%ax, %%es\n"
> +        "movw %%ax, %%fs\n"
> +        "movw %%ax, %%gs\n"
> +        "movw %%ax, %%ss\n"
> +        "movl %%ebx, %%esp\n"
> +        "addw $0x20, %%ax\n"
> +        "pushw %%ax\n" /* CS */
> +        "pushw $0\n" /* IP */
> +        /* Clear registers and jump to Linux */
> +        "xor %%ebx, %%ebx\n"
> +        "xor %%ecx, %%ecx\n"
> +        "xor %%edx, %%edx\n"
> +        "xor %%edi, %%edi\n"
> +        "xor %%ebp, %%ebp\n"
> +        "lretw\n"
> +        : : "a"(segment_addr), "b"(stack_addr));
> +}
>
Richard W.M. Jones May 23, 2016, 3:44 p.m. UTC | #2
On Mon, May 23, 2016 at 05:05:15PM +0200, Paolo Bonzini wrote:
> 
> 
> On 11/05/2016 23:06, Richard W.M. Jones wrote:
> > From: Marc Marí <markmb@redhat.com>
> > 
> > This optionrom is based on linuxboot.S.
> > 
> > Signed-off-by: Marc Marí <markmb@redhat.com>
> > Signed-off-by: Richard W.M. Jones <rjones@redhat.com>
> 
> Hmm, I hadn't noticed that you added -m16.  That breaks on even
> not-too-old GCC (such as 4.8 on RHEL/CentOS 7). Can you add some
> Makefile logic to switch between "-m16" and a "-include code16gcc.h"
> that only contains 'asm(".code16gcc");'?

I'll note that we did know that -m16 would break on gcc < 4.9.  See
Stefan's message here:

  https://lists.gnu.org/archive/html/qemu-devel/2016-05/msg01238.html

However, you're right that we shouldn't break RHEL 7, so I'll try to
implement your suggested fix in the next version.

> Also, you need to add linuxboot_dma.bin after linuxboot.bin in the
> main Makefile.

Ah ha, I was wondering why I had to copy the file manually after
building it ...

Rich.

> Thanks,
> 
> Paolo
> 
> > ---
> >  .gitignore                        |   4 +
> >  hw/i386/pc.c                      |  10 +-
> >  hw/nvram/fw_cfg.c                 |   2 +-
> >  include/hw/nvram/fw_cfg.h         |   1 +
> >  pc-bios/optionrom/Makefile        |  13 +-
> >  pc-bios/optionrom/linuxboot_dma.c | 291 ++++++++++++++++++++++++++++++++++++++
> >  6 files changed, 316 insertions(+), 5 deletions(-)
> >  create mode 100644 pc-bios/optionrom/linuxboot_dma.c
> > 
> > diff --git a/.gitignore b/.gitignore
> > index 88a80ff..101d1e0 100644
> > --- a/.gitignore
> > +++ b/.gitignore
> > @@ -94,6 +94,10 @@
> >  /pc-bios/optionrom/linuxboot.bin
> >  /pc-bios/optionrom/linuxboot.raw
> >  /pc-bios/optionrom/linuxboot.img
> > +/pc-bios/optionrom/linuxboot_dma.asm
> > +/pc-bios/optionrom/linuxboot_dma.bin
> > +/pc-bios/optionrom/linuxboot_dma.raw
> > +/pc-bios/optionrom/linuxboot_dma.img
> >  /pc-bios/optionrom/multiboot.asm
> >  /pc-bios/optionrom/multiboot.bin
> >  /pc-bios/optionrom/multiboot.raw
> > diff --git a/hw/i386/pc.c b/hw/i386/pc.c
> > index 99437e0..098d571 100644
> > --- a/hw/i386/pc.c
> > +++ b/hw/i386/pc.c
> > @@ -999,8 +999,13 @@ static void load_linux(PCMachineState *pcms,
> >      fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
> >      fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
> >  
> > -    option_rom[nb_option_roms].name = "linuxboot.bin";
> > -    option_rom[nb_option_roms].bootindex = 0;
> > +    if (fw_cfg_dma_enabled(fw_cfg)) {
> > +        option_rom[nb_option_roms].name = "linuxboot_dma.bin";
> > +        option_rom[nb_option_roms].bootindex = 0;
> > +    } else {
> > +        option_rom[nb_option_roms].name = "linuxboot.bin";
> > +        option_rom[nb_option_roms].bootindex = 0;
> > +    }
> >      nb_option_roms++;
> >  }
> >  
> > @@ -1263,6 +1268,7 @@ void xen_load_linux(PCMachineState *pcms)
> >      load_linux(pcms, fw_cfg);
> >      for (i = 0; i < nb_option_roms; i++) {
> >          assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
> > +               !strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
> >                 !strcmp(option_rom[i].name, "multiboot.bin"));
> >          rom_add_option(option_rom[i].name, option_rom[i].bootindex);
> >      }
> > diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> > index 999f480..114aea8 100644
> > --- a/hw/nvram/fw_cfg.c
> > +++ b/hw/nvram/fw_cfg.c
> > @@ -551,7 +551,7 @@ static bool is_version_1(void *opaque, int version_id)
> >      return version_id == 1;
> >  }
> >  
> > -static bool fw_cfg_dma_enabled(void *opaque)
> > +bool fw_cfg_dma_enabled(void *opaque)
> >  {
> >      FWCfgState *s = opaque;
> >  
> > diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
> > index d008112..5c27a1f 100644
> > --- a/include/hw/nvram/fw_cfg.h
> > +++ b/include/hw/nvram/fw_cfg.h
> > @@ -182,5 +182,6 @@ FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
> >                                   hwaddr dma_addr, AddressSpace *dma_as);
> >  
> >  FWCfgState *fw_cfg_find(void);
> > +bool fw_cfg_dma_enabled(void *opaque);
> >  
> >  #endif
> > diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile
> > index ce4852a..a51dd6e 100644
> > --- a/pc-bios/optionrom/Makefile
> > +++ b/pc-bios/optionrom/Makefile
> > @@ -13,15 +13,24 @@ CFLAGS := -Wall -Wstrict-prototypes -Werror -fomit-frame-pointer -fno-builtin
> >  CFLAGS += -I$(SRC_PATH)
> >  CFLAGS += $(call cc-option, $(CFLAGS), -fno-stack-protector)
> >  CFLAGS += $(CFLAGS_NOPIE)
> > +CFLAGS += -m16
> >  QEMU_CFLAGS = $(CFLAGS)
> >  
> > -build-all: multiboot.bin linuxboot.bin kvmvapic.bin
> > +ASFLAGS += -32
> > +
> > +build-all: multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin
> >  
> >  # suppress auto-removal of intermediate files
> >  .SECONDARY:
> >  
> > +ifdef CONFIG_WIN32
> > +LD_EMULATION = i386pe
> > +else
> > +LD_EMULATION = elf_i386
> > +endif
> > +
> >  %.img: %.o
> > -	$(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -Ttext 0 -e _start -s -o $@ $<,"  Building $(TARGET_DIR)$@")
> > +	$(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -m $(LD_EMULATION) -Ttext 0 -e _start -s -o $@ $<,"  Building $(TARGET_DIR)$@")
> >  
> >  %.raw: %.img
> >  	$(call quiet-command,$(OBJCOPY) -O binary -j .text $< $@,"  Building $(TARGET_DIR)$@")
> > diff --git a/pc-bios/optionrom/linuxboot_dma.c b/pc-bios/optionrom/linuxboot_dma.c
> > new file mode 100644
> > index 0000000..8ebc480
> > --- /dev/null
> > +++ b/pc-bios/optionrom/linuxboot_dma.c
> > @@ -0,0 +1,291 @@
> > +/*
> > + * Linux Boot Option ROM for fw_cfg DMA
> > + *
> > + * This program is free software; you can redistribute it and/or modify
> > + * it under the terms of the GNU General Public License as published by
> > + * the Free Software Foundation; either version 2 of the License, or
> > + * (at your option) any later version.
> > + *
> > + * This program is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> > + * GNU General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU General Public License
> > + * along with this program; if not, see <http://www.gnu.org/licenses/>.
> > + *
> > + * Copyright (c) 2015-2016 Red Hat Inc.
> > + *   Authors:
> > + *     Marc Marí <markmb@redhat.com>
> > + *     Richard W.M. Jones <rjones@redhat.com>
> > + */
> > +
> > +asm(
> > +".text\n"
> > +".global _start\n"
> > +"_start:\n"
> > +"   .short 0xaa55\n"
> > +"   .byte 0\n" /* size in 512 units, filled in by signrom.py */
> > +"   .byte 0xcb\n" /* far return without prefix */
> > +"   .org 0x18\n"
> > +"   .short 0\n"
> > +"   .short _pnph\n"
> > +"_pnph:\n"
> > +"   .ascii \"$PnP\"\n"
> > +"   .byte 0x01\n"
> > +"   .byte (_pnph_len / 16)\n"
> > +"   .short 0x0000\n"
> > +"   .byte 0x00\n"
> > +"   .byte 0x00\n"
> > +"   .long 0x00000000\n"
> > +"   .short _manufacturer\n"
> > +"   .short _product\n"
> > +"   .long 0x00000000\n"
> > +"   .short 0x0000\n"
> > +"   .short 0x0000\n"
> > +"   .short _bev\n"
> > +"   .short 0x0000\n"
> > +"   .short 0x0000\n"
> > +"   .equ _pnph_len, . - _pnph\n"
> > +"_manufacturer:\n"
> > +"   .asciz \"QEMU\"\n"
> > +"_product:\n"
> > +"   .asciz \"Linux loader DMA\"\n"
> > +"   .align 4, 0\n"
> > +"_bev:\n"
> > +"   cli\n"
> > +"   cld\n"
> > +"   jmp load_kernel\n"
> > +);
> > +
> > +#include "../../include/hw/nvram/fw_cfg_keys.h"
> > +
> > +/* QEMU_CFG_DMA_CONTROL bits */
> > +#define BIOS_CFG_DMA_CTL_ERROR   0x01
> > +#define BIOS_CFG_DMA_CTL_READ    0x02
> > +#define BIOS_CFG_DMA_CTL_SKIP    0x04
> > +#define BIOS_CFG_DMA_CTL_SELECT  0x08
> > +
> > +#define BIOS_CFG_DMA_ADDR_HIGH 0x514
> > +#define BIOS_CFG_DMA_ADDR_LOW  0x518
> > +
> > +#define uint64_t unsigned long long
> > +#define uint32_t unsigned int
> > +#define uint16_t unsigned short
> > +
> > +#define barrier() asm("" : : : "memory")
> > +
> > +typedef struct FWCfgDmaAccess {
> > +    uint32_t control;
> > +    uint32_t length;
> > +    uint64_t address;
> > +} __attribute__((packed)) FWCfgDmaAccess;
> > +
> > +static inline void outl(uint32_t value, uint16_t port)
> > +{
> > +    asm("outl %0, %w1" : : "a"(value), "Nd"(port));
> > +}
> > +
> > +static inline void set_es(void *addr)
> > +{
> > +    uint32_t seg = (uint32_t)addr >> 4;
> > +    asm("movl %0, %%es" : : "r"(seg));
> > +}
> > +
> > +#ifdef __clang__
> > +#define ADDR32
> > +#else
> > +#define ADDR32 "addr32 "
> > +#endif
> > +
> > +static inline uint16_t readw_es(uint16_t offset)
> > +{
> > +    uint16_t val;
> > +    asm(ADDR32 "movw %%es:(%1), %0" : "=r"(val) : "r"((uint32_t)offset));
> > +    barrier();
> > +    return val;
> > +}
> > +
> > +static inline uint32_t readl_es(uint16_t offset)
> > +{
> > +    uint32_t val;
> > +    asm(ADDR32 "movl %%es:(%1), %0" : "=r"(val) : "r"((uint32_t)offset));
> > +    barrier();
> > +    return val;
> > +}
> > +
> > +static inline void writel_es(uint16_t offset, uint32_t val)
> > +{
> > +    barrier();
> > +    asm(ADDR32 "movl %0, %%es:(%1)" : : "r"(val), "r"((uint32_t)offset));
> > +}
> > +
> > +static inline uint32_t bswap32(uint32_t x)
> > +{
> > +    return
> > +        ((x & 0x000000ffU) << 24) |
> > +        ((x & 0x0000ff00U) <<  8) |
> > +        ((x & 0x00ff0000U) >>  8) |
> > +        ((x & 0xff000000U) >> 24);
> > +}
> > +
> > +static inline uint64_t bswap64(uint64_t x)
> > +{
> > +    return
> > +        ((x & 0x00000000000000ffULL) << 56) |
> > +        ((x & 0x000000000000ff00ULL) << 40) |
> > +        ((x & 0x0000000000ff0000ULL) << 24) |
> > +        ((x & 0x00000000ff000000ULL) <<  8) |
> > +        ((x & 0x000000ff00000000ULL) >>  8) |
> > +        ((x & 0x0000ff0000000000ULL) >> 24) |
> > +        ((x & 0x00ff000000000000ULL) >> 40) |
> > +        ((x & 0xff00000000000000ULL) >> 56);
> > +}
> > +
> > +static inline uint64_t cpu_to_be64(uint64_t x)
> > +{
> > +    return bswap64(x);
> > +}
> > +
> > +static inline uint32_t cpu_to_be32(uint32_t x)
> > +{
> > +    return bswap32(x);
> > +}
> > +
> > +static inline uint32_t be32_to_cpu(uint32_t x)
> > +{
> > +    return bswap32(x);
> > +}
> > +
> > +static void bios_cfg_read_entry(void *buf, uint16_t entry, uint32_t len)
> > +{
> > +    FWCfgDmaAccess access;
> > +    uint32_t control = (entry << 16) | BIOS_CFG_DMA_CTL_SELECT
> > +                        | BIOS_CFG_DMA_CTL_READ;
> > +
> > +    access.address = cpu_to_be64((uint64_t)(uint32_t)buf);
> > +    access.length = cpu_to_be32(len);
> > +    access.control = cpu_to_be32(control);
> > +
> > +    barrier();
> > +
> > +    outl(cpu_to_be32((uint32_t)&access), BIOS_CFG_DMA_ADDR_LOW);
> > +
> > +    while (be32_to_cpu(access.control) & ~BIOS_CFG_DMA_CTL_ERROR) {
> > +        barrier();
> > +    }
> > +}
> > +
> > +/* Return top of memory using BIOS function E801. */
> > +static uint32_t get_e801_addr(void)
> > +{
> > +    uint16_t ax, bx, cx, dx;
> > +    uint32_t ret;
> > +
> > +    asm("int $0x15\n"
> > +        : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx)
> > +        : "a"(0xe801), "b"(0), "c"(0), "d"(0));
> > +
> > +    /* Not SeaBIOS, but in theory a BIOS could return CX=DX=0 in which
> > +     * case we need to use the result from AX & BX instead.
> > +     */
> > +    if (cx == 0 && dx == 0) {
> > +        cx = ax;
> > +        dx = bx;
> > +    }
> > +
> > +    if (dx) {
> > +        /* DX = extended memory above 16M, in 64K units.
> > +         * Convert it to bytes and return.
> > +         */
> > +        ret = ((uint32_t)dx + 256 /* 16M in 64K units */) << 16;
> > +    } else {
> > +        /* This is a fallback path for machines with <= 16MB of RAM,
> > +         * which probably would never be the case, but deal with it
> > +         * anyway.
> > +         *
> > +         * CX = extended memory between 1M and 16M, in kilobytes
> > +         * Convert it to bytes and return.
> > +         */
> > +        ret = ((uint32_t)cx + 1024 /* 1M in K */) << 10;
> > +    }
> > +
> > +    return ret;
> > +}
> > +
> > +void load_kernel(void)
> > +{
> > +    void *setup_addr;
> > +    void *initrd_addr;
> > +    void *kernel_addr;
> > +    void *cmdline_addr;
> > +    uint32_t setup_size;
> > +    uint32_t initrd_size;
> > +    uint32_t kernel_size;
> > +    uint32_t cmdline_size;
> > +    uint32_t initrd_end_page, max_allowed_page;
> > +    uint32_t segment_addr, stack_addr;
> > +
> > +    bios_cfg_read_entry(&setup_addr, FW_CFG_SETUP_ADDR, 4);
> > +    bios_cfg_read_entry(&setup_size, FW_CFG_SETUP_SIZE, 4);
> > +    bios_cfg_read_entry(setup_addr, FW_CFG_SETUP_DATA, setup_size);
> > +
> > +    set_es(setup_addr);
> > +
> > +    /* For protocol < 0x203 we don't have initrd_max ... */
> > +    if (readw_es(0x206) < 0x203) {
> > +        /* ... so we assume initrd_max = 0x37ffffff. */
> > +        writel_es(0x22c, 0x37ffffff);
> > +    }
> > +
> > +    bios_cfg_read_entry(&initrd_addr, FW_CFG_INITRD_ADDR, 4);
> > +    bios_cfg_read_entry(&initrd_size, FW_CFG_INITRD_SIZE, 4);
> > +
> > +    initrd_end_page = ((uint32_t)(initrd_addr + initrd_size) & -4096);
> > +    max_allowed_page = (readl_es(0x22c) & -4096);
> > +
> > +    if (initrd_end_page != 0 && max_allowed_page != 0 &&
> > +        initrd_end_page != max_allowed_page) {
> > +        /* Initrd at the end of memory. Compute better initrd address
> > +         * based on e801 data
> > +         */
> > +        initrd_addr = (void *)((get_e801_addr() - initrd_size) & -4096);
> > +        writel_es(0x218, (uint32_t)initrd_addr);
> > +
> > +    }
> > +
> > +    bios_cfg_read_entry(initrd_addr, FW_CFG_INITRD_DATA, initrd_size);
> > +
> > +    bios_cfg_read_entry(&kernel_addr, FW_CFG_KERNEL_ADDR, 4);
> > +    bios_cfg_read_entry(&kernel_size, FW_CFG_KERNEL_SIZE, 4);
> > +    bios_cfg_read_entry(kernel_addr, FW_CFG_KERNEL_DATA, kernel_size);
> > +
> > +    bios_cfg_read_entry(&cmdline_addr, FW_CFG_CMDLINE_ADDR, 4);
> > +    bios_cfg_read_entry(&cmdline_size, FW_CFG_CMDLINE_SIZE, 4);
> > +    bios_cfg_read_entry(cmdline_addr, FW_CFG_CMDLINE_DATA, cmdline_size);
> > +
> > +    /* Boot linux */
> > +    segment_addr = ((uint32_t)setup_addr >> 4);
> > +    stack_addr = (uint32_t)(cmdline_addr - setup_addr - 16);
> > +
> > +    /* As we are changing critical registers, we cannot leave freedom to the
> > +     * compiler.
> > +     */
> > +    asm("movw %%ax, %%ds\n"
> > +        "movw %%ax, %%es\n"
> > +        "movw %%ax, %%fs\n"
> > +        "movw %%ax, %%gs\n"
> > +        "movw %%ax, %%ss\n"
> > +        "movl %%ebx, %%esp\n"
> > +        "addw $0x20, %%ax\n"
> > +        "pushw %%ax\n" /* CS */
> > +        "pushw $0\n" /* IP */
> > +        /* Clear registers and jump to Linux */
> > +        "xor %%ebx, %%ebx\n"
> > +        "xor %%ecx, %%ecx\n"
> > +        "xor %%edx, %%edx\n"
> > +        "xor %%edi, %%edi\n"
> > +        "xor %%ebp, %%ebp\n"
> > +        "lretw\n"
> > +        : : "a"(segment_addr), "b"(stack_addr));
> > +}
> >
diff mbox

Patch

diff --git a/.gitignore b/.gitignore
index 88a80ff..101d1e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -94,6 +94,10 @@ 
 /pc-bios/optionrom/linuxboot.bin
 /pc-bios/optionrom/linuxboot.raw
 /pc-bios/optionrom/linuxboot.img
+/pc-bios/optionrom/linuxboot_dma.asm
+/pc-bios/optionrom/linuxboot_dma.bin
+/pc-bios/optionrom/linuxboot_dma.raw
+/pc-bios/optionrom/linuxboot_dma.img
 /pc-bios/optionrom/multiboot.asm
 /pc-bios/optionrom/multiboot.bin
 /pc-bios/optionrom/multiboot.raw
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 99437e0..098d571 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -999,8 +999,13 @@  static void load_linux(PCMachineState *pcms,
     fw_cfg_add_i32(fw_cfg, FW_CFG_SETUP_SIZE, setup_size);
     fw_cfg_add_bytes(fw_cfg, FW_CFG_SETUP_DATA, setup, setup_size);
 
-    option_rom[nb_option_roms].name = "linuxboot.bin";
-    option_rom[nb_option_roms].bootindex = 0;
+    if (fw_cfg_dma_enabled(fw_cfg)) {
+        option_rom[nb_option_roms].name = "linuxboot_dma.bin";
+        option_rom[nb_option_roms].bootindex = 0;
+    } else {
+        option_rom[nb_option_roms].name = "linuxboot.bin";
+        option_rom[nb_option_roms].bootindex = 0;
+    }
     nb_option_roms++;
 }
 
@@ -1263,6 +1268,7 @@  void xen_load_linux(PCMachineState *pcms)
     load_linux(pcms, fw_cfg);
     for (i = 0; i < nb_option_roms; i++) {
         assert(!strcmp(option_rom[i].name, "linuxboot.bin") ||
+               !strcmp(option_rom[i].name, "linuxboot_dma.bin") ||
                !strcmp(option_rom[i].name, "multiboot.bin"));
         rom_add_option(option_rom[i].name, option_rom[i].bootindex);
     }
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 999f480..114aea8 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -551,7 +551,7 @@  static bool is_version_1(void *opaque, int version_id)
     return version_id == 1;
 }
 
-static bool fw_cfg_dma_enabled(void *opaque)
+bool fw_cfg_dma_enabled(void *opaque)
 {
     FWCfgState *s = opaque;
 
diff --git a/include/hw/nvram/fw_cfg.h b/include/hw/nvram/fw_cfg.h
index d008112..5c27a1f 100644
--- a/include/hw/nvram/fw_cfg.h
+++ b/include/hw/nvram/fw_cfg.h
@@ -182,5 +182,6 @@  FWCfgState *fw_cfg_init_mem_wide(hwaddr ctl_addr,
                                  hwaddr dma_addr, AddressSpace *dma_as);
 
 FWCfgState *fw_cfg_find(void);
+bool fw_cfg_dma_enabled(void *opaque);
 
 #endif
diff --git a/pc-bios/optionrom/Makefile b/pc-bios/optionrom/Makefile
index ce4852a..a51dd6e 100644
--- a/pc-bios/optionrom/Makefile
+++ b/pc-bios/optionrom/Makefile
@@ -13,15 +13,24 @@  CFLAGS := -Wall -Wstrict-prototypes -Werror -fomit-frame-pointer -fno-builtin
 CFLAGS += -I$(SRC_PATH)
 CFLAGS += $(call cc-option, $(CFLAGS), -fno-stack-protector)
 CFLAGS += $(CFLAGS_NOPIE)
+CFLAGS += -m16
 QEMU_CFLAGS = $(CFLAGS)
 
-build-all: multiboot.bin linuxboot.bin kvmvapic.bin
+ASFLAGS += -32
+
+build-all: multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin
 
 # suppress auto-removal of intermediate files
 .SECONDARY:
 
+ifdef CONFIG_WIN32
+LD_EMULATION = i386pe
+else
+LD_EMULATION = elf_i386
+endif
+
 %.img: %.o
-	$(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -Ttext 0 -e _start -s -o $@ $<,"  Building $(TARGET_DIR)$@")
+	$(call quiet-command,$(LD) $(LDFLAGS_NOPIE) -m $(LD_EMULATION) -Ttext 0 -e _start -s -o $@ $<,"  Building $(TARGET_DIR)$@")
 
 %.raw: %.img
 	$(call quiet-command,$(OBJCOPY) -O binary -j .text $< $@,"  Building $(TARGET_DIR)$@")
diff --git a/pc-bios/optionrom/linuxboot_dma.c b/pc-bios/optionrom/linuxboot_dma.c
new file mode 100644
index 0000000..8ebc480
--- /dev/null
+++ b/pc-bios/optionrom/linuxboot_dma.c
@@ -0,0 +1,291 @@ 
+/*
+ * Linux Boot Option ROM for fw_cfg DMA
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2015-2016 Red Hat Inc.
+ *   Authors:
+ *     Marc Marí <markmb@redhat.com>
+ *     Richard W.M. Jones <rjones@redhat.com>
+ */
+
+asm( /* hand-assembled ROM header; _start labels its first byte */
+".text\n"
+".global _start\n"
+"_start:\n"
+"   .short 0xaa55\n" /* emits bytes 0x55 0xaa: the option ROM signature */
+"   .byte 0\n" /* size in 512 units, filled in by signrom.py */
+"   .byte 0xcb\n" /* far return without prefix */
+"   .org 0x18\n" /* pad up to offset 0x18 of the section */
+"   .short 0\n"
+"   .short _pnph\n" /* two bytes later (0x1a): address of the $PnP header */
+"_pnph:\n"
+"   .ascii \"$PnP\"\n" /* header signature string */
+"   .byte 0x01\n"
+"   .byte (_pnph_len / 16)\n" /* header length in 16-byte units */
+"   .short 0x0000\n"
+"   .byte 0x00\n"
+"   .byte 0x00\n"
+"   .long 0x00000000\n"
+"   .short _manufacturer\n" /* address of the "QEMU" string */
+"   .short _product\n" /* address of the "Linux loader DMA" string */
+"   .long 0x00000000\n"
+"   .short 0x0000\n"
+"   .short 0x0000\n"
+"   .short _bev\n" /* address of the entry stub at _bev */
+"   .short 0x0000\n"
+"   .short 0x0000\n"
+"   .equ _pnph_len, . - _pnph\n" /* byte size of the header above */
+"_manufacturer:\n"
+"   .asciz \"QEMU\"\n"
+"_product:\n"
+"   .asciz \"Linux loader DMA\"\n"
+"   .align 4, 0\n"
+"_bev:\n" /* NOTE(review): presumably the PnP boot entry vector - confirm */
+"   cli\n" /* interrupts off */
+"   cld\n" /* direction flag cleared */
+"   jmp load_kernel\n" /* continue in C; load_kernel() ends in a far return */
+);
+
+#include "../../include/hw/nvram/fw_cfg_keys.h"
+
+/* QEMU_CFG_DMA_CONTROL bits (low bits of FWCfgDmaAccess.control) */
+#define BIOS_CFG_DMA_CTL_ERROR   0x01 /* masked out by the completion poll */
+#define BIOS_CFG_DMA_CTL_READ    0x02 /* set in every request built here */
+#define BIOS_CFG_DMA_CTL_SKIP    0x04 /* unused in this file */
+#define BIOS_CFG_DMA_CTL_SELECT  0x08 /* set along with the key in bits 16+ */
+
+#define BIOS_CFG_DMA_ADDR_HIGH 0x514 /* unused in this file */
+#define BIOS_CFG_DMA_ADDR_LOW  0x518 /* outl() target for the descriptor */
+
+#define uint64_t unsigned long long /* macros: no <stdint.h> here */
+#define uint32_t unsigned int
+#define uint16_t unsigned short
+
+#define barrier() asm("" : : : "memory") /* compiler-only fence */
+
+typedef struct FWCfgDmaAccess { /* DMA request; fields hold big-endian data */
+    uint32_t control; /* key << 16 | CTL bits; polled until clear */
+    uint32_t length;  /* number of bytes to transfer */
+    uint64_t address; /* address of the destination buffer */
+} __attribute__((packed)) FWCfgDmaAccess;
+
+static inline void outl(uint32_t value, uint16_t port) /* 32-bit port write */
+{
+    asm("outl %0, %w1" : : "a"(value), "Nd"(port)); /* EAX -> I/O port */
+}
+
+static inline void set_es(void *addr) /* load ES with addr / 16 */
+{
+    uint32_t seg = (uint32_t)addr >> 4; /* segment number = address / 16 */
+    asm("movl %0, %%es" : : "r"(seg)); /* the *_es() helpers rely on this */
+}
+
+#ifdef __clang__ /* NOTE(review): reason clang needs no prefix - confirm */
+#define ADDR32 /* clang: nothing is pasted in front of the instruction */
+#else
+#define ADDR32 "addr32 " /* others: prefix the ES-relative moves with addr32 */
+#endif
+
+static inline uint16_t readw_es(uint16_t offset) /* 16-bit load, ES:offset */
+{
+    uint16_t val; /* loaded by a volatile asm: GCC must not cache or move it */
+    asm volatile(ADDR32 "movw %%es:(%1), %0"
+                 : "=r"(val) : "r"((uint32_t)offset) : "memory");
+    return val;
+}
+
+static inline uint32_t readl_es(uint16_t offset) /* 32-bit load, ES:offset */
+{
+    uint32_t val; /* loaded by a volatile asm: GCC must not cache or move it */
+    asm volatile(ADDR32 "movl %%es:(%1), %0"
+                 : "=r"(val) : "r"((uint32_t)offset) : "memory");
+    return val;
+}
+
+static inline void writel_es(uint16_t offset, uint32_t val)
+{
+    barrier(); /* compiler fence, then the 32-bit store to ES:offset below */
+    asm(ADDR32 "movl %0, %%es:(%1)" : : "r"(val), "r"((uint32_t)offset));
+}
+
+/* Reverse the byte order of a 32-bit value. */
+static inline uint32_t bswap32(uint32_t x)
+{
+    /* First swap the two bytes inside each 16-bit half ... */
+    uint32_t pairs = ((x & 0x00ff00ffU) << 8) | ((x >> 8) & 0x00ff00ffU);
+    /* ... then exchange the halves themselves. */
+    return (pairs << 16) | (pairs >> 16);
+}
+
+/* Reverse the byte order of a 64-bit value. */
+static inline uint64_t bswap64(uint64_t x)
+{
+    const uint64_t bytes = 0x00ff00ff00ff00ffULL;
+    const uint64_t words = 0x0000ffff0000ffffULL;
+
+    /* Swap neighbouring bytes, then neighbouring 16-bit words ... */
+    x = ((x & bytes) << 8) | ((x >> 8) & bytes);
+    x = ((x & words) << 16) | ((x >> 16) & words);
+    /* ... and finally exchange the two 32-bit halves. */
+    return (x << 32) | (x >> 32);
+}
+
+static inline uint64_t cpu_to_be64(uint64_t x)
+{
+    return bswap64(x); /* unconditional swap: assumes a little-endian CPU */
+}
+
+static inline uint32_t cpu_to_be32(uint32_t x)
+{
+    return bswap32(x); /* unconditional swap: assumes a little-endian CPU */
+}
+
+static inline uint32_t be32_to_cpu(uint32_t x)
+{
+    return bswap32(x); /* unconditional swap: assumes a little-endian CPU */
+}
+
+/* Read 'len' bytes of fw_cfg item 'entry' into 'buf' using one DMA request. */
+static void bios_cfg_read_entry(void *buf, uint16_t entry, uint32_t len)
+{
+    FWCfgDmaAccess access;
+    /* Shift as uint32_t: 'entry << 16' in int overflows for keys >= 0x8000. */
+    uint32_t control = ((uint32_t)entry << 16) | BIOS_CFG_DMA_CTL_SELECT
+                        | BIOS_CFG_DMA_CTL_READ;
+
+    access.address = cpu_to_be64((uint64_t)(uint32_t)buf); /* fields are BE */
+    access.length = cpu_to_be32(len);
+    access.control = cpu_to_be32(control);
+    barrier(); /* descriptor stores must precede the port write */
+    outl(cpu_to_be32((uint32_t)&access), BIOS_CFG_DMA_ADDR_LOW);
+
+    while (be32_to_cpu(access.control) & ~BIOS_CFG_DMA_CTL_ERROR) {
+        barrier(); /* re-read control until only the error bit may be set */
+    }
+}
+
+/* Return top of memory, in bytes, using BIOS function E801. */
+static uint32_t get_e801_addr(void)
+{
+    uint16_t reg_ax, reg_bx, reg_cx, reg_dx; /* values left by the BIOS */
+    uint16_t kbytes_1m_to_16m; /* extended memory from 1M to 16M, in K */
+    uint16_t blocks_above_16m; /* extended memory above 16M, in 64K units */
+
+    /* AX carries the function number; BX, CX and DX are passed in as 0. */
+    asm("int $0x15\n"
+        : "=a"(reg_ax), "=b"(reg_bx), "=c"(reg_cx), "=d"(reg_dx)
+        : "a"(0xe801), "b"(0), "c"(0), "d"(0));
+
+    /* SeaBIOS does not do so, but in theory a BIOS could return
+     * CX=DX=0, in which case the result is taken from AX & BX instead.
+     */
+    if (reg_cx != 0 || reg_dx != 0) {
+        kbytes_1m_to_16m = reg_cx;
+        blocks_above_16m = reg_dx;
+    } else {
+        kbytes_1m_to_16m = reg_ax;
+        blocks_above_16m = reg_bx;
+    }
+
+    if (blocks_above_16m != 0) {
+        /* Add the 256 64K-units (16M) that lie below the counted range
+         * and convert the total to bytes.
+         */
+        return ((uint32_t)blocks_above_16m + 256) << 16;
+    }
+
+    /* Fallback for machines with <= 16MB of RAM, which probably would
+     * never be the case: add the 1024K (1M) that lie below the counted
+     * range and convert the total to bytes.
+     */
+    return ((uint32_t)kbytes_1m_to_16m + 1024) << 10;
+}
+
+void load_kernel(void) /* entered from the _bev stub; leaves via lretw */
+{
+    void *setup_addr; /* the *_addr / *_size pairs are all read from fw_cfg */
+    void *initrd_addr;
+    void *kernel_addr;
+    void *cmdline_addr;
+    uint32_t setup_size;
+    uint32_t initrd_size;
+    uint32_t kernel_size;
+    uint32_t cmdline_size;
+    uint32_t initrd_end_page, max_allowed_page;
+    uint32_t segment_addr, stack_addr;
+    /* Fetch the setup block first: its header fields are consulted below. */
+    bios_cfg_read_entry(&setup_addr, FW_CFG_SETUP_ADDR, 4);
+    bios_cfg_read_entry(&setup_size, FW_CFG_SETUP_SIZE, 4);
+    bios_cfg_read_entry(setup_addr, FW_CFG_SETUP_DATA, setup_size);
+
+    set_es(setup_addr); /* ES:offset now addresses the setup block */
+
+    /* For protocol < 0x203 we don't have initrd_max ... */
+    if (readw_es(0x206) < 0x203) {
+        /* ... so we assume initrd_max = 0x37ffffff. */
+        writel_es(0x22c, 0x37ffffff);
+    }
+
+    bios_cfg_read_entry(&initrd_addr, FW_CFG_INITRD_ADDR, 4);
+    bios_cfg_read_entry(&initrd_size, FW_CFG_INITRD_SIZE, 4);
+    /* Round the initrd end and the limit at 0x22c down to 4K boundaries. */
+    initrd_end_page = ((uint32_t)(initrd_addr + initrd_size) & -4096);
+    max_allowed_page = (readl_es(0x22c) & -4096);
+
+    if (initrd_end_page != 0 && max_allowed_page != 0 &&
+        initrd_end_page != max_allowed_page) {
+        /* Initrd at the end of memory. Compute better initrd address
+         * based on e801 data and store it in the setup block at 0x218.
+         */
+        initrd_addr = (void *)((get_e801_addr() - initrd_size) & -4096);
+        writel_es(0x218, (uint32_t)initrd_addr);
+
+    }
+
+    bios_cfg_read_entry(initrd_addr, FW_CFG_INITRD_DATA, initrd_size);
+
+    bios_cfg_read_entry(&kernel_addr, FW_CFG_KERNEL_ADDR, 4);
+    bios_cfg_read_entry(&kernel_size, FW_CFG_KERNEL_SIZE, 4);
+    bios_cfg_read_entry(kernel_addr, FW_CFG_KERNEL_DATA, kernel_size);
+
+    bios_cfg_read_entry(&cmdline_addr, FW_CFG_CMDLINE_ADDR, 4);
+    bios_cfg_read_entry(&cmdline_size, FW_CFG_CMDLINE_SIZE, 4);
+    bios_cfg_read_entry(cmdline_addr, FW_CFG_CMDLINE_DATA, cmdline_size);
+
+    /* Boot linux: segments = setup block, stack 16 bytes below the cmdline */
+    segment_addr = ((uint32_t)setup_addr >> 4);
+    stack_addr = (uint32_t)(cmdline_addr - setup_addr - 16);
+
+    /* As we are changing critical registers, we cannot leave freedom to the
+     * compiler.
+     */
+    asm("movw %%ax, %%ds\n"
+        "movw %%ax, %%es\n"
+        "movw %%ax, %%fs\n"
+        "movw %%ax, %%gs\n"
+        "movw %%ax, %%ss\n"
+        "movl %%ebx, %%esp\n"
+        "addw $0x20, %%ax\n" /* setup segment + 0x20 (0x200 bytes further) */
+        "pushw %%ax\n" /* CS */
+        "pushw $0\n" /* IP */
+        /* Clear registers and jump to Linux */
+        "xor %%ebx, %%ebx\n"
+        "xor %%ecx, %%ecx\n"
+        "xor %%edx, %%edx\n"
+        "xor %%edi, %%edi\n"
+        "xor %%ebp, %%ebp\n"
+        "lretw\n" /* far return pops IP then CS, i.e. jumps to CS:0 */
+        : : "a"(segment_addr), "b"(stack_addr));
+}