Patchwork [14/14] linux-user: Load a VDSO for x86-64.

login
register
mail settings
Submitter Richard Henderson
Date April 5, 2010, 2:13 a.m.
Message ID <2251fe09ac6615677e74e926f76d15ad4c8a7938.1270486025.git.rth@twiddle.net>
Download mbox | patch
Permalink /patch/49402/
State New
Headers show

Comments

Richard Henderson - April 5, 2010, 2:13 a.m.
Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 linux-user/elfload.c |  201 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 193 insertions(+), 8 deletions(-)

Patch

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 100efdc..7b854e2 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -183,6 +183,8 @@  static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUState *env)
     (*regs)[26] = env->segs[R_GS].selector & 0xffff;
 }
 
+#define VDSO_BASENAME  "vdso-linux-x64.so"
+
 #else
 
 #define ELF_START_MMAP 0x80000000
@@ -801,6 +803,10 @@  static inline void init_thread(struct target_pt_regs *regs,
 #define ELF_HWCAP 0
 #endif
 
+#ifndef VDSO_BASENAME
+#define VDSO_BASENAME NULL
+#endif
+
 #ifdef TARGET_ABI32
 #undef ELF_CLASS
 #define ELF_CLASS ELFCLASS32
@@ -1077,7 +1083,8 @@  static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
 static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
                                    struct elfhdr *exec,
                                    struct image_info *info,
-                                   struct image_info *interp_info)
+                                   struct image_info *interp_info,
+                                   struct image_info *vdso_info)
 {
     abi_ulong sp;
     int size;
@@ -1100,16 +1107,21 @@  static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
      */
     sp = sp &~ (abi_ulong)15;
     size = (DLINFO_ITEMS + 1) * 2;
-    if (k_platform)
+    if (k_platform) {
         size += 2;
+    }
+    if (vdso_info) {
+        size += 4;
+    }
 #ifdef DLINFO_ARCH_ITEMS
     size += DLINFO_ARCH_ITEMS * 2;
 #endif
     size += envc + argc + 2;
     size += 1;	/* argc itself */
     size *= n;
-    if (size & 15)
+    if (size & 15) {
         sp -= 16 - (size & 15);
+    }
 
     /* This is correct because Linux defines
      * elf_addr_t as Elf32_Off / Elf64_Off
@@ -1126,7 +1138,9 @@  static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
     NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr)));
     NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum));
     NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE));
-    NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info->load_addr));
+    if (interp_info) {
+        NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info->load_addr));
+    }
     NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0);
     NEW_AUX_ENT(AT_ENTRY, info->entry);
     NEW_AUX_ENT(AT_UID, (abi_ulong) getuid());
@@ -1135,8 +1149,13 @@  static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
     NEW_AUX_ENT(AT_EGID, (abi_ulong) getegid());
     NEW_AUX_ENT(AT_HWCAP, (abi_ulong) ELF_HWCAP);
     NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK));
-    if (k_platform)
+    if (k_platform) {
         NEW_AUX_ENT(AT_PLATFORM, u_platform);
+    }
+    if (vdso_info) {
+        NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry);
+        NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr);
+    }
 #ifdef ARCH_DLINFO
     /*
      * ARCH_DLINFO must come last so platform specific code can enforce
@@ -1378,6 +1397,165 @@  static void load_elf_interp(const char *filename, struct image_info *info,
     exit(-1);
 }
 
+static void load_elf_vdso(const char *basename, struct image_info *info,
+                          char bprm_buf[BPRM_BUF_SIZE])
+{
+    const char *errmsg;
+    char *filename;
+    int fd, retval, i;
+    abi_ulong load_bias;
+
+    /* Look for the VDSO in CONFIG_QEMU_SHAREDIR.  ??? What we really need
+       is qemu_find_file, but that is only built for system targets.  */
+    filename = alloca(strlen(CONFIG_QEMU_SHAREDIR "/") + strlen(basename) + 1);
+    stpcpy(stpcpy(filename, CONFIG_QEMU_SHAREDIR "/"), basename);
+
+    fd = open(filename, O_RDONLY);
+    if (fd < 0) {
+        goto exit_perror;
+    }
+
+    retval = read(fd, bprm_buf, BPRM_BUF_SIZE);
+    if (retval < 0) {
+        goto exit_perror;
+    }
+    if (retval < BPRM_BUF_SIZE) {
+        memset(bprm_buf + retval, 0, BPRM_BUF_SIZE - retval);
+    }
+
+    load_elf_image(basename, fd, info, NULL, bprm_buf);
+    load_bias = info->load_bias;
+
+    /* We most likely need to relocate the VDSO image.  The one built into
+       the kernel is built for a fixed address.  The one built for QEMU is
+       not, since that requires close control of the guest address space.  */
+    if (load_bias) {
+        struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
+        struct elf_phdr *phdr;
+        abi_ulong dynamic_addr = -1;
+        abi_ulong dynsym_addr = -1;
+
+        /* ??? Assume QEMU's VDSO is built "properly", which arranges
+           for the PHDRs, and all the sections manipulated below, to
+           be included with a writable load segment.  */
+
+        /* ??? One might think that we'd need to relocate ehdr.e_entry,
+           but for some reason glibc does that one itself, though that
+           is also available via the AT_SYSINFO entry.  */
+
+        /* Relocate the program headers.  */
+        phdr = (struct elf_phdr *)g2h(info->load_addr + ehdr->e_phoff); 
+        bswap_phdr(phdr, ehdr->e_phnum);
+        for (i = 0; i < ehdr->e_phnum; ++i) {
+            phdr[i].p_vaddr += load_bias;
+            phdr[i].p_paddr += load_bias;
+            if (phdr[i].p_type == PT_DYNAMIC) {
+                dynamic_addr = phdr[i].p_vaddr;
+            }
+        }
+        bswap_phdr(phdr, ehdr->e_phnum);
+
+        /* Relocate the DYNAMIC entries.  */
+        if (dynamic_addr != -1) {
+            abi_ulong tag, val, *dyn = (abi_ulong *)g2h(dynamic_addr);
+            do {
+                tag = tswapl(dyn[0]);
+                val = tswapl(dyn[1]);
+                switch (tag) {
+                case DT_SYMTAB:
+                    dynsym_addr = load_bias + val;
+                    dyn[1] = tswapl(dynsym_addr);
+                    break;
+                case DT_SYMENT:
+                    if (val != sizeof(struct elf_sym)) {
+                        errmsg = "VDSO has an unexpected dynamic symbol size";
+                        goto exit_errmsg;
+                    }
+                    break;
+
+                case DT_HASH:
+                case DT_STRTAB:
+                case DT_VERDEF:
+                case DT_VERSYM:
+                case DT_ADDRRNGLO ... DT_ADDRRNGHI:
+                    /* These entries store an address in the entry.  */
+                    dyn[1] = tswapl(load_bias + val);
+                    break;
+
+                case DT_NULL:
+                case DT_STRSZ:
+                case DT_SONAME:
+                case DT_DEBUG:
+                case DT_FLAGS:
+                case DT_FLAGS_1:
+                case DT_VERDEFNUM:
+                case DT_VALRNGLO ... DT_VALRNGHI:
+                    /* These entries store an integer in the entry.  */
+                    break;
+
+                case DT_REL:
+                case DT_RELA:
+                    /* These entries indicate that the VDSO was built
+                       incorrectly.  It should not have real relocations.  */
+                    errmsg = "VDSO has relocations";
+                    goto exit_errmsg;
+                case DT_NEEDED:
+                case DT_VERNEED:
+                    errmsg = "VDSO has external dependancies";
+                    goto exit_errmsg;
+
+                default:
+                    /* This is probably something target specific.  */
+                    errmsg = "VDSO has unknown DYNAMIC entry";
+                    goto exit_errmsg;
+                }
+                dyn += 2;
+            } while (tag != DT_NULL);
+        }
+
+        /* Relocate the dynamic symbol table: bias every st_value.  */
+        if (dynsym_addr != -1) {
+            struct elf_shdr *shdr;
+            struct elf_sym *sym;
+            int dynsym_size = 0;
+
+            /* Scan the section headers for the size of the dynamic symbol
+               table; with no match it stays 0 and nothing is relocated.  */
+            shdr = (struct elf_shdr *)g2h(info->load_addr + ehdr->e_shoff);
+            for (i = 0; i < ehdr->e_shnum; ++i) {
+                abi_ulong addr = tswapl(shdr[i].sh_addr) + load_bias;
+                if (addr == dynsym_addr) {
+                    dynsym_size = tswapl(shdr[i].sh_size);
+                    break;
+                }
+            }
+
+            sym = (struct elf_sym *)g2h(dynsym_addr);
+            for (i = 0; i < dynsym_size / sizeof(*sym); ++i) {
+                sym[i].st_value = tswapl(tswapl(sym[i].st_value) + load_bias);
+            }
+        }
+    }
+
+    /* Mark the VDSO writable segment read-only.  */
+    /* ??? This assumes that the VDSO implementation doesn't actually
+       have any truly writable data.  Perhaps we should instead use
+       the PT_GNU_RELRO header to indicate that we really want this.  */
+    retval = target_mprotect(info->start_data, info->brk - info->start_data,
+                             PROT_READ);
+    if (retval < 0) {
+        goto exit_perror;
+    }
+    return;
+
+ exit_perror:
+    errmsg = strerror(errno);
+ exit_errmsg:
+    fprintf(stderr, "%s: %s\n", filename, errmsg);
+    exit(-1);
+}
+
+
 static int symfind(const void *s0, const void *s1)
 {
     struct elf_sym *key = (struct elf_sym *)s0;
@@ -1512,7 +1690,7 @@  static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias)
 int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
                     struct image_info * info)
 {
-    struct image_info interp_info;
+    struct image_info interp_info, vdso_info;
     struct elfhdr elf_ex;
     char *elf_interpreter = NULL;
 
@@ -1559,8 +1737,15 @@  int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs,
         }
     }
 
-    bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex,
-                                info, (elf_interpreter ? &interp_info : NULL));
+    /* If we've been given a VDSO to load, do so.  */
+    if (VDSO_BASENAME) {
+        load_elf_vdso(VDSO_BASENAME, &vdso_info, bprm->buf);
+    }
+
+    bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc,
+                                &elf_ex, info,
+                                (elf_interpreter ? &interp_info : NULL),
+                                (VDSO_BASENAME ? &vdso_info : NULL));
     info->start_stack = bprm->p;
 
     /* If we have an interpreter, set that as the program's entry point.