From patchwork Mon Apr 5 02:13:56 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 49402 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 7D035B7C06 for ; Tue, 6 Apr 2010 03:46:47 +1000 (EST) Received: from localhost ([127.0.0.1]:51992 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1Nyq1U-0007PS-S0 for incoming@patchwork.ozlabs.org; Mon, 05 Apr 2010 13:23:40 -0400 Received: from mailman by lists.gnu.org with tmda-scanned (Exim 4.43) id 1NyptZ-0005rx-TA for qemu-devel@nongnu.org; Mon, 05 Apr 2010 13:15:30 -0400 Received: from [140.186.70.92] (port=41554 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1NyptR-0005nI-LR for qemu-devel@nongnu.org; Mon, 05 Apr 2010 13:15:25 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.69) (envelope-from ) id 1NyptN-0007RR-8o for qemu-devel@nongnu.org; Mon, 05 Apr 2010 13:15:21 -0400 Received: from are.twiddle.net ([75.149.56.221]:42061) by eggs.gnu.org with esmtp (Exim 4.69) (envelope-from ) id 1NyptM-0007QQ-6s for qemu-devel@nongnu.org; Mon, 05 Apr 2010 13:15:17 -0400 Received: by are.twiddle.net (Postfix, from userid 5000) id 472E8DE3; Mon, 5 Apr 2010 10:15:12 -0700 (PDT) Message-Id: <2251fe09ac6615677e74e926f76d15ad4c8a7938.1270486025.git.rth@twiddle.net> In-Reply-To: References: From: Richard Henderson Date: Sun, 4 Apr 2010 19:13:56 -0700 To: qemu-devel@nongnu.org X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 2) Subject: [Qemu-devel] [PATCH 14/14] linux-user: Load a VDSO for x86-64. 
X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Signed-off-by: Richard Henderson --- linux-user/elfload.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 files changed, 193 insertions(+), 8 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 100efdc..7b854e2 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -183,6 +183,8 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs, const CPUState *env) (*regs)[26] = env->segs[R_GS].selector & 0xffff; } +#define VDSO_BASENAME "vdso-linux-x64.so" + #else #define ELF_START_MMAP 0x80000000 @@ -801,6 +803,10 @@ static inline void init_thread(struct target_pt_regs *regs, #define ELF_HWCAP 0 #endif +#ifndef VDSO_BASENAME +#define VDSO_BASENAME NULL +#endif + #ifdef TARGET_ABI32 #undef ELF_CLASS #define ELF_CLASS ELFCLASS32 @@ -1077,7 +1083,8 @@ static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot) static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, struct elfhdr *exec, struct image_info *info, - struct image_info *interp_info) + struct image_info *interp_info, + struct image_info *vdso_info) { abi_ulong sp; int size; @@ -1100,16 +1107,21 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, */ sp = sp &~ (abi_ulong)15; size = (DLINFO_ITEMS + 1) * 2; - if (k_platform) + if (k_platform) { size += 2; + } + if (vdso_info) { + size += 4; + } #ifdef DLINFO_ARCH_ITEMS size += DLINFO_ARCH_ITEMS * 2; #endif size += envc + argc + 2; size += 1; /* argc itself */ size *= n; - if (size & 15) + if (size & 15) { sp -= 16 - (size & 15); + } /* This is correct because Linux defines * elf_addr_t as Elf32_Off / Elf64_Off @@ -1126,7 +1138,9 @@ static abi_ulong 
create_elf_tables(abi_ulong p, int argc, int envc, NEW_AUX_ENT(AT_PHENT, (abi_ulong)(sizeof (struct elf_phdr))); NEW_AUX_ENT(AT_PHNUM, (abi_ulong)(exec->e_phnum)); NEW_AUX_ENT(AT_PAGESZ, (abi_ulong)(TARGET_PAGE_SIZE)); - NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info->load_addr)); + if (interp_info) { + NEW_AUX_ENT(AT_BASE, (abi_ulong)(interp_info->load_addr)); + } NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0); NEW_AUX_ENT(AT_ENTRY, info->entry); NEW_AUX_ENT(AT_UID, (abi_ulong) getuid()); @@ -1135,8 +1149,13 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc, NEW_AUX_ENT(AT_EGID, (abi_ulong) getegid()); NEW_AUX_ENT(AT_HWCAP, (abi_ulong) ELF_HWCAP); NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK)); - if (k_platform) + if (k_platform) { NEW_AUX_ENT(AT_PLATFORM, u_platform); + } + if (vdso_info) { + NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry); + NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr); + } #ifdef ARCH_DLINFO /* * ARCH_DLINFO must come last so platform specific code can enforce @@ -1378,6 +1397,165 @@ static void load_elf_interp(const char *filename, struct image_info *info, exit(-1); } +static void load_elf_vdso(const char *basename, struct image_info *info, + char bprm_buf[BPRM_BUF_SIZE]) +{ + const char *errmsg; + char *filename; + int fd, retval, i; + abi_ulong load_bias; + + /* ??? What we really need access to is qemu_find_file, but that is + only built for system targets at the moment. */ + filename = alloca(strlen(CONFIG_QEMU_SHAREDIR "/") + strlen(basename) + 1); + stpcpy(stpcpy(filename, CONFIG_QEMU_SHAREDIR "/"), basename); + + fd = open(filename, O_RDONLY); + if (fd < 0) { + goto exit_perror; + } + + retval = read(fd, bprm_buf, BPRM_BUF_SIZE); + if (retval < 0) { + goto exit_perror; + } + if (retval < BPRM_BUF_SIZE) { + memset(bprm_buf + retval, 0, BPRM_BUF_SIZE - retval); + } + + load_elf_image(basename, fd, info, NULL, bprm_buf); + load_bias = info->load_bias; + + /* We most likely need to relocate the VDSO image. 
The one built into + the kernel is built for a fixed address. The one built for QEMU is + not, since that requires close control of the guest address space. */ + if (load_bias) { + struct elfhdr *ehdr = (struct elfhdr *)bprm_buf; + struct elf_phdr *phdr; + abi_ulong dynamic_addr = -1; + abi_ulong dynsym_addr = -1; + + /* ??? Assume QEMU's VDSO is built "properly", which arranges + for the PHDRs, and all the sections manipulated below, to + be included with a writable load segment. */ + + /* ??? One might think that we'd need to relocate ehdr.e_entry, + but for some reason glibc does that one itself, though that + is also available via the AT_SYSINFO entry. */ + + /* Relocate the program headers. */ + phdr = (struct elf_phdr *)g2h(info->load_addr + ehdr->e_phoff); + bswap_phdr(phdr, ehdr->e_phnum); + for (i = 0; i < ehdr->e_phnum; ++i) { + phdr[i].p_vaddr += load_bias; + phdr[i].p_paddr += load_bias; + if (phdr[i].p_type == PT_DYNAMIC) { + dynamic_addr = phdr[i].p_vaddr; + } + } + bswap_phdr(phdr, ehdr->e_phnum); + + /* Relocate the DYNAMIC entries. */ + if (dynamic_addr != -1) { + abi_ulong tag, val, *dyn = (abi_ulong *)g2h(dynamic_addr); + do { + tag = tswapl(dyn[0]); + val = tswapl(dyn[1]); + switch (tag) { + case DT_SYMTAB: + dynsym_addr = load_bias + val; + dyn[1] = tswapl(dynsym_addr); + break; + case DT_SYMENT: + if (val != sizeof(struct elf_sym)) { + errmsg = "VDSO has an unexpected dynamic symbol size"; + goto exit_errmsg; + } + break; + + case DT_HASH: + case DT_STRTAB: + case DT_VERDEF: + case DT_VERSYM: + case DT_ADDRRNGLO ... DT_ADDRRNGHI: + /* These entries store an address in the entry. */ + dyn[1] = tswapl(load_bias + val); + break; + + case DT_NULL: + case DT_STRSZ: + case DT_SONAME: + case DT_DEBUG: + case DT_FLAGS: + case DT_FLAGS_1: + case DT_VERDEFNUM: + case DT_VALRNGLO ... DT_VALRNGHI: + /* These entries store an integer in the entry. 
*/ + break; + + case DT_REL: + case DT_RELA: + /* These entries indicate that the VDSO was built + incorrectly. It should not have real relocations. */ + errmsg = "VDSO has relocations"; + goto exit_errmsg; + case DT_NEEDED: + case DT_VERNEED: + errmsg = "VDSO has external dependancies"; + goto exit_errmsg; + + default: + /* This is probably something target specific. */ + errmsg = "VDSO has unknown DYNAMIC entry"; + goto exit_errmsg; + } + dyn += 2; + } while (tag != DT_NULL); + } + + /* Relocate the dynamic symbol table. */ + if (dynsym_addr != -1) { + struct elf_shdr *shdr; + struct elf_sym *sym; + int dynsym_size = 0; + + /* Read the section headers to find out the size of the + dynamic symbol table. */ + shdr = (struct elf_shdr *)g2h(info->load_addr + ehdr->e_shoff); + for (i = 0; i < ehdr->e_shnum; ++i) { + abi_ulong addr = tswapl(shdr[i].sh_addr) + load_bias; + if (addr == dynsym_addr) { + dynsym_size = tswapl(shdr[i].sh_size); + break; + } + } + + sym = (struct elf_sym *)g2h(dynsym_addr); + for (i = 0; i < dynsym_size / sizeof(*sym); ++i) { + sym[i].st_value = tswapl(tswapl(sym[i].st_value) + load_bias); + } + } + } + + /* Mark the VDSO writable segment read-only. */ + /* ??? This assumes that the VDSO implementation doesn't actually + have any truly writable data. Perhaps we should instead use + the PT_GNU_RELRO header to indicate that we really want this. 
*/ + retval = target_mprotect(info->start_data, info->brk - info->start_data, + PROT_READ); + if (retval < 0) { + goto exit_perror; + } + return; + + exit_perror: + errmsg = strerror(errno); + exit_errmsg: + fprintf(stderr, "%s: %s\n", filename, errmsg); + exit(-1); +} + + static int symfind(const void *s0, const void *s1) { struct elf_sym *key = (struct elf_sym *)s0; @@ -1512,7 +1690,7 @@ static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias) int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, struct image_info * info) { - struct image_info interp_info; + struct image_info interp_info, vdso_info; struct elfhdr elf_ex; char *elf_interpreter = NULL; @@ -1559,8 +1737,15 @@ int load_elf_binary(struct linux_binprm * bprm, struct target_pt_regs * regs, } } - bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex, - info, (elf_interpreter ? &interp_info : NULL)); + /* If we've been given a VDSO to load, do so. */ + if (VDSO_BASENAME) { + load_elf_vdso(VDSO_BASENAME, &vdso_info, bprm->buf); + } + + bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, + &elf_ex, info, + (elf_interpreter ? &interp_info : NULL), + (VDSO_BASENAME ? &vdso_info : NULL)); info->start_stack = bprm->p; /* If we have an interpreter, set that as the program's entry point.