From patchwork Mon Nov 1 15:01:33 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alexander Graf X-Patchwork-Id: 69810 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [199.232.76.165]) (using TLSv1 with cipher DHE-RSA-AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id A9A47B70DA for ; Tue, 2 Nov 2010 03:20:36 +1100 (EST) Received: from localhost ([127.0.0.1]:46008 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PCwT6-00071k-DF for incoming@patchwork.ozlabs.org; Mon, 01 Nov 2010 11:38:44 -0400 Received: from [140.186.70.92] (port=52864 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PCvtz-0003gD-UJ for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:02:35 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1PCvtW-0000fl-Dg for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:02:03 -0400 Received: from cantor2.suse.de ([195.135.220.15]:39333 helo=mx2.suse.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1PCvtV-0000eb-UY for qemu-devel@nongnu.org; Mon, 01 Nov 2010 11:01:58 -0400 Received: from relay2.suse.de (charybdis-ext.suse.de [195.135.221.2]) by mx2.suse.de (Postfix) with ESMTP id 497168891E; Mon, 1 Nov 2010 16:01:54 +0100 (CET) From: Alexander Graf To: qemu-devel Developers Date: Mon, 1 Nov 2010 16:01:33 +0100 Message-Id: <1288623713-28062-21-git-send-email-agraf@suse.de> X-Mailer: git-send-email 1.6.0.2 In-Reply-To: <1288623713-28062-1-git-send-email-agraf@suse.de> References: <1288623713-28062-1-git-send-email-agraf@suse.de> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.4-2.6 Cc: Gerd Hoffmann Subject: [Qemu-devel] [PATCH 20/40] xenner: kernel: mmu support for 32-bit PAE X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.5 Precedence: 
list List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org This patch adds support for memory management on 32 bit systems with PAE. Signed-off-by: Alexander Graf --- pc-bios/xenner/xenner-mmpae.c | 444 +++++++++++++++++++++++++++++++++++++++++ 1 files changed, 444 insertions(+), 0 deletions(-) create mode 100644 pc-bios/xenner/xenner-mmpae.c diff --git a/pc-bios/xenner/xenner-mmpae.c b/pc-bios/xenner/xenner-mmpae.c new file mode 100644 index 0000000..7c11732 --- /dev/null +++ b/pc-bios/xenner/xenner-mmpae.c @@ -0,0 +1,444 @@ +/* + * Copyright (C) Red Hat 2007 + * Copyright (C) Novell Inc. 2010 + * + * Author(s): Gerd Hoffmann + * Alexander Graf + * + * Xenner memory management for 32 bit pae mode + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */
+
+/*
+ * NOTE(review): the <...> targets of the two system includes below were
+ * stripped when this patch was extracted.  <inttypes.h> is implied by the
+ * PRIx32/PRIx64 format macros used in pgtable_walk(); <xen/xen.h> is a best
+ * guess -- confirm both against the original posting.
+ */
+#include <inttypes.h>
+#include <xen/xen.h>
+
+#include "xenner.h"
+#include "xenner-mm.c"
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * The mapping slots [0, MAPS_MAX) are split into MAPS_R_COUNT equally
+ * sized ranges.  map_page() selects the range from the low MAPS_R_BITS
+ * bits of the mfn, so a lookup only has to scan a single range.
+ */
+#define MAPS_R_BITS        4
+#define MAPS_R_COUNT       (1 << MAPS_R_BITS)
+#define MAPS_R_MASK        (MAPS_R_COUNT - 1)
+#define MAPS_R_SIZE        (MAPS_MAX / MAPS_R_COUNT)
+#define MAPS_R_LOW(r)      (MAPS_R_SIZE * (r))
+#define MAPS_R_HIGH(r)     (MAPS_R_SIZE * (r) + MAPS_R_SIZE)
+
+/* per-range cursor: where find_slot() starts looking for a free slot */
+static int maps_next[MAPS_R_COUNT];
+
+/* taken by map_page() and free_page() around slot and refcount updates */
+static spinlock_t maplock = SPIN_LOCK_UNLOCKED;
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Translate an emulator virtual address into a machine address.
+ *
+ * The region is selected by masking va with 0xff800000.  Addresses in the
+ * XEN_TXT, XEN_IPT and XEN_M2P regions are translated; any other address
+ * ends in panic().
+ */
+uintptr_t emu_pa(uintptr_t va)
+{
+    switch (va & 0xff800000) {
+    case XEN_TXT:
+        return va - (uintptr_t)_vstart;
+    case XEN_IPT:
+    {
+        uintptr_t mfn_guest = emudev_get(EMUDEV_CONF_GUEST_START_PFN, 0);
+        uintptr_t init_pt_pfn = emudev_get(EMUDEV_CONF_PFN_INIT_PT, 0);
+
+        return frame_to_addr(mfn_guest + init_pt_pfn) | (va - XEN_IPT);
+    }
+    case XEN_M2P:
+        return va - XEN_M2P + frame_to_addr(vmconf.mfn_m2p);
+    default:
+        break;
+    }
+
+    panic("unknown address", NULL);
+    return 0;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Find a slot with a zero reference count inside the given range.
+ *
+ * The search starts at the range's cursor (maps_next[range]) and wraps
+ * around at the end of the range.  On success the cursor is advanced past
+ * the returned slot.
+ *
+ * Returns the slot index, or -1 when every slot of the range is in use.
+ */
+static int find_slot(int range)
+{
+    int low = MAPS_R_LOW(range);
+    int high = MAPS_R_HIGH(range);
+    int *next = maps_next + range;
+    int start;
+    int slot;
+
+    /*
+     * maps_next[] is zero-initialised and only ever written here, so for
+     * every range but the first the cursor initially lies outside
+     * [low, high).  Without this clamp the scan would hand out slots that
+     * belong to another range, and the wrap-around check below could
+     * never match, looping forever once the range is exhausted.
+     */
+    if (*next < low || *next >= high) {
+        *next = low;
+    }
+    start = *next;
+
+    while (maps_refcnt[*next]) {
+        (*next)++;
+        if (*next == high) {
+            *next = low;
+        }
+        if (*next == start) {
+            /* went once around the range without finding a free slot */
+            return -1;
+        }
+    }
+    slot = *next;
+    (*next)++;
+    if (*next == high) {
+        *next = low;
+    }
+    return slot;
+}
+
+/*
+ * Look for a slot in the given range whose pte is present and already
+ * points at mfn.
+ *
+ * Returns the slot index, or -1 if no slot of the range maps mfn.
+ */
+static int mfn_to_slot_pae(uint32_t mfn, int range)
+{
+    int low = MAPS_R_LOW(range);
+    int high = MAPS_R_HIGH(range);
+    int slot;
+
+    for (slot = low; slot < high; slot++) {
+        if (!test_pgflag_pae(maps_pae[slot], _PAGE_PRESENT)) {
+            continue;
+        }
+        if (get_pgframe_pae(maps_pae[slot]) == mfn) {
+            /* cache hit */
+            return slot;
+        }
+    }
+    return -1;
+}
+
+/*
+ * Map the page containing machine address maddr into the XEN_MAP_PAE
+ * window and return a pointer to maddr inside that mapping.
+ *
+ * A slot that already maps the frame is reused; otherwise a free slot of
+ * the frame's range is set up and its TLB entry flushed.  Every call takes
+ * one reference on the slot, which free_page() drops again.  Calls panic()
+ * when the range has no free slot left.
+ */
+void *map_page(unsigned long maddr)
+{
+    uint32_t mfn = addr_to_frame(maddr);
+    uint32_t off = addr_offset(maddr);
+    uint32_t va;
+    int range, slot;
+
+    spin_lock(&maplock);
+    range = mfn & MAPS_R_MASK;
+    slot = mfn_to_slot_pae(mfn, range);
+    if (slot == -1) {
+        slot = find_slot(range);
+        if (slot == -1) {
+            panic("out of map slots", NULL);
+        }
+        printk(3, "%s: mfn %5x range %d [%3d - %3d], slot %3d\n", __FUNCTION__,
+               mfn, range, MAPS_R_LOW(range), MAPS_R_HIGH(range), slot);
+        maps_pae[slot] = get_pgentry_pae(mfn, EMU_PGFLAGS);
+        vminfo.faults[XEN_FAULT_MAPS_MAPIT]++;
+        va = XEN_MAP_PAE + slot*PAGE_SIZE;
+        flush_tlb_addr(va);
+    } else {
+        vminfo.faults[XEN_FAULT_MAPS_REUSE]++;
+        va = XEN_MAP_PAE + slot*PAGE_SIZE;
+    }
+    /*
+     * Take the reference before dropping the lock: with a count of zero
+     * the slot still looks free to find_slot() on another CPU, and
+     * free_page() modifies the same counter under the lock.
+     */
+    maps_refcnt[slot]++;
+    spin_unlock(&maplock);
+
+    return (void *)(uintptr_t)(va + off);
+}
+
+/*
+ * Drop one reference on the mapping slot that backs ptr.
+ *
+ * Only the reference count changes; the pte is left in place, so a later
+ * map_page() of the same frame can still hit it in mfn_to_slot_pae().
+ *
+ * NOTE(review): the slot index is not range checked and the count is not
+ * guarded against underflow.  A pointer obtained from fixmap_page() would
+ * yield a slot >= MAPS_MAX -- confirm callers never pass one.
+ */
+void free_page(void *ptr)
+{
+    uintptr_t va = (uintptr_t)ptr;
+    uintptr_t base = XEN_MAP_PAE;
+    int slot = (va - base) >> PAGE_SHIFT;
+
+    spin_lock(&maplock);
+    maps_refcnt[slot]--;
+    spin_unlock(&maplock);
+}
+
+/*
+ * Install a mapping for the page containing maddr in the next unused slot
+ * at or above MAPS_MAX and return a pointer to maddr inside it.
+ *
+ * These slots are handed out sequentially and are not reference counted.
+ * The cpu argument is not used.
+ *
+ * NOTE(review): fixmap_slot grows without an upper bound and is updated
+ * without taking maplock -- confirm the size of maps_pae and that callers
+ * are serialised.
+ */
+void *fixmap_page(struct xen_cpu *cpu, unsigned long maddr)
+{
+    static int fixmap_slot = MAPS_MAX;
+    uint32_t mfn = addr_to_frame(maddr);
+    uint32_t off = addr_offset(maddr);
+    uint32_t va;
+    int slot;
+
+    slot = fixmap_slot++;
+    printk(2, "%s: mfn %5x slot %3d\n", __FUNCTION__, mfn, slot);
+    maps_pae[slot] = get_pgentry_pae(mfn, EMU_PGFLAGS);
+    va = XEN_MAP_PAE + slot*PAGE_SIZE;
+    return (void *)(uintptr_t)(va + off);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Return a pointer to the pte for va inside the linear page table at
+ * XEN_LPT_PAE, indexed by the page number of va.
+ */
+pte_t *find_pte_lpt(uint32_t va)
+{
+    uint64_t *lpt_base = (void*)XEN_LPT_PAE;
+    uint32_t offset = va >> PAGE_SHIFT;
+
+    return lpt_base + offset;
+}
+
+/*
+ * Walk the page tables rooted at the cpu's cr3 and return a pointer to the
+ * pte for va, using temporary map_page() mappings for each level.
+ *
+ * On success the pgd and pmd mappings are released again and the returned
+ * pointer still holds the pte page's map_page() reference; the caller has
+ * to free_page() it.  Returns NULL when the pgd or pmd entry is not
+ * present.
+ *
+ * NOTE(review): unlike pgtable_walk() this does not test _PAGE_PSE on the
+ * pmd entry before mapping its frame as a pte page -- confirm callers
+ * never pass an address covered by a large page.
+ */
+pte_t *find_pte_map(struct xen_cpu *cpu, uint32_t va)
+{
+    uint64_t *pgd;
+    uint64_t *pmd;
+    uint64_t *pte;
+    int g,m,t;
+
+    g = PGD_INDEX_PAE(va);
+    m = PMD_INDEX_PAE(va);
+    t = PTE_INDEX_PAE(va);
+
+    pgd = map_page(frame_to_addr(read_cr3_mfn(cpu)));
+    if (!test_pgflag_pae(pgd[g], _PAGE_PRESENT)) {
+        goto err1;
+    }
+
+    pmd = map_page(frame_to_addr(get_pgframe_pae(pgd[g])));
+    if (!test_pgflag_pae(pmd[m], _PAGE_PRESENT)) {
+        goto err2;
+    }
+
+    pte = map_page(frame_to_addr(get_pgframe_pae(pmd[m])));
+    free_page(pgd);
+    free_page(pmd);
+    return pte+t;
+
+err2:
+    free_page(pmd);
+err1:
+    free_page(pgd);
+    return NULL;
+}
+
+/*
+ * Format the page flag bits set in flags as a space separated list of
+ * names followed by a newline.
+ *
+ * The result lives in a static buffer that is overwritten by the next
+ * call, so it must be consumed before calling this function again.
+ */
+static char *print_pgflags(uint32_t flags)
+{
+    static char buf[80];
+
+    snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s\n",
+             flags & _PAGE_GLOBAL ? " global" : "",
+             flags & _PAGE_PSE ? " pse" : "",
+             flags & _PAGE_DIRTY ? " dirty" : "",
+             flags & _PAGE_ACCESSED ? " accessed" : "",
+             flags & _PAGE_PCD ? " pcd" : "",
+             flags & _PAGE_PWT ? " pwt" : "",
+             flags & _PAGE_USER ? " user" : "",
+             flags & _PAGE_RW ? " write" : "",
+             flags & _PAGE_PRESENT ? " present" : "");
+    return buf;
+}
+
+/*
+ * Debug helper: print (at printk level 5) the pgd, pmd and pte entries
+ * that translate va in the page tables rooted at the cpu's cr3.
+ *
+ * The walk stops after the first entry that is not present, and after a
+ * pmd entry that has _PAGE_PSE set.  All temporary mappings are released
+ * before returning.
+ */
+void pgtable_walk(struct xen_cpu *cpu, uint32_t va)
+{
+    uint64_t *pgd = NULL;
+    uint64_t *pmd = NULL;
+    uint64_t *pte = NULL;
+    uint64_t mfn;
+    uint32_t g,m,t, flags;
+
+    g = PGD_INDEX_PAE(va);
+    m = PMD_INDEX_PAE(va);
+    t = PTE_INDEX_PAE(va);
+    printk(5, "va %" PRIx32 " | pae %d -> %d -> %d\n", va, g, m, t);
+
+    pgd = map_page(frame_to_addr(read_cr3_mfn(cpu)));
+    mfn = get_pgframe_64(pgd[g]);
+    flags = get_pgflags_64(pgd[g]);
+    printk(5, " pgd +%3d : %08" PRIx64 " | mfn %4" PRIx64 " | %s",
+           g, pgd[g], mfn, print_pgflags(flags));
+    if (!test_pgflag_pae(pgd[g], _PAGE_PRESENT)) {
+        goto cleanup;
+    }
+
+    pmd = map_page(frame_to_addr(get_pgframe_pae(pgd[g])));
+    mfn = get_pgframe_64(pmd[m]);
+    flags = get_pgflags_64(pmd[m]);
+    printk(5, " pmd +%3d : %08" PRIx64 " | mfn %4" PRIx64 " | %s",
+           m, pmd[m], mfn, print_pgflags(flags));
+    if (!test_pgflag_pae(pmd[m], _PAGE_PRESENT)) {
+        goto cleanup;
+    }
+    if (test_pgflag_pae(pmd[m], _PAGE_PSE)) {
+        /* large page: there is no pte level below this entry */
+        goto cleanup;
+    }
+
+    pte = map_page(frame_to_addr(get_pgframe_pae(pmd[m])));
+    mfn = get_pgframe_64(pte[t]);
+    flags = get_pgflags_64(pte[t]);
+    printk(5, " pte +%3d : %08" PRIx64 " | mfn %4" PRIx64 " | %s",
+           t, pte[t], mfn, print_pgflags(flags));
+
+cleanup:
+    if (pgd) {
+        free_page(pgd);
+    }
+    if (pmd) {
+        free_page(pmd);
+    }
+    if (pte) {
+        free_page(pte);
+    }
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Map the pgd page at mfn and return a pointer to its entry for va.
+ *
+ * If that entry is not present, a page is taken from get_pages() and
+ * installed with _PAGE_PRESENT.  The returned pointer holds a map_page()
+ * reference which the caller releases with free_page().
+ *
+ * NOTE(review): the alloc argument is currently ignored; a missing entry
+ * is always allocated.
+ */
+static inline pte_t *find_pgd(unsigned long va, uint64_t mfn, int alloc)
+{
+    pte_t *pgd = map_page(frame_to_addr(mfn));
+    pte_t *pmd;
+
+    pgd += PGD_INDEX_PAE(va);
+
+    if (!test_pgflag(*pgd, _PAGE_PRESENT)) {
+        pmd = get_pages(1, "pmd");
+        *pgd = get_pgentry(EMU_MFN(pmd), _PAGE_PRESENT);
+    }
+
+    return pgd;
+}
+
+/*
+ * Map the pmd page at mfn and return a pointer to its entry for va.
+ *
+ * If that entry is not present, a page is taken from get_pages() and
+ * installed with ALL_PGFLAGS | _PAGE_RW.  The returned pointer holds a
+ * map_page() reference which the caller releases with free_page().
+ *
+ * NOTE(review): the alloc argument is currently ignored; a missing entry
+ * is always allocated.
+ */
+static inline pte_t *find_pmd(unsigned long va, uint64_t mfn, int alloc)
+{
+    pte_t *pmd = map_page(frame_to_addr(mfn));
+    pte_t *pte;
+
+    pmd += PMD_INDEX_PAE(va);
+    if (!test_pgflag(*pmd, _PAGE_PRESENT)) {
+        pte = get_pages(1, "pte");
+        *pmd = get_pgentry(EMU_MFN(pte), ALL_PGFLAGS | _PAGE_RW);
+    }
+
+    return pmd;
+}
+
+/*
+ * Map the pte page at mfn and return a pointer to its entry for va.
+ * The returned pointer holds a map_page() reference which the caller
+ * releases with free_page().
+ */
+static inline pte_t *find_pte(unsigned long va, uint64_t mfn)
+{
+    pte_t *pte = map_page(frame_to_addr(mfn));
+    return pte + PTE_INDEX_PAE(va);
+}
+
+/*
+ * Point the pte for va, in the page tables rooted at the cpu's cr3, at
+ * the page containing maddr, using the given page flags.
+ *
+ * Missing pmd and pte pages are created by find_pgd() and find_pmd().  A
+ * pmd entry that has _PAGE_PSE set is cleared and replaced by a freshly
+ * allocated pte page.
+ */
+static void map_one_page(struct xen_cpu *cpu, unsigned long va, uint64_t maddr,
+                         int flags)
+{
+    uint64_t mfn = addr_to_frame(maddr);
+    pte_t *pgd;
+    pte_t *pmd;
+    pte_t *pte;
+
+    pgd = find_pgd(va, read_cr3_mfn(cpu), 1);
+    pmd = find_pmd(va, get_pgframe(*pgd), 1);
+    if (test_pgflag(*pmd, _PAGE_PSE)) {
+        *pmd = 0;
+        /*
+         * find_pmd() maps the pmd page again and takes a second
+         * reference; release the first one so the slot does not stay
+         * pinned forever.
+         */
+        free_page(pmd);
+        pmd = find_pmd(va, get_pgframe(*pgd), 1);
+    }
+    pte = find_pte(va, get_pgframe(*pmd));
+    *pte = get_pgentry(mfn, flags);
+
+    free_page(pte);
+    free_page(pmd);
+    free_page(pgd);
+}
+
+/*
+ * Map count machine frames, beginning with frame number start, at
+ * consecutive page sized steps from virtual address va, one page at a
+ * time via map_one_page().
+ */
+void map_region(struct xen_cpu *cpu, uint64_t va, uint32_t flags,
+                uint64_t start, uint64_t count)
+{
+    uint64_t maddr = frame_to_addr(start);
+    uint64_t maddr_end = maddr + frame_to_addr(count);
+
+    for (; maddr < maddr_end; maddr += PAGE_SIZE, va += PAGE_SIZE) {
+        map_one_page(cpu, va, maddr, flags);
+    }
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Bring the emulator's own mappings into the page table rooted at
+ * cr3_mfn.
+ *
+ * Works on the pmd page referenced by pgd entry 3 (created first if it is
+ * missing): copies the xenner text entries from emu_pmd_pae, initialises
+ * the remaining static entries when the XEN_M2P_PAE entry is not present
+ * yet, refreshes the four linear page table entries from the pgd, and
+ * copies the entry for the mapping slots.
+ */
+void update_emu_mappings(uint32_t cr3_mfn)
+{
+    uint64_t *new_pgd, *new_pmd3;
+    uint64_t entry;
+    uint32_t mfn;
+    int idx, i;
+
+    new_pgd = map_page(frame_to_addr(cr3_mfn));
+
+    /* maybe alloc a pmd page */
+    switch_heap(HEAP_HIGH);
+    new_pmd3 = find_pgd(0xffffffff, cr3_mfn, 1);
+    free_page(new_pmd3);
+    switch_heap(HEAP_EMU);
+    /* map the pmd page */
+    new_pmd3 = map_page(frame_to_addr(get_pgframe_pae(new_pgd[3])));
+
+    /* xenner mapping */
+    idx = PMD_INDEX_PAE(XEN_TXT_PAE);
+    for (mfn = vmconf.mfn_emu;
+         mfn < vmconf.mfn_emu + vmconf.pg_emu;
+         mfn += PMD_COUNT_PAE, idx++) {
+        new_pmd3[idx] = emu_pmd_pae[idx];
+    }
+
+    idx = PMD_INDEX_PAE(XEN_M2P_PAE);
+    if (!test_pgflag_pae(new_pmd3[idx], _PAGE_PRESENT)) {
+        /* new one, must init static mappings */
+        for (; idx < PMD_COUNT_PAE; idx++) {
+            if (!test_pgflag_pae(emu_pmd_pae[idx], _PAGE_PRESENT)) {
+                continue;
+            }
+
+            /* the linear pgtable entries are filled in separately below */
+            if ((idx >= PMD_INDEX_PAE(XEN_LPT_PAE)) &&
+                (idx < (PMD_INDEX_PAE(XEN_LPT_PAE) + 4))) {
+                continue;
+            }
+            new_pmd3[idx] = emu_pmd_pae[idx];
+        }
+    }
+
+    /* linear pgtable mappings */
+    idx = PMD_INDEX_PAE(XEN_LPT_PAE);
+    for (i = 0; i < 4; i++) {
+        if (test_pgflag_pae(new_pgd[i], _PAGE_PRESENT)) {
+            mfn = get_pgframe_pae(new_pgd[i]);
+            entry = get_pgentry_pae(mfn, LPT_PGFLAGS);
+        } else {
+            entry = 0;
+        }
+        /* only write the entry when it actually changes */
+        if (new_pmd3[idx+i] != entry) {
+            new_pmd3[idx+i] = entry;
+        }
+    }
+
+    /* mapping slots */
+    idx = PMD_INDEX_PAE(XEN_MAP_PAE);
+    new_pmd3[idx] = emu_pmd_pae[idx];
+
+    free_page(new_pgd);
+    free_page(new_pmd3);
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
+ * Build the emulator's template pmd entries in emu_pmd_pae and set up the
+ * m2p pointer.
+ *
+ * Large page (_PAGE_PSE) entries are created for the emulator frames at
+ * XEN_TXT_PAE, for the m2p frames at XEN_M2P_PAE and for the initial page
+ * table at XEN_IPT; the XEN_MAP_PAE entry points at the maps_pae page
+ * table.  The cpu argument is not used.
+ */
+void paging_init(struct xen_cpu *cpu)
+{
+    uintptr_t mfn_guest = emudev_get(EMUDEV_CONF_GUEST_START_PFN, 0);
+    uintptr_t init_pt_pfn = emudev_get(EMUDEV_CONF_PFN_INIT_PT, 0);
+    uint32_t mfn;
+    int idx;
+
+    idx = PMD_INDEX_PAE(XEN_TXT_PAE);
+    for (mfn = vmconf.mfn_emu;
+         mfn < vmconf.mfn_emu + vmconf.pg_emu;
+         mfn += PMD_COUNT_PAE, idx++) {
+        emu_pmd_pae[idx] = get_pgentry_pae(mfn, EMU_PGFLAGS | _PAGE_PSE);
+    }
+
+    idx = PMD_INDEX_PAE(XEN_M2P_PAE);
+    for (mfn = vmconf.mfn_m2p;
+         mfn < vmconf.mfn_m2p + vmconf.pg_m2p;
+         mfn += PMD_COUNT_PAE, idx++) {
+        emu_pmd_pae[idx] = get_pgentry_pae(mfn, EMU_PGFLAGS | _PAGE_PSE);
+    }
+
+    idx = PMD_INDEX_PAE(XEN_MAP_PAE);
+    emu_pmd_pae[idx] = get_pgentry_pae(EMU_MFN(maps_pae), PGT_PGFLAGS_32);
+
+    idx = PMD_INDEX_PAE(XEN_IPT);
+    emu_pmd_pae[idx] = get_pgentry(mfn_guest + init_pt_pfn,
+                                   EMU_PGFLAGS | _PAGE_PSE);
+
+    m2p = (void*)XEN_M2P_PAE;
+}
+