From patchwork Tue Oct 8 00:41:27 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Marcelo Tosatti X-Patchwork-Id: 281296 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id 470562C0089 for ; Tue, 8 Oct 2013 11:50:10 +1100 (EST) Received: from localhost ([::1]:34103 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VTLTU-0004lt-S3 for incoming@patchwork.ozlabs.org; Mon, 07 Oct 2013 20:48:32 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:52322) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VTLT3-0004iC-6v for qemu-devel@nongnu.org; Mon, 07 Oct 2013 20:48:11 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1VTLSt-0005EY-Md for qemu-devel@nongnu.org; Mon, 07 Oct 2013 20:48:05 -0400 Received: from mx1.redhat.com ([209.132.183.28]:9730) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VTLSt-0005ED-EY for qemu-devel@nongnu.org; Mon, 07 Oct 2013 20:47:55 -0400 Received: from int-mx02.intmail.prod.int.phx2.redhat.com (int-mx02.intmail.prod.int.phx2.redhat.com [10.5.11.12]) by mx1.redhat.com (8.14.4/8.14.4) with ESMTP id r980lsmn025871 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=OK) for ; Mon, 7 Oct 2013 20:47:54 -0400 Received: from amt.cnet (vpn1-5-50.gru2.redhat.com [10.97.5.50]) by int-mx02.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id r980lrFK028274; Mon, 7 Oct 2013 20:47:53 -0400 Received: from amt.cnet (localhost [127.0.0.1]) by amt.cnet (Postfix) with ESMTP id 756351043D2; Mon, 7 Oct 2013 21:43:27 -0300 (BRT) Received: (from marcelo@localhost) by amt.cnet (8.14.6/8.14.6/Submit) id 
r980hQhu005758; Mon, 7 Oct 2013 21:43:26 -0300 Message-Id: <20131008004224.509422315@amt.cnet> User-Agent: quilt/0.60-1 Date: Mon, 07 Oct 2013 21:41:27 -0300 From: Marcelo Tosatti To: qemu-devel@nongnu.org References: <20131008004126.773017235@amt.cnet> Content-Disposition: inline; filename=map-populate-failure X-Scanned-By: MIMEDefang 2.67 on 10.5.11.12 X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x X-Received-From: 209.132.183.28 Cc: Marcelo Tosatti Subject: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org The MAP_POPULATE mmap flag does not cause mmap to fail if the entire area is not allocated. HugeTLBfs reserves pages on a global basis: any further restriction on the reserved memory, such as cpuset placement or NUMA node policy, is applied at fault time only. Manually fault in pages at allocation time. This allows memory restrictions to be applied before guest initialization. 
Signed-off-by: Marcelo Tosatti Index: qemu/exec.c =================================================================== --- qemu.orig/exec.c +++ qemu/exec.c @@ -918,6 +918,13 @@ static long gethugepagesize(const char * return fs.f_bsize; } +sigjmp_buf sigjump; + +static void sigbus_handler(int signal) +{ + siglongjmp(sigjump, 1); +} + static void *file_ram_alloc(RAMBlock *block, ram_addr_t memory, const char *path) @@ -927,9 +934,6 @@ static void *file_ram_alloc(RAMBlock *bl char *c; void *area; int fd; -#ifdef MAP_POPULATE - int flags; -#endif unsigned long hpagesize; hpagesize = gethugepagesize(path); @@ -977,21 +981,57 @@ static void *file_ram_alloc(RAMBlock *bl if (ftruncate(fd, memory)) perror("ftruncate"); -#ifdef MAP_POPULATE - /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case - * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED - * to sidestep this quirk. - */ - flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE; - area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0); -#else area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); -#endif if (area == MAP_FAILED) { perror("file_ram_alloc: can't mmap RAM pages"); close(fd); return (NULL); } + + if (mem_prealloc) { + int ret, i; + struct sigaction act, oldact; + sigset_t set, oldset; + + memset(&act, 0, sizeof(act)); + act.sa_handler = &sigbus_handler; + act.sa_flags = 0; + + ret = sigaction(SIGBUS, &act, &oldact); + if (ret) { + perror("file_ram_alloc: fail to install signal handler"); + exit(1); + } + + /* unblock SIGBUS */ + pthread_sigmask(SIG_BLOCK, NULL, &oldset); + sigemptyset(&set); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); + + if (sigsetjmp(sigjump, 1)) { + fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n"); + exit(1); + } + + /* MAP_POPULATE silently ignores failures */ + for (i = 0; i < (memory/hpagesize)-1; i++) { + memset(area + (hpagesize*i), 0, 1); + } + + ret = sigaction(SIGBUS, 
&oldact, NULL); + if (ret) { + perror("file_ram_alloc: fail to reinstall signal handler"); + exit(1); + } + + if (sigismember(&oldset, SIGBUS)) { + sigemptyset(&set); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_BLOCK, &set, NULL); + } + } + block->fd = fd; return area; } Index: qemu/vl.c =================================================================== --- qemu.orig/vl.c +++ qemu/vl.c @@ -188,9 +188,7 @@ static int display_remote; const char* keyboard_layout = NULL; ram_addr_t ram_size; const char *mem_path = NULL; -#ifdef MAP_POPULATE int mem_prealloc = 0; /* force preallocation of physical target memory */ -#endif int nb_nics; NICInfo nd_table[MAX_NICS]; int autostart; @@ -3205,11 +3203,9 @@ int main(int argc, char **argv, char **e case QEMU_OPTION_mempath: mem_path = optarg; break; -#ifdef MAP_POPULATE case QEMU_OPTION_mem_prealloc: mem_prealloc = 1; break; -#endif case QEMU_OPTION_d: log_mask = optarg; break; Index: qemu/qemu-options.def =================================================================== --- qemu.orig/qemu-options.def +++ qemu/qemu-options.def @@ -66,11 +66,9 @@ stringify(DEFAULT_RAM_SIZE) "]\n", QEMU_ DEF("mem-path", HAS_ARG, QEMU_OPTION_mempath, "-mem-path FILE provide backing storage for guest RAM\n", QEMU_ARCH_ALL) -#ifdef MAP_POPULATE DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc, "-mem-prealloc preallocate guest memory (use with -mem-path)\n", QEMU_ARCH_ALL) -#endif DEF("k", HAS_ARG, QEMU_OPTION_k, "-k language use keyboard layout (for example 'fr' for French)\n", Index: git/qemu/qemu-options.hx =================================================================== --- qemu.orig/qemu-options.hx +++ qemu/qemu-options.hx @@ -228,7 +228,6 @@ STEXI Allocate guest RAM from a temporarily created file in @var{path}. 
ETEXI -#ifdef MAP_POPULATE DEF("mem-prealloc", 0, QEMU_OPTION_mem_prealloc, "-mem-prealloc preallocate guest memory (use with -mem-path)\n", QEMU_ARCH_ALL) @@ -237,7 +236,6 @@ STEXI @findex -mem-prealloc Preallocate memory when using -mem-path. ETEXI -#endif DEF("k", HAS_ARG, QEMU_OPTION_k, "-k language use keyboard layout (for example 'fr' for French)\n",