From patchwork Fri Aug 14 14:52:06 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Xiao Guangrong X-Patchwork-Id: 507433 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [IPv6:2001:4830:134:3::11]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 474DF1401DE for ; Sat, 15 Aug 2015 01:02:06 +1000 (AEST) Received: from localhost ([::1]:46680 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZQGUe-0002Qt-G6 for incoming@patchwork.ozlabs.org; Fri, 14 Aug 2015 11:02:04 -0400 Received: from eggs.gnu.org ([2001:4830:134:3::10]:51500) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZQGRc-0005OI-Mo for qemu-devel@nongnu.org; Fri, 14 Aug 2015 10:58:58 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZQGRa-0005dp-Bm for qemu-devel@nongnu.org; Fri, 14 Aug 2015 10:58:56 -0400 Received: from mga02.intel.com ([134.134.136.20]:42327) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZQGRa-0005cx-3L for qemu-devel@nongnu.org; Fri, 14 Aug 2015 10:58:54 -0400 Received: from orsmga001.jf.intel.com ([10.7.209.18]) by orsmga101.jf.intel.com with ESMTP; 14 Aug 2015 07:58:53 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.15,678,1432623600"; d="scan'208";a="748633447" Received: from xiao.sh.intel.com ([10.239.159.86]) by orsmga001.jf.intel.com with ESMTP; 14 Aug 2015 07:58:35 -0700 From: Xiao Guangrong To: pbonzini@redhat.com, imammedo@redhat.com Date: Fri, 14 Aug 2015 22:52:06 +0800 Message-Id: <1439563931-12352-14-git-send-email-guangrong.xiao@linux.intel.com> X-Mailer: git-send-email 2.4.3 In-Reply-To: <1439563931-12352-1-git-send-email-guangrong.xiao@linux.intel.com> References: 
<1439563931-12352-1-git-send-email-guangrong.xiao@linux.intel.com> X-detected-operating-system: by eggs.gnu.org: Genre and OS details not recognized. X-Received-From: 134.134.136.20 Cc: Xiao Guangrong , ehabkost@redhat.com, kvm@vger.kernel.org, mst@redhat.com, gleb@kernel.org, mtosatti@redhat.com, qemu-devel@nongnu.org, stefanha@redhat.com, rth@twiddle.net Subject: [Qemu-devel] [PATCH v2 13/18] nvdimm: build namespace config data X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org If @configdata is false, QEMU will build a static and read-only namespace in memory and use it to serve DSM GET_CONFIG_SIZE/GET_CONFIG_DATA requests. Signed-off-by: Xiao Guangrong --- hw/mem/Makefile.objs | 3 +- hw/mem/nvdimm/acpi.c | 10 ++ hw/mem/nvdimm/internal.h | 12 ++ hw/mem/nvdimm/namespace.c | 307 +++++++++++++++++++++++++++++++++++++++++++++ include/hw/mem/pc-nvdimm.h | 2 + 5 files changed, 333 insertions(+), 1 deletion(-) create mode 100644 hw/mem/nvdimm/namespace.c diff --git a/hw/mem/Makefile.objs b/hw/mem/Makefile.objs index 7a6948d..7f3fab2 100644 --- a/hw/mem/Makefile.objs +++ b/hw/mem/Makefile.objs @@ -1,2 +1,3 @@ common-obj-$(CONFIG_MEM_HOTPLUG) += pc-dimm.o -common-obj-$(CONFIG_NVDIMM) += nvdimm/pc-nvdimm.o nvdimm/acpi.o +common-obj-$(CONFIG_NVDIMM) += nvdimm/pc-nvdimm.o nvdimm/acpi.o \ + nvdimm/namespace.o diff --git a/hw/mem/nvdimm/acpi.c b/hw/mem/nvdimm/acpi.c index 0b09efa..c773954 100644 --- a/hw/mem/nvdimm/acpi.c +++ b/hw/mem/nvdimm/acpi.c @@ -240,6 +240,8 @@ static void build_nfit_table(GSList *device_list, char *buf) for (; device_list; device_list = device_list->next) { PCNVDIMMDevice *nvdimm = device_list->data; + struct nfit_memdev *nfit_memdev; + struct nfit_dcr *nfit_dcr; int spa_index, dcr_index; spa_index = 
++index; @@ -252,10 +254,15 @@ static void build_nfit_table(GSList *device_list, char *buf) * build Memory Device to System Physical Address Range Mapping * Table. */ + nfit_memdev = (struct nfit_memdev *)buf; buf += build_memdev_table(buf, nvdimm, spa_index, dcr_index); /* build Control Region Descriptor Table. */ + nfit_dcr = (struct nfit_dcr *)buf; buf += build_dcr_table(buf, nvdimm, dcr_index); + + calculate_nvdimm_isetcookie(nvdimm, nfit_memdev->region_spa_offset, + nfit_dcr->serial_number); } } @@ -382,6 +389,9 @@ void pc_nvdimm_build_nfit_table(GArray *table_offsets, GArray *table_data, build_header(linker, table_data, (void *)(table_data->data + nfit_start), "NFIT", table_data->len - nfit_start, 1); + + build_nvdimm_configdata(list); + exit: g_slist_free(list); } diff --git a/hw/mem/nvdimm/internal.h b/hw/mem/nvdimm/internal.h index 90d54dc..b1f3f16 100644 --- a/hw/mem/nvdimm/internal.h +++ b/hw/mem/nvdimm/internal.h @@ -13,6 +13,14 @@ #ifndef __NVDIMM_INTERNAL_H #define __NVDIMM_INTERNAL_H +/* #define NVDIMM_DEBUG */ + +#ifdef NVDIMM_DEBUG +#define nvdebug(fmt, ...) fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__) +#else +#define nvdebug(...) +#endif + #define PAGE_SIZE (1UL << 12) typedef struct { @@ -27,4 +35,8 @@ typedef struct { GSList *get_nvdimm_built_list(void); ram_addr_t reserved_range_push(uint64_t size); + +void calculate_nvdimm_isetcookie(PCNVDIMMDevice *nvdimm, uint64_t spa, + uint32_t sn); +void build_nvdimm_configdata(GSList *device_list); #endif diff --git a/hw/mem/nvdimm/namespace.c b/hw/mem/nvdimm/namespace.c new file mode 100644 index 0000000..04626da --- /dev/null +++ b/hw/mem/nvdimm/namespace.c @@ -0,0 +1,307 @@ +/* + * NVDIMM Namespace Support + * + * Copyright(C) 2015 Intel Corporation. 
+ * + * Author: + * Xiao Guangrong + * + * NVDIMM namespace specification can be found at: + * http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see + */ + +#include "hw/mem/pc-nvdimm.h" + +#include "internal.h" + +static uint64_t fletcher64(void *addr, size_t len) +{ + uint32_t *buf = addr; + uint32_t lo32 = 0; + uint64_t hi32 = 0; + int i; + + for (i = 0; i < len / sizeof(uint32_t); i++) { + lo32 += cpu_to_le32(buf[i]); + hi32 += lo32; + } + + return hi32 << 32 | lo32; +} + +struct interleave_set_info { + struct interleave_set_info_map { + uint64_t region_spa_offset; + uint32_t serial_number; + uint32_t zero; + } mapping[1]; +}; + +void calculate_nvdimm_isetcookie(PCNVDIMMDevice *nvdimm, uint64_t spa, + uint32_t sn) +{ + struct interleave_set_info info; + + info.mapping[0].region_spa_offset = spa; + info.mapping[0].serial_number = sn; + info.mapping[0].zero = 0; + + nvdimm->isetcookie = fletcher64(&info, sizeof(info)); +} + +#define NSINDEX_SIGNATURE "NAMESPACE_INDEX\0" + +enum { + NSINDEX_SIG_LEN = 16, + NSINDEX_ALIGN = 256, + NSINDEX_SEQ_MASK = 0x3, + NSINDEX_MAJOR = 0x1, + NSINDEX_MINOR = 0x1, + + NSLABEL_UUID_LEN = 16, + NSLABEL_NAME_LEN = 64, + NSLABEL_FLAG_ROLABEL = 0x1, /* read-only label */ + NSLABEL_FLAG_LOCAL = 0x2, /* DIMM-local namespace */ + NSLABEL_FLAG_BTT = 0x4, /* namespace contains a BTT */ + 
NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */ +}; + +/* + * struct nd_namespace_index - label set superblock + * @sig: NAMESPACE_INDEX\0 + * @flags: placeholder + * @seq: sequence number for this index + * @myoff: offset of this index in label area + * @mysize: size of this index struct + * @otheroff: offset of other index + * @labeloff: offset of first label slot + * @nslot: total number of label slots + * @major: label area major version + * @minor: label area minor version + * @checksum: fletcher64 of all fields + * @free[0]: bitmap, nlabel bits + * + * The size of free[] is rounded up so the total struct size is a + * multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond + * nlabel bits must be zero. + */ +struct namespace_label_index_block { + uint8_t sig[NSINDEX_SIG_LEN]; + uint32_t flags; + uint32_t seq; + uint64_t myoff; + uint64_t mysize; + uint64_t otheroff; + uint64_t labeloff; + uint32_t nlabel; + uint16_t major; + uint16_t minor; + uint64_t checksum; + uint8_t free[0]; +} QEMU_PACKED; + +/* + * struct nd_namespace_label - namespace superblock + * @uuid: UUID per RFC 4122 + * @name: optional name (NULL-terminated) + * @flags: see NSLABEL_FLAG_* + * @nlabel: num labels to describe this ns + * @position: labels position in set + * @isetcookie: interleave set cookie + * @lbasize: LBA size in bytes or 0 for pmem + * @dpa: DPA of NVM range on this DIMM + * @rawsize: size of namespace + * @slot: slot of this label in label area + * @unused: must be zero + */ +struct namespace_label { + uint8_t uuid[NSLABEL_UUID_LEN]; + uint8_t name[NSLABEL_NAME_LEN]; + uint32_t flags; + uint16_t nlabel; + uint16_t position; + uint64_t isetcookie; + uint64_t lbasize; + uint64_t dpa; + uint64_t rawsize; + uint32_t slot; + uint32_t unused; +} QEMU_PACKED; + +/*calculate the number of label can be contained in whole config space. 
*/ +static int config_space_max_label_nr(PCNVDIMMDevice *nvdimm, size_t block_size) +{ + /* totally we have 2 namespace label index block. */ + if (block_size * 2 >= nvdimm->config_data_size) { + return 0; + } + + return (nvdimm->config_data_size - block_size * 2) / + sizeof(struct namespace_label); +} + +/*calculate the number of label can be contained in index block. */ +static int label_index_block_max_label_nr(size_t block_size) +{ + int free_size; + + free_size = block_size - sizeof(struct namespace_label_index_block); + + return free_size * BITS_PER_BYTE; +} + +static int calculate_max_label_nr(PCNVDIMMDevice *nvdimm, size_t block_size) +{ + return MIN(label_index_block_max_label_nr(block_size), + config_space_max_label_nr(nvdimm, block_size)); +} + +/* + * check if we can increase the size of namespace_label_index_block to + * contain more labels. + */ +static bool can_increase_index_block(PCNVDIMMDevice *nvdimm, + size_t block_size, int label_nr) +{ + size_t remaining; + + remaining = nvdimm->config_data_size - block_size * 2 - + label_nr * sizeof(struct namespace_label); + + assert((int64_t)remaining >= 0); + + /* can contain 1 label at least. */ + return remaining >= NSINDEX_ALIGN * 2 + sizeof(struct namespace_label); +} + +static void count_label_nr(PCNVDIMMDevice *nvdimm, size_t *label_block_size, + int *label_nr) +{ + *label_block_size = 0; + + do { + /* + * The minimum size of an index block is 256 bytes and the size must + * be a multiple of 256 bytes. 
+ */ + *label_block_size += NSINDEX_ALIGN; + + *label_nr = calculate_max_label_nr(nvdimm, *label_block_size); + } while (can_increase_index_block(nvdimm, *label_block_size, *label_nr)); +} + +static void namespace_label_uuid(PCNVDIMMDevice *nvdimm, void *uuid) +{ + uuid_le label_uuid_init = UUID_LE(0x137e67a9, 0x7dcb, 0x4c66, 0xb2, + 0xe6, 0x05, 0x06, 0x5b, 0xeb, + 0x6a, 0x00); + + assert(nvdimm->device_index <= 0xff); + + label_uuid_init.b[0] += nvdimm->device_index; + memcpy(uuid, &label_uuid_init, sizeof(label_uuid_init)); +} + +static void init_namespace(PCNVDIMMDevice *nvdimm) +{ + struct namespace_label_index_block *index1, *index2; + struct namespace_label *label; + int i; + + size_t label_block_size; + int label_nr; + + assert(!nvdimm->configdata); + + count_label_nr(nvdimm, &label_block_size, &label_nr); + nvdebug("nvdimm%d: label_block_size 0x%lx label_nr %d.\n", + nvdimm->device_index, label_block_size, label_nr); + + index1 = nvdimm->config_data_addr; + + /* + * init the first namespace label index block, except @otheroff + * and @checksum. we will do it later. + */ + memcpy(index1->sig, NSINDEX_SIGNATURE, sizeof(NSINDEX_SIGNATURE)); + index1->flags = cpu_to_le32(0); + index1->seq = cpu_to_le32(0x1); + index1->myoff = cpu_to_le64(0); + index1->mysize = cpu_to_le64(label_block_size); + index1->labeloff = cpu_to_le64(label_block_size * 2); + index1->nlabel = cpu_to_le32(label_nr); + index1->major = cpu_to_le16(NSINDEX_MAJOR); + index1->minor = cpu_to_le16(NSINDEX_MINOR); + index1->checksum = cpu_to_le64(0); + memset(index1->free, 0, + label_block_size - sizeof(struct namespace_label_index_block)); + + /* + * the label slot with the lowest offset in the label storage area is + * tracked by the least significant bit of the first byte of the free + * array. + * + * the fist label is used. + */ + for (i = 1; i < index1->nlabel; i++) { + set_bit(i, (unsigned long *)index1->free); + } + + /* init the second namespace label index block. 
*/ + index2 = (void *)index1 + label_block_size; + memcpy(index2, index1, label_block_size); + index2->seq = cpu_to_le32(0x2); + index2->myoff = cpu_to_le64(label_block_size); + + /* init @otheroff and @checksume. */ + index1->otheroff = cpu_to_le64(index2->myoff); + index2->otheroff = cpu_to_le64(index1->myoff); + index1->checksum = cpu_to_le64(fletcher64(index1, label_block_size)); + index2->checksum = cpu_to_le64(fletcher64(index2, label_block_size)); + + /* only one label is used which is the first label and is readonly. */ + label = nvdimm->config_data_addr + label_block_size * 2; + namespace_label_uuid(nvdimm, label->uuid); + sprintf((char *)label->name, "QEMU NS%d", nvdimm->device_index); + label->flags = cpu_to_le32(NSLABEL_FLAG_ROLABEL); + label->nlabel = cpu_to_le16(1); + label->position = cpu_to_le16(0); + label->isetcookie = cpu_to_le64(nvdimm->isetcookie); + label->lbasize = cpu_to_le64(0); + label->dpa = cpu_to_le64(object_property_get_int(OBJECT(&nvdimm->mr), + "addr", NULL)); + label->rawsize = cpu_to_le64(memory_region_size(&nvdimm->mr)); + label->slot = cpu_to_le32(0); + label->unused = cpu_to_le32(0); + + nvdebug("nvdimm%d, checksum1 0x%lx checksum2 0x%lx isetcookie 0x%lx.\n", + nvdimm->device_index, index1->checksum, index2->checksum, + label->isetcookie); +} + +void build_nvdimm_configdata(GSList *device_list) +{ + for (; device_list; device_list = device_list->next) { + PCNVDIMMDevice *nvdimm = device_list->data; + + if (nvdimm->config_data_addr) { + return; + } + + nvdimm->config_data_addr = g_malloc(nvdimm->config_data_size); + init_namespace(nvdimm); + } +} diff --git a/include/hw/mem/pc-nvdimm.h b/include/hw/mem/pc-nvdimm.h index b7faec3..8aa7086 100644 --- a/include/hw/mem/pc-nvdimm.h +++ b/include/hw/mem/pc-nvdimm.h @@ -28,6 +28,8 @@ typedef struct PCNVDIMMDevice { uint64_t config_data_size; void *config_data_addr; + uint64_t isetcookie; + MemoryRegion mr; } PCNVDIMMDevice;