From patchwork Fri Mar 29 05:42:21 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Liu Yuan X-Patchwork-Id: 232294 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from lists.gnu.org (lists.gnu.org [208.118.235.17]) (using TLSv1 with cipher AES256-SHA (256/256 bits)) (Client did not present a certificate) by ozlabs.org (Postfix) with ESMTPS id BF1272C00A8 for ; Fri, 29 Mar 2013 16:45:05 +1100 (EST) Received: from localhost ([::1]:36495 helo=lists.gnu.org) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ULS7b-0007XK-Ev for incoming@patchwork.ozlabs.org; Fri, 29 Mar 2013 01:45:03 -0400 Received: from eggs.gnu.org ([208.118.235.92]:53143) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ULS5g-0004Tt-PA for qemu-devel@nongnu.org; Fri, 29 Mar 2013 01:43:06 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ULS5e-000436-Sf for qemu-devel@nongnu.org; Fri, 29 Mar 2013 01:43:04 -0400 Received: from mail-pd0-f172.google.com ([209.85.192.172]:54013) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ULS5e-00042t-DS for qemu-devel@nongnu.org; Fri, 29 Mar 2013 01:43:02 -0400 Received: by mail-pd0-f172.google.com with SMTP id w10so125790pde.3 for ; Thu, 28 Mar 2013 22:43:01 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20120113; h=x-received:from:to:subject:date:message-id:x-mailer:in-reply-to :references; bh=k4RWiYcQ27u7RPNT9BqeLHUj0MvFiNiY5CM3SvdNasw=; b=VDtFwGIoBje6W93ajiKBizXyYyzO7enSkP40fkL5CrccLHc5gRSG+J1yitCq6vorkB iUuJCNtBmhrOPYQvzOVm/qdUX4lJW5hiJJPx98hSKRNYEeWMyHFvPwO+CU4mqOrbikHi kZrufZDfLu+v/RBp6UfFiGEtL5Lpxsue13yPV/KmngQl/ZoGbdn/ngex8ATZjh7yaX5l 0uGahG8OsAhHEYdMTTFv5oZWa1dfDmu5Fb8eFIB2G5XaJu1IERlpcgjcBJLTpfnbXb/3 LB+2yNmV/GvprH20hsgskURAAVLZA8QozIbPhk86M1k7v48zE5cCxHO5tP//r0Frem2q uNRw== X-Received: by 
10.68.195.234 with SMTP id ih10mr1885953pbc.187.1364535781695; Thu, 28 Mar 2013 22:43:01 -0700 (PDT) Received: from K55VM-ubuntu.taobao.ali.com ([182.92.247.2]) by mx.google.com with ESMTPS id t1sm2083818pab.12.2013.03.28.22.42.56 (version=TLSv1.1 cipher=ECDHE-RSA-RC4-SHA bits=128/128); Thu, 28 Mar 2013 22:43:00 -0700 (PDT) From: Liu Yuan To: qemu-devel@nongnu.org Date: Fri, 29 Mar 2013 13:42:21 +0800 Message-Id: <1364535744-8707-5-git-send-email-namei.unix@gmail.com> X-Mailer: git-send-email 1.7.9.5 In-Reply-To: <1364535744-8707-1-git-send-email-namei.unix@gmail.com> References: <1364535744-8707-1-git-send-email-namei.unix@gmail.com> X-detected-operating-system: by eggs.gnu.org: GNU/Linux 3.x [fuzzy] X-Received-From: 209.85.192.172 Subject: [Qemu-devel] [PATCH v2 4/7] md: add hot-plug and hot-unplug support X-BeenThere: qemu-devel@nongnu.org X-Mailman-Version: 2.1.14 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org Sender: qemu-devel-bounces+incoming=patchwork.ozlabs.org@nongnu.org From: Liu Yuan We allow group plug, group unplug, and disk failure during (un)plugging. Also add a disk information function for collie. 
Signed-off-by: Liu Yuan --- collie/collie.c | 2 +- include/internal_proto.h | 16 +++ include/sheepdog_proto.h | 2 + sheep/md.c | 263 ++++++++++++++++++++++++++++++++-------------- sheep/ops.c | 45 ++++++++ sheep/sheep_priv.h | 5 +- sheep/store.c | 3 +- 7 files changed, 253 insertions(+), 83 deletions(-) diff --git a/collie/collie.c b/collie/collie.c index 08c78eb..19085b4 100644 --- a/collie/collie.c +++ b/collie/collie.c @@ -19,7 +19,7 @@ #include "util.h" static const char program_name[] = "collie"; -const char *sdhost = "localhost"; +const char *sdhost = "127.0.0.1"; int sdport = SD_LISTEN_PORT; bool highlight = true; bool raw_output; diff --git a/include/internal_proto.h b/include/internal_proto.h index 6f1fdb3..c43855b 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -69,6 +69,9 @@ #define SD_OP_FLUSH_PEER 0xAE #define SD_OP_NOTIFY_VDI_ADD 0xAF #define SD_OP_DELETE_CACHE 0xB0 +#define SD_OP_MD_INFO 0xB1 +#define SD_OP_MD_PLUG 0xB2 +#define SD_OP_MD_UNPLUG 0xB3 /* internal flags for hdr.flags, must be above 0x80 */ #define SD_FLAG_CMD_RECOVERY 0x0080 @@ -229,4 +232,17 @@ struct vdi_op_message { uint8_t data[0]; }; +struct md_info { + int idx; + uint64_t size; + uint64_t used; + char path[PATH_MAX]; +}; + +#define MD_MAX_DISK 64 /* FIXME remove roof and make it dynamic */ +struct sd_md_info { + struct md_info disk[MD_MAX_DISK]; + int nr; +}; + #endif /* __INTERNAL_PROTO_H__ */ diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index fe3738b..94baede 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -13,6 +13,8 @@ #include #include +#include + #include "util.h" #define SD_PROTO_VER 0x02 diff --git a/sheep/md.c b/sheep/md.c index 821a391..124f2ba 100644 --- a/sheep/md.c +++ b/sheep/md.c @@ -21,11 +21,12 @@ #include #include #include +#include #include "sheep_priv.h" +#include "util.h" #define MD_DEFAULT_VDISKS 128 -#define MD_MAX_DISK 64 /* FIXME remove roof and make it dynamic */ #define MD_MAX_VDISK 
(MD_MAX_DISK * MD_DEFAULT_VDISKS) struct disk { @@ -123,20 +124,33 @@ static inline struct vdisk *oid_to_vdisk(uint64_t oid) return oid_to_vdisk_from(md_vds, md_nr_vds, oid); } -int md_init_disk(char *path) +static int path_to_disk_idx(char *path) { + int i; + + for (i = 0; i < md_nr_disks; i++) + if (strcmp(md_disks[i].path, path) == 0) + return i; + + return -1; +} + +void md_add_disk(char *path) +{ + if (path_to_disk_idx(path) != -1) { + sd_eprintf("duplicate path %s", path); + return; + } + md_nr_disks++; - if (xmkdir(path, def_dmode) < 0) - panic("%s, %m", path); pstrcpy(md_disks[md_nr_disks - 1].path, PATH_MAX, path); - sd_iprintf("%s added to md, nr %d", md_disks[md_nr_disks - 1].path, + sd_iprintf("%s, nr %d", md_disks[md_nr_disks - 1].path, md_nr_disks); - return 0; } static inline void calculate_vdisks(struct disk *disks, int nr_disks, - uint64_t total) + uint64_t total) { uint64_t avg_size = total / nr_disks; float factor; @@ -154,6 +168,79 @@ static inline void calculate_vdisks(struct disk *disks, int nr_disks, #define MDNAME "user.md.size" #define MDSIZE sizeof(uint64_t) +static int get_total_object_size(uint64_t oid, char *ignore, void *total) +{ + uint64_t *t = total; + *t += get_objsize(oid); + + return SD_RES_SUCCESS; +} + +/* If cleanup is true, temporary objects will be removed */ +static int for_each_object_in_path(char *path, + int (*func)(uint64_t, char *, void *), + bool cleanup, void *arg) +{ + DIR *dir; + struct dirent *d; + uint64_t oid; + int ret = SD_RES_SUCCESS; + char p[PATH_MAX]; + + dir = opendir(path); + if (!dir) { + sd_eprintf("failed to open %s, %m", path); + return SD_RES_EIO; + } + + while ((d = readdir(dir))) { + if (!strncmp(d->d_name, ".", 1)) + continue; + + oid = strtoull(d->d_name, NULL, 16); + if (oid == 0 || oid == ULLONG_MAX) + continue; + + /* don't call callback against temporary objects */ + if (strlen(d->d_name) == 20 && + strcmp(d->d_name + 16, ".tmp") == 0) { + if (cleanup) { + snprintf(p, PATH_MAX, 
"%s/%016"PRIx64".tmp", + path, oid); + sd_dprintf("remove tmp object %s", p); + unlink(p); + } + continue; + } + + ret = func(oid, path, arg); + if (ret != SD_RES_SUCCESS) + break; + } + closedir(dir); + return ret; +} + +static uint64_t get_path_size(char *path, uint64_t *used) +{ + struct statvfs fs; + uint64_t size; + + if (statvfs(path, &fs) < 0) { + sd_eprintf("get disk %s space failed %m", path); + return 0; + } + size = (int64_t)fs.f_frsize * fs.f_bfree; + + if (!used) + goto out; + if (for_each_object_in_path(path, get_total_object_size, false, used) + != SD_RES_SUCCESS) + return 0; +out: + return size; +} + /* * If path is broken during initilization or not support xattr return 0. We can * safely use 0 to represent failure case because 0 space path can be @@ -161,9 +248,13 @@ static inline void calculate_vdisks(struct disk *disks, int nr_disks, */ static uint64_t init_path_space(char *path) { - struct statvfs fs; uint64_t size; + if (xmkdir(path, def_dmode) < 0) { + sd_eprintf("%s, %m", path); + goto broken_path; + } + if (!is_xattr_enabled(path)) { sd_iprintf("multi-disk support need xattr feature"); goto broken_path; @@ -180,11 +271,9 @@ static uint64_t init_path_space(char *path) return size; create: - if (statvfs(path, &fs) < 0) { - sd_eprintf("get disk %s space failed %m", path); + size = get_path_size(path, NULL); + if (!size) goto broken_path; - } - size = (int64_t)fs.f_frsize * fs.f_bfree; if (setxattr(path, MDNAME, &size, MDSIZE, 0) < 0) { sd_eprintf("%s, %m", path); goto broken_path; @@ -229,7 +318,8 @@ reinit: } calculate_vdisks(md_disks, md_nr_disks, total); md_nr_vds = disks_to_vdisks(md_disks, md_nr_disks, md_vds); - sys->enable_md = true; + if (!sys->enable_md) + sys->enable_md = true; return total; } @@ -259,51 +349,6 @@ static char *get_object_path_nolock(uint64_t oid) return md_disks[vd->idx].path; } -/* If cleanup is true, temporary objects will be removed */ -static int for_each_object_in_path(char *path, - int (*func)(uint64_t, char *, 
void *), - bool cleanup, void *arg) -{ - DIR *dir; - struct dirent *d; - uint64_t oid; - int ret = SD_RES_SUCCESS; - char p[PATH_MAX]; - - dir = opendir(path); - if (!dir) { - sd_eprintf("failed to open %s, %m", path); - return SD_RES_EIO; - } - - while ((d = readdir(dir))) { - if (!strncmp(d->d_name, ".", 1)) - continue; - - oid = strtoull(d->d_name, NULL, 16); - if (oid == 0 || oid == ULLONG_MAX) - continue; - - /* don't call callback against temporary objects */ - if (strlen(d->d_name) == 20 && - strcmp(d->d_name + 16, ".tmp") == 0) { - if (cleanup) { - snprintf(p, PATH_MAX, "%s/%016"PRIx64".tmp", - path, oid); - sd_dprintf("remove tmp object %s", p); - unlink(p); - } - continue; - } - - ret = func(oid, path, arg); - if (ret != SD_RES_SUCCESS) - break; - } - closedir(dir); - return ret; -} - int for_each_object_in_wd(int (*func)(uint64_t oid, char *path, void *arg), bool cleanup, void *arg) { @@ -345,17 +390,6 @@ struct md_work { char path[PATH_MAX]; }; -static int path_to_disk_idx(char *path) -{ - int i; - - for (i = 0; i < md_nr_disks; i++) - if (strcmp(md_disks[i].path, path) == 0) - return i; - - return -1; -} - static inline void kick_recover(void) { struct vnode_info *vinfo = get_vnode_info(); @@ -364,15 +398,6 @@ static inline void kick_recover(void) put_vnode_info(vinfo); } -static void unplug_disk(int idx) -{ - - remove_disk(idx); - sys->disk_space = md_init_space(); - if (md_nr_disks > 0) - kick_recover(); -} - static void md_do_recover(struct work *work) { struct md_work *mw = container_of(work, struct md_work, work); @@ -383,7 +408,10 @@ static void md_do_recover(struct work *work) if (idx < 0) /* Just ignore the duplicate EIO of the same path */ goto out; - unplug_disk(idx); + remove_disk(idx); + sys->disk_space = md_init_space(); + if (md_nr_disks > 0) + kick_recover(); out: pthread_rwlock_unlock(&md_lock); free(mw); @@ -500,3 +528,80 @@ int md_get_stale_path(uint64_t oid, uint32_t epoch, char *path) return SD_RES_NO_OBJ; } + +uint32_t 
md_get_info(struct sd_md_info *info) +{ + uint32_t ret = sizeof(*info); + int i; + + memset(info, 0, ret); + pthread_rwlock_rdlock(&md_lock); + for (i = 0; i < md_nr_disks; i++) { + info->disk[i].idx = i; + pstrcpy(info->disk[i].path, PATH_MAX, md_disks[i].path); + info->disk[i].size = get_path_size(info->disk[i].path, + &info->disk[i].used); + if (!info->disk[i].size) { + ret = 0; + break; + } + } + info->nr = md_nr_disks; + pthread_rwlock_unlock(&md_lock); + return ret; +} + +static inline void md_del_disk(char *path) +{ + int idx = path_to_disk_idx(path); + + if (idx < 0) { + sd_eprintf("invalid path %s", path); + return; + } + remove_disk(idx); +} + +static int do_plug_unplug(char *disks, bool plug) +{ + char *path; + int old_nr, ret = SD_RES_UNKNOWN; + + pthread_rwlock_wrlock(&md_lock); + old_nr = md_nr_disks; + path = strtok(disks, ","); + do { + if (plug) + md_add_disk(path); + else + md_del_disk(path); + } while ((path = strtok(NULL, ","))); + + /* If no disks change, bail out */ + if (old_nr == md_nr_disks) + goto out; + + sys->disk_space = md_init_space(); + /* + * We have to kick recover aggressively because there is possibility + * that nr of disks are removed during md_init_space() happens to equal + * nr of disks we added. 
+ */ + if (md_nr_disks > 0) + kick_recover(); + + ret = SD_RES_SUCCESS; +out: + pthread_rwlock_unlock(&md_lock); + return ret; +} + +int md_plug_disks(char *disks) +{ + return do_plug_unplug(disks, true); +} + +int md_unplug_disks(char *disks) +{ + return do_plug_unplug(disks, false); +} diff --git a/sheep/ops.c b/sheep/ops.c index 8cba70d..3839437 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -667,6 +667,33 @@ static int local_set_cache_size(const struct sd_req *req, struct sd_rsp *rsp, return SD_RES_SUCCESS; } +static int local_md_info(struct request *request) +{ + struct sd_rsp *rsp = &request->rp; + struct sd_req *req = &request->rq; + + assert(req->data_length == sizeof(struct sd_md_info)); + rsp->data_length = md_get_info((struct sd_md_info *)request->data); + + return rsp->data_length ? SD_RES_SUCCESS : SD_RES_UNKNOWN; +} + +static int local_md_plug(const struct sd_req *req, struct sd_rsp *rsp, + void *data) +{ + char *disks = (char *)data; + + return md_plug_disks(disks); +} + +static int local_md_unplug(const struct sd_req *req, struct sd_rsp *rsp, + void *data) +{ + char *disks = (char *)data; + + return md_unplug_disks(disks); +} + static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp, void *data) { @@ -1110,6 +1137,24 @@ static struct sd_op_template sd_ops[] = { .process_main = local_set_cache_size, }, + [SD_OP_MD_INFO] = { + .name = "MD_INFO", + .type = SD_OP_TYPE_LOCAL, + .process_work = local_md_info, + }, + + [SD_OP_MD_PLUG] = { + .name = "MD_PLUG_DISKS", + .type = SD_OP_TYPE_LOCAL, + .process_main = local_md_plug, + }, + + [SD_OP_MD_UNPLUG] = { + .name = "MD_UNPLUG_DISKS", + .type = SD_OP_TYPE_LOCAL, + .process_main = local_md_unplug, + }, + /* gateway I/O operations */ [SD_OP_CREATE_AND_WRITE_OBJ] = { .name = "CREATE_AND_WRITE_OBJ", diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 652fd3a..098a7bb 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -417,11 +417,14 @@ int journal_file_init(const char *path, 
size_t size, bool skip); int journal_file_write(uint64_t oid, const char *buf, size_t size, off_t, bool); /* md.c */ -int md_init_disk(char *path); +void md_add_disk(char *path); uint64_t md_init_space(void); char *get_object_path(uint64_t oid); int md_handle_eio(char *); bool md_exist(uint64_t oid); int md_get_stale_path(uint64_t oid, uint32_t epoch, char *path); +uint32_t md_get_info(struct sd_md_info *info); +int md_plug_disks(char *disks); +int md_unplug_disks(char *disks); #endif diff --git a/sheep/store.c b/sheep/store.c index 58303fa..cbf24dc 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -269,8 +269,7 @@ static int init_obj_path(const char *base_path, char *argp) /* Eat up the first component */ strtok(argp, ","); while ((p = strtok(NULL, ","))) - if (md_init_disk(p) < 0) - return -1; + md_add_disk(p); return init_path(obj_path, NULL); }