From 1d575314c910984db6106d7d37a01f1be57c3060 Mon Sep 17 00:00:00 2001 From: Jorge Martin Date: Mon, 13 Jul 2015 17:44:51 +0100 Subject: [PATCH 01/43] CA-161449: Stats between tapdisk and vdi traffic Added new stats structure to account tapdisk traffic. Requests issued from tapdisk to the vdi will be found in /dev/shm/td3-<pid>/vdi-<minor> Signed-off-by: Jorge Martin Reviewed-by: Stefano Panella --- drivers/tapdisk-metrics.c | 70 ++++++++++++++++++++++++++++++++++++--- drivers/tapdisk-metrics.h | 30 +++++++++++++++-- drivers/tapdisk-vbd.c | 25 ++++++++++++++ drivers/tapdisk-vbd.h | 2 ++ 4 files changed, 120 insertions(+), 7 deletions(-) diff --git a/drivers/tapdisk-metrics.c b/drivers/tapdisk-metrics.c index f3c6ef66..1db683a6 100644 --- a/drivers/tapdisk-metrics.c +++ b/drivers/tapdisk-metrics.c @@ -27,8 +27,9 @@ #include "tapdisk-metrics.h" #include "lock.h" #include "tapdisk-log.h" +#include "debug.h" +#include "tapdisk-queue.h" -/* make a static metrics struct, so it only exists in the context of this file */ static td_metrics_t td_metrics; /* Returns 0 in case there were no problems while emptying the folder */ @@ -55,7 +56,8 @@ empty_folder(char *path) err = asprintf(&file, "%s/%s", path, direntry->d_name); if (unlikely(err == -1)) { err = errno; - EPRINTF("failed to allocate file path name in memory to delete: %s\n", strerror(err)); + EPRINTF("failed to allocate file path name in memory to delete: %s\n", + strerror(err)); goto out; } stat(file, &statbuf); @@ -83,7 +85,8 @@ td_metrics_start() err = asprintf(&td_metrics.path, TAPDISK_METRICS_PATHF, getpid()); if (unlikely(err == -1)) { err = errno; - EPRINTF("failed to allocate metric's folder path name in memory: %s\n", strerror(err)); + EPRINTF("failed to allocate metric's folder path name in memory: %s\n", + strerror(err)); td_metrics.path = NULL; goto out; } @@ -91,7 +94,9 @@ td_metrics_start() err = mkdir(td_metrics.path, S_IRWXU); if (unlikely(err == -1)) { if (errno == EEXIST) { - //In case there is a previous 
folder with the same pid, we empty it and use it for the new tapdisk instance. + /* In case there is a previous folder with the same pid, + * we empty it and use it for the new tapdisk instance. + */ err = 0; empty_folder(td_metrics.path); }else{ @@ -122,3 +127,60 @@ td_metrics_stop() out: return; } + +int +td_metrics_vdi_start(int minor, stats_t *vdi_stats) +{ + int err = 0; + + if(!td_metrics.path) + goto out; + + shm_init(&vdi_stats->shm); + + err = asprintf(&vdi_stats->shm.path, TAPDISK_METRICS_VDI_PATHF, + td_metrics.path, minor); + + if(unlikely(err == -1)){ + err = errno; + EPRINTF("failed to allocate memory to store vdi metrics path: %s\n", + strerror(err)); + vdi_stats->shm.path = NULL; + goto out; + } + + vdi_stats->shm.size = PAGE_SIZE; + + err = shm_create(&vdi_stats->shm); + if (unlikely(err)) { + err = errno; + EPRINTF("failed to create shm ring stats file: %s\n", strerror(err)); + goto out; + } + + vdi_stats->stats = vdi_stats->shm.mem; + +out: + return err; +} + +int +td_metrics_vdi_stop(stats_t *vdi_stats) +{ + int err = 0; + + if(!vdi_stats->shm.path) + goto end; + + err = shm_destroy(&vdi_stats->shm); + if (unlikely(err)) { + err = errno; + EPRINTF("failed to destroy vdi metrics file: %s\n", strerror(err)); + } + + free(vdi_stats->shm.path); + vdi_stats->shm.path = NULL; + +end: + return err; +} diff --git a/drivers/tapdisk-metrics.h b/drivers/tapdisk-metrics.h index 7b315661..3956f330 100644 --- a/drivers/tapdisk-metrics.h +++ b/drivers/tapdisk-metrics.h @@ -19,7 +19,29 @@ #ifndef TAPDISK_METRICS_H #define TAPDISK_METRICS_H -#define TAPDISK_METRICS_PATHF "/dev/shm/td3-%d" +#define TAPDISK_METRICS_PATHF "/dev/shm/td3-%d" +#define TAPDISK_METRICS_VDI_PATHF "%s/vdi-%hu" + +#include + +#include "tapdisk-utils.h" +#include "tapdisk.h" + +struct stats { + unsigned long long read_reqs_submitted; + unsigned long long read_reqs_completed; + unsigned long long read_sectors; + unsigned long long read_total_ticks; + unsigned long long write_reqs_submitted; 
+ unsigned long long write_reqs_completed; + unsigned long long write_sectors; + unsigned long long write_total_ticks; +}; + +typedef struct { + struct shm shm; + struct stats *stats; +} stats_t; typedef struct { char *path; @@ -27,8 +49,10 @@ typedef struct { /* Creates a folder in which to store tapdisk3 statistics: /dev/shm/td3- */ int td_metrics_start(); - /* Destroys the folder /dev/shm/td3- and its contents */ void td_metrics_stop(); - +/* Creates the shm for the file that will store the metrics */ +int td_metrics_vdi_start(int minor, stats_t *vdi_stats); +/* Destroys the files created to store metrics */ +int td_metrics_vdi_stop(stats_t *vdi_stats); #endif /* TAPDISK_METRICS_H */ diff --git a/drivers/tapdisk-vbd.c b/drivers/tapdisk-vbd.c index be5013b8..1df6eb0d 100644 --- a/drivers/tapdisk-vbd.c +++ b/drivers/tapdisk-vbd.c @@ -38,6 +38,7 @@ #include "tapdisk-driver.h" #include "tapdisk-server.h" #include "tapdisk-vbd.h" +#include "tapdisk-metrics.h" #include "tapdisk-disktype.h" #include "tapdisk-interface.h" #include "tapdisk-stats.h" @@ -238,6 +239,11 @@ tapdisk_vbd_close_vdi(td_vbd_t *vbd) strerror(-err)); } + err = td_metrics_vdi_stop(&vbd->vdi_stats); + if (err) { + EPRINTF("failed to destroy stats file: %s\n", strerror(-err)); + } + tapdisk_image_close_chain(&vbd->images); if (vbd->secondary && @@ -605,6 +611,9 @@ tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *name, td_flag_t flags, int prt_d if (err) goto fail; + err = td_metrics_vdi_start(vbd->tap->minor, &vbd->vdi_stats); + if (err) + goto fail; if (tmp != vbd->name) free(tmp); @@ -1249,6 +1258,8 @@ __tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq, td_image_t *image = treq.image; int err; + long long interval; + err = (res <= 0 ? res : -res); vbd->secs_pending -= treq.secs; vreq->secs_pending -= treq.secs; @@ -1277,6 +1288,18 @@ __tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq, vreq->error = (vreq->error ? 
: err); } + interval = timeval_to_us(&vbd->ts) - timeval_to_us(&vreq->ts); + + if(treq.op == TD_OP_READ){ + vbd->vdi_stats.stats->read_reqs_completed++; + vbd->vdi_stats.stats->read_sectors += treq.secs; + vbd->vdi_stats.stats->read_total_ticks += interval; + }else{ + vbd->vdi_stats.stats->write_reqs_completed++; + vbd->vdi_stats.stats->write_sectors += treq.secs; + vbd->vdi_stats.stats->write_total_ticks += interval; + } + tapdisk_vbd_complete_vbd_request(vbd, vreq); } @@ -1475,6 +1498,7 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq) switch (vreq->op) { case TD_OP_WRITE: treq.op = TD_OP_WRITE; + vbd->vdi_stats.stats->write_reqs_submitted++; /* * it's important to queue the mirror request before * queuing the main one. If the main image runs into @@ -1491,6 +1515,7 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq) case TD_OP_READ: treq.op = TD_OP_READ; + vbd->vdi_stats.stats->read_reqs_submitted++; td_queue_read(treq.image, treq); break; } diff --git a/drivers/tapdisk-vbd.h b/drivers/tapdisk-vbd.h index 8a432bf4..af3327f8 100644 --- a/drivers/tapdisk-vbd.h +++ b/drivers/tapdisk-vbd.h @@ -25,6 +25,7 @@ #include "tapdisk-image.h" #include "tapdisk-blktap.h" #include "td-blkif.h" +#include "tapdisk-metrics.h" #define TD_VBD_REQUEST_TIMEOUT 120 #define TD_VBD_MAX_RETRIES 100 @@ -146,6 +147,7 @@ struct td_vbd_handle { td_disk_info_t disk_info; struct td_vbd_rrd rrd; + stats_t vdi_stats; }; #define tapdisk_vbd_for_each_request(vreq, tmp, list) \ From b781cef53491d358167e0670a118a70a8e971ed9 Mon Sep 17 00:00:00 2001 From: Jorge Martin Date: Mon, 13 Jul 2015 18:47:57 +0100 Subject: [PATCH 02/43] CA-165117: Stats between tapdisk and blkfront Rearranged the previous stats recording of the communications between tapdisk and blkfront to suit the new format. 
Signed-off-by: Jorge Martin Reviewed-by: Stefano Panella --- drivers/tapdisk-metrics.c | 57 +++++++++++++++++++++++++++++++++++++++ drivers/tapdisk-metrics.h | 14 ++++++++-- drivers/td-blkif.c | 8 ++++++ drivers/td-blkif.h | 3 +++ drivers/td-req.c | 17 +++++++++--- drivers/td-req.h | 2 ++ 6 files changed, 96 insertions(+), 5 deletions(-) diff --git a/drivers/tapdisk-metrics.c b/drivers/tapdisk-metrics.c index 1db683a6..d2db7565 100644 --- a/drivers/tapdisk-metrics.c +++ b/drivers/tapdisk-metrics.c @@ -24,12 +24,15 @@ #include #include #include + #include "tapdisk-metrics.h" #include "lock.h" #include "tapdisk-log.h" #include "debug.h" #include "tapdisk-queue.h" +#include "td-req.h" +/* make a static metrics struct, so it only exists in the context of this file */ static td_metrics_t td_metrics; /* Returns 0 in case there were no problems while emptying the folder */ @@ -184,3 +187,57 @@ td_metrics_vdi_stop(stats_t *vdi_stats) end: return err; } +int +td_metrics_vbd_start(int domain, int id, stats_t *vbd_stats) +{ + int err = 0; + + if(!td_metrics.path) + goto out; + + shm_init(&vbd_stats->shm); + + err = asprintf(&vbd_stats->shm.path, TAPDISK_METRICS_VBD_PATHF, + td_metrics.path, domain, id); + if(unlikely(err == -1)){ + err = errno; + EPRINTF("failed to allocate memory to store vbd metrics path: %s\n", + strerror(err)); + vbd_stats->shm.path = NULL; + goto out; + } + + vbd_stats->shm.size = PAGE_SIZE; + + err = shm_create(&vbd_stats->shm); + if (unlikely(err)) { + err = errno; + EPRINTF("failed to create shm ring stats file: %s\n", strerror(err)); + goto out; + } + vbd_stats->stats = vbd_stats->shm.mem; +out: + return err; + +} + +int +td_metrics_vbd_stop(stats_t *vbd_stats) +{ + int err = 0; + + if(!vbd_stats->shm.path) + goto end; + + err = shm_destroy(&vbd_stats->shm); + if (unlikely(err)) { + err = errno; + EPRINTF("failed to destroy vbd metrics file: %s\n", strerror(err)); + } + + free(vbd_stats->shm.path); + vbd_stats->shm.path = NULL; + +end: + return err; 
+} diff --git a/drivers/tapdisk-metrics.h b/drivers/tapdisk-metrics.h index 3956f330..65c07ce4 100644 --- a/drivers/tapdisk-metrics.h +++ b/drivers/tapdisk-metrics.h @@ -21,6 +21,7 @@ #define TAPDISK_METRICS_PATHF "/dev/shm/td3-%d" #define TAPDISK_METRICS_VDI_PATHF "%s/vdi-%hu" +#define TAPDISK_METRICS_VBD_PATHF "%s/vbd-%d-%d" #include @@ -51,8 +52,17 @@ typedef struct { int td_metrics_start(); /* Destroys the folder /dev/shm/td3- and its contents */ void td_metrics_stop(); -/* Creates the shm for the file that will store the metrics */ + +/* Creates the shm for the file that stores metrics from tapdisk to the vdi */ int td_metrics_vdi_start(int minor, stats_t *vdi_stats); -/* Destroys the files created to store metrics */ + +/* Destroys the files created to store the metrics from tapdisk to the vdi */ int td_metrics_vdi_stop(stats_t *vdi_stats); + +/* Creates the metrics file to store the stats from blkfront to tapdisk */ +int td_metrics_vbd_start(int domain, int id, stats_t *vbd_stats); + +/* Destroys the files created to store metrics from blkfront to tapdisk */ +int td_metrics_vbd_stop(stats_t *vbd_stats); + #endif /* TAPDISK_METRICS_H */ diff --git a/drivers/td-blkif.c b/drivers/td-blkif.c index a6aa8542..4430d33c 100644 --- a/drivers/td-blkif.c +++ b/drivers/td-blkif.c @@ -30,6 +30,7 @@ #include "tapdisk-log.h" #include "util.h" #include "tapdisk-server.h" +#include "tapdisk-metrics.h" #include "td-blkif.h" #include "td-ctx.h" @@ -221,6 +222,9 @@ tapdisk_xenblkif_destroy(struct td_xenblkif * blkif) list_del(&blkif->entry); tapdisk_xenio_ctx_put(blkif->ctx); } + err = td_metrics_vbd_stop(&blkif->vbd_stats); + if (unlikely(err)) + EPRINTF("failed to destroy blkfront stats file: %s\n", strerror(-err)); err = tapdisk_xenblkif_stats_destroy(blkif); if (unlikely(err)) { @@ -458,6 +462,10 @@ tapdisk_xenblkif_connect(domid_t domid, int devid, const grant_ref_t * grefs, goto fail; } + err = td_metrics_vbd_start(td_blkif->domid, td_blkif->devid, &td_blkif->vbd_stats); 
+ if (unlikely(err)) + goto fail; + err = tapdisk_xenblkif_stats_create(td_blkif); if (unlikely(err)) goto fail; diff --git a/drivers/td-blkif.h b/drivers/td-blkif.h index e5b0966c..84028419 100644 --- a/drivers/td-blkif.h +++ b/drivers/td-blkif.h @@ -33,6 +33,7 @@ #include "td-stats.h" #include "tapdisk-vbd.h" #include "tapdisk-utils.h" +#include "tapdisk-metrics.h" struct td_xenio_ctx; struct td_vbd_handle; @@ -125,6 +126,8 @@ struct td_xenblkif { */ struct td_xenblkif_stats stats; + stats_t vbd_stats; + struct { /** * Root directory of the stats. diff --git a/drivers/td-req.c b/drivers/td-req.c index 6799f619..aea1a068 100644 --- a/drivers/td-req.c +++ b/drivers/td-req.c @@ -32,6 +32,7 @@ #include "td-blkif.h" #include "td-ctx.h" #include "tapdisk-server.h" +#include "tapdisk-metrics.h" #include "tapdisk-vbd.h" #include "tapdisk-log.h" #include "tapdisk.h" @@ -416,6 +417,7 @@ tapdisk_xenblkif_complete_request(struct td_xenblkif * const blkif, long long *max = NULL, *sum = NULL, *cnt = NULL; static int depth = 0; bool processing_barrier_message; + unsigned long long *ticks = NULL; ASSERT(blkif); ASSERT(tapreq); @@ -454,6 +456,8 @@ tapdisk_xenblkif_complete_request(struct td_xenblkif * const blkif, cnt = &blkif->stats.xenvbd->st_rd_cnt; sum = &blkif->stats.xenvbd->st_rd_sum_usecs; max = &blkif->stats.xenvbd->st_rd_max_usecs; + blkif->vbd_stats.stats->read_reqs_completed++; + ticks = &blkif->vbd_stats.stats->read_total_ticks; if (likely(!err)) { _err = guest_copy2(blkif, tapreq); if (unlikely(_err)) { @@ -466,14 +470,16 @@ tapdisk_xenblkif_complete_request(struct td_xenblkif * const blkif, cnt = &blkif->stats.xenvbd->st_wr_cnt; sum = &blkif->stats.xenvbd->st_wr_sum_usecs; max = &blkif->stats.xenvbd->st_wr_max_usecs; + blkif->vbd_stats.stats->write_reqs_completed++; + ticks = &blkif->vbd_stats.stats->write_total_ticks; } if (likely(cnt)) { struct timeval now; long long interval; gettimeofday(&now, NULL); - interval = timeval_to_us(&now) - 
timeval_to_us(&tapreq->vreq.ts); - + interval = timeval_to_us(&now) - timeval_to_us(&tapreq->ts); + *ticks += interval; if (interval > *max) *max = interval; @@ -644,8 +650,10 @@ tapdisk_xenblkif_parse_request(struct td_xenblkif * const blkif, goto out; } blkif->stats.xenvbd->st_wr_sect += nr_sect; + blkif->vbd_stats.stats->write_sectors += nr_sect; } else blkif->stats.xenvbd->st_rd_sect += nr_sect; + blkif->vbd_stats.stats->read_sectors += nr_sect; /* * TODO Isn't this kind of expensive to do for each requests? Why does @@ -688,16 +696,17 @@ tapdisk_xenblkif_make_vbd_request(struct td_xenblkif * const blkif, memset(vreq, 0, sizeof(*vreq)); tapreq->vma = NULL; - switch (tapreq->msg.operation) { case BLKIF_OP_READ: blkif->stats.xenvbd->st_rd_req++; + blkif->vbd_stats.stats->read_reqs_submitted++; tapreq->prot = PROT_WRITE; vreq->op = TD_OP_READ; break; case BLKIF_OP_WRITE: case BLKIF_OP_WRITE_BARRIER: blkif->stats.xenvbd->st_wr_req++; + blkif->vbd_stats.stats->write_reqs_submitted++; tapreq->prot = PROT_READ; vreq->op = TD_OP_WRITE; break; @@ -707,6 +716,8 @@ tapdisk_xenblkif_make_vbd_request(struct td_xenblkif * const blkif, err = EOPNOTSUPP; goto out; } + /* Timestamp before the requests leave the blkif layer */ + gettimeofday(&tapreq->ts, NULL); /* * Check that the number of segments is sane. diff --git a/drivers/td-req.h b/drivers/td-req.h index d7caa463..f94fbbd3 100644 --- a/drivers/td-req.h +++ b/drivers/td-req.h @@ -61,6 +61,8 @@ struct td_xenblkif_req { */ char name[16 + 1]; + struct timeval ts; + /** * The scatter/gather list td_vbd_request_t.iov points to. */ From 1fc0ba1571311e481aa8677b120a3f8bbf48475c Mon Sep 17 00:00:00 2001 From: Jorge Martin Date: Mon, 13 Jul 2015 19:06:20 +0100 Subject: [PATCH 03/43] CA-161451: Stats between tapdisk and blktap Stats between tapdisk and blktap are now recorded. 
Signed-off-by: Jorge Martin Reviewed-by: Stefano Panella --- drivers/tapdisk-blktap.c | 38 ++++++++++++++++++++++++--- drivers/tapdisk-blktap.h | 3 +++ drivers/tapdisk-metrics.c | 55 +++++++++++++++++++++++++++++++++++++++ drivers/tapdisk-metrics.h | 13 ++++++--- drivers/tapdisk-vbd.h | 1 - 5 files changed, 103 insertions(+), 7 deletions(-) diff --git a/drivers/tapdisk-blktap.c b/drivers/tapdisk-blktap.c index 605f231e..7f5f5917 100644 --- a/drivers/tapdisk-blktap.c +++ b/drivers/tapdisk-blktap.c @@ -33,6 +33,7 @@ #include "blktap.h" #include "tapdisk-vbd.h" #include "tapdisk-blktap.h" +#include "tapdisk-metrics.h" #include "tapdisk-server.h" #include "linux-blktap.h" @@ -67,6 +68,7 @@ struct td_blktap_req { unsigned int id; char name[16]; struct td_iovec iov[BLKTAP_SEGMENT_MAX]; + struct timeval ts; }; td_blktap_req_t * @@ -197,18 +199,26 @@ tapdisk_blktap_put_response(td_blktap_t *tap, { blktap_ring_rsp_t *rsp; int op = 0; + unsigned long long interval; + struct timeval now; BUG_ON(!tap->vma); rsp = BLKTAP_GET_RESPONSE(tap, tap->rsp_prod_pvt); + gettimeofday(&now, NULL); + interval = timeval_to_us(&now) - timeval_to_us(&req->ts); switch (req->vreq.op) { case TD_OP_READ: op = BLKTAP_OP_READ; + tap->blktap_stats.stats->read_reqs_completed++; + tap->blktap_stats.stats->read_total_ticks += interval; break; case TD_OP_WRITE: op = BLKTAP_OP_WRITE; - break; + tap->blktap_stats.stats->write_reqs_completed++; + tap->blktap_stats.stats->write_total_ticks += interval; + break; default: BUG(); } @@ -252,6 +262,7 @@ tapdisk_blktap_vector_request(td_blktap_t *tap, void *page, *next, *last; size_t size; int i; + unsigned nr_sect = 0; iov = req->iov - 1; last = NULL; @@ -274,8 +285,17 @@ tapdisk_blktap_vector_request(td_blktap_t *tap, last = iov->base + (iov->secs << SECTOR_SHIFT); page += BLKTAP_PAGE_SIZE; + nr_sect += size; } + switch(msg->operation){ + case BLKTAP_OP_READ: + tap->blktap_stats.stats->read_sectors += nr_sect; + break; + case BLKTAP_OP_WRITE: + 
tap->blktap_stats.stats->write_sectors += nr_sect; + break; + } vreq->iov = req->iov; vreq->iovcnt = iov - req->iov + 1; vreq->sec = msg->sector_number; @@ -290,11 +310,15 @@ tapdisk_blktap_parse_request(td_blktap_t *tap, memset(req, 0, sizeof(*req)); + gettimeofday(&req->ts, NULL); + switch (msg->operation) { case BLKTAP_OP_READ: - op = TD_OP_READ; + tap->blktap_stats.stats->read_reqs_submitted++; + op = TD_OP_READ; break; case BLKTAP_OP_WRITE: + tap->blktap_stats.stats->write_reqs_submitted++; op = TD_OP_WRITE; break; default: @@ -500,7 +524,7 @@ __tapdisk_blktap_close(td_blktap_t *tap) * -EFAULT. vreq completion just backs off once fd/vma are * gone, so we'll drain, then idle until close(). */ - + int err; if (tap->event_id >= 0) { tapdisk_server_unregister_event(tap->event_id); tap->event_id = -1; @@ -512,6 +536,10 @@ __tapdisk_blktap_close(td_blktap_t *tap) close(tap->fd); tap->fd = -1; } + err = td_metrics_blktap_stop(&tap->blktap_stats); + if (err) { + EPRINTF("failed to destroy blktap stats file: %s\n", strerror(-err)); + } } void @@ -558,6 +586,10 @@ tapdisk_blktap_open(const char *devname, td_vbd_t *vbd, td_blktap_t **_tap) if (err) goto fail; + err = td_metrics_blktap_start(tap->minor, &tap->blktap_stats); + if (err) + goto fail; + tap->event_id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, tap->fd, 0, diff --git a/drivers/tapdisk-blktap.h b/drivers/tapdisk-blktap.h index ec24f0ee..e7f2e617 100644 --- a/drivers/tapdisk-blktap.h +++ b/drivers/tapdisk-blktap.h @@ -24,6 +24,7 @@ typedef struct td_blktap_req td_blktap_req_t; #include "blktap.h" #include "tapdisk-vbd.h" #include "list.h" +#include "tapdisk-metrics.h" struct td_blktap_stats { struct { @@ -60,6 +61,8 @@ struct td_blktap { struct list_head entry; struct td_blktap_stats stats; + + stats_t blktap_stats; }; int tapdisk_blktap_open(const char *, td_vbd_t *, td_blktap_t **); diff --git a/drivers/tapdisk-metrics.c b/drivers/tapdisk-metrics.c index d2db7565..11d5aea3 100644 --- 
a/drivers/tapdisk-metrics.c +++ b/drivers/tapdisk-metrics.c @@ -241,3 +241,58 @@ td_metrics_vbd_stop(stats_t *vbd_stats) end: return err; } + +int +td_metrics_blktap_start(int minor, stats_t *blktap_stats) +{ + + int err = 0; + + if(!td_metrics.path) + goto out; + + shm_init(&blktap_stats->shm); + + err = asprintf(&blktap_stats->shm.path, TAPDISK_METRICS_BLKTAP_PATHF, td_metrics.path, minor); + if(unlikely(err == -1)){ + err = errno; + EPRINTF("failed to allocate memory to store blktap metrics path: %s\n",strerror(err)); + blktap_stats->shm.path = NULL; + goto out; + } + + blktap_stats->shm.size = PAGE_SIZE; + + err = shm_create(&blktap_stats->shm); + if (unlikely(err)) { + err = errno; + EPRINTF("failed to create blktap shm ring stats file: %s\n", strerror(err)); + goto out; + } + blktap_stats->stats = blktap_stats->shm.mem; +out: + return err; +} + +int +td_metrics_blktap_stop(stats_t *blktap_stats) +{ + int err = 0; + + if(!blktap_stats->shm.path) + goto end; + + err = shm_destroy(&blktap_stats->shm); + if (unlikely(err)) { + err = errno; + EPRINTF("failed to destroy blktap metrics file: %s\n", strerror(err)); + } + + free(blktap_stats->shm.path); + blktap_stats->shm.path = NULL; + +end: + return err; + +} + diff --git a/drivers/tapdisk-metrics.h b/drivers/tapdisk-metrics.h index 65c07ce4..1d34d6b5 100644 --- a/drivers/tapdisk-metrics.h +++ b/drivers/tapdisk-metrics.h @@ -19,9 +19,10 @@ #ifndef TAPDISK_METRICS_H #define TAPDISK_METRICS_H -#define TAPDISK_METRICS_PATHF "/dev/shm/td3-%d" -#define TAPDISK_METRICS_VDI_PATHF "%s/vdi-%hu" -#define TAPDISK_METRICS_VBD_PATHF "%s/vbd-%d-%d" +#define TAPDISK_METRICS_PATHF "/dev/shm/td3-%d" +#define TAPDISK_METRICS_VDI_PATHF "%s/vdi-%hu" +#define TAPDISK_METRICS_VBD_PATHF "%s/vbd-%d-%d" +#define TAPDISK_METRICS_BLKTAP_PATHF "%s/blktap-%d" #include @@ -65,4 +66,10 @@ int td_metrics_vbd_start(int domain, int id, stats_t *vbd_stats); /* Destroys the files created to store metrics from blkfront to tapdisk */ int 
td_metrics_vbd_stop(stats_t *vbd_stats); +/* Creates the metrics file between tapdisk and blktap */ +int td_metrics_blktap_start(int minor, stats_t *blktap_stats); + +/* Destroys the metrics file between tapdisk and blktap */ +int td_metrics_blktap_stop(stats_t *blktap_stats); + #endif /* TAPDISK_METRICS_H */ diff --git a/drivers/tapdisk-vbd.h b/drivers/tapdisk-vbd.h index af3327f8..b6949987 100644 --- a/drivers/tapdisk-vbd.h +++ b/drivers/tapdisk-vbd.h @@ -25,7 +25,6 @@ #include "tapdisk-image.h" #include "tapdisk-blktap.h" #include "td-blkif.h" -#include "tapdisk-metrics.h" #define TD_VBD_REQUEST_TIMEOUT 120 #define TD_VBD_MAX_RETRIES 100 From 8f794e9138cfa503e20b0f242fcf2809c90b2a41 Mon Sep 17 00:00:00 2001 From: Jorge Martin Date: Tue, 14 Jul 2015 19:06:22 +0100 Subject: [PATCH 04/43] CA-161450: Stats between tapdisk and NBD Traffic stats for NBD and tapdisk are now accounted under /dev/shm/td3-<pid>/nbd-<minor> Signed-off-by: Jorge Martin Reviewed-by: Stefano Panella Signed-off-by: Germano Percossi --- drivers/tapdisk-metrics.c | 50 +++++++++++++++++++++++++++++++++++++ drivers/tapdisk-metrics.h | 4 +++ drivers/tapdisk-nbdserver.c | 25 ++++++++++++++++++- drivers/tapdisk-nbdserver.h | 2 ++ 4 files changed, 80 insertions(+), 1 deletion(-) diff --git a/drivers/tapdisk-metrics.c b/drivers/tapdisk-metrics.c index 11d5aea3..fedeb2cf 100644 --- a/drivers/tapdisk-metrics.c +++ b/drivers/tapdisk-metrics.c @@ -296,3 +296,53 @@ td_metrics_blktap_stop(stats_t *blktap_stats) } +int +td_metrics_nbd_start(stats_t *nbd_stats, int minor) +{ + int err = 0; + + if(!td_metrics.path || nbd_stats->shm.path != NULL) + goto out; + + shm_init(&nbd_stats->shm); + + err = asprintf(&nbd_stats->shm.path, TAPDISK_METRICS_NBD_PATHF, td_metrics.path, minor); + if(unlikely(err == -1)){ + err = errno; + EPRINTF("failed to allocate memory to store NBD metrics path: %s\n",strerror(err)); + nbd_stats->shm.path = NULL; + goto out; + } + + nbd_stats->shm.size = PAGE_SIZE; + + err = 
shm_create(&nbd_stats->shm); + if (unlikely(err)) { + err = errno; + EPRINTF("failed to create NBD shm ring stats file: %s\n", strerror(err)); + goto out; + } + nbd_stats->stats = nbd_stats->shm.mem; +out: + return err; +} + +int +td_metrics_nbd_stop(stats_t *nbd_stats) +{ + int err = 0; + + if(!nbd_stats->shm.path) + goto end; + err = shm_destroy(&nbd_stats->shm); + if (unlikely(err)) { + err = errno; + EPRINTF("failed to destroy NBD metrics file: %s\n", strerror(err)); + } + + free(nbd_stats->shm.path); + nbd_stats->shm.path = NULL; + +end: + return err; +} diff --git a/drivers/tapdisk-metrics.h b/drivers/tapdisk-metrics.h index 1d34d6b5..80d01b5c 100644 --- a/drivers/tapdisk-metrics.h +++ b/drivers/tapdisk-metrics.h @@ -23,6 +23,7 @@ #define TAPDISK_METRICS_VDI_PATHF "%s/vdi-%hu" #define TAPDISK_METRICS_VBD_PATHF "%s/vbd-%d-%d" #define TAPDISK_METRICS_BLKTAP_PATHF "%s/blktap-%d" +#define TAPDISK_METRICS_NBD_PATHF "%s/nbd-%d" #include @@ -72,4 +73,7 @@ int td_metrics_blktap_start(int minor, stats_t *blktap_stats); /* Destroys the metrics file between tapdisk and blktap */ int td_metrics_blktap_stop(stats_t *blktap_stats); +int td_metrics_nbd_start(stats_t *nbd_server, int minor); + +int td_metrics_nbd_stop(stats_t *nbd_server); #endif /* TAPDISK_METRICS_H */ diff --git a/drivers/tapdisk-nbdserver.c b/drivers/tapdisk-nbdserver.c index 04bce7b7..2519ff2f 100644 --- a/drivers/tapdisk-nbdserver.c +++ b/drivers/tapdisk-nbdserver.c @@ -266,7 +266,10 @@ __tapdisk_nbdserver_request_cb(td_vbd_request_t *vreq, int error, void *token, int final) { td_nbdserver_client_t *client = token; + td_nbdserver_t *server = client->server; td_nbdserver_req_t *req = containerof(vreq, td_nbdserver_req_t, vreq); + unsigned long long interval; + struct timeval now; struct nbd_reply reply; int tosend = 0; int sent = 0; @@ -276,6 +279,9 @@ __tapdisk_nbdserver_request_cb(td_vbd_request_t *vreq, int error, reply.error = htonl(error); memcpy(reply.handle, req->id, sizeof(reply.handle)); + 
gettimeofday(&now, NULL); + interval = timeval_to_us(&now) - timeval_to_us(&vreq->ts); + if (client->client_fd < 0) { ERR("Finishing request for client that has disappeared"); goto finish; @@ -286,6 +292,9 @@ __tapdisk_nbdserver_request_cb(td_vbd_request_t *vreq, int error, switch(vreq->op) { case TD_OP_READ: tosend = len = vreq->iov->secs << SECTOR_SHIFT; + server->nbd_stats.stats->read_reqs_completed++; + server->nbd_stats.stats->read_sectors += vreq->iov->secs; + server->nbd_stats.stats->read_total_ticks += interval; while (tosend > 0) { sent = send(client->client_fd, vreq->iov->base + (len - tosend), @@ -299,6 +308,10 @@ __tapdisk_nbdserver_request_cb(td_vbd_request_t *vreq, int error, tosend -= sent; } break; + case TD_OP_WRITE: + server->nbd_stats.stats->write_reqs_completed++; + server->nbd_stats.stats->write_sectors += vreq->iov->secs; + server->nbd_stats.stats->write_total_ticks += interval; default: break; } @@ -440,10 +453,11 @@ tapdisk_nbdserver_clientcb(event_id_t id, char mode, void *data) switch(request.type) { case NBD_CMD_READ: vreq->op = TD_OP_READ; + server->nbd_stats.stats->read_reqs_submitted++; break; case NBD_CMD_WRITE: vreq->op = TD_OP_WRITE; - + server->nbd_stats.stats->write_reqs_submitted++; n = 0; while (n < len) { rc = recv(fd, vreq->iov->base + n, (len - n), 0); @@ -574,6 +588,11 @@ tapdisk_nbdserver_alloc(td_vbd_t *vbd, td_disk_info_t info) server->unix_listening_event_id = -1; INIT_LIST_HEAD(&server->clients); + if (td_metrics_nbd_start(&server->nbd_stats, server->vbd->tap->minor)) { + ERR("failed to create metrics file for nbdserver"); + goto fail; + } + if (snprintf(fdreceiver_path, TAPDISK_NBDSERVER_MAX_PATH_LEN, "%s%d.%d", TAPDISK_NBDSERVER_LISTEN_SOCK_PATH, getpid(), vbd->uuid) < 0) { @@ -891,6 +910,10 @@ tapdisk_nbdserver_free(td_nbdserver_t *server) if (err) ERR("failed to remove UNIX domain socket %s: %s\n", server->sockpath, strerror(errno)); + err = td_metrics_nbd_stop(&server->nbd_stats); + + if (err) + ERR("failed to 
delete NBD metrics: %s\n", strerror(errno)); free(server); } diff --git a/drivers/tapdisk-nbdserver.h b/drivers/tapdisk-nbdserver.h index c171becd..3889e2b5 100644 --- a/drivers/tapdisk-nbdserver.h +++ b/drivers/tapdisk-nbdserver.h @@ -66,6 +66,8 @@ struct td_nbdserver { char sockpath[TAPDISK_NBDSERVER_MAX_PATH_LEN]; struct list_head clients; + + stats_t nbd_stats; }; struct td_nbdserver_client { From 6da9913df94b5f14b271689b74cd7f5d6bb9a458 Mon Sep 17 00:00:00 2001 From: Kostas Ladopoulos Date: Fri, 17 Jul 2015 12:11:50 +0000 Subject: [PATCH 05/43] CA-148438: Expose mem_mode flag in statistics file Each tapdisk's memory mode state is now exposed in the respective '/dev/shm/vbd3-*/statistics' memory mapped file. This will be picked up by xcp-rrdd-iostat which will then aggregate it for the total number of tapdisks on the host and expose a single rrd with the total number of tapdisks in low memory mode. See also CP-12967. Signed-off-by: Kostas Ladopoulos Reviewed-by: Stefano Panella GitHub: closes xapi-project/blktap#165 --- drivers/tapdisk-server.c | 14 ++++++++++++++ drivers/tapdisk-vbd.h | 3 +++ drivers/td-blkif.c | 4 ++++ include/blktap3.h | 12 ++++++++++++ 4 files changed, 33 insertions(+) diff --git a/drivers/tapdisk-server.c b/drivers/tapdisk-server.c index 4abf5e78..418b8a7a 100644 --- a/drivers/tapdisk-server.c +++ b/drivers/tapdisk-server.c @@ -38,6 +38,7 @@ #include "tapdisk-driver.h" #include "tapdisk-interface.h" #include "tapdisk-log.h" +#include "td-blkif.h" #define DBG(_level, _f, _a...) tlog_write(_level, _f, ##_a) #define ERR(_err, _f, _a...) 
tlog_error(_err, _f, ##_a) @@ -471,11 +472,17 @@ static void lowmem_cleanup(void) static void lowmem_timeout(event_id_t id, char mode, void *data) { int ret; + td_vbd_t *vbd, *tmpv; + struct td_xenblkif *blkif, *tmpb; server.mem_state.mode = NORMAL_MEMORY_MODE; tapdisk_server_unregister_event(server.mem_state.mem_evid); server.mem_state.mem_evid = -1; + tapdisk_server_for_each_vbd(vbd, tmpv) + tapdisk_vbd_for_each_blkif(vbd, blkif, tmpb) + td_flag_clear(blkif->stats.xenvbd->flags, BT3_LOW_MEMORY_MODE); + if ((ret = tapdisk_server_reset_lowmem_mode()) < 0) { ERR(-ret, "Failed to re-init low memory handler: %s\n", strerror(-ret)); @@ -491,6 +498,9 @@ static void lowmem_event(event_id_t id, char mode, void *data) ssize_t n; int backoff; + td_vbd_t *vbd, *tmpv; + struct td_xenblkif *blkif, *tmpb; + n = read(server.mem_state.efd, &result, sizeof(result)); if (n < 0) { ERR(-errno, "Failed to read from eventfd: %s\n", @@ -533,6 +543,10 @@ static void lowmem_event(event_id_t id, char mode, void *data) } server.mem_state.mode = LOW_MEMORY_MODE; + tapdisk_server_for_each_vbd(vbd, tmpv) + tapdisk_vbd_for_each_blkif(vbd, blkif, tmpb) + td_flag_set(blkif->stats.xenvbd->flags, BT3_LOW_MEMORY_MODE); + /* Increment backoff up to a limit */ if (server.mem_state.backoff < MAX_BACKOFF) server.mem_state.backoff *= 2; diff --git a/drivers/tapdisk-vbd.h b/drivers/tapdisk-vbd.h index b6949987..590452a9 100644 --- a/drivers/tapdisk-vbd.h +++ b/drivers/tapdisk-vbd.h @@ -155,6 +155,9 @@ struct td_vbd_handle { #define tapdisk_vbd_for_each_image(vbd, image, tmp) \ tapdisk_for_each_image_safe(image, tmp, &vbd->images) +#define tapdisk_vbd_for_each_blkif(vbd, blkif, tmp) \ + list_for_each_entry_safe((blkif), (tmp), (&vbd->rings), entry) + static inline void tapdisk_vbd_move_request(td_vbd_request_t *vreq, struct list_head *dest) { diff --git a/drivers/td-blkif.c b/drivers/td-blkif.c index 4430d33c..92814e3f 100644 --- a/drivers/td-blkif.c +++ b/drivers/td-blkif.c @@ -152,6 +152,10 @@ 
tapdisk_xenblkif_stats_create(struct td_xenblkif *blkif) blkif->stats.xenvbd = blkif->xenvbd_stats.stats.mem; + if (tapdisk_server_mem_mode()) { + td_flag_set(blkif->stats.xenvbd->flags, BT3_LOW_MEMORY_MODE); + } + err = tapdisk_xenblkif_ring_stats_update(blkif); if (unlikely(err)) { EPRINTF("failed to generate shared I/O ring stats: %s\n", diff --git a/include/blktap3.h b/include/blktap3.h index 30b48d2d..1e442448 100644 --- a/include/blktap3.h +++ b/include/blktap3.h @@ -29,6 +29,11 @@ #define TAPBACK_CTL_SOCK_PATH "/var/run/tapback.sock" #define BLKTAP2_DEVNAME "tapdev" +/** + * Flag defines + */ +#define BT3_LOW_MEMORY_MODE 0x0000000000000001 + /** * blkback-style stats */ @@ -100,6 +105,13 @@ struct blkback_stats { * Absolute maximum BLKIF_OP_WRITE response time, in us. */ long long st_wr_max_usecs; + + /** + * Allocated space for 64 flags (due to 8-byte alignment) + * 1st flag is LSB, last flag is MSB. + * mem_mode: 0 - NORMAL_MEMORY_MODE; 1 - LOW_MEMORY_MODE; + */ + unsigned long long flags; } __attribute__ ((aligned (8))); #endif /* __BLKTAP_3_H__ */ From 2417d53cc0b5dd4f84b3747989de9745c42cb74b Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:23:03 +0100 Subject: [PATCH 06/43] Pre-release 3.1.1 Signed-off-by: Germano Percossi --- VERSION | 2 +- WHATS_NEW | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index a0cd9f0c..50e47c89 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.0 \ No newline at end of file +3.1.1 \ No newline at end of file diff --git a/WHATS_NEW b/WHATS_NEW index b6121918..a7e43063 100644 --- a/WHATS_NEW +++ b/WHATS_NEW @@ -1,3 +1,15 @@ +Version 3.1.1 - 24th Sep 2015 +================================ +CA-148438: Expose mem_mode flag in statistics file +CA-161450: Stats between tapdisk and NBD +CA-161451: Stats between tapdisk and blktap +CA-165117: Stats between tapdisk and blkfront +CA-161449: Stats between tapdisk and vdi traffic +CA-170614: Harden the nbdserver 
code in tapdisk3 +CP-12399: Prevent tapback from plugging non-blktap2 devices +CA-169052: Work arounds for NULL uuid in VHD header +CA-153279: don't complain if there's no tapdisk to signal + Version 3.1.0 - 23th Sep 2015 ================================ Coverity integration From 48d0d25b10e051ac7bf64f43b2132ecf972f075d Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 15:18:40 +0100 Subject: [PATCH 07/43] Initial import of Kuiper code Import from https://github.com/germanop/blktap: - branch kuiper-rebased - commit e6450790 Signed-off-by: Germano Percossi --- Makefile.am | 1 + configure.ac | 1 + control/tap-ctl.c | 16 +- drivers/Makefile.am | 1 + drivers/block-vhd.c | 144 +++++- drivers/tapdisk-control.c | 2 + drivers/tapdisk.h | 1 + include/Makefile.am | 2 + include/blktap.h | 1 + include/kpr_util.h | 10 + include/payload.h | 40 ++ include/tapdisk-message.h | 1 + mk/blktap.spec.in | 2 + thin/Makefile.am | 24 + thin/kpr_util.c | 166 ++++++ thin/payload.c | 26 + thin/sock_client2.c | 46 ++ thin/sock_serv.c | 1024 +++++++++++++++++++++++++++++++++++++ thin/thin_cli.c | 84 +++ 19 files changed, 1581 insertions(+), 11 deletions(-) create mode 100644 include/kpr_util.h create mode 100644 include/payload.h create mode 100644 thin/Makefile.am create mode 100644 thin/kpr_util.c create mode 100644 thin/payload.c create mode 100644 thin/sock_client2.c create mode 100644 thin/sock_serv.c create mode 100644 thin/thin_cli.c diff --git a/Makefile.am b/Makefile.am index 8cbf7401..2dec2b43 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,6 +6,7 @@ SUBDIRS = lvm SUBDIRS += $(MAYBE_part) SUBDIRS += vhd +SUBDIRS += thin SUBDIRS += control SUBDIRS += drivers SUBDIRS += include diff --git a/configure.ac b/configure.ac index 5f703541..69db1cc4 100644 --- a/configure.ac +++ b/configure.ac @@ -80,5 +80,6 @@ control/Makefile drivers/Makefile include/Makefile tapback/Makefile +thin/Makefile ]) AC_OUTPUT diff --git a/control/tap-ctl.c b/control/tap-ctl.c index 
90f9b9c7..1541b749 100644 --- a/control/tap-ctl.c +++ b/control/tap-ctl.c @@ -252,7 +252,8 @@ tap_cli_create_usage(FILE *stream) "[-r turn on read caching into leaf node] [-2 " "use secondary image (in mirror mode if no -s)] [-s " "fail over to the secondary image on ENOSPC] " - "[-t request timeout in seconds] [-D no O_DIRECT]\n"); + "[-t request timeout in seconds] [-D no O_DIRECT] " + "[-T enable thin provisioning]\n"); } static int @@ -269,7 +270,7 @@ tap_cli_create(int argc, char **argv) timeout = 0; optind = 0; - while ((c = getopt(argc, argv, "a:RDd:e:r2:st:h")) != -1) { + while ((c = getopt(argc, argv, "a:RDd:e:r2:st:Th")) != -1) { switch (c) { case 'a': args = optarg; @@ -300,6 +301,9 @@ tap_cli_create(int argc, char **argv) case 't': timeout = atoi(optarg); break; + case 'T': + flags |= TAPDISK_MESSAGE_FLAG_THIN; + break; case '?': goto usage; case 'h': @@ -717,7 +721,8 @@ tap_cli_open_usage(FILE *stream) "[-r turn on read caching into leaf node] [-2 " "use secondary image (in mirror mode if no -s)] [-s " "fail over to the secondary image on ENOSPC] " - "[-t request timeout in seconds] [-D no O_DIRECT]\n"); + "[-t request timeout in seconds] [-D no O_DIRECT] " + "[-T enable thin provisioning]\n"); } static int @@ -735,7 +740,7 @@ tap_cli_open(int argc, char **argv) secondary = NULL; optind = 0; - while ((c = getopt(argc, argv, "a:RDm:p:e:r2:st:h")) != -1) { + while ((c = getopt(argc, argv, "a:RDm:p:e:r2:st:Th")) != -1) { switch (c) { case 'p': pid = atoi(optarg); @@ -769,6 +774,9 @@ tap_cli_open(int argc, char **argv) case 't': timeout = atoi(optarg); break; + case 'T': + flags |= TAPDISK_MESSAGE_FLAG_THIN; + break; case '?': goto usage; case 'h': diff --git a/drivers/Makefile.am b/drivers/Makefile.am index 4e46489d..f647f0de 100644 --- a/drivers/Makefile.am +++ b/drivers/Makefile.am @@ -100,6 +100,7 @@ libtapdisk_la_SOURCES += td-stats.c libtapdisk_la_SOURCES += td-stats.h libtapdisk_la_LIBADD = ../vhd/lib/libvhd.la +libtapdisk_la_LIBADD += 
../thin/libtapdiskthin.la libtapdisk_la_LIBADD += -laio libtapdisk_la_LIBADD += -lxenctrl diff --git a/drivers/block-vhd.c b/drivers/block-vhd.c index 9cdc1a7e..61dfef82 100644 --- a/drivers/block-vhd.c +++ b/drivers/block-vhd.c @@ -62,6 +62,8 @@ #include "tapdisk-disktype.h" #include "tapdisk-storage.h" +#include "payload.h" + unsigned int SPB; #define DEBUGGING 2 @@ -123,6 +125,7 @@ unsigned int SPB; #define VHD_FLAG_OPEN_PREALLOCATE 32 #define VHD_FLAG_OPEN_NO_O_DIRECT 64 #define VHD_FLAG_OPEN_LOCAL_CACHE 128 +#define VHD_FLAG_OPEN_THIN 256 #define VHD_FLAG_BAT_LOCKED 1 #define VHD_FLAG_BAT_WRITE_STARTED 2 @@ -232,6 +235,9 @@ struct vhd_state { struct vhd_request *vreq_free[VHD_REQS_DATA]; struct vhd_request vreq_list[VHD_REQS_DATA]; + /* thin provisioning data */ + off64_t eof_bytes; + /* for redundant bitmap writes */ int padbm_size; char *padbm_buf; @@ -257,6 +263,7 @@ struct vhd_state { static void vhd_complete(void *, struct tiocb *, int); static void finish_data_transaction(struct vhd_state *, struct vhd_bitmap *); +static int vhd_thin_prepare(struct vhd_state *); static struct vhd_state *_vhd_master; static unsigned long _vhd_zsize; @@ -391,29 +398,128 @@ vhd_kill_footer(struct vhd_state *s) return 0; } + +static int +thin_prepare_req(const struct vhd_state * s, + struct payload * message, + uint64_t next_db, int query) +{ + /* + * sectors can be converted in bytes using info.sector_size + * that happens to be calculated using the same VHD define used + * by vhd_sectors_to_bytes. 
This is no more unreliable than using + * 2 different macros to do the job (see __vhd_open) + */ + uint64_t virt_bytes, phy_bytes, req_bytes; + const uint64_t warn1 = 52428800; /* 50 MBs warning */ + const uint64_t warn2 = 4194304; /* 4 MBs warning */ + uint64_t safe_threshold, critical_threshold; + + virt_bytes = vhd_sectors_to_bytes(s->driver->info.size); + phy_bytes = s->eof_bytes; + req_bytes = vhd_sectors_to_bytes(next_db); + + /* we want to exit as soon as possible if not needed */ + if (phy_bytes >= virt_bytes) + return 1; /* nothing to be done: space is enough */ + safe_threshold = (phy_bytes < warn1) ? 0 : (phy_bytes - warn1); + if (req_bytes < safe_threshold) + return 1; /* we do not bother yet */ + + init_payload(message); + + critical_threshold = (phy_bytes < warn2) ? 0 : (phy_bytes - warn2); + if (req_bytes > critical_threshold) { + /* this must be our last request so we have to loop for + a final answer or block: do something more.. */ + message->reply = PAYLOAD_QUERY; + } else { /* if we are here it means we hit the threshold */ + /* if we already issued a request just check its status */ + message->reply = query ? 
PAYLOAD_QUERY: PAYLOAD_REQUEST; + } + strncpy(message->path, s->vhd.file, PAYLOAD_MAX_PATH_LENGTH); + message->path[PAYLOAD_MAX_PATH_LENGTH - 1] = '\0'; /* strncpy does not terminate on truncation */ + message->curr = phy_bytes; + message->req = req_bytes; + message->vhd_size = virt_bytes; + + return 0; +} + + +static int +thin_parse_reply(const struct payload * buf, struct vhd_state *s, int * query) +{ + switch (buf->reply) { + case PAYLOAD_ACCEPTED: + *query = 1; /* next time just query */ + break; + case PAYLOAD_WAIT: + break; /* just keep asking */ + case PAYLOAD_REJECTED: + *query = 0; /* can make new requests if necessary */ + break; + default: /* It should be only DONE but to be safe..*/ + *query = 0; /* we cannot query any more, it has been served */ + vhd_thin_prepare(s); + } + return 0; +} + +static void +update_next_db(struct vhd_state *s, uint64_t next_db, int notify) +{ + int err; + struct payload message; + static int query = 0; /* distinguish requests from queries */ + + DPRINTF("update_next_db"); + + s->next_db = next_db; + + if (!(s->flags & VHD_FLAG_OPEN_THIN)) + return; + + if (notify && !thin_prepare_req(s, &message, next_db, query)) { + /* socket message block */ + err = thin_sock_comm(&message); + if (err) + DBG(TLOG_WARN, "socket returned: %d\n", err); + err = thin_parse_reply(&message, s, &query); + if (err) + DBG(TLOG_WARN, "thin_parse_reply returned: %d\n", err); + } +} + + + static inline int find_next_free_block(struct vhd_state *s) { int err; off64_t eom; uint32_t i, entry; + uint64_t next_db; err = vhd_end_of_headers(&s->vhd, &eom); if (err) return err; - s->next_db = secs_round_up(eom); + update_next_db(s, secs_round_up(eom), 0); s->first_db = s->next_db; if ((s->first_db + s->bm_secs) % s->spp) s->first_db += (s->spp - ((s->first_db + s->bm_secs) % s->spp)); for (i = 0; i < s->bat.bat.entries; i++) { entry = bat_entry(s, i); - if (entry != DD_BLK_UNUSED && entry >= s->next_db) - s->next_db = (uint64_t)entry + (uint64_t)s->spb + if (entry != DD_BLK_UNUSED && entry >= s->next_db) { + next_db = (uint64_t)entry + (uint64_t)s->spb +
(uint64_t)s->bm_secs; - if (s->next_db > UINT_MAX) - break; + update_next_db(s, next_db, 0); + } + + if (s->next_db > UINT_MAX) + break; } return 0; @@ -642,6 +748,15 @@ vhd_log_open(struct vhd_state *s) allocated, full, s->next_db); } +static int +vhd_thin_prepare(struct vhd_state *s) +{ + if ((s->eof_bytes = lseek64(s->vhd.fd, 0, SEEK_END)) == -1) + return -errno; + + return 0; +} + static int __vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags) { @@ -696,6 +811,14 @@ __vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags) vhd_log_open(s); + if(!test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) { + update_next_db(s, s->next_db, 0); + } + + if(test_vhd_flag(flags, VHD_FLAG_OPEN_THIN)) { + vhd_thin_prepare(s); + } + SPB = s->spb; s->vreq_free_count = VHD_REQS_DATA; @@ -749,10 +872,17 @@ _vhd_open(td_driver_t *driver, const char *name, td_flag_t flags) VHD_FLAG_OPEN_NO_CACHE); if (flags & TD_OPEN_LOCAL_CACHE) vhd_flags |= VHD_FLAG_OPEN_LOCAL_CACHE; + if (flags & TD_OPEN_THIN) + if(!(flags & TD_OPEN_RDONLY)) + vhd_flags |= VHD_FLAG_OPEN_THIN; /* pre-allocate for all but NFS and LVM storage */ driver->storage = tapdisk_storage_type(name); + /* Disable thin provisioning if not LVM */ + if (driver->storage != TAPDISK_STORAGE_TYPE_LVM) + clear_vhd_flag(vhd_flags, VHD_FLAG_OPEN_THIN); + if (driver->storage != TAPDISK_STORAGE_TYPE_NFS && driver->storage != TAPDISK_STORAGE_TYPE_LVM) vhd_flags |= VHD_FLAG_OPEN_PREALLOCATE; @@ -1537,7 +1667,7 @@ allocate_block(struct vhd_state *s, uint32_t blk) if (next_db > UINT_MAX) return -ENOSPC; - s->next_db = next_db; + update_next_db(s,next_db, 0); s->bat.pbw_blk = blk; s->bat.pbw_offset = s->next_db; @@ -2146,7 +2276,7 @@ finish_bat_write(struct vhd_request *req) if (!req->error) { bat_entry(s, s->bat.pbw_blk) = s->bat.pbw_offset; - s->next_db = s->bat.pbw_offset + s->spb + s->bm_secs; + update_next_db(s, s->bat.pbw_offset + s->spb + s->bm_secs, 1); } else tx->error = req->error; diff --git 
a/drivers/tapdisk-control.c b/drivers/tapdisk-control.c index 76970b69..2375ec2a 100644 --- a/drivers/tapdisk-control.c +++ b/drivers/tapdisk-control.c @@ -731,6 +731,8 @@ tapdisk_control_open_image(struct tapdisk_ctl_conn *conn, flags |= TD_OPEN_REUSE_PARENT; if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_STANDBY) flags |= TD_OPEN_STANDBY; + if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_THIN) + flags |= TD_OPEN_THIN; if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_SECONDARY) { char *name = strdup(request->u.params.secondary); if (!name) { diff --git a/drivers/tapdisk.h b/drivers/tapdisk.h index bddedbfc..4a263fd5 100644 --- a/drivers/tapdisk.h +++ b/drivers/tapdisk.h @@ -89,6 +89,7 @@ extern unsigned int PAGE_SHIFT; #define TD_OPEN_STANDBY 0x00800 #define TD_IGNORE_ENOSPC 0x01000 #define TD_OPEN_NO_O_DIRECT 0x02000 +#define TD_OPEN_THIN 0x04000 #define TD_CREATE_SPARSE 0x00001 #define TD_CREATE_MULTITYPE 0x00002 diff --git a/include/Makefile.am b/include/Makefile.am index 90ba1b45..3314e0bc 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -18,6 +18,8 @@ blktap_HEADERS += tapdisk-message.h blktap_HEADERS += tap-ctl.h blktap_HEADERS += debug.h blktap_HEADERS += util.h +blktap_HEADERS += payload.h +blktap_HEADERS += kpr_util.h noinst_HEADERS = blktap.h noinst_HEADERS += compiler.h diff --git a/include/blktap.h b/include/blktap.h index 9c3437b8..e75406d4 100644 --- a/include/blktap.h +++ b/include/blktap.h @@ -27,5 +27,6 @@ #define BLKTAP2_RING_DEVICE BLKTAP2_DIRECTORY"/blktap" #define BLKTAP2_IO_DEVICE BLKTAP2_DIRECTORY"/tapdev" #define BLKTAP2_ENOSPC_SIGNAL_FILE "/var/run/tapdisk-enospc" +#define THIN_CONTROL_SOCKET "/var/run/thin-prov-ctl" #endif /* _TD_BLKTAP_H_ */ diff --git a/include/kpr_util.h b/include/kpr_util.h new file mode 100644 index 00000000..64ea3599 --- /dev/null +++ b/include/kpr_util.h @@ -0,0 +1,10 @@ +#ifndef _KPR_UTIL_H +#define _KPR_UTIL_H + +#include "payload.h" + +int kpr_split_lvm_path(const char *, char *, char *); 
+int kpr_tcp_create(uint16_t port); +int kpr_tcp_conn_tx_rx(const char *ip, uint16_t port, struct payload *); + +#endif /* KPR_UTIL_H */ diff --git a/include/payload.h b/include/payload.h new file mode 100644 index 00000000..3a1c9a24 --- /dev/null +++ b/include/payload.h @@ -0,0 +1,40 @@ +#ifndef _PAYLOAD_H +#define _PAYLOAD_H 1 +#include +#include +#include "tapdisk-message.h" + +#define PAYLOAD_MAX_PATH_LENGTH TAPDISK_MESSAGE_MAX_PATH_LENGTH +#define IP_MAX_LEN 32 + +typedef enum { + /* server */ + PAYLOAD_ACCEPTED = 0, + PAYLOAD_REJECTED, + PAYLOAD_DONE, + PAYLOAD_WAIT, + /* client */ + PAYLOAD_QUERY, + PAYLOAD_REQUEST, + PAYLOAD_CLI, + /* generic */ + PAYLOAD_UNDEF +} payload_message_t; + +struct payload { + pid_t id; + char path[PAYLOAD_MAX_PATH_LENGTH]; + uint64_t curr; + uint64_t req; + off64_t vhd_size; + payload_message_t reply; + char ipaddr[IP_MAX_LEN]; /* used internally */ +}; + +int init_payload(struct payload *); +void print_payload(struct payload *); + +/* Temporary location */ +int thin_sock_comm(struct payload *); + +#endif /* payload.h */ diff --git a/include/tapdisk-message.h b/include/tapdisk-message.h index f4ea0e4a..43df4f6e 100644 --- a/include/tapdisk-message.h +++ b/include/tapdisk-message.h @@ -42,6 +42,7 @@ #define TAPDISK_MESSAGE_FLAG_SECONDARY 0x080 #define TAPDISK_MESSAGE_FLAG_STANDBY 0x100 #define TAPDISK_MESSAGE_FLAG_NO_O_DIRECT 0x200 +#define TAPDISK_MESSAGE_FLAG_THIN 0x400 typedef struct tapdisk_message tapdisk_message_t; typedef uint32_t tapdisk_message_flag_t; diff --git a/mk/blktap.spec.in b/mk/blktap.spec.in index dbe4faac..92846d35 100644 --- a/mk/blktap.spec.in +++ b/mk/blktap.spec.in @@ -61,6 +61,8 @@ mkdir -p %{buildroot}%{_localstatedir}/log/blktap %{_sbindir}/td-rated %{_sbindir}/part-util %{_sbindir}/vhdpartx +%{_sbindir}/thin-tapdisk +%{_sbindir}/thin-cli %{_libexecdir}/tapdisk %{_sysconfdir}/udev/rules.d/blktap.rules %{_sysconfdir}/rc.d/init.d/tapback diff --git a/thin/Makefile.am b/thin/Makefile.am new file mode 
100644 index 00000000..9d2bf804 --- /dev/null +++ b/thin/Makefile.am @@ -0,0 +1,24 @@ + +AM_CFLAGS = -Wall +AM_CFLAGS += -Werror +AM_CFLAGS += -pthread + +AM_CPPFLAGS = -D_GNU_SOURCE +AM_CPPFLAGS += -I$(top_srcdir)/include + +sbin_PROGRAMS = thin-tapdisk +thin_tapdisk_SOURCES = sock_serv.c +thin_tapdisk_LDADD = libtapdiskthin.la + +sbin_PROGRAMS += thin-cli +thin_cli_SOURCES = thin_cli.c +thin_cli_LDADD = libtapdiskthin.la + +lib_LTLIBRARIES = libtapdiskthin.la + +libtapdiskthin_la_SOURCES = payload.c +libtapdiskthin_la_SOURCES += sock_client2.c +libtapdiskthin_la_SOURCES += kpr_util.c + +libtapdiskthin_la_LDFLAGS = -version-info 1:1:1 + diff --git a/thin/kpr_util.c b/thin/kpr_util.c new file mode 100644 index 00000000..3e0505e2 --- /dev/null +++ b/thin/kpr_util.c @@ -0,0 +1,166 @@ +#include +#include +#include +#include +#include +#include +#include +#include "payload.h" + +#define PFX_SIZE 5 +#define TCP_BACKLOG 10 + + +/** + * This function parses the given path and populates the other char arrays + * with volume group and logical volume names (if any). + * The accepted pattern is the following + * /dev/VG/LV where VG and LV can contain any + * char other than '/' and are interpreted, respectively, as VG name + * and LV name. Any other pattern is wrong (/dev/mapper/NAME is accepted + * but it is not what you want..). + * + * @param[in] path is a NUL terminated char array containing the path to parse + * @param[out] vg if successful contains VG name. Caller must ensure allocated + * space is bigger than #path + * @param[out] lv if successful contains LV name. Caller must ensure allocated + * space is bigger than #path + * @return 0 if OK and 1 otherwise.
On exit all the arrays are NUL terminated + */ +int +kpr_split_lvm_path(const char * path, char * vg, char * lv) +{ + static const char dev_pfx[PFX_SIZE] = "/dev/"; /* no \0 */ + bool flag = false; + + if( strncmp(dev_pfx, path, PFX_SIZE) ) { + fprintf(stderr, "Not a device pattern\n"); + return 1; + } + path += PFX_SIZE; + + /* Extract VG */ + for( ; *path; ++path, ++vg ) { + if( *path == '/' ) { + *vg = '\0'; + break; + } + *vg = *path; + flag = true; + } + + /* Check why and how the loop ended */ + if ( *path == '\0' || !flag ) { + /* terminate strings and error */ + *vg = '\0'; + *lv = '\0'; + return 1; + } + + /* Extract LV */ + ++path; /* skip slash */ + for( flag = false; *path; ++path, ++lv ) { + if( *path == '/' ) { + fprintf(stderr, "too many slashes\n"); + *lv = '\0'; + return 1; + } + *lv = *path; + flag = true; + } + *lv = '\0'; /* terminate string */ + + return flag ? 0 : 1; +} + + +/** + * Create, bind and listen to specified socket + * + * @param[in] port number + * @return file descriptor of socket or -1 + */ +int +kpr_tcp_create(uint16_t port) +{ + int sfd; + struct sockaddr_in s_addr; + + /* create tcp socket */ + sfd = socket(AF_INET, SOCK_STREAM, 0); + if (sfd == -1) { + fprintf(stderr, "tcp socket error"); + return -1; + } + + /* Build socket address, bind and listen */ + memset(&s_addr, 0, sizeof(s_addr)); + s_addr.sin_family = AF_INET; + s_addr.sin_port = htons(port); + s_addr.sin_addr.s_addr = INADDR_ANY; + + if (bind(sfd, (struct sockaddr *) &s_addr, sizeof(s_addr)) == -1) { + fprintf(stderr, "bind error"); + goto fail; + } + + if (listen(sfd, TCP_BACKLOG) == -1) { + fprintf(stderr, "listen error"); + goto fail; + } + + return sfd; +fail: + close(sfd); + return -1; +} + +/* Send message to ip:port and read the peer's reply back into message; returns 0 on success, non-zero on failure */ +int +kpr_tcp_conn_tx_rx(const char *ip, uint16_t port, struct payload * message) +{ + int sfd = -1, ret = 0, len; + struct sockaddr_in s_addr; + struct in_addr ipaddr; + + if ( !inet_aton(ip, &ipaddr) ) { + ret = 1; + goto end; + } + + len = sizeof(struct payload); + + /* create
tcp socket */ + sfd = socket(AF_INET, SOCK_STREAM, 0); + if (sfd == -1) { + ret = 1; + goto end; + } + + memset(&s_addr, 0, sizeof(s_addr)); + s_addr.sin_family = AF_INET; + s_addr.sin_port = htons(port); + s_addr.sin_addr = ipaddr; + + if ( connect(sfd, (struct sockaddr *) &s_addr, sizeof(s_addr)) ) { + ret = 1; + goto end; + } + + /* TBD: very basic write, need a while loop */ + if (write(sfd, message, len) != len) { + ret = 1; + goto end; + } + + /* TBD: very basic read */ + if (read(sfd, message, len) != len) { + ret = 2; + goto end; + } + +end: + if (sfd != -1) /* nothing to close when we failed before socket() succeeded */ + close(sfd); /* Closes our socket; server sees EOF */ + return ret; +} diff --git a/thin/payload.c b/thin/payload.c new file mode 100644 index 00000000..c1ba10b4 --- /dev/null +++ b/thin/payload.c @@ -0,0 +1,26 @@ +#include +#include "payload.h" + +int init_payload(struct payload *pload) +{ + pload->id = -1; + pload->curr = 0; + pload->req = 0; + pload->vhd_size = 0; + pload->reply = PAYLOAD_UNDEF; + pload->ipaddr[0] = '\0'; + return 0; +} + +void print_payload(struct payload *pload) +{ + printf("payload data:\n"); + printf("id = %d\n", pload->id); + printf("path = %s\n", pload->path); + printf("current size = %"PRIu64"\n", pload->curr); + printf("requested size = %"PRIu64"\n", pload->req); + printf("virtual size = %"PRIu64"\n", pload->vhd_size); + printf("request type = %d\n", pload->reply); + printf("dest ipaddr = %s\n", pload->ipaddr); + return; +} diff --git a/thin/sock_client2.c b/thin/sock_client2.c new file mode 100644 index 00000000..75ec3529 --- /dev/null +++ b/thin/sock_client2.c @@ -0,0 +1,46 @@ +#include +#include +#include +#include +#include +#include "blktap.h" +#include "payload.h" + +int +thin_sock_comm(struct payload *message) +{ + /* maybe we can borrow tap_ctl_connect but it is risky because + of strcpy */ + /* Returns 0 on success or a negative error code; once opened, the socket is closed on every path */ + struct sockaddr_un addr; + int sfd, len, err = 0; + pid_t pid; + + len = sizeof(struct payload); + + pid = getpid(); + message->id = pid; + + sfd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if
(sfd == -1) + return -errno; /* to be handled */ + + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, THIN_CONTROL_SOCKET, sizeof(addr.sun_path) - 1); + + if (connect(sfd, (struct sockaddr *) &addr, sizeof(addr)) == -1) + err = -errno; + + /* TBD: very basic write, need a while loop */ + if (!err && write(sfd, message, len) != len) + err = errno ? -errno : -EIO; /* a short transfer may leave errno at 0 */ + + /* TBD: very basic read */ + if (!err && read(sfd, message, len) != len) + err = errno ? -errno : -EIO; + + close(sfd); /* Closes our socket; server sees EOF */ + return err; +} + diff --git a/thin/sock_serv.c b/thin/sock_serv.c new file mode 100644 index 00000000..6f321a49 --- /dev/null +++ b/thin/sock_serv.c @@ -0,0 +1,1024 @@ +#include +#include +#include +#include /* TCP accept client info */ +#include /* TCP accept client info */ +#include +#include +#include +#include +#include +#include +#include /* non POSIX */ +#include /* non POSIX */ +#include +#include +#include "blktap.h" +#include "payload.h" +#include "kpr_util.h" + +#define BACKLOG 5 +#define PORT_NO 7777 + +static inline int process_payload(int, struct payload *); +static inline int req_reply(int, struct payload *); +static int handle_request(struct payload * buf); +static int handle_query(struct payload * buf); +static void * worker_thread(void *); +static void * worker_thread_net(void *); +static int slave_worker_hook(struct payload *); +static int reject_hook(struct payload *); +static int dispatch_hook(struct payload *); +static int slave_net_hook(struct payload *); +static int master_net_hook(struct payload *); +static int increase_size(off64_t size, const char * path); +static int refresh_lvm(const char * path); +static void parse_cmdline(int, char **); +static int do_daemon(void); +static int handle_cli(struct payload *); +static void split_command(char *, char **); +static int add_vg(char *vg); +static int del_vg(char *vg); +static int slave_mode(char *ip); +static int master_mode(void); + +bool master; /* no need to be mutex-ed: main
writes, workers read */ +char master_ip[IP_MAX_LEN]; /* + Used only in slave mode, ensure it is + NULL terminated. This variable is used + only in handle_request but, as long as + this function is used in the network thread, + it must be mutex protected + */ +pthread_mutex_t ip_mtx = PTHREAD_MUTEX_INITIALIZER; /* see above */ + + +/* queue structures */ +SIMPLEQ_HEAD(sqhead, sq_entry); +struct kpr_queue { + struct sqhead qhead; + pthread_mutex_t mtx; + pthread_cond_t cnd; + int efd; /* some queues are notified by eventfd */ +} *net_queue, *out_queue; +struct sq_entry { + struct payload data; + SIMPLEQ_ENTRY(sq_entry) entries; +}; + +static struct sq_entry * find_and_remove(struct sqhead *, pid_t); +static struct kpr_queue * get_out_queue(struct payload *); + +/* thread structures */ +struct kpr_thread_info { + pthread_t thr_id; + struct kpr_queue *r_queue; + int (*hook)(struct payload *); + int (*net_hook)(struct payload *); + bool net; +}; + +/* list structures */ +LIST_HEAD(vg_list_head, vg_entry); +struct kpr_vg_list { + struct vg_list_head head; + pthread_mutex_t mtx; +} vg_pool; +struct vg_entry { + char name[PAYLOAD_MAX_PATH_LENGTH]; + struct kpr_thread_info thr; + struct kpr_queue *r_queue; + LIST_ENTRY(vg_entry) entries; +}; + +static struct vg_entry * vg_pool_find(char *, bool); +static struct vg_entry * vg_pool_find_and_remove(char *); + + +int daemonize; + + +static struct kpr_queue * +alloc_init_queue(void) +{ + struct kpr_queue *sqp; + + sqp = malloc(sizeof(*sqp)); + if (sqp) { + SIMPLEQ_INIT(&sqp->qhead); + if (pthread_mutex_init(&sqp->mtx, NULL) != 0) + goto out; + if (pthread_cond_init(&sqp->cnd, NULL) != 0) + goto out; + if ( (sqp->efd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK| + EFD_SEMAPHORE)) == -1 ) + goto out; + } + return sqp; + + out: + free(sqp); + return NULL; +} + +int +main(int argc, char *argv[]) { + + /* default is master mode */ + master = true; + + /* Init pool */ + LIST_INIT(&vg_pool.head); + if (pthread_mutex_init(&vg_pool.mtx, NULL) 
!= 0) + return 1; + + /* Init default queues */ + net_queue = alloc_init_queue(); + if(!net_queue) + return 1; /*no free: return from main */ + out_queue = alloc_init_queue(); + if(!out_queue) + return 1; /*no free: return from main */ + + daemonize = 0; + + /* accept command line opts */ + parse_cmdline(argc, argv); + + /* daemonize if required */ + if (do_daemon() == -1) + return 1; /* can do better */ + + /* prepare and spawn default thread: use vg_entry even if not VG */ + struct vg_entry net_thr; + net_thr.thr.r_queue = net_queue; + net_thr.thr.hook = dispatch_hook; + if (master) + net_thr.thr.net_hook = master_net_hook; + else + net_thr.thr.net_hook = slave_net_hook; + net_thr.thr.net = true; + if (pthread_create(&net_thr.thr.thr_id, NULL, worker_thread_net, + &net_thr.thr)) { + printf("failed worker thread creation\n"); + return 1; + } + + + struct sockaddr_un addr; + int sfd, cfd; + ssize_t numRead; + struct payload buf; + + sfd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (sfd == -1) + return -errno; + + memset(&addr, 0, sizeof(struct sockaddr_un)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, THIN_CONTROL_SOCKET, sizeof(addr.sun_path) - 1); + + if (bind(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)) == -1) + return -errno; + + if (listen(sfd, BACKLOG) == -1) + return -errno; + + for (;;) { + + cfd = accept4(sfd, NULL, NULL, SOCK_CLOEXEC); + if (cfd == -1) + return -errno; + + while ((numRead = read(cfd, &buf, sizeof(buf))) > 0) { + /* temporary: ensure ipaddr is NULL if coming from + socket. 
Remove if network thread is sending packets + through the socket + */ + buf.ipaddr[0] = '\0'; + process_payload(cfd, &buf); + } + + if (numRead == -1) + return -errno; + + if (close(cfd) == -1) + return -errno; + } +} + +static inline int +process_payload(int fd, struct payload * buf) +{ + int err; + + print_payload(buf); + err = req_reply(fd, buf); + print_payload(buf); + printf("EOM\n\n"); + + return err; +} + +static int +req_reply(int fd, struct payload * buf) +{ + switch (buf->reply) { + case PAYLOAD_REQUEST: + handle_request(buf); + break; + case PAYLOAD_QUERY: + handle_query(buf); + break; + case PAYLOAD_CLI: + handle_cli(buf); + break; + default: + buf->reply = PAYLOAD_UNDEF; + print_payload(buf); + } + + /* TBD: very basic write, need a while loop */ + if (write(fd, buf, sizeof(*buf)) != sizeof(*buf)) + return -errno; + + return 0; +} + +static int +handle_request(struct payload * buf) +{ + struct sq_entry *req; + struct vg_entry *vgentry; + struct kpr_queue *in_queue; + char vgname[PAYLOAD_MAX_PATH_LENGTH]; + char lvname[PAYLOAD_MAX_PATH_LENGTH]; + + if( kpr_split_lvm_path(buf->path, vgname, lvname) ) + return 1; + + /* search we have a queue for it */ + vgentry = vg_pool_find(vgname, true); + if (vgentry) /* we do */ + in_queue = vgentry->r_queue; /* no lock (sure?) 
*/ + else { + /* In master mode this means rejected */ + if (master) { + /* hack to reuse it in net_thread */ + if (buf->ipaddr[0] != '\0') + return 1; + in_queue = out_queue; + buf->reply = PAYLOAD_REJECTED; + } + else { + /* write master address */ + pthread_mutex_lock(&ip_mtx); + strncpy(buf->ipaddr, master_ip, IP_MAX_LEN); + pthread_mutex_unlock(&ip_mtx); + in_queue = net_queue; + } + } + + req = malloc(sizeof(struct sq_entry)); + if(!req) + return 1; + + req->data = *buf; + buf->reply = PAYLOAD_ACCEPTED; + pthread_mutex_lock(&in_queue->mtx); + + SIMPLEQ_INSERT_TAIL(&in_queue->qhead, req, entries); + + /* Temporary hack for the new event mechanism used by default queue */ + if ( in_queue == net_queue ) + eventfd_write(in_queue->efd, 1); + else if ( in_queue == out_queue ) + /* no need to signal for out_queue */ + ; + else + pthread_cond_signal(&in_queue->cnd); + pthread_mutex_unlock(&in_queue->mtx); + + return 0; +} + +static int +handle_query(struct payload * buf) +{ + struct sq_entry * req; + + /* Check we have something ready */ + pthread_mutex_lock(&out_queue->mtx); + if (SIMPLEQ_EMPTY(&out_queue->qhead)) { + pthread_mutex_unlock(&out_queue->mtx); + buf->reply = PAYLOAD_WAIT; + return 0; + } + + /* check if we have a served request for this query */ + req = find_and_remove(&out_queue->qhead, buf->id); + if (req) { + pthread_mutex_unlock(&out_queue->mtx); + buf->reply = req->data.reply; + free(req); + } else { /* wait */ + pthread_mutex_unlock(&out_queue->mtx); + buf->reply = PAYLOAD_WAIT; + } + + return 0; +} + + +static int +handle_cli(struct payload * buf) +{ + char command[PAYLOAD_MAX_PATH_LENGTH]; + char *cmd[2]; + int ret; + + /* we reuse the path field for CLI */ + strcpy(command, buf->path); + + split_command(command, cmd); + if(!cmd[0]) + return 1; + + if (!strcmp("add", cmd[0])) { + if(!cmd[1]) + return 1; + ret = add_vg(cmd[1]); + + } + else if (!strcmp("del", cmd[0])) { + if(!cmd[1]) + return 1; + ret = del_vg(cmd[1]); + } + else if 
(!strcmp("slave", cmd[0])) { + if(!cmd[1]) + return 1; + ret = slave_mode(cmd[1]); + } + else if (!strcmp("master", cmd[0])) { + ret = master_mode(); + } + else + ret = 1; + + if (ret) + strcpy(buf->path, "fail"); + else + strcpy(buf->path, "ok"); + + return 0; +} + + +/* This function must be invoked with the corresponding mutex locked */ +static struct sq_entry * +find_and_remove(struct sqhead * head, pid_t id) +{ + struct sq_entry * entry; + SIMPLEQ_FOREACH(entry, head, entries) { + if (entry->data.id == id) { + SIMPLEQ_REMOVE(head, entry, sq_entry, entries); + return entry; + } + } + /* No matches */ + return NULL; +} + + +static void * +worker_thread(void * ap) +{ + struct sq_entry * req; + struct payload * data; + struct kpr_thread_info *thr_arg; + struct kpr_queue *r_queue, *o_queue; + int (*hook)(struct payload *); + + /* We must guarantee this structure is properly populated or + check it and fail in case it is not. In the latter case + we need to check if the thread has returned.
+ */ + thr_arg = (struct kpr_thread_info *) ap; + r_queue = thr_arg->r_queue; + hook = thr_arg->hook; + + for(;;) { + pthread_mutex_lock(&r_queue->mtx); + + while (SIMPLEQ_EMPTY(&r_queue->qhead)) { + pthread_cond_wait(&r_queue->cnd, &r_queue->mtx); + } + + /* pop from requests queue and unlock */ + req = SIMPLEQ_FIRST(&r_queue->qhead); + SIMPLEQ_REMOVE_HEAD(&r_queue->qhead, entries); + pthread_mutex_unlock(&r_queue->mtx); + + data = &req->data; + /* For the time being we use PAYLOAD_UNDEF as a way + to notify threads to exit + */ + if (data->reply == PAYLOAD_UNDEF) { + free(req); + fprintf(stderr, "Thread cancellation received\n"); + return NULL; + } + + /* Execute worker-thread specific hook */ + hook(data); + + /* push to out queue */ + o_queue = get_out_queue(data); + pthread_mutex_lock(&o_queue->mtx); + SIMPLEQ_INSERT_TAIL(&o_queue->qhead, req, entries); + pthread_mutex_unlock(&o_queue->mtx); + } + return NULL; +} + + +static void * +worker_thread_net(void * ap) +{ + struct sq_entry * req; + struct payload * data; + struct kpr_thread_info *thr_arg; + struct kpr_queue *r_queue; + + struct pollfd fds[2]; + int maxfds = 2; + int i; + int (*hook)(struct payload *); + int (*net_hook)(struct payload *); + + int sfd, cfd; + struct payload buf; + static int len = sizeof(buf); + struct sockaddr_in c_addr; + socklen_t c_len; + char *c; + + uint64_t ebuf; + int ret; + + /* We must guarantee this structure is properly populated or + check it and fail in case it is not. In the latter case + we need to check if the thread has returned.
+ */ + thr_arg = (struct kpr_thread_info *) ap; + r_queue = thr_arg->r_queue; + hook = thr_arg->hook; + net_hook = thr_arg->net_hook; + + /* + * Network specific block + */ + if (thr_arg->net) { + /* create tcp socket and listen */ + sfd = kpr_tcp_create(PORT_NO); + if (sfd < 0) + return NULL; + } else { + sfd = -1; + maxfds = 1; /* no need to loop more */ + } + + /* register events for poll */ + fds[0].fd = r_queue->efd; + fds[0].events = POLLIN; + fds[1].fd = sfd; /* if net=false it is ok to be negative */ + fds[1].events = POLLIN; + + for(;;) { + ret = poll(fds, maxfds, -1); /* wait for ever */ + if ( ret < 1 ) { /* 0 not expected */ + fprintf(stderr, "poll returned %d\n", ret); + continue; + } + + for( i = 0; i < maxfds; ++i) { + if ( !fds[i].revents ) + continue; + switch(i) { + case 0: /* queue request */ + pthread_mutex_lock(&r_queue->mtx); + /* others using this queue..? */ + if (SIMPLEQ_EMPTY(&r_queue->qhead)) { + pthread_mutex_unlock(&r_queue->mtx); + continue; + } + + /* pop from requests queue and unlock */ + req = SIMPLEQ_FIRST(&r_queue->qhead); + SIMPLEQ_REMOVE_HEAD(&r_queue->qhead, entries); + /* notify back we read one el of the queue */ + eventfd_read(r_queue->efd, &ebuf); + pthread_mutex_unlock(&r_queue->mtx); + + data = &req->data; + /* For the time being we use PAYLOAD_UNDEF as a way + to notify threads to exit + */ + if (data->reply == PAYLOAD_UNDEF) { + fprintf(stderr, "Thread cancellation received\n"); + free(req); + if(sfd >= 0) + close(sfd); + return NULL; + } + + /* Execute worker-thread specific hook */ + if ( hook(data) ) { + free(req); + continue; + } + + /* push to served queue */ + pthread_mutex_lock(&out_queue->mtx); + SIMPLEQ_INSERT_TAIL(&out_queue->qhead, req, entries); + pthread_mutex_unlock(&out_queue->mtx); + break; + case 1: /* TCP socket */ + c_len = sizeof(c_addr); + cfd = accept4(sfd, &c_addr, &c_len, SOCK_CLOEXEC); + if (cfd == -1) { + fprintf(stderr, "Accept error\n"); + continue; + } + if ( read(cfd, &buf, len) != len ) {
+ fprintf(stderr, "TCP read error\n"); + close(cfd); /* do not leak the accepted connection */ + continue; + } + req = malloc(sizeof(struct sq_entry)); + if(!req) { + fprintf(stderr, "Cannot allocate " + "for TCP packet\n"); + close(cfd); + continue; + } + req->data = buf; + buf.reply = PAYLOAD_ACCEPTED; + + /* Always acknowledge we got it */ + /* TBD: very basic write, need a while loop */ + if (write(cfd, &buf, len) != len) + fprintf(stderr, "TCP not " + "acknowledged\n"); + close(cfd); /* cfd is not used after the acknowledgement */ + /* store sender ipaddr */ + c = inet_ntoa(c_addr.sin_addr); + strncpy(req->data.ipaddr, c, IP_MAX_LEN); + + /* process payload */ + if ( net_hook(&req->data) ) { + free(req); + continue; + } + + /* push to served queue */ + pthread_mutex_lock(&out_queue->mtx); + SIMPLEQ_INSERT_TAIL(&out_queue->qhead, req, entries); + pthread_mutex_unlock(&out_queue->mtx); + break; + default: /* it should not happen */ + fprintf(stderr, "what?!?!\n"); + } + } + } + return NULL; +} + + +static int +slave_worker_hook(struct payload *data) +{ + int ret; + + /* Fulfil request */ + ret = increase_size(data->curr, data->path); + if (ret == 0 || ret == 3) /* 3 means big enough */ + data->reply = PAYLOAD_DONE; + else + data->reply = PAYLOAD_REJECTED; + printf("worker_thread: completed %u %s (%d)\n\n", + (unsigned)data->id, data->path, ret); + + return 0; +} + + +static int +reject_hook(struct payload *data) +{ + /* Reject request */ + data->reply = PAYLOAD_REJECTED; + printf("default_thread: No registered VG!\n\n"); + + return 0; +} + +/** + * Send packet to specified destination. If send is successful and + * packet is accepted it returns 1 because there is nothing more + * to be done. Reply will come on the TCP socket. + * In master mode packets not sent are discarded, while in slave + * mode they are queued as rejected. + * + * @param[in,out] data to be processed + * @return 0 if packet is not sent and marked rejected.
+ * 1 if sent or to be discarded anyway + */ +static int +dispatch_hook(struct payload *data) +{ + /* Send */ + if ( !kpr_tcp_conn_tx_rx(data->ipaddr, PORT_NO, data ) ) { + fprintf(stderr, "Dispatch failed\n"); + goto fail; + } + /* Check reply */ + if ( data->reply != PAYLOAD_ACCEPTED ) { + fprintf(stderr, "Payload rejected\n"); + goto fail; + } + + return 1; +fail: + return master ? 1 : reject_hook(data); +} + + +/** + * Packet can be either DONE or REJECTED, in any other case packet + * is discarded. + * + * @param[in,out] data to be processed + * @return 0 if packet can be pushed in the "served" queue, 1 otherwise + */ +static int +slave_net_hook(struct payload *data) +{ + switch(data->reply) { + case PAYLOAD_REJECTED: + break; + case PAYLOAD_DONE: + refresh_lvm(data->path); + break; + default: + fprintf(stderr, "Spurious payload\n"); + return 1; + } + + return 0; +} + + +/** + * Packet can be only a REQUEST, in any other case packet + * is discarded. If it is a request, we always return 1 because + * it is either pushed in the proper queue here or discarded. 
+ * + * @param[in,out] data to be processed + * @return 1 + */ +static int +master_net_hook(struct payload *data) +{ + /* Check reply */ + if ( data->reply != PAYLOAD_REQUEST ) { + fprintf(stderr, "Spurious payload\n"); + return 1; + } + + /* Either way we need to return 1 to avoid further push in queue */ + if ( handle_request(data) ) + fprintf(stderr, "Packet discarded\n"); + return 1; +} + + +/** + * @param size: current size to increase in bytes + * @param path: device full path + * @return command return code if command returned properly, -1 otherwise + */ +static int +increase_size(off64_t size, const char * path) +{ +#define NCHARS 16 + pid_t pid; + int status, num_read; + char ssize[NCHARS]; /* enough for G bytes */ + size += 104857600; /* add 100 MB */ + + /* prepare size for command line */ + num_read = snprintf(ssize, NCHARS, "-L""%"PRIu64"b", size); + if (num_read >= NCHARS) + return -1; /* size too big */ + + switch (pid = fork()) { + case -1: + return -1; + case 0: /* child */ + execl("/opt/xensource/sm/lvhdutil.py", "lvhdutil.py", "extend", ssize, + path, (char *)NULL); + _exit(127); /* TBD */ + default: /* parent */ + if (waitpid(pid, &status, 0) == -1) + return -1; + else if (WIFEXITED(status)) /* normal exit? */ + status = WEXITSTATUS(status); + else + return -1; + return status; + } +} + + +/** + * @param path: device full path + * @return command return code if command returned properly, -1 otherwise + */ +static int +refresh_lvm(const char * path) +{ + pid_t pid; + int status; + + switch (pid = fork()) { + case -1: + return -1; + case 0: /* child */ + execl("/sbin/lvchange", "lvchange", "--refresh", path, + (char *)NULL); + _exit(127); /* TBD */ + default: /* parent */ + if (waitpid(pid, &status, 0) == -1) + return -1; + else if (WIFEXITED(status)) /* normal exit? 
*/ + status = WEXITSTATUS(status); + else + return -1; + return status; + } +} + + +static void +parse_cmdline(int argc, char ** argv) +{ + int arg, fd_open = 0; + + while ((arg = getopt(argc, argv, "df")) != EOF ) { + switch(arg) { + case 'd': /* daemonize and close fd */ + daemonize = 1; + break; + case 'f': /* if daemonized leave fd open */ + fd_open = 1; + default: + break; + } + } + daemonize += daemonize ? fd_open : 0; + return; +} + + +static int +do_daemon() +{ + if (!daemonize) + return 0; + + return daemon(0, daemonize - 1); /* root dir and close if needed */ +} + + +static void +split_command(char *command, char **cmd_vec) +{ + char *token; + int i; + + token = strtok(command, " "); + for(i = 0; token && (i < 2); ++i) { + cmd_vec[i] = token; + token = strtok(NULL, " "); + } + + if (i < 2) + cmd_vec[1] = '\0'; + + return; +} + + +static int +add_vg(char *vg) +{ + struct vg_entry *p_vg; + + printf("CLI: add_vg\n"); + + /* check we already have it */ + if(vg_pool_find(vg, true)) { + printf("%s already added\n", vg); + return 0; + } + + /* allocate and init vg_entry */ + p_vg = malloc(sizeof(*p_vg)); + if (!p_vg) + return 1; + + /* We rely on CLI to avoid truncated name or non-NULL terminated + strings. Moreover, by dest is not smaller then src */ + strcpy(p_vg->name, vg); + + /* VG and thread specific thread allocated */ + p_vg->r_queue = alloc_init_queue(); + if(!p_vg->r_queue) + goto out; + + /* Prepare and start VG specific thread */ + p_vg->thr.r_queue = p_vg->r_queue; + p_vg->thr.hook = slave_worker_hook; + p_vg->thr.net_hook = NULL; + if (pthread_create(&p_vg->thr.thr_id, NULL, worker_thread, &p_vg->thr)) { + fprintf(stderr, "Failed worker thread creation for %s\n", + p_vg->name); + goto out2; + } + + /* Everything ok. 
Add vg to pool */ + LIST_INSERT_HEAD(&vg_pool.head, p_vg, entries); + + return 0; +out2: + free(p_vg->r_queue); +out: + free(p_vg); + return 1; +} + + +static int +del_vg(char *vg) +{ + struct vg_entry *p_vg; + struct sq_entry *req; + int ret; + + printf("CLI: del_vg\n"); + + /* Once removed from the pool no new requests can be served + any more + */ + p_vg = vg_pool_find_and_remove(vg); + if(!p_vg) { + fprintf(stderr, "Nothing removed\n"); + return 1; + } + + /* The thread is still able to crunch requests in its queue + so we "poison" the queue to stop the thread + */ + req = malloc(sizeof(*req)); + if(!req) { + /* FIXME: we are going to return but the vg is no more in the + pool while the thread is still running. + We are returning with a running thread, not able to receive new + requests and 2 memory leaks.. + */ + fprintf(stderr, "Error with malloc!! Thread still running\n" + "and memory leaked\n"); + return 1; + } + init_payload(&req->data); + req->data.reply = PAYLOAD_UNDEF; + /* Insert in queue */ + pthread_mutex_lock(&p_vg->r_queue->mtx); + SIMPLEQ_INSERT_TAIL(&p_vg->r_queue->qhead, req, entries); + pthread_cond_signal(&p_vg->r_queue->cnd); /* Wake thread if needed */ + pthread_mutex_unlock(&p_vg->r_queue->mtx); + + /* Wait for thread to complete */ + ret = pthread_join(p_vg->thr.thr_id, NULL); + if (ret != 0) + fprintf(stderr, "Problem joining thread..FIXME\n"); + + /* + * Thread is dead, let's free resources + */ + /* By design the queue must be empty but we check */ + if (!SIMPLEQ_EMPTY(&p_vg->r_queue->qhead)) + fprintf(stderr, "queue not empty, memory leak! FIXME\n"); + free(p_vg->r_queue); + free(p_vg); + + return 0; +} + + +int +slave_mode(char *ipaddr) +{ + fprintf(stderr, "CLI slave %s received\n", ipaddr); + if (master) { + fprintf(stderr, "Fake: switching master to slave\n"); + } else { + fprintf(stderr, "Already in slave mode: checking ip addr\n"); + pthread_mutex_lock(&ip_mtx); /* not really needed.. 
*/ + if ( !strcmp(master_ip, ipaddr) ) { + fprintf(stderr, "nothing to be done\n"); + goto done; + } + strncpy(master_ip, ipaddr, IP_MAX_LEN); + pthread_mutex_unlock(&ip_mtx); + } + + return 0; +done: + pthread_mutex_unlock(&ip_mtx); + return 0; +} + + +int +master_mode(void) +{ + fprintf(stderr, "CLI master received\n"); + return 0; +} + + +/** + * This function searches the vg_pool for an entry with a given VG name. + * If invoked with locking no mutexes must be held + * + * @param vg_name name of the volume group to search for + * @param lock ask for function to take care of locking + * @return NULL if not in the pool or a pointer to the entry +*/ +static struct vg_entry * +vg_pool_find(char *vg_name, bool lock) +{ + struct vg_entry *entry, *ret; + ret = NULL; + + if(lock) + pthread_mutex_lock(&vg_pool.mtx); + LIST_FOREACH(entry, &vg_pool.head, entries) { + /* looking for exact match */ + if (strcmp(entry->name, vg_name) == 0) { + ret = entry; + break; + } + } + if(lock) + pthread_mutex_unlock(&vg_pool.mtx); + + return ret; +} + + +/** + * This function removes from vg_pool the entry named vg_name. 
+ * The pool lock is automatic so make sure you are not holding + * any mutex + * + * @param vg_name name of the volume group to remove + * @return NULL if nothing is removed or a pointer to removed item +*/ +static struct vg_entry * +vg_pool_find_and_remove(char *vg_name) +{ + struct vg_entry *entry; + + pthread_mutex_lock(&vg_pool.mtx); + entry = vg_pool_find(vg_name, false); + if(!entry) + return NULL; + LIST_REMOVE(entry, entries); + pthread_mutex_unlock(&vg_pool.mtx); + + return entry; +} + + +static struct kpr_queue * +get_out_queue(struct payload *data) +{ + if ( master && (data->ipaddr[0] != '\0') ) + return net_queue; + + return out_queue; +} diff --git a/thin/thin_cli.c b/thin/thin_cli.c new file mode 100644 index 00000000..3f51868d --- /dev/null +++ b/thin/thin_cli.c @@ -0,0 +1,84 @@ +#include +#include +#include +#include "payload.h" + +static void usage(char *); + +int +main(int argc, char *argv[]) { + struct payload message; + int arg; + int opt_idx = 0, flag = 1; + int ret; + + const struct option longopts[] = { + { "add", required_argument, NULL, 0 }, + { "del", required_argument, NULL, 0 }, + { "master", no_argument, NULL, 0 }, + { "slave", required_argument, NULL, 's' }, + { 0, 0, 0, 0 } + }; + + /* We expect at least one valid option and, if more, the others + are discarded + */ + while ((arg = getopt_long(argc, argv, "h", + longopts, &opt_idx)) != -1 && flag) { + switch(arg) { + case 0: + /* master: it is fine to have a string with trailing spaces */ + ret = snprintf(message.path, PAYLOAD_MAX_PATH_LENGTH, + "%s %s", longopts[opt_idx].name, optarg); + if (ret >= PAYLOAD_MAX_PATH_LENGTH) { + fprintf(stderr, "input too long\n"); + return 2; + } + flag = 0; + break; + case 's': + ret = snprintf(message.path, IP_MAX_LEN, "%s %s", + longopts[opt_idx].name, optarg); + if (ret >= IP_MAX_LEN) { + fprintf(stderr, "input too long\n"); + return 2; + } + flag = 0; + break; + case 'h': + usage(argv[0]); + return 0; + default: + usage(argv[0]); + return 1; + } 
+ } + + /* there must be at least one valid option */ + if(flag) { + usage(argv[0]); + return 1; + } + + init_payload(&message); + message.reply = PAYLOAD_CLI; + + ret = thin_sock_comm(&message); + if(ret) { + fprintf(stderr, "socket error (%d)\n", ret); + return 1; + } + printf("message: %s\n", message.path); + + return 0; +} + +static void +usage(char *prog_name) +{ + printf("usage: %s -h\n", prog_name); + printf("usage: %s --add \n", prog_name); + printf("usage: %s --del \n", prog_name); + printf("usage: %s --master\n", prog_name); + printf("usage: %s --slave \n", prog_name); +} From d157a63308f7fd05705b46d014bf46b6ba7c5632 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:55 +0100 Subject: [PATCH 08/43] Removed unnecessary goto and return values Signed-off-by: Germano Percossi --- thin/kpr_util.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/thin/kpr_util.c b/thin/kpr_util.c index 3e0505e2..d20f4c90 100644 --- a/thin/kpr_util.c +++ b/thin/kpr_util.c @@ -119,13 +119,12 @@ kpr_tcp_create(uint16_t port) int kpr_tcp_conn_tx_rx(const char *ip, uint16_t port, struct payload * message) { - int sfd, ret, len; + int sfd, len; struct sockaddr_in s_addr; struct in_addr ipaddr; if ( !inet_aton(ip, &ipaddr) ) { - ret = 1; - goto end; + return 0; } len = sizeof(struct payload); @@ -133,8 +132,7 @@ kpr_tcp_conn_tx_rx(const char *ip, uint16_t port, struct payload * message) /* create tcp socket */ sfd = socket(AF_INET, SOCK_STREAM, 0); if (sfd == -1) { - ret = 1; - goto end; + return 0; } memset(&s_addr, 0, sizeof(s_addr)); @@ -143,19 +141,16 @@ kpr_tcp_conn_tx_rx(const char *ip, uint16_t port, struct payload * message) s_addr.sin_addr = ipaddr; if ( connect(sfd, (struct sockaddr *) &s_addr, sizeof(s_addr)) ) { - ret = 1; goto end; } /* TBD: very basic write, need a while loop */ if (write(sfd, message, len) != len) { - ret = 1; goto end; } /* TBD: very basic read */ if (read(sfd, message, len) != len) { - ret = 2; 
goto end; } From 2810ec824b68105c827dcf009ba1a0e999d82898 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:55 +0100 Subject: [PATCH 09/43] Fixed flag size from uint8_t to uint16_t We ran out of flags (we have 9 now) so the size of the flag is not sufficient. In theory there should not be any problem but it is not clear if this flag is used to be written somewhere where it is expected to be 8 bits. To be checked later Signed-off-by: Germano Percossi --- drivers/block-vhd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block-vhd.c b/drivers/block-vhd.c index 61dfef82..d75246ea 100644 --- a/drivers/block-vhd.c +++ b/drivers/block-vhd.c @@ -143,7 +143,7 @@ unsigned int SPB; #define VHD_FLAG_TX_LIVE 1 #define VHD_FLAG_TX_UPDATE_BAT 2 -typedef uint8_t vhd_flag_t; +typedef uint16_t vhd_flag_t; struct vhd_state; struct vhd_request; From 83355959518da524ce872f02883e47f9a79cc17c Mon Sep 17 00:00:00 2001 From: Chandrika Srinivasan Date: Thu, 24 Sep 2015 11:44:55 +0100 Subject: [PATCH 10/43] Added -s option to start thin-tapdisk in slave mode Signed-off-by: Chandrika Srinivasan Reviewed-by: Germano Percossi --- thin/sock_serv.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index 6f321a49..f78c3330 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -155,8 +155,11 @@ main(int argc, char *argv[]) { net_thr.thr.hook = dispatch_hook; if (master) net_thr.thr.net_hook = master_net_hook; - else + else { + printf("Starting daemon in slave mode with master at: %s\n", + master_ip); net_thr.thr.net_hook = slave_net_hook; + } net_thr.thr.net = true; if (pthread_create(&net_thr.thr.thr_id, NULL, worker_thread_net, &net_thr.thr)) { @@ -776,13 +779,19 @@ parse_cmdline(int argc, char ** argv) { int arg, fd_open = 0; - while ((arg = getopt(argc, argv, "df")) != EOF ) { + while ((arg = getopt(argc, argv, "dfs:")) != EOF ) { switch(arg) { case 'd': /* daemonize and 
close fd */ daemonize = 1; break; case 'f': /* if daemonized leave fd open */ fd_open = 1; + break; + case 's': /* start daemon in slave mode */ + printf("Master Ip address passed as: %s\n", optarg); + strncpy(master_ip, optarg, IP_MAX_LEN); + master = false; + break; default: break; } From 1b50b7597b21b28ff7b330efe445a0d18fd3399b Mon Sep 17 00:00:00 2001 From: Chandrika Srinivasan Date: Thu, 24 Sep 2015 11:44:55 +0100 Subject: [PATCH 11/43] Ensure kpr_tcp_conn_tx_rx returns non-zero value on success Signed-off-by: Chandrika Srinivasan Reviewed-by: Germano Percossi --- thin/kpr_util.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/thin/kpr_util.c b/thin/kpr_util.c index d20f4c90..55c0663e 100644 --- a/thin/kpr_util.c +++ b/thin/kpr_util.c @@ -122,6 +122,7 @@ kpr_tcp_conn_tx_rx(const char *ip, uint16_t port, struct payload * message) int sfd, len; struct sockaddr_in s_addr; struct in_addr ipaddr; + int ret = 1; if ( !inet_aton(ip, &ipaddr) ) { return 0; @@ -141,21 +142,27 @@ kpr_tcp_conn_tx_rx(const char *ip, uint16_t port, struct payload * message) s_addr.sin_addr = ipaddr; if ( connect(sfd, (struct sockaddr *) &s_addr, sizeof(s_addr)) ) { + fprintf(stderr,"Connect failed!\n"); + ret = 0; goto end; } /* TBD: very basic write, need a while loop */ if (write(sfd, message, len) != len) { + fprintf(stderr,"Failed to write to socket!\n"); + ret = 0; goto end; } /* TBD: very basic read */ if (read(sfd, message, len) != len) { + fprintf(stderr,"Failed to read from socket!\n"); + ret = 0; goto end; } end: close(sfd); - return 0; /* Closes our socket; server sees EOF */ + return ret; /* Closes our socket; server sees EOF */ } From 1eb99e1dd11b1ff1103f9a632c2b38ff9fe022e4 Mon Sep 17 00:00:00 2001 From: Chandrika Srinivasan Date: Thu, 24 Sep 2015 11:44:55 +0100 Subject: [PATCH 12/43] Added debug messages Signed-off-by: Chandrika Srinivasan Reviewed-by: Germano Percossi --- thin/sock_serv.c | 23 ++++++++++++++++++----- 1 file changed, 18 
insertions(+), 5 deletions(-) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index f78c3330..63c08648 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -525,6 +525,7 @@ worker_thread_net(void * ap) pthread_mutex_unlock(&r_queue->mtx); data = &req->data; + printf("Request dequeued in dispatch queue\n"); /* For the time being we use PAYLOAD_UNDEF as a way to notify threads to exit */ @@ -569,6 +570,8 @@ worker_thread_net(void * ap) req->data = buf; buf.reply = PAYLOAD_ACCEPTED; + printf("Payload received on netsocket accepted\n"); + /* Always acknowledge we got it */ /* TBD: very basic write, need a while loop */ if (write(cfd, &buf, len) != len) @@ -651,6 +654,9 @@ dispatch_hook(struct payload *data) fprintf(stderr, "Payload rejected\n"); goto fail; } + else { + printf("Payload dispatched\n"); + } return 1; fail: @@ -675,7 +681,7 @@ slave_net_hook(struct payload *data) refresh_lvm(data->path); break; default: - fprintf(stderr, "Spurious payload\n"); + fprintf(stderr, "Spurious payload in slave_net_hook\n"); return 1; } @@ -696,7 +702,8 @@ master_net_hook(struct payload *data) { /* Check reply */ if ( data->reply != PAYLOAD_REQUEST ) { - fprintf(stderr, "Spurious payload\n"); + fprintf(stderr, "Spurious payload in master_net_hook\n"); + print_payload(data); return 1; } @@ -835,7 +842,7 @@ add_vg(char *vg) { struct vg_entry *p_vg; - printf("CLI: add_vg\n"); + printf("CLI: add_vg for %s\n",vg); /* check we already have it */ if(vg_pool_find(vg, true)) { @@ -845,8 +852,10 @@ add_vg(char *vg) /* allocate and init vg_entry */ p_vg = malloc(sizeof(*p_vg)); - if (!p_vg) + if (!p_vg) { + fprintf(stderr, "Failed to allocate vg_entry struct\n"); return 1; + } /* We rely on CLI to avoid truncated name or non-NULL terminated strings. 
Moreover, by dest is not smaller then src */ @@ -854,8 +863,11 @@ add_vg(char *vg) /* VG and thread specific thread allocated */ p_vg->r_queue = alloc_init_queue(); - if(!p_vg->r_queue) + if(!p_vg->r_queue) { + fprintf(stderr, "Failed worker queue creation for %s\n", + p_vg->name); goto out; + } /* Prepare and start VG specific thread */ p_vg->thr.r_queue = p_vg->r_queue; @@ -870,6 +882,7 @@ add_vg(char *vg) /* Everything ok. Add vg to pool */ LIST_INSERT_HEAD(&vg_pool.head, p_vg, entries); + printf("Successfully registered VG %s\n", p_vg->name); return 0; out2: free(p_vg->r_queue); From 042af12170a4c318270944c805d205732b73f33c Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:55 +0100 Subject: [PATCH 13/43] Worker threads in master mode need to notify if pushing to net queues Signed-off-by: Germano Percossi --- thin/sock_serv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index 63c08648..8c4b9465 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -440,6 +440,9 @@ worker_thread(void * ap) o_queue = get_out_queue(data); pthread_mutex_lock(&o_queue->mtx); SIMPLEQ_INSERT_TAIL(&o_queue->qhead, req, entries); + /* net queue needs to notify through eventfd */ + if (o_queue == net_queue) + eventfd_write(o_queue->efd, 1); pthread_mutex_unlock(&o_queue->mtx); } return NULL; From d978d8832f783f969560f000028b20187bbcd511 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:55 +0100 Subject: [PATCH 14/43] Fixed "lvchange --refresh" to work on a slave Signed-off-by: Germano Percossi --- thin/sock_serv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index 8c4b9465..8ece268b 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -681,7 +681,9 @@ slave_net_hook(struct payload *data) case PAYLOAD_REJECTED: break; case PAYLOAD_DONE: - refresh_lvm(data->path); + if ( refresh_lvm(data->path) ) { + printf("*** Refresh failed 
***\n"); + } break; default: fprintf(stderr, "Spurious payload in slave_net_hook\n"); @@ -770,6 +772,7 @@ refresh_lvm(const char * path) return -1; case 0: /* child */ execl("/sbin/lvchange", "lvchange", "--refresh", path, + "--config", "global{metadata_read_only=0}", (char *)NULL); _exit(127); /* TBD */ default: /* parent */ From 6a36340e3d1e943cb5d9632ed1a37e0f16281fa0 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:55 +0100 Subject: [PATCH 15/43] Clean-up socket on exit Not a very reliable implementation: it makes too many assumptions and does not take into account possible problems associated with threads. Signed-off-by: Germano Percossi --- thin/sock_serv.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index 8ece268b..5d5a9419 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -13,6 +13,7 @@ #include /* non POSIX */ #include #include +#include #include "blktap.h" #include "payload.h" #include "kpr_util.h" @@ -121,9 +122,31 @@ alloc_init_queue(void) return NULL; } +/** + * SIGINT handler to clean-up socket file on exit + * + * This is very basic and it assumes it is registered once the socket file + * is created, so no checks. 
+ * FIXME: did not give much thought to its behaviour in multi-threaded env + * + * @param signo the signal to handle (SIGINT) + */ +static void clean_handler(int signo) +{ + if ( unlink(THIN_CONTROL_SOCKET) ) + _exit(1); +} + int main(int argc, char *argv[]) { + struct sigaction new_act, old_act; /* SIGINT handling */ + + /* SIGINT handling */ + new_act.sa_handler = clean_handler; + sigemptyset(&new_act.sa_mask); + new_act.sa_flags = 0; + /* default is master mode */ master = true; @@ -181,8 +204,16 @@ main(int argc, char *argv[]) { addr.sun_family = AF_UNIX; strncpy(addr.sun_path, THIN_CONTROL_SOCKET, sizeof(addr.sun_path) - 1); - if (bind(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)) == -1) + if (bind(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)) == -1) { + perror("bind failed"); return -errno; + } + + /* Register now the SIGINT handler to remove socket cleanly */ + if ( sigaction(SIGINT, &new_act, &old_act) < 0 ) { + printf("Signal handler registration failed: expect manual" + " clean-up\n"); + } if (listen(sfd, BACKLOG) == -1) return -errno; From ea523ec5001f276a77d766dd4e74b08eaf6a903e Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 16/43] Resize and refresh no more hardcoded Signed-off-by: Germano Percossi --- thin/sock_serv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index 5d5a9419..5a1479de 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -773,7 +773,7 @@ increase_size(off64_t size, const char * path) case -1: return -1; case 0: /* child */ - execl("/opt/xensource/sm/lvhdutil.py", "lvhdutil.py", "extend", ssize, + execl("/opt/xensource/sm/resize-demo", "resize-demo", ssize, path, (char *)NULL); _exit(127); /* TBD */ default: /* parent */ @@ -802,8 +802,7 @@ refresh_lvm(const char * path) case -1: return -1; case 0: /* child */ - execl("/sbin/lvchange", "lvchange", "--refresh", path, - "--config", 
"global{metadata_read_only=0}", + execl("/opt/xensource/sm/refresh-demo", "refresh-demo", path, (char *)NULL); _exit(127); /* TBD */ default: /* parent */ From a997bca86e5a53014bff73b2e894376778f37922 Mon Sep 17 00:00:00 2001 From: Jon Ludlam Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 17/43] Use xenvm instead of lvm Signed-off-by: Jon Ludlam Acked-by: Germano Percossi --- lvm/lvm-util.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lvm/lvm-util.c b/lvm/lvm-util.c index 9e2bfda4..0c41f6fc 100644 --- a/lvm/lvm-util.c +++ b/lvm/lvm-util.c @@ -106,9 +106,9 @@ lvm_open_vg(const char *vgname, struct vg *vg) memset(vg, 0, sizeof(*vg)); - err = asprintf(&cmd, "vgs %s --noheadings --nosuffix --units=b " + err = asprintf(&cmd, "/bin/xenvm vgs %s --noheadings --nosuffix --units=b " "--options=vg_name,vg_extent_size,lv_count,pv_count," - "pv_name,pe_start --unbuffered 2> /dev/null", vgname); + "pv_name,pe_start 2> /dev/null", vgname); if (err == -1) return -ENOMEM; @@ -211,9 +211,9 @@ lvm_scan_lvs(struct vg *vg) FILE *scan; int i, err; - err = asprintf(&cmd, "lvs %s --noheadings --nosuffix --units=b " + err = asprintf(&cmd, "/bin/xenvm lvs %s --noheadings --nosuffix --units=b " "--options=lv_name,lv_size,segtype,seg_count,seg_start," - "seg_size,devices --unbuffered 2> /dev/null", vg->name); + "seg_size,devices 2> /dev/null", vg->name); if (err == -1) return -ENOMEM; From 2c33a77b3aca0548b4682afd180995252412b220 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 18/43] Refine thin provisioning hooks Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- drivers/block-vhd.c | 240 ++++++++++++++++++++++++++------------------ 1 file changed, 145 insertions(+), 95 deletions(-) diff --git a/drivers/block-vhd.c b/drivers/block-vhd.c index d75246ea..168ffc5f 100644 --- a/drivers/block-vhd.c +++ b/drivers/block-vhd.c @@ -54,6 +54,7 @@ #include #include +#include #include "debug.h" 
#include "libvhd.h" #include "tapdisk.h" @@ -143,6 +144,11 @@ unsigned int SPB; #define VHD_FLAG_TX_LIVE 1 #define VHD_FLAG_TX_UPDATE_BAT 2 +/*******THIN PARAMETERS******/ +#define THIN_WARN_1 52428800L /* 50 MBs to go, send resize request */ +#define THIN_WARN_2 10485760L /* 10 MBs to go, start slow down */ +#define THIN_RESIZE_INCREMENT 104857600L /* 100 MBs incremets */ + typedef uint16_t vhd_flag_t; struct vhd_state; @@ -237,6 +243,7 @@ struct vhd_state { /* thin provisioning data */ off64_t eof_bytes; + uint64_t req_bytes; /* for redundant bitmap writes */ int padbm_size; @@ -398,97 +405,10 @@ vhd_kill_footer(struct vhd_state *s) return 0; } - -static int -thin_prepare_req(const struct vhd_state * s, - struct payload * message, - uint64_t next_db, int query) -{ - /* - * sectors can be converted in bytes using info.sector_size - * that happens to be calculated using the same VHD define used - * by vhd_sectors_to_bytes. This is no more unreliable than using - * 2 different macros to do the job (see __vhd_open) - */ - uint64_t virt_bytes, phy_bytes, req_bytes; - const uint64_t warn1 = 52428800; /* 50 MBs warning */ - const uint64_t warn2 = 4194304; /* 4 MBs warning */ - uint64_t safe_threshold, critical_threshold; - - virt_bytes = vhd_sectors_to_bytes(s->driver->info.size); - phy_bytes = s->eof_bytes; - req_bytes = vhd_sectors_to_bytes(next_db); - - /* we want to exit as soon as possible if not needed */ - if (phy_bytes >= virt_bytes) - return 1; /* nothing to be done: space is enough */ - safe_threshold = (phy_bytes < warn1) ? 0 : (phy_bytes - warn1); - if (req_bytes < safe_threshold) - return 1; /* we do not bother yet */ - - init_payload(message); - - critical_threshold = (phy_bytes < warn2) ? 0 : (phy_bytes - warn2); - if (req_bytes > critical_threshold) { - /* this must be our last request so we have to loop for - a final answer or block: do something more.. 
*/ - message->reply = PAYLOAD_QUERY; - } else { /* if we are here it means we hit the threshold */ - /* if we already issued a request just check its status */ - message->reply = query ? PAYLOAD_QUERY: PAYLOAD_REQUEST; - } - - strncpy(message->path, s->vhd.file, PAYLOAD_MAX_PATH_LENGTH); - message->curr = phy_bytes; - message->req = req_bytes; - message->vhd_size = virt_bytes; - - return 0; -} - - -static int -thin_parse_reply(const struct payload * buf, struct vhd_state *s, int * query) -{ - switch (buf->reply) { - case PAYLOAD_ACCEPTED: - *query = 1; /* next time just query */ - break; - case PAYLOAD_WAIT: - break; /* just keep asking */ - case PAYLOAD_REJECTED: - *query = 0; /* can make new requests if necessary */ - break; - default: /* It should be only DONE but to be safe..*/ - *query = 0; /* we cannot query any more, it has been served */ - vhd_thin_prepare(s); - } - return 0; -} - static void -update_next_db(struct vhd_state *s, uint64_t next_db, int notify) +update_next_db(struct vhd_state *s, uint64_t next_db) { - int err; - struct payload message; - static int query = 0; /* distinguish requests from queries */ - - DPRINTF("update_next_db"); - s->next_db = next_db; - - if (!(s->flags & VHD_FLAG_OPEN_THIN)) - return; - - if (notify && !thin_prepare_req(s, &message, next_db, query)) { - /* socket message block */ - err = thin_sock_comm(&message); - if (err) - DBG(TLOG_WARN, "socket returned: %d\n", err); - err = thin_parse_reply(&message, s, &query); - if (err) - DBG(TLOG_WARN, "thin_parse_reply returned: %d\n", err); - } } @@ -505,7 +425,7 @@ find_next_free_block(struct vhd_state *s) if (err) return err; - update_next_db(s, secs_round_up(eom), 0); + update_next_db(s, secs_round_up(eom)); s->first_db = s->next_db; if ((s->first_db + s->bm_secs) % s->spp) s->first_db += (s->spp - ((s->first_db + s->bm_secs) % s->spp)); @@ -515,7 +435,7 @@ find_next_free_block(struct vhd_state *s) if (entry != DD_BLK_UNUSED && entry >= s->next_db) { next_db = (uint64_t)entry 
+ (uint64_t)s->spb + (uint64_t)s->bm_secs; - update_next_db(s, next_db, 0); + update_next_db(s, next_db); } if (s->next_db > UINT_MAX) @@ -753,6 +673,7 @@ vhd_thin_prepare(struct vhd_state *s) { if ((s->eof_bytes = lseek64(s->vhd.fd, 0, SEEK_END)) == -1) return -errno; + s->req_bytes = 0; return 0; } @@ -812,7 +733,7 @@ __vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags) vhd_log_open(s); if(!test_vhd_flag(flags, VHD_FLAG_OPEN_RDONLY)) { - update_next_db(s, s->next_db, 0); + update_next_db(s, s->next_db); } if(test_vhd_flag(flags, VHD_FLAG_OPEN_THIN)) { @@ -1459,6 +1380,128 @@ aio_write(struct vhd_state *s, struct vhd_request *req, uint64_t offset) TRACE(s); } +static inline int +thin_check_warn_2(struct vhd_state *s, int64_t available_bytes) +{ + uint64_t phy_bytes; + + /* we are worried the resize is taking too long, we will try to + * buy some time sleeping some time for every new block request + * We sleep more if available_bytes is getting small. + */ + sleep((THIN_WARN_2 - available_bytes)/(1024*1024)); + EPRINTF("Sleep(%ld)", (THIN_WARN_2 - available_bytes)/(1024*1024)); + phy_bytes = lseek64(s->vhd.fd, 0, SEEK_END); + if ((s->eof_bytes + THIN_RESIZE_INCREMENT) <= phy_bytes) { + /* Request is completed but maybe the response got lost */ + s->eof_bytes = phy_bytes; + s->req_bytes = 0; + return 0; + } + + if (available_bytes < 0) { + /* We must fail with ENOSPC after we tried everything */ + EPRINTF("Returning -1"); + return -1; + } else { + return 0; + } +} + +static inline void +check_resize_request_progress(struct vhd_state *s, struct payload *message, + int64_t available_bytes) +{ + int err; + uint64_t phy_bytes; + + message->reply = PAYLOAD_QUERY; + err = thin_sock_comm(message); + if (err) { + DBG(TLOG_WARN, "socket returned: %d\n", err); + } else if (message->reply == PAYLOAD_DONE) { + /* check if the obtained size is compatible with the + * requested one */ + phy_bytes = lseek64(s->vhd.fd, 0, SEEK_END); + + EPRINTF("Requested:%ld, 
obtained:%ld:\n", + s->req_bytes, phy_bytes); + if (s->req_bytes <= phy_bytes) { + s->eof_bytes = phy_bytes; + s->req_bytes = 0; + + } else { + /* The daemon was thinking that the resize was + * successfull but somehow it extended less than + * asked, do an other request */ + EPRINTF("Extended less than expected"); + s->req_bytes = 0; + } + } +} + +static inline void +send_resize_request(struct vhd_state *s, struct payload *message) +{ + int err; + + message->reply = PAYLOAD_REQUEST; + err = thin_sock_comm(message); + if (err) { + DBG(TLOG_WARN, "socket returned: %d\n", err); + } else if (message->reply == PAYLOAD_ACCEPTED) { + /* record that req_bytes request has been submitted*/ + s->req_bytes = message->req; + } else { + /* we will try to send the request again next time */ + DBG(TLOG_WARN, "failed reply: %d\n", message->reply); + } +} + +/** + * @brief Execute checks related to thin provisioning + * + * This function is called every time there is a request for a new + * block in the VHD, but only if VHD_FLAG_OPEN_THIN is set. + * This is implementing the high level state machine and is using + * some other helper functions to do some specific bits. 
+ * @param s Pointer to vhd_state structure + * @param needed_sectors Number of sectors needed + */ +static inline int +thin_provisioning_checks(struct vhd_state *s, uint64_t needed_sectors) +{ + int64_t available_bytes; + struct payload message; + int ret = 0; + + available_bytes = s->eof_bytes - vhd_sectors_to_bytes(needed_sectors); + EPRINTF("needed_bytes=%ld", available_bytes); + + if (available_bytes < THIN_WARN_1) { + /* prepare common part of the message */ + init_payload(&message); + strncpy(message.path, s->vhd.file, PAYLOAD_MAX_PATH_LENGTH); + message.curr = s->eof_bytes; + message.req = s->eof_bytes + THIN_RESIZE_INCREMENT; + message.vhd_size = vhd_sectors_to_bytes(s->driver->info.size); + + /* s->req_bytes is indicating our state */ + if (s->req_bytes != 0) { + check_resize_request_progress(s, &message, available_bytes); + } else { + send_resize_request(s, &message); + } + + /* let's be more pedantic when we reach end of space */ + if (available_bytes < THIN_WARN_2) { + ret = thin_check_warn_2(s, available_bytes); + } + } + + return ret; +} + /** * Reserves a new extent. 
* @@ -1471,6 +1514,7 @@ static inline uint64_t reserve_new_block(struct vhd_state *s, uint32_t blk) { int gap = 0; + int ret; ASSERT(!test_vhd_flag(s->bat.status, VHD_FLAG_BAT_WRITE_STARTED)); @@ -1481,6 +1525,12 @@ reserve_new_block(struct vhd_state *s, uint32_t blk) if (s->next_db + gap > UINT_MAX) return (uint64_t)ENOSPC << 32; + if ((s->flags & VHD_FLAG_OPEN_THIN)) { + ret = thin_provisioning_checks(s, s->next_db + gap + s->spb + s->bm_secs); + if (ret < 0) + return (uint64_t)ENOSPC << 32; + } + s->bat.pbw_blk = blk; s->bat.pbw_offset = s->next_db + gap; @@ -1604,7 +1654,7 @@ update_bat(struct vhd_state *s, uint32_t blk) int err; uint64_t lb_end; struct vhd_bitmap *bm; - + EPRINTF("SSS update_bat enter blk=%d", blk); ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED); if (bat_locked(s)) { @@ -1632,7 +1682,7 @@ update_bat(struct vhd_state *s, uint32_t blk) } schedule_zero_bm_write(s, bm, lb_end); set_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT); - + EPRINTF("SSS update_bat exit"); return 0; } @@ -1667,7 +1717,7 @@ allocate_block(struct vhd_state *s, uint32_t blk) if (next_db > UINT_MAX) return -ENOSPC; - update_next_db(s,next_db, 0); + update_next_db(s,next_db); s->bat.pbw_blk = blk; s->bat.pbw_offset = s->next_db; @@ -2276,7 +2326,7 @@ finish_bat_write(struct vhd_request *req) if (!req->error) { bat_entry(s, s->bat.pbw_blk) = s->bat.pbw_offset; - update_next_db(s, s->bat.pbw_offset + s->spb + s->bm_secs, 1); + update_next_db(s, s->bat.pbw_offset + s->spb + s->bm_secs); } else tx->error = req->error; From 1cef4c55342c0d94500a218cc6db7e552dc93c02 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 19/43] Start using new tapdisk-allocator protocol This change involves few things: - New transport based on DGRAM - New packet/messages protocol - Server runs in one fixed mode, dropped support for master and slave - Because of protocol change now we do not use anymore condition wait but just eventfd so we can use poll to 
block on both socket reads and queued requests from the same context. - Only the bare minimum of the protocol is implemented; the server only supports PAYLOAD_CB_NONE Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- drivers/block-vhd.c | 35 ++- include/payload.h | 39 +-- thin/payload.c | 18 +- thin/sock_client2.c | 114 +++++-- thin/sock_serv.c | 724 +++++++++++++------------------------------- thin/thin_cli.c | 23 +- 6 files changed, 368 insertions(+), 585 deletions(-) diff --git a/drivers/block-vhd.c b/drivers/block-vhd.c index 168ffc5f..94da49dc 100644 --- a/drivers/block-vhd.c +++ b/drivers/block-vhd.c @@ -244,6 +244,7 @@ struct vhd_state { /* thin provisioning data */ off64_t eof_bytes; uint64_t req_bytes; + struct thin_conn_handle *ch; /* for redundant bitmap writes */ int padbm_size; @@ -674,7 +675,11 @@ vhd_thin_prepare(struct vhd_state *s) if ((s->eof_bytes = lseek64(s->vhd.fd, 0, SEEK_END)) == -1) return -errno; s->req_bytes = 0; - + s->ch = thin_connection_create(); + if (s->ch == NULL) { + EPRINTF("thin connection creation has failed"); + return -1; + } return 0; } @@ -871,6 +876,14 @@ _vhd_close(td_driver_t *driver) EPRINTF("writing %s batmap: %d\n", s->vhd.file, err); } + if (test_vhd_flag(s->flags, VHD_FLAG_OPEN_THIN)) { + if (s->ch) { + thin_connection_destroy(s->ch); + /* Let's set ch to NULL just in case */ + s->ch = NULL; + } + } + free: vhd_log_close(s); vhd_free_bat(s); @@ -1412,9 +1425,9 @@ static inline void check_resize_request_progress(struct vhd_state *s, struct payload *message, int64_t available_bytes) { +#ifdef THIN_OLD_PROTOCOL int err; uint64_t phy_bytes; - message->reply = PAYLOAD_QUERY; err = thin_sock_comm(message); if (err) { @@ -1438,23 +1451,25 @@ check_resize_request_progress(struct vhd_state *s, struct payload *message, s->req_bytes = 0; } } +#else + return; +#endif /* THIN_OLD_PROTOCOL */ } static inline void send_resize_request(struct vhd_state *s, struct payload *message) { int err; - - message->reply =
PAYLOAD_REQUEST; - err = thin_sock_comm(message); + message->type = PAYLOAD_RESIZE; + err = thin_sync_send_and_receive(s->ch, message); if (err) { DBG(TLOG_WARN, "socket returned: %d\n", err); - } else if (message->reply == PAYLOAD_ACCEPTED) { + } else if (message->err_code == THIN_ERR_CODE_SUCCESS) { /* record that req_bytes request has been submitted*/ - s->req_bytes = message->req; + s->req_bytes = message->req_size; } else { /* we will try to send the request again next time */ - DBG(TLOG_WARN, "failed reply: %d\n", message->reply); + DBG(TLOG_WARN, "failed reply: %d\n", message->err_code); } } @@ -1482,9 +1497,7 @@ thin_provisioning_checks(struct vhd_state *s, uint64_t needed_sectors) /* prepare common part of the message */ init_payload(&message); strncpy(message.path, s->vhd.file, PAYLOAD_MAX_PATH_LENGTH); - message.curr = s->eof_bytes; - message.req = s->eof_bytes + THIN_RESIZE_INCREMENT; - message.vhd_size = vhd_sectors_to_bytes(s->driver->info.size); + message.req_size = s->eof_bytes + THIN_RESIZE_INCREMENT; /* s->req_bytes is indicating our state */ if (s->req_bytes != 0) { diff --git a/include/payload.h b/include/payload.h index 3a1c9a24..69709f76 100644 --- a/include/payload.h +++ b/include/payload.h @@ -7,34 +7,37 @@ #define PAYLOAD_MAX_PATH_LENGTH TAPDISK_MESSAGE_MAX_PATH_LENGTH #define IP_MAX_LEN 32 +#define THIN_ERR_CODE_SUCCESS 0 +#define THIN_ERR_CODE_FAILURE 1 + +#define PAYLOAD_CB_NONE 0 +#define PAYLOAD_CB_SOCK 1 + typedef enum { - /* server */ - PAYLOAD_ACCEPTED = 0, - PAYLOAD_REJECTED, - PAYLOAD_DONE, - PAYLOAD_WAIT, - /* client */ - PAYLOAD_QUERY, - PAYLOAD_REQUEST, + PAYLOAD_RESIZE = 0, PAYLOAD_CLI, - /* generic */ + PAYLOAD_STATUS, PAYLOAD_UNDEF } payload_message_t; struct payload { - pid_t id; + uint8_t type; char path[PAYLOAD_MAX_PATH_LENGTH]; - uint64_t curr; - uint64_t req; - off64_t vhd_size; - payload_message_t reply; - char ipaddr[IP_MAX_LEN]; /* used internally */ + uint64_t req_size; + uint8_t cb_type; + char cb_data[128]; + 
uint16_t err_code; + uint32_t reserved_clt; + uint32_t reserved_srv; + char id[128]; }; +struct thin_conn_handle; +int thin_sync_send_and_receive(struct thin_conn_handle *ch, + struct payload *message); +struct thin_conn_handle * thin_connection_create(void); +void thin_connection_destroy(struct thin_conn_handle *ch); int init_payload(struct payload *); void print_payload(struct payload *); -/* Temporary location */ -int thin_sock_comm(struct payload *); - #endif /* payload.h */ diff --git a/thin/payload.c b/thin/payload.c index c1ba10b4..a52bbee7 100644 --- a/thin/payload.c +++ b/thin/payload.c @@ -1,26 +1,20 @@ #include +#include #include "payload.h" int init_payload(struct payload *pload) { - pload->id = -1; - pload->curr = 0; - pload->req = 0; - pload->vhd_size = 0; - pload->reply = PAYLOAD_UNDEF; - pload->ipaddr[0] = '\0'; + memset(pload, 0, sizeof(struct payload)); return 0; } void print_payload(struct payload *pload) { printf("payload data:\n"); - printf("id = %d\n", pload->id); + printf("type = %d\n", pload->type); printf("path = %s\n", pload->path); - printf("current size = %"PRIu64"\n", pload->curr); - printf("requested size = %"PRIu64"\n", pload->req); - printf("virtual size = %"PRIu64"\n", pload->vhd_size); - printf("request type = %d\n", pload->reply); - printf("dest ipaddr = %s\n", pload->ipaddr); + printf("requested size = %"PRIu64"\n", pload->req_size); + printf("cb_type = %d\n", pload->cb_type); + printf("err_code = %d\n", pload->err_code); return; } diff --git a/thin/sock_client2.c b/thin/sock_client2.c index 75ec3529..de0f7126 100644 --- a/thin/sock_client2.c +++ b/thin/sock_client2.c @@ -3,44 +3,106 @@ #include #include #include +#include +#include +#include +#include +#include "debug.h" +#include "../drivers/tapdisk-log.h" #include "blktap.h" #include "payload.h" -int -thin_sock_comm(struct payload *message) +struct thin_conn_handle { + int sfd; +}; + +int thin_sync_send_and_receive(struct thin_conn_handle *ch, + struct payload *message) { - /* 
maybe we can borrow tap_ctl_connect but it is risky because - of strcpy */ + size_t len = sizeof(struct payload); - struct sockaddr_un addr; - int sfd, len; - pid_t pid; + if (ch == NULL) { + return -1; + } - len = sizeof(struct payload); + /* Send messages to server */ + if (write(ch->sfd, message, len) != len) + return -errno; - pid = getpid(); - message->id = pid; + /* Wait for ACK packet */ + if (read(ch->sfd, message, len) != len) + return -errno; - sfd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); - if (sfd == -1) - return -errno; /* to be handled */ + return 0; +} - memset(&addr, 0, sizeof(addr)); - addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, THIN_CONTROL_SOCKET, sizeof(addr.sun_path) - 1); +static int timeout = 10; - if (connect(sfd, (struct sockaddr *) &addr, sizeof(addr)) == -1) - return -errno; +struct thin_conn_handle * +thin_connection_create(void) +{ + struct sockaddr_un svaddr, claddr; + struct thin_conn_handle *ch; + char client_sock_name[64]; - /* TBD: very basic write, need a while loop */ - if (write(sfd, message, len) != len) - return -errno; + ch = malloc(sizeof(struct thin_conn_handle)); + if (ch == NULL) + goto out2; - /* TBD: very basic read */ - if (read(sfd, message, len) != len) - return -errno; + ch->sfd = socket(AF_UNIX, SOCK_DGRAM, 0); + if (ch->sfd == -1) { + EPRINTF("Socket creation failed"); + goto out1; + } - close(sfd); - return 0; /* Closes our socket; server sees EOF */ + setsockopt(ch->sfd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, + sizeof(timeout)); + + sprintf(client_sock_name, "td_thin_client_%d", getpid()); + + /* Construct address of the client*/ + memset(&claddr, 0, sizeof(struct sockaddr_un)); + claddr.sun_family = AF_UNIX; + strncpy(&claddr.sun_path[1], client_sock_name , strlen(client_sock_name)); + + if (bind(ch->sfd, (struct sockaddr *) &claddr, + sizeof(sa_family_t) + strlen(client_sock_name) + 1) == -1) { + EPRINTF("Bind has failed"); + goto out3; + } + + /* Construct address of server */ + 
memset(&svaddr, 0, sizeof(struct sockaddr_un)); + svaddr.sun_family = AF_UNIX; + strncpy(svaddr.sun_path, THIN_CONTROL_SOCKET, sizeof(svaddr.sun_path) - 1); + + /* Connect to the server */ + if (connect(ch->sfd, (struct sockaddr *) &svaddr, sizeof(svaddr)) == -1) { + EPRINTF("Connect has failed"); + goto out3; + } + + /* All went well, just return the opaque structure */ + return ch; + +out3: + close(ch->sfd); +out1: + free(ch); +out2: + return NULL; } +void +thin_connection_destroy(struct thin_conn_handle *ch) +{ + if (ch == NULL) { + EPRINTF("Was asked to destroy a NULL handle"); + return; + } + /* WARING: + * Close could fail, but not sure what to do if that happes + */ + close(ch->sfd); + free(ch); +} diff --git a/thin/sock_serv.c b/thin/sock_serv.c index 5a1479de..ad23f915 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -21,55 +21,40 @@ #define BACKLOG 5 #define PORT_NO 7777 -static inline int process_payload(int, struct payload *); -static inline int req_reply(int, struct payload *); -static int handle_request(struct payload * buf); -static int handle_query(struct payload * buf); +static inline int process_payload(struct payload *); +static void process_out_queue(void); +static inline int req_reply(struct payload *); +static int handle_resize(struct payload * buf); +static int handle_status(struct payload * buf); +static int handle_cli(struct payload *); static void * worker_thread(void *); -static void * worker_thread_net(void *); static int slave_worker_hook(struct payload *); -static int reject_hook(struct payload *); -static int dispatch_hook(struct payload *); -static int slave_net_hook(struct payload *); -static int master_net_hook(struct payload *); static int increase_size(off64_t size, const char * path); -static int refresh_lvm(const char * path); static void parse_cmdline(int, char **); static int do_daemon(void); -static int handle_cli(struct payload *); static void split_command(char *, char **); static int add_vg(char *vg); static int 
del_vg(char *vg); -static int slave_mode(char *ip); -static int master_mode(void); + +#ifdef THIN_REFRESH_LVM +static int refresh_lvm(const char * path); +#endif /* THIN_REFRESH_LVM */ bool master; /* no need to be mutex-ed: main writes, workers read */ -char master_ip[IP_MAX_LEN]; /* - Used only in slave mode, ensure it is - NULL terminated. This variable is used - only in handle_request but, as long as - this function is used in the network thread, - it must be mutex protected - */ pthread_mutex_t ip_mtx = PTHREAD_MUTEX_INITIALIZER; /* see above */ - /* queue structures */ SIMPLEQ_HEAD(sqhead, sq_entry); struct kpr_queue { struct sqhead qhead; pthread_mutex_t mtx; - pthread_cond_t cnd; int efd; /* some queues are notified by eventfd */ -} *net_queue, *out_queue; +} *out_queue; struct sq_entry { struct payload data; SIMPLEQ_ENTRY(sq_entry) entries; }; -static struct sq_entry * find_and_remove(struct sqhead *, pid_t); -static struct kpr_queue * get_out_queue(struct payload *); - /* thread structures */ struct kpr_thread_info { pthread_t thr_id; @@ -109,17 +94,57 @@ alloc_init_queue(void) SIMPLEQ_INIT(&sqp->qhead); if (pthread_mutex_init(&sqp->mtx, NULL) != 0) goto out; - if (pthread_cond_init(&sqp->cnd, NULL) != 0) - goto out; if ( (sqp->efd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK| EFD_SEMAPHORE)) == -1 ) goto out; } return sqp; - out: - free(sqp); - return NULL; +out: + free(sqp); + return NULL; +} + +static inline struct sq_entry * +get_req_from_queue(struct kpr_queue *q) +{ + struct sq_entry *req = NULL; + uint64_t ebuf; + + pthread_mutex_lock(&q->mtx); + if (!SIMPLEQ_EMPTY(&q->qhead)) { + /* pop from requests queue */ + req = SIMPLEQ_FIRST(&q->qhead); + SIMPLEQ_REMOVE_HEAD(&q->qhead, entries); + + } else { + /* clear the fd so we can go back to poll */ + eventfd_read(q->efd, &ebuf); + } + pthread_mutex_unlock(&q->mtx); + return req; +} + +static inline void +put_req_into_queue(struct kpr_queue *q, struct sq_entry *req ) +{ + bool notify_consumer; + + 
pthread_mutex_lock(&q->mtx); + if (SIMPLEQ_EMPTY(&q->qhead)) { + notify_consumer = true; + } else { + notify_consumer = false; + } + SIMPLEQ_INSERT_TAIL(&q->qhead, req, entries); + pthread_mutex_unlock(&q->mtx); + + /* Notify after releasing the mutex, since the receiver + * is probably in a poll, so when he gets notified the mutex + * is probably available */ + if (notify_consumer == true) { + eventfd_write(q->efd, 1); + } } /** @@ -146,9 +171,8 @@ main(int argc, char *argv[]) { new_act.sa_handler = clean_handler; sigemptyset(&new_act.sa_mask); new_act.sa_flags = 0; - - /* default is master mode */ - master = true; + struct pollfd fds[2]; + nfds_t maxfds = 2; /* Init pool */ LIST_INIT(&vg_pool.head); @@ -156,9 +180,6 @@ main(int argc, char *argv[]) { return 1; /* Init default queues */ - net_queue = alloc_init_queue(); - if(!net_queue) - return 1; /*no free: return from main */ out_queue = alloc_init_queue(); if(!out_queue) return 1; /*no free: return from main */ @@ -172,39 +193,22 @@ main(int argc, char *argv[]) { if (do_daemon() == -1) return 1; /* can do better */ - /* prepare and spawn default thread: use vg_entry even if not VG */ - struct vg_entry net_thr; - net_thr.thr.r_queue = net_queue; - net_thr.thr.hook = dispatch_hook; - if (master) - net_thr.thr.net_hook = master_net_hook; - else { - printf("Starting daemon in slave mode with master at: %s\n", - master_ip); - net_thr.thr.net_hook = slave_net_hook; - } - net_thr.thr.net = true; - if (pthread_create(&net_thr.thr.thr_id, NULL, worker_thread_net, - &net_thr.thr)) { - printf("failed worker thread creation\n"); - return 1; - } - - - struct sockaddr_un addr; - int sfd, cfd; - ssize_t numRead; + struct sockaddr_un sv_addr, cl_addr; + int sfd; + socklen_t len; + ssize_t ret; + int poll_ret; struct payload buf; - sfd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + sfd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); if (sfd == -1) return -errno; - memset(&addr, 0, sizeof(struct sockaddr_un)); - 
addr.sun_family = AF_UNIX; - strncpy(addr.sun_path, THIN_CONTROL_SOCKET, sizeof(addr.sun_path) - 1); + memset(&sv_addr, 0, sizeof(struct sockaddr_un)); + sv_addr.sun_family = AF_UNIX; + strncpy(sv_addr.sun_path, THIN_CONTROL_SOCKET, sizeof(sv_addr.sun_path) - 1); - if (bind(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_un)) == -1) { + if (bind(sfd, (struct sockaddr *) &sv_addr, sizeof(struct sockaddr_un)) == -1) { perror("bind failed"); return -errno; } @@ -214,40 +218,87 @@ main(int argc, char *argv[]) { printf("Signal handler registration failed: expect manual" " clean-up\n"); } + + fds[0].fd = out_queue->efd; + fds[0].events = POLLIN; + fds[1].fd = sfd; + fds[1].events = POLLIN; - if (listen(sfd, BACKLOG) == -1) - return -errno; + for(;;) { + poll_ret = poll(fds, maxfds, -1); /* wait for ever */ + if ( poll_ret < 1 ) { /* 0 not expected */ + fprintf(stderr, "poll returned %d, %s\n", + poll_ret, strerror(errno)); + continue; + } - for (;;) { + if (fds[0].revents) { + /* process out_queue until empty*/ + process_out_queue(); + } + + if (fds[1].revents) { + /* read from the control socket */ + ret = recvfrom(sfd, &buf, sizeof(buf), 0, + &cl_addr, &len); + if (ret != sizeof(buf)) { + fprintf(stderr, "recvfrom returned %ld, %s\n", + (long)ret, strerror(errno)); + continue; + } + /* Packet of expected len arrived, process it*/ + process_payload(&buf); + + /* Send the acknowledge packet */ + ret = sendto(sfd, &buf, ret, 0, &cl_addr, len); + if(ret != sizeof(buf)) { - cfd = accept4(sfd, NULL, NULL, SOCK_CLOEXEC); - if (cfd == -1) - return -errno; - - while ((numRead = read(cfd, &buf, sizeof(buf))) > 0) { - /* temporary: ensure ipaddr is NULL if coming from - socket. 
Remove if network thread is sending packets - through the socket - */ - buf.ipaddr[0] = '\0'; - process_payload(cfd, &buf); + fprintf(stderr, "sendto returned %ld, %s\n", + (long)ret, strerror(errno)); + } } + } +} - if (numRead == -1) - return -errno; +static void +process_out_queue(void) +{ + struct payload buf; + struct sq_entry *req; - if (close(cfd) == -1) - return -errno; + for (;;) { + req = get_req_from_queue(out_queue); + if (req == NULL) + break; + + /* Process the req */ + buf = req->data; + switch (buf.cb_type) { + case PAYLOAD_CB_NONE: + /* Just free the req, no async response + * was needed */ + fprintf(stderr, "Processed CB_NONE req\n"); + break; + case PAYLOAD_CB_SOCK: + /* FIXME: + * We do not expect somebody to use this + * for now */ + fprintf(stderr, "CB_SOCK not implemented yet\n"); + break; + default: + fprintf(stderr, "cb_type unknown\n"); + } + free(req); } } static inline int -process_payload(int fd, struct payload * buf) +process_payload(struct payload * buf) { int err; print_payload(buf); - err = req_reply(fd, buf); + err = req_reply(buf); print_payload(buf); printf("EOM\n\n"); @@ -255,32 +306,27 @@ process_payload(int fd, struct payload * buf) } static int -req_reply(int fd, struct payload * buf) +req_reply(struct payload * buf) { - switch (buf->reply) { - case PAYLOAD_REQUEST: - handle_request(buf); - break; - case PAYLOAD_QUERY: - handle_query(buf); + switch (buf->type) { + case PAYLOAD_RESIZE: + handle_resize(buf); break; case PAYLOAD_CLI: handle_cli(buf); break; + case PAYLOAD_STATUS: + handle_status(buf); + break; default: - buf->reply = PAYLOAD_UNDEF; + buf->type = PAYLOAD_UNDEF; print_payload(buf); } - - /* TBD: very basic write, need a while loop */ - if (write(fd, buf, sizeof(*buf)) != sizeof(*buf)) - return -errno; - return 0; } static int -handle_request(struct payload * buf) +handle_resize(struct payload * buf) { struct sq_entry *req; struct vg_entry *vgentry; @@ -288,29 +334,20 @@ handle_request(struct payload * buf) char 
vgname[PAYLOAD_MAX_PATH_LENGTH]; char lvname[PAYLOAD_MAX_PATH_LENGTH]; - if( kpr_split_lvm_path(buf->path, vgname, lvname) ) + if( kpr_split_lvm_path(buf->path, vgname, lvname) ) { + /* Fail request, malformed path */ + buf->err_code = THIN_ERR_CODE_FAILURE; return 1; + } /* search we have a queue for it */ vgentry = vg_pool_find(vgname, true); if (vgentry) /* we do */ in_queue = vgentry->r_queue; /* no lock (sure?) */ else { - /* In master mode this means rejected */ - if (master) { - /* hack to reuse it in net_thread */ - if (buf->ipaddr[0] != '\0') - return 1; - in_queue = out_queue; - buf->reply = PAYLOAD_REJECTED; - } - else { - /* write master address */ - pthread_mutex_lock(&ip_mtx); - strncpy(buf->ipaddr, master_ip, IP_MAX_LEN); - pthread_mutex_unlock(&ip_mtx); - in_queue = net_queue; - } + /* Fail request, vg unknown */ + buf->err_code = THIN_ERR_CODE_FAILURE; + return 1; } req = malloc(sizeof(struct sq_entry)); @@ -318,52 +355,20 @@ handle_request(struct payload * buf) return 1; req->data = *buf; - buf->reply = PAYLOAD_ACCEPTED; - pthread_mutex_lock(&in_queue->mtx); - - SIMPLEQ_INSERT_TAIL(&in_queue->qhead, req, entries); - - /* Temporary hack for the new event mechanism used by default queue */ - if ( in_queue == net_queue ) - eventfd_write(in_queue->efd, 1); - else if ( in_queue == out_queue ) - /* no need to signal for out_queue */ - ; - else - pthread_cond_signal(&in_queue->cnd); - pthread_mutex_unlock(&in_queue->mtx); + buf->err_code = THIN_ERR_CODE_SUCCESS; + put_req_into_queue(in_queue, req); return 0; } static int -handle_query(struct payload * buf) +handle_status(struct payload * buf) { - struct sq_entry * req; - - /* Check we have something ready */ - pthread_mutex_lock(&out_queue->mtx); - if (SIMPLEQ_EMPTY(&out_queue->qhead)) { - pthread_mutex_unlock(&out_queue->mtx); - buf->reply = PAYLOAD_WAIT; - return 0; - } - - /* check if we have a served request for this query */ - req = find_and_remove(&out_queue->qhead, buf->id); - if (req) { - 
pthread_mutex_unlock(&out_queue->mtx); - buf->reply = req->data.reply; - free(req); - } else { /* wait */ - pthread_mutex_unlock(&out_queue->mtx); - buf->reply = PAYLOAD_WAIT; - } - + /* This is just returning SUCCESS for now */ + buf->err_code = THIN_ERR_CODE_SUCCESS; return 0; } - static int handle_cli(struct payload * buf) { @@ -389,14 +394,6 @@ handle_cli(struct payload * buf) return 1; ret = del_vg(cmd[1]); } - else if (!strcmp("slave", cmd[0])) { - if(!cmd[1]) - return 1; - ret = slave_mode(cmd[1]); - } - else if (!strcmp("master", cmd[0])) { - ret = master_mode(); - } else ret = 1; @@ -408,30 +405,16 @@ handle_cli(struct payload * buf) return 0; } - -/* This function must be invoked with the corresponding mutex locked */ -static struct sq_entry * -find_and_remove(struct sqhead * head, pid_t id) -{ - struct sq_entry * entry; - SIMPLEQ_FOREACH(entry, head, entries) { - if (entry->data.id == id) { - SIMPLEQ_REMOVE(head, entry, sq_entry, entries); - return entry; - } - } - /* No matches */ - return NULL; -} - - static void * worker_thread(void * ap) { struct sq_entry * req; struct payload * data; struct kpr_thread_info *thr_arg; - struct kpr_queue *r_queue, *o_queue; + struct kpr_queue *r_queue; + struct pollfd fds[1]; + int maxfds = 1; + int poll_ret; int (*hook)(struct payload *); /* We must guarantee this structure is properly polulated or @@ -442,23 +425,34 @@ worker_thread(void * ap) r_queue = thr_arg->r_queue; hook = thr_arg->hook; - for(;;) { - pthread_mutex_lock(&r_queue->mtx); + /* Register events for poll */ + fds[0].fd = r_queue->efd; + fds[0].events = POLLIN; - while (SIMPLEQ_EMPTY(&r_queue->qhead)) { - pthread_cond_wait(&r_queue->cnd, &r_queue->mtx); + for(;;) { + req = get_req_from_queue(r_queue); + if (req == NULL) { + /* wait until there is something in the queue */ + for (;;) { + /* wait for ever */ + poll_ret = poll(fds, maxfds, -1); + if ( poll_ret < 1 ) { /* 0 not expected */ + fprintf(stderr, "poll returned %d, %s\n", + poll_ret, 
strerror(errno)); + continue; + } + if (fds[0].revents) + break; + } + /* try again to get a req after the poll */ + continue; } - /* pop from requests queue and unlock */ - req = SIMPLEQ_FIRST(&r_queue->qhead); - SIMPLEQ_REMOVE_HEAD(&r_queue->qhead, entries); - pthread_mutex_unlock(&r_queue->mtx); - data = &req->data; /* For the time being we use PAYLOAD_UNDEF as a way to notify threads to exit */ - if (data->reply == PAYLOAD_UNDEF) { + if (data->type == PAYLOAD_UNDEF) { free(req); fprintf(stderr, "Thread cancellation received\n"); return NULL; @@ -468,288 +462,35 @@ worker_thread(void * ap) hook(data); /* push to out queue */ - o_queue = get_out_queue(data); - pthread_mutex_lock(&o_queue->mtx); - SIMPLEQ_INSERT_TAIL(&o_queue->qhead, req, entries); - /* net queue needs to notify through eventfd */ - if (o_queue == net_queue) - eventfd_write(o_queue->efd, 1); - pthread_mutex_unlock(&o_queue->mtx); - } - return NULL; -} - - -static void * -worker_thread_net(void * ap) -{ - struct sq_entry * req; - struct payload * data; - struct kpr_thread_info *thr_arg; - struct kpr_queue *r_queue; - - struct pollfd fds[2]; - int maxfds = 2; - int i; - int (*hook)(struct payload *); - int (*net_hook)(struct payload *); - - int sfd, cfd; - struct payload buf; - static int len = sizeof(buf); - struct sockaddr_in c_addr; - socklen_t c_len; - char *c; - - uint64_t ebuf; - int ret; - - /* We must guarantee this structure is properly polulated or - check it and fail in case it is not. In the latter case - we need to check if the thread has returned. 
- */ - thr_arg = (struct kpr_thread_info *) ap; - r_queue = thr_arg->r_queue; - hook = thr_arg->hook; - net_hook = thr_arg->net_hook; - - /* - * Network specific block - */ - if (thr_arg->net) { - /* create tcp socket and listen */ - sfd = kpr_tcp_create(PORT_NO); - if (sfd < 0) - return NULL; - } else { - sfd = -1; - maxfds = 1; /* no need to loop more */ - } - - /* register events for poll */ - fds[0].fd = r_queue->efd; - fds[0].events = POLLIN; - fds[1].fd = sfd; /* if net=false it is ok to be negative */ - fds[1].events = POLLIN; - - for(;;) { - ret = poll(fds, maxfds, -1); /* wait for ever */ - if ( ret < 1 ) { /* 0 not expected */ - fprintf(stderr, "poll returned %d\n", ret); - continue; - } - - for( i = 0; i < maxfds; ++i) { - if ( !fds[i].revents ) - continue; - switch(i) { - case 0: /* queue request */ - pthread_mutex_lock(&r_queue->mtx); - /* others using this queue..? */ - if (SIMPLEQ_EMPTY(&r_queue->qhead)) { - pthread_mutex_unlock(&r_queue->mtx); - continue; - } - - /* pop from requests queue and unlock */ - req = SIMPLEQ_FIRST(&r_queue->qhead); - SIMPLEQ_REMOVE_HEAD(&r_queue->qhead, entries); - /* notify back we read one el of the queue */ - eventfd_read(r_queue->efd, &ebuf); - pthread_mutex_unlock(&r_queue->mtx); - - data = &req->data; - printf("Request dequeued in dispatch queue\n"); - /* For the time being we use PAYLOAD_UNDEF as a way - to notify threads to exit - */ - if (data->reply == PAYLOAD_UNDEF) { - fprintf(stderr, "Thread cancellation received\n"); - free(req); - if(sfd >= 0) - close(sfd); - return NULL; - } - - /* Execute worker-thread specific hook */ - if ( hook(data) ) { - free(req); - continue; - } - - /* push to served queue */ - pthread_mutex_lock(&out_queue->mtx); - SIMPLEQ_INSERT_TAIL(&out_queue->qhead, req, entries); - pthread_mutex_unlock(&out_queue->mtx); - break; - case 1: /* TCP socket */ - c_len = sizeof(c_addr); - cfd = accept4(sfd, &c_addr, &c_len, SOCK_CLOEXEC); - if (cfd == -1) { - fprintf(stderr, "Accept error\n"); - 
continue; - } - if ( read(cfd, &buf, len) != len ) { - fprintf(stderr, "TCP read error\n"); - continue; - } - - req = malloc(sizeof(struct sq_entry)); - if(!req) { - fprintf(stderr, "Cannot allocate" - "for TCP packet\n"); - continue; - } - - req->data = buf; - buf.reply = PAYLOAD_ACCEPTED; - - printf("Payload received on netsocket accepted\n"); - - /* Always acknowledge we got it */ - /* TBD: very basic write, need a while loop */ - if (write(cfd, &buf, len) != len) - fprintf(stderr, "TCP not " - "acknowledged\n"); - - /* store sender ipaddr */ - c = inet_ntoa(c_addr.sin_addr); - strncpy(req->data.ipaddr, c, IP_MAX_LEN); - - /* process payload */ - if ( net_hook(&req->data) ) { - free(req); - continue; - } - - /* push to served queue */ - pthread_mutex_lock(&out_queue->mtx); - SIMPLEQ_INSERT_TAIL(&out_queue->qhead, req, entries); - pthread_mutex_unlock(&out_queue->mtx); - break; - default: /* it should not happen */ - fprintf(stderr, "what?!?!\n"); - } - } + put_req_into_queue(out_queue, req); } return NULL; } - static int slave_worker_hook(struct payload *data) { int ret; /* Fulfil request */ - ret = increase_size(data->curr, data->path); + ret = increase_size(data->req_size, data->path); if (ret == 0 || ret == 3) /* 3 means big enough */ - data->reply = PAYLOAD_DONE; + data->err_code = THIN_ERR_CODE_SUCCESS; else - data->reply = PAYLOAD_REJECTED; - printf("worker_thread: completed %u %s (%d)\n\n", - (unsigned)data->id, data->path, ret); - - return 0; -} - - -static int -reject_hook(struct payload *data) -{ - /* Reject request */ - data->reply = PAYLOAD_REJECTED; - printf("default_thread: No registered VG!\n\n"); - + data->err_code = THIN_ERR_CODE_FAILURE; + printf("worker_thread: completed %s (%d)\n\n", + data->path, ret); + /* FIXME: + * Probably we do not need to call refresh_lvm, leaving the + * code here commented so we do not forget that before it was + * called from the slave as a result of a resize from the + * done from master */ +#ifdef THIN_REFRESH_LVM 
+ refresh_lvm(data->path); +#endif /* THIN_REFRESH_LVM */ return 0; } -/** - * Send packet to specified destination. If send is successful and - * packet is accepted it returns 1 because there is nothing more - * to be done. Reply will come on the TCP socket. - * In master mode packets not sent are discarded, while in slave - * mode they are queued as rejected. - * - * @param[in,out] data to be processed - * @return 0 if packet is not sent and marked rejected. - * 1 if sent or to be discarded anyway - */ -static int -dispatch_hook(struct payload *data) -{ - /* Send */ - if ( !kpr_tcp_conn_tx_rx(data->ipaddr, PORT_NO, data ) ) { - fprintf(stderr, "Dispatch failed\n"); - goto fail; - } - /* Check reply */ - if ( data->reply != PAYLOAD_ACCEPTED ) { - fprintf(stderr, "Payload rejected\n"); - goto fail; - } - else { - printf("Payload dispatched\n"); - } - - return 1; -fail: - return master ? 1 : reject_hook(data); -} - - -/** - * Packet can be either DONE or REJECTED, in any other case packet - * is discarded. - * - * @param[in,out] data to be processed - * @return 0 if packet can be pushed in the "served" queue, 1 otherwise - */ -static int -slave_net_hook(struct payload *data) -{ - switch(data->reply) { - case PAYLOAD_REJECTED: - break; - case PAYLOAD_DONE: - if ( refresh_lvm(data->path) ) { - printf("*** Refresh failed ***\n"); - } - break; - default: - fprintf(stderr, "Spurious payload in slave_net_hook\n"); - return 1; - } - - return 0; -} - - -/** - * Packet can be only a REQUEST, in any other case packet - * is discarded. If it is a request, we always return 1 because - * it is either pushed in the proper queue here or discarded. 
- * - * @param[in,out] data to be processed - * @return 1 - */ -static int -master_net_hook(struct payload *data) -{ - /* Check reply */ - if ( data->reply != PAYLOAD_REQUEST ) { - fprintf(stderr, "Spurious payload in master_net_hook\n"); - print_payload(data); - return 1; - } - - /* Either way we need to return 1 to avoid further push in queue */ - if ( handle_request(data) ) - fprintf(stderr, "Packet discarded\n"); - return 1; -} - - /** * @param size: current size to increase in bytes * @param path: device full path @@ -762,7 +503,6 @@ increase_size(off64_t size, const char * path) pid_t pid; int status, num_read; char ssize[NCHARS]; /* enough for G bytes */ - size += 104857600; /* add 100 MB */ /* prepare size for command line */ num_read = snprintf(ssize, NCHARS, "-L""%"PRIu64"b", size); @@ -787,7 +527,7 @@ increase_size(off64_t size, const char * path) } } - +#ifdef THIN_REFRESH_LVM /** * @param path: device full path * @return command return code if command returned properly, -1 otherwise @@ -815,7 +555,7 @@ refresh_lvm(const char * path) return status; } } - +#endif /* THIN_REFRESH_LVM */ static void parse_cmdline(int argc, char ** argv) @@ -830,11 +570,6 @@ parse_cmdline(int argc, char ** argv) case 'f': /* if daemonized leave fd open */ fd_open = 1; break; - case 's': /* start daemon in slave mode */ - printf("Master Ip address passed as: %s\n", optarg); - strncpy(master_ip, optarg, IP_MAX_LEN); - master = false; - break; default: break; } @@ -961,12 +696,9 @@ del_vg(char *vg) return 1; } init_payload(&req->data); - req->data.reply = PAYLOAD_UNDEF; + req->data.type = PAYLOAD_UNDEF; /* Insert in queue */ - pthread_mutex_lock(&p_vg->r_queue->mtx); - SIMPLEQ_INSERT_TAIL(&p_vg->r_queue->qhead, req, entries); - pthread_cond_signal(&p_vg->r_queue->cnd); /* Wake thread if needed */ - pthread_mutex_unlock(&p_vg->r_queue->mtx); + put_req_into_queue(p_vg->r_queue, req); /* Wait for thread to complete */ ret = pthread_join(p_vg->thr.thr_id, NULL); @@ -985,39 +717,6 
@@ del_vg(char *vg) return 0; } - -int -slave_mode(char *ipaddr) -{ - fprintf(stderr, "CLI slave %s received\n", ipaddr); - if (master) { - fprintf(stderr, "Fake: switching master to slave\n"); - } else { - fprintf(stderr, "Already in slave mode: checking ip addr\n"); - pthread_mutex_lock(&ip_mtx); /* not really needed.. */ - if ( !strcmp(master_ip, ipaddr) ) { - fprintf(stderr, "nothing to be done\n"); - goto done; - } - strncpy(master_ip, ipaddr, IP_MAX_LEN); - pthread_mutex_unlock(&ip_mtx); - } - - return 0; -done: - pthread_mutex_unlock(&ip_mtx); - return 0; -} - - -int -master_mode(void) -{ - fprintf(stderr, "CLI master received\n"); - return 0; -} - - /** * This function searches the vg_pool for an entry with a given VG name. * If invoked with locking no mutexes must be hold @@ -1063,20 +762,29 @@ vg_pool_find_and_remove(char *vg_name) pthread_mutex_lock(&vg_pool.mtx); entry = vg_pool_find(vg_name, false); - if(!entry) + if(!entry) { + pthread_mutex_unlock(&vg_pool.mtx); return NULL; + } LIST_REMOVE(entry, entries); pthread_mutex_unlock(&vg_pool.mtx); return entry; } - -static struct kpr_queue * -get_out_queue(struct payload *data) +#if 0 +/* Leaving this here in case will be usefull later */ +static struct sq_entry * +find_and_remove(struct sqhead * head, pid_t id) { - if ( master && (data->ipaddr[0] != '\0') ) - return net_queue; - - return out_queue; + struct sq_entry * entry; + SIMPLEQ_FOREACH(entry, head, entries) { + if (entry->data.id == id) { + SIMPLEQ_REMOVE(head, entry, sq_entry, entries); + return entry; + } + } + /* No matches */ + return NULL; } +#endif diff --git a/thin/thin_cli.c b/thin/thin_cli.c index 3f51868d..ac647e9f 100644 --- a/thin/thin_cli.c +++ b/thin/thin_cli.c @@ -8,6 +8,7 @@ static void usage(char *); int main(int argc, char *argv[]) { struct payload message; + struct thin_conn_handle *ch; int arg; int opt_idx = 0, flag = 1; int ret; @@ -15,11 +16,12 @@ main(int argc, char *argv[]) { const struct option longopts[] = { { "add", 
required_argument, NULL, 0 }, { "del", required_argument, NULL, 0 }, - { "master", no_argument, NULL, 0 }, - { "slave", required_argument, NULL, 's' }, { 0, 0, 0, 0 } }; + init_payload(&message); + message.type = PAYLOAD_CLI; + /* We expect at least one valid option and, if more, the others are discarded */ @@ -60,14 +62,17 @@ main(int argc, char *argv[]) { return 1; } - init_payload(&message); - message.reply = PAYLOAD_CLI; - - ret = thin_sock_comm(&message); + ch = thin_connection_create(); + if (ch == NULL) { + fprintf(stderr, "connection initialization failed"); + return 1; + } + ret = thin_sync_send_and_receive(ch, &message); if(ret) { - fprintf(stderr, "socket error (%d)\n", ret); - return 1; + fprintf(stderr, "socket error (%d)\n", ret); + return 1; } + thin_connection_destroy(ch); printf("message: %s\n", message.path); return 0; @@ -79,6 +84,4 @@ usage(char *prog_name) printf("usage: %s -h\n", prog_name); printf("usage: %s --add \n", prog_name); printf("usage: %s --del \n", prog_name); - printf("usage: %s --master\n", prog_name); - printf("usage: %s --slave \n", prog_name); } From 599049c5416246a51980fdee7d1014869113318c Mon Sep 17 00:00:00 2001 From: Kostas Ladopoulos Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 20/43] CP-11868: Extend blktap's lvm-util for thin provisioning When the SR blktap is operating on is thinly provisioned, pass all lvm commands as arguments to /bin/xenvm. 
Signed-off-by: Kostas Ladopoulos Signed-off-by: Jon Ludlam Reviewed-by: Germano Percossi --- lvm/lvm-util.c | 49 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/lvm/lvm-util.c b/lvm/lvm-util.c index 0c41f6fc..5e3d71e0 100644 --- a/lvm/lvm-util.c +++ b/lvm/lvm-util.c @@ -96,6 +96,31 @@ lvm_parse_pv(struct vg *vg, const char *name, int pvs, uint64_t start) return 0; } +static int +lvm_create_cmd(char *out, const char *command) +{ + char *sr_alloc; + int thin_flag; + + if (!(sr_alloc = getenv("SR_ALLOC"))) { + return -EINVAL; + } + + if (!strcmp(sr_alloc, "thin")) { + thin_flag = 1; + } else if (!strcmp(sr_alloc, "thick")) { + thin_flag = 0; + } else { + return -EINVAL; + } + + strcpy(out, thin_flag ? "/bin/xenvm " : ""); + strcat(out, command); + strcat(out, " 2> /dev/null"); + + return 0; +} + static int lvm_open_vg(const char *vgname, struct vg *vg) { @@ -103,12 +128,18 @@ lvm_open_vg(const char *vgname, struct vg *vg) int i, err, pvs, lvs; char *cmd, pvname[256]; uint64_t size, pv_start; + char buf[MAX_NAME_SIZE + 256]; + + if ((err = lvm_create_cmd(buf, "vgs %s --noheadings --nosuffix --units=b " + "--options=vg_name,vg_extent_size,lv_count,pv_count," + "pv_name,pe_start"))) { + return err; + } memset(vg, 0, sizeof(*vg)); - err = asprintf(&cmd, "/bin/xenvm vgs %s --noheadings --nosuffix --units=b " - "--options=vg_name,vg_extent_size,lv_count,pv_count," - "pv_name,pe_start 2> /dev/null", vgname); + err = asprintf(&cmd, buf, vgname); + if (err == -1) return -ENOMEM; @@ -210,10 +241,16 @@ lvm_scan_lvs(struct vg *vg) char *cmd; FILE *scan; int i, err; + char buf[MAX_NAME_SIZE + 256]; + + if ((err = lvm_create_cmd(buf, "lvs %s --noheadings --nosuffix --units=b " + "--options=lv_name,lv_size,segtype,seg_count,seg_start," + "seg_size,devices"))) { + return err; + } + + err = asprintf(&cmd, buf, vg->name); - err = asprintf(&cmd, "/bin/xenvm lvs %s --noheadings --nosuffix --units=b " - 
"--options=lv_name,lv_size,segtype,seg_count,seg_start," - "seg_size,devices 2> /dev/null", vg->name); if (err == -1) return -ENOMEM; From 8fa5232e0f13e7282c8110a23ff43449228bed35 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 21/43] Fix the timeout in sock_client2.c implementation Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- thin/sock_client2.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/thin/sock_client2.c b/thin/sock_client2.c index de0f7126..41903cdd 100644 --- a/thin/sock_client2.c +++ b/thin/sock_client2.c @@ -36,14 +36,14 @@ int thin_sync_send_and_receive(struct thin_conn_handle *ch, return 0; } -static int timeout = 10; - struct thin_conn_handle * thin_connection_create(void) { struct sockaddr_un svaddr, claddr; struct thin_conn_handle *ch; char client_sock_name[64]; + struct timeval timeout; + int ret; ch = malloc(sizeof(struct thin_conn_handle)); if (ch == NULL) @@ -55,8 +55,14 @@ thin_connection_create(void) goto out1; } - setsockopt(ch->sfd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, - sizeof(timeout)); + timeout.tv_sec = 10; + timeout.tv_usec = 0; + ret = setsockopt(ch->sfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, + sizeof(struct timeval)); + if (ret < 0) { + EPRINTF("Socket set timeout failed"); + goto out1; + } sprintf(client_sock_name, "td_thin_client_%d", getpid()); From e2a9b127ede2438ec8b4ee1a48438273f2293f41 Mon Sep 17 00:00:00 2001 From: Chandrika Srinivasan Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 22/43] CA-169598: thinprovd started as a service at boot Added init script that will start the thinprovd daemon at boot time. 
Signed-off-by: Chandrika Srinivasan Reviewed-by: Germano Percossi --- mk/blktap.spec.in | 4 +- thin/Makefile.am | 14 +++++-- thin/thinprovd.init | 91 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 4 deletions(-) create mode 100755 thin/thinprovd.init diff --git a/mk/blktap.spec.in b/mk/blktap.spec.in index 92846d35..c221eb2f 100644 --- a/mk/blktap.spec.in +++ b/mk/blktap.spec.in @@ -61,7 +61,7 @@ mkdir -p %{buildroot}%{_localstatedir}/log/blktap %{_sbindir}/td-rated %{_sbindir}/part-util %{_sbindir}/vhdpartx -%{_sbindir}/thin-tapdisk +%{_sbindir}/thinprovd %{_sbindir}/thin-cli %{_libexecdir}/tapdisk %{_sysconfdir}/udev/rules.d/blktap.rules @@ -69,6 +69,7 @@ mkdir -p %{buildroot}%{_localstatedir}/log/blktap %{_sysconfdir}/logrotate.d/blktap %{_sysconfdir}/xensource/bugtool/tapdisk-logs.xml %{_sysconfdir}/xensource/bugtool/tapdisk-logs/description.xml +%{_sysconfdir}/rc.d/init.d/thinprovd %{_localstatedir}/log/blktap %files devel @@ -81,5 +82,6 @@ mkdir -p %{buildroot}%{_localstatedir}/log/blktap %post [ ! -x /sbin/chkconfig ] || chkconfig --add tapback +[ ! 
-x /sbin/chkconfig ] || chkconfig --add thinprovd %changelog diff --git a/thin/Makefile.am b/thin/Makefile.am index 9d2bf804..decb6780 100644 --- a/thin/Makefile.am +++ b/thin/Makefile.am @@ -6,9 +6,9 @@ AM_CFLAGS += -pthread AM_CPPFLAGS = -D_GNU_SOURCE AM_CPPFLAGS += -I$(top_srcdir)/include -sbin_PROGRAMS = thin-tapdisk -thin_tapdisk_SOURCES = sock_serv.c -thin_tapdisk_LDADD = libtapdiskthin.la +sbin_PROGRAMS = thinprovd +thinprovd_SOURCES = sock_serv.c +thinprovd_LDADD = libtapdiskthin.la sbin_PROGRAMS += thin-cli thin_cli_SOURCES = thin_cli.c @@ -22,3 +22,11 @@ libtapdiskthin_la_SOURCES += kpr_util.c libtapdiskthin_la_LDFLAGS = -version-info 1:1:1 +# Have "exec" in name to ensure it's done trough the install-exec route +# (and before install-exec-local) +exec_init_ddir = $(sysconfdir)/rc.d/init.d +dist_exec_init_d_SCRIPTS = thinprovd.init + +install-exec-local: + cd ${DESTDIR}${exec_init_ddir} && mv thinprovd.init thinprovd + diff --git a/thin/thinprovd.init b/thin/thinprovd.init new file mode 100755 index 00000000..1eec7d0c --- /dev/null +++ b/thin/thinprovd.init @@ -0,0 +1,91 @@ +#!/bin/bash +# +# Copyright (C) Citrix Systems Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation; version 2.1 only. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public License +# along with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# +# /etc/rc.d/init.d/thinprovd +# +# Starts the thin provisioning daemon +# +# chkconfig: 2345 22 76 +# description: Manage the thin-provisioning daemon +# processname: thinprovd +# PID_FILE: /var/run/thinprovd.pid + +declare -r DAEMON=/usr/sbin/thinprovd +declare -r PROG=`basename $DAEMON` +declare -r PID_FILE=/var/run/${PROG}.pid + +# Source function library. +. /etc/init.d/functions + +RETVAL=0 + +start() { + test -x $DAEMON || exit 5 + if [ -f $PID_FILE ]; then + pid=`cat $PID_FILE` + echo "$PROG already running with pid $pid" + return 0 + fi + echo -n $"Starting $PROG daemon: " + $DAEMON -d + RETVAL=$? + if [ $RETVAL -eq 0 ]; then + i=`pidof $PROG` + echo $i > $PID_FILE + success + else + failure + fi + return $RETVAL +} + +stop() { + echo -n $"Stopping $PROG daemon: " + test -e $PID_FILE || exit 5 + PID=`cat $PID_FILE` + kill -SIGINT $PID + RETVAL=$? + [ $RETVAL -eq 0 ] && rm -f $PID_FILE + [ $RETVAL -eq 0 ] && success || failure + return $RETVAL +} + +restart() { + stop + start +} + +case "$1" in +start) + start + ;; +stop) + stop + ;; +restart) + restart + ;; +status) + status $PROG + RETVAL=$? + ;; +*) + echo $"Usage: $0 {start|stop|status|restart}" + RETVAL=3 +esac + +exit $RETVAL From 3a3df953eced9a0c6085831ece94f3d5457a62cd Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 23/43] Always exit upon signal Signed-off-by: Germano Percossi --- thin/sock_serv.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index ad23f915..a3d141fe 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -154,12 +154,12 @@ put_req_into_queue(struct kpr_queue *q, struct sq_entry *req ) * is created, so no checks. 
* FIXME: did not give much thought to its behaviour in multi-threaded env * - * @param signo the signal to hanlder (SIGINT) + * @param[in] signo the signal to handle */ static void clean_handler(int signo) { - if ( unlink(THIN_CONTROL_SOCKET) ) - _exit(1); + unlink(THIN_CONTROL_SOCKET); + _exit(0); } int From 354fad5b8a9c3b54c56ab58af338351ea1c5c50c Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 24/43] Signal handlers registered through proper setter function Signed-off-by: Germano Percossi --- thin/sock_serv.c | 44 +++++++++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index a3d141fe..46789d42 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -35,6 +35,8 @@ static int do_daemon(void); static void split_command(char *, char **); static int add_vg(char *vg); static int del_vg(char *vg); +static int signal_set(int signo, void (*func) (int)); +static void clean_handler(int signo); #ifdef THIN_REFRESH_LVM static int refresh_lvm(const char * path); @@ -148,7 +150,7 @@ put_req_into_queue(struct kpr_queue *q, struct sq_entry *req ) } /** - * SIGINT handler to clean-up socket file on exit + * Signal handler to clean-up socket file on exit * * This is very basic and it assumes it is registered once the socket file * is created, so no checks. 
@@ -162,15 +164,35 @@ static void clean_handler(int signo) _exit(0); } -int -main(int argc, char *argv[]) { - struct sigaction new_act, old_act; /* SIGINT handling */ +/** + * Set a new signal handler for the specified signal + * + * @param[in] signo the signal to handle + * @return the same as sigaction + */ +static int +signal_set(int signo, void (*func) (int)) +{ + struct sigaction new_act; + struct sigaction old_act; + int r; - /* SIGINT handling */ - new_act.sa_handler = clean_handler; + new_act.sa_handler = func; sigemptyset(&new_act.sa_mask); - new_act.sa_flags = 0; + new_act.sa_flags = 0; + + r = sigaction(signo, &new_act, &old_act); + if (r < 0) + fprintf(stderr, "Signal %d: handler registration failed", + signo); + return r; +} + + +int +main(int argc, char *argv[]) { + struct pollfd fds[2]; nfds_t maxfds = 2; @@ -213,12 +235,8 @@ main(int argc, char *argv[]) { return -errno; } - /* Register now the SIGINT handler to remove socket cleanly */ - if ( sigaction(SIGINT, &new_act, &old_act) < 0 ) { - printf("Signal handler registration failed: expect manual" - " clean-up\n"); - } - + signal_set(SIGINT, clean_handler); + fds[0].fd = out_queue->efd; fds[0].events = POLLIN; fds[1].fd = sfd; From f6099ed980f199a28280efafa1bfdea37854dd69 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 25/43] Handle SIGTERM as well Signed-off-by: Germano Percossi --- thin/sock_serv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/thin/sock_serv.c b/thin/sock_serv.c index 46789d42..bf1219ee 100644 --- a/thin/sock_serv.c +++ b/thin/sock_serv.c @@ -236,6 +236,7 @@ main(int argc, char *argv[]) { } signal_set(SIGINT, clean_handler); + signal_set(SIGTERM, clean_handler); fds[0].fd = out_queue->efd; fds[0].events = POLLIN; From a020e93234b770110b95f83dd533c1d432b350e0 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 26/43] Daemon killed using SIGTERM (now handled by the daemon) 
Signed-off-by: Germano Percossi --- thin/thinprovd.init | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thin/thinprovd.init b/thin/thinprovd.init index 1eec7d0c..eb532ee3 100755 --- a/thin/thinprovd.init +++ b/thin/thinprovd.init @@ -57,7 +57,7 @@ stop() { echo -n $"Stopping $PROG daemon: " test -e $PID_FILE || exit 5 PID=`cat $PID_FILE` - kill -SIGINT $PID + kill $PID RETVAL=$? [ $RETVAL -eq 0 ] && rm -f $PID_FILE [ $RETVAL -eq 0 ] && success || failure From 7ecea239764e8b635bf2d739645f757275a386c0 Mon Sep 17 00:00:00 2001 From: Kostas Ladopoulos Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 27/43] CP-12478: Implement a logging mechanism for thinprovd Signed-off-by: Kostas Ladopoulos Reviewed-by: Stefano Panella --- include/Makefile.am | 1 + include/thinprovd_log.h | 36 +++++++++++++++++ thin/Makefile.am | 1 + thin/thinprovd_log.c | 89 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+) create mode 100644 include/thinprovd_log.h create mode 100644 thin/thinprovd_log.c diff --git a/include/Makefile.am b/include/Makefile.am index 3314e0bc..9f03d9aa 100644 --- a/include/Makefile.am +++ b/include/Makefile.am @@ -20,6 +20,7 @@ blktap_HEADERS += debug.h blktap_HEADERS += util.h blktap_HEADERS += payload.h blktap_HEADERS += kpr_util.h +blktap_HEADERS += thinprovd_log.h noinst_HEADERS = blktap.h noinst_HEADERS += compiler.h diff --git a/include/thinprovd_log.h b/include/thinprovd_log.h new file mode 100644 index 00000000..668e05e1 --- /dev/null +++ b/include/thinprovd_log.h @@ -0,0 +1,36 @@ +#ifndef THIN_THINPROVD_LOG +#define THIN_THINPROVD_LOG + +/* + * This header file provides a logging API for thinprovd. + * + * Things to note: + * + * 1) Messages over 1024 characters will be truncated by syslogd. + * + * 2) New line characters, '\n', DO NOT break lines; the octal ASCII + * code of the char is printed instead. 
For every new line you want, + * you have to call a variant of 'thinpd_log_X()' + */ + +enum THINPD_LOG_LEVEL +{ + THINPD_LOG_ERR, + THINPD_LOG_INFO, + THINPD_LOG_DBG +}; + +void thinpd_openlog (void); +void thinpd_closelog(void); + +/* + * Use this function to specify the LOWEST logging level. + * Everything from this level and above will be logged. + */ +void thinpd_log_upto(enum THINPD_LOG_LEVEL tpd_log_level); + +void thinpd_log_err (const char *message, ...); +void thinpd_log_info(const char *message, ...); +void thinpd_log_dbg (const char *message, ...); + +#endif /* THIN_THINPROVD_LOG */ diff --git a/thin/Makefile.am b/thin/Makefile.am index decb6780..510e5c4e 100644 --- a/thin/Makefile.am +++ b/thin/Makefile.am @@ -19,6 +19,7 @@ lib_LTLIBRARIES = libtapdiskthin.la libtapdiskthin_la_SOURCES = payload.c libtapdiskthin_la_SOURCES += sock_client2.c libtapdiskthin_la_SOURCES += kpr_util.c +libtapdiskthin_la_SOURCES += thinprovd_log.c libtapdiskthin_la_LDFLAGS = -version-info 1:1:1 diff --git a/thin/thinprovd_log.c b/thin/thinprovd_log.c new file mode 100644 index 00000000..ac5ee18c --- /dev/null +++ b/thin/thinprovd_log.c @@ -0,0 +1,89 @@ +/* This define is needed for vsyslog(). 
*/ +#define _BSD_SOURCE + +#include "thinprovd_log.h" + +#include +#include + +#define THINPROVD "THINPROVD" + +static int +thinpd_match_log_level(enum THINPD_LOG_LEVEL tpd_log_level) +{ + int log_level; + + switch (tpd_log_level) { + case THINPD_LOG_ERR: + log_level = LOG_EMERG; + break; + case THINPD_LOG_INFO: + log_level = LOG_INFO; + break; + case THINPD_LOG_DBG: + log_level = LOG_DEBUG; + break; + default: + log_level = LOG_INFO; + } + + return log_level; +} + +inline void +thinpd_openlog(void) +{ + openlog(THINPROVD, LOG_CONS | LOG_PID, LOG_DAEMON); +} + +inline void +thinpd_closelog(void) +{ + closelog(); +} + +inline void +thinpd_log_upto(enum THINPD_LOG_LEVEL tpd_log_level) +{ + setlogmask(LOG_UPTO(thinpd_match_log_level(tpd_log_level))); +} + +void +thinpd_log_err(const char *message, ...) +{ + va_list args; + + va_start(args, message); + + /* THINPD_LOG_ERR == LOG_EMERG */ + vsyslog(LOG_EMERG, message, args); + + va_end(args); +} + +void +thinpd_log_info(const char *message, ...) + +{ + va_list args; + + va_start(args, message); + + /* THINPD_LOG_INFO == LOG_INFO */ + vsyslog(LOG_INFO, message, args); + + va_end(args); +} + +void +thinpd_log_dbg(const char *message, ...) +{ + va_list args; + + va_start(args, message); + + /* THINPD_LOG_DBG == LOG_DEBUG */ + vsyslog(LOG_DEBUG, message, args); + + va_end(args); +} From b802f13c13ae9a6a7c3f50b771929007d5ff2fdc Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 28/43] CP-12487 Add quantum allocator parameter From: Stefano Panella - 'tap-ctl open' and 'tap-ctl create' now accept an extra optional parameter "-q XXX" meaning allocation quantum size in MB. - to make use of this new parameter the logic of the thin_check function has changed a bit since previously everything was hardcoded - rearranging the thin_check has led to a fix for [CA-172442], no sleep if not strictly needed - Now the hook is going to sleep only if available_bytes is negative (i.e.
no space to perform any further operation) - I have also fixed [CA-173081] since now when we reach the end of the size we decrease the quantum allocation to the minimum (16MBytes) to avoid wasting too much space, so a maximum of 16 MB are wasted Signed-off-by: Stefano Panella Acked-by: Germano Percossi --- control/tap-ctl-create.c | 4 +- control/tap-ctl-open.c | 4 +- control/tap-ctl.c | 28 ++++-- drivers/block-vhd.c | 170 ++++++++++++++++++++++-------------- drivers/tapdisk-control.c | 5 ++ drivers/tapdisk-interface.c | 15 ++++ drivers/tapdisk-interface.h | 1 + drivers/tapdisk-vbd.c | 6 ++ drivers/tapdisk-vbd.h | 1 + drivers/tapdisk.h | 1 + include/tap-ctl.h | 6 +- include/tapdisk-message.h | 1 + 12 files changed, 162 insertions(+), 80 deletions(-) diff --git a/control/tap-ctl-create.c b/control/tap-ctl-create.c index d9ae1e46..16ad01d5 100644 --- a/control/tap-ctl-create.c +++ b/control/tap-ctl-create.c @@ -30,7 +30,7 @@ int tap_ctl_create(const char *params, char **devname, int flags, int parent_minor, - char *secondary, int timeout) + char *secondary, int timeout, int alloc_quantum) { int err, id, minor; @@ -49,7 +49,7 @@ tap_ctl_create(const char *params, char **devname, int flags, int parent_minor, goto destroy; err = tap_ctl_open(id, minor, params, flags, parent_minor, secondary, - timeout); + timeout, alloc_quantum); if (err) goto detach; diff --git a/control/tap-ctl-open.c b/control/tap-ctl-open.c index 3f207df3..d881416a 100644 --- a/control/tap-ctl-open.c +++ b/control/tap-ctl-open.c @@ -30,7 +30,8 @@ int tap_ctl_open(const int id, const int minor, const char *params, int flags, - const int prt_minor, const char *secondary, int timeout) + const int prt_minor, const char *secondary, int timeout, + int alloc_quantum) { int err; tapdisk_message_t message; @@ -41,6 +42,7 @@ tap_ctl_open(const int id, const int minor, const char *params, int flags, message.u.params.devnum = minor; message.u.params.prt_devnum = prt_minor; message.u.params.req_timeout = timeout; 
+ message.u.params.alloc_quantum = alloc_quantum; message.u.params.flags = flags; err = snprintf(message.u.params.path, diff --git a/control/tap-ctl.c b/control/tap-ctl.c index 1541b749..e774c8ad 100644 --- a/control/tap-ctl.c +++ b/control/tap-ctl.c @@ -253,13 +253,15 @@ tap_cli_create_usage(FILE *stream) "use secondary image (in mirror mode if no -s)] [-s " "fail over to the secondary image on ENOSPC] " "[-t request timeout in seconds] [-D no O_DIRECT] " - "[-T enable thin provisioning]\n"); + "[-T enable thin provisioning] " + "[-q allocation quantum in MBytes]\n"); + } static int tap_cli_create(int argc, char **argv) { - int c, err, flags, prt_minor, timeout; + int c, err, flags, prt_minor, timeout, alloc_quantum; char *args, *devname, *secondary; args = NULL; @@ -268,9 +270,10 @@ tap_cli_create(int argc, char **argv) prt_minor = -1; flags = 0; timeout = 0; + alloc_quantum = 0; optind = 0; - while ((c = getopt(argc, argv, "a:RDd:e:r2:st:Th")) != -1) { + while ((c = getopt(argc, argv, "a:RDd:e:r2:st:Tq:h")) != -1) { switch (c) { case 'a': args = optarg; @@ -304,6 +307,9 @@ tap_cli_create(int argc, char **argv) case 'T': flags |= TAPDISK_MESSAGE_FLAG_THIN; break; + case 'q': + alloc_quantum = atoi(optarg); + break; case '?': goto usage; case 'h': @@ -316,7 +322,7 @@ tap_cli_create(int argc, char **argv) goto usage; err = tap_ctl_create(args, &devname, flags, prt_minor, secondary, - timeout); + timeout, alloc_quantum); if (!err) printf("%s\n", devname); @@ -722,14 +728,15 @@ tap_cli_open_usage(FILE *stream) "use secondary image (in mirror mode if no -s)] [-s " "fail over to the secondary image on ENOSPC] " "[-t request timeout in seconds] [-D no O_DIRECT] " - "[-T enable thin provisioning]\n"); + "[-T enable thin provisioning] " + "[-q allocation quantum in MBytes]\n"); } static int tap_cli_open(int argc, char **argv) { const char *args, *secondary; - int c, pid, minor, flags, prt_minor, timeout; + int c, pid, minor, flags, prt_minor, timeout, alloc_quantum; flags 
= 0; pid = -1; @@ -738,9 +745,11 @@ tap_cli_open(int argc, char **argv) timeout = 0; args = NULL; secondary = NULL; + alloc_quantum = 0; + optind = 0; - while ((c = getopt(argc, argv, "a:RDm:p:e:r2:st:Th")) != -1) { + while ((c = getopt(argc, argv, "a:RDm:p:e:r2:st:Tq:h")) != -1) { switch (c) { case 'p': pid = atoi(optarg); @@ -777,6 +786,9 @@ tap_cli_open(int argc, char **argv) case 'T': flags |= TAPDISK_MESSAGE_FLAG_THIN; break; + case 'q': + alloc_quantum = atoi(optarg); + break; case '?': goto usage; case 'h': @@ -789,7 +801,7 @@ tap_cli_open(int argc, char **argv) goto usage; return tap_ctl_open(pid, minor, args, flags, prt_minor, secondary, - timeout); + timeout, alloc_quantum); usage: tap_cli_open_usage(stderr); diff --git a/drivers/block-vhd.c b/drivers/block-vhd.c index 94da49dc..652140ea 100644 --- a/drivers/block-vhd.c +++ b/drivers/block-vhd.c @@ -145,9 +145,9 @@ unsigned int SPB; #define VHD_FLAG_TX_UPDATE_BAT 2 /*******THIN PARAMETERS******/ -#define THIN_WARN_1 52428800L /* 50 MBs to go, send resize request */ -#define THIN_WARN_2 10485760L /* 10 MBs to go, start slow down */ -#define THIN_RESIZE_INCREMENT 104857600L /* 100 MBs incremets */ +#define THIN_RESIZE_MIN_INCREMENT 16777216L /* 16 MBs incremets */ +#define THIN_RESIZE_MAX_INCREMENT 1073741824L /* 1024 MBs incremets */ +#define THIN_RESIZE_DEF_INCREMENT 104857600L /* 100 MBs incremets */ typedef uint16_t vhd_flag_t; @@ -245,6 +245,20 @@ struct vhd_state { off64_t eof_bytes; uint64_t req_bytes; struct thin_conn_handle *ch; + /* bytes to increase the LV */ + int64_t alloc_quantum; /* bytes the LV will be + * increased every time */ + int64_t virt_bytes; /* virtual VHD size, will be + * set to 0 when the LV is + * increased at least as the + * virtual size */ + int64_t thin_warn_1; /* when available_bytes follow + * below this watermark, a new + * resize is requested */ + int64_t thin_warn_2; /* when available_bytes follow + * below this watermark, we + * start checking if resize + * has 
succeded */ /* for redundant bitmap writes */ int padbm_size; @@ -675,6 +689,8 @@ vhd_thin_prepare(struct vhd_state *s) if ((s->eof_bytes = lseek64(s->vhd.fd, 0, SEEK_END)) == -1) return -errno; s->req_bytes = 0; + s->virt_bytes = vhd_sectors_to_bytes(s->driver->info.size); + EPRINTF("Thin VHD virt_bytes = %ld", s->virt_bytes); s->ch = thin_connection_create(); if (s->ch == NULL) { EPRINTF("thin connection creation has failed"); @@ -741,10 +757,6 @@ __vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags) update_next_db(s, s->next_db); } - if(test_vhd_flag(flags, VHD_FLAG_OPEN_THIN)) { - vhd_thin_prepare(s); - } - SPB = s->spb; s->vreq_free_count = VHD_REQS_DATA; @@ -755,6 +767,10 @@ __vhd_open(td_driver_t *driver, const char *name, vhd_flag_t flags) driver->info.sector_size = VHD_SECTOR_SIZE; driver->info.info = 0; + if(test_vhd_flag(flags, VHD_FLAG_OPEN_THIN)) { + vhd_thin_prepare(s); + } + DBG(TLOG_INFO, "vhd_open: done (sz:%"PRIu64", sct:%lu, inf:%u)\n", driver->info.size, driver->info.sector_size, driver->info.info); @@ -1397,70 +1413,70 @@ static inline int thin_check_warn_2(struct vhd_state *s, int64_t available_bytes) { uint64_t phy_bytes; - - /* we are worried the resize is taking too long, we will try to - * buy some time sleeping some time for every new block request - * We sleep more if available_bytes is getting small. 
- */ - sleep((THIN_WARN_2 - available_bytes)/(1024*1024)); - EPRINTF("Sleep(%ld)", (THIN_WARN_2 - available_bytes)/(1024*1024)); - phy_bytes = lseek64(s->vhd.fd, 0, SEEK_END); - if ((s->eof_bytes + THIN_RESIZE_INCREMENT) <= phy_bytes) { - /* Request is completed but maybe the response got lost */ - s->eof_bytes = phy_bytes; - s->req_bytes = 0; - return 0; - } + int count; if (available_bytes < 0) { - /* We must fail with ENOSPC after we tried everything */ - EPRINTF("Returning -1"); - return -1; + count = 40; } else { - return 0; + count = 1; } -} - -static inline void -check_resize_request_progress(struct vhd_state *s, struct payload *message, - int64_t available_bytes) -{ -#ifdef THIN_OLD_PROTOCOL - int err; - uint64_t phy_bytes; - message->reply = PAYLOAD_QUERY; - err = thin_sock_comm(message); - if (err) { - DBG(TLOG_WARN, "socket returned: %d\n", err); - } else if (message->reply == PAYLOAD_DONE) { - /* check if the obtained size is compatible with the - * requested one */ + while (count--) { phy_bytes = lseek64(s->vhd.fd, 0, SEEK_END); - - EPRINTF("Requested:%ld, obtained:%ld:\n", - s->req_bytes, phy_bytes); + EPRINTF("thin_check_warn_2 phy_bytes = %ld", phy_bytes); if (s->req_bytes <= phy_bytes) { + /* Request is completed */ + EPRINTF("Request has been completed"); s->eof_bytes = phy_bytes; s->req_bytes = 0; - - } else { - /* The daemon was thinking that the resize was - * successfull but somehow it extended less than - * asked, do an other request */ - EPRINTF("Extended less than expected"); - s->req_bytes = 0; + return 0; + } + if (count > 0) { + sleep(1); } } -#else - return; -#endif /* THIN_OLD_PROTOCOL */ + + if (available_bytes < 0) { + /* We must fail with ENOSPC after we tried everything */ + EPRINTF("Returning -1, fail with ENOSPC"); + return -1; + } else { + return 0; + } } static inline void send_resize_request(struct vhd_state *s, struct payload *message) { int err; + + init_payload(message); + strncpy(message->path, s->vhd.file, 
PAYLOAD_MAX_PATH_LENGTH); + if (s->virt_bytes < s->eof_bytes + s->alloc_quantum && + s->virt_bytes != 0) { + + /* This is so we do not waste too much space if we + * have a big quantum. For example if the virtual size + * of the VHD is 1000 MB and the alloc_quantum is 200MB, + * when the lv is 980MB and we need to resize again we + * go to 1180MB, meaning we are wasting around 180MB. + */ + + /* We extend once up to the virtual size and than start + * resizing at THIN_RESIZE_MIN_INCREMENT + * for the remaining metadata part of the VHD. + */ + message->req_size = s->virt_bytes; + s->virt_bytes = 0; + s->alloc_quantum = THIN_RESIZE_MIN_INCREMENT; + s->thin_warn_1 = s->alloc_quantum / 2; + s->thin_warn_2 = s->alloc_quantum / 4; + } else { + message->req_size = s->eof_bytes + s->alloc_quantum; + } message->type = PAYLOAD_RESIZE; + + EPRINTF("sending resize request for %ld", message->req_size); + err = thin_sync_send_and_receive(s->ch, message); if (err) { DBG(TLOG_WARN, "socket returned: %d\n", err); @@ -1491,23 +1507,17 @@ thin_provisioning_checks(struct vhd_state *s, uint64_t needed_sectors) int ret = 0; available_bytes = s->eof_bytes - vhd_sectors_to_bytes(needed_sectors); - EPRINTF("needed_bytes=%ld", available_bytes); - - if (available_bytes < THIN_WARN_1) { - /* prepare common part of the message */ - init_payload(&message); - strncpy(message.path, s->vhd.file, PAYLOAD_MAX_PATH_LENGTH); - message.req_size = s->eof_bytes + THIN_RESIZE_INCREMENT; + EPRINTF("virt_bytes=%ld eof_bytes=%ld available_bytes=%ld", + s->virt_bytes, s->eof_bytes, available_bytes); + if (available_bytes < s->thin_warn_1) { /* s->req_bytes is indicating our state */ - if (s->req_bytes != 0) { - check_resize_request_progress(s, &message, available_bytes); - } else { + if (s->req_bytes == 0) { send_resize_request(s, &message); } /* let's be more pedantic when we reach end of space */ - if (available_bytes < THIN_WARN_2) { + if (available_bytes < s->thin_warn_2) { ret = thin_check_warn_2(s, 
available_bytes); } } @@ -1667,7 +1677,6 @@ update_bat(struct vhd_state *s, uint32_t blk) int err; uint64_t lb_end; struct vhd_bitmap *bm; - EPRINTF("SSS update_bat enter blk=%d", blk); ASSERT(bat_entry(s, blk) == DD_BLK_UNUSED); if (bat_locked(s)) { @@ -1695,7 +1704,6 @@ update_bat(struct vhd_state *s, uint32_t blk) } schedule_zero_bm_write(s, bm, lb_end); set_vhd_flag(bm->tx.status, VHD_FLAG_TX_UPDATE_BAT); - EPRINTF("SSS update_bat exit"); return 0; } @@ -2666,12 +2674,40 @@ vhd_debug(td_driver_t *driver) */ } +int +vhd_set_quantum(td_driver_t *driver, int quantum_mb) +{ + struct vhd_state *s = (struct vhd_state *)driver->data; + int64_t quantum = quantum_mb * 1048576; + + /* This is always called in case of thin, and even if the + * tap-ctl open/create is not passed as parameter, + * s->alloc_quantum will still be set: + */ + + if (quantum < THIN_RESIZE_MIN_INCREMENT && quantum != 0) { + s->alloc_quantum = THIN_RESIZE_MIN_INCREMENT; + } else if (quantum > THIN_RESIZE_MAX_INCREMENT) { + s->alloc_quantum = THIN_RESIZE_MAX_INCREMENT; + } else { + if (quantum == 0) + s->alloc_quantum = THIN_RESIZE_DEF_INCREMENT; + else + s->alloc_quantum = quantum; + } + + s->thin_warn_1 = s->alloc_quantum / 2; + s->thin_warn_2 = s->alloc_quantum / 4; + return 0; +} + struct tap_disk tapdisk_vhd = { .disk_type = "tapdisk_vhd", .flags = 0, .private_data_size = sizeof(struct vhd_state), .td_open = _vhd_open, .td_close = _vhd_close, + .td_set_quantum = vhd_set_quantum, .td_queue_read = vhd_queue_read, .td_queue_write = vhd_queue_write, .td_get_parent_id = vhd_get_parent_id, diff --git a/drivers/tapdisk-control.c b/drivers/tapdisk-control.c index 2375ec2a..36d7f220 100644 --- a/drivers/tapdisk-control.c +++ b/drivers/tapdisk-control.c @@ -748,6 +748,11 @@ tapdisk_control_open_image(struct tapdisk_ctl_conn *conn, if (err) goto out; + if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_THIN) { + /* Set allocation Quantum only to the leaf */ + tapdisk_vbd_set_quantum(vbd, 
request->u.params.alloc_quantum); + } + err = tapdisk_vbd_get_disk_info(vbd, &vbd->disk_info); if (err) { EPRINTF("VBD %d failed to get disk info: %s\n", vbd->uuid, diff --git a/drivers/tapdisk-interface.c b/drivers/tapdisk-interface.c index 661ebcbe..96ef2dfb 100644 --- a/drivers/tapdisk-interface.c +++ b/drivers/tapdisk-interface.c @@ -93,6 +93,21 @@ __td_open(td_image_t *image, td_disk_info_t *info) return 0; } +int +td_set_quantum(td_image_t *image, int quantum) +{ + td_driver_t *driver; + + driver = image->driver; + if (!driver) + return -EINVAL; + + if (driver->ops->td_set_quantum) + return driver->ops->td_set_quantum(driver, quantum); + + return -EINVAL; +} + int td_open(td_image_t *image) { diff --git a/drivers/tapdisk-interface.h b/drivers/tapdisk-interface.h index 7ec40188..458d74aa 100644 --- a/drivers/tapdisk-interface.h +++ b/drivers/tapdisk-interface.h @@ -29,6 +29,7 @@ int td_load(td_image_t *); int td_close(td_image_t *); int td_get_parent_id(td_image_t *, td_disk_id_t *); int td_validate_parent(td_image_t *, td_image_t *); +int td_set_quantum(td_image_t *, int); void td_queue_write(td_image_t *, td_request_t); void td_queue_read(td_image_t *, td_request_t); diff --git a/drivers/tapdisk-vbd.c b/drivers/tapdisk-vbd.c index 1df6eb0d..af5bcf7f 100644 --- a/drivers/tapdisk-vbd.c +++ b/drivers/tapdisk-vbd.c @@ -633,6 +633,12 @@ tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *name, td_flag_t flags, int prt_d return err; } +int +tapdisk_vbd_set_quantum(td_vbd_t *vbd, int alloc_quantum) +{ + return td_set_quantum(tapdisk_vbd_first_image(vbd), alloc_quantum); +} + void tapdisk_vbd_detach(td_vbd_t *vbd) { diff --git a/drivers/tapdisk-vbd.h b/drivers/tapdisk-vbd.h index 590452a9..89d7818b 100644 --- a/drivers/tapdisk-vbd.h +++ b/drivers/tapdisk-vbd.h @@ -186,6 +186,7 @@ int tapdisk_vbd_close(td_vbd_t *); */ int tapdisk_vbd_open_vdi(td_vbd_t * vbd, const char *params, td_flag_t flags, int prt_devnum); +int tapdisk_vbd_set_quantum(td_vbd_t *vbd, int 
alloc_quantum); void tapdisk_vbd_close_vdi(td_vbd_t *); int tapdisk_vbd_attach(td_vbd_t *, const char *, int); diff --git a/drivers/tapdisk.h b/drivers/tapdisk.h index 4a263fd5..40944a83 100644 --- a/drivers/tapdisk.h +++ b/drivers/tapdisk.h @@ -183,6 +183,7 @@ struct tap_disk { int private_data_size; int (*td_open) (td_driver_t *, const char *, td_flag_t); int (*td_close) (td_driver_t *); + int (*td_set_quantum) (td_driver_t *, int); int (*td_get_parent_id) (td_driver_t *, td_disk_id_t *); int (*td_validate_parent) (td_driver_t *, td_driver_t *, td_flag_t); void (*td_queue_read) (td_driver_t *, td_request_t); diff --git a/include/tap-ctl.h b/include/tap-ctl.h index fd502f00..b4248ffa 100644 --- a/include/tap-ctl.h +++ b/include/tap-ctl.h @@ -92,7 +92,8 @@ int tap_ctl_allocate(int *minor, char **devname); int tap_ctl_free(const int minor); int tap_ctl_create(const char *params, char **devname, int flags, - int prt_minor, char *secondary, int timeout); + int prt_minor, char *secondary, int timeout, + int alloc_quantum); int tap_ctl_destroy(const int id, const int minor, int force, struct timeval *timeout); @@ -103,7 +104,8 @@ int tap_ctl_attach(const int id, const int minor); int tap_ctl_detach(const int id, const int minor); int tap_ctl_open(const int id, const int minor, const char *params, int flags, - const int prt_minor, const char *secondary, int timeout); + const int prt_minor, const char *secondary, int timeout, + int alloc_quantum); int tap_ctl_close(const int id, const int minor, const int force, struct timeval *timeout); diff --git a/include/tapdisk-message.h b/include/tapdisk-message.h index 43df4f6e..7d9bda77 100644 --- a/include/tapdisk-message.h +++ b/include/tapdisk-message.h @@ -62,6 +62,7 @@ struct tapdisk_message_params { char path[TAPDISK_MESSAGE_MAX_PATH_LENGTH]; uint32_t prt_devnum; uint16_t req_timeout; + uint32_t alloc_quantum; char secondary[TAPDISK_MESSAGE_MAX_PATH_LENGTH]; }; From 8945a2eb425a4f9262c19be7da994ba4d8db9477 Mon Sep 17 
00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 29/43] Rename sock_client2.c and sock_serv.c Signed-off-by: Stefano Panella Acked-by: Germano Percossi --- thin/Makefile.am | 4 ++-- thin/{sock_client2.c => thin_client.c} | 0 thin/{sock_serv.c => thinprovd.c} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename thin/{sock_client2.c => thin_client.c} (100%) rename thin/{sock_serv.c => thinprovd.c} (100%) diff --git a/thin/Makefile.am b/thin/Makefile.am index 510e5c4e..3a8caf4e 100644 --- a/thin/Makefile.am +++ b/thin/Makefile.am @@ -7,7 +7,7 @@ AM_CPPFLAGS = -D_GNU_SOURCE AM_CPPFLAGS += -I$(top_srcdir)/include sbin_PROGRAMS = thinprovd -thinprovd_SOURCES = sock_serv.c +thinprovd_SOURCES = thinprovd.c thinprovd_LDADD = libtapdiskthin.la sbin_PROGRAMS += thin-cli @@ -17,7 +17,7 @@ thin_cli_LDADD = libtapdiskthin.la lib_LTLIBRARIES = libtapdiskthin.la libtapdiskthin_la_SOURCES = payload.c -libtapdiskthin_la_SOURCES += sock_client2.c +libtapdiskthin_la_SOURCES += thin_client.c libtapdiskthin_la_SOURCES += kpr_util.c libtapdiskthin_la_SOURCES += thinprovd_log.c diff --git a/thin/sock_client2.c b/thin/thin_client.c similarity index 100% rename from thin/sock_client2.c rename to thin/thin_client.c diff --git a/thin/sock_serv.c b/thin/thinprovd.c similarity index 100% rename from thin/sock_serv.c rename to thin/thinprovd.c From 848f9bbc6c8e56219e6daf8748806340c830a403 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 30/43] CA-172824: Fix protocol on PAYLOAD_CLI requests Signed-off-by: Stefano Panella Acked-by: Germano Percossi --- thin/thin_cli.c | 7 +++++-- thin/thinprovd.c | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/thin/thin_cli.c b/thin/thin_cli.c index ac647e9f..8b24be41 100644 --- a/thin/thin_cli.c +++ b/thin/thin_cli.c @@ -72,9 +72,12 @@ main(int argc, char *argv[]) { fprintf(stderr, "socket error (%d)\n", ret); return 1; } - 
thin_connection_destroy(ch); - printf("message: %s\n", message.path); + if(message.err_code == THIN_ERR_CODE_SUCCESS) + printf("message: ok\n"); + else + printf("message: fail\n"); + thin_connection_destroy(ch); return 0; } diff --git a/thin/thinprovd.c b/thin/thinprovd.c index bf1219ee..5082f5b2 100644 --- a/thin/thinprovd.c +++ b/thin/thinprovd.c @@ -417,9 +417,9 @@ handle_cli(struct payload * buf) ret = 1; if (ret) - strcpy(buf->path, "fail"); + buf->err_code = THIN_ERR_CODE_FAILURE; else - strcpy(buf->path, "ok"); + buf->err_code = THIN_ERR_CODE_SUCCESS; return 0; } From c4b76ccec829705b2263175ec169e9899a662e9d Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 31/43] CA-173663: Close the eventfd when deleting the VG Signed-off-by: Stefano Panella Acked-by: Germano Percossi --- thin/thinprovd.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/thin/thinprovd.c b/thin/thinprovd.c index 5082f5b2..a67f951e 100644 --- a/thin/thinprovd.c +++ b/thin/thinprovd.c @@ -107,6 +107,15 @@ alloc_init_queue(void) return NULL; } +static void +free_queue(struct kpr_queue *sqp) +{ + if (sqp) { + close(sqp->efd); + free(sqp); + } +} + static inline struct sq_entry * get_req_from_queue(struct kpr_queue *q) { @@ -730,7 +739,7 @@ del_vg(char *vg) /* By design the queue must be empty but we check */ if (!SIMPLEQ_EMPTY(&p_vg->r_queue->qhead)) fprintf(stderr, "queue not empty, memory leak! FIXME\n"); - free(p_vg->r_queue); + free_queue(p_vg->r_queue); free(p_vg); return 0; From ce46edc0d57d84ac5268922d2df191bc5dd822b1 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:56 +0100 Subject: [PATCH 32/43] CA-173408: initialise recvfrom addrlen argument properly The argument addrlen is a value-result argument, which the caller should initialize before the call to the size of the buffer associated with src_addr, and modified on return to indicate the actual size of the source address. 
The returned address is truncated if the buffer provided is too small; in this case, addrlen will return a value greater than was supplied to the call Signed-off-by: Stefano Panella Acked-by: Germano Percossi --- thin/thinprovd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/thin/thinprovd.c b/thin/thinprovd.c index a67f951e..dc26548b 100644 --- a/thin/thinprovd.c +++ b/thin/thinprovd.c @@ -266,6 +266,7 @@ main(int argc, char *argv[]) { } if (fds[1].revents) { + len = sizeof(struct sockaddr_un); /* read from the control socket */ ret = recvfrom(sfd, &buf, sizeof(buf), 0, &cl_addr, &len); From 8c43de4f6eae7f15f036cc3a00164ceee52857dd Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:57 +0100 Subject: [PATCH 33/43] CA-173369: Make thinprovd crash consistent Now thinprovd, if restarted, will automatically manage the same VGs it was already managing before the crash. This has been implemented by changing thin-cli to create/delete a file with the name of the VG just after the VG has been successfully added/deleted from the server, so there is no race. The files are created/removed in /var/run/thinprovd/ On a machine just booted, /var/run/thinprovd/ does not exist and is created by thinprovd when started.
Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- include/payload.h | 1 + thin/thin_cli.c | 64 ++++++++++++++++++++++++++++++++++++------ thin/thinprovd.c | 71 ++++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 121 insertions(+), 15 deletions(-) diff --git a/include/payload.h b/include/payload.h index 69709f76..3041b20e 100644 --- a/include/payload.h +++ b/include/payload.h @@ -4,6 +4,7 @@ #include #include "tapdisk-message.h" +#define THINPROVD_DIR "/var/run/thinprovd" #define PAYLOAD_MAX_PATH_LENGTH TAPDISK_MESSAGE_MAX_PATH_LENGTH #define IP_MAX_LEN 32 diff --git a/thin/thin_cli.c b/thin/thin_cli.c index 8b24be41..dec3ed19 100644 --- a/thin/thin_cli.c +++ b/thin/thin_cli.c @@ -1,6 +1,11 @@ #include #include #include +#include +#include +#include +#include +#include #include "payload.h" static void usage(char *); @@ -12,7 +17,8 @@ main(int argc, char *argv[]) { int arg; int opt_idx = 0, flag = 1; int ret; - + char vg_name[256]; + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; const struct option longopts[] = { { "add", required_argument, NULL, 0 }, { "del", required_argument, NULL, 0 }, @@ -64,21 +70,63 @@ main(int argc, char *argv[]) { ch = thin_connection_create(); if (ch == NULL) { - fprintf(stderr, "connection initialization failed"); + fprintf(stderr, "connection initialization failed," + " maybe thinprovd is not running?\n"); return 1; } ret = thin_sync_send_and_receive(ch, &message); if(ret) { fprintf(stderr, "socket error (%d)\n", ret); - return 1; + return -ret; } - if(message.err_code == THIN_ERR_CODE_SUCCESS) - printf("message: ok\n"); - else - printf("message: fail\n"); thin_connection_destroy(ch); - return 0; + + if(message.err_code == THIN_ERR_CODE_SUCCESS) { + /* The request has been successful, so we record it + * creating or deleting a VG file in THINPROVD_DIR. 
In + * this way if thinprovd will restart it will find all + * the VGs that had been added previously inside + * THINPROVD_DIR and it will we able to add them + * back. + */ + sprintf(vg_name, "%s/%s", THINPROVD_DIR, &message.path[4]); + if (strncmp("add ", message.path, 4) == 0) { + ret = open(vg_name, O_CREAT|O_RDONLY|O_EXCL, mode); + if (ret == -1) { + if (errno == 17) { + printf("%s already added\n", + &message.path[4]); + } else { + fprintf(stderr, "failed to create" + " %s errno=%d\n", + vg_name, errno); + } + } else { + printf("%s added\n", &message.path[4]); + } + close(ret); + } else { + ret = unlink(vg_name); + if (ret == -1) { + if (errno == 2) { + printf("%s already deleted\n", + &message.path[4]); + } else { + fprintf(stderr, "failed to unlink" + " %s errno=%d\n", + vg_name, errno); + } + } else { + printf("%s deleted\n", &message.path[4]); + } + } + return 0; + } else { + fprintf(stderr, "operation failed: err_code=%d\n", + message.err_code); + return message.err_code; + } } static void diff --git a/thin/thinprovd.c b/thin/thinprovd.c index dc26548b..ed45d3cd 100644 --- a/thin/thinprovd.c +++ b/thin/thinprovd.c @@ -4,6 +4,8 @@ #include /* TCP accept client info */ #include /* TCP accept client info */ #include +#include +#include #include #include #include @@ -198,12 +200,50 @@ signal_set(int signo, void (*func) (int)) return r; } +static int +add_previously_added_vgs(void) +{ + DIR *dir; + struct dirent *ent; + int ret; + + if ((dir = opendir(THINPROVD_DIR)) != NULL) { + /* Wen need to call add_vg for every file in this + * directory excluding '.' and '..' 
since these files + * were added as a consequence of a successfull + * 'thin-cli --add ' command + */ + while ((ent = readdir(dir)) != NULL) { + if (ent->d_name[0] != '.') { + fprintf(stderr, "adding VG %s\n", ent->d_name); + ret = add_vg(ent->d_name); + if (ret != 0) { + fprintf(stderr, "failed to add VG %s\n", + ent->d_name); + } + } + } + closedir(dir); + } else { + /* could not open directory */ + fprintf(stderr, "could not open %s\n", THINPROVD_DIR); + return errno; + } + return 0; +} int main(int argc, char *argv[]) { struct pollfd fds[2]; nfds_t maxfds = 2; + struct sockaddr_un sv_addr, cl_addr; + int sfd; + socklen_t len; + ssize_t ret; + int poll_ret; + struct payload buf; + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; /* Init pool */ LIST_INIT(&vg_pool.head); @@ -224,12 +264,29 @@ main(int argc, char *argv[]) { if (do_daemon() == -1) return 1; /* can do better */ - struct sockaddr_un sv_addr, cl_addr; - int sfd; - socklen_t len; - ssize_t ret; - int poll_ret; - struct payload buf; + ret = mkdir(THINPROVD_DIR, mode); + if (ret == -1) { + if (errno == EEXIST) { + /* If there are some volume groups files in + * this directory, we need to add the + * corresponding VGs back. This is because + * some logic was able to successfully add + * them and is relying on that, so it is not + * going to do an other "add" to the newly + * started thinprovd. 
+ */ + fprintf(stderr, "adding previously added vgs\n"); + ret = add_previously_added_vgs(); + if (ret != 0) { + fprintf(stderr, + "failed to add previously added vgs\n"); + } + } else { + fprintf(stderr, "failed to create %s errno=%d\n", + THINPROVD_DIR, errno); + return errno; + } + } sfd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); if (sfd == -1) @@ -707,7 +764,7 @@ del_vg(char *vg) p_vg = vg_pool_find_and_remove(vg); if(!p_vg) { fprintf(stderr, "Nothing removed\n"); - return 1; + return 0; } /* The thread is still able to crunch requests in its queue From a85bbc915c27f7cc00a38cf9fc091fd9a4ef9677 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:57 +0100 Subject: [PATCH 34/43] CP-12850: Refactor thinprovd_log code to be less specific This is so we can use previous thinprovd_log stuff in a more general way and not only for thinprovd. - thinprovd_log[c,h] renamed to thin_log[c,h] - all THINPROVD_LOG renamed to THIN_LOG - all thinpd_ renamed to thin_ - thin_openlog now accepts a string for the log name so it can be used for different modules/executables Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- include/Makefile.am | 2 +- include/thin_log.h | 36 ++++++++++++++++++++++++++++ include/thinprovd_log.h | 36 ---------------------------- thin/Makefile.am | 2 +- thin/{thinprovd_log.c => thin_log.c} | 34 +++++++++++++------------- 5 files changed, 54 insertions(+), 56 deletions(-) create mode 100644 include/thin_log.h delete mode 100644 include/thinprovd_log.h rename thin/{thinprovd_log.c => thin_log.c} (55%) diff --git a/include/thin_log.h
b/include/thin_log.h new file mode 100644 index 00000000..361303a5 --- /dev/null +++ b/include/thin_log.h @@ -0,0 +1,36 @@ +#ifndef THIN_LOG +#define THIN_LOG + +/* + * This header file provides a logging API for thin provisioning. + * + * Things to note: + * + * 1) Messages over 1024 characters will be truncated by syslogd. + * + * 2) New line characters, '\n', DO NOT break lines; the octal ASCII + * code of the char is printed instead. For every new line you want, + * you have to call a variant of 'thin_log_X()' + */ + +enum THIN_LOG_LEVEL +{ + THIN_LOG_ERR, + THIN_LOG_INFO, + THIN_LOG_DBG +}; + +void thin_openlog (char *logname); +void thin_closelog(void); + +/* + * Use this function to specify the LOWEST logging level. + * Everything from this level and above will be logged. + */ +void thin_log_upto(enum THIN_LOG_LEVEL tpd_log_level); + +void thin_log_err (const char *message, ...); +void thin_log_info(const char *message, ...); +void thin_log_dbg (const char *message, ...); + +#endif /* THIN_LOG */ diff --git a/include/thinprovd_log.h b/include/thinprovd_log.h deleted file mode 100644 index 668e05e1..00000000 --- a/include/thinprovd_log.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef THIN_THINPROVD_LOG -#define THIN_THINPROVD_LOG - -/* - * This header file provides a logging API for thinprovd. - * - * Things to note: - * - * 1) Messages over 1024 characters will be truncated by syslogd. - * - * 2) New line characters, '\n', DO NOT break lines; the octal ASCII - * code of the char is printed instead. For every new line you want, - * you have to call a variant of 'thinpd_log_X()' - */ - -enum THINPD_LOG_LEVEL -{ - THINPD_LOG_ERR, - THINPD_LOG_INFO, - THINPD_LOG_DBG -}; - -void thinpd_openlog (void); -void thinpd_closelog(void); - -/* - * Use this function to specify the LOWEST logging level. - * Everything from this level and above will be logged. 
- */ -void thinpd_log_upto(enum THINPD_LOG_LEVEL tpd_log_level); - -void thinpd_log_err (const char *message, ...); -void thinpd_log_info(const char *message, ...); -void thinpd_log_dbg (const char *message, ...); - -#endif /* THIN_THINPROVD_LOG */ diff --git a/thin/Makefile.am b/thin/Makefile.am index 3a8caf4e..ca646d53 100644 --- a/thin/Makefile.am +++ b/thin/Makefile.am @@ -19,7 +19,7 @@ lib_LTLIBRARIES = libtapdiskthin.la libtapdiskthin_la_SOURCES = payload.c libtapdiskthin_la_SOURCES += thin_client.c libtapdiskthin_la_SOURCES += kpr_util.c -libtapdiskthin_la_SOURCES += thinprovd_log.c +libtapdiskthin_la_SOURCES += thin_log.c libtapdiskthin_la_LDFLAGS = -version-info 1:1:1 diff --git a/thin/thinprovd_log.c b/thin/thin_log.c similarity index 55% rename from thin/thinprovd_log.c rename to thin/thin_log.c index ac5ee18c..949f3cb5 100644 --- a/thin/thinprovd_log.c +++ b/thin/thin_log.c @@ -1,26 +1,24 @@ /* This define is needed for vsyslog(). */ #define _BSD_SOURCE -#include "thinprovd_log.h" +#include "thin_log.h" #include #include -#define THINPROVD "THINPROVD" - static int -thinpd_match_log_level(enum THINPD_LOG_LEVEL tpd_log_level) +thin_match_log_level(enum THIN_LOG_LEVEL tpd_log_level) { int log_level; switch (tpd_log_level) { - case THINPD_LOG_ERR: + case THIN_LOG_ERR: log_level = LOG_EMERG; break; - case THINPD_LOG_INFO: + case THIN_LOG_INFO: log_level = LOG_INFO; break; - case THINPD_LOG_DBG: + case THIN_LOG_DBG: log_level = LOG_DEBUG; break; default: @@ -31,58 +29,58 @@ thinpd_match_log_level(enum THINPD_LOG_LEVEL tpd_log_level) } inline void -thinpd_openlog(void) +thin_openlog(char *logname) { - openlog(THINPROVD, LOG_CONS | LOG_PID, LOG_DAEMON); + openlog(logname, LOG_CONS | LOG_PID, LOG_DAEMON); } inline void -thinpd_closelog(void) +thin_closelog(void) { closelog(); } inline void -thinpd_log_upto(enum THINPD_LOG_LEVEL tpd_log_level) +thin_log_upto(enum THIN_LOG_LEVEL tpd_log_level) { - setlogmask(LOG_UPTO(thinpd_match_log_level(tpd_log_level))); + 
setlogmask(LOG_UPTO(thin_match_log_level(tpd_log_level))); } void -thinpd_log_err(const char *message, ...) +thin_log_err(const char *message, ...) { va_list args; va_start(args, message); - /* THINPD_LOG_ERR == LOG_EMERG */ + /* THIN_LOG_ERR == LOG_EMERG */ vsyslog(LOG_EMERG, message, args); va_end(args); } void -thinpd_log_info(const char *message, ...) +thin_log_info(const char *message, ...) { va_list args; va_start(args, message); - /* THINPD_LOG_INFO == LOG_INFO */ + /* THIN_LOG_INFO == LOG_INFO */ vsyslog(LOG_INFO, message, args); va_end(args); } void -thinpd_log_dbg(const char *message, ...) +thin_log_dbg(const char *message, ...) { va_list args; va_start(args, message); - /* THINPD_LOG_DBG == LOG_DEBUG */ + /* THIN_LOG_DBG == LOG_DEBUG */ vsyslog(LOG_DEBUG, message, args); va_end(args); From 9857f95d00c9ed51c74c1dadeee876061a7a7372 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:57 +0100 Subject: [PATCH 35/43] CP-12851: Move all thinprovd log to use thin_log facility We need this otherwise we do not get any log when thinprovd is started as a daemon. 
Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- thin/payload.c | 14 ++++----- thin/thinprovd.c | 80 +++++++++++++++++++++++++----------------------- 2 files changed, 49 insertions(+), 45 deletions(-) diff --git a/thin/payload.c b/thin/payload.c index a52bbee7..e7f9287c 100644 --- a/thin/payload.c +++ b/thin/payload.c @@ -1,6 +1,6 @@ -#include #include #include "payload.h" +#include "thin_log.h" int init_payload(struct payload *pload) { @@ -10,11 +10,11 @@ int init_payload(struct payload *pload) void print_payload(struct payload *pload) { - printf("payload data:\n"); - printf("type = %d\n", pload->type); - printf("path = %s\n", pload->path); - printf("requested size = %"PRIu64"\n", pload->req_size); - printf("cb_type = %d\n", pload->cb_type); - printf("err_code = %d\n", pload->err_code); + thin_log_info("payload data:\n"); + thin_log_info("type = %d\n", pload->type); + thin_log_info("path = %s\n", pload->path); + thin_log_info("requested size = %"PRIu64"\n", pload->req_size); + thin_log_info("cb_type = %d\n", pload->cb_type); + thin_log_info("err_code = %d\n", pload->err_code); return; } diff --git a/thin/thinprovd.c b/thin/thinprovd.c index ed45d3cd..e85a42e7 100644 --- a/thin/thinprovd.c +++ b/thin/thinprovd.c @@ -18,6 +18,7 @@ #include #include "blktap.h" #include "payload.h" +#include "thin_log.h" #include "kpr_util.h" #define BACKLOG 5 @@ -195,8 +196,8 @@ signal_set(int signo, void (*func) (int)) r = sigaction(signo, &new_act, &old_act); if (r < 0) - fprintf(stderr, "Signal %d: handler registration failed", - signo); + thin_log_err("Signal %d: handler registration failed", + signo); return r; } @@ -215,10 +216,10 @@ add_previously_added_vgs(void) */ while ((ent = readdir(dir)) != NULL) { if (ent->d_name[0] != '.') { - fprintf(stderr, "adding VG %s\n", ent->d_name); + thin_log_info("adding VG %s\n", ent->d_name); ret = add_vg(ent->d_name); if (ret != 0) { - fprintf(stderr, "failed to add VG %s\n", + thin_log_info("failed to add VG %s\n", 
ent->d_name); } } @@ -226,7 +227,7 @@ add_previously_added_vgs(void) closedir(dir); } else { /* could not open directory */ - fprintf(stderr, "could not open %s\n", THINPROVD_DIR); + thin_log_err("could not open %s\n", THINPROVD_DIR); return errno; } return 0; @@ -245,6 +246,9 @@ main(int argc, char *argv[]) { struct payload buf; mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; + thin_openlog("THINPROVD"); + thin_log_upto(THIN_LOG_INFO); + /* Init pool */ LIST_INIT(&vg_pool.head); if (pthread_mutex_init(&vg_pool.mtx, NULL) != 0) @@ -275,15 +279,15 @@ main(int argc, char *argv[]) { * going to do an other "add" to the newly * started thinprovd. */ - fprintf(stderr, "adding previously added vgs\n"); + thin_log_info("adding previously added vgs\n"); ret = add_previously_added_vgs(); if (ret != 0) { - fprintf(stderr, + thin_log_info( "failed to add previously added vgs\n"); } } else { - fprintf(stderr, "failed to create %s errno=%d\n", - THINPROVD_DIR, errno); + thin_log_err("failed to create %s errno=%d\n", + THINPROVD_DIR, errno); return errno; } } @@ -297,7 +301,7 @@ main(int argc, char *argv[]) { strncpy(sv_addr.sun_path, THIN_CONTROL_SOCKET, sizeof(sv_addr.sun_path) - 1); if (bind(sfd, (struct sockaddr *) &sv_addr, sizeof(struct sockaddr_un)) == -1) { - perror("bind failed"); + thin_log_err("bind failed, %s", strerror(errno)); return -errno; } @@ -312,8 +316,8 @@ main(int argc, char *argv[]) { for(;;) { poll_ret = poll(fds, maxfds, -1); /* wait for ever */ if ( poll_ret < 1 ) { /* 0 not expected */ - fprintf(stderr, "poll returned %d, %s\n", - poll_ret, strerror(errno)); + thin_log_info("poll returned %d, %s\n", + poll_ret, strerror(errno)); continue; } @@ -328,8 +332,8 @@ main(int argc, char *argv[]) { ret = recvfrom(sfd, &buf, sizeof(buf), 0, &cl_addr, &len); if (ret != sizeof(buf)) { - fprintf(stderr, "recvfrom returned %ld, %s\n", - (long)ret, strerror(errno)); + thin_log_info("recvfrom returned %ld, %s\n", + (long)ret, strerror(errno)); continue; } /* 
Packet of expected len arrived, process it*/ @@ -339,8 +343,8 @@ main(int argc, char *argv[]) { ret = sendto(sfd, &buf, ret, 0, &cl_addr, len); if(ret != sizeof(buf)) { - fprintf(stderr, "sendto returned %ld, %s\n", - (long)ret, strerror(errno)); + thin_log_info("sendto returned %ld, %s\n", + (long)ret, strerror(errno)); } } } @@ -363,16 +367,16 @@ process_out_queue(void) case PAYLOAD_CB_NONE: /* Just free the req, no async response * was needed */ - fprintf(stderr, "Processed CB_NONE req\n"); + thin_log_info("Processed CB_NONE req\n"); break; case PAYLOAD_CB_SOCK: /* FIXME: * We do not expect somebody to use this * for now */ - fprintf(stderr, "CB_SOCK not implemented yet\n"); + thin_log_err("CB_SOCK not implemented yet\n"); break; default: - fprintf(stderr, "cb_type unknown\n"); + thin_log_err("cb_type unknown\n"); } free(req); } @@ -386,7 +390,7 @@ process_payload(struct payload * buf) print_payload(buf); err = req_reply(buf); print_payload(buf); - printf("EOM\n\n"); + thin_log_info("EOM\n\n"); return err; } @@ -523,8 +527,8 @@ worker_thread(void * ap) /* wait for ever */ poll_ret = poll(fds, maxfds, -1); if ( poll_ret < 1 ) { /* 0 not expected */ - fprintf(stderr, "poll returned %d, %s\n", - poll_ret, strerror(errno)); + thin_log_info("poll returned %d, %s\n", + poll_ret, strerror(errno)); continue; } if (fds[0].revents) @@ -540,7 +544,7 @@ worker_thread(void * ap) */ if (data->type == PAYLOAD_UNDEF) { free(req); - fprintf(stderr, "Thread cancellation received\n"); + thin_log_info("Thread cancellation received\n"); return NULL; } @@ -564,7 +568,7 @@ slave_worker_hook(struct payload *data) data->err_code = THIN_ERR_CODE_SUCCESS; else data->err_code = THIN_ERR_CODE_FAILURE; - printf("worker_thread: completed %s (%d)\n\n", + thin_log_info("worker_thread: completed %s (%d)\n\n", data->path, ret); /* FIXME: * Probably we do not need to call refresh_lvm, leaving the @@ -699,18 +703,18 @@ add_vg(char *vg) { struct vg_entry *p_vg; - printf("CLI: add_vg for %s\n",vg); + 
thin_log_info("CLI: add_vg for %s\n",vg); /* check we already have it */ if(vg_pool_find(vg, true)) { - printf("%s already added\n", vg); + thin_log_info("%s already added\n", vg); return 0; } /* allocate and init vg_entry */ p_vg = malloc(sizeof(*p_vg)); if (!p_vg) { - fprintf(stderr, "Failed to allocate vg_entry struct\n"); + thin_log_err("Failed to allocate vg_entry struct\n"); return 1; } @@ -721,8 +725,8 @@ add_vg(char *vg) /* VG and thread specific thread allocated */ p_vg->r_queue = alloc_init_queue(); if(!p_vg->r_queue) { - fprintf(stderr, "Failed worker queue creation for %s\n", - p_vg->name); + thin_log_err("Failed worker queue creation for %s\n", + p_vg->name); goto out; } @@ -731,15 +735,15 @@ add_vg(char *vg) p_vg->thr.hook = slave_worker_hook; p_vg->thr.net_hook = NULL; if (pthread_create(&p_vg->thr.thr_id, NULL, worker_thread, &p_vg->thr)) { - fprintf(stderr, "Failed worker thread creation for %s\n", - p_vg->name); + thin_log_err("Failed worker thread creation for %s\n", + p_vg->name); goto out2; } /* Everything ok. Add vg to pool */ LIST_INSERT_HEAD(&vg_pool.head, p_vg, entries); - printf("Successfully registered VG %s\n", p_vg->name); + thin_log_info("Successfully registered VG %s\n", p_vg->name); return 0; out2: free(p_vg->r_queue); @@ -756,14 +760,14 @@ del_vg(char *vg) struct sq_entry *req; int ret; - printf("CLI: del_vg\n"); + thin_log_info("CLI: del_vg\n"); /* Once removed from the pool no new requests can be served any more */ p_vg = vg_pool_find_and_remove(vg); if(!p_vg) { - fprintf(stderr, "Nothing removed\n"); + thin_log_info("Nothing removed\n"); return 0; } @@ -777,8 +781,8 @@ del_vg(char *vg) We are returning with a runnig thread, not able to receive new requests and 2 memory leaks.. */ - fprintf(stderr, "Error with malloc!! Thread still running\n" - "and memory leaked\n"); + thin_log_err("Error with malloc!! 
Thread still running\n" + "and memory leaked\n"); return 1; } init_payload(&req->data); @@ -789,14 +793,14 @@ del_vg(char *vg) /* Wait for thread to complete */ ret = pthread_join(p_vg->thr.thr_id, NULL); if (ret != 0) - fprintf(stderr, "Problem joining thread..FIXME\n"); + thin_log_err("Problem joining thread..FIXME\n"); /* * Thread is dead, let's free resources */ /* By design the queue must be empty but we check */ if (!SIMPLEQ_EMPTY(&p_vg->r_queue->qhead)) - fprintf(stderr, "queue not empty, memory leak! FIXME\n"); + thin_log_err("queue not empty, memory leak! FIXME\n"); free_queue(p_vg->r_queue); free(p_vg); From 7ba9d528dc461dd72ba1a82ae4d764bf7af87288 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:57 +0100 Subject: [PATCH 36/43] CA-172827: Fix resize-demo improper arguments to logger Thinprovd has been modified to remove '-L' from the size parameter for resize-demo, which means the wrapper script needed to be changed as well. As part of this change we have agreed to move resize-demo and refresh-demo scripts from the xenvm repo to the blktap repo with new names: xlvhd-refresh xlvhd-resize These two new scripts will be installed in /usr/sbin/ while resize-demo and refresh-demo were installed in /opt/xensource/sm/ Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- mk/blktap.spec.in | 2 ++ thin/Makefile.am | 3 +++ thin/thinprovd.c | 6 +++--- thin/xlvhd-refresh | 2 ++ thin/xlvhd-resize | 3 +++ 5 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 thin/xlvhd-refresh create mode 100644 thin/xlvhd-resize diff --git a/mk/blktap.spec.in b/mk/blktap.spec.in index c221eb2f..7766e56d 100644 --- a/mk/blktap.spec.in +++ b/mk/blktap.spec.in @@ -63,6 +63,8 @@ mkdir -p %{buildroot}%{_localstatedir}/log/blktap %{_sbindir}/vhdpartx %{_sbindir}/thinprovd %{_sbindir}/thin-cli +%{_sbindir}/xlvhd-resize +%{_sbindir}/xlvhd-refresh %{_libexecdir}/tapdisk %{_sysconfdir}/udev/rules.d/blktap.rules 
%{_sysconfdir}/rc.d/init.d/tapback diff --git a/thin/Makefile.am b/thin/Makefile.am index ca646d53..2442d30c 100644 --- a/thin/Makefile.am +++ b/thin/Makefile.am @@ -14,6 +14,9 @@ sbin_PROGRAMS += thin-cli thin_cli_SOURCES = thin_cli.c thin_cli_LDADD = libtapdiskthin.la +dist_sbin_SCRIPTS = xlvhd-resize +dist_sbin_SCRIPTS += xlvhd-refresh + lib_LTLIBRARIES = libtapdiskthin.la libtapdiskthin_la_SOURCES = payload.c diff --git a/thin/thinprovd.c b/thin/thinprovd.c index e85a42e7..b1600eac 100644 --- a/thin/thinprovd.c +++ b/thin/thinprovd.c @@ -595,7 +595,7 @@ increase_size(off64_t size, const char * path) char ssize[NCHARS]; /* enough for G bytes */ /* prepare size for command line */ - num_read = snprintf(ssize, NCHARS, "-L""%"PRIu64"b", size); + num_read = snprintf(ssize, NCHARS, "%"PRIu64"b", size); if (num_read >= NCHARS) return -1; /* size too big */ @@ -603,7 +603,7 @@ increase_size(off64_t size, const char * path) case -1: return -1; case 0: /* child */ - execl("/opt/xensource/sm/resize-demo", "resize-demo", ssize, + execl("/usr/sbin/xlvhd-resize", "xlvhd-resize", ssize, path, (char *)NULL); _exit(127); /* TBD */ default: /* parent */ @@ -632,7 +632,7 @@ refresh_lvm(const char * path) case -1: return -1; case 0: /* child */ - execl("/opt/xensource/sm/refresh-demo", "refresh-demo", path, + execl("/usr/sbin/xlvhd-refresh", "xlvhd-refresh", path, (char *)NULL); _exit(127); /* TBD */ default: /* parent */ diff --git a/thin/xlvhd-refresh b/thin/xlvhd-refresh new file mode 100644 index 00000000..0d4124cb --- /dev/null +++ b/thin/xlvhd-refresh @@ -0,0 +1,2 @@ +#!/bin/bash +true \ No newline at end of file diff --git a/thin/xlvhd-resize b/thin/xlvhd-resize new file mode 100644 index 00000000..ab5811e5 --- /dev/null +++ b/thin/xlvhd-resize @@ -0,0 +1,3 @@ +#!/bin/bash +logger -t xlvhd-resize ${1} ${2} +xenvm lvextend -L ${1} ${2} --live From cacff799fa8928cf1201a9996f1a1279cd898001 Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 11:44:57 +0100 
Subject: [PATCH 37/43] lvm_create_cmd() reads the allocation type from a file Every SR has its allocation type written in '/var/run/nonpersistent/sr_alloc_<uuid>'. blktap reads it from there and defaults to 'thick' if it doesn't exist. Signed-off-by: Kostas Ladopoulos Acked-by: Germano Percossi --- lvm/lvm-util.c | 95 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 22 deletions(-) diff --git a/lvm/lvm-util.c b/lvm/lvm-util.c index 5e3d71e0..8ff53828 100644 --- a/lvm/lvm-util.c +++ b/lvm/lvm-util.c @@ -97,28 +97,79 @@ lvm_parse_pv(struct vg *vg, const char *name, int pvs, uint64_t start) } static int -lvm_create_cmd(char *out, const char *command) +lvm_create_cmd(char *out, const char *command, const char *vgname) { - char *sr_alloc; - int thin_flag; - - if (!(sr_alloc = getenv("SR_ALLOC"))) { - return -EINVAL; + char path[96] = "/var/run/nonpersistent/sr_alloc_"; + char vgs_opts[] = "vg_name,vg_extent_size,lv_count," + "pv_count,pv_name,pe_start"; + char lvs_opts[] = "lv_name,lv_size,segtype,seg_count," + "seg_start,seg_size,devices"; + char sr_alloc[8]; + char *c; + FILE *f = 0; + int dnmc_flag; + int err = 0; + + if (strcmp(command, "vgs") && strcmp(command, "lvs")) { + err = EINVAL; + goto exit; } - if (!strcmp(sr_alloc, "thin")) { - thin_flag = 1; - } else if (!strcmp(sr_alloc, "thick")) { - thin_flag = 0; - } else { - return -EINVAL; + /* 'vgname' is a string that looks like: + * 'VG_XenStorage-85fcc87c-0167-acf1-da64-ed982543add8'. + * '+14' is used to strip the 'VG_XenStorage-' part. 
+ */ + strcat(path, vgname + 14); + + if ((f = fopen(path, "r"))) { + if (!fgets(sr_alloc, 8, f)) { + if ((err = ferror(f))) { + } else if ((err = feof(f))) { + } else { /* Unknown error */ + err = 666; + } + + goto exit; + } + + if ((c = strchr(sr_alloc, '\n'))) { + *c = '\0'; + } + + /* "dnmc": dynamic + * "thck": thick + */ + if (!strcmp(sr_alloc, "dnmc")) { + dnmc_flag = 1; + } else if (!strcmp(sr_alloc, "thck")) { + dnmc_flag = 0; + } else { + err = EINVAL; + goto exit; + } + + } else { /* Fallback to original LVM commands. */ + dnmc_flag = 0; } - strcpy(out, thin_flag ? "/bin/xenvm " : ""); + /* Construct the lvm command. */ + strcpy(out, dnmc_flag ? "/bin/xenvm " : ""); + strcat(out, command); + strcat(out, " "); + strcat(out, vgname); + strcat(out, " --noheadings --nosuffix --units=b --options="); + + strcat(out, strcmp(command, "vgs") ? lvs_opts : vgs_opts); + strcat(out, " 2> /dev/null"); - return 0; +exit: + if (f) { + fclose(f); + f = 0; + } + return -err; } static int @@ -130,15 +181,13 @@ lvm_open_vg(const char *vgname, struct vg *vg) uint64_t size, pv_start; char buf[MAX_NAME_SIZE + 256]; - if ((err = lvm_create_cmd(buf, "vgs %s --noheadings --nosuffix --units=b " - "--options=vg_name,vg_extent_size,lv_count,pv_count," - "pv_name,pe_start"))) { + if ((err = lvm_create_cmd(buf, "vgs", vgname))) { return err; } memset(vg, 0, sizeof(*vg)); - err = asprintf(&cmd, buf, vgname); + err = asprintf(&cmd, buf); if (err == -1) return -ENOMEM; @@ -243,13 +292,11 @@ lvm_scan_lvs(struct vg *vg) int i, err; char buf[MAX_NAME_SIZE + 256]; - if ((err = lvm_create_cmd(buf, "lvs %s --noheadings --nosuffix --units=b " - "--options=lv_name,lv_size,segtype,seg_count,seg_start," - "seg_size,devices"))) { + if ((err = lvm_create_cmd(buf, "lvs", vg->name))) { return err; } - err = asprintf(&cmd, buf, vg->name); + err = asprintf(&cmd, buf); if (err == -1) return -ENOMEM; @@ -343,12 +390,16 @@ lvm_scan_vg(const char *vg_name, struct vg *vg) if (err) return err; + 
fprintf(stderr, "lvm_open_vg success"); + err = lvm_scan_lvs(vg); if (err) { lvm_free_vg(vg); return err; } + fprintf(stderr, "lvm_scan_lvs success"); + return 0; } From fb23f6e7c008d4db576a7a6ae651883d7d5418b3 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:57 +0100 Subject: [PATCH 38/43] CP-12660: Retry send/recv if interrupted by a signal Thinprovd and tapdisk need to retry if a send/recv or read/write returns -1 and errno == EINTR Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- thin/thin_client.c | 13 +++++++++++-- thin/thinprovd.c | 15 ++++++++++----- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/thin/thin_client.c b/thin/thin_client.c index 41903cdd..d17f7a6f 100644 --- a/thin/thin_client.c +++ b/thin/thin_client.c @@ -20,17 +20,26 @@ int thin_sync_send_and_receive(struct thin_conn_handle *ch, struct payload *message) { size_t len = sizeof(struct payload); + int ret; if (ch == NULL) { return -1; } /* Send messages to server */ - if (write(ch->sfd, message, len) != len) +write: + ret = write(ch->sfd, message, len); + if (ret == -1 && errno == EINTR) + goto write; + if (ret != len) return -errno; /* Wait for ACK packet */ - if (read(ch->sfd, message, len) != len) +read: + ret = read(ch->sfd, message, len); + if (ret == -1 && errno == EINTR) + goto read; + if (ret != len) return -errno; return 0; diff --git a/thin/thinprovd.c b/thin/thinprovd.c index b1600eac..af94f6be 100644 --- a/thin/thinprovd.c +++ b/thin/thinprovd.c @@ -327,24 +327,29 @@ main(int argc, char *argv[]) { } if (fds[1].revents) { +recv: len = sizeof(struct sockaddr_un); /* read from the control socket */ ret = recvfrom(sfd, &buf, sizeof(buf), 0, &cl_addr, &len); + if (ret == -1 && errno == EINTR) + goto recv; if (ret != sizeof(buf)) { - thin_log_info("recvfrom returned %ld, %s\n", - (long)ret, strerror(errno)); + thin_log_err("recvfrom returned %ld, %s\n", + (long)ret, strerror(errno)); continue; } /* Packet of expected len arrived, 
process it*/ process_payload(&buf); /* Send the acknowledge packet */ +send: ret = sendto(sfd, &buf, ret, 0, &cl_addr, len); + if (ret == -1 && errno == EINTR) + goto send; if(ret != sizeof(buf)) { - - thin_log_info("sendto returned %ld, %s\n", - (long)ret, strerror(errno)); + thin_log_err("sendto returned %ld, %s\n", + (long)ret, strerror(errno)); } } } From 8a17e9e6ad3fb932fc6f4dff15d952ca7e283daa Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Thu, 24 Sep 2015 11:44:57 +0100 Subject: [PATCH 39/43] CA-174097: Request must be completed only if in progress There was a corner case triggered when thinprovd was killed where thin_check_warn_2 was declaring a request completed even if the request was not pending at all (in fact req_bytes was 0). The request is now completed only if it was pending already (req_bytes != 0) Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- drivers/block-vhd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/block-vhd.c b/drivers/block-vhd.c index 652140ea..bc715e1c 100644 --- a/drivers/block-vhd.c +++ b/drivers/block-vhd.c @@ -1423,7 +1423,8 @@ thin_check_warn_2(struct vhd_state *s, int64_t available_bytes) while (count--) { phy_bytes = lseek64(s->vhd.fd, 0, SEEK_END); EPRINTF("thin_check_warn_2 phy_bytes = %ld", phy_bytes); - if (s->req_bytes <= phy_bytes) { + if (s->req_bytes != 0 && + s->req_bytes <= phy_bytes) { /* Request is completed */ EPRINTF("Request has been completed"); s->eof_bytes = phy_bytes; From c78798f39716ea2fc8db527e14a6c1ebe573c78e Mon Sep 17 00:00:00 2001 From: Kostas Ladopoulos Date: Thu, 24 Sep 2015 11:44:57 +0100 Subject: [PATCH 40/43] CA-174119: Infer SR alloc type from xenvm config file's existence If file '/etc/xenvm.d/VG_XenStorage-<uuid>' exists, the SR's allocation is 'dynamic' and 'xenvm' is called. If not, the normal lvm commands are used. 
Signed-off-by: Kostas Ladopoulos Reviewed-by: Jorge Martin --- lvm/lvm-util.c | 59 ++++++++------------------------------------------ 1 file changed, 9 insertions(+), 50 deletions(-) diff --git a/lvm/lvm-util.c b/lvm/lvm-util.c index 8ff53828..2452d024 100644 --- a/lvm/lvm-util.c +++ b/lvm/lvm-util.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "lvm-util.h" @@ -99,15 +100,11 @@ lvm_parse_pv(struct vg *vg, const char *name, int pvs, uint64_t start) static int lvm_create_cmd(char *out, const char *command, const char *vgname) { - char path[96] = "/var/run/nonpersistent/sr_alloc_"; + char path[96] = "/etc/xenvm.d/"; char vgs_opts[] = "vg_name,vg_extent_size,lv_count," - "pv_count,pv_name,pe_start"; + "pv_count,pv_name,pe_start"; char lvs_opts[] = "lv_name,lv_size,segtype,seg_count," - "seg_start,seg_size,devices"; - char sr_alloc[8]; - char *c; - FILE *f = 0; - int dnmc_flag; + "seg_start,seg_size,devices"; int err = 0; if (strcmp(command, "vgs") && strcmp(command, "lvs")) { @@ -115,45 +112,15 @@ lvm_create_cmd(char *out, const char *command, const char *vgname) goto exit; } - /* 'vgname' is a string that looks like: + /** + * 'vgname' is a string in the form: * 'VG_XenStorage-85fcc87c-0167-acf1-da64-ed982543add8'. - * '+14' is used to strip the 'VG_XenStorage-' part. */ - strcat(path, vgname + 14); - - if ((f = fopen(path, "r"))) { - if (!fgets(sr_alloc, 8, f)) { - if ((err = ferror(f))) { - } else if ((err = feof(f))) { - } else { /* Unknown error */ - err = 666; - } - - goto exit; - } - - if ((c = strchr(sr_alloc, '\n'))) { - *c = '\0'; - } - - /* "dnmc": dynamic - * "thck": thick - */ - if (!strcmp(sr_alloc, "dnmc")) { - dnmc_flag = 1; - } else if (!strcmp(sr_alloc, "thck")) { - dnmc_flag = 0; - } else { - err = EINVAL; - goto exit; - } - - } else { /* Fallback to original LVM commands. */ - dnmc_flag = 0; - } + strcat(path, vgname); /* Construct the lvm command. */ - strcpy(out, dnmc_flag ? 
"/bin/xenvm " : ""); + /* This file exists only if the SR is of type 'dynamic'. */ + strcpy(out, access(path, F_OK) ? "" : "/bin/xenvm "); strcat(out, command); strcat(out, " "); @@ -165,10 +132,6 @@ lvm_create_cmd(char *out, const char *command, const char *vgname) strcat(out, " 2> /dev/null"); exit: - if (f) { - fclose(f); - f = 0; - } return -err; } @@ -390,16 +353,12 @@ lvm_scan_vg(const char *vg_name, struct vg *vg) if (err) return err; - fprintf(stderr, "lvm_open_vg success"); - err = lvm_scan_lvs(vg); if (err) { lvm_free_vg(vg); return err; } - fprintf(stderr, "lvm_scan_lvs success"); - return 0; } From 9199c90101ce83385b6ee9a99940b6994795c1b3 Mon Sep 17 00:00:00 2001 From: Stefano Panella Date: Wed, 2 Sep 2015 14:49:27 +0000 Subject: [PATCH 41/43] CA-181428: Tapdisk should remember xlvhd parameters on unpause In case of unpause (resume) operation the -T and -q parameters are not passed and the ones from open/create should be used. This patch saves xlvhd_alloc_quantum in the vbd structure for future use during unpause. 
Signed-off-by: Stefano Panella Reviewed-by: Germano Percossi --- drivers/tapdisk-control.c | 4 +++- drivers/tapdisk-vbd.c | 9 +++++++-- drivers/tapdisk-vbd.h | 3 ++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/tapdisk-control.c b/drivers/tapdisk-control.c index 36d7f220..70bcb67e 100644 --- a/drivers/tapdisk-control.c +++ b/drivers/tapdisk-control.c @@ -749,8 +749,10 @@ tapdisk_control_open_image(struct tapdisk_ctl_conn *conn, goto out; if (request->u.params.flags & TAPDISK_MESSAGE_FLAG_THIN) { + /* Save parameters in vbd for unpause */ + vbd->xlvhd_alloc_quantum = request->u.params.alloc_quantum; /* Set allocation Quantum only to the leaf */ - tapdisk_vbd_set_quantum(vbd, request->u.params.alloc_quantum); + tapdisk_vbd_set_quantum(vbd); } err = tapdisk_vbd_get_disk_info(vbd, &vbd->disk_info); diff --git a/drivers/tapdisk-vbd.c b/drivers/tapdisk-vbd.c index af5bcf7f..99574700 100644 --- a/drivers/tapdisk-vbd.c +++ b/drivers/tapdisk-vbd.c @@ -634,9 +634,10 @@ tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *name, td_flag_t flags, int prt_d } int -tapdisk_vbd_set_quantum(td_vbd_t *vbd, int alloc_quantum) +tapdisk_vbd_set_quantum(td_vbd_t *vbd) { - return td_set_quantum(tapdisk_vbd_first_image(vbd), alloc_quantum); + return td_set_quantum(tapdisk_vbd_first_image(vbd), + vbd->xlvhd_alloc_quantum); } void @@ -942,6 +943,10 @@ tapdisk_vbd_resume(td_vbd_t *vbd, const char *name) for (i = 0; i < TD_VBD_EIO_RETRIES; i++) { err = tapdisk_vbd_open_vdi(vbd, name, vbd->flags | TD_OPEN_STRICT, -1); + if (vbd->flags & TD_OPEN_THIN) { + /* Set allocation Quantum only to the leaf */ + tapdisk_vbd_set_quantum(vbd); + } if (!err) break; diff --git a/drivers/tapdisk-vbd.h b/drivers/tapdisk-vbd.h index 89d7818b..e439fe90 100644 --- a/drivers/tapdisk-vbd.h +++ b/drivers/tapdisk-vbd.h @@ -147,6 +147,7 @@ struct td_vbd_handle { struct td_vbd_rrd rrd; stats_t vdi_stats; + int xlvhd_alloc_quantum; }; #define tapdisk_vbd_for_each_request(vreq, tmp, list) \ @@ -186,7 
+187,7 @@ int tapdisk_vbd_close(td_vbd_t *); */ int tapdisk_vbd_open_vdi(td_vbd_t * vbd, const char *params, td_flag_t flags, int prt_devnum); -int tapdisk_vbd_set_quantum(td_vbd_t *vbd, int alloc_quantum); +int tapdisk_vbd_set_quantum(td_vbd_t *vbd); void tapdisk_vbd_close_vdi(td_vbd_t *); int tapdisk_vbd_attach(td_vbd_t *, const char *, int); From 162eb4fcf340f050750e6725f6241185c324111d Mon Sep 17 00:00:00 2001 From: Chandrika Srinivasan Date: Thu, 27 Aug 2015 13:20:09 +0000 Subject: [PATCH 42/43] CP-13881: Made xenvm config files non-persistent Moved xenvm config files from /etc/xenvm.d to /var/run/nonpersistent/xenvm.d Signed-off-by: Chandrika Srinivasan Acked-by: Germano Percossi --- lvm/lvm-util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lvm/lvm-util.c b/lvm/lvm-util.c index 2452d024..51c7cbb8 100644 --- a/lvm/lvm-util.c +++ b/lvm/lvm-util.c @@ -100,7 +100,7 @@ lvm_parse_pv(struct vg *vg, const char *name, int pvs, uint64_t start) static int lvm_create_cmd(char *out, const char *command, const char *vgname) { - char path[96] = "/etc/xenvm.d/"; + char path[96] = "/var/run/nonpersistent/xenvm.d/"; char vgs_opts[] = "vg_name,vg_extent_size,lv_count," "pv_count,pv_name,pe_start"; char lvs_opts[] = "lv_name,lv_size,segtype,seg_count," From 8ab7496313ee55b6fc7d3c16a1d2911a6b05a71c Mon Sep 17 00:00:00 2001 From: Germano Percossi Date: Thu, 24 Sep 2015 16:00:55 +0100 Subject: [PATCH 43/43] Pre-release 3.2.0 Signed-off-by: Germano Percossi --- VERSION | 2 +- WHATS_NEW | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 50e47c89..a4f52a5d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.1 \ No newline at end of file +3.2.0 \ No newline at end of file diff --git a/WHATS_NEW b/WHATS_NEW index a7e43063..9d0afad7 100644 --- a/WHATS_NEW +++ b/WHATS_NEW @@ -1,3 +1,7 @@ +Version 3.2.0 - 24th Sep 2015 +================================ +Added code to ask lvm blocks on demand (xlvhd thin-provisioning) + 
Version 3.1.1 - 24th Sep 2015 ================================ CA-148438: Expose mem_mode flag in statistics file