Message ID | 20210421220950.105017-1-vsementsov@virtuozzo.com |
---|---|
State | New |
Headers | show |
Series | block: simplify write-threshold and drop write notifiers | expand |
On 22/04/2021 00:09, Vladimir Sementsov-Ogievskiy wrote: > write-notifiers are used only for write-threshold. New code for such > purpose should create filters. > > Let's handle write-threshold simply in generic code and drop write > notifiers at all. > > Also move part of write-threshold API that is used only for testing to > the test. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> > --- > > I agree that this could be split into 2-3 parts and not combining > everything into one. But I'm tired now. I can send v2 if needed, so > consider it as RFC. Or take as is if you think it's not too much-in-one. Thank you for this patch. Since I am reworking on v2, if you want I can also integrate this patch with mines and send everything together once I am done. Emanuele > > I also suggest this as a prepartion (and partly substitution) for > "[PATCH v2 0/8] Block layer thread-safety, continued" > > include/block/block_int.h | 12 ----- > include/block/write-threshold.h | 24 --------- > block.c | 1 - > block/io.c | 21 +++++--- > block/write-threshold.c | 87 ++----------------------------- > tests/unit/test-write-threshold.c | 38 ++++++++++++++ > 6 files changed, 54 insertions(+), 129 deletions(-) > > diff --git a/include/block/block_int.h b/include/block/block_int.h > index 88e4111939..50af58af75 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -957,12 +957,8 @@ struct BlockDriverState { > */ > int64_t total_sectors; > > - /* Callback before write request is processed */ > - NotifierWithReturnList before_write_notifiers; > - > /* threshold limit for writes, in bytes. "High water mark". */ > uint64_t write_threshold_offset; > - NotifierWithReturn write_threshold_notifier; > > /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. > * Reading from the list can be done with either the BQL or the > @@ -1087,14 +1083,6 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, > bool bdrv_backing_overridden(BlockDriverState *bs); > > > -/** > - * bdrv_add_before_write_notifier: > - * > - * Register a callback that is invoked before write requests are processed but > - * after any throttling or waiting for overlapping requests. > - */ > -void bdrv_add_before_write_notifier(BlockDriverState *bs, > - NotifierWithReturn *notifier); > > /** > * bdrv_add_aio_context_notifier: > diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h > index c646f267a4..23e1bfc123 100644 > --- a/include/block/write-threshold.h > +++ b/include/block/write-threshold.h > @@ -35,28 +35,4 @@ void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes); > */ > uint64_t bdrv_write_threshold_get(const BlockDriverState *bs); > > -/* > - * bdrv_write_threshold_is_set > - * > - * Tell if a write threshold is set for a given BDS. > - */ > -bool bdrv_write_threshold_is_set(const BlockDriverState *bs); > - > -/* > - * bdrv_write_threshold_exceeded > - * > - * Return the extent of a write request that exceeded the threshold, > - * or zero if the request is below the threshold. > - * Return zero also if the threshold was not set. > - * > - * NOTE: here we assume the following holds for each request this code > - * deals with: > - * > - * assert((req->offset + req->bytes) <= UINT64_MAX) > - * > - * Please not there is *not* an actual C assert(). > - */ > -uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, > - const BdrvTrackedRequest *req); > - > #endif > diff --git a/block.c b/block.c > index c5b887cec1..001453105e 100644 > --- a/block.c > +++ b/block.c > @@ -381,7 +381,6 @@ BlockDriverState *bdrv_new(void) > for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { > QLIST_INIT(&bs->op_blockers[i]); > } > - notifier_with_return_list_init(&bs->before_write_notifiers); > qemu_co_mutex_init(&bs->reqs_lock); > qemu_mutex_init(&bs->dirty_bitmap_mutex); > bs->refcnt = 1; > diff --git a/block/io.c b/block/io.c > index ca2dca3007..e0aa775952 100644 > --- a/block/io.c > +++ b/block/io.c > @@ -36,6 +36,8 @@ > #include "qemu/main-loop.h" > #include "sysemu/replay.h" > > +#include "qapi/qapi-events-block-core.h" > + > /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ > #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) > > @@ -1974,6 +1976,8 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, > child->perm & BLK_PERM_RESIZE); > > switch (req->type) { > + uint64_t write_threshold; > + > case BDRV_TRACKED_WRITE: > case BDRV_TRACKED_DISCARD: > if (flags & BDRV_REQ_WRITE_UNCHANGED) { > @@ -1981,8 +1985,15 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, > } else { > assert(child->perm & BLK_PERM_WRITE); > } > - return notifier_with_return_list_notify(&bs->before_write_notifiers, > - req); > + write_threshold = qatomic_read(&bs->write_threshold_offset); > + if (write_threshold > 0 && offset + bytes > write_threshold) { > + qapi_event_send_block_write_threshold( > + bs->node_name, > + offset + bytes - write_threshold, > + write_threshold); > + qatomic_set(&bs->write_threshold_offset, 0); > + } > + return 0; > case BDRV_TRACKED_TRUNCATE: > assert(child->perm & BLK_PERM_RESIZE); > return 0; > @@ -3137,12 +3148,6 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) > return true; > } > > -void bdrv_add_before_write_notifier(BlockDriverState *bs, > - NotifierWithReturn *notifier) > -{ > - notifier_with_return_list_add(&bs->before_write_notifiers, notifier); > -} > - > void bdrv_io_plug(BlockDriverState *bs) > { > BdrvChild *child; > diff --git a/block/write-threshold.c b/block/write-threshold.c > index 85b78dc2a9..9bf4287c6e 100644 > --- a/block/write-threshold.c > +++ b/block/write-threshold.c > @@ -21,104 +21,23 @@ > > uint64_t bdrv_write_threshold_get(const BlockDriverState *bs) > { > - return bs->write_threshold_offset; > -} > - > -bool bdrv_write_threshold_is_set(const BlockDriverState *bs) > -{ > - return bs->write_threshold_offset > 0; > -} > - > -static void write_threshold_disable(BlockDriverState *bs) > -{ > - if (bdrv_write_threshold_is_set(bs)) { > - notifier_with_return_remove(&bs->write_threshold_notifier); > - bs->write_threshold_offset = 0; > - } > -} > - > -uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, > - const BdrvTrackedRequest *req) > -{ > - if (bdrv_write_threshold_is_set(bs)) { > - if (req->offset > bs->write_threshold_offset) { > - return (req->offset - bs->write_threshold_offset) + req->bytes; > - } > - if ((req->offset + req->bytes) > bs->write_threshold_offset) { > - return (req->offset + req->bytes) - bs->write_threshold_offset; > - } > - } > - return 0; > -} > - > -static int coroutine_fn before_write_notify(NotifierWithReturn *notifier, > - void *opaque) > -{ > - BdrvTrackedRequest *req = opaque; > - BlockDriverState *bs = req->bs; > - uint64_t amount = 0; > - > - amount = bdrv_write_threshold_exceeded(bs, req); > - if (amount > 0) { > - qapi_event_send_block_write_threshold( > - bs->node_name, > - amount, > - bs->write_threshold_offset); > - > - /* autodisable to avoid flooding the monitor */ > - write_threshold_disable(bs); > - } > - > - return 0; /* should always let other notifiers run */ > -} > - > -static void write_threshold_register_notifier(BlockDriverState *bs) > -{ > - bs->write_threshold_notifier.notify = before_write_notify; > - bdrv_add_before_write_notifier(bs, &bs->write_threshold_notifier); > -} > - > -static void write_threshold_update(BlockDriverState *bs, > - int64_t threshold_bytes) > -{ > - bs->write_threshold_offset = threshold_bytes; > + return qatomic_read(&bs->write_threshold_offset); > } > > void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes) > { > - if (bdrv_write_threshold_is_set(bs)) { > - if (threshold_bytes > 0) { > - write_threshold_update(bs, threshold_bytes); > - } else { > - write_threshold_disable(bs); > - } > - } else { > - if (threshold_bytes > 0) { > - /* avoid multiple registration */ > - write_threshold_register_notifier(bs); > - write_threshold_update(bs, threshold_bytes); > - } > - /* discard bogus disable request */ > - } > + qatomic_set(&bs->write_threshold_offset, threshold_bytes); > } > > void qmp_block_set_write_threshold(const char *node_name, > uint64_t threshold_bytes, > Error **errp) > { > - BlockDriverState *bs; > - AioContext *aio_context; > - > - bs = bdrv_find_node(node_name); > + BlockDriverState *bs = bdrv_find_node(node_name); > if (!bs) { > error_setg(errp, "Device '%s' not found", node_name); > return; > } > > - aio_context = bdrv_get_aio_context(bs); > - aio_context_acquire(aio_context); > - > bdrv_write_threshold_set(bs, threshold_bytes); > - > - aio_context_release(aio_context); > } > diff --git a/tests/unit/test-write-threshold.c b/tests/unit/test-write-threshold.c > index fc1c45a2eb..c2f4cd20d7 100644 > --- a/tests/unit/test-write-threshold.c > +++ b/tests/unit/test-write-threshold.c > @@ -12,6 +12,44 @@ > #include "block/write-threshold.h" > > > +/* > + * bdrv_write_threshold_is_set > + * > + * Tell if a write threshold is set for a given BDS. > + */ > +static bool bdrv_write_threshold_is_set(const BlockDriverState *bs) > +{ > + return bs->write_threshold_offset > 0; > +} > + > +/* > + * bdrv_write_threshold_exceeded > + * > + * Return the extent of a write request that exceeded the threshold, > + * or zero if the request is below the threshold. > + * Return zero also if the threshold was not set. > + * > + * NOTE: here we assume the following holds for each request this code > + * deals with: > + * > + * assert((req->offset + req->bytes) <= UINT64_MAX) > + * > + * Please not there is *not* an actual C assert(). > + */ > +static uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, > + const BdrvTrackedRequest *req) > +{ > + if (bdrv_write_threshold_is_set(bs)) { > + if (req->offset > bs->write_threshold_offset) { > + return (req->offset - bs->write_threshold_offset) + req->bytes; > + } > + if ((req->offset + req->bytes) > bs->write_threshold_offset) { > + return (req->offset + req->bytes) - bs->write_threshold_offset; > + } > + } > + return 0; > +} > + > static void test_threshold_not_set_on_init(void) > { > uint64_t res; >
22.04.2021 12:57, Emanuele Giuseppe Esposito wrote: > > > On 22/04/2021 00:09, Vladimir Sementsov-Ogievskiy wrote: >> write-notifiers are used only for write-threshold. New code for such >> purpose should create filters. >> >> Let's handle write-threshold simply in generic code and drop write >> notifiers at all. >> >> Also move part of write-threshold API that is used only for testing to >> the test. >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> >> --- >> >> I agree that this could be split into 2-3 parts and not combining >> everything into one. But I'm tired now. I can send v2 if needed, so >> consider it as RFC. Or take as is if you think it's not too much-in-one. > > Thank you for this patch. Since I am reworking on v2, if you want I can also integrate this patch with mines and send everything together once I am done. > > Emanuele I'd wait several days for comments. Maybe I'll have to resend v2 of this. Also, after this patch, is something of your patches 1-4 needed? Probably you may just resend your series not touching write-threshold, and just note in cover-letter that write-threshold is covered by "[PATCH] block: simplify write-threshold and drop write notifiers"
On 22.04.21 00:09, Vladimir Sementsov-Ogievskiy wrote: > write-notifiers are used only for write-threshold. New code for such > purpose should create filters. > > Let's handle write-threshold simply in generic code and drop write > notifiers at all. > > Also move part of write-threshold API that is used only for testing to > the test. > > Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> > --- > > I agree that this could be split into 2-3 parts and not combining > everything into one. But I'm tired now. Er... You should have put it off until the next day then? O:) It should be multiple patches. At least one to move the write threshold update to block/io.c, and then another to drop write notifiers. > I can send v2 if needed, so > consider it as RFC. Or take as is if you think it's not too much-in-one. > > I also suggest this as a prepartion (and partly substitution) for > "[PATCH v2 0/8] Block layer thread-safety, continued" > > include/block/block_int.h | 12 ----- > include/block/write-threshold.h | 24 --------- > block.c | 1 - > block/io.c | 21 +++++--- > block/write-threshold.c | 87 ++----------------------------- > tests/unit/test-write-threshold.c | 38 ++++++++++++++ > 6 files changed, 54 insertions(+), 129 deletions(-) [...] > diff --git a/block/io.c b/block/io.c > index ca2dca3007..e0aa775952 100644 > --- a/block/io.c > +++ b/block/io.c > @@ -36,6 +36,8 @@ > #include "qemu/main-loop.h" > #include "sysemu/replay.h" > > +#include "qapi/qapi-events-block-core.h" > + > /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ > #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) > > @@ -1974,6 +1976,8 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, > child->perm & BLK_PERM_RESIZE); > > switch (req->type) { > + uint64_t write_threshold; > + Works, but I think this is the first time I see a variable declared in a switch block. What I usually do for such cases is to put a block after the label. (i.e. case X: { uint64_t write_threshold; ... }) But it wouldn’t hurt to just declare this at the beginning of bdrv_co_write_req_prepare(), I think. > case BDRV_TRACKED_WRITE: > case BDRV_TRACKED_DISCARD: > if (flags & BDRV_REQ_WRITE_UNCHANGED) { > @@ -1981,8 +1985,15 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, > } else { > assert(child->perm & BLK_PERM_WRITE); > } > - return notifier_with_return_list_notify(&bs->before_write_notifiers, > - req); > + write_threshold = qatomic_read(&bs->write_threshold_offset); > + if (write_threshold > 0 && offset + bytes > write_threshold) { > + qapi_event_send_block_write_threshold( > + bs->node_name, > + offset + bytes - write_threshold, > + write_threshold); > + qatomic_set(&bs->write_threshold_offset, 0); > + } I’d put all of this into a function in block/write-threshold.c that’s called from here. Max > + return 0; > case BDRV_TRACKED_TRUNCATE: > assert(child->perm & BLK_PERM_RESIZE); > return 0; > @@ -3137,12 +3148,6 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) > return true; > } > > -void bdrv_add_before_write_notifier(BlockDriverState *bs, > - NotifierWithReturn *notifier) > -{ > - notifier_with_return_list_add(&bs->before_write_notifiers, notifier); > -} > - > void bdrv_io_plug(BlockDriverState *bs) > { > BdrvChild *child;
30.04.2021 13:04, Max Reitz wrote: > On 22.04.21 00:09, Vladimir Sementsov-Ogievskiy wrote: >> write-notifiers are used only for write-threshold. New code for such >> purpose should create filters. >> >> Let's handle write-threshold simply in generic code and drop write >> notifiers at all. >> >> Also move part of write-threshold API that is used only for testing to >> the test. >> >> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> >> --- >> >> I agree that this could be split into 2-3 parts and not combining >> everything into one. But I'm tired now. > > Er... You should have put it off until the next day then? O:) Yes, sorry. I wanted to send that day... Don't remember why :) Checked now, that was not Friday.. I wanted to drop write notifiers long ago, and when I finally do it I couldn't wait, so cool it seemed to me :) Thanks for comments, I'll send v2 soon. > > It should be multiple patches. At least one to move the write threshold update to block/io.c, and then another to drop write notifiers. > >> I can send v2 if needed, so >> consider it as RFC. Or take as is if you think it's not too much-in-one. >> >> I also suggest this as a prepartion (and partly substitution) for >> "[PATCH v2 0/8] Block layer thread-safety, continued" >> >> include/block/block_int.h | 12 ----- >> include/block/write-threshold.h | 24 --------- >> block.c | 1 - >> block/io.c | 21 +++++--- >> block/write-threshold.c | 87 ++----------------------------- >> tests/unit/test-write-threshold.c | 38 ++++++++++++++ >> 6 files changed, 54 insertions(+), 129 deletions(-) > > [...] > >> diff --git a/block/io.c b/block/io.c >> index ca2dca3007..e0aa775952 100644 >> --- a/block/io.c >> +++ b/block/io.c >> @@ -36,6 +36,8 @@ >> #include "qemu/main-loop.h" >> #include "sysemu/replay.h" >> +#include "qapi/qapi-events-block-core.h" >> + >> /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ >> #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) >> @@ -1974,6 +1976,8 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, >> child->perm & BLK_PERM_RESIZE); >> switch (req->type) { >> + uint64_t write_threshold; >> + > > Works, but I think this is the first time I see a variable declared in a switch block. What I usually do for such cases is to put a block after the label. (i.e. case X: { uint64_t write_threshold; ... }) > > But it wouldn’t hurt to just declare this at the beginning of bdrv_co_write_req_prepare(), I think. > >> case BDRV_TRACKED_WRITE: >> case BDRV_TRACKED_DISCARD: >> if (flags & BDRV_REQ_WRITE_UNCHANGED) { >> @@ -1981,8 +1985,15 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, >> } else { >> assert(child->perm & BLK_PERM_WRITE); >> } >> - return notifier_with_return_list_notify(&bs->before_write_notifiers, >> - req); >> + write_threshold = qatomic_read(&bs->write_threshold_offset); >> + if (write_threshold > 0 && offset + bytes > write_threshold) { >> + qapi_event_send_block_write_threshold( >> + bs->node_name, >> + offset + bytes - write_threshold, >> + write_threshold); >> + qatomic_set(&bs->write_threshold_offset, 0); >> + } > > I’d put all of this into a function in block/write-threshold.c that’s called from here. > > Max > >> + return 0; >> case BDRV_TRACKED_TRUNCATE: >> assert(child->perm & BLK_PERM_RESIZE); >> return 0; >> @@ -3137,12 +3148,6 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) >> return true; >> } >> -void bdrv_add_before_write_notifier(BlockDriverState *bs, >> - NotifierWithReturn *notifier) >> -{ >> - notifier_with_return_list_add(&bs->before_write_notifiers, notifier); >> -} >> - >> void bdrv_io_plug(BlockDriverState *bs) >> { >> BdrvChild *child; >
On Thu, Apr 22, 2021 at 01:09:50AM +0300, Vladimir Sementsov-Ogievskiy wrote: > @@ -1981,8 +1985,15 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, > } else { > assert(child->perm & BLK_PERM_WRITE); > } > - return notifier_with_return_list_notify(&bs->before_write_notifiers, > - req); > + write_threshold = qatomic_read(&bs->write_threshold_offset); > + if (write_threshold > 0 && offset + bytes > write_threshold) { > + qapi_event_send_block_write_threshold( > + bs->node_name, > + offset + bytes - write_threshold, > + write_threshold); > + qatomic_set(&bs->write_threshold_offset, 0); It's safer to reset the threshold before emitting the event. That way there is no race with the QMP client setting a new threshold via bdrv_write_threshold_is_set(). I guess the race is possible since qatomic is used and there is no lock. I like the idea of dropping write notifiers: Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
05.05.2021 13:10, Stefan Hajnoczi wrote: > On Thu, Apr 22, 2021 at 01:09:50AM +0300, Vladimir Sementsov-Ogievskiy wrote: >> @@ -1981,8 +1985,15 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, >> } else { >> assert(child->perm & BLK_PERM_WRITE); >> } >> - return notifier_with_return_list_notify(&bs->before_write_notifiers, >> - req); >> + write_threshold = qatomic_read(&bs->write_threshold_offset); >> + if (write_threshold > 0 && offset + bytes > write_threshold) { >> + qapi_event_send_block_write_threshold( >> + bs->node_name, >> + offset + bytes - write_threshold, >> + write_threshold); >> + qatomic_set(&bs->write_threshold_offset, 0); > > It's safer to reset the threshold before emitting the event. That way > there is no race with the QMP client setting a new threshold via > bdrv_write_threshold_is_set(). I guess the race is possible since > qatomic is used and there is no lock. > > I like the idea of dropping write notifiers: > Acked-by: Stefan Hajnoczi <stefanha@redhat.com> > Sorry, this is an outdated patch, I've sent a v2: [PATCH v2 0/9] block: refactor write threshold
diff --git a/include/block/block_int.h b/include/block/block_int.h index 88e4111939..50af58af75 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -957,12 +957,8 @@ struct BlockDriverState { */ int64_t total_sectors; - /* Callback before write request is processed */ - NotifierWithReturnList before_write_notifiers; - /* threshold limit for writes, in bytes. "High water mark". */ uint64_t write_threshold_offset; - NotifierWithReturn write_threshold_notifier; /* Writing to the list requires the BQL _and_ the dirty_bitmap_mutex. * Reading from the list can be done with either the BQL or the @@ -1087,14 +1083,6 @@ void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, bool bdrv_backing_overridden(BlockDriverState *bs); -/** - * bdrv_add_before_write_notifier: - * - * Register a callback that is invoked before write requests are processed but - * after any throttling or waiting for overlapping requests. - */ -void bdrv_add_before_write_notifier(BlockDriverState *bs, - NotifierWithReturn *notifier); /** * bdrv_add_aio_context_notifier: diff --git a/include/block/write-threshold.h b/include/block/write-threshold.h index c646f267a4..23e1bfc123 100644 --- a/include/block/write-threshold.h +++ b/include/block/write-threshold.h @@ -35,28 +35,4 @@ void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes); */ uint64_t bdrv_write_threshold_get(const BlockDriverState *bs); -/* - * bdrv_write_threshold_is_set - * - * Tell if a write threshold is set for a given BDS. - */ -bool bdrv_write_threshold_is_set(const BlockDriverState *bs); - -/* - * bdrv_write_threshold_exceeded - * - * Return the extent of a write request that exceeded the threshold, - * or zero if the request is below the threshold. - * Return zero also if the threshold was not set. - * - * NOTE: here we assume the following holds for each request this code - * deals with: - * - * assert((req->offset + req->bytes) <= UINT64_MAX) - * - * Please not there is *not* an actual C assert(). - */ -uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, - const BdrvTrackedRequest *req); - #endif diff --git a/block.c b/block.c index c5b887cec1..001453105e 100644 --- a/block.c +++ b/block.c @@ -381,7 +381,6 @@ BlockDriverState *bdrv_new(void) for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) { QLIST_INIT(&bs->op_blockers[i]); } - notifier_with_return_list_init(&bs->before_write_notifiers); qemu_co_mutex_init(&bs->reqs_lock); qemu_mutex_init(&bs->dirty_bitmap_mutex); bs->refcnt = 1; diff --git a/block/io.c b/block/io.c index ca2dca3007..e0aa775952 100644 --- a/block/io.c +++ b/block/io.c @@ -36,6 +36,8 @@ #include "qemu/main-loop.h" #include "sysemu/replay.h" +#include "qapi/qapi-events-block-core.h" + /* Maximum bounce buffer for copy-on-read and write zeroes, in bytes */ #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) @@ -1974,6 +1976,8 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, child->perm & BLK_PERM_RESIZE); switch (req->type) { + uint64_t write_threshold; + case BDRV_TRACKED_WRITE: case BDRV_TRACKED_DISCARD: if (flags & BDRV_REQ_WRITE_UNCHANGED) { @@ -1981,8 +1985,15 @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes, } else { assert(child->perm & BLK_PERM_WRITE); } - return notifier_with_return_list_notify(&bs->before_write_notifiers, - req); + write_threshold = qatomic_read(&bs->write_threshold_offset); + if (write_threshold > 0 && offset + bytes > write_threshold) { + qapi_event_send_block_write_threshold( + bs->node_name, + offset + bytes - write_threshold, + write_threshold); + qatomic_set(&bs->write_threshold_offset, 0); + } + return 0; case BDRV_TRACKED_TRUNCATE: assert(child->perm & BLK_PERM_RESIZE); return 0; @@ -3137,12 +3148,6 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) return true; } -void bdrv_add_before_write_notifier(BlockDriverState *bs, - NotifierWithReturn *notifier) -{ - notifier_with_return_list_add(&bs->before_write_notifiers, notifier); -} - void bdrv_io_plug(BlockDriverState *bs) { BdrvChild *child; diff --git a/block/write-threshold.c b/block/write-threshold.c index 85b78dc2a9..9bf4287c6e 100644 --- a/block/write-threshold.c +++ b/block/write-threshold.c @@ -21,104 +21,23 @@ uint64_t bdrv_write_threshold_get(const BlockDriverState *bs) { - return bs->write_threshold_offset; -} - -bool bdrv_write_threshold_is_set(const BlockDriverState *bs) -{ - return bs->write_threshold_offset > 0; -} - -static void write_threshold_disable(BlockDriverState *bs) -{ - if (bdrv_write_threshold_is_set(bs)) { - notifier_with_return_remove(&bs->write_threshold_notifier); - bs->write_threshold_offset = 0; - } -} - -uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, - const BdrvTrackedRequest *req) -{ - if (bdrv_write_threshold_is_set(bs)) { - if (req->offset > bs->write_threshold_offset) { - return (req->offset - bs->write_threshold_offset) + req->bytes; - } - if ((req->offset + req->bytes) > bs->write_threshold_offset) { - return (req->offset + req->bytes) - bs->write_threshold_offset; - } - } - return 0; -} - -static int coroutine_fn before_write_notify(NotifierWithReturn *notifier, - void *opaque) -{ - BdrvTrackedRequest *req = opaque; - BlockDriverState *bs = req->bs; - uint64_t amount = 0; - - amount = bdrv_write_threshold_exceeded(bs, req); - if (amount > 0) { - qapi_event_send_block_write_threshold( - bs->node_name, - amount, - bs->write_threshold_offset); - - /* autodisable to avoid flooding the monitor */ - write_threshold_disable(bs); - } - - return 0; /* should always let other notifiers run */ -} - -static void write_threshold_register_notifier(BlockDriverState *bs) -{ - bs->write_threshold_notifier.notify = before_write_notify; - bdrv_add_before_write_notifier(bs, &bs->write_threshold_notifier); -} - -static void write_threshold_update(BlockDriverState *bs, - int64_t threshold_bytes) -{ - bs->write_threshold_offset = threshold_bytes; + return qatomic_read(&bs->write_threshold_offset); } void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes) { - if (bdrv_write_threshold_is_set(bs)) { - if (threshold_bytes > 0) { - write_threshold_update(bs, threshold_bytes); - } else { - write_threshold_disable(bs); - } - } else { - if (threshold_bytes > 0) { - /* avoid multiple registration */ - write_threshold_register_notifier(bs); - write_threshold_update(bs, threshold_bytes); - } - /* discard bogus disable request */ - } + qatomic_set(&bs->write_threshold_offset, threshold_bytes); } void qmp_block_set_write_threshold(const char *node_name, uint64_t threshold_bytes, Error **errp) { - BlockDriverState *bs; - AioContext *aio_context; - - bs = bdrv_find_node(node_name); + BlockDriverState *bs = bdrv_find_node(node_name); if (!bs) { error_setg(errp, "Device '%s' not found", node_name); return; } - aio_context = bdrv_get_aio_context(bs); - aio_context_acquire(aio_context); - bdrv_write_threshold_set(bs, threshold_bytes); - - aio_context_release(aio_context); } diff --git a/tests/unit/test-write-threshold.c b/tests/unit/test-write-threshold.c index fc1c45a2eb..c2f4cd20d7 100644 --- a/tests/unit/test-write-threshold.c +++ b/tests/unit/test-write-threshold.c @@ -12,6 +12,44 @@ #include "block/write-threshold.h" +/* + * bdrv_write_threshold_is_set + * + * Tell if a write threshold is set for a given BDS. + */ +static bool bdrv_write_threshold_is_set(const BlockDriverState *bs) +{ + return bs->write_threshold_offset > 0; +} + +/* + * bdrv_write_threshold_exceeded + * + * Return the extent of a write request that exceeded the threshold, + * or zero if the request is below the threshold. + * Return zero also if the threshold was not set. + * + * NOTE: here we assume the following holds for each request this code + * deals with: + * + * assert((req->offset + req->bytes) <= UINT64_MAX) + * + * Please not there is *not* an actual C assert(). + */ +static uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, + const BdrvTrackedRequest *req) +{ + if (bdrv_write_threshold_is_set(bs)) { + if (req->offset > bs->write_threshold_offset) { + return (req->offset - bs->write_threshold_offset) + req->bytes; + } + if ((req->offset + req->bytes) > bs->write_threshold_offset) { + return (req->offset + req->bytes) - bs->write_threshold_offset; + } + } + return 0; +} + static void test_threshold_not_set_on_init(void) { uint64_t res;
write-notifiers are used only for write-threshold. New code for such purpose should create filters. Let's handle write-threshold simply in generic code and drop write notifiers at all. Also move part of write-threshold API that is used only for testing to the test. Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> --- I agree that this could be split into 2-3 parts and not combining everything into one. But I'm tired now. I can send v2 if needed, so consider it as RFC. Or take as is if you think it's not too much-in-one. I also suggest this as a prepartion (and partly substitution) for "[PATCH v2 0/8] Block layer thread-safety, continued" include/block/block_int.h | 12 ----- include/block/write-threshold.h | 24 --------- block.c | 1 - block/io.c | 21 +++++--- block/write-threshold.c | 87 ++----------------------------- tests/unit/test-write-threshold.c | 38 ++++++++++++++ 6 files changed, 54 insertions(+), 129 deletions(-)