Patchew
Login
The following changes since commit fe8d2d5737ab20ed0118863f5eb888cae37122ab: Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-3.0-pull-request' into staging (2018-07-04 22:38:10 +0100) are available in the git repository at: git://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to 7c20c808a5cbf5d244735bc78fc3138c739c1946: file-posix: Unlock FD after creation (2018-07-05 11:07:58 +0200) ---------------------------------------------------------------- Block layer patches: - qcow2: Use worker threads for compression to improve performance of 'qemu-img convert -W' and compressed backup jobs - blklogwrites: New filter driver to log write requests to an image in the dm-log-writes format - file-posix: Fix image locking during image creation - crypto: Fix memory leak in error path - Error out instead of silently truncating node names ---------------------------------------------------------------- Aapo Vienamo (1): block: Add blklogwrites Ari Sundholm (4): block: Move two block permission constants to the relevant enum block/blklogwrites: Change log_sector_size from int64_t to uint64_t block/blklogwrites: Add an option for appending to an old log block/blklogwrites: Add an option for the update interval of the log superblock Kevin Wolf (2): block: Don't silently truncate node names block/crypto: Fix memory leak in create error path Max Reitz (2): file-posix: Fix creation locking file-posix: Unlock FD after creation Vladimir Sementsov-Ogievskiy (3): qemu-img: allow compressed not-in-order writes qcow2: refactor data compression qcow2: add compress threads qapi/block-core.json | 38 ++- block/qcow2.h | 3 + include/block/block.h | 7 + block.c | 12 +- block/blklogwrites.c | 547 ++++++++++++++++++++++++++++++++++++++++++ block/crypto.c | 2 +- block/file-posix.c | 21 +- block/qcow2.c | 138 ++++++++--- qemu-img.c | 5 - MAINTAINERS | 6 + block/Makefile.objs | 1 + tests/qemu-iotests/051 | 15 ++ tests/qemu-iotests/051.out | 23 ++ 
tests/qemu-iotests/051.pc.out | 23 ++ 14 files changed, 791 insertions(+), 50 deletions(-) create mode 100644 block/blklogwrites.c
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> No reason to forbid them, and they are needed to improve performance with compress-threads in further patches. Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- qemu-img.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/qemu-img.c b/qemu-img.c index XXXXXXX..XXXXXXX 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv) goto fail_getopt; } - if (!s.wr_in_order && s.compressed) { - error_report("Out of order write and compress are mutually exclusive"); - goto fail_getopt; - } - if (tgt_image_opts && !skip_create) { error_report("--target-image-opts requires use of -n flag"); goto fail_getopt; -- 2.13.6
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Make a separate function for compression to be parallelized later. - use the .avail_out field instead of .next_out to calculate the size of the compressed data. It looks more natural and it allows dest to remain a void pointer - set avail_out to be at least one byte less than the input, so that inefficient compression is detected earlier Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/qcow2.c | 78 ++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/block/qcow2.c b/block/qcow2.c index XXXXXXX..XXXXXXX 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -XXX,XX +XXX,XX @@ */ #include "qemu/osdep.h" + +#define ZLIB_CONST +#include <zlib.h> + #include "block/block_int.h" #include "block/qdict.h" #include "sysemu/block-backend.h" #include "qemu/module.h" -#include <zlib.h> #include "qcow2.h" #include "qemu/error-report.h" #include "qapi/error.h" @@ -XXX,XX +XXX,XX @@ fail: return ret; } +/* + * qcow2_compress() + * + * @dest - destination buffer, at least of @size-1 bytes + * @src - source buffer, @size bytes + * + * Returns: compressed size on success + * -1 if compression is inefficient + * -2 on any other error + */ +static ssize_t qcow2_compress(void *dest, const void *src, size_t size) +{ + ssize_t ret; + z_stream strm; + + /* best compression, small window, no zlib header */ + memset(&strm, 0, sizeof(strm)); + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, + -12, 9, Z_DEFAULT_STRATEGY); + if (ret != 0) { + return -2; + } + + /* strm.next_in is not const in old zlib versions, such as those used on + * OpenBSD/NetBSD, so cast the const away */ + strm.avail_in = size; + strm.next_in = (void *) src; + strm.avail_out = size - 1; + strm.next_out = dest; + + ret = deflate(&strm, Z_FINISH); + if (ret == Z_STREAM_END) { + ret = size - 1 - strm.avail_out; + } else { + ret 
= (ret == Z_OK ? -1 : -2); + } + + deflateEnd(&strm); + + return ret; +} + /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, BDRVQcow2State *s = bs->opaque; QEMUIOVector hd_qiov; struct iovec iov; - z_stream strm; - int ret, out_len; + int ret; + size_t out_len; uint8_t *buf, *out_buf; int64_t cluster_offset; @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, out_buf = g_malloc(s->cluster_size); - /* best compression, small window, no zlib header */ - memset(&strm, 0, sizeof(strm)); - ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, - Z_DEFLATED, -12, - 9, Z_DEFAULT_STRATEGY); - if (ret != 0) { + out_len = qcow2_compress(out_buf, buf, s->cluster_size); + if (out_len == -2) { ret = -EINVAL; goto fail; - } - - strm.avail_in = s->cluster_size; - strm.next_in = (uint8_t *)buf; - strm.avail_out = s->cluster_size; - strm.next_out = out_buf; - - ret = deflate(&strm, Z_FINISH); - if (ret != Z_STREAM_END && ret != Z_OK) { - deflateEnd(&strm); - ret = -EINVAL; - goto fail; - } - out_len = strm.next_out - out_buf; - - deflateEnd(&strm); - - if (ret != Z_STREAM_END || out_len >= s->cluster_size) { + } else if (out_len == -1) { /* could not compress: write normal cluster */ ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); if (ret < 0) { -- 2.13.6
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Do data compression in separate threads. This significantly improves performance for qemu-img convert with -W (allow async writes) and -c (compressed) options. Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/qcow2.h | 3 +++ block/qcow2.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/block/qcow2.h b/block/qcow2.h index XXXXXXX..XXXXXXX 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -XXX,XX +XXX,XX @@ typedef struct BDRVQcow2State { * override) */ char *image_backing_file; char *image_backing_format; + + CoQueue compress_wait_queue; + int nb_compress_threads; } BDRVQcow2State; typedef struct Qcow2COWRegion { diff --git a/block/qcow2.c b/block/qcow2.c index XXXXXXX..XXXXXXX 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -XXX,XX +XXX,XX @@ #include "qapi/qobject-input-visitor.h" #include "qapi/qapi-visit-block-core.h" #include "crypto.h" +#include "block/thread-pool.h" /* Differences with QCOW: @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, qcow2_check_refcounts(bs, &result, 0); } #endif + + qemu_co_queue_init(&s->compress_wait_queue); + return ret; fail: @@ -XXX,XX +XXX,XX @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size) return ret; } +#define MAX_COMPRESS_THREADS 4 + +typedef struct Qcow2CompressData { + void *dest; + const void *src; + size_t size; + ssize_t ret; +} Qcow2CompressData; + +static int qcow2_compress_pool_func(void *opaque) +{ + Qcow2CompressData *data = opaque; + + data->ret = qcow2_compress(data->dest, data->src, data->size); + + return 0; +} + +static void qcow2_compress_complete(void *opaque, int ret) +{ + qemu_coroutine_enter(opaque); +} + +/* See qcow2_compress definition for parameters description */ +static ssize_t 
qcow2_co_compress(BlockDriverState *bs, + void *dest, const void *src, size_t size) +{ + BDRVQcow2State *s = bs->opaque; + BlockAIOCB *acb; + ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); + Qcow2CompressData arg = { + .dest = dest, + .src = src, + .size = size, + }; + + while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { + qemu_co_queue_wait(&s->compress_wait_queue, NULL); + } + + s->nb_compress_threads++; + acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, + qcow2_compress_complete, + qemu_coroutine_self()); + + if (!acb) { + s->nb_compress_threads--; + return -EINVAL; + } + qemu_coroutine_yield(); + s->nb_compress_threads--; + qemu_co_queue_next(&s->compress_wait_queue); + + return arg.ret; +} + /* XXX: put compressed sectors first, then all the cluster aligned tables to avoid losing bytes in alignment */ static coroutine_fn int @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, out_buf = g_malloc(s->cluster_size); - out_len = qcow2_compress(out_buf, buf, s->cluster_size); + out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size); if (out_len == -2) { ret = -EINVAL; goto fail; -- 2.13.6
From: Ari Sundholm <ari@tuxera.com> This allows using the two constants outside of block.c, which will happen in a subsequent patch. Signed-off-by: Ari Sundholm <ari@tuxera.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- include/block/block.h | 7 +++++++ block.c | 6 ------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/block/block.h b/include/block/block.h index XXXXXXX..XXXXXXX 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -XXX,XX +XXX,XX @@ enum { BLK_PERM_GRAPH_MOD = 0x10, BLK_PERM_ALL = 0x1f, + + DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ + | BLK_PERM_WRITE + | BLK_PERM_WRITE_UNCHANGED + | BLK_PERM_RESIZE, + + DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, }; char *bdrv_perm_names(uint64_t perm); diff --git a/block.c b/block.c index XXXXXXX..XXXXXXX 100644 --- a/block.c +++ b/block.c @@ -XXX,XX +XXX,XX @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, return 0; } -#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ - | BLK_PERM_WRITE \ - | BLK_PERM_WRITE_UNCHANGED \ - | BLK_PERM_RESIZE) -#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) - void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, const BdrvChildRole *role, BlockReopenQueue *reopen_queue, -- 2.13.6
From: Aapo Vienamo <aapo@tuxera.com> Implements a block device write logging system, similar to Linux kernel device mapper dm-log-writes. The write operations that are performed on a block device are logged to a file or another block device. The write log format is identical to the dm-log-writes format. Currently, log markers are not supported. This functionality can be used for crash consistency and fs consistency testing. By implementing it in qemu, tests utilizing write logs can be used to test non-Linux drivers and older kernels. The driver accepts an optional parameter to set the sector size used for logging. This makes the driver require all requests to be aligned to this sector size and also causes offsets and sizes of writes in the log metadata to be expressed in terms of this value (the log format has a granularity of one sector for offsets and sizes). This allows accurate logging of writes to guest block devices that have unusual sector sizes. The implementation is based on the blkverify and blkdebug block drivers. 
Signed-off-by: Aapo Vienamo <aapo@tuxera.com> Signed-off-by: Ari Sundholm <ari@tuxera.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- qapi/block-core.json | 33 +++- block/blklogwrites.c | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++ MAINTAINERS | 6 + block/Makefile.objs | 1 + 4 files changed, 448 insertions(+), 6 deletions(-) create mode 100644 block/blklogwrites.c diff --git a/qapi/block-core.json b/qapi/block-core.json index XXXXXXX..XXXXXXX 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -XXX,XX +XXX,XX @@ # @throttle: Since 2.11 # @nvme: Since 2.12 # @copy-on-read: Since 3.0 +# @blklogwrites: Since 3.0 # # Since: 2.9 ## { 'enum': 'BlockdevDriver', - 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read', - 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', - 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', - 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed', - 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh', - 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } + 'data': [ 'blkdebug', 'blklogwrites', 'blkverify', 'bochs', 'cloop', + 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps', 'gluster', + 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks', + 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', + 'qcow2', 'qed', 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', + 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } ## # @BlockdevOptionsFile: @@ -XXX,XX +XXX,XX @@ '*set-state': ['BlkdebugSetStateOptions'] } } ## +# @BlockdevOptionsBlklogwrites: +# +# Driver specific block device options for blklogwrites. 
+# +# @file: block device +# +# @log: block device used to log writes to @file +# +# @log-sector-size: sector size used in logging writes to @file, determines +# granularity of offsets and sizes of writes (default: 512) +# +# Since: 3.0 +## +{ 'struct': 'BlockdevOptionsBlklogwrites', + 'data': { 'file': 'BlockdevRef', + 'log': 'BlockdevRef', + '*log-sector-size': 'uint32' } } + +## # @BlockdevOptionsBlkverify: # # Driver specific block device options for blkverify. @@ -XXX,XX +XXX,XX @@ 'discriminator': 'driver', 'data': { 'blkdebug': 'BlockdevOptionsBlkdebug', + 'blklogwrites':'BlockdevOptionsBlklogwrites', 'blkverify': 'BlockdevOptionsBlkverify', 'bochs': 'BlockdevOptionsGenericFormat', 'cloop': 'BlockdevOptionsGenericFormat', diff --git a/block/blklogwrites.c b/block/blklogwrites.c new file mode 100644 index XXXXXXX..XXXXXXX --- /dev/null +++ b/block/blklogwrites.c @@ -XXX,XX +XXX,XX @@ +/* + * Write logging blk driver based on blkverify and blkdebug. + * + * Copyright (c) 2017 Tuomas Tynkkynen <tuomas@tuxera.com> + * Copyright (c) 2018 Aapo Vienamo <aapo@tuxera.com> + * Copyright (c) 2018 Ari Sundholm <ari@tuxera.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ +#include "block/block_int.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qmp/qstring.h" +#include "qemu/cutils.h" +#include "qemu/option.h" + +/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */ + +#define LOG_FLUSH_FLAG (1 << 0) +#define LOG_FUA_FLAG (1 << 1) +#define LOG_DISCARD_FLAG (1 << 2) +#define LOG_MARK_FLAG (1 << 3) + +#define WRITE_LOG_VERSION 1ULL +#define WRITE_LOG_MAGIC 0x6a736677736872ULL + +/* All fields are little-endian. 
*/ +struct log_write_super { + uint64_t magic; + uint64_t version; + uint64_t nr_entries; + uint32_t sectorsize; +} QEMU_PACKED; + +struct log_write_entry { + uint64_t sector; + uint64_t nr_sectors; + uint64_t flags; + uint64_t data_len; +} QEMU_PACKED; + +/* End of disk format structures. */ + +typedef struct { + BdrvChild *log_file; + uint32_t sectorsize; + uint32_t sectorbits; + uint64_t cur_log_sector; + uint64_t nr_entries; +} BDRVBlkLogWritesState; + +static QemuOptsList runtime_opts = { + .name = "blklogwrites", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "log-sector-size", + .type = QEMU_OPT_SIZE, + .help = "Log sector size", + }, + { /* end of list */ } + }, +}; + +static inline uint32_t blk_log_writes_log2(uint32_t value) +{ + assert(value > 0); + return 31 - clz32(value); +} + +static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVBlkLogWritesState *s = bs->opaque; + QemuOpts *opts; + Error *local_err = NULL; + int ret; + int64_t log_sector_size; + + opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + /* Open the file */ + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + log_sector_size = qemu_opt_get_size(opts, "log-sector-size", + BDRV_SECTOR_SIZE); + + if (log_sector_size < 0 || log_sector_size > (1ull << 23) || + !is_power_of_2(log_sector_size)) + { + ret = -EINVAL; + error_setg(errp, "Invalid log sector size %"PRId64, log_sector_size); + goto fail; + } + + s->sectorsize = log_sector_size; + s->sectorbits = blk_log_writes_log2(log_sector_size); + s->cur_log_sector = 1; + s->nr_entries = 0; + + /* Open the log file */ + s->log_file = bdrv_open_child(NULL, options, "log", bs, 
&child_file, false, + &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail; + } + + ret = 0; +fail: + if (ret < 0) { + bdrv_unref_child(bs, bs->file); + bs->file = NULL; + } + qemu_opts_del(opts); + return ret; +} + +static void blk_log_writes_close(BlockDriverState *bs) +{ + BDRVBlkLogWritesState *s = bs->opaque; + + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; +} + +static int64_t blk_log_writes_getlength(BlockDriverState *bs) +{ + return bdrv_getlength(bs->file->bs); +} + +static void blk_log_writes_refresh_filename(BlockDriverState *bs, + QDict *options) +{ + BDRVBlkLogWritesState *s = bs->opaque; + + /* bs->file->bs has already been refreshed */ + bdrv_refresh_filename(s->log_file->bs); + + if (bs->file->bs->full_open_options + && s->log_file->bs->full_open_options) + { + QDict *opts = qdict_new(); + qdict_put_str(opts, "driver", "blklogwrites"); + + qobject_ref(bs->file->bs->full_open_options); + qdict_put_obj(opts, "file", QOBJECT(bs->file->bs->full_open_options)); + qobject_ref(s->log_file->bs->full_open_options); + qdict_put_obj(opts, "log", + QOBJECT(s->log_file->bs->full_open_options)); + qdict_put_int(opts, "log-sector-size", s->sectorsize); + + bs->full_open_options = opts; + } +} + +static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c, + const BdrvChildRole *role, + BlockReopenQueue *ro_q, + uint64_t perm, uint64_t shrd, + uint64_t *nperm, uint64_t *nshrd) +{ + if (!c) { + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; + *nshrd = (shrd & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; + return; + } + + if (!strcmp(c->name, "log")) { + bdrv_format_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd); + } else { + bdrv_filter_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd); + } +} + +static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp) +{ + BDRVBlkLogWritesState *s = bs->opaque; + bs->bl.request_alignment = s->sectorsize; +} + +static int 
coroutine_fn +blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); +} + +typedef struct BlkLogWritesFileReq { + BlockDriverState *bs; + uint64_t offset; + uint64_t bytes; + int file_flags; + QEMUIOVector *qiov; + int (*func)(struct BlkLogWritesFileReq *r); + int file_ret; +} BlkLogWritesFileReq; + +typedef struct { + BlockDriverState *bs; + QEMUIOVector *qiov; + struct log_write_entry entry; + uint64_t zero_size; + int log_ret; +} BlkLogWritesLogReq; + +static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr) +{ + BDRVBlkLogWritesState *s = lr->bs->opaque; + uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits; + + s->nr_entries++; + s->cur_log_sector += + ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits; + + lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size, + lr->qiov, 0); + + /* Logging for the "write zeroes" operation */ + if (lr->log_ret == 0 && lr->zero_size) { + cur_log_offset = s->cur_log_sector << s->sectorbits; + s->cur_log_sector += + ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits; + + lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset, + lr->zero_size, 0); + } + + /* Update super block on flush */ + if (lr->log_ret == 0 && lr->entry.flags & LOG_FLUSH_FLAG) { + struct log_write_super super = { + .magic = cpu_to_le64(WRITE_LOG_MAGIC), + .version = cpu_to_le64(WRITE_LOG_VERSION), + .nr_entries = cpu_to_le64(s->nr_entries), + .sectorsize = cpu_to_le32(s->sectorsize), + }; + void *zeroes = g_malloc0(s->sectorsize - sizeof(super)); + QEMUIOVector qiov; + + qemu_iovec_init(&qiov, 2); + qemu_iovec_add(&qiov, &super, sizeof(super)); + qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super)); + + lr->log_ret = + bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0); + if (lr->log_ret == 0) { + lr->log_ret = bdrv_co_flush(s->log_file->bs); + } + 
qemu_iovec_destroy(&qiov); + g_free(zeroes); + } +} + +static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr) +{ + fr->file_ret = fr->func(fr); +} + +static int coroutine_fn +blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags, + int (*file_func)(BlkLogWritesFileReq *r), + uint64_t entry_flags, bool is_zero_write) +{ + QEMUIOVector log_qiov; + size_t niov = qiov ? qiov->niov : 0; + BDRVBlkLogWritesState *s = bs->opaque; + BlkLogWritesFileReq fr = { + .bs = bs, + .offset = offset, + .bytes = bytes, + .file_flags = flags, + .qiov = qiov, + .func = file_func, + }; + BlkLogWritesLogReq lr = { + .bs = bs, + .qiov = &log_qiov, + .entry = { + .sector = cpu_to_le64(offset >> s->sectorbits), + .nr_sectors = cpu_to_le64(bytes >> s->sectorbits), + .flags = cpu_to_le64(entry_flags), + .data_len = 0, + }, + .zero_size = is_zero_write ? bytes : 0, + }; + void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry)); + + assert((1 << s->sectorbits) == s->sectorsize); + assert(bs->bl.request_alignment == s->sectorsize); + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); + + qemu_iovec_init(&log_qiov, niov + 2); + qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry)); + qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry)); + if (qiov) { + qemu_iovec_concat(&log_qiov, qiov, 0, qiov->size); + } + + blk_log_writes_co_do_file(&fr); + blk_log_writes_co_do_log(&lr); + + qemu_iovec_destroy(&log_qiov); + g_free(zeroes); + + if (lr.log_ret < 0) { + return lr.log_ret; + } + + return fr.file_ret; +} + +static int coroutine_fn +blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes, + fr->qiov, fr->file_flags); +} + +static int coroutine_fn +blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, 
fr->bytes, + fr->file_flags); +} + +static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr) +{ + return bdrv_co_flush(fr->bs->file->bs); +} + +static int coroutine_fn +blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr) +{ + return bdrv_co_pdiscard(fr->bs->file->bs, fr->offset, fr->bytes); +} + +static int coroutine_fn +blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, + QEMUIOVector *qiov, int flags) +{ + return blk_log_writes_co_log(bs, offset, bytes, qiov, flags, + blk_log_writes_co_do_file_pwritev, 0, false); +} + +static int coroutine_fn +blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, + BdrvRequestFlags flags) +{ + return blk_log_writes_co_log(bs, offset, bytes, NULL, flags, + blk_log_writes_co_do_file_pwrite_zeroes, 0, + true); +} + +static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs) +{ + return blk_log_writes_co_log(bs, 0, 0, NULL, 0, + blk_log_writes_co_do_file_flush, + LOG_FLUSH_FLAG, false); +} + +static int coroutine_fn +blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) +{ + return blk_log_writes_co_log(bs, offset, count, NULL, 0, + blk_log_writes_co_do_file_pdiscard, + LOG_DISCARD_FLAG, false); +} + +static BlockDriver bdrv_blk_log_writes = { + .format_name = "blklogwrites", + .instance_size = sizeof(BDRVBlkLogWritesState), + + .bdrv_open = blk_log_writes_open, + .bdrv_close = blk_log_writes_close, + .bdrv_getlength = blk_log_writes_getlength, + .bdrv_refresh_filename = blk_log_writes_refresh_filename, + .bdrv_child_perm = blk_log_writes_child_perm, + .bdrv_refresh_limits = blk_log_writes_refresh_limits, + + .bdrv_co_preadv = blk_log_writes_co_preadv, + .bdrv_co_pwritev = blk_log_writes_co_pwritev, + .bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes, + .bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk, + .bdrv_co_pdiscard = blk_log_writes_co_pdiscard, + .bdrv_co_block_status = 
bdrv_co_block_status_from_file, + + .is_filter = true, +}; + +static void bdrv_blk_log_writes_init(void) +{ + bdrv_register(&bdrv_blk_log_writes); +} + +block_init(bdrv_blk_log_writes_init); diff --git a/MAINTAINERS b/MAINTAINERS index XXXXXXX..XXXXXXX 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -XXX,XX +XXX,XX @@ S: Supported F: block/quorum.c L: qemu-block@nongnu.org +blklogwrites +M: Ari Sundholm <ari@tuxera.com> +L: qemu-block@nongnu.org +S: Supported +F: block/blklogwrites.c + blkverify M: Stefan Hajnoczi <stefanha@redhat.com> L: qemu-block@nongnu.org diff --git a/block/Makefile.objs b/block/Makefile.objs index XXXXXXX..XXXXXXX 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -XXX,XX +XXX,XX @@ block-obj-y += qed-check.o block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o block-obj-y += quorum.o block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o +block-obj-y += blklogwrites.o block-obj-y += block-backend.o snapshot.o qapi.o block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o block-obj-$(CONFIG_POSIX) += file-posix.o -- 2.13.6
If the user passes a too long node name string, we silently truncate it to fit into BlockDriverState.node_name, i.e. to 31 characters. Apart from surprising the user when the node has a different name than requested, this also bypasses the check for duplicate names, so that the same name can be assigned to multiple nodes. Fix this by just making too long node names an error. Reported-by: Peter Krempa <pkrempa@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block.c | 6 ++++++ tests/qemu-iotests/051 | 15 +++++++++++++++ tests/qemu-iotests/051.out | 23 +++++++++++++++++++++++ tests/qemu-iotests/051.pc.out | 23 +++++++++++++++++++++++ 4 files changed, 67 insertions(+) diff --git a/block.c b/block.c index XXXXXXX..XXXXXXX 100644 --- a/block.c +++ b/block.c @@ -XXX,XX +XXX,XX @@ static void bdrv_assign_node_name(BlockDriverState *bs, goto out; } + /* Make sure that the node name isn't truncated */ + if (strlen(node_name) >= sizeof(bs->node_name)) { + error_setg(errp, "Node name too long"); + goto out; + } + /* copy node name into the bs and insert it into the graph list */ pstrcpy(bs->node_name, sizeof(bs->node_name), node_name); QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list); diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051 index XXXXXXX..XXXXXXX 100755 --- a/tests/qemu-iotests/051 +++ b/tests/qemu-iotests/051 @@ -XXX,XX +XXX,XX @@ run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2 run_qemu -drive file="$TEST_IMG",driver=qcow2,format=qcow2 echo +echo === Node names === +echo + +# Maximum length: 31 characters +run_qemu -drive file="$TEST_IMG",node-name=x123456789012345678901234567890 +run_qemu -drive file="$TEST_IMG",node-name=x1234567890123456789012345678901 + +# First character must be alphabetic +# Following characters alphanumeric or -._ +run_qemu -drive file="$TEST_IMG",node-name=All-Types.of_all0wed_chars +run_qemu -drive file="$TEST_IMG",node-name=123foo +run_qemu -drive file="$TEST_IMG",node-name=_foo +run_qemu -drive 
file="$TEST_IMG",node-name=foo#12 + +echo echo === Device without drive === echo diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -XXX,XX +XXX,XX @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2 QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format' +=== Node names === + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x123456789012345678901234567890 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901: Node name too long + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=All-Types.of_all0wed_chars +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name + + === Device without drive === Testing: -device VIRTIO_SCSI -device scsi-hd diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out index XXXXXXX..XXXXXXX 100644 --- a/tests/qemu-iotests/051.pc.out +++ b/tests/qemu-iotests/051.pc.out @@ -XXX,XX +XXX,XX @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2 QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format' +=== Node names === + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=x123456789012345678901234567890 +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive 
file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901: Node name too long + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=All-Types.of_all0wed_chars +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) quit + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name + +Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12 +QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name + + === Device without drive === Testing: -device VIRTIO_SCSI -device scsi-hd -- 2.13.6
Fixes: Coverity CID 1393782 Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Daniel P. Berrangé <berrange@redhat.com> --- block/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/crypto.c b/block/crypto.c index XXXXXXX..XXXXXXX 100644 --- a/block/crypto.c +++ b/block/crypto.c @@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename, /* Create protocol layer */ ret = bdrv_create_file(filename, opts, errp); if (ret < 0) { - return ret; + goto fail; } bs = bdrv_open(filename, NULL, NULL, -- 2.13.6
From: Ari Sundholm <ari@tuxera.com> This was a simple oversight when working on intermediate versions of the original patch which introduced blklogwrites. Signed-off-by: Ari Sundholm <ari@tuxera.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/blklogwrites.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/block/blklogwrites.c b/block/blklogwrites.c index XXXXXXX..XXXXXXX 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; Error *local_err = NULL; int ret; - int64_t log_sector_size; + uint64_t log_sector_size; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, log_sector_size = qemu_opt_get_size(opts, "log-sector-size", BDRV_SECTOR_SIZE); - if (log_sector_size < 0 || log_sector_size > (1ull << 23) || - !is_power_of_2(log_sector_size)) - { + if (log_sector_size > (1ull << 23) || !is_power_of_2(log_sector_size)) { ret = -EINVAL; - error_setg(errp, "Invalid log sector size %"PRId64, log_sector_size); + error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size); goto fail; } -- 2.13.6
From: Ari Sundholm <ari@tuxera.com> Suggested by Kevin Wolf. May be useful when testing multiple batches of writes or doing long-term testing involving restarts of the VM. Signed-off-by: Ari Sundholm <ari@tuxera.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- qapi/block-core.json | 3 +- block/blklogwrites.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 135 insertions(+), 15 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index XXXXXXX..XXXXXXX 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -XXX,XX +XXX,XX @@ { 'struct': 'BlockdevOptionsBlklogwrites', 'data': { 'file': 'BlockdevRef', 'log': 'BlockdevRef', - '*log-sector-size': 'uint32' } } + '*log-sector-size': 'uint32', + '*log-append': 'bool' } } ## # @BlockdevOptionsBlkverify: diff --git a/block/blklogwrites.c b/block/blklogwrites.c index XXXXXXX..XXXXXXX 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -XXX,XX +XXX,XX @@ #define LOG_FUA_FLAG (1 << 1) #define LOG_DISCARD_FLAG (1 << 2) #define LOG_MARK_FLAG (1 << 3) +#define LOG_FLAG_MASK (LOG_FLUSH_FLAG \ + | LOG_FUA_FLAG \ + | LOG_DISCARD_FLAG \ + | LOG_MARK_FLAG) #define WRITE_LOG_VERSION 1ULL #define WRITE_LOG_MAGIC 0x6a736677736872ULL @@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = { .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), .desc = { { + .name = "log-append", + .type = QEMU_OPT_BOOL, + .help = "Append to an existing log", + }, + { .name = "log-sector-size", .type = QEMU_OPT_SIZE, .help = "Log sector size", @@ -XXX,XX +XXX,XX @@ static inline uint32_t blk_log_writes_log2(uint32_t value) return 31 - clz32(value); } +static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size) +{ + return sector_size < (1ull << 24) && is_power_of_2(sector_size); +} + +static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log, + uint32_t sector_size, + uint64_t nr_entries, + Error **errp) +{ + uint64_t cur_sector = 1; + uint64_t cur_idx = 0; + uint32_t 
sector_bits = blk_log_writes_log2(sector_size); + struct log_write_entry cur_entry; + + while (cur_idx < nr_entries) { + int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry, + sizeof(cur_entry)); + if (read_ret < 0) { + error_setg_errno(errp, -read_ret, + "Failed to read log entry %"PRIu64, cur_idx); + return (uint64_t)-1ull; + } + + if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) { + error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64, + le64_to_cpu(cur_entry.flags), cur_idx); + return (uint64_t)-1ull; + } + + /* Account for the sector of the entry itself */ + ++cur_sector; + + /* + * Account for the data of the write. + * For discards, this data is not present. + */ + if (!(cur_entry.flags & cpu_to_le64(LOG_DISCARD_FLAG))) { + cur_sector += le64_to_cpu(cur_entry.nr_sectors); + } + + ++cur_idx; + } + + return cur_sector; +} + static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; int ret; uint64_t log_sector_size; + bool log_append; opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort); qemu_opts_absorb_qdict(opts, options, &local_err); @@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - log_sector_size = qemu_opt_get_size(opts, "log-sector-size", - BDRV_SECTOR_SIZE); - - if (log_sector_size > (1ull << 23) || !is_power_of_2(log_sector_size)) { - ret = -EINVAL; - error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size); - goto fail; - } - - s->sectorsize = log_sector_size; - s->sectorbits = blk_log_writes_log2(log_sector_size); - s->cur_log_sector = 1; - s->nr_entries = 0; - /* Open the log file */ s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, false, &local_err); @@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int 
flags, goto fail; } + log_append = qemu_opt_get_bool(opts, "log-append", false); + + if (log_append) { + struct log_write_super log_sb = { 0, 0, 0, 0 }; + + if (qemu_opt_find(opts, "log-sector-size")) { + ret = -EINVAL; + error_setg(errp, "log-append and log-sector-size are mutually " + "exclusive"); + goto fail_log; + } + + /* Read log superblock or fake one for an empty log */ + if (!bdrv_getlength(s->log_file->bs)) { + log_sb.magic = cpu_to_le64(WRITE_LOG_MAGIC); + log_sb.version = cpu_to_le64(WRITE_LOG_VERSION); + log_sb.nr_entries = cpu_to_le64(0); + log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE); + } else { + ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb)); + if (ret < 0) { + error_setg_errno(errp, -ret, "Could not read log superblock"); + goto fail_log; + } + } + + if (log_sb.magic != cpu_to_le64(WRITE_LOG_MAGIC)) { + ret = -EINVAL; + error_setg(errp, "Invalid log superblock magic"); + goto fail_log; + } + + if (log_sb.version != cpu_to_le64(WRITE_LOG_VERSION)) { + ret = -EINVAL; + error_setg(errp, "Unsupported log version %"PRIu64, + le64_to_cpu(log_sb.version)); + goto fail_log; + } + + log_sector_size = le32_to_cpu(log_sb.sectorsize); + s->cur_log_sector = 1; + s->nr_entries = 0; + + if (blk_log_writes_sector_size_valid(log_sector_size)) { + s->cur_log_sector = + blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size, + le64_to_cpu(log_sb.nr_entries), &local_err); + if (local_err) { + ret = -EINVAL; + error_propagate(errp, local_err); + goto fail_log; + } + + s->nr_entries = le64_to_cpu(log_sb.nr_entries); + } + } else { + log_sector_size = qemu_opt_get_size(opts, "log-sector-size", + BDRV_SECTOR_SIZE); + s->cur_log_sector = 1; + s->nr_entries = 0; + } + + if (!blk_log_writes_sector_size_valid(log_sector_size)) { + ret = -EINVAL; + error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size); + goto fail_log; + } + + s->sectorsize = log_sector_size; + s->sectorbits = blk_log_writes_log2(log_sector_size); + ret = 0; +fail_log: 
+ if (ret < 0) { + bdrv_unref_child(bs, s->log_file); + s->log_file = NULL; + } fail: if (ret < 0) { bdrv_unref_child(bs, bs->file); -- 2.13.6
From: Ari Sundholm <ari@tuxera.com> This is a way to ensure that the log superblock is periodically updated. Before, this was only done on flush requests, which may not be enough if the VM exits abnormally, omitting the final flush. The default interval is 4096 write requests. Signed-off-by: Ari Sundholm <ari@tuxera.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- qapi/block-core.json | 6 +++++- block/blklogwrites.c | 20 ++++++++++++++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index XXXXXXX..XXXXXXX 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -XXX,XX +XXX,XX @@ # @log-sector-size: sector size used in logging writes to @file, determines # granularity of offsets and sizes of writes (default: 512) # +# @log-super-update-interval: interval of write requests after which the log +# super block is updated to disk (default: 4096) +# # Since: 3.0 ## { 'struct': 'BlockdevOptionsBlklogwrites', 'data': { 'file': 'BlockdevRef', 'log': 'BlockdevRef', '*log-sector-size': 'uint32', - '*log-append': 'bool' } } + '*log-append': 'bool', + '*log-super-update-interval': 'uint64' } } ## # @BlockdevOptionsBlkverify: diff --git a/block/blklogwrites.c b/block/blklogwrites.c index XXXXXXX..XXXXXXX 100644 --- a/block/blklogwrites.c +++ b/block/blklogwrites.c @@ -XXX,XX +XXX,XX @@ typedef struct { uint32_t sectorbits; uint64_t cur_log_sector; uint64_t nr_entries; + uint64_t update_interval; } BDRVBlkLogWritesState; static QemuOptsList runtime_opts = { @@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = { .type = QEMU_OPT_SIZE, .help = "Log sector size", }, + { + .name = "log-super-update-interval", + .type = QEMU_OPT_NUMBER, + .help = "Log superblock update interval (# of write requests)", + }, { /* end of list */ } }, }; @@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, s->sectorsize = log_sector_size; s->sectorbits = 
blk_log_writes_log2(log_sector_size); + s->update_interval = qemu_opt_get_number(opts, "log-super-update-interval", + 4096); + if (!s->update_interval) { + ret = -EINVAL; + error_setg(errp, "Invalid log superblock update interval %"PRIu64, + s->update_interval); + goto fail_log; + } ret = 0; fail_log: @@ -XXX,XX +XXX,XX @@ static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr) lr->zero_size, 0); } - /* Update super block on flush */ - if (lr->log_ret == 0 && lr->entry.flags & LOG_FLUSH_FLAG) { + /* Update super block on flush or every update interval */ + if (lr->log_ret == 0 && ((lr->entry.flags & LOG_FLUSH_FLAG) + || (s->nr_entries % s->update_interval == 0))) + { struct log_write_super super = { .magic = cpu_to_le64(WRITE_LOG_MAGIC), .version = cpu_to_le64(WRITE_LOG_VERSION), -- 2.13.6
From: Max Reitz <mreitz@redhat.com> raw_apply_lock_bytes() takes a bit mask of "permissions that are NOT shared". Also, make the "perm" and "shared" variables uint64_t, because I do not particularly like using ~ on signed integers (and other permission masks are usually uint64_t, too). Reported-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/file-posix.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index XXXXXXX..XXXXXXX 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -XXX,XX +XXX,XX @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) { BlockdevCreateOptionsFile *file_opts; int fd; - int perm, shared; + uint64_t perm, shared; int result = 0; /* Validate options and set default values */ @@ -XXX,XX +XXX,XX @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE; /* Step one: Take locks */ - result = raw_apply_lock_bytes(fd, perm, shared, false, errp); + result = raw_apply_lock_bytes(fd, perm, ~shared, false, errp); if (result < 0) { goto out_close; } -- 2.13.6
From: Max Reitz <mreitz@redhat.com> Closing the FD does not necessarily mean that it is unlocked. Fix this by relinquishing all permission locks before qemu_close(). Reported-by: Kevin Wolf <kwolf@redhat.com> Signed-off-by: Max Reitz <mreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> --- block/file-posix.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index XXXXXXX..XXXXXXX 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_create(BlockdevCreateOptions *options, Error **errp) { BlockdevCreateOptionsFile *file_opts; + Error *local_err = NULL; int fd; uint64_t perm, shared; int result = 0; @@ -XXX,XX +XXX,XX @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) /* Step two: Check that nobody else has taken conflicting locks */ result = raw_check_lock_bytes(fd, perm, shared, errp); if (result < 0) { - goto out_close; + goto out_unlock; } /* Clear the file by truncating it to 0 */ result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp); if (result < 0) { - goto out_close; + goto out_unlock; } if (file_opts->nocow) { @@ -XXX,XX +XXX,XX @@ raw_co_create(BlockdevCreateOptions *options, Error **errp) result = raw_regular_truncate(NULL, fd, file_opts->size, file_opts->preallocation, errp); if (result < 0) { - goto out_close; + goto out_unlock; + } + +out_unlock: + raw_apply_lock_bytes(fd, 0, 0, true, &local_err); + if (local_err) { + /* The above call should not fail, and if it does, that does + * not mean the whole creation operation has failed. So + * report it to the user for their convenience, but do not report + * it to the caller. */ + error_report_err(local_err); } out_close: -- 2.13.6
The following changes since commit 13356edb87506c148b163b8c7eb0695647d00c2a: Merge tag 'block-pull-request' of https://gitlab.com/stefanha/qemu into staging (2023-01-24 09:45:33 +0000) are available in the Git repository at: https://repo.or.cz/qemu/kevin.git tags/for-upstream for you to fetch changes up to d570177b50c389f379f93183155a27d44856ab46: qemu-img: Change info key names for protocol nodes (2023-02-01 16:52:33 +0100) v4: - Fixed the 'qemu-img-close-errors' test case to run only on Linux and only with the file protocol, use qemu-io instead of truncate v3: - Make the compiler happier on BSD and CentOS Stream 8 v2: - Rebased to resolve merge conflicts in coroutine.h ---------------------------------------------------------------- Block layer patches - qemu-img info: Show protocol-level information - Move more functions to coroutines - Make coroutine annotations ready for static analysis - qemu-img: Fix exit code for errors closing the image - qcow2 bitmaps: Fix theoretical corruption in error path - pflash: Only load non-zero parts of backend image to save memory - Code cleanup and test case improvements ---------------------------------------------------------------- Alberto Faria (2): coroutine: annotate coroutine_fn for libclang block: Add no_coroutine_fn and coroutine_mixed_fn marker Emanuele Giuseppe Esposito (14): block-coroutine-wrapper: support void functions block: Convert bdrv_io_plug() to co_wrapper block: Convert bdrv_io_unplug() to co_wrapper block: Convert bdrv_is_inserted() to co_wrapper block: Rename refresh_total_sectors to bdrv_refresh_total_sectors block: Convert bdrv_refresh_total_sectors() to co_wrapper_mixed block-backend: use bdrv_getlength instead of blk_getlength block: use bdrv_co_refresh_total_sectors when possible block: Convert bdrv_get_allocated_file_size() to co_wrapper block: Convert bdrv_get_info() to co_wrapper_mixed block: Convert bdrv_eject() to co_wrapper block: Convert bdrv_lock_medium() to co_wrapper block: Convert 
bdrv_debug_event() to co_wrapper_mixed block: Rename bdrv_load/save_vmstate() to bdrv_co_load/save_vmstate() Hanna Reitz (12): block: Improve empty format-specific info dump block/file: Add file-specific image info block/vmdk: Change extent info type block: Split BlockNodeInfo off of ImageInfo qemu-img: Use BlockNodeInfo block/qapi: Let bdrv_query_image_info() recurse block/qapi: Introduce BlockGraphInfo block/qapi: Add indentation to bdrv_node_info_dump() iotests: Filter child node information iotests/106, 214, 308: Read only one size line qemu-img: Let info print block graph qemu-img: Change info key names for protocol nodes Kevin Wolf (4): qcow2: Fix theoretical corruption in store_bitmap() error path qemu-img commit: Report errors while closing the image qemu-img bitmap: Report errors while closing the image qemu-iotests: Test qemu-img bitmap/commit exit code on error Paolo Bonzini (2): qemu-io: do not reinvent the blk_pwrite_zeroes wheel block: remove bdrv_coroutine_enter Philippe Mathieu-Daudé (1): block/nbd: Add missing <qemu/bswap.h> include Thomas Huth (2): tests/qemu-iotests/312: Mark "quorum" as required driver tests/qemu-iotests/262: Check for availability of "blkverify" first Xiang Zheng (1): pflash: Only read non-zero parts of backend image qapi/block-core.json | 123 +++++++- include/block/block-common.h | 11 +- include/block/block-io.h | 41 ++- include/block/block_int-common.h | 26 +- include/block/block_int-io.h | 5 +- include/block/nbd.h | 1 + include/block/qapi.h | 14 +- include/qemu/osdep.h | 44 +++ include/sysemu/block-backend-io.h | 31 +- block.c | 88 +++--- block/blkdebug.c | 11 +- block/blkio.c | 15 +- block/blklogwrites.c | 6 +- block/blkreplay.c | 6 +- block/blkverify.c | 6 +- block/block-backend.c | 38 +-- block/commit.c | 4 +- block/copy-on-read.c | 18 +- block/crypto.c | 14 +- block/curl.c | 10 +- block/file-posix.c | 137 +++++---- block/file-win32.c | 18 +- block/filter-compress.c | 20 +- block/gluster.c | 23 +- block/io.c | 76 ++--- 
block/iscsi.c | 17 +- block/mirror.c | 6 +- block/monitor/block-hmp-cmds.c | 2 +- block/nbd.c | 8 +- block/nfs.c | 4 +- block/null.c | 13 +- block/nvme.c | 14 +- block/preallocate.c | 16 +- block/qapi.c | 317 ++++++++++++++++----- block/qcow.c | 5 +- block/qcow2-bitmap.c | 5 +- block/qcow2-refcount.c | 2 +- block/qcow2.c | 17 +- block/qed.c | 11 +- block/quorum.c | 8 +- block/raw-format.c | 25 +- block/rbd.c | 9 +- block/replication.c | 6 +- block/ssh.c | 4 +- block/throttle.c | 6 +- block/vdi.c | 7 +- block/vhdx.c | 5 +- block/vmdk.c | 22 +- block/vpc.c | 5 +- blockdev.c | 8 +- hw/block/block.c | 36 ++- hw/scsi/scsi-disk.c | 5 + qemu-img.c | 100 +++++-- qemu-io-cmds.c | 62 +--- tests/unit/test-block-iothread.c | 3 + scripts/block-coroutine-wrapper.py | 20 +- tests/qemu-iotests/iotests.py | 18 +- block/meson.build | 1 + tests/qemu-iotests/065 | 2 +- tests/qemu-iotests/106 | 4 +- tests/qemu-iotests/214 | 6 +- tests/qemu-iotests/262 | 3 +- tests/qemu-iotests/302.out | 5 + tests/qemu-iotests/308 | 4 +- tests/qemu-iotests/312 | 1 + tests/qemu-iotests/common.filter | 22 +- tests/qemu-iotests/common.rc | 22 +- tests/qemu-iotests/tests/qemu-img-close-errors | 96 +++++++ tests/qemu-iotests/tests/qemu-img-close-errors.out | 23 ++ 69 files changed, 1209 insertions(+), 552 deletions(-) create mode 100755 tests/qemu-iotests/tests/qemu-img-close-errors create mode 100644 tests/qemu-iotests/tests/qemu-img-close-errors.out