From b6817097ec600c56d828a2514540d1fc7df1db33 Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Sat, 9 Jan 2021 12:27:07 +0300 Subject: [PATCH 1/4] [RFC] bdev: Add API to get bdev capabilities The new API spdk_bdev_get_caps returns capabilities specific for bdev module. Change-Id: Ic9f42eff59bdc4c8c6e73deb76b3eecfc04f80a8 Signed-off-by: Alexey Marchuk --- CHANGELOG.md | 3 +++ include/spdk/bdev.h | 24 ++++++++++++++++++++++++ include/spdk/bdev_module.h | 3 +++ lib/bdev/bdev.c | 23 +++++++++++++++++++++++ module/bdev/delay/vbdev_delay.c | 11 +++++++++++ module/bdev/nvme/bdev_nvme.c | 11 +++++++++++ module/bdev/passthru/vbdev_passthru.c | 11 +++++++++++ 7 files changed, 86 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34ca68fb1e3..400e36b3ab5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,9 @@ parameter in spdk_bdev_get_opts function. Two fields `small_buf_pool_size` and `large_buf_pool_size` were added into spdk_bdev_opts, which were used to determine the small and large buffer pool size of the whole bdev module. +New API `spdk_bdev_get_caps` has been added, it allows to get extended bdev module +capabilities. + ### blob An `opts_size` element was added in the `spdk_bs_opts` structure to solve the diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index 95366887ec6..d48d6e209cf 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -1733,6 +1733,30 @@ void spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data size_t spdk_bdev_get_media_events(struct spdk_bdev_desc *bdev_desc, struct spdk_bdev_media_event *events, size_t max_events); +enum spdk_bdev_capability_type { + /** Bdev supports indirect memory access using Memory Key. 
+ * That means that the user of ext bdev API can fill spdk_bdev_ext_io_opts_mem_type + * structure and set SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE flag in spdk_bdev_ext_io_opts structure */ + SPDK_BDEV_CAP_EXT_MEMORY_TYPE_MKEY = 1u << 0u, +}; + +/** Describes capabilities of Block device */ +struct spdk_bdev_capability { + /** Size of this structure in bytes, should be set by the user */ + size_t size; + /** bitwise combination of \ref spdk_bdev_capability_type */ + uint64_t flags; +}; + +/** + * Get bdev capabilities + * + * \param bdev Block device + * \param caps Capabilities of Block device to be filled by this function + * \return 0 on success, negated errno on failure. + */ +int spdk_bdev_get_caps(struct spdk_bdev *bdev, struct spdk_bdev_capability *caps); + #ifdef __cplusplus } #endif diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index 77f95c68e7a..11f2bf9c8be 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -222,6 +222,9 @@ struct spdk_bdev_fn_table { /** Get bdev module context. */ void *(*get_module_ctx)(void *ctx); + + /** Get block device capabilities */ + void (*get_caps)(void *ctx, struct spdk_bdev_capability *caps); }; /** bdev I/O completion status */ diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 3b9728e8049..4deb351138e 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -6813,6 +6813,29 @@ bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch, return 0; } +int +spdk_bdev_get_caps(struct spdk_bdev *bdev, struct spdk_bdev_capability *caps) +{ + struct spdk_bdev_capability caps_local; + + if (!caps || !caps->size) { + return -EINVAL; + } + + memset(&caps_local, 0, sizeof(caps_local)); + caps_local.size = spdk_min(sizeof(caps_local), caps->size); + + if (bdev->fn_table->get_caps) { + bdev->fn_table->get_caps(bdev->ctxt, &caps_local); + } + + /* The user may use older or newer SPDK version where size of this structure can be different. 
+ * Here we copy only the number of bytes requested by the user and supported by SPDK */ + memcpy(caps, &caps_local, caps_local.size); + + return 0; +} + SPDK_LOG_REGISTER_COMPONENT(bdev) SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV) diff --git a/module/bdev/delay/vbdev_delay.c b/module/bdev/delay/vbdev_delay.c index bbab2a40393..9344b39ae19 100644 --- a/module/bdev/delay/vbdev_delay.c +++ b/module/bdev/delay/vbdev_delay.c @@ -671,6 +671,16 @@ vbdev_delay_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx /* No config per bdev needed */ } +static void +vbdev_delay_get_caps(void *ctx, struct spdk_bdev_capability *caps) +{ + struct vbdev_delay *delay_node = (struct vbdev_delay *)ctx; + + if (delay_node->base_bdev->fn_table->get_caps) { + delay_node->base_bdev->fn_table->get_caps(delay_node->base_bdev->ctxt, caps); + } +} + /* When we register our bdev this is how we specify our entry points. */ static const struct spdk_bdev_fn_table vbdev_delay_fn_table = { .destruct = vbdev_delay_destruct, @@ -679,6 +689,7 @@ static const struct spdk_bdev_fn_table vbdev_delay_fn_table = { .get_io_channel = vbdev_delay_get_io_channel, .dump_info_json = vbdev_delay_dump_info_json, .write_config_json = vbdev_delay_write_config_json, + .get_caps = vbdev_delay_get_caps, }; static void diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index dd861aa655b..293157498c3 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -980,6 +980,16 @@ bdev_nvme_get_module_ctx(void *ctx) return bdev_nvme_get_ctrlr(&nvme_bdev->disk); } +static void +bdev_nvme_get_caps(void *ctx, struct spdk_bdev_capability *caps) +{ + struct nvme_bdev *nbdev = ctx; + + if (nbdev->nvme_ns->ctrlr->connected_trid->trtype == SPDK_NVME_TRANSPORT_RDMA) { + caps->flags |= SPDK_BDEV_CAP_EXT_MEMORY_TYPE_MKEY; + } +} + static int bdev_nvme_dump_info_json(void *ctx, struct spdk_json_write_ctx *w) { @@ -1132,6 +1142,7 @@ static const struct spdk_bdev_fn_table 
nvmelib_fn_table = { .write_config_json = bdev_nvme_write_config_json, .get_spin_time = bdev_nvme_get_spin_time, .get_module_ctx = bdev_nvme_get_module_ctx, + .get_caps = bdev_nvme_get_caps, }; static int diff --git a/module/bdev/passthru/vbdev_passthru.c b/module/bdev/passthru/vbdev_passthru.c index 5d72bce465e..4d546df6cde 100644 --- a/module/bdev/passthru/vbdev_passthru.c +++ b/module/bdev/passthru/vbdev_passthru.c @@ -550,6 +550,16 @@ vbdev_passthru_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ /* No config per bdev needed */ } +static void +vbdev_passthru_get_caps(void *ctx, struct spdk_bdev_capability *caps) +{ + struct vbdev_passthru *pt_node = (struct vbdev_passthru *)ctx; + + if (pt_node->base_bdev->fn_table->get_caps) { + pt_node->base_bdev->fn_table->get_caps(pt_node->base_bdev->ctxt, caps); + } +} + /* When we register our bdev this is how we specify our entry points. */ static const struct spdk_bdev_fn_table vbdev_passthru_fn_table = { .destruct = vbdev_passthru_destruct, @@ -558,6 +568,7 @@ static const struct spdk_bdev_fn_table vbdev_passthru_fn_table = { .get_io_channel = vbdev_passthru_get_io_channel, .dump_info_json = vbdev_passthru_dump_info_json, .write_config_json = vbdev_passthru_write_config_json, + .get_caps = vbdev_passthru_get_caps, }; static void From 7e1c1f0ae086090ee1fe0b17a9556e1b7ad13f96 Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Sat, 9 Jan 2021 16:34:42 +0300 Subject: [PATCH 2/4] [RFC] bdev: Add extended versions of readv/writev_with_md New functions accept extendable structure of IO options Change-Id: If6864df151a3c0ad722785cb26d1f5d4309cd733 Signed-off-by: Alexey Marchuk --- CHANGELOG.md | 4 + include/spdk/bdev.h | 117 ++++++++++++++++++++ include/spdk/bdev_module.h | 8 ++ lib/bdev/bdev.c | 211 ++++++++++++++++++++++++------------- 4 files changed, 265 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 400e36b3ab5..9a475457dcf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 
+13,10 @@ the small and large buffer pool size of the whole bdev module. New API `spdk_bdev_get_caps` has been added, it allows to get extended bdev module capabilities. +New API functions `spdk_bdev_readv_blocks_with_md_ext` and `spdk_bdev_writev_blocks_with_md_ext` +have been added. These functions accept `spdk_bdev_ext_io_opts` structure with extended IO request +options. `opts_size` member of this structure must be set to a valid value. + ### blob An `opts_size` element was added in the `spdk_bs_opts` structure to solve the diff --git a/include/spdk/bdev.h b/include/spdk/bdev.h index d48d6e209cf..c415ceadb3c 100644 --- a/include/spdk/bdev.h +++ b/include/spdk/bdev.h @@ -891,6 +891,88 @@ int spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_c uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Callback used to get a Memory Key per IO request + * + * pd is input parameter and should point to a memory domain + * mkey is an output value + */ +typedef int (*spdk_bdev_io_get_mkey)(void *cb_arg, void *address, size_t length, void *pd, + uint32_t *mkey); + +enum spdk_bdev_ext_io_opts_mem_types { + /** Memory in IO request belongs to another memory domain and it is described by Memory Key. + * If this value is set then \b mkey structure in spdk_bdev_ext_io_opts_mem_type contains a callback + * and its argument that can be used to get a Memory Key */ + SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY = 0, +}; + +struct spdk_bdev_ext_io_opts_mem_type { + /** This value determines which part of union should be used. Provides extensibility of this structure */ + enum spdk_bdev_ext_io_opts_mem_types type; + union { + struct { + spdk_bdev_io_get_mkey get_mkey_cb; + void *get_mkey_cb_arg; + } mkey; + } u; +}; + +enum spdk_bdev_ext_io_opts_flags { + /** This flag determines the type of memory passed in IO request. + * Refer to \ref spdk_bdev_ext_io_opts_mem_types for more information. 
+ * If this flag is set in spdk_bdev_ext_io_opts then \b mem_type member of \b spdk_bdev_ext_io_opt + * should point to a structure that describes memory buffer */ + SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE = 1u << 0, +}; + +/** + * Structure with optional IO request parameters + */ +struct spdk_bdev_ext_io_opts { + /** Combination of bits defined in \b enum spdk_bdev_ext_io_opts_flags */ + uint64_t flags; + /** Describes type of the memory used in IO request. Applicable for block devices that report + * SPDK_BDEV_CAP_EXT_MEMORY_TYPE_MKEY capability in \ref spdk_bdev_get_caps function + * This structure must be filled by the user if \b SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE bit is set + * in \b flags member */ + struct spdk_bdev_ext_io_opts_mem_type *mem_type; +}; + +/** + * Submit a read request to the bdev on the given channel. This differs from + * spdk_bdev_read by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer into the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be read into. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to read. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * \param opts Optional structure with extended IO request options. + * + * \return 0 on success. On success, the callback will always + * be called (even if the request ultimately failed). 
Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported or opts_size is incorrect + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + */ +int spdk_bdev_readv_blocks_with_md_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts); + /** * Submit a write request to the bdev on the given channel. * @@ -1060,6 +1142,41 @@ int spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_ uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +/** + * Submit a write request to the bdev on the given channel. This differs from + * spdk_bdev_write by allowing the data buffer to be described in a scatter + * gather list. Some physical devices place memory alignment requirements on + * data or metadata and may not be able to directly transfer out of the buffers + * provided. In this case, the request may fail. This function uses separate + * buffer for metadata transfer (valid only if bdev supports this mode). + * + * \ingroup bdev_io_submit_functions + * + * \param desc Block device descriptor. + * \param ch I/O channel. Obtained by calling spdk_bdev_get_io_channel(). + * \param iov A scatter gather list of buffers to be written from. + * \param iovcnt The number of elements in iov. + * \param md Metadata buffer. + * \param offset_blocks The offset, in blocks, from the start of the block device. + * \param num_blocks The number of blocks to write. + * \param cb Called when the request is complete. + * \param cb_arg Argument passed to cb. + * \param opts Optional structure with extended IO request options. + * + * \return 0 on success. 
On success, the callback will always + * be called (even if the request ultimately failed). Return + * negated errno on failure, in which case the callback will not be called. + * * -EINVAL - offset_blocks and/or num_blocks are out of range or separate + * metadata is not supported or opts_size is incorrect + * * -ENOMEM - spdk_bdev_io buffer cannot be allocated + * * -EBADF - desc not open for writing + */ +int spdk_bdev_writev_blocks_with_md_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts); + /** * Submit a compare request to the bdev on the given channel. * diff --git a/include/spdk/bdev_module.h b/include/spdk/bdev_module.h index 11f2bf9c8be..38e45181dd9 100644 --- a/include/spdk/bdev_module.h +++ b/include/spdk/bdev_module.h @@ -688,6 +688,14 @@ struct spdk_bdev_io { /** Enables queuing parent I/O when no bdev_ios available for split children. */ struct spdk_bdev_io_wait_entry waitq_entry; + + /** Copy of structure passed by the user in ext API */ + struct spdk_bdev_ext_io_opts ext_opts; + + /** Contains callbacks passed by the user in ext API. 
Content of this structure is valid if + * SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE flag is set in \b ext_opts */ + struct spdk_bdev_ext_io_opts_mem_type mem_type; + } internal; /** diff --git a/lib/bdev/bdev.c b/lib/bdev/bdev.c index 4deb351138e..3a2ddb16ee0 100644 --- a/lib/bdev/bdev.c +++ b/lib/bdev/bdev.c @@ -348,14 +348,18 @@ static void bdev_enable_qos_msg(struct spdk_io_channel_iter *i); static void bdev_enable_qos_done(struct spdk_io_channel_iter *i, int status); static int -bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, - uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg); +bdev_rw_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + enum spdk_bdev_io_type io_type); + static int -bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, +bdev_rw_blocks_with_md_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks, - spdk_bdev_io_completion_cb cb, void *cb_arg); + spdk_bdev_io_completion_cb cb, void *cb_arg, + enum spdk_bdev_io_type io_type, struct spdk_bdev_ext_io_opts *opts); static int bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch, @@ -2114,19 +2118,13 @@ _bdev_io_split(void *_bdev_io) bdev_io->u.bdev.split_outstanding++; - if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) { - rc = bdev_readv_blocks_with_md(bdev_io->internal.desc, - spdk_io_channel_from_ctx(bdev_io->internal.ch), - iov, iovcnt, md_buf, current_offset, - to_next_boundary, - bdev_io_split_done, bdev_io); - } else { - rc = bdev_writev_blocks_with_md(bdev_io->internal.desc, - spdk_io_channel_from_ctx(bdev_io->internal.ch), - iov, iovcnt, md_buf, current_offset, - 
to_next_boundary, - bdev_io_split_done, bdev_io); - } + rc = bdev_rw_blocks_with_md_ext(bdev_io->internal.desc, + spdk_io_channel_from_ctx(bdev_io->internal.ch), + iov, iovcnt, md_buf, current_offset, + to_next_boundary, + bdev_io_split_done, bdev_io, + (enum spdk_bdev_io_type)bdev_io->type, + &bdev_io->internal.ext_opts); if (rc == 0) { current_offset += to_next_boundary; @@ -2396,6 +2394,7 @@ bdev_io_init(struct spdk_bdev_io *bdev_io, bdev_io->num_retries = 0; bdev_io->internal.get_buf_cb = NULL; bdev_io->internal.get_aux_buf_cb = NULL; + bdev_io->internal.ext_opts.flags = 0; } static bool @@ -3562,15 +3561,39 @@ spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg); } +static inline void +bdev_fill_rw_io(struct spdk_bdev_desc *desc, struct iovec *iov, int iovcnt, void *md_buf, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_io *bdev_io, struct spdk_bdev_channel *channel, + struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type) +{ + bdev_io->internal.ch = channel; + bdev_io->internal.desc = desc; + bdev_io->type = io_type; + bdev_io->u.bdev.iovs = iov; + bdev_io->u.bdev.iovcnt = iovcnt; + bdev_io->u.bdev.md_buf = md_buf; + bdev_io->u.bdev.num_blocks = num_blocks; + bdev_io->u.bdev.offset_blocks = offset_blocks; + bdev_io_init(bdev_io, bdev, cb_arg, cb); +} + static int -bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks, - uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg) +bdev_rw_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + enum spdk_bdev_io_type io_type) { struct spdk_bdev *bdev = 
spdk_bdev_desc_get_bdev(desc); struct spdk_bdev_io *bdev_io; struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + if (io_type == SPDK_BDEV_IO_TYPE_WRITE && !desc->write) { + return -EBADF; + } + if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { return -EINVAL; } @@ -3580,17 +3603,54 @@ bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *c return -ENOMEM; } - bdev_io->internal.ch = channel; - bdev_io->internal.desc = desc; - bdev_io->type = SPDK_BDEV_IO_TYPE_READ; - bdev_io->u.bdev.iovs = iov; - bdev_io->u.bdev.iovcnt = iovcnt; - bdev_io->u.bdev.md_buf = md_buf; - bdev_io->u.bdev.num_blocks = num_blocks; - bdev_io->u.bdev.offset_blocks = offset_blocks; - bdev_io_init(bdev_io, bdev, cb_arg, cb); + bdev_fill_rw_io(desc, iov, iovcnt, md_buf, offset_blocks, num_blocks, cb, cb_arg, bdev_io, channel, + bdev, io_type); + + bdev_io_submit(bdev_io); + + return 0; +} + +static int +bdev_rw_blocks_with_md_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md_buf, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + enum spdk_bdev_io_type io_type, + struct spdk_bdev_ext_io_opts *opts) +{ + struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); + struct spdk_bdev_io *bdev_io; + struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch); + + if (io_type == SPDK_BDEV_IO_TYPE_WRITE && !desc->write) { + return -EBADF; + } + + if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { + return -EINVAL; + } + + bdev_io = bdev_channel_get_io(channel); + if (!bdev_io) { + return -ENOMEM; + } + + bdev_fill_rw_io(desc, iov, iovcnt, md_buf, offset_blocks, num_blocks, cb, cb_arg, bdev_io, channel, + bdev, io_type); + + if (opts) { + bdev_io->internal.ext_opts.flags = opts->flags; + bdev_io->internal.ext_opts.mem_type = &bdev_io->internal.mem_type; + + if (bdev_io->internal.ext_opts.flags & SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE) { + 
assert(opts->mem_type); + bdev_io->internal.mem_type = *opts->mem_type; + } + } bdev_io_submit(bdev_io); + return 0; } @@ -3599,8 +3659,8 @@ int spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel * uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg) { - return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks, - num_blocks, cb, cb_arg); + return bdev_rw_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks, + num_blocks, cb, cb_arg, SPDK_BDEV_IO_TYPE_READ); } int @@ -3617,8 +3677,27 @@ spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann return -EINVAL; } - return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, - num_blocks, cb, cb_arg); + return bdev_rw_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, + num_blocks, cb, cb_arg, SPDK_BDEV_IO_TYPE_READ); +} + +int +spdk_bdev_readv_blocks_with_md_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts) +{ + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } + + if (!_bdev_io_check_md_buf(iov, md)) { + return -EINVAL; + } + + return bdev_rw_blocks_with_md_ext(desc, ch, iov, iovcnt, md, offset_blocks, + num_blocks, cb, cb_arg, SPDK_BDEV_IO_TYPE_READ, opts); } static int @@ -3704,43 +3783,6 @@ spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chann cb, cb_arg); } -static int -bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, - struct iovec *iov, int iovcnt, void *md_buf, - uint64_t offset_blocks, uint64_t num_blocks, - spdk_bdev_io_completion_cb cb, void *cb_arg) -{ - struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc); - struct spdk_bdev_io *bdev_io; - struct spdk_bdev_channel *channel = 
spdk_io_channel_get_ctx(ch); - - if (!desc->write) { - return -EBADF; - } - - if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) { - return -EINVAL; - } - - bdev_io = bdev_channel_get_io(channel); - if (!bdev_io) { - return -ENOMEM; - } - - bdev_io->internal.ch = channel; - bdev_io->internal.desc = desc; - bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE; - bdev_io->u.bdev.iovs = iov; - bdev_io->u.bdev.iovcnt = iovcnt; - bdev_io->u.bdev.md_buf = md_buf; - bdev_io->u.bdev.num_blocks = num_blocks; - bdev_io->u.bdev.offset_blocks = offset_blocks; - bdev_io_init(bdev_io, bdev, cb_arg, cb); - - bdev_io_submit(bdev_io); - return 0; -} - int spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, struct iovec *iov, int iovcnt, @@ -3763,8 +3805,8 @@ spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, uint64_t offset_blocks, uint64_t num_blocks, spdk_bdev_io_completion_cb cb, void *cb_arg) { - return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks, - num_blocks, cb, cb_arg); + return bdev_rw_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks, + num_blocks, cb, cb_arg, SPDK_BDEV_IO_TYPE_WRITE); } int @@ -3781,8 +3823,27 @@ spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_chan return -EINVAL; } - return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, - num_blocks, cb, cb_arg); + return bdev_rw_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks, + num_blocks, cb, cb_arg, SPDK_BDEV_IO_TYPE_WRITE); +} + +int +spdk_bdev_writev_blocks_with_md_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, + struct iovec *iov, int iovcnt, void *md, + uint64_t offset_blocks, uint64_t num_blocks, + spdk_bdev_io_completion_cb cb, void *cb_arg, + struct spdk_bdev_ext_io_opts *opts) +{ + if (!spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) { + return -EINVAL; + } + + if (!_bdev_io_check_md_buf(iov, md)) { + return -EINVAL; + } + + return 
bdev_rw_blocks_with_md_ext(desc, ch, iov, iovcnt, md, offset_blocks, + num_blocks, cb, cb_arg, SPDK_BDEV_IO_TYPE_WRITE, opts); } static void From 18b60530d8a44f036377955140eecff3cd326ffa Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Sat, 9 Jan 2021 17:06:31 +0300 Subject: [PATCH 3/4] [RFC] nvme: Add function spdk_nvme_ns_cmd_readv/writev_with_md_ext These functions accept extendable structure with IO request options. The options structure contains a callback to get a memory key per Io request. This callback is used in RDMA transport. Change-Id: I65bfba279904e77539348520c3dfac7aadbe80d9 Signed-off-by: Alexey Marchuk --- include/spdk/nvme.h | 121 +++++++++++++++++++++++++++++++++++++++ lib/nvme/nvme_internal.h | 9 +++ lib/nvme/nvme_ns_cmd.c | 94 ++++++++++++++++++++++++++++++ lib/nvme/nvme_rdma.c | 38 ++++++++---- 4 files changed, 252 insertions(+), 10 deletions(-) diff --git a/include/spdk/nvme.h b/include/spdk/nvme.h index c0654a6bb3e..001cd58480d 100644 --- a/include/spdk/nvme.h +++ b/include/spdk/nvme.h @@ -2545,6 +2545,90 @@ int spdk_nvme_ns_cmd_writev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qp spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, uint16_t apptag_mask, uint16_t apptag); +/** + * Callback used to get a Memory Key per IO request + * + * pd is input parameter and should point to a memory domain + * mkey is an output value + */ +typedef int (*spdk_nvme_ns_cmd_io_get_mkey)(void *cb_arg, void *address, size_t length, void *pd, + uint32_t *mkey); + +enum spdk_nvme_ns_cmd_ext_io_opts_mem_types { + /** Memory in IO request belongs to another memory domain and it is described by Memory Key. + * If this value is set then \b mkey structure in spdk_nvme_ns_cmd_ext_io_opts_mem_type contains + * a callback and its argument that can be used to get a Memory Key */ + SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY = 0, +}; + +struct spdk_nvme_ns_cmd_ext_io_opts_mem_type { + /** This value determines which part of union should be used. 
Provides extensibility for this structure */ + enum spdk_nvme_ns_cmd_ext_io_opts_mem_types type; + union { + struct { + spdk_nvme_ns_cmd_io_get_mkey get_mkey_cb; + } mkey; + } u; +}; + +enum spdk_nvme_ns_cmd_ext_io_opts_flags { + /** This flag determines the type of memory passed in IO request. + * Refer to \ref spdk_nvme_ns_cmd_ext_io_opts_mem_types for more information. + * If this flag is set in spdk_nvme_ns_cmd_ext_io_opts then \b mem_type member of + * \b spdk_nvme_ns_cmd_ext_io_opts should point to a structure that describes memory buffer */ + SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE = 1u << 0, +}; + +/** + * Structure with optional IO request parameters + */ +struct spdk_nvme_ns_cmd_ext_io_opts { + /** Combination of bits defined in \b enum spdk_nvme_ns_cmd_ext_io_opts_flags */ + uint64_t flags; + /** Describes type of the memory used in IO request + * This structure must be filled by the user if \b SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE bit is set + * in \b flags member. Used by RDMA transport, other transports ignore this extension */ + struct spdk_nvme_ns_cmd_ext_io_opts_mem_type *mem_type; +}; + +/** + * Submit a write I/O to the specified NVMe namespace. + * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any + * given time. 
+ * + * \param ns NVMe namespace to submit the write I/O + * \param qpair I/O queue pair to submit the request + * \param lba starting LBA to write the data + * \param lba_count length (in sectors) for the write operation + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined in nvme_spec.h, for this I/O + * \param reset_sgl_fn callback function to reset scattered payload + * \param next_sge_fn callback function to iterate each scattered + * payload memory segment + * \param metadata virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size() + * \param apptag_mask application tag mask. + * \param apptag application tag to use end-to-end protection information. + * \param opts Optional structure with extended IO request options. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_writev_with_md_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag, + struct spdk_nvme_ns_cmd_ext_io_opts *opts); + /** * Submit a write I/O to the specified NVMe namespace. * @@ -2725,6 +2809,43 @@ int spdk_nvme_ns_cmd_readv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpa spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, uint16_t apptag_mask, uint16_t apptag); +/** + * Submit a read I/O to the specified NVMe namespace. 
+ * + * The command is submitted to a qpair allocated by spdk_nvme_ctrlr_alloc_io_qpair(). + * The user must ensure that only one thread submits I/O on a given qpair at any given time. + * + * \param ns NVMe namespace to submit the read I/O + * \param qpair I/O queue pair to submit the request + * \param lba starting LBA to read the data + * \param lba_count length (in sectors) for the read operation + * \param cb_fn callback function to invoke when the I/O is completed + * \param cb_arg argument to pass to the callback function + * \param io_flags set flags, defined in nvme_spec.h, for this I/O + * \param reset_sgl_fn callback function to reset scattered payload + * \param next_sge_fn callback function to iterate each scattered + * payload memory segment + * \param metadata virtual address pointer to the metadata payload, the length + * of metadata is specified by spdk_nvme_ns_get_md_size() + * \param apptag_mask application tag mask. + * \param apptag application tag to use end-to-end protection information. + * \param opts Optional structure with extended IO request options. + * + * \return 0 if successfully submitted, negated errnos on the following error conditions: + * -EINVAL: The request is malformed. + * -ENOMEM: The request cannot be allocated. + * -ENXIO: The qpair is failed at the transport level. + * -EFAULT: Invalid address was specified as part of payload. cb_fn is also called + * with error status including dnr=1 in this case. + */ +int spdk_nvme_ns_cmd_readv_with_md_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag, + struct spdk_nvme_ns_cmd_ext_io_opts *opts); + /** * Submits a read I/O to the specified NVMe namespace. 
* diff --git a/lib/nvme/nvme_internal.h b/lib/nvme/nvme_internal.h index c57bf85e8ed..f17315fbfd0 100644 --- a/lib/nvme/nvme_internal.h +++ b/lib/nvme/nvme_internal.h @@ -214,12 +214,21 @@ struct nvme_payload { spdk_nvme_req_reset_sgl_cb reset_sgl_fn; spdk_nvme_req_next_sge_cb next_sge_fn; + /** + * Function to be used to get an mkey for scattered payload + */ + spdk_nvme_ns_cmd_io_get_mkey get_sge_mkey; + /** * If reset_sgl_fn == NULL, this is a contig payload, and contig_or_cb_arg contains the * virtual memory address of a single virtually contiguous buffer. * * If reset_sgl_fn != NULL, this is a SGL payload, and contig_or_cb_arg contains the * cb_arg that will be passed to the SGL callback functions. + * + * If get_sge_mkey != NULL, this is a SGL payload, and contig_or_cb_arg contains the + * cb_arg that will be passed to the get_sge_mkey callback function. Moreover data returned + * by next_sge_fn and get_sge_mkey belongs to another memory domain and can not be accessed by SPDK */ void *contig_or_cb_arg; diff --git a/lib/nvme/nvme_ns_cmd.c b/lib/nvme/nvme_ns_cmd.c index 908c16310f2..00215f05511 100644 --- a/lib/nvme/nvme_ns_cmd.c +++ b/lib/nvme/nvme_ns_cmd.c @@ -31,6 +31,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include #include "nvme_internal.h" static inline struct nvme_request *_nvme_ns_cmd_rw(struct spdk_nvme_ns *ns, @@ -709,6 +710,52 @@ spdk_nvme_ns_cmd_readv_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair * } } +int +spdk_nvme_ns_cmd_readv_with_md_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void *cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag, + struct spdk_nvme_ns_cmd_ext_io_opts *opts) +{ + struct nvme_request *req; + struct nvme_payload payload; + + if (!_is_io_flags_valid(io_flags)) { + return -EINVAL; + } + + if (reset_sgl_fn == NULL || next_sge_fn == NULL) { + return -EINVAL; + } + + payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, metadata); + + if (opts) { + if (opts->flags & SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE) { + if (opts->mem_type->type != SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY) { + SPDK_ERRLOG("Unknown memory type %d\n", opts->mem_type->type); + return -EINVAL; + } + payload.get_sge_mkey = opts->mem_type->u.mkey.get_mkey_cb; + } + } + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_READ, + io_flags, apptag_mask, apptag, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } +} + int spdk_nvme_ns_cmd_write(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, void *buffer, uint64_t lba, @@ -836,6 +883,53 @@ spdk_nvme_ns_cmd_writev_with_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair } } +int +spdk_nvme_ns_cmd_writev_with_md_ext(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, + uint64_t lba, uint32_t lba_count, + spdk_nvme_cmd_cb cb_fn, void 
*cb_arg, uint32_t io_flags, + spdk_nvme_req_reset_sgl_cb reset_sgl_fn, + spdk_nvme_req_next_sge_cb next_sge_fn, void *metadata, + uint16_t apptag_mask, uint16_t apptag, + struct spdk_nvme_ns_cmd_ext_io_opts *opts) +{ + struct nvme_request *req; + struct nvme_payload payload; + + if (!_is_io_flags_valid(io_flags)) { + return -EINVAL; + } + + if (reset_sgl_fn == NULL || next_sge_fn == NULL) { + return -EINVAL; + } + + payload = NVME_PAYLOAD_SGL(reset_sgl_fn, next_sge_fn, cb_arg, metadata); + + if (opts) { + if (opts->flags & SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE) { + if (opts->mem_type->type != SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY) { + SPDK_ERRLOG("Unknown memory type %d\n", opts->mem_type->type); + return -EINVAL; + } + payload.get_sge_mkey = opts->mem_type->u.mkey.get_mkey_cb; + } + } + + req = _nvme_ns_cmd_rw(ns, qpair, &payload, 0, 0, lba, lba_count, cb_fn, cb_arg, SPDK_NVME_OPC_WRITE, + io_flags, apptag_mask, apptag, true); + if (req != NULL) { + return nvme_qpair_submit_request(qpair, req); + } else if (nvme_ns_check_request_length(lba_count, + ns->sectors_per_max_io, + ns->sectors_per_stripe, + qpair->ctrlr->opts.io_queue_requests)) { + return -EINVAL; + } else { + return -ENOMEM; + } + +} + int spdk_nvme_ns_cmd_write_zeroes(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, uint64_t lba, uint32_t lba_count, diff --git a/lib/nvme/nvme_rdma.c b/lib/nvme/nvme_rdma.c index 24000ec1338..40a37b221dd 100644 --- a/lib/nvme/nvme_rdma.c +++ b/lib/nvme/nvme_rdma.c @@ -1564,7 +1564,7 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair, uint32_t remaining_size; uint32_t sge_length; int rc, max_num_sgl, num_sgl_desc; - uint32_t rkey = 0; + uint32_t mkey = 0; assert(req->payload_size != 0); assert(nvme_payload_type(&req->payload) == NVME_PAYLOAD_TYPE_SGL); @@ -1590,12 +1590,21 @@ nvme_rdma_build_sgl_request(struct nvme_rdma_qpair *rqpair, return -1; } - if (spdk_unlikely(!nvme_rdma_get_key(rqpair->mr_map->map, virt_addr, sge_length, - 
NVME_RDMA_MR_RKEY, &rkey))) { - return -1; + if (req->payload.get_sge_mkey) { + rc = req->payload.get_sge_mkey(req->payload.contig_or_cb_arg, virt_addr, sge_length, + rqpair->rdma_qp->qp->pd, &mkey); + if (spdk_unlikely(rc)) { + SPDK_ERRLOG("Memory translation failed, rc %d\n", rc); + return -1; + } + } else { + if (spdk_unlikely(!nvme_rdma_get_key(rqpair->mr_map->map, virt_addr, sge_length, + NVME_RDMA_MR_RKEY, &mkey))) { + return -1; + } } - cmd->sgl[num_sgl_desc].keyed.key = rkey; + cmd->sgl[num_sgl_desc].keyed.key = mkey; cmd->sgl[num_sgl_desc].keyed.type = SPDK_NVME_SGL_TYPE_KEYED_DATA_BLOCK; cmd->sgl[num_sgl_desc].keyed.subtype = SPDK_NVME_SGL_SUBTYPE_ADDRESS; cmd->sgl[num_sgl_desc].keyed.length = sge_length; @@ -1664,7 +1673,7 @@ nvme_rdma_build_sgl_inline_request(struct nvme_rdma_qpair *rqpair, struct spdk_nvme_rdma_req *rdma_req) { struct nvme_request *req = rdma_req->req; - uint32_t lkey = 0; + uint32_t mkey = 0; uint32_t length; void *virt_addr; int rc; @@ -1689,14 +1698,23 @@ nvme_rdma_build_sgl_inline_request(struct nvme_rdma_qpair *rqpair, length = req->payload_size; } - if (spdk_unlikely(!nvme_rdma_get_key(rqpair->mr_map->map, virt_addr, length, - NVME_RDMA_MR_LKEY, &lkey))) { - return -1; + if (req->payload.get_sge_mkey) { + rc = req->payload.get_sge_mkey(req->payload.contig_or_cb_arg, virt_addr, length, + rqpair->rdma_qp->qp->pd, &mkey); + if (spdk_unlikely(rc)) { + SPDK_ERRLOG("Memory translation failed, rc %d\n", rc); + return -1; + } + } else { + if (spdk_unlikely(!nvme_rdma_get_key(rqpair->mr_map->map, virt_addr, length, + NVME_RDMA_MR_LKEY, &mkey))) { + return -1; + } } rdma_req->send_sgl[1].addr = (uint64_t)virt_addr; rdma_req->send_sgl[1].length = length; - rdma_req->send_sgl[1].lkey = lkey; + rdma_req->send_sgl[1].lkey = mkey; rdma_req->send_wr.num_sge = 2; From 130167c71b25698f400bb6441776daa18403a3d9 Mon Sep 17 00:00:00 2001 From: Alexey Marchuk Date: Sat, 9 Jan 2021 18:10:18 +0300 Subject: [PATCH 4/4] [RFC] bdev_nvme: Use new extended 
API The new API is used if flags is not zero. Change-Id: I414b5d19bff54114d6708efed89ba19b5955f56a Signed-off-by: Alexey Marchuk --- module/bdev/nvme/bdev_nvme.c | 71 +++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 2 deletions(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index 293157498c3..965b1fc3f13 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -2539,12 +2539,42 @@ bdev_nvme_no_pi_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, return rc; } +static inline enum spdk_nvme_ns_cmd_ext_io_opts_mem_types +bdev_nvme_map_io_mkey_type(enum spdk_bdev_ext_io_opts_mem_types mem_type) { + + switch (mem_type) + { + case SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY: + return SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY; + default: + SPDK_ERRLOG("Unknown get_mkey ctx type %d\n", mem_type); + assert(0); + } +} + +static int bdev_nvme_ns_cmd_io_get_mkey(void *cb_arg, void *address, size_t length, void *pd, + uint32_t *mkey) +{ + struct nvme_bdev_io *bio = cb_arg; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); + int rc; + + assert(bdev_io->internal.ext_opts.mem_type->type == SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY && + bdev_io->internal.ext_opts.mem_type->u.mkey.get_mkey_cb != NULL); + + rc = bdev_io->internal.ext_opts.mem_type->u.mkey.get_mkey_cb( + bdev_io->internal.ext_opts.mem_type->u.mkey.get_mkey_cb_arg, address, length, pd, mkey); + + return rc; +} + static int bdev_nvme_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt, void *md, uint64_t lba_count, uint64_t lba, uint32_t flags) { int rc; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); SPDK_DEBUGLOG(bdev_nvme, "read %" PRIu64 " blocks with offset %#" PRIx64 "\n", lba_count, lba); @@ -2554,7 +2584,25 @@ bdev_nvme_readv(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, bio->iovpos = 0; bio->iov_offset = 0; - if (iovcnt == 
1) { + if (bdev_io->internal.ext_opts.flags) { + struct spdk_nvme_ns_cmd_ext_io_opts opts; + struct spdk_nvme_ns_cmd_ext_io_opts_mem_type mem_type; + + opts.flags = bdev_io->internal.ext_opts.flags; + + if (bdev_io->internal.ext_opts.flags & SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE) { + opts.mem_type = &mem_type; + mem_type.type = bdev_nvme_map_io_mkey_type(bdev_io->internal.mem_type.type); + if (mem_type.type == SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY) { + mem_type.u.mkey.get_mkey_cb = bdev_nvme_ns_cmd_io_get_mkey; + } + } + + rc = spdk_nvme_ns_cmd_readv_with_md_ext(ns, qpair, lba, lba_count, + bdev_nvme_readv_done, bio, flags, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, + md, 0, 0, &opts); + } else if (iovcnt == 1) { rc = spdk_nvme_ns_cmd_read_with_md(ns, qpair, iov[0].iov_base, md, lba, lba_count, bdev_nvme_readv_done, bio, @@ -2580,6 +2628,7 @@ bdev_nvme_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, uint32_t flags) { int rc; + struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); SPDK_DEBUGLOG(bdev_nvme, "write %" PRIu64 " blocks with offset %#" PRIx64 "\n", lba_count, lba); @@ -2589,7 +2638,25 @@ bdev_nvme_writev(struct spdk_nvme_ns *ns, struct spdk_nvme_qpair *qpair, bio->iovpos = 0; bio->iov_offset = 0; - if (iovcnt == 1) { + if (bdev_io->internal.ext_opts.flags) { + struct spdk_nvme_ns_cmd_ext_io_opts opts; + struct spdk_nvme_ns_cmd_ext_io_opts_mem_type mem_type; + + opts.flags = bdev_io->internal.ext_opts.flags; + + if (bdev_io->internal.ext_opts.flags & SPDK_BDEV_EXT_IO_OPTS_MEM_TYPE) { + opts.mem_type = &mem_type; + mem_type.type = bdev_nvme_map_io_mkey_type(bdev_io->internal.mem_type.type); + if (mem_type.type == SPDK_NVME_NS_CMD_EXT_IO_OPTS_MEM_TYPE_MEMORY_KEY) { + mem_type.u.mkey.get_mkey_cb = bdev_nvme_ns_cmd_io_get_mkey; + } + } + + rc = spdk_nvme_ns_cmd_writev_with_md_ext(ns, qpair, lba, lba_count, + bdev_nvme_readv_done, bio, flags, + bdev_nvme_queued_reset_sgl, bdev_nvme_queued_next_sge, + md, 0, 0, &opts); + } 
else if (iovcnt == 1) { rc = spdk_nvme_ns_cmd_write_with_md(ns, qpair, iov[0].iov_base, md, lba, lba_count, bdev_nvme_readv_done, bio,