From 530c3a2e50ddb406ffa0c0f61ad4f5ad59fd52ba Mon Sep 17 00:00:00 2001 From: Christopher Clark Date: Sun, 6 Jan 2019 21:06:05 -0800 Subject: [PATCH] argo: implement the notify op Queries for data about space availability in registered rings and causes notification to be sent when space has become available. The hypercall op populates a supplied data structure with information about ring state, and if insufficient space is currently available in a given ring, the hypervisor will record the domain's expressed interest and notify it when it observes that space has become available. Checks for free space occur when this notify op is invoked, so it may be intentionally invoked with no data structure to populate (i.e. a NULL argument) to trigger such a check and consequent notifications. Limit the maximum number of notify requests in a single operation to a simple fixed limit of 256. Signed-off-by: Christopher Clark v3 #07 Jan: fix format string indentation in printks v3 (general) Jan: drop fixed width types for ringbuf_payload_space v3 #07 Jan: rename ring_find_info_by_match to find_ring_info_by_match v3 #07 Jan: fix numeric entries in printk format strings v3: ringbuf_payload_space: simpler return 0 if get_sanitized_ring fails v3 #10 Roger: simplify ringbuf_payload_space for empty rings v3 #10 Roger: ringbuf_payload_space: add comment to explain how ret < INT32_MAX v3 #10 Roger: drop out label, use return -EFAULT in fill_ring_data v3 #10 Roger: add newline in signal_domid v3 #10 Roger: move find functions to top of file and drop prototypes v3 #04 Jan: meld the compat hypercall arg checking v3 #04 Roger/Jan: make lock names clearer and assert their state v3 #04 Jan: port -> aport with type; distinguish argo port from evtchn v3 self: drop braces in foreach of notify_check_pending v3 feedback Roger/Jan: ASSERT currd is current->domain or use 'd' variable name v2 feedback Jan: drop cookie, implement teardown v2 notify: add flag to indicate ring is shared v2 argument name for 
fill_ring_data arg is now currd v2 self: check ring size vs request and flag error rather than queue signal v2 feedback Jan: drop 'message' from 'argo_message_op' v2 self: simplify signal_domid, drop unnecessary label + goto v2 self: skip the cookie check in pending_cancel v2 self: implement npending limit on number of pending entries v1 feedback #16 Jan: sanitize_ring in ringbuf_payload_space v2 self: inline fill_ring_data_array v2 self: avoid retesting dst_d for put_domain v2 self/Jan: remove use of magic verification field and tidy up v1 feedback #16 Jan: remove testing of magic in guest-supplied structure v2 self: s/argo_pending_ent/pending_ent/g v2 feedback v1#13 Roger: use OS-supplied roundup; drop from public header v1,2 feedback Jan/Roger/Paul: drop errno returning guest access functions v1 feedback Roger, Jan: drop argo prefix on static functions v2 self: reduce indentation via goto out if arg NULL v1 feedback #13 Jan: resolve checking of array handle and use of __copy v1 #5 (#16) feedback Paul: notify op: use currd in do_argo_message_op v1 #5 (#16) feedback Paul: notify op: use currd in argo_notify v1 #5 (#16) feedback Paul: notify op: use currd in argo_notify_check_pending v1 #5 (#16) feedback Paul: notify op: use currd in argo_fill_ring_data_array v1 #13 (#16) feedback Paul: notify op: do/while: reindent only v1 #13 (#16) feedback Paul: notify op: do/while: goto v1 : add compat xlat.lst entries v1: add definition for copy_field_from_guest_errno v1 #13 feedback Jan: make 'ring data' comment comply with single-line style v1 feedback #13 Jan: use __copy; so define and use __copy_field_to_guest_errno v1: #13 feedback Jan: public namespace: prefix with xen v1: #13 feedback Jan: add blank line after case in do_argo_message_op v1: self: rename ent id to domain_id v1: self: ent id-> domain_id v1: self: drop signal if domain_cookie mismatches v1. feedback #15 Jan: make loop i unsigned v1. self: drop unnecessary mb() in argo_notify_check_pending v1. 
self: add blank line v1 #16 feedback Jan: const domain arg to argo_fill_ring_data v1. feedback #15 Jan: check unused hypercall args are zero v1 feedback #16 Jan: add comment on space available signal policy v1. feedback #16 Jan: move declaration, drop braces, lower indent v1. feedback #18 Jan: meld the resource limits into the main commit v1. feedback #16 Jan: clarify use of magic field v1. self: use single copy to read notify ring data struct v1: argo_fill_ring_data: fix dprintk types for port field v1: self: use %x for printing port as per other print sites v1. feedback Jan: add comments explaining ring full vs empty v1. following Jan: fix argo_ringbuf_payload_space calculation for empty ring --- xen/common/argo.c | 352 ++++++++++++++++++++++++++++++++++++++ xen/common/compat/argo.c | 18 ++ xen/include/public/argo.h | 67 ++++++++ xen/include/xlat.lst | 2 + 4 files changed, 439 insertions(+) diff --git a/xen/common/argo.c b/xen/common/argo.c index 1537fcc89846..13f7fd541aec 100644 --- a/xen/common/argo.c +++ b/xen/common/argo.c @@ -30,6 +30,7 @@ #include #define MAX_RINGS_PER_DOMAIN 128U +#define MAX_NOTIFY_COUNT 256U #define MAX_PENDING_PER_RING 32U /* All messages on the ring are padded to a multiple of the slot size. 
*/ @@ -49,6 +50,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t); DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t); DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t); DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t); +DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t); +DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t); DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t); DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t); @@ -411,6 +414,18 @@ signal_domain(struct domain *d) send_guest_global_virq(d, VIRQ_ARGO_MESSAGE); } +static void +signal_domid(domid_t domain_id) +{ + struct domain *d = get_domain_by_id(domain_id); + + if ( !d ) + return; + + signal_domain(d); + put_domain(d); +} + static void ring_unmap(const struct domain *d, struct argo_ring_info *ring_info) { @@ -599,6 +614,66 @@ get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring, return 0; } +static unsigned int +ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info) +{ + xen_argo_ring_t ring; + unsigned int len; + int ret; + + ASSERT(LOCKING_L3(d, ring_info)); + + len = ring_info->len; + if ( !len ) + return 0; + + if ( get_sanitized_ring(d, &ring, ring_info) ) + return 0; + + argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n", + ring.tx_ptr, ring.rx_ptr); + + /* + * rx_ptr == tx_ptr means that the ring has been emptied. + * See message size checking logic in the entry to ringbuf_insert which + * ensures that there is always one message slot of size ROUNDUP_MESSAGE(1) + * left available, preventing a ring from being entirely filled. + * This ensures that matching ring indexes always indicate an empty ring + * and never a full one. 
+ */ + ret = ring.rx_ptr - ring.tx_ptr; + if ( ret <= 0 ) + ret += len; + + /* + * In a sanitized ring, we can rely on: + * (rx_ptr < ring_info->len) && + * (tx_ptr < ring_info->len) && + * (ring_info->len <= XEN_ARGO_MAX_RING_SIZE) + * + * and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX + * therefore right here: ret < INT32_MAX + * and we are safe to return it as an unsigned value from this function. + * The subtractions below cannot increase its value. + */ + + /* + * The maximum size payload for a message that will be accepted is: + * (the available space between the ring indexes) + * minus (space for a message header) + * minus (space for one message slot) + * since ringbuf_insert requires that one message slot be left + * unfilled, to avoid filling the ring to capacity and confusing a full + * ring with an empty one. + * Since the ring indexes are sanitized, the value in ret is aligned, so + * the simple subtraction here works to return the aligned value needed: + */ + ret -= sizeof(struct xen_argo_ring_message_header); + ret -= ROUNDUP_MESSAGE(1); + + return (ret < 0) ? 
0 : ret; +} + /* * iov_count returns its count on success via an out variable to avoid * potential for a negative return value to be used incorrectly @@ -933,6 +1008,61 @@ pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info) ring_info->npending = 0; } +static void +pending_notify(struct hlist_head *to_notify) +{ + struct hlist_node *node, *next; + struct pending_ent *ent; + + ASSERT(LOCKING_Read_L1); + + hlist_for_each_entry_safe(ent, node, next, to_notify, node) + { + hlist_del(&ent->node); + signal_domid(ent->domain_id); + xfree(ent); + } +} + +static void +pending_find(const struct domain *d, struct argo_ring_info *ring_info, + unsigned int payload_space, struct hlist_head *to_notify) +{ + struct hlist_node *node, *next; + struct pending_ent *ent; + + ASSERT(LOCKING_Read_rings_L2(d)); + + /* + * TODO: Current policy here is to signal _all_ of the waiting domains + * interested in sending a message of size less than payload_space. + * + * This is likely to be suboptimal, since once one of them has added + * their message to the ring, there may well be insufficient room + * available for any of the others to transmit, meaning that they were + * woken in vain, which created extra work just to requeue their wait. + * + * Retain this simple policy for now since it at least avoids starving a + * domain of available space notifications because of a policy that only + * notified other domains instead. Improvement may be possible; + * investigation required. 
+ */ + + spin_lock(&ring_info->L3_lock); + hlist_for_each_entry_safe(ent, node, next, &ring_info->pending, node) + { + if ( payload_space >= ent->len ) + { + if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY ) + wildcard_pending_list_remove(ent->domain_id, ent); + hlist_del(&ent->node); + ring_info->npending--; + hlist_add_head(&ent->node, to_notify); + } + } + spin_unlock(&ring_info->L3_lock); +} + static int pending_queue(const struct domain *d, struct argo_ring_info *ring_info, domid_t src_id, unsigned int len) @@ -993,6 +1123,28 @@ pending_requeue(const struct domain *d, struct argo_ring_info *ring_info, return pending_queue(d, ring_info, src_id, len); } +static void +pending_cancel(const struct domain *d, struct argo_ring_info *ring_info, + domid_t src_id) +{ + struct hlist_node *node, *next; + struct pending_ent *ent; + + ASSERT(LOCKING_L3(d, ring_info)); + + hlist_for_each_entry_safe(ent, node, next, &ring_info->pending, node) + { + if ( ent->domain_id == src_id ) + { + if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY ) + wildcard_pending_list_remove(ent->domain_id, ent); + hlist_del(&ent->node); + xfree(ent); + ring_info->npending--; + } + } +} + static void wildcard_rings_pending_remove(struct domain *d) { @@ -1120,6 +1272,88 @@ partner_rings_remove(struct domain *src_d) * FIXME for 4.12: investigate using check_get_page_from_gfn() * and rewrite this function using it or with adopted logic */ +static int +fill_ring_data(const struct domain *currd, + XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd) +{ + xen_argo_ring_data_ent_t ent; + struct domain *dst_d; + struct argo_ring_info *ring_info; + + ASSERT(currd == current->domain); + ASSERT(LOCKING_Read_L1); + + if ( __copy_from_guest(&ent, data_ent_hnd, 1) ) + return -EFAULT; + + argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n", + ent.ring.domain_id, ent.ring.aport); + + ent.flags = 0; + + dst_d = get_domain_by_id(ent.ring.domain_id); + if ( dst_d ) + { + if ( dst_d->argo ) + 
{ + read_lock(&dst_d->argo->rings_L2_rwlock); + + ring_info = find_ring_info_by_match(dst_d, ent.ring.aport, + currd->domain_id); + if ( ring_info ) + { + unsigned int space_avail; + + ent.flags |= XEN_ARGO_RING_DATA_F_EXISTS; + ent.max_message_size = ring_info->len - + sizeof(struct xen_argo_ring_message_header) - + ROUNDUP_MESSAGE(1); + + if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY ) + ent.flags |= XEN_ARGO_RING_DATA_F_SHARED; + + spin_lock(&ring_info->L3_lock); + + space_avail = ringbuf_payload_space(dst_d, ring_info); + + argo_dprintk("fill_ring_data: aport=%x space_avail=%u" + " space_wanted=%u\n", + ring_info->id.aport, space_avail, + ent.space_required); + + /* Do not queue a notification for an unachievable size */ + if ( ent.space_required > ent.max_message_size ) + ent.flags |= XEN_ARGO_RING_DATA_F_EMSGSIZE; + else if ( space_avail >= ent.space_required ) + { + pending_cancel(dst_d, ring_info, currd->domain_id); + ent.flags |= XEN_ARGO_RING_DATA_F_SUFFICIENT; + } + else + { + pending_requeue(dst_d, ring_info, currd->domain_id, + ent.space_required); + ent.flags |= XEN_ARGO_RING_DATA_F_PENDING; + } + + spin_unlock(&ring_info->L3_lock); + + if ( space_avail == ent.max_message_size ) + ent.flags |= XEN_ARGO_RING_DATA_F_EMPTY; + + } + read_unlock(&dst_d->argo->rings_L2_rwlock); + } + put_domain(dst_d); + } + + if ( __copy_field_to_guest(data_ent_hnd, &ent, flags) || + __copy_field_to_guest(data_ent_hnd, &ent, max_message_size) ) + return -EFAULT; + + return 0; +} + static int find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn) { @@ -1553,6 +1787,109 @@ register_ring(struct domain *currd, return ret; } +static void +notify_ring(const struct domain *d, struct argo_ring_info *ring_info, + struct hlist_head *to_notify) +{ + unsigned int space; + + ASSERT(LOCKING_Read_rings_L2(d)); + + spin_lock(&ring_info->L3_lock); + + if ( ring_info->len ) + space = ringbuf_payload_space(d, ring_info); + else + space = 0; + + spin_unlock(&ring_info->L3_lock); + + if 
( space ) + pending_find(d, ring_info, space, to_notify); +} + +static void +notify_check_pending(struct domain *d) +{ + unsigned int i; + HLIST_HEAD(to_notify); + + ASSERT(LOCKING_Read_L1); + + read_lock(&d->argo->rings_L2_rwlock); + + for ( i = 0; i < ARGO_HTABLE_SIZE; i++ ) + { + struct hlist_node *node, *next; + struct argo_ring_info *ring_info; + + hlist_for_each_entry_safe(ring_info, node, next, + &d->argo->ring_hash[i], node) + notify_ring(d, ring_info, &to_notify); + } + + read_unlock(&d->argo->rings_L2_rwlock); + + if ( !hlist_empty(&to_notify) ) + pending_notify(&to_notify); +} + +static long +notify(struct domain *currd, + XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd) +{ + XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd; + xen_argo_ring_data_t ring_data; + int ret = 0; + + ASSERT(currd == current->domain); + + read_lock(&L1_global_argo_rwlock); + + if ( !currd->argo ) + { + argo_dprintk("!d->argo, ENODEV\n"); + ret = -ENODEV; + goto out; + } + + notify_check_pending(currd); + + if ( guest_handle_is_null(ring_data_hnd) ) + goto out; + + ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? 
-EFAULT : 0; + if ( ret ) + goto out; + + if ( ring_data.nent > MAX_NOTIFY_COUNT ) + { + gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n", + ring_data.nent, MAX_NOTIFY_COUNT); + ret = -EACCES; + goto out; + } + + ent_hnd = guest_handle_for_field(ring_data_hnd, + xen_argo_ring_data_ent_t, data[0]); + if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) ) + { + ret = -EFAULT; + goto out; + } + + while ( !ret && ring_data.nent-- ) + { + ret = fill_ring_data(currd, ent_hnd); + guest_handle_add_offset(ent_hnd, 1); + } + + out: + read_unlock(&L1_global_argo_rwlock); + + return ret; +} + static long sendv(struct domain *src_d, const xen_argo_addr_t *src_addr, const xen_argo_addr_t *dst_addr, @@ -1752,6 +2089,21 @@ do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1, break; } + case XEN_ARGO_OP_notify: + { + XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd = + guest_handle_cast(arg1, xen_argo_ring_data_t); + + if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) ) + { + rc = -EINVAL; + break; + } + + rc = notify(currd, ring_data_hnd); + break; + } + default: rc = -EOPNOTSUPP; break; diff --git a/xen/common/compat/argo.c b/xen/common/compat/argo.c index 6290ed61f085..4fac59751531 100644 --- a/xen/common/compat/argo.c +++ b/xen/common/compat/argo.c @@ -41,4 +41,22 @@ static inline int __maybe_unused name(k xen_ ## n *x, k compat_ ## n *c) \ } CHECK_argo_send_addr; +CHECK_argo_ring_data_ent; CHECK_argo_iov; + +/* + * Disable sizeof type checking for the following struct checks because + * these structs have fields of types that differ in the compat vs non-compat + * structs with variable size which prevents the size check validation. 
+ */ + +#undef CHECK_FIELD_COMMON_ +#define CHECK_FIELD_COMMON_(k, name, n, f) \ +static inline int __maybe_unused name(k xen_ ## n *x, k compat_ ## n *c) \ +{ \ + BUILD_BUG_ON(offsetof(k xen_ ## n, f) != \ + offsetof(k compat_ ## n, f)); \ + return 1; \ +} + +CHECK_argo_ring_data; diff --git a/xen/include/public/argo.h b/xen/include/public/argo.h index c12a50f67d2b..d2cb59443cac 100644 --- a/xen/include/public/argo.h +++ b/xen/include/public/argo.h @@ -123,6 +123,42 @@ typedef struct xen_argo_unregister_ring /* Messages on the ring are padded to a multiple of this size. */ #define XEN_ARGO_MSG_SLOT_SIZE 0x10 +/* + * Notify flags + */ +/* Ring is empty */ +#define XEN_ARGO_RING_DATA_F_EMPTY (1U << 0) +/* Ring exists */ +#define XEN_ARGO_RING_DATA_F_EXISTS (1U << 1) +/* Pending interrupt exists. Do not rely on this field - for profiling only */ +#define XEN_ARGO_RING_DATA_F_PENDING (1U << 2) +/* Sufficient space to queue space_required bytes exists */ +#define XEN_ARGO_RING_DATA_F_SUFFICIENT (1U << 3) +/* Insufficient ring size for space_required bytes */ +#define XEN_ARGO_RING_DATA_F_EMSGSIZE (1U << 4) +/* Ring is shared, not unicast */ +#define XEN_ARGO_RING_DATA_F_SHARED (1U << 5) + +typedef struct xen_argo_ring_data_ent +{ + xen_argo_addr_t ring; + uint16_t flags; + uint16_t pad; + uint32_t space_required; + uint32_t max_message_size; +} xen_argo_ring_data_ent_t; + +typedef struct xen_argo_ring_data +{ + uint32_t nent; + uint32_t pad; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + xen_argo_ring_data_ent_t data[]; +#elif defined(__GNUC__) + xen_argo_ring_data_ent_t data[0]; +#endif +} xen_argo_ring_data_t; + struct xen_argo_ring_message_header { uint32_t len; @@ -217,4 +253,35 @@ struct xen_argo_ring_message_header */ #define XEN_ARGO_OP_sendv 3 +/* + * XEN_ARGO_OP_notify + * + * Asks Xen for information about other rings in the system. + * + * ent->ring is the xen_argo_addr_t of the ring you want information on. 
+ * Uses the same ring matching rules as XEN_ARGO_OP_sendv. + * + * ent->space_required : if this field is non-zero then Xen will check + * that there is space in the destination ring for this many bytes of payload. + * If the ring is too small for the requested space_required, it will set the + * XEN_ARGO_RING_DATA_F_EMSGSIZE flag on return. + * If sufficient space is available, it will set XEN_ARGO_RING_DATA_F_SUFFICIENT + * and CANCEL any pending notification for that ent->ring; otherwise it + * will schedule a notification event and the flag will not be set. + * + * These flags are set by Xen when notify replies: + * XEN_ARGO_RING_DATA_F_EMPTY ring is empty + * XEN_ARGO_RING_DATA_F_PENDING notify event is pending *don't rely on this* + * XEN_ARGO_RING_DATA_F_SUFFICIENT sufficient space for space_required is there + * XEN_ARGO_RING_DATA_F_EXISTS ring exists + * XEN_ARGO_RING_DATA_F_EMSGSIZE space_required too large for the ring size + * XEN_ARGO_RING_DATA_F_SHARED ring is registered for wildcard partner + * + * arg1: XEN_GUEST_HANDLE(xen_argo_ring_data_t) ring_data (may be NULL) + * arg2: NULL + * arg3: 0 (ZERO) + * arg4: 0 (ZERO) + */ +#define XEN_ARGO_OP_notify 4 + #endif diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst index 3723980a877e..e45b60e3cafa 100644 --- a/xen/include/xlat.lst +++ b/xen/include/xlat.lst @@ -154,3 +154,5 @@ ? argo_unregister_ring argo.h ? argo_iov argo.h ? argo_send_addr argo.h +? argo_ring_data_ent argo.h +? argo_ring_data argo.h