Skip to content

Commit

Permalink
BUG/MINOR: improve BBR throughput on very fast links
Browse files Browse the repository at this point in the history
This patch fixes the loss of information when computing the delivery rate
(quic_cc_drs.c) on links with very low latency due to usage of 32bits
variables with the millisecond as precision.

Initialize the quic_conn task with TASK_F_WANTS_TIME flag ask it to ask
the scheduler to update the call date of this task. This allows this task to get
a nanosecond resolution on the call date calling task_mono_time(). This is enabled
only for congestion control algorithms with delivery rate estimation support
(BBR only at this time).

Store the send date with nanosecond precision of each TX packet into
->time_sent_ns new quic_tx_packet struct member to store the date a packet was
sent in nanoseconds thanks to task_mono_time().

Make use of this new timestamp by the delivery rate estimation algorithm (quic_cc_drs.c).

Rename current ->time_sent member from quic_tx_packet struct to ->time_sent_ms to
distinguish the unit used by this variable (millisecond) and update the code which
uses this variable. The logic found in quic_loss.c is not modified at all.

Must be backported to 3.1.
  • Loading branch information
haproxyFred committed Nov 28, 2024
1 parent e379761 commit f8b697c
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 51 deletions.
2 changes: 1 addition & 1 deletion include/haproxy/quic_cc-t.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ struct quic_cc {
/* <conn> is there only for debugging purpose. */
struct quic_conn *qc;
struct quic_cc_algo *algo;
uint32_t priv[158];
uint32_t priv[160];
};

struct quic_cc_path {
Expand Down
14 changes: 7 additions & 7 deletions include/haproxy/quic_cc_drs.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ struct quic_cc_rs {
uint64_t lost;
uint64_t prior_lost;
int64_t last_end_seq;
uint32_t interval;
uint32_t prior_time;
uint32_t send_elapsed;
uint32_t ack_elapsed;
uint64_t prior_time_ns;
uint32_t interval_us;
uint32_t send_elapsed_us;
uint32_t ack_elapsed_us;
uint32_t is_app_limited;
};

Expand All @@ -26,8 +26,8 @@ struct quic_cc_drs {
uint64_t delivered;
uint64_t lost;
int64_t last_seq;
uint32_t delivered_time;
uint32_t first_sent_time;
uint64_t delivered_time_ns;
uint64_t first_sent_time_ns;
int is_cwnd_limited; /* boolean */
int app_limited; /* boolean */
};
Expand All @@ -36,6 +36,6 @@ void quic_cc_drs_init(struct quic_cc_drs *drs);
void quic_cc_drs_on_pkt_sent(struct quic_cc_path *path,
struct quic_tx_packet *pkt, struct quic_cc_drs *drs);
void quic_cc_drs_update_rate_sample(struct quic_cc_drs *drs,
struct quic_tx_packet *pkt);
struct quic_tx_packet *pkt, uint64_t time_ns);
void quic_cc_drs_on_ack_recv(struct quic_cc_drs *drs, struct quic_cc_path *path,
uint64_t pkt_delivered);
7 changes: 4 additions & 3 deletions include/haproxy/quic_tx-t.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@ struct quic_tx_packet {
/* The list of frames of this packet. */
struct list frms;
/* The time this packet was sent (ms). */
unsigned int time_sent;
unsigned int time_sent_ms;
uint64_t time_sent_ns;
/* Packet number spakce. */
struct quic_pktns *pktns;
/* Flags. */
Expand All @@ -70,8 +71,8 @@ struct quic_tx_packet {
uint64_t tx_in_flight;
uint64_t lost;
int64_t end_seq;
uint32_t delivered_time;
uint32_t first_sent_time;
uint64_t delivered_time_ns;
uint64_t first_sent_time_ns;
int is_app_limited;
} rs;
unsigned char type;
Expand Down
58 changes: 30 additions & 28 deletions src/quic_cc_drs.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,21 @@
#include <haproxy/quic_cc-t.h>
#include <haproxy/quic_cc_drs.h>
#include <haproxy/quic_tx-t.h>
#include <haproxy/task.h>
#include <haproxy/ticks.h>
#include <haproxy/window_filter.h>

static void quic_cc_rs_init(struct quic_cc_rs *rs)
{
rs->interval = UINT32_MAX;
rs->interval_us = UINT32_MAX;
rs->delivered = 0;
rs->prior_delivered = 0;
rs->prior_time = TICK_ETERNITY;
rs->prior_time_ns = 0;
rs->tx_in_flight = 0;
rs->lost = 0;
rs->prior_lost = 0;
rs->send_elapsed = 0;
rs->ack_elapsed = 0;
rs->send_elapsed_us = 0;
rs->ack_elapsed_us = 0;
rs->last_end_seq = -1;
rs->is_app_limited = 0;
}
Expand All @@ -31,8 +32,8 @@ void quic_cc_drs_init(struct quic_cc_drs *drs)
drs->delivered = 0;
drs->lost = 0;
drs->last_seq = -1;
drs->delivered_time = TICK_ETERNITY;
drs->first_sent_time = TICK_ETERNITY;
drs->delivered_time_ns = 0;
drs->first_sent_time_ns = 0;
drs->app_limited = 0;
drs->is_cwnd_limited = 0;
}
Expand All @@ -44,12 +45,12 @@ void quic_cc_drs_on_pkt_sent(struct quic_cc_path *path,
struct quic_tx_packet *pkt, struct quic_cc_drs *drs)
{
if (!path->in_flight)
drs->first_sent_time = drs->delivered_time = pkt->time_sent;
drs->first_sent_time_ns = drs->delivered_time_ns = pkt->time_sent_ns;

pkt->rs.first_sent_time = drs->first_sent_time;
pkt->rs.delivered_time = drs->delivered_time;
pkt->rs.delivered = drs->delivered;
pkt->rs.is_app_limited = drs->app_limited != 0;
pkt->rs.first_sent_time_ns = drs->first_sent_time_ns;
pkt->rs.delivered_time_ns = drs->delivered_time_ns;
pkt->rs.delivered = drs->delivered;
pkt->rs.is_app_limited = drs->app_limited != 0;

pkt->rs.tx_in_flight = path->in_flight + pkt->len;
pkt->rs.lost = drs->lost;
Expand All @@ -62,8 +63,8 @@ void quic_cc_drs_on_pkt_sent(struct quic_cc_path *path,
static inline int quic_cc_drs_is_newest_packet(struct quic_cc_drs *drs,
struct quic_tx_packet *pkt)
{
return tick_is_lt(drs->first_sent_time, pkt->time_sent) ||
(pkt->time_sent == drs->first_sent_time &&
return drs->first_sent_time_ns < pkt->time_sent_ns ||
(pkt->time_sent_ns == drs->first_sent_time_ns &&
pkt->rs.end_seq > drs->rs.last_end_seq);
}

Expand All @@ -83,32 +84,32 @@ static inline int quic_cc_drs_is_newest_packet(struct quic_cc_drs *drs,
* RFC UpdateRateSample() called from first part of GenerateRateSample().
*/
void quic_cc_drs_update_rate_sample(struct quic_cc_drs *drs,
struct quic_tx_packet *pkt)
struct quic_tx_packet *pkt, uint64_t time_ns)
{
struct quic_cc_rs *rs = &drs->rs;

if (!tick_isset(pkt->rs.delivered_time))
if (!pkt->rs.delivered_time_ns)
return;

drs->delivered += pkt->len;
drs->delivered_time = now_ms;
drs->delivered_time_ns = time_ns;
/* Update info using the newest packet. */
if (tick_isset(rs->prior_time) && !quic_cc_drs_is_newest_packet(drs, pkt))
if (rs->prior_time_ns && !quic_cc_drs_is_newest_packet(drs, pkt))
return;

rs->prior_delivered = pkt->rs.delivered;
rs->prior_time = pkt->rs.delivered_time;
rs->prior_time_ns = pkt->rs.delivered_time_ns;
rs->is_app_limited = pkt->rs.is_app_limited;
rs->send_elapsed = pkt->time_sent - pkt->rs.first_sent_time;
rs->ack_elapsed = drs->delivered_time - pkt->rs.delivered_time;
rs->send_elapsed_us = (pkt->time_sent_ns - pkt->rs.first_sent_time_ns) / 1000;
rs->ack_elapsed_us = (drs->delivered_time_ns - pkt->rs.delivered_time_ns) / 1000;
rs->tx_in_flight = pkt->rs.tx_in_flight;
rs->prior_lost = pkt->rs.lost;
rs->last_end_seq = pkt->rs.end_seq;
drs->first_sent_time = pkt->time_sent;
drs->first_sent_time_ns = pkt->time_sent_ns;
/* Mark the packet as delivered once it's SACKed to
* avoid being used again when it's cumulatively acked.
*/
pkt->rs.delivered_time = TICK_ETERNITY;
pkt->rs.delivered_time_ns = 0;
}

/* RFC https://datatracker.ietf.org/doc/draft-ietf-ccwg-bbr/
Expand All @@ -131,25 +132,26 @@ void quic_cc_drs_on_ack_recv(struct quic_cc_drs *drs, struct quic_cc_path *path,
++drs->round_count;
}

if (!tick_isset(rs->prior_time))
if (!rs->prior_time_ns)
return;

rs->interval = MAX(rs->send_elapsed, rs->ack_elapsed);
rs->interval_us = MAX(rs->send_elapsed_us, rs->ack_elapsed_us);

BUG_ON(drs->delivered <= rs->prior_delivered);
rs->delivered = drs->delivered - rs->prior_delivered;
BUG_ON(drs->lost < rs->prior_lost);
rs->lost = drs->lost - rs->prior_lost;

if (rs->interval < path->loss.rtt_min) {
rs->interval = UINT32_MAX;
if (rs->interval_us < path->loss.rtt_min * 1000) {
rs->interval_us = UINT32_MAX;
return;
}

if (!rs->interval)
if (!rs->interval_us)
return;

rate = rs->delivered * 1000 / rs->interval;
/* <rate> is in bytes/s. */
rate = rs->delivered * 1000000 / rs->interval_us;
if (rate >= wf_get_max(&drs->wf) || !drs->app_limited)
path->delivery_rate = wf_max_update(&drs->wf, rate, drs->round_count);
}
5 changes: 5 additions & 0 deletions src/quic_conn.c
Original file line number Diff line number Diff line change
Expand Up @@ -1247,6 +1247,11 @@ struct quic_conn *qc_new_conn(const struct quic_version *qv, int ipv4,
}
qc->wait_event.tasklet->process = quic_conn_io_cb;
qc->wait_event.tasklet->context = qc;
/* Enable TASK_F_WANTS_TIME task flag for congestion control algorithms with
* delivery rate estimation only.
*/
if (qc->path->cc.algo->get_drs)
qc->wait_event.tasklet->state |= TASK_F_WANTS_TIME;
qc->wait_event.events = 0;
qc->subs = NULL;

Expand Down
8 changes: 4 additions & 4 deletions src/quic_loss.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ void qc_packet_loss_lookup(struct quic_pktns *pktns, struct quic_conn *qc,
if ((int64_t)pkt->pn_node.key > largest_acked_pn)
break;

time_sent = pkt->time_sent;
time_sent = pkt->time_sent_ms;
loss_time_limit = tick_add(time_sent, loss_delay);

reordered = (int64_t)largest_acked_pn >= pkt->pn_node.key + pktthresh;
Expand Down Expand Up @@ -290,12 +290,12 @@ int qc_release_lost_pkts(struct quic_conn *qc, struct quic_pktns *pktns,
struct quic_cc_event ev = { };

ev.type = QUIC_CC_EVT_LOSS;
ev.loss.time_sent = newest_lost->time_sent;
ev.loss.time_sent = newest_lost->time_sent_ms;
ev.loss.count = tot_lost;

quic_cc_event(cc, &ev);
if (cc->algo->congestion_event)
cc->algo->congestion_event(cc, newest_lost->time_sent);
cc->algo->congestion_event(cc, newest_lost->time_sent_ms);
}

/* If an RTT have been already sampled, <rtt_min> has been set.
Expand All @@ -304,7 +304,7 @@ int qc_release_lost_pkts(struct quic_conn *qc, struct quic_pktns *pktns,
* slow start state.
*/
if (qc->path->loss.rtt_min && newest_lost != oldest_lost) {
unsigned int period = newest_lost->time_sent - oldest_lost->time_sent;
unsigned int period = newest_lost->time_sent_ms - oldest_lost->time_sent_ms;

if (quic_loss_persistent_congestion(&qc->path->loss, period,
now_ms, qc->max_ack_delay) &&
Expand Down
10 changes: 7 additions & 3 deletions src/quic_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,9 +432,13 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
struct quic_cc_drs *drs =
p->cc.algo->get_drs ? p->cc.algo->get_drs(&p->cc) : NULL;
unsigned int bytes_delivered = 0, pkt_delivered = 0;
uint64_t time_ns;

TRACE_ENTER(QUIC_EV_CONN_PRSAFRM, qc);

if (drs)
time_ns = task_mono_time();

list_for_each_entry_safe(pkt, tmp, newly_acked_pkts, list) {
pkt->pktns->tx.in_flight -= pkt->in_flight_len;
p->prep_in_flight -= pkt->in_flight_len;
Expand All @@ -450,12 +454,12 @@ static void qc_notify_cc_of_newly_acked_pkts(struct quic_conn *qc,
bytes_delivered += pkt->len;
pkt_delivered = pkt->rs.delivered;
ev.ack.acked = pkt->in_flight_len;
ev.ack.time_sent = pkt->time_sent;
ev.ack.time_sent = pkt->time_sent_ms;
ev.ack.pn = pkt->pn_node.key;
/* Note that this event is not emitted for BBR. */
quic_cc_event(&p->cc, &ev);
if (drs && (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING))
quic_cc_drs_update_rate_sample(drs, pkt);
quic_cc_drs_update_rate_sample(drs, pkt, time_ns);
LIST_DEL_INIT(&pkt->list);
quic_tx_packet_refdec(pkt);
}
Expand Down Expand Up @@ -523,7 +527,7 @@ static int qc_parse_ack_frm(struct quic_conn *qc,
}
else {
time_sent = eb64_entry(largest_node,
struct quic_tx_packet, pn_node)->time_sent;
struct quic_tx_packet, pn_node)->time_sent_ms;
new_largest_acked_pn = 1;
}
}
Expand Down
14 changes: 9 additions & 5 deletions src/quic_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,8 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)
struct buffer tmpbuf = { };
struct quic_tx_packet *first_pkt, *pkt, *next_pkt;
uint16_t dglen, gso = 0, gso_fallback = 0;
unsigned int time_sent;
uint64_t time_sent_ns;
unsigned int time_sent_ms;

pos = (unsigned char *)b_head(buf);
dglen = read_u16(pos);
Expand Down Expand Up @@ -360,7 +361,8 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)

b_del(buf, dglen + QUIC_DGRAM_HEADLEN);
qc->bytes.tx += tmpbuf.data;
time_sent = now_ms;
time_sent_ms = now_ms;
time_sent_ns = task_mono_time();

for (pkt = first_pkt; pkt; pkt = next_pkt) {
struct quic_cc *cc = &qc->path->cc;
Expand Down Expand Up @@ -391,9 +393,10 @@ static int qc_send_ppkts(struct buffer *buf, struct ssl_sock_ctx *ctx)

qc->cntrs.sent_pkt++;

pkt->time_sent = time_sent;
pkt->time_sent_ns = time_sent_ns;
pkt->time_sent_ms = time_sent_ms;
if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) {
pkt->pktns->tx.time_of_last_eliciting = time_sent;
pkt->pktns->tx.time_of_last_eliciting = time_sent_ms;
qc->path->ifae_pkts++;
if (qc->flags & QUIC_FL_CONN_IDLE_TIMER_RESTARTED_AFTER_READ)
qc_idle_timer_rearm(qc, 0, 0);
Expand Down Expand Up @@ -2202,7 +2205,8 @@ static inline void quic_tx_packet_init(struct quic_tx_packet *pkt, int type)
pkt->in_flight_len = 0;
pkt->pn_node.key = (uint64_t)-1;
LIST_INIT(&pkt->frms);
pkt->time_sent = TICK_ETERNITY;
pkt->time_sent_ms = TICK_ETERNITY;
pkt->time_sent_ns = 0;
pkt->next = NULL;
pkt->prev = NULL;
pkt->largest_acked_pn = -1;
Expand Down

0 comments on commit f8b697c

Please sign in to comment.