Skip to content

Commit

Permalink
Make fbuffer_inc_capa easier to inline
Browse files Browse the repository at this point in the history
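With the extra logic added for stack allocation, and especially the
memcpy, fbuffer_inc_capa became harder for compilers to inline. Move the
slow (growth) path into a separate fbuffer_do_inc_capa function so that
only the capacity check remains in the inlined fast path.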

This doesn't fully recover the performance lost to the stack allocation
change, but it gets closer.

Before:

```
== Encoding twitter.json (466906 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json   160.000 i/100ms
                  oj   225.000 i/100ms
Calculating -------------------------------------
                json      1.577k (± 2.0%) i/s  (634.20 μs/i) -      8.000k in   5.075561s
                  oj      2.264k (± 2.3%) i/s  (441.79 μs/i) -     11.475k in   5.072205s

Comparison:
                json:     1576.8 i/s
                  oj:     2263.5 i/s - 1.44x  faster

== Encoding citm_catalog.json (500298 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json   101.000 i/100ms
                  oj   123.000 i/100ms
Calculating -------------------------------------
                json      1.033k (± 2.6%) i/s  (968.06 μs/i) -      5.252k in   5.087617s
                  oj      1.257k (± 2.2%) i/s  (795.54 μs/i) -      6.396k in   5.090830s

Comparison:
                json:     1033.0 i/s
                  oj:     1257.0 i/s - 1.22x  faster
```

After (note: the interpreter banner for this run does not show +YJIT, unlike the "Before" run):

```
== Encoding twitter.json (466906 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23]
Warming up --------------------------------------
                json   213.000 i/100ms
                  oj   230.000 i/100ms
Calculating -------------------------------------
                json      2.064k (± 3.6%) i/s  (484.44 μs/i) -     10.437k in   5.063685s
                  oj      2.246k (± 0.7%) i/s  (445.19 μs/i) -     11.270k in   5.017541s

Comparison:
                json:     2064.2 i/s
                  oj:     2246.2 i/s - 1.09x  faster

== Encoding citm_catalog.json (500298 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23]
Warming up --------------------------------------
                json   133.000 i/100ms
                  oj   132.000 i/100ms
Calculating -------------------------------------
                json      1.327k (± 1.7%) i/s  (753.69 μs/i) -      6.650k in   5.013565s
                  oj      1.305k (± 2.2%) i/s  (766.40 μs/i) -      6.600k in   5.061089s

Comparison:
                json:     1326.8 i/s
                  oj:     1304.8 i/s - same-ish: difference falls within error
```
  • Loading branch information
byroot committed Oct 29, 2024
1 parent 4e0972b commit fcebd74
Showing 1 changed file with 26 additions and 21 deletions.
47 changes: 26 additions & 21 deletions ext/json/ext/fbuffer/fbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len);
#ifdef JSON_GENERATOR
static void fbuffer_append_long(FBuffer *fb, long number);
#endif
static void fbuffer_append_char(FBuffer *fb, char newchr);
static inline void fbuffer_append_char(FBuffer *fb, char newchr);
#ifdef JSON_GENERATOR
static VALUE fbuffer_to_s(FBuffer *fb);
#endif
Expand Down Expand Up @@ -66,29 +66,34 @@ static void fbuffer_clear(FBuffer *fb)
}
#endif

static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
{
if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
unsigned long required;
unsigned long required;

if (RB_UNLIKELY(!fb->ptr)) {
fb->ptr = ALLOC_N(char, fb->initial_length);
fb->capa = fb->initial_length;
}
if (RB_UNLIKELY(!fb->ptr)) {
fb->ptr = ALLOC_N(char, fb->initial_length);
fb->capa = fb->initial_length;
}

for (required = fb->capa; requested > required - fb->len; required <<= 1);

for (required = fb->capa; requested > required - fb->len; required <<= 1);

if (required > fb->capa) {
if (fb->type == STACK) {
const char *old_buffer = fb->ptr;
fb->ptr = ALLOC_N(char, required);
fb->type = HEAP;
MEMCPY(fb->ptr, old_buffer, char, fb->len);
} else {
REALLOC_N(fb->ptr, char, required);
}
fb->capa = required;
if (required > fb->capa) {
if (fb->type == STACK) {
const char *old_buffer = fb->ptr;
fb->ptr = ALLOC_N(char, required);
fb->type = HEAP;
MEMCPY(fb->ptr, old_buffer, char, fb->len);
} else {
REALLOC_N(fb->ptr, char, required);
}
fb->capa = required;
}
}

static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
{
if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
fbuffer_do_inc_capa(fb, requested);
}
}

Expand All @@ -113,7 +118,7 @@ static void fbuffer_append_str(FBuffer *fb, VALUE str)
}
#endif

static void fbuffer_append_char(FBuffer *fb, char newchr)
static inline void fbuffer_append_char(FBuffer *fb, char newchr)
{
fbuffer_inc_capa(fb, 1);
*(fb->ptr + fb->len) = newchr;
Expand Down

0 comments on commit fcebd74

Please sign in to comment.