From 0a753d2ff7584fe8943ea1250a1062382ef2e29c Mon Sep 17 00:00:00 2001 From: SpringMT Date: Thu, 11 Apr 2024 13:20:25 +0900 Subject: [PATCH 1/6] feat: Standardized dictionary handling Feature * Zstd.compress supports :level and :dict keyword args * Zstd.decompress supports :dict keyword args * Zstd::StreamingCompress.new supports :level and :dict keyword args * Zstd::StreamingDecompress.new supports :dict keyword args Breaking Change * Zstd.compress uses ZSTD_compressStream2 instead of ZSTD_compress * Zstd.decompress uses ZSTD_decompressDCtx instead of ZSTD_decompress Deprecation * Zstd.compress_using_dict adds deprecation warning * use Zstd.compress with :dict keyword args * Zstd.decompress_using_dict adds deprecation warning * use Zstd.decompress with :dict keyword args * Zstd.compress with level args adds deprecation warning * use Zstd.compress with :level keyword args * Zstd::StreamingCompress.new with level args adds deprecation warning * use Zstd::StreamingCompress.new with :level keyword args --- benchmarks/results/city.json.gzip | Bin 224439 -> 224439 bytes examples/sinatra/Gemfile | 5 ++ examples/sinatra/Gemfile.lock | 41 +++++++++++++ examples/sinatra/app.rb | 7 +++ ext/zstdruby/common.h | 53 ++++++++++++++++- ext/zstdruby/main.c | 3 +- ext/zstdruby/skippable_frame.c | 2 +- ext/zstdruby/streaming_compress.c | 29 +++------- ext/zstdruby/streaming_compress.h | 5 -- ext/zstdruby/streaming_decompress.c | 39 +++++-------- ext/zstdruby/zstdruby.c | 56 ++++++++++++------ spec/zstd-ruby-streaming-compress_spec.rb | 18 +++--- spec/zstd-ruby-streaming-decompress_spec.rb | 14 ++--- spec/zstd-ruby-using-dict_spec.rb | 61 +++++++++++++++++++- spec/zstd-ruby_spec.rb | 22 +++++-- zstd-ruby.gemspec | 2 +- 16 files changed, 262 insertions(+), 95 deletions(-) create mode 100644 examples/sinatra/Gemfile create mode 100644 examples/sinatra/Gemfile.lock create mode 100644 examples/sinatra/app.rb delete mode 100644 ext/zstdruby/streaming_compress.h diff --git a/benchmarks/results/city.json.gzip b/benchmarks/results/city.json.gzip index 25c90ba8384300639caac431588dfce9936ae397..0b57e9d06e2bf674a22190a934e189ecc34765a5 100644 GIT binary patch delta 27 icmdmflXv?~UUvCz4u+Iu@kaJmcE(nArmgJE8*%`GaR^WV delta 27 jcmdmflXv?~UUvCz4u-vn)#?2N7KOk3HRH{<{ShCv95 diff --git a/examples/sinatra/Gemfile b/examples/sinatra/Gemfile new file mode 100644 index 0000000..3200287 --- /dev/null +++ b/examples/sinatra/Gemfile @@ -0,0 +1,5 @@ +source "https://rubygems.org" + +gem "sinatra" +gem "rackup" +gem "zstd-ruby", path: "../../" diff --git a/examples/sinatra/Gemfile.lock b/examples/sinatra/Gemfile.lock new file mode 100644 index 0000000..8708d15 --- /dev/null +++ b/examples/sinatra/Gemfile.lock @@ -0,0 +1,41 @@ +PATH + remote: ../.. + specs: + zstd-ruby (1.5.6.1) + +GEM + remote: https://rubygems.org/ + specs: + base64 (0.2.0) + mustermann (3.0.0) + ruby2_keywords (~> 0.0.1) + rack (3.0.10) + rack-protection (4.0.0) + base64 (>= 0.1.0) + rack (>= 3.0.0, < 4) + rack-session (2.0.0) + rack (>= 3.0.0) + rackup (2.1.0) + rack (>= 3) + webrick (~> 1.8) + ruby2_keywords (0.0.5) + sinatra (4.0.0) + mustermann (~> 3.0) + rack (>= 3.0.0, < 4) + rack-protection (= 4.0.0) + rack-session (>= 2.0.0, < 3) + tilt (~> 2.0) + tilt (2.3.0) + webrick (1.8.1) + +PLATFORMS + arm64-darwin-21 + ruby + +DEPENDENCIES + rackup + sinatra + zstd-ruby!
+ +BUNDLED WITH + 2.5.7 diff --git a/examples/sinatra/app.rb b/examples/sinatra/app.rb new file mode 100644 index 0000000..fa618c0 --- /dev/null +++ b/examples/sinatra/app.rb @@ -0,0 +1,7 @@ +require 'sinatra' +require 'zstd-ruby' + +get '/' do + headers["Content-Encoding"] = "zstd" + Zstd.compress('Hello world!') +end \ No newline at end of file diff --git a/ext/zstdruby/common.h b/ext/zstdruby/common.h index 8bccfdd..8bc953a 100644 --- a/ext/zstdruby/common.h +++ b/ext/zstdruby/common.h @@ -1,7 +1,7 @@ #ifndef ZSTD_RUBY_H #define ZSTD_RUBY_H 1 -#include "ruby.h" +#include <ruby.h> #include "./libzstd/zstd.h" static int convert_compression_level(VALUE compression_level_value) @@ -12,4 +12,55 @@ static int convert_compression_level(VALUE compression_level_value) return NUM2INT(compression_level_value); } +static size_t zstd_compress(ZSTD_CCtx* const ctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input, ZSTD_EndDirective endOp) +{ + return ZSTD_compressStream2(ctx, output, input, endOp); +} + +static void set_compress_params(ZSTD_CCtx* const ctx, VALUE level_from_args, VALUE kwargs) +{ + ID kwargs_keys[2]; + kwargs_keys[0] = rb_intern("level"); + kwargs_keys[1] = rb_intern("dict"); + VALUE kwargs_values[2]; + rb_get_kwargs(kwargs, kwargs_keys, 0, 2, kwargs_values); + + int compression_level = ZSTD_CLEVEL_DEFAULT; + if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) { + compression_level = convert_compression_level(kwargs_values[0]); + } else if (!NIL_P(level_from_args)) { + rb_warn("`level` in args is deprecated; use keyword args `level:` instead."); + compression_level = convert_compression_level(level_from_args); + } + ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, compression_level); + + if (kwargs_values[1] != Qundef && kwargs_values[1] != Qnil) { + char* dict_buffer = RSTRING_PTR(kwargs_values[1]); + size_t dict_size = RSTRING_LEN(kwargs_values[1]); + size_t load_dict_ret = ZSTD_CCtx_loadDictionary(ctx, dict_buffer, dict_size); + if (ZSTD_isError(load_dict_ret)) { + ZSTD_freeCCtx(ctx); + rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed"); + } + } +} + +static void set_decompress_params(ZSTD_DCtx* const dctx, VALUE kwargs) +{ + ID kwargs_keys[1]; + kwargs_keys[0] = rb_intern("dict"); + VALUE kwargs_values[1]; + rb_get_kwargs(kwargs, kwargs_keys, 0, 1, kwargs_values); + + if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) { + char* dict_buffer = RSTRING_PTR(kwargs_values[0]); + size_t dict_size = RSTRING_LEN(kwargs_values[0]); + size_t load_dict_ret = ZSTD_DCtx_loadDictionary(dctx, dict_buffer, dict_size); + if (ZSTD_isError(load_dict_ret)) { + ZSTD_freeDCtx(dctx); + rb_raise(rb_eRuntimeError, "%s", "ZSTD_DCtx_loadDictionary failed"); + } + } +} + #endif /* ZSTD_RUBY_H */ diff --git a/ext/zstdruby/main.c b/ext/zstdruby/main.c index 7ecf25a..0f2198b 100644 --- a/ext/zstdruby/main.c +++ b/ext/zstdruby/main.c @@ -1,4 +1,5 @@ -#include <common.h> +#include "common.h" + VALUE rb_mZstd; void zstd_ruby_init(void); void zstd_ruby_skippable_frame_init(void); diff --git a/ext/zstdruby/skippable_frame.c b/ext/zstdruby/skippable_frame.c index 03220e2..73534b8 100644 --- a/ext/zstdruby/skippable_frame.c +++ b/ext/zstdruby/skippable_frame.c @@ -1,4 +1,4 @@ -#include <common.h> +#include "common.h" extern VALUE rb_mZstd; diff --git a/ext/zstdruby/streaming_compress.c b/ext/zstdruby/streaming_compress.c index fe61146..85e3906 100644 --- a/ext/zstdruby/streaming_compress.c +++ b/ext/zstdruby/streaming_compress.c @@ -1,5 +1,4 @@ -#include <common.h> -#include <streaming_compress.h> +#include "common.h" struct streaming_compress_t {
ZSTD_CCtx* ctx; @@ -71,14 +70,9 @@ rb_streaming_compress_allocate(VALUE klass) static VALUE rb_streaming_compress_initialize(int argc, VALUE *argv, VALUE obj) { + VALUE kwargs; VALUE compression_level_value; - rb_scan_args(argc, argv, "01", &compression_level_value); - int compression_level = convert_compression_level(compression_level_value); - - ID kwargs_keys[1]; - kwargs_keys[0] = rb_intern("dict"); - VALUE kwargs_values[1]; - rb_get_kwargs(kwargs, kwargs_keys, 0, 1, kwargs_values); + rb_scan_args(argc, argv, "01:", &compression_level_value, &kwargs); struct streaming_compress_t* sc; TypedData_Get_Struct(obj, struct streaming_compress_t, &streaming_compress_type, sc); @@ -88,15 +82,8 @@ rb_streaming_compress_initialize(int argc, VALUE *argv, VALUE obj) if (ctx == NULL) { rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error"); } - if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) { - char* dict_buffer = RSTRING_PTR(kwargs_values[0]); - size_t dict_size = RSTRING_LEN(kwargs_values[0]); - size_t load_dict_ret = ZSTD_CCtx_loadDictionary(ctx, dict_buffer, dict_size); - if (ZSTD_isError(load_dict_ret)) { - rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed"); - } - } - ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, compression_level); + set_compress_params(ctx, compression_level_value, kwargs); + sc->ctx = ctx; sc->buf = rb_str_new(NULL, buffOutSize); sc->buf_size = buffOutSize; @@ -119,7 +106,7 @@ no_compress(struct streaming_compress_t* sc, ZSTD_EndDirective endOp) do { ZSTD_outBuffer output = { (void*)output_data, sc->buf_size, 0 }; - size_t const ret = ZSTD_compressStream2(sc->ctx, &output, &input, endOp); + size_t const ret = zstd_compress(sc->ctx, &output, &input, endOp); if (ZSTD_isError(ret)) { rb_raise(rb_eRuntimeError, "flush error error code: %s", ZSTD_getErrorName(ret)); } @@ -143,7 +130,7 @@ rb_streaming_compress_compress(VALUE obj, VALUE src) VALUE result = rb_str_new(0, 0); while (input.pos < input.size) { ZSTD_outBuffer output = { (void*)output_data, sc->buf_size, 0 }; - size_t const ret = ZSTD_compressStream2(sc->ctx, &output, &input, ZSTD_e_continue); + size_t const ret = zstd_compress(sc->ctx, &output, &input, ZSTD_e_continue); if (ZSTD_isError(ret)) { rb_raise(rb_eRuntimeError, "compress error error code: %s", ZSTD_getErrorName(ret)); } @@ -170,7 +157,7 @@ rb_streaming_compress_write(int argc, VALUE *argv, VALUE obj) ZSTD_inBuffer input = { input_data, input_size, 0 }; while (input.pos < input.size) { - size_t const ret = ZSTD_compressStream2(sc->ctx, &output, &input, ZSTD_e_continue); + size_t const ret = zstd_compress(sc->ctx, &output, &input, ZSTD_e_continue); if (ZSTD_isError(ret)) { rb_raise(rb_eRuntimeError, "compress error error code: %s", ZSTD_getErrorName(ret)); } diff --git a/ext/zstdruby/streaming_compress.h b/ext/zstdruby/streaming_compress.h deleted file mode 100644 index 451e964..0000000 --- a/ext/zstdruby/streaming_compress.h +++ /dev/null @@ -1,5 +0,0 @@ -#if !defined(STREAMING_COMPRESS_H) -#define STREAMING_COMPRESS_H - - -#endif // STREAMING_COMPRESS_H diff --git a/ext/zstdruby/streaming_decompress.c b/ext/zstdruby/streaming_decompress.c index 4b816de..6152d21 100644 --- a/ext/zstdruby/streaming_decompress.c +++ b/ext/zstdruby/streaming_decompress.c @@ -1,7 +1,7 @@ -#include <common.h> +#include "common.h" struct streaming_decompress_t { - ZSTD_DCtx* ctx; + ZSTD_DCtx* dctx; VALUE buf; size_t buf_size; }; @@ -21,9 +21,9 @@ static void streaming_decompress_free(void *p) { struct streaming_decompress_t *sd = p; - ZSTD_DCtx* ctx =
sd->ctx; - if (ctx != NULL) { - ZSTD_freeDCtx(ctx); + ZSTD_DCtx* dctx = sd->dctx; + if (dctx != NULL) { + ZSTD_freeDCtx(dctx); } xfree(sd); } @@ -61,40 +61,29 @@ rb_streaming_decompress_allocate(VALUE klass) { struct streaming_decompress_t* sd; VALUE obj = TypedData_Make_Struct(klass, struct streaming_decompress_t, &streaming_decompress_type, sd); - sd->ctx = NULL; + sd->dctx = NULL; sd->buf = Qnil; sd->buf_size = 0; return obj; } static VALUE -rb_streaming_decompress_initialize(VALUE obj) +rb_streaming_decompress_initialize(int argc, VALUE *argv, VALUE obj) { VALUE kwargs; rb_scan_args(argc, argv, "00:", &kwargs); - ID kwargs_keys[1]; - kwargs_keys[0] = rb_intern("dict"); - VALUE kwargs_values[1]; - rb_get_kwargs(kwargs, kwargs_keys, 0, 1, kwargs_values); - struct streaming_decompress_t* sd; TypedData_Get_Struct(obj, struct streaming_decompress_t, &streaming_decompress_type, sd); size_t const buffOutSize = ZSTD_DStreamOutSize(); - ZSTD_DCtx* ctx = ZSTD_createDCtx(); - if (ctx == NULL) { + ZSTD_DCtx* dctx = ZSTD_createDCtx(); + if (dctx == NULL) { rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDCtx error"); } - if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) { - char* dict_buffer = RSTRING_PTR(kwargs_values[0]); - size_t dict_size = RSTRING_LEN(kwargs_values[0]); - size_t load_dict_ret = ZSTD_DCtx_loadDictionary(ctx, dict_buffer, dict_size); - if (ZSTD_isError(load_dict_ret)) { - rb_raise(rb_eRuntimeError, "%s", "ZSTD_DCtx_loadDictionary failed"); - } - } - sd->ctx = ctx; + set_decompress_params(dctx, kwargs); + + sd->dctx = dctx; sd->buf = rb_str_new(NULL, buffOutSize); sd->buf_size = buffOutSize; @@ -115,7 +104,7 @@ rb_streaming_decompress_decompress(VALUE obj, VALUE src) VALUE result = rb_str_new(0, 0); while (input.pos < input.size) { ZSTD_outBuffer output = { (void*)output_data, sd->buf_size, 0 }; - size_t const ret = ZSTD_decompressStream(sd->ctx, &output, &input); + size_t const ret = ZSTD_decompressStream(sd->dctx, &output, &input); if (ZSTD_isError(ret)) { rb_raise(rb_eRuntimeError, "decompress error error code: %s", ZSTD_getErrorName(ret)); } @@ -130,6 +119,6 @@ zstd_ruby_streaming_decompress_init(void) { VALUE cStreamingDecompress = rb_define_class_under(rb_mZstd, "StreamingDecompress", rb_cObject); rb_define_alloc_func(cStreamingDecompress, rb_streaming_decompress_allocate); - rb_define_method(cStreamingDecompress, "initialize", rb_streaming_decompress_initialize, 0); + rb_define_method(cStreamingDecompress, "initialize", rb_streaming_decompress_initialize, -1); rb_define_method(cStreamingDecompress, "decompress", rb_streaming_decompress_decompress, 1); } diff --git a/ext/zstdruby/zstdruby.c b/ext/zstdruby/zstdruby.c index ca67169..ba2fcd6 100644 --- a/ext/zstdruby/zstdruby.c +++ b/ext/zstdruby/zstdruby.c @@ -12,28 +12,39 @@ static VALUE rb_compress(int argc, VALUE *argv, VALUE self) { VALUE input_value; VALUE compression_level_value; - rb_scan_args(argc, argv, "11", &input_value, &compression_level_value); - int compression_level = convert_compression_level(compression_level_value); + VALUE kwargs; + rb_scan_args(argc, argv, "11:", &input_value, &compression_level_value, &kwargs); + + ZSTD_CCtx* const ctx = ZSTD_createCCtx(); + if (ctx == NULL) { + rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error"); + } + + set_compress_params(ctx, compression_level_value, kwargs); StringValue(input_value); char* input_data = RSTRING_PTR(input_value); size_t input_size = RSTRING_LEN(input_value); + ZSTD_inBuffer input = { input_data, input_size, 0 }; size_t max_compressed_size 
= ZSTD_compressBound(input_size); + VALUE buf = rb_str_new(NULL, max_compressed_size); + char* output_data = RSTRING_PTR(buf); + ZSTD_outBuffer output = { (void*)output_data, max_compressed_size, 0 }; - VALUE output = rb_str_new(NULL, max_compressed_size); - char* output_data = RSTRING_PTR(output); - size_t compressed_size = ZSTD_compress((void*)output_data, max_compressed_size, - (void*)input_data, input_size, compression_level); - if (ZSTD_isError(compressed_size)) { - rb_raise(rb_eRuntimeError, "%s: %s", "compress failed", ZSTD_getErrorName(compressed_size)); + size_t const ret = zstd_compress(ctx, &output, &input, ZSTD_e_end); + if (ZSTD_isError(ret)) { + ZSTD_freeCCtx(ctx); + rb_raise(rb_eRuntimeError, "%s: %s", "compress failed", ZSTD_getErrorName(ret)); } - - rb_str_resize(output, compressed_size); - return output; + VALUE result = rb_str_new(0, 0); + rb_str_cat(result, output.dst, output.pos); + ZSTD_freeCCtx(ctx); + return result; } static VALUE rb_compress_using_dict(int argc, VALUE *argv, VALUE self) { + rb_warn("Zstd.compress_using_dict is deprecated; use Zstd.compress with `dict:` instead."); VALUE input_value; VALUE dict; VALUE compression_level_value; @@ -78,8 +89,6 @@ static VALUE rb_compress_using_dict(int argc, VALUE *argv, VALUE self) static VALUE decompress_buffered(const char* input_data, size_t input_size) { - const size_t outputBufferSize = 4096; - ZSTD_DStream* const dstream = ZSTD_createDStream(); if (dstream == NULL) { rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDStream failed"); @@ -96,7 +105,7 @@ static VALUE decompress_buffered(const char* input_data, size_t input_size) ZSTD_inBuffer input = { input_data, input_size, 0 }; while (input.pos < input.size) { - output.size += outputBufferSize; + output.size += ZSTD_DStreamOutSize(); rb_str_resize(output_string, output.size); output.dst = RSTRING_PTR(output_string); @@ -112,8 +121,11 @@ static VALUE decompress_buffered(const char* input_data, size_t input_size) return output_string; } -static VALUE rb_decompress(VALUE self, VALUE input_value) +static VALUE rb_decompress(int argc, VALUE *argv, VALUE self) { + VALUE input_value; + VALUE kwargs; + rb_scan_args(argc, argv, "10:", &input_value, &kwargs); StringValue(input_value); char* input_data = RSTRING_PTR(input_value); size_t input_size = RSTRING_LEN(input_value); @@ -122,15 +134,22 @@ static VALUE rb_decompress(VALUE self, VALUE input_value) if (uncompressed_size == ZSTD_CONTENTSIZE_ERROR) { rb_raise(rb_eRuntimeError, "%s: %s", "not compressed by zstd", ZSTD_getErrorName(uncompressed_size)); } + // ZSTD_decompressStream may be called multiple times when ZSTD_CONTENTSIZE_UNKNOWN, causing slowness. 
+ // Therefore, we will not standardize on ZSTD_decompressStream if (uncompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { return decompress_buffered(input_data, input_size); } + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + if (dctx == NULL) { + rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDCtx failed"); + } + set_decompress_params(dctx, kwargs); + VALUE output = rb_str_new(NULL, uncompressed_size); char* output_data = RSTRING_PTR(output); - size_t const decompress_size = ZSTD_decompress((void*)output_data, uncompressed_size, - (void*)input_data, input_size); + size_t const decompress_size = ZSTD_decompressDCtx(dctx, output_data, uncompressed_size, input_data, input_size); if (ZSTD_isError(decompress_size)) { rb_raise(rb_eRuntimeError, "%s: %s", "decompress error", ZSTD_getErrorName(decompress_size)); } @@ -140,6 +159,7 @@ static VALUE rb_decompress(VALUE self, VALUE input_value) static VALUE rb_decompress_using_dict(int argc, VALUE *argv, VALUE self) { + rb_warn("Zstd.decompress_using_dict is deprecated; use Zstd.decompress with `dict:` instead."); VALUE input_value; VALUE dict; rb_scan_args(argc, argv, "20", &input_value, &dict); @@ -193,6 +213,6 @@ zstd_ruby_init(void) rb_define_module_function(rb_mZstd, "zstd_version", zstdVersion, 0); rb_define_module_function(rb_mZstd, "compress", rb_compress, -1); rb_define_module_function(rb_mZstd, "compress_using_dict", rb_compress_using_dict, -1); - rb_define_module_function(rb_mZstd, "decompress", rb_decompress, 1); + rb_define_module_function(rb_mZstd, "decompress", rb_decompress, -1); rb_define_module_function(rb_mZstd, "decompress_using_dict", rb_decompress_using_dict, -1); } diff --git a/spec/zstd-ruby-streaming-compress_spec.rb b/spec/zstd-ruby-streaming-compress_spec.rb index aac60c5..770a2bb 100644 --- a/spec/zstd-ruby-streaming-compress_spec.rb +++ b/spec/zstd-ruby-streaming-compress_spec.rb @@ -46,7 +46,7 @@ describe 'compression level' do it 'shoud work' do - stream = Zstd::StreamingCompress.new(5) + stream = Zstd::StreamingCompress.new(level: 5) stream << "abc" << "def" res = stream.finish expect(Zstd.decompress(res)).to eq('abcdef') @@ -55,16 +55,16 @@ describe 'dictionary' do let(:dictionary) do - IO.read("#{__dir__}/dictionary") + File.read("#{__dir__}/dictionary") end let(:user_json) do - IO.read("#{__dir__}/user_springmt.json") + File.read("#{__dir__}/user_springmt.json") end it 'shoud work' do - dict_stream = Zstd::StreamingCompress.new(5, dict: dictionary, no_gvl: no_gvl) + dict_stream = Zstd::StreamingCompress.new(level: 5, dict: dictionary) dict_stream << user_json dict_res = dict_stream.finish - stream = Zstd::StreamingCompress.new(5, no_gvl: no_gvl) + stream = Zstd::StreamingCompress.new(level: 5) stream << user_json res = stream.finish @@ -74,13 +74,13 @@ describe 'nil dictionary' do let(:user_json) do - IO.read("#{__dir__}/user_springmt.json") + File.read("#{__dir__}/user_springmt.json") end it 'shoud work' do - dict_stream = Zstd::StreamingCompress.new(5, dict: nil, no_gvl: no_gvl) + dict_stream = Zstd::StreamingCompress.new(level: 5, dict: nil) dict_stream << user_json dict_res = dict_stream.finish - stream = Zstd::StreamingCompress.new(5, no_gvl: no_gvl) + stream = Zstd::StreamingCompress.new(level: 5) stream << user_json res = stream.finish @@ -92,7 +92,7 @@ describe 'Ractor' do it 'should be supported' do r = Ractor.new { - stream = Zstd::StreamingCompress.new(5) + stream = Zstd::StreamingCompress.new(level: 5) stream << "abc" << "def" res = stream.finish } diff --git a/spec/zstd-ruby-streaming-decompress_spec.rb 
b/spec/zstd-ruby-streaming-decompress_spec.rb index 96f9f01..d635171 100644 --- a/spec/zstd-ruby-streaming-decompress_spec.rb +++ b/spec/zstd-ruby-streaming-decompress_spec.rb @@ -34,14 +34,14 @@ describe 'dictionary streaming decompress + GC.compact' do let(:dictionary) do - IO.read("#{__dir__}/dictionary") + File.read("#{__dir__}/dictionary") end let(:user_json) do - IO.read("#{__dir__}/user_springmt.json") + File.read("#{__dir__}/user_springmt.json") end it 'shoud work' do - compressed_json = Zstd.compress_using_dict(user_json, dictionary) - stream = Zstd::StreamingDecompress.new(dict: dictionary, no_gvl: no_gvl) + compressed_json = Zstd.compress(user_json, dict: dictionary) + stream = Zstd::StreamingDecompress.new(dict: dictionary) result = '' result << stream.decompress(compressed_json[0, 5]) result << stream.decompress(compressed_json[5, 5]) @@ -53,14 +53,14 @@ describe 'nil dictionary streaming decompress + GC.compact' do let(:dictionary) do - IO.read("#{__dir__}/dictionary") + File.read("#{__dir__}/dictionary") end let(:user_json) do - IO.read("#{__dir__}/user_springmt.json") + File.read("#{__dir__}/user_springmt.json") end it 'shoud work' do compressed_json = Zstd.compress(user_json) - stream = Zstd::StreamingDecompress.new(dict: nil, no_gvl: no_gvl) + stream = Zstd::StreamingDecompress.new(dict: nil) result = '' result << stream.decompress(compressed_json[0, 5]) result << stream.decompress(compressed_json[5, 5]) diff --git a/spec/zstd-ruby-using-dict_spec.rb b/spec/zstd-ruby-using-dict_spec.rb index 9a26d8d..0bff350 100644 --- a/spec/zstd-ruby-using-dict_spec.rb +++ b/spec/zstd-ruby-using-dict_spec.rb @@ -7,6 +7,51 @@ # https://github.com/facebook/zstd/releases/tag/v1.1.3 RSpec.describe Zstd do + describe 'compress and decompress with dict keyward args' do + let(:user_json) do + File.read("#{__dir__}/user_springmt.json") + end + let(:dictionary) do + File.read("#{__dir__}/dictionary") + end + + it 'should work' do + compressed_using_dict = Zstd.compress(user_json, dict: dictionary) + compressed = Zstd.compress(user_json) + expect(compressed_using_dict.length).to be < compressed.length + expect(user_json).to eq(Zstd.decompress(compressed_using_dict, dict: dictionary)) + end + + it 'should work with simple string' do + compressed_using_dict = Zstd.compress("abc", dict: dictionary) + expect("abc").to eq(Zstd.decompress(compressed_using_dict, dict: dictionary)) + end + + it 'should work with blank input' do + compressed_using_dict = Zstd.compress("", dict: dictionary) + expect("").to eq(Zstd.decompress(compressed_using_dict, dict: dictionary)) + end + + it 'should work with blank dictionary' do + compressed_using_dict = Zstd.compress(user_json, dict: "") + expect(user_json).to eq(Zstd.decompress(compressed_using_dict, dict: "")) + expect(user_json).to eq(Zstd.decompress(compressed_using_dict)) + end + + it 'should support compression levels' do + compressed_using_dict = Zstd.compress(user_json, dict: dictionary) + compressed_using_dict_10 = Zstd.compress(user_json, dict: dictionary, level: 10) + expect(compressed_using_dict_10.length).to be < compressed_using_dict.length + expect(user_json).to eq(Zstd.decompress(compressed_using_dict_10, dict: dictionary)) + end + + it 'should support compression levels with blank dictionary' do + compressed_using_dict_10 = Zstd.compress_using_dict(user_json, dict: dictionary, level: 10) + expect(user_json).to eq(Zstd.decompress(compressed_using_dict_10, dict: "")) + expect(user_json).to eq(Zstd.decompress(compressed_using_dict_10)) + end + end + 
describe 'compress_using_dict' do let(:user_json) do File.read("#{__dir__}/user_springmt.json") @@ -27,17 +72,29 @@ expect("abc").to eq(Zstd.decompress_using_dict(compressed_using_dict, dictionary)) end - it 'should work with blank' do + it 'should work with blank input' do compressed_using_dict = Zstd.compress_using_dict("", dictionary) expect("").to eq(Zstd.decompress_using_dict(compressed_using_dict, dictionary)) end + it 'should work with blank dictionary' do + compressed_using_dict = Zstd.compress_using_dict(user_json, "") + expect(user_json).to eq(Zstd.decompress_using_dict(compressed_using_dict, "")) + expect(user_json).to eq(Zstd.decompress(compressed_using_dict)) + end + it 'should support compression levels' do compressed_using_dict = Zstd.compress_using_dict(user_json, dictionary) compressed_using_dict_10 = Zstd.compress_using_dict(user_json, dictionary, 10) expect(compressed_using_dict_10.length).to be < compressed_using_dict.length + expect(user_json).to eq(Zstd.decompress_using_dict(compressed_using_dict_10, dictionary)) + end + + it 'should support compression levels with blank dictionary' do + compressed_using_dict_10 = Zstd.compress_using_dict(user_json, "", 10) + expect(user_json).to eq(Zstd.decompress_using_dict(compressed_using_dict_10, "")) + expect(user_json).to eq(Zstd.decompress(compressed_using_dict_10)) end end end - diff --git a/spec/zstd-ruby_spec.rb b/spec/zstd-ruby_spec.rb index 36ed267..5569a7b 100644 --- a/spec/zstd-ruby_spec.rb +++ b/spec/zstd-ruby_spec.rb @@ -3,6 +3,10 @@ require 'securerandom' RSpec.describe Zstd do + let(:user_json) do + File.read("#{__dir__}/user_springmt.json") + end + it "has a version number" do expect(Zstd::VERSION).not_to be nil end @@ -15,15 +19,25 @@ describe 'compress' do it 'should work' do - compressed = Zstd.compress('abc' * 10) + compressed = Zstd.compress(user_json) expect(compressed).to be_a(String) - expect(compressed).to_not eq('abc' * 10) + expect(compressed).to_not eq(user_json) end it 'should support compression levels' do - compressed = Zstd.compress('abc', 1) + compressed = Zstd.compress(user_json, 1) + expect(compressed).to be_a(String) + expect(compressed).to_not eq(user_json) + end + + it 'should support compression keyward args levels' do + compressed = Zstd.compress(user_json, level: 1) + compressed_with_arg = Zstd.compress(user_json, 1) + compressed_default = Zstd.compress(user_json) expect(compressed).to be_a(String) - expect(compressed).to_not eq('abc') + expect(compressed).to_not eq(user_json) + expect(compressed).to eq(compressed_with_arg) + expect(compressed_default.length).to be < compressed_with_arg.length end it 'should raise exception with unsupported object' do diff --git a/zstd-ruby.gemspec b/zstd-ruby.gemspec index a0d0b68..6c6e594 100644 --- a/zstd-ruby.gemspec +++ b/zstd-ruby.gemspec @@ -24,7 +24,7 @@ Gem::Specification.new do |spec| #end spec.files = `git ls-files -z`.split("\x0").reject do |f| - f.match(%r{^(test|spec|features|benchmarks|zstd|.github)/}) + f.match(%r{^(test|spec|features|benchmarks|zstd|.github|examples)/}) end spec.bindir = "exe" spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } From e4d97ab943d8ac298418fc5da1a25fa4f52d5278 Mon Sep 17 00:00:00 2001 From: SpringMT Date: Thu, 11 Apr 2024 15:31:45 +0900 Subject: [PATCH 2/6] docs: update README.md --- README.md | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 09af9ea..63467ad 100644 --- a/README.md +++ b/README.md @@ -34,20 
+34,22 @@ Or install it yourself as: require 'zstd-ruby' ``` -### Simple Compression +### Compression + +#### Simple Compression ```ruby compressed_data = Zstd.compress(data) -compressed_data = Zstd.compress(data, complession_level) # default compression_level is 0 +compressed_data = Zstd.compress(data, level: complession_level) # default compression_level is 3 ``` -### Compression using Dictionary +#### Compression with Dictionary ```ruby # dictionary is supposed to have been created using `zstd --train` -compressed_using_dict = Zstd.compress_using_dict("", IO.read('dictionary_file')) +compressed_using_dict = Zstd.compress("", dict: IO.read('dictionary_file')) ``` -### Streaming Compression +#### Streaming Compression ```ruby stream = Zstd::StreamingCompress.new stream << "abc" << "def" @@ -66,7 +68,7 @@ res << stream.compress("def") res << stream.finish ``` -### Streaming Compression using Dictionary +#### Streaming Compression with Dictionary ```ruby stream = Zstd::StreamingCompress.new(dict: IO.read('dictionary_file')) stream << "abc" << "def" @@ -75,19 +77,30 @@ stream << "ghi" res << stream.finish ``` -### Simple Decompression +#### Streaming Compression with level and Dictionary +```ruby +stream = Zstd::StreamingCompress.new(level: 5, dict: IO.read('dictionary_file')) +stream << "abc" << "def" +res = stream.flush +stream << "ghi" +res << stream.finish +``` + +### Decompression + +#### Simple Decompression ```ruby data = Zstd.decompress(compressed_data) ``` -### Decomporession using Dictionary +#### Decompression with Dictionary ```ruby # dictionary is supposed to have been created using `zstd --train` -Zstd.decompress_using_dict(compressed_using_dict, IO.read('dictionary_file')) +Zstd.decompress(compressed_using_dict, dict: IO.read('dictionary_file')) ``` -### Streaming Decompression +#### Streaming Decompression ```ruby cstr = "" # Compressed data stream = Zstd::StreamingDecompress.new @@ -96,7 +109,7 @@ result << stream.decompress(cstr[0, 10]) result << stream.decompress(cstr[10..-1]) ``` -### Streaming Decompression using dictionary +#### Streaming Decompression with dictionary ```ruby cstr = "" # Compressed data stream = Zstd::StreamingDecompress.new(dict: IO.read('dictionary_file')) From f12556f9cad34450fb8e85c75bd0cc79f4428a7a Mon Sep 17 00:00:00 2001 From: SpringMT Date: Thu, 11 Apr 2024 16:05:28 +0900 Subject: [PATCH 3/6] fix: tests --- spec/zstd-ruby-using-dict_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/zstd-ruby-using-dict_spec.rb b/spec/zstd-ruby-using-dict_spec.rb index 0bff350..bfab595 100644 --- a/spec/zstd-ruby-using-dict_spec.rb +++ b/spec/zstd-ruby-using-dict_spec.rb @@ -46,7 +46,7 @@ end it 'should support compression levels with blank dictionary' do - compressed_using_dict_10 = Zstd.compress_using_dict(user_json, dict: dictionary, level: 10) + compressed_using_dict_10 = Zstd.compress(user_json, dict: "", level: 10) expect(user_json).to eq(Zstd.decompress(compressed_using_dict_10, dict: "")) expect(user_json).to eq(Zstd.decompress(compressed_using_dict_10)) end From ba677ca81fb51855a9025dc79773fc529394cc19 Mon Sep 17 00:00:00 2001 From: Spring_MT Date: Thu, 11 Apr 2024 16:54:16 +0900 Subject: [PATCH 4/6] Apply suggestions from code review --- examples/sinatra/app.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/sinatra/app.rb b/examples/sinatra/app.rb index fa618c0..ba92b5f 100644 --- a/examples/sinatra/app.rb +++ b/examples/sinatra/app.rb @@ -4,4 +4,4 @@ get '/' do headers["Content-Encoding"] = 
"zstd" Zstd.compress('Hello world!') -end \ No newline at end of file +end From 318152c4547f1e22d62a829231ccaa8319b24b24 Mon Sep 17 00:00:00 2001 From: SpringMT Date: Thu, 11 Apr 2024 17:36:17 +0900 Subject: [PATCH 5/6] fix: reflesh outdata buf --- ext/zstdruby/streaming_compress.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ext/zstdruby/streaming_compress.c b/ext/zstdruby/streaming_compress.c index 85e3906..d6ca1cb 100644 --- a/ext/zstdruby/streaming_compress.c +++ b/ext/zstdruby/streaming_compress.c @@ -147,7 +147,6 @@ rb_streaming_compress_write(int argc, VALUE *argv, VALUE obj) struct streaming_compress_t* sc; TypedData_Get_Struct(obj, struct streaming_compress_t, &streaming_compress_type, sc); const char* output_data = RSTRING_PTR(sc->buf); - ZSTD_outBuffer output = { (void*)output_data, sc->buf_size, 0 }; while (argc-- > 0) { VALUE str = *argv++; @@ -157,6 +156,7 @@ rb_streaming_compress_write(int argc, VALUE *argv, VALUE obj) ZSTD_inBuffer input = { input_data, input_size, 0 }; while (input.pos < input.size) { + ZSTD_outBuffer output = { (void*)output_data, sc->buf_size, 0 }; size_t const ret = zstd_compress(sc->ctx, &output, &input, ZSTD_e_continue); if (ZSTD_isError(ret)) { rb_raise(rb_eRuntimeError, "compress error error code: %s", ZSTD_getErrorName(ret)); From 1060b32455320d33b0decf761d9139cd700b2c66 Mon Sep 17 00:00:00 2001 From: SpringMT Date: Thu, 11 Apr 2024 22:20:08 +0900 Subject: [PATCH 6/6] fix: benchmark format --- benchmarks/zstd_compress_memory.rb | 4 ++-- benchmarks/zstd_decompress_memory.rb | 2 +- benchmarks/zstd_streaming_compress_memory.rb | 3 ++- benchmarks/zstd_streaming_decompress_memory.rb | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/benchmarks/zstd_compress_memory.rb b/benchmarks/zstd_compress_memory.rb index c3beb31..d18e6e9 100644 --- a/benchmarks/zstd_compress_memory.rb +++ b/benchmarks/zstd_compress_memory.rb @@ -14,11 +14,11 @@ json_string = json_data.to_json i = 0 - +start_time = Time.now while true do Zstd.compress(json_string) if ((i % 1000) == 0 ) - puts "count:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}" + puts "sec:#{Time.now - start_time}\tcount:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}" end i += 1 end diff --git a/benchmarks/zstd_decompress_memory.rb b/benchmarks/zstd_decompress_memory.rb index 8f9d67b..e305355 100644 --- a/benchmarks/zstd_decompress_memory.rb +++ b/benchmarks/zstd_decompress_memory.rb @@ -16,7 +16,7 @@ while true do Zstd.decompress IO.read("./results/#{sample_file_name}.zstd") if ((i % 1000) == 0 ) - puts "count:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}" + puts "sec:#{Time.now - start_time}\tcount:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}" end i += 1 end diff --git a/benchmarks/zstd_streaming_compress_memory.rb b/benchmarks/zstd_streaming_compress_memory.rb index ce8d334..3ae99cf 100644 --- a/benchmarks/zstd_streaming_compress_memory.rb +++ b/benchmarks/zstd_streaming_compress_memory.rb @@ -13,6 +13,7 @@ json_string = IO.read("./samples/#{sample_file_name}") i = 0 +start_time = Time.now while true do stream = Zstd::StreamingCompress.new stream << json_string[0, 5] @@ -21,7 +22,7 @@ res << 
stream.finish if ((i % 1000) == 0 ) GC.start - puts "count:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}" + puts "sec:#{Time.now - start_time}\tcount:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}" end i += 1 end diff --git a/benchmarks/zstd_streaming_decompress_memory.rb b/benchmarks/zstd_streaming_decompress_memory.rb index 0f62a7a..166bd98 100644 --- a/benchmarks/zstd_streaming_decompress_memory.rb +++ b/benchmarks/zstd_streaming_decompress_memory.rb @@ -12,6 +12,7 @@ cstr = IO.read("./results/#{sample_file_name}.zstd") i = 0 +start_time = Time.now while true do stream = Zstd::StreamingDecompress.new result = '' @@ -20,7 +21,7 @@ if ((i % 1000) == 0 ) GC.start - puts "count:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}" + puts "sec:#{Time.now - start_time}\tcount:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}" end i += 1 end
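
Taken together, the series converges on a single keyword-argument API for compression levels and dictionaries. The sketch below summarizes that API as introduced by these patches; the `data.json` and `./dictionary` paths are placeholders (the dictionary is assumed to have been trained with `zstd --train`).

```ruby
require 'zstd-ruby'

data = File.read('data.json')     # placeholder input file
dict = File.read('./dictionary')  # placeholder dictionary trained with `zstd --train`

# One-shot API: level and dict are keyword arguments
# (Zstd.compress now uses ZSTD_compressStream2, Zstd.decompress uses ZSTD_decompressDCtx).
compressed = Zstd.compress(data, level: 10, dict: dict)
restored   = Zstd.decompress(compressed, dict: dict)

# Streaming API: the constructors take the same keywords.
compressor = Zstd::StreamingCompress.new(level: 5, dict: dict)
compressor << data
frame = compressor.finish

decompressor = Zstd::StreamingDecompress.new(dict: dict)
restored_from_stream = decompressor.decompress(frame)

# Deprecated forms still work but emit warnings:
#   Zstd.compress(data, 10)                       # positional level -> use level:
#   Zstd.compress_using_dict(data, dict)          # -> use Zstd.compress(data, dict: dict)
#   Zstd.decompress_using_dict(compressed, dict)  # -> use Zstd.decompress(compressed, dict: dict)
```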