Skip to content

Commit

Permalink
Merge pull request #77 from SpringMT/feature/dictionary-standardized
Browse files Browse the repository at this point in the history
Feature/dictionary standardized
  • Loading branch information
SpringMT authored Apr 11, 2024
2 parents cf9b448 + 1060b32 commit 4ad9fc7
Show file tree
Hide file tree
Showing 21 changed files with 294 additions and 112 deletions.
35 changes: 24 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,22 @@ Or install it yourself as:
require 'zstd-ruby'
```

### Simple Compression
### Compression

#### Simple Compression

```ruby
compressed_data = Zstd.compress(data)
compressed_data = Zstd.compress(data, complession_level) # default compression_level is 0
compressed_data = Zstd.compress(data, level: complession_level) # default compression_level is 3
```

### Compression using Dictionary
#### Compression with Dictionary
```ruby
# dictionary is supposed to have been created using `zstd --train`
compressed_using_dict = Zstd.compress_using_dict("", IO.read('dictionary_file'))
compressed_using_dict = Zstd.compress("", dict: IO.read('dictionary_file'))
```

### Streaming Compression
#### Streaming Compression
```ruby
stream = Zstd::StreamingCompress.new
stream << "abc" << "def"
Expand All @@ -66,7 +68,7 @@ res << stream.compress("def")
res << stream.finish
```

### Streaming Compression using Dictionary
#### Streaming Compression with Dictionary
```ruby
stream = Zstd::StreamingCompress.new(dict: IO.read('dictionary_file'))
stream << "abc" << "def"
Expand All @@ -75,19 +77,30 @@ stream << "ghi"
res << stream.finish
```

### Simple Decompression
#### Streaming Compression with level and Dictionary
```ruby
stream = Zstd::StreamingCompress.new(level: 5, dict: IO.read('dictionary_file'))
stream << "abc" << "def"
res = stream.flush
stream << "ghi"
res << stream.finish
```

### Decompression

#### Simple Decompression

```ruby
data = Zstd.decompress(compressed_data)
```

### Decomporession using Dictionary
#### Decompression with Dictionary
```ruby
# dictionary is supposed to have been created using `zstd --train`
Zstd.decompress_using_dict(compressed_using_dict, IO.read('dictionary_file'))
Zstd.decompress(compressed_using_dict, dict: IO.read('dictionary_file'))
```

### Streaming Decompression
#### Streaming Decompression
```ruby
cstr = "" # Compressed data
stream = Zstd::StreamingDecompress.new
Expand All @@ -96,7 +109,7 @@ result << stream.decompress(cstr[0, 10])
result << stream.decompress(cstr[10..-1])
```

### Streaming Decompression using dictionary
#### Streaming Decompression with dictionary
```ruby
cstr = "" # Compressed data
stream = Zstd::StreamingDecompress.new(dict: IO.read('dictionary_file'))
Expand Down
Binary file modified benchmarks/results/city.json.gzip
Binary file not shown.
4 changes: 2 additions & 2 deletions benchmarks/zstd_compress_memory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@
json_string = json_data.to_json

i = 0

start_time = Time.now
while true do
Zstd.compress(json_string)
if ((i % 1000) == 0 )
puts "count:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}"
puts "sec:#{Time.now - start_time}\tcount:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}"
end
i += 1
end
2 changes: 1 addition & 1 deletion benchmarks/zstd_decompress_memory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
while true do
Zstd.decompress IO.read("./results/#{sample_file_name}.zstd")
if ((i % 1000) == 0 )
puts "count:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}"
puts "sec:#{Time.now - start_time}\tcount:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}"
end
i += 1
end
3 changes: 2 additions & 1 deletion benchmarks/zstd_streaming_compress_memory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
json_string = IO.read("./samples/#{sample_file_name}")

i = 0
start_time = Time.now
while true do
stream = Zstd::StreamingCompress.new
stream << json_string[0, 5]
Expand All @@ -21,7 +22,7 @@
res << stream.finish
if ((i % 1000) == 0 )
GC.start
puts "count:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}"
puts "sec:#{Time.now - start_time}\tcount:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}"
end
i += 1
end
3 changes: 2 additions & 1 deletion benchmarks/zstd_streaming_decompress_memory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

cstr = IO.read("./results/#{sample_file_name}.zstd")
i = 0
start_time = Time.now
while true do
stream = Zstd::StreamingDecompress.new
result = ''
Expand All @@ -20,7 +21,7 @@

if ((i % 1000) == 0 )
GC.start
puts "count:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}"
puts "sec:#{Time.now - start_time}\tcount:#{i}\truby_memory:#{ObjectSpace.memsize_of_all/1000}\tobject_count:#{ObjectSpace.count_objects}\trss:#{`ps -o rss= -p #{Process.pid}`.to_i}"
end
i += 1
end
5 changes: 5 additions & 0 deletions examples/sinatra/Gemfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
source "https://rubygems.org"

gem "sinatra"
gem "rackup"
gem "zstd-ruby", path: "../../"
41 changes: 41 additions & 0 deletions examples/sinatra/Gemfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
PATH
remote: ../..
specs:
zstd-ruby (1.5.6.1)

GEM
remote: https://rubygems.org/
specs:
base64 (0.2.0)
mustermann (3.0.0)
ruby2_keywords (~> 0.0.1)
rack (3.0.10)
rack-protection (4.0.0)
base64 (>= 0.1.0)
rack (>= 3.0.0, < 4)
rack-session (2.0.0)
rack (>= 3.0.0)
rackup (2.1.0)
rack (>= 3)
webrick (~> 1.8)
ruby2_keywords (0.0.5)
sinatra (4.0.0)
mustermann (~> 3.0)
rack (>= 3.0.0, < 4)
rack-protection (= 4.0.0)
rack-session (>= 2.0.0, < 3)
tilt (~> 2.0)
tilt (2.3.0)
webrick (1.8.1)

PLATFORMS
arm64-darwin-21
ruby

DEPENDENCIES
rackup
sinatra
zstd-ruby!

BUNDLED WITH
2.5.7
7 changes: 7 additions & 0 deletions examples/sinatra/app.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
require 'sinatra'
require 'zstd-ruby'

get '/' do
headers["Content-Encoding"] = "zstd"
Zstd.compress('Hello world!')
end
53 changes: 52 additions & 1 deletion ext/zstdruby/common.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#ifndef ZSTD_RUBY_H
#define ZSTD_RUBY_H 1

#include "ruby.h"
#include <ruby.h>
#include "./libzstd/zstd.h"

static int convert_compression_level(VALUE compression_level_value)
Expand All @@ -12,4 +12,55 @@ static int convert_compression_level(VALUE compression_level_value)
return NUM2INT(compression_level_value);
}

static size_t zstd_compress(ZSTD_CCtx* const ctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input, ZSTD_EndDirective endOp)
{
return ZSTD_compressStream2(ctx, output, input, endOp);
}

static void set_compress_params(ZSTD_CCtx* const ctx, VALUE level_from_args, VALUE kwargs)
{
ID kwargs_keys[2];
kwargs_keys[0] = rb_intern("level");
kwargs_keys[1] = rb_intern("dict");
VALUE kwargs_values[2];
rb_get_kwargs(kwargs, kwargs_keys, 0, 2, kwargs_values);

int compression_level = ZSTD_CLEVEL_DEFAULT;
if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) {
compression_level = convert_compression_level(kwargs_values[0]);
} else if (!NIL_P(level_from_args)) {
rb_warn("`level` in args is deprecated; use keyword args `level:` instead.");
compression_level = convert_compression_level(level_from_args);
}
ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, compression_level);

if (kwargs_values[1] != Qundef && kwargs_values[1] != Qnil) {
char* dict_buffer = RSTRING_PTR(kwargs_values[1]);
size_t dict_size = RSTRING_LEN(kwargs_values[1]);
size_t load_dict_ret = ZSTD_CCtx_loadDictionary(ctx, dict_buffer, dict_size);
if (ZSTD_isError(load_dict_ret)) {
ZSTD_freeCCtx(ctx);
rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed");
}
}
}

static void set_decompress_params(ZSTD_DCtx* const dctx, VALUE kwargs)
{
ID kwargs_keys[1];
kwargs_keys[0] = rb_intern("dict");
VALUE kwargs_values[1];
rb_get_kwargs(kwargs, kwargs_keys, 0, 1, kwargs_values);

if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) {
char* dict_buffer = RSTRING_PTR(kwargs_values[0]);
size_t dict_size = RSTRING_LEN(kwargs_values[0]);
size_t load_dict_ret = ZSTD_DCtx_loadDictionary(dctx, dict_buffer, dict_size);
if (ZSTD_isError(load_dict_ret)) {
ZSTD_freeDCtx(dctx);
rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed");
}
}
}

#endif /* ZSTD_RUBY_H */
3 changes: 2 additions & 1 deletion ext/zstdruby/main.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include <common.h>
#include "common.h"

VALUE rb_mZstd;
void zstd_ruby_init(void);
void zstd_ruby_skippable_frame_init(void);
Expand Down
2 changes: 1 addition & 1 deletion ext/zstdruby/skippable_frame.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include <common.h>
#include "common.h"

extern VALUE rb_mZstd;

Expand Down
31 changes: 9 additions & 22 deletions ext/zstdruby/streaming_compress.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include <common.h>
#include <streaming_compress.h>
#include "common.h"

struct streaming_compress_t {
ZSTD_CCtx* ctx;
Expand Down Expand Up @@ -71,14 +70,9 @@ rb_streaming_compress_allocate(VALUE klass)
static VALUE
rb_streaming_compress_initialize(int argc, VALUE *argv, VALUE obj)
{
VALUE kwargs;
VALUE compression_level_value;
rb_scan_args(argc, argv, "01", &compression_level_value);
int compression_level = convert_compression_level(compression_level_value);

ID kwargs_keys[1];
kwargs_keys[0] = rb_intern("dict");
VALUE kwargs_values[1];
rb_get_kwargs(kwargs, kwargs_keys, 0, 1, kwargs_values);
rb_scan_args(argc, argv, "01:", &compression_level_value, &kwargs);

struct streaming_compress_t* sc;
TypedData_Get_Struct(obj, struct streaming_compress_t, &streaming_compress_type, sc);
Expand All @@ -88,15 +82,8 @@ rb_streaming_compress_initialize(int argc, VALUE *argv, VALUE obj)
if (ctx == NULL) {
rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error");
}
if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) {
char* dict_buffer = RSTRING_PTR(kwargs_values[0]);
size_t dict_size = RSTRING_LEN(kwargs_values[0]);
size_t load_dict_ret = ZSTD_CCtx_loadDictionary(ctx, dict_buffer, dict_size);
if (ZSTD_isError(load_dict_ret)) {
rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed");
}
}
ZSTD_CCtx_setParameter(ctx, ZSTD_c_compressionLevel, compression_level);
set_compress_params(ctx, compression_level_value, kwargs);

sc->ctx = ctx;
sc->buf = rb_str_new(NULL, buffOutSize);
sc->buf_size = buffOutSize;
Expand All @@ -119,7 +106,7 @@ no_compress(struct streaming_compress_t* sc, ZSTD_EndDirective endOp)
do {
ZSTD_outBuffer output = { (void*)output_data, sc->buf_size, 0 };

size_t const ret = ZSTD_compressStream2(sc->ctx, &output, &input, endOp);
size_t const ret = zstd_compress(sc->ctx, &output, &input, endOp);
if (ZSTD_isError(ret)) {
rb_raise(rb_eRuntimeError, "flush error error code: %s", ZSTD_getErrorName(ret));
}
Expand All @@ -143,7 +130,7 @@ rb_streaming_compress_compress(VALUE obj, VALUE src)
VALUE result = rb_str_new(0, 0);
while (input.pos < input.size) {
ZSTD_outBuffer output = { (void*)output_data, sc->buf_size, 0 };
size_t const ret = ZSTD_compressStream2(sc->ctx, &output, &input, ZSTD_e_continue);
size_t const ret = zstd_compress(sc->ctx, &output, &input, ZSTD_e_continue);
if (ZSTD_isError(ret)) {
rb_raise(rb_eRuntimeError, "compress error error code: %s", ZSTD_getErrorName(ret));
}
Expand All @@ -160,7 +147,6 @@ rb_streaming_compress_write(int argc, VALUE *argv, VALUE obj)
struct streaming_compress_t* sc;
TypedData_Get_Struct(obj, struct streaming_compress_t, &streaming_compress_type, sc);
const char* output_data = RSTRING_PTR(sc->buf);
ZSTD_outBuffer output = { (void*)output_data, sc->buf_size, 0 };

while (argc-- > 0) {
VALUE str = *argv++;
Expand All @@ -170,7 +156,8 @@ rb_streaming_compress_write(int argc, VALUE *argv, VALUE obj)
ZSTD_inBuffer input = { input_data, input_size, 0 };

while (input.pos < input.size) {
size_t const ret = ZSTD_compressStream2(sc->ctx, &output, &input, ZSTD_e_continue);
ZSTD_outBuffer output = { (void*)output_data, sc->buf_size, 0 };
size_t const ret = zstd_compress(sc->ctx, &output, &input, ZSTD_e_continue);
if (ZSTD_isError(ret)) {
rb_raise(rb_eRuntimeError, "compress error error code: %s", ZSTD_getErrorName(ret));
}
Expand Down
5 changes: 0 additions & 5 deletions ext/zstdruby/streaming_compress.h

This file was deleted.

Loading

0 comments on commit 4ad9fc7

Please sign in to comment.