Skip to content

Commit

Permalink
Updates the point of return Java String to return the one encoded wit…
Browse files Browse the repository at this point in the history
…h data input encoding, so as not to change the encoding
  • Loading branch information
andsel committed Jan 31, 2025
1 parent 0f76bd0 commit a5d6bef
Showing 1 changed file with 18 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,17 @@

package org.logstash.common;

import org.jruby.Ruby;
import org.jruby.RubyArray;
import org.jruby.RubyClass;
import org.jruby.RubyObject;
import org.jruby.RubyString;
import org.jruby.*;
import org.jruby.anno.JRubyClass;
import org.jruby.anno.JRubyMethod;
import org.jruby.runtime.ThreadContext;
import org.jruby.runtime.builtin.IRubyObject;
import org.jruby.util.ByteList;
import org.logstash.RubyUtil;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

@JRubyClass(name = "BufferedTokenizer")
public class BufferedTokenizerExt extends RubyObject {

Expand All @@ -46,6 +46,7 @@ public class BufferedTokenizerExt extends RubyObject {
private boolean hasSizeLimit;
private int inputSize;
private boolean bufferFullErrorNotified = false;
private String encodingName;

public BufferedTokenizerExt(final Ruby runtime, final RubyClass metaClass) {
super(runtime, metaClass);
Expand Down Expand Up @@ -82,6 +83,8 @@ public IRubyObject init(final ThreadContext context, IRubyObject[] args) {
@JRubyMethod
@SuppressWarnings("rawtypes")
public RubyArray extract(final ThreadContext context, IRubyObject data) {
RubyEncoding encoding = (RubyEncoding) data.convertToString().encoding(context);
encodingName = encoding.getEncoding().getCharsetName();
final RubyArray entities = data.convertToString().split(delimiter, -1);
if (!bufferFullErrorNotified) {
input.clear();
Expand Down Expand Up @@ -134,7 +137,10 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
// if there is a pending token part, merge it with the first token segment present
// in the accumulator, and clean the pending token part.
headToken.append(input.shift(context)); // append buffer to first element and
input.unshift(RubyUtil.toRubyObject(headToken.toString())); // reinsert it into the array
// create a new RubyString that uses the encoding of the input data
RubyString encodedHeadToken = RubyUtil.RUBY.newString(new ByteList(headToken.toString().getBytes(Charset.forName(encodingName))));
encodedHeadToken.force_encoding(context, RubyUtil.RUBY.newString(encodingName));
input.unshift(encodedHeadToken); // reinsert it into the array
headToken = new StringBuilder();
}
headToken.append(input.pop(context)); // put the leftovers in headToken for later
Expand All @@ -154,7 +160,12 @@ public IRubyObject flush(final ThreadContext context) {
final IRubyObject buffer = RubyUtil.toRubyObject(headToken.toString());
headToken = new StringBuilder();
inputSize = 0;
return buffer;

// create a new RubyString that uses the encoding of the most recently extracted data
RubyString encodedHeadToken = RubyUtil.RUBY.newString(new ByteList(buffer.toString().getBytes(Charset.forName(encodingName))));
encodedHeadToken.force_encoding(context, RubyUtil.RUBY.newString(encodingName));

return encodedHeadToken;
}

@JRubyMethod(name = "empty?")
Expand Down

0 comments on commit a5d6bef

Please sign in to comment.