Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 19 additions & 2 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ end
JAVA_DIR = "java/src/json/ext"
JAVA_RAGEL_PATH = "#{JAVA_DIR}/ParserConfig.rl"
JAVA_PARSER_SRC = "#{JAVA_DIR}/ParserConfig.java"
JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"]
JAVA_SOURCES = FileList["#{JAVA_DIR}/*.java"].exclude("#{JAVA_DIR}/Vectorized*.java")
JAVA_VEC_SOURCES = FileList["#{JAVA_DIR}/Vectorized*.java"]
JAVA_CLASSES = []
JRUBY_PARSER_JAR = File.expand_path("lib/json/ext/parser.jar")
JRUBY_GENERATOR_JAR = File.expand_path("lib/json/ext/generator.jar")
Expand Down Expand Up @@ -142,8 +143,8 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'

JRUBY_JAR = File.join(CONFIG["libdir"], "jruby.jar")
if File.exist?(JRUBY_JAR)
classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * path_separator
JAVA_SOURCES.each do |src|
classpath = (Dir['java/lib/*.jar'] << 'java/src' << JRUBY_JAR) * path_separator
obj = src.sub(/\.java\Z/, '.class')
file obj => src do
if File.exist?(File.join(ENV['JAVA_HOME'], "lib", "modules"))
Expand All @@ -154,6 +155,20 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
end
JAVA_CLASSES << obj
end

# Register one rake file task per Vectorized*.java source. Each task compiles
# the source with the incubating vector module enabled and reports whether the
# compile succeeded instead of aborting the build on failure.
JAVA_VEC_SOURCES.each do |vec_src|
  vec_class = vec_src.sub(/\.java\Z/, '.class')
  file vec_class => vec_src do
    javac_cmd = ['javac', '--add-modules', 'jdk.incubator.vector', '-classpath', classpath, '--release', '16', vec_src]
    # Passing a block to sh hands us the result rather than raising on failure.
    sh(*javac_cmd) do |success, _status|
      message = if success
                  "*** 'jdk.incubator.vector' support enabled ***"
                else
                  "*** 'jdk.incubator.vector' support disabled ***"
                end
      puts message
    end
  end
  JAVA_CLASSES << vec_class
end
else
warn "WARNING: Cannot find jruby in path => Cannot build jruby extension!"
end
Expand Down Expand Up @@ -199,11 +214,13 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby'
generator_classes = FileList[
"json/ext/*ByteList*.class",
"json/ext/OptionsReader*.class",
"json/ext/EscapeScanner*.class",
"json/ext/Generator*.class",
"json/ext/RuntimeInfo*.class",
"json/ext/*StringEncoder*.class",
"json/ext/Utils*.class"
]
puts "Creating generator jar with classes: #{generator_classes.join(', ')}"
sh 'jar', 'cf', File.basename(JRUBY_GENERATOR_JAR), *generator_classes
mv File.basename(JRUBY_GENERATOR_JAR), File.dirname(JRUBY_GENERATOR_JAR)
end
Expand Down
2 changes: 0 additions & 2 deletions java/src/json/ext/AbstractByteListDirectOutputStream.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ abstract class AbstractByteListDirectOutputStream extends OutputStream {
static {
String useSegmentedOutputStream = System.getProperty(PROP_SEGMENTED_BUFFER, PROP_SEGMENTED_BUFFER_DEFAULT);
USE_SEGMENTED_BUFFER = Boolean.parseBoolean(useSegmentedOutputStream);
// XXX Is there a logger we can use here?
// System.out.println("Using segmented output stream: " + USE_SEGMENTED_BUFFER);
}

public static AbstractByteListDirectOutputStream create(int estimatedSize) {
Expand Down
4 changes: 2 additions & 2 deletions java/src/json/ext/SWARBasicStringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void encode(ByteList src) throws IOException {
}
}

private boolean skipChunk(long x) {
boolean skipChunk(long x) {
long is_ascii = 0x8080808080808080L & ~x;
long xor2 = x ^ 0x0202020202020202L;
long lt32_or_eq34 = xor2 - 0x2121212121212121L;
Expand All @@ -80,7 +80,7 @@ private boolean skipChunk(long x) {
return ((lt32_or_eq34 | eq92) & is_ascii) == 0;
}

private boolean skipChunk(int x) {
boolean skipChunk(int x) {
int is_ascii = 0x80808080 & ~x;
int xor2 = x ^ 0x02020202;
int lt32_or_eq34 = xor2 - 0x21212121;
Expand Down
37 changes: 34 additions & 3 deletions java/src/json/ext/StringEncoder.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import java.io.IOException;
import java.io.OutputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.nio.charset.StandardCharsets;

import org.jcodings.Encoding;
Expand Down Expand Up @@ -114,15 +116,37 @@ class StringEncoder extends ByteListTranscoder {

protected final byte[] escapeTable;

private static final String VECTORIZED_STRING_ENCODER_CLASS = "json.ext.VectorizedStringEncoder";
private static final String USE_VECTORIZED_BASIC_ENCODER_PROP = "jruby.json.useVectorizedBasicEncoder";
private static final String USE_VECTORIZED_BASIC_ENCODER_DEFAULT = "false";
private static final boolean USE_VECTORIZED_BASIC_ENCODER;
private static final StringEncoder VECTORIZED_SCANNER;

private static final String USE_SWAR_BASIC_ENCODER_PROP = "jruby.json.useSWARBasicEncoder";
private static final String USE_SWAR_BASIC_ENCODER_DEFAULT = "true";
private static final boolean USE_BASIC_SWAR_ENCODER;

// Selects the basic-encoder implementations once, at class-initialization time.
// The vectorized encoder is opt-in (system property set to "true" or "1") and is
// loaded reflectively so this class has no hard link to jdk.incubator.vector.
static {
    String enableVectorizedScanner = System.getProperty(USE_VECTORIZED_BASIC_ENCODER_PROP, USE_VECTORIZED_BASIC_ENCODER_DEFAULT);
    if ("true".equalsIgnoreCase(enableVectorizedScanner) || "1".equalsIgnoreCase(enableVectorizedScanner)) {
        StringEncoder scanner;
        try {
            Class<?> vectorizedStringEncoderClass = StringEncoder.class.getClassLoader().loadClass(VECTORIZED_STRING_ENCODER_CLASS);
            Constructor<?> vectorizedStringEncoderConstructor = vectorizedStringEncoderClass.getDeclaredConstructor();
            scanner = (StringEncoder) vectorizedStringEncoderConstructor.newInstance();
        } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | IllegalAccessException | InvocationTargetException | LinkageError e) {
            // Fallback to the StringEncoder if we cannot load the VectorizedStringEncoder.
            // LinkageError is caught as well: loading or initializing the class raises
            // NoClassDefFoundError / ExceptionInInitializerError / UnsupportedClassVersionError
            // (all LinkageErrors) when the vector module or a new enough JVM is missing,
            // and letting those escape would fail initialization of StringEncoder itself.
            scanner = null;
        }
        VECTORIZED_SCANNER = scanner;
        USE_VECTORIZED_BASIC_ENCODER = scanner != null;
    } else {
        VECTORIZED_SCANNER = null;
        USE_VECTORIZED_BASIC_ENCODER = false;
    }

    USE_BASIC_SWAR_ENCODER = Boolean.parseBoolean(
        System.getProperty(USE_SWAR_BASIC_ENCODER_PROP, USE_SWAR_BASIC_ENCODER_DEFAULT));
}

OutputStream out;
Expand All @@ -149,8 +173,15 @@ class StringEncoder extends ByteListTranscoder {
this.escapeTable = escapeTable;
}

/**
 * Builds a new StringEncoder from this instance's escape table.
 *
 * @return a newly constructed encoder; it is not produced via Object.clone()
 */
@Override
public StringEncoder clone() {
    final StringEncoder copy = new StringEncoder(escapeTable);
    return copy;
}

static StringEncoder createBasicEncoder() {
if (USE_BASIC_SWAR_ENCODER) {
if (USE_VECTORIZED_BASIC_ENCODER) {
return (StringEncoder) VECTORIZED_SCANNER.clone();
} else if (USE_BASIC_SWAR_ENCODER) {
return new SWARBasicStringEncoder();
} else {
return new StringEncoder(false);
Expand Down
104 changes: 104 additions & 0 deletions java/src/json/ext/VectorizedStringEncoder.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package json.ext;

import java.io.IOException;
import java.nio.ByteBuffer;

import org.jruby.util.ByteList;

import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorMask;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;

/**
 * String encoder that uses the incubating Vector API to find bytes needing a
 * JSON escape: ASCII bytes below 0x20, the double quote, and the backslash.
 * Input that does not fill a whole vector is handled with the inherited
 * skipChunk checks and a final byte-at-a-time loop driven by ESCAPE_TABLE.
 */
class VectorizedStringEncoder extends SWARBasicStringEncoder {
    private static final VectorSpecies<Byte> SP = ByteVector.SPECIES_PREFERRED;
    private static final ByteVector ZERO = ByteVector.zero(SP);
    private static final ByteVector TWO = ByteVector.broadcast(SP, 2);
    private static final ByteVector THIRTY_THREE = ByteVector.broadcast(SP, 33);
    private static final ByteVector BACKSLASH = ByteVector.broadcast(SP, '\\');

    /**
     * @return a new VectorizedStringEncoder instance
     */
    @Override
    public StringEncoder clone() {
        return new VectorizedStringEncoder();
    }

    /**
     * Encodes the bytes of {@code src}, emitting unescaped runs via flushPos/append
     * and escapes via escapeAscii.
     *
     * @param src the bytes to encode; only the range [begin(), begin() + realSize()) is read
     * @throws IOException declared because the flushPos/append/escapeAscii helpers are
     *                     invoked here (presumably raised by the underlying output)
     */
    @Override
    void encode(ByteList src) throws IOException {
        byte[] ptrBytes = src.unsafeBytes();
        int ptr = src.begin();
        int len = src.realSize();
        // beg and pos are offsets relative to ptr: every array access below adds
        // ptr, and both are compared against len. They must therefore start at 0,
        // otherwise a ByteList with a non-zero begin() skips its leading bytes.
        int beg = 0;
        int pos = 0;

        while (pos + SP.length() <= len) {
            ByteVector chunk = ByteVector.fromArray(SP, ptrBytes, ptr + pos);
            // bytes are signed in java, so we need to remove negative values
            VectorMask<Byte> negative = chunk.lt(ZERO);
            // (b ^ 2) < 33 holds exactly for b in 0..31 and for b == 34 ('"')
            VectorMask<Byte> tooLowOrDblQuote = chunk.lanewise(VectorOperators.XOR, TWO).lt(THIRTY_THREE).andNot(negative);
            VectorMask<Byte> needsEscape = chunk.eq(BACKSLASH).or(tooLowOrDblQuote);
            if (needsEscape.anyTrue()) {
                int chunkStart = pos;
                long mask = needsEscape.toLong();

                // Compare against zero rather than "> 0": with a 64-lane species a
                // match in the last lane sets the sign bit and makes the mask negative.
                while (mask != 0) {
                    // nextMatch inlined
                    int index = Long.numberOfTrailingZeros(mask);
                    mask &= (mask - 1);
                    pos = chunkStart + index;
                    int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);

                    beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
                    escapeAscii(ch, aux, HEX);
                }

                // Skip over any remaining characters in the current chunk
                pos = chunkStart + SP.length();
                continue;
            }

            pos += SP.length();
        }

        // The buffer's limit is ptr + len, so the absolute reads below use ptr + pos.
        ByteBuffer bb = ByteBuffer.wrap(ptrBytes, ptr, len);
        // Species wider than 16 bytes can leave several 8-byte chunks in the tail.
        while (pos + 8 <= len) {
            long x = bb.getLong(ptr + pos);
            if (skipChunk(x)) {
                pos += 8;
            } else {
                // pos is relative to ptr, so the chunk bound must be relative too.
                int chunkEnd = pos + 8;
                while (pos < chunkEnd) {
                    int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
                    int ch_len = ESCAPE_TABLE[ch];
                    if (ch_len > 0) {
                        beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
                        escapeAscii(ch, aux, HEX);
                    } else {
                        pos++;
                    }
                }
            }
        }

        if (pos + 4 <= len) {
            int x = bb.getInt(ptr + pos);
            if (skipChunk(x)) {
                pos += 4;
            }
            // otherwise fall through to the byte loop, which handles these bytes
        }

        while (pos < len) {
            int ch = Byte.toUnsignedInt(ptrBytes[ptr + pos]);
            int ch_len = ESCAPE_TABLE[ch];
            if (ch_len > 0) {
                beg = pos = flushPos(pos, beg, ptrBytes, ptr, 1);
                escapeAscii(ch, aux, HEX);
            } else {
                pos++;
            }
        }

        // Emit whatever unescaped run is still pending after the last escape.
        if (beg < len) {
            append(ptrBytes, ptr + beg, len - beg);
        }
    }
}
4 changes: 4 additions & 0 deletions test/json/json_encoding_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ def test_generate_shared_string
assert_equal '"234567890"', JSON.dump(s[2..-1])
s = '01234567890123456789"a"b"c"d"e"f"g"h'
assert_equal '"\"a\"b\"c\"d\"e\"f\"g\""', JSON.dump(s[20, 15])
s = "0123456789001234567890012345678900123456789001234567890"
assert_equal '"23456789001234567890012345678900123456789001234567890"', JSON.dump(s[2..-1])
s = "0123456789001234567890012345678900123456789001234567890"
assert_equal '"567890012345678900123456789001234567890012345678"', JSON.dump(s[5..-3])
end

def test_unicode
Expand Down