1717def implementations ( ruby_obj )
1818 state = JSON ::State . new ( JSON . dump_default_options )
1919 {
20- json_state : [ "json (reuse)" , proc { state . generate ( ruby_obj ) } ] ,
2120 json : [ "json" , proc { JSON . generate ( ruby_obj ) } ] ,
2221 oj : [ "oj" , proc { Oj . dump ( ruby_obj ) } ] ,
2322 }
@@ -58,27 +57,24 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
5857# NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
5958
6059# On the first two micro benchmarks, the limiting factor is the fixed cost of initializing the
61- # generator state. Since `JSON.generate` now lazily allocate the `State` object we're now ~10% faster
60+ # generator state. Since `JSON.generate` now lazily allocates the `State` object, we're now ~10-20% faster
6261# than `Oj.dump`.
6362benchmark_encoding "small mixed" , [ 1 , "string" , { a : 1 , b : 2 } , [ 3 , 4 , 5 ] ]
6463benchmark_encoding "small nested array" , [ [ 1 , 2 , 3 , 4 , 5 ] ] *10
65-
66- # On small hash specifically, we're just on par with `Oj.dump`. Would be worth investigating why
67- # Hash serialization doesn't perform as well as other types.
6864benchmark_encoding "small hash" , { "username" => "jhawthorn" , "id" => 123 , "event" => "wrote json serializer" }
6965
70- # On string encoding we're ~20% faster when dealing with mostly ASCII, but ~10 % slower when dealing
71- # with mostly multi-byte characters. This is a tradeoff .
72- benchmark_encoding "mixed utf8" , ( [ ( "a" * 5000 ) + "€" + ( "a" * 5000 ) ] * 500 ) , except : %i( json_state )
73- benchmark_encoding "mostly utf8" , ( [ ( "€" * 3333 ) ] * 500 ) , except : %i( json_state )
66+ # On string encoding we're ~20% faster when dealing with mostly ASCII, but ~50 % slower when dealing
67+ # with mostly multi-byte characters. There are likely some gains left to be had in multi-byte handling.
68+ benchmark_encoding "mixed utf8" , ( [ ( "a" * 5000 ) + "€" + ( "a" * 5000 ) ] * 500 )
69+ benchmark_encoding "mostly utf8" , ( [ ( "€" * 3333 ) ] * 500 )
7470
7571# On these benchmarks we perform well, we're on par or better.
7672benchmark_encoding "integers" , ( 1_000_000 ..1_001_000 ) . to_a , except : %i( json_state )
77- benchmark_encoding "activitypub.json" , JSON . load_file ( "#{ __dir__ } /data/activitypub.json" ) , except : %i( json_state )
78- benchmark_encoding "citm_catalog.json" , JSON . load_file ( "#{ __dir__ } /data/citm_catalog.json" ) , except : %i( json_state )
73+ benchmark_encoding "activitypub.json" , JSON . load_file ( "#{ __dir__ } /data/activitypub.json" )
74+ benchmark_encoding "citm_catalog.json" , JSON . load_file ( "#{ __dir__ } /data/citm_catalog.json" )
7975
80- # On twitter.json we're still about 10 % slower, this is worth investigating.
81- benchmark_encoding "twitter.json" , JSON . load_file ( "#{ __dir__ } /data/twitter.json" ) , except : %i( json_state )
76+ # On twitter.json we're still about 6 % slower, this is worth investigating.
77+ benchmark_encoding "twitter.json" , JSON . load_file ( "#{ __dir__ } /data/twitter.json" )
8278
8379# This benchmark spends the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation
8480# which uses a relatively old version of dtoa.c from David M. Gay.
@@ -89,8 +85,8 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [
8985# but all these are implemented in C++11 or newer, making it hard if not impossible to include them.
9086# Short of a pure C99 implementation of these newer algorithms, there isn't much that can be done to match
9187# Oj speed without losing precision.
92- benchmark_encoding "canada.json" , JSON . load_file ( "#{ __dir__ } /data/canada.json" ) , check_expected : false , except : %i( json_state )
88+ benchmark_encoding "canada.json" , JSON . load_file ( "#{ __dir__ } /data/canada.json" ) , check_expected : false
9389
9490# We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized; there might be
9591# opportunities here.
96- benchmark_encoding "many #to_json calls" , [ { object : Object . new , int : 12 , float : 54.3 , class : Float , time : Time . now , date : Date . today } ] * 20 , except : %i( json_state )
92+ benchmark_encoding "many #to_json calls" , [ { object : Object . new , int : 12 , float : 54.3 , class : Float , time : Time . now , date : Date . today } ] * 20
0 commit comments