From a3d913d93841eb83554f4896aed67b4183880f43 Mon Sep 17 00:00:00 2001 From: Doug Roper Date: Wed, 9 Mar 2022 14:50:29 -0500 Subject: [PATCH] Add parseNumberCounter. numbersoup test fails --- .../main/scala/bench/JsoniterScalaBench.scala | 22 ++++--- .../scala/bench/JsoniterScalaBenchTests.scala | 12 ++++ project/plugins.sbt | 2 +- .../WeePickleJsonValueCodecs.scala | 66 ++++++++++++++++++- 4 files changed, 90 insertions(+), 12 deletions(-) create mode 100644 bench/src/test/scala/bench/JsoniterScalaBenchTests.scala diff --git a/bench/src/main/scala/bench/JsoniterScalaBench.scala b/bench/src/main/scala/bench/JsoniterScalaBench.scala index d81bd690..f4c3cde2 100644 --- a/bench/src/main/scala/bench/JsoniterScalaBench.scala +++ b/bench/src/main/scala/bench/JsoniterScalaBench.scala @@ -1,7 +1,7 @@ package bench +import com.rallyhealth.weejson.v1.BufferedValue import com.rallyhealth.weejson.v1.wee_jsoniter_scala.FromJsoniterScala -import com.rallyhealth.weepickle.v1.WeePickle.ToScala import org.openjdk.jmh.annotations._ import java.util.concurrent.TimeUnit @@ -10,17 +10,17 @@ import java.util.concurrent.TimeUnit * Quick and dirty test to see how badly we're butchering performance of floats. 
* * ==Quick Run== - * bench / Jmh / run .*JsoniterScalabench + * bench / Jmh / run .*JsoniterScalaBench * * ==Profile with Flight Recorder== - * bench / Jmh / run -prof jfr .*JsoniterScalabench + * bench / Jmh / run -prof jfr .*JsoniterScalaBench * * ==Jmh Visualizer Report== - * bench / Jmh / run -prof gc -rf json -rff JsoniterScalabench-results.json .*JsoniterScalabench + * bench / Jmh / run -prof gc -rf json -rff JsoniterScalaBench-results.json .*JsoniterScalaBench * * @see https://github.com/ktoso/sbt-jmh */ -@Warmup(iterations = 15, time = 1, timeUnit = TimeUnit.SECONDS) +@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS) @Measurement(iterations = 15, time = 1, timeUnit = TimeUnit.SECONDS) @State(Scope.Benchmark) @BenchmarkMode(Array(Mode.Throughput)) @@ -31,11 +31,15 @@ import java.util.concurrent.TimeUnit value = 1 ) class JsoniterScalaBench { - private val input = "348249875e105".getBytes() + + /** + * Values that end with a number throw an expensive exception internally when reaching EOF. + * The only time this would happen in the wild would be when parsing a JSON text of a single number. + * To make this more realistic, we're intentionally adding a whitespace suffix here. 
+ */ + private val piBytes = "-3.14 ".getBytes() @Benchmark - def parseDouble = { - FromJsoniterScala(input).transform(ToScala[Double]) - } + def pi = FromJsoniterScala(piBytes).transform(BufferedValue.Builder) } diff --git a/bench/src/test/scala/bench/JsoniterScalaBenchTests.scala b/bench/src/test/scala/bench/JsoniterScalaBenchTests.scala new file mode 100644 index 00000000..5c5db630 --- /dev/null +++ b/bench/src/test/scala/bench/JsoniterScalaBenchTests.scala @@ -0,0 +1,12 @@ +package bench + +import com.rallyhealth.weejson.v1.BufferedValue._ +import utest._ + +object JsoniterScalaBenchTests extends TestSuite { + + val tests = Tests { + val bench = new JsoniterScalaBench() + test("pi")(bench.pi ==> Num("-3.14", 2, -1)) + } +} diff --git a/project/plugins.sbt b/project/plugins.sbt index 7504d02e..5cc43401 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,5 +1,5 @@ addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "0.9.2") addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.1.1") -addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.3.7") +addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.3") addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.9.7") addSbtPlugin("com.github.sbt" % "sbt-pgp" % "2.1.2") diff --git a/weejson-jsoniter-scala/src/main/scala/com/rallyhealth/weejson/v1/wee_jsoniter_scala/WeePickleJsonValueCodecs.scala b/weejson-jsoniter-scala/src/main/scala/com/rallyhealth/weejson/v1/wee_jsoniter_scala/WeePickleJsonValueCodecs.scala index 4e7259dd..b02a6a8d 100644 --- a/weejson-jsoniter-scala/src/main/scala/com/rallyhealth/weejson/v1/wee_jsoniter_scala/WeePickleJsonValueCodecs.scala +++ b/weejson-jsoniter-scala/src/main/scala/com/rallyhealth/weejson/v1/wee_jsoniter_scala/WeePickleJsonValueCodecs.scala @@ -74,7 +74,7 @@ object WeePickleJsonValueCodecs { if (in.readBoolean()) v.visitTrue() else v.visitFalse() } else if ((b >= '0' && b <= '9') || b == '-') { in.rollbackToken() - parseNumber(in, v) + parseNumberCounter(in, v) } else if (b == '[') { 
val depthM1 = depth - 1 if (depthM1 < 0) in.decodeError("depth limit exceeded") @@ -121,7 +121,69 @@ object WeePickleJsonValueCodecs { } } - private def parseNumber[J]( + /** Scans one JSON number from in and forwards it to the visitor v. Pre-scan phase: sets a mark on the reader, then walks the bytes while tracking index (bytes consumed so far), digits (count of integer and fraction digits), and decIndex and expIndex (offsets of the dot and of the e or E marker, each -1 when absent), and finally rolls the reader back to the mark. Dispatch phase: when both decIndex and expIndex are -1 the value is an integer; it is read with in.readLong() if digits is below 19, otherwise with in.readBigInt(null), which is passed to visitInt64 when its bitLength is below 64 and to visitFloat64StringParts(x.toString, -1, -1) otherwise. In every other case the raw value bytes are decoded as US_ASCII, their length is required to equal index, and the text goes to visitFloat64StringParts together with decIndex and expIndex. NOTE(review): if nextByte() throws JsonReaderException inside a digit loop, the digits += index statement that follows the loop is skipped, leaving digits undercounted, so an integer of 19 or more digits that ends exactly at end of input is routed to readLong() rather than readBigInt; presumably unintended, confirm. NOTE(review): the nextByte() calls made before the try block, including the one after a leading minus sign, are not covered by the finally clause, so an exception there skips rollbackToMark(); confirm that is acceptable. */ private def parseNumberCounter[J]( + in: JsonReader, + v: Visitor[_, J] + ): J = { + in.setMark() + var b = in.nextByte() + var digits, index = 0 + var decIndex, expIndex = -1 + if (b == '-') { + b = in.nextByte() + index += 1 + } + try { + digits -= index + while (b >= '0' && b <= '9') { + b = in.nextByte() + index += 1 + } + digits += index + if (b == '.') { + decIndex = index + b = in.nextByte() + index += 1 + } + digits -= index + while (b >= '0' && b <= '9') { + b = in.nextByte() + index += 1 + } + digits += index + if ((b | 0x20) == 'e') { + expIndex = index + b = in.nextByte() + index += 1 + if (b == '-' || b == '+') { + b = in.nextByte() + index += 1 + } + while (b >= '0' && b <= '9') { + b = in.nextByte() + index += 1 + } + } + } catch { + case _: JsonReaderException => + index += 1 // for length calcs, pretend that nextByte() didn't hit EOF + } finally in.rollbackToMark() + if ((decIndex & expIndex) == -1) { + if (digits < 19) v.visitInt64(in.readLong()) + else { + val x = in.readBigInt(null) + if (x.bitLength < 64) v.visitInt64(x.longValue) + else v.visitFloat64StringParts(x.toString, -1, -1) + } + } else { + val cs = new String(in.readRawValAsBytes(), StandardCharsets.US_ASCII) + require(cs.length == index, "invalid number") + v.visitFloat64StringParts(cs, decIndex, expIndex) + } + } + + + private def parseNumberRegex[J]( in: JsonReader, v: Visitor[_, J] ): J = {