From b649e469ecc45f5f645fb589b1e2bcc8d5ed33bd Mon Sep 17 00:00:00 2001 From: Doug Roper Date: Fri, 25 Mar 2022 17:41:48 -0400 Subject: [PATCH] Remove regex. Tests pass --- .../main/scala/bench/JsoniterScalaBench.scala | 8 +- .../scala/bench/JsoniterScalaBenchTests.scala | 3 +- project/build.properties | 2 +- .../WeePickleJsonValueCodecs.scala | 112 +++++------------- .../rallyhealth/weepickle/v1/NumberSoup.scala | 22 +++- .../rallyhealth/weepickle/v1/ParserSpec.scala | 3 +- 6 files changed, 59 insertions(+), 91 deletions(-) diff --git a/bench/src/main/scala/bench/JsoniterScalaBench.scala b/bench/src/main/scala/bench/JsoniterScalaBench.scala index f4c3cde2..7753af42 100644 --- a/bench/src/main/scala/bench/JsoniterScalaBench.scala +++ b/bench/src/main/scala/bench/JsoniterScalaBench.scala @@ -37,9 +37,13 @@ class JsoniterScalaBench { * The only time this would happen in the wild would be when parsing a JSON text of a single number. * To make this more realistic, we're intentionally adding a whitespace suffix here. */ - private val piBytes = "-3.14 ".getBytes() + private val floatBytes = "-3.14159 ".getBytes() @Benchmark - def pi = FromJsoniterScala(piBytes).transform(BufferedValue.Builder) + def parseFloat = FromJsoniterScala(floatBytes).transform(BufferedValue.Builder) + private val intBytes = "186282 ".getBytes() + + @Benchmark + def parseInt = FromJsoniterScala(intBytes).transform(BufferedValue.Builder) } diff --git a/bench/src/test/scala/bench/JsoniterScalaBenchTests.scala b/bench/src/test/scala/bench/JsoniterScalaBenchTests.scala index 5c5db630..7a7e6ff0 100644 --- a/bench/src/test/scala/bench/JsoniterScalaBenchTests.scala +++ b/bench/src/test/scala/bench/JsoniterScalaBenchTests.scala @@ -7,6 +7,7 @@ object JsoniterScalaBenchTests extends TestSuite { val tests = Tests { val bench = new JsoniterScalaBench() - test("pi")(bench.pi ==> Num("-3.14", 2, -1)) + test("parseFloat")(bench.parseFloat ==> Num("-3.14159", 2, -1)) + test("parseInt")(bench.parseInt ==> NumLong(186282)) } } diff --git a/project/build.properties b/project/build.properties index 10fd9eee..c8fcab54 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.5.5 +sbt.version=1.6.2 diff --git a/weejson-jsoniter-scala/src/main/scala/com/rallyhealth/weejson/v1/wee_jsoniter_scala/WeePickleJsonValueCodecs.scala b/weejson-jsoniter-scala/src/main/scala/com/rallyhealth/weejson/v1/wee_jsoniter_scala/WeePickleJsonValueCodecs.scala index b02a6a8d..ac686bbb 100644 --- a/weejson-jsoniter-scala/src/main/scala/com/rallyhealth/weejson/v1/wee_jsoniter_scala/WeePickleJsonValueCodecs.scala +++ b/weejson-jsoniter-scala/src/main/scala/com/rallyhealth/weejson/v1/wee_jsoniter_scala/WeePickleJsonValueCodecs.scala @@ -74,7 +74,7 @@ object WeePickleJsonValueCodecs { if (in.readBoolean()) v.visitTrue() else v.visitFalse() } else if ((b >= '0' && b <= '9') || b == '-') { in.rollbackToken() - parseNumberCounter(in, v) + parseNumber(in, v) } else if (b == '[') { val depthM1 = depth - 1 if (depthM1 < 0) in.decodeError("depth limit exceeded") @@ -121,120 +121,74 @@ object WeePickleJsonValueCodecs { } } - private def parseNumberCounter[J]( + private def parseNumber[J]( in: JsonReader, v: Visitor[_, J] ): J = { in.setMark() var b = in.nextByte() - var digits, index = 0 + var intDigits, fracDigits, expDigits, punct = 0 var decIndex, expIndex = -1 if (b == '-') { + punct += 1 b = in.nextByte() - index += 1 } try { - digits -= index - while (b >= '0' && b <= '9') { + if (b == '0') { + intDigits += 1 b = in.nextByte() - index += 1 + + if (b >= '0' && b <= '9') { + in.decodeError("invalid number") + } } - digits += index - if (b == '.') { - decIndex = index + while (b >= '0' && b <= '9') { + intDigits += 1 b = in.nextByte() - index += 1 } - digits -= index - while (b >= '0' && b <= '9') { + if (b == '.') { + decIndex = punct + intDigits + punct += 1 b = in.nextByte() - index += 1 + while (b >= '0' && b <= '9') { + fracDigits += 1 + b = in.nextByte() + } } - digits += index + if ((b | 0x20) == 'e') { - expIndex = index + expIndex = punct + intDigits + fracDigits + punct += 1 b = in.nextByte() - index += 1 if (b == '-' || b == '+') { + punct += 1 b = in.nextByte() - index += 1 } while (b >= '0' && b <= '9') { + expDigits += 1 b = in.nextByte() - index += 1 } } } catch { case _: JsonReaderException => - index += 1 // for length calcs, pretend that nextByte() didn't hit EOF } finally in.rollbackToMark() if ((decIndex & expIndex) == -1) { - if (digits < 19) v.visitInt64(in.readLong()) + if (intDigits < 19) v.visitInt64(in.readLong()) else { val x = in.readBigInt(null) if (x.bitLength < 64) v.visitInt64(x.longValue) else v.visitFloat64StringParts(x.toString, -1, -1) } } else { - val cs = new String(in.readRawValAsBytes(), StandardCharsets.US_ASCII) - require(cs.length == index, "invalid number") - v.visitFloat64StringParts(cs, decIndex, expIndex) - } - } - - - private def parseNumberRegex[J]( - in: JsonReader, - v: Visitor[_, J] - ): J = { - in.setMark() - var digits = 0 - var b = in.nextByte() - if (b == '-') b = in.nextByte() - try { - while (b >= '0' && b <= '9') { - b = in.nextByte() - digits += 1 - } - } catch { - case _: JsonReaderException => // ignore the end of input error for now - } finally in.rollbackToMark() + if (intDigits == 0 || + decIndex != -1 && fracDigits == 0 || + expIndex != -1 && expDigits == 0 + ) in.decodeError("invalid number") - if ((b | 0x20) != 'e' && b != '.') { - if (digits < 19) { - val l = in.readLong() - v.visitInt64(l) - } else { - val x = in.readBigInt(null) - if (x.bitLength < 64) v.visitInt64(x.longValue) - else v.visitFloat64StringParts(x.toString, -1, -1) - } - } else { val cs = new String(in.readRawValAsBytes(), StandardCharsets.US_ASCII) - - /** - * This regex performs rather badly, but gets the tests passing. - * - * We're looking for a value we can pass through the Visitor interface-- - * either a primitive Double or a CharSequence representing a *valid* - * number conforming to https://datatracker.ietf.org/doc/html/rfc7159#page-6. - * - * `in.readRawValAsBytes()` does NOT do that validation. It will happily - * return a String of "------". - * - * `in.readBigDecimal(null).toString` is tempting, but will not provide the raw input. - * Instead, it transforms the input from "0.00000001" to "1.0E-8". - * This fails roundtrip tests. - * - * I tried combining the two approaches, `in.readBigDecimal(null)` for validation, - * then `in.rollbackToMark()` + `in.readRawValAsBytes()` to capture the raw input, - * but for a value like "1.0-----", `in.readBigDecimal(null)` will read "1.0", - * then `in.readRawValAsBytes()` will return the whole string, including the unwanted - * trailing hyphens. - * - */ - require(ValidJsonNum.pattern.matcher(cs).matches(), "invalid number") - v.visitFloat64String(cs) + val len = intDigits + fracDigits + expDigits + punct + if (cs.length != len) in.decodeError("invalid number") + v.visitFloat64StringParts(cs, decIndex, expIndex) } } @@ -256,6 +210,4 @@ object WeePickleJsonValueCodecs { throw new UnsupportedOperationException("only supports decoding") } } - - private val ValidJsonNum = """-?(0|[1-9]\d*)(\.\d+)?([eE][-+]?\d+)?""".r // based on https://datatracker.ietf.org/doc/html/rfc7159#page-6 } diff --git a/weepickle-tests/src/test/scala/com/rallyhealth/weepickle/v1/NumberSoup.scala b/weepickle-tests/src/test/scala/com/rallyhealth/weepickle/v1/NumberSoup.scala index 2f3f1aab..e90c179c 100644 --- a/weepickle-tests/src/test/scala/com/rallyhealth/weepickle/v1/NumberSoup.scala +++ b/weepickle-tests/src/test/scala/com/rallyhealth/weepickle/v1/NumberSoup.scala @@ -3,26 +3,38 @@ package com.rallyhealth.weepickle.v1 import com.rallyhealth.weepickle.v1.NumberSoup.ValidJsonNum import org.scalacheck.{Arbitrary, Gen} +/** + * @param value some random concatenation of number-related characters that may or may not be a valid number. + */ case class NumberSoup(value: String) { - override def toString: String = value + override def toString: String = s""""$value"""" - def isValid: Boolean = ValidJsonNum.pattern.matcher(value).matches() + def isValidJson: Boolean = ValidJsonNum.pattern.matcher(value).matches() - def isInvalid: Boolean = !isValid + def isInvalidJson: Boolean = !isValidJson } object NumberSoup { - val ValidJsonNum = """-?(0|[1-9]\d*)(\.\d+)?([eE][-+]?\d+)?""".r // based on https://datatracker.ietf.org/doc/html/rfc7159#page-6 + val ValidJsonNum = """\s*-?(0|[1-9]\d*)(\.\d+)?([eE][-+]?\d+)?\s*""".r // based on https://datatracker.ietf.org/doc/html/rfc7159#page-6 implicit val gen: Gen[NumberSoup] = { + val maybeSpace = Gen.option(Gen.oneOf(" ", " ", "\n")) val numberParts = Gen.frequency( 1 -> Gen.numStr, 1 -> ".", 1 -> "-", 1 -> "+", 1 -> "E", - 1 -> "e" + 1 -> "e", + 1 -> "0", + 1 -> " " ) + val numberString = for { + prefix <- maybeSpace + parts <- Gen.listOf(numberParts).map(_.mkString) + suffix <- maybeSpace + } yield s"$prefix$parts$suffix" + Gen.listOf(numberParts).map(_.mkString).map(NumberSoup(_)) } implicit val arb: Arbitrary[NumberSoup] = Arbitrary(gen) diff --git a/weepickle-tests/src/test/scala/com/rallyhealth/weepickle/v1/ParserSpec.scala b/weepickle-tests/src/test/scala/com/rallyhealth/weepickle/v1/ParserSpec.scala index 70e002df..0c27cf3c 100644 --- a/weepickle-tests/src/test/scala/com/rallyhealth/weepickle/v1/ParserSpec.scala +++ b/weepickle-tests/src/test/scala/com/rallyhealth/weepickle/v1/ParserSpec.scala @@ -32,11 +32,10 @@ abstract class ParserSpec(parse: Array[Byte] => FromInput, depthLimit: Int = 100 private implicit def toBytes(s: String): Array[Byte] = s.getBytes "roundtrip" in testJson() - "roundtrip example" in testValue(NumDouble(1.3424780377262655E-5)) "deep arr" in testDepth(Arr(_)) "deep obj" in testDepth(b => Obj("k" -> b)) "number soup" in forAll { (soup: NumberSoup) => - whenever(soup.isInvalid) { + whenever(soup.isInvalidJson) { intercept[Exception] { parse(soup.value.getBytes()).transform(NoOpVisitor) }