Skip to content

Commit

Permalink
Remove regex. Tests pass
Browse files Browse the repository at this point in the history
  • Loading branch information
htmldoug committed Mar 25, 2022
1 parent a3d913d commit b649e46
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 91 deletions.
8 changes: 6 additions & 2 deletions bench/src/main/scala/bench/JsoniterScalaBench.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,13 @@ class JsoniterScalaBench {
* The only time this would happen in the wild would be when parsing a JSON text of a single number.
* To make this more realistic, we're intentionally adding a whitespace suffix here.
*/
private val piBytes = "-3.14 ".getBytes()
private val floatBytes = "-3.14159 ".getBytes()

@Benchmark
def pi = FromJsoniterScala(piBytes).transform(BufferedValue.Builder)
def parseFloat = FromJsoniterScala(floatBytes).transform(BufferedValue.Builder)

private val intBytes = "186282 ".getBytes()

@Benchmark
def parseInt = FromJsoniterScala(intBytes).transform(BufferedValue.Builder)
}
3 changes: 2 additions & 1 deletion bench/src/test/scala/bench/JsoniterScalaBenchTests.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ object JsoniterScalaBenchTests extends TestSuite {

val tests = Tests {
val bench = new JsoniterScalaBench()
test("pi")(bench.pi ==> Num("-3.14", 2, -1))
test("parseFloat")(bench.parseFloat ==> Num("-3.14159", 2, -1))
test("parseInt")(bench.parseInt ==> NumLong(186282))
}
}
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=1.5.5
sbt.version=1.6.2
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ object WeePickleJsonValueCodecs {
if (in.readBoolean()) v.visitTrue() else v.visitFalse()
} else if ((b >= '0' && b <= '9') || b == '-') {
in.rollbackToken()
parseNumberCounter(in, v)
parseNumber(in, v)
} else if (b == '[') {
val depthM1 = depth - 1
if (depthM1 < 0) in.decodeError("depth limit exceeded")
Expand Down Expand Up @@ -121,120 +121,74 @@ object WeePickleJsonValueCodecs {
}
}

private def parseNumberCounter[J](
private def parseNumber[J](
in: JsonReader,
v: Visitor[_, J]
): J = {
in.setMark()
var b = in.nextByte()
var digits, index = 0
var intDigits, fracDigits, expDigits, punct = 0
var decIndex, expIndex = -1
if (b == '-') {
punct += 1
b = in.nextByte()
index += 1
}
try {
digits -= index
while (b >= '0' && b <= '9') {
if (b == '0') {
intDigits += 1
b = in.nextByte()
index += 1

if (b >= '0' && b <= '9') {
in.decodeError("invalid number")
}
}
digits += index
if (b == '.') {
decIndex = index
while (b >= '0' && b <= '9') {
intDigits += 1
b = in.nextByte()
index += 1
}
digits -= index
while (b >= '0' && b <= '9') {
if (b == '.') {
decIndex = punct + intDigits
punct += 1
b = in.nextByte()
index += 1
while (b >= '0' && b <= '9') {
fracDigits += 1
b = in.nextByte()
}
}
digits += index

if ((b | 0x20) == 'e') {
expIndex = index
expIndex = punct + intDigits + fracDigits
punct += 1
b = in.nextByte()
index += 1
if (b == '-' || b == '+') {
punct += 1
b = in.nextByte()
index += 1
}
while (b >= '0' && b <= '9') {
expDigits += 1
b = in.nextByte()
index += 1
}
}
} catch {
case _: JsonReaderException =>
index += 1 // for length calcs, pretend that nextByte() didn't hit EOF
} finally in.rollbackToMark()
if ((decIndex & expIndex) == -1) {
if (digits < 19) v.visitInt64(in.readLong())
if (intDigits < 19) v.visitInt64(in.readLong())
else {
val x = in.readBigInt(null)
if (x.bitLength < 64) v.visitInt64(x.longValue)
else v.visitFloat64StringParts(x.toString, -1, -1)
}
} else {
val cs = new String(in.readRawValAsBytes(), StandardCharsets.US_ASCII)
require(cs.length == index, "invalid number")
v.visitFloat64StringParts(cs, decIndex, expIndex)
}
}


private def parseNumberRegex[J](
in: JsonReader,
v: Visitor[_, J]
): J = {
in.setMark()
var digits = 0
var b = in.nextByte()
if (b == '-') b = in.nextByte()
try {
while (b >= '0' && b <= '9') {
b = in.nextByte()
digits += 1
}
} catch {
case _: JsonReaderException => // ignore the end of input error for now
} finally in.rollbackToMark()
if (intDigits == 0 ||
decIndex != -1 && fracDigits == 0 ||
expIndex != -1 && expDigits == 0
) in.decodeError("invalid number")

if ((b | 0x20) != 'e' && b != '.') {
if (digits < 19) {
val l = in.readLong()
v.visitInt64(l)
} else {
val x = in.readBigInt(null)
if (x.bitLength < 64) v.visitInt64(x.longValue)
else v.visitFloat64StringParts(x.toString, -1, -1)
}
} else {
val cs = new String(in.readRawValAsBytes(), StandardCharsets.US_ASCII)

/**
* This regex performs rather badly, but gets the tests passing.
*
* We're looking for a value we can pass through the Visitor interface--
* either a primitive Double or a CharSequence representing a *valid*
* number conforming to https://datatracker.ietf.org/doc/html/rfc7159#page-6.
*
* `in.readRawValAsBytes()` does NOT do that validation. It will happily
* return a String of "------".
*
* `in.readBigDecimal(null).toString` is tempting, but will not provide the raw input.
* Instead, it transforms the input from "0.00000001" to "1.0E-8".
* This fails roundtrip tests.
*
* I tried combining the two approaches, `in.readBigDecimal(null)` for validation,
* then `in.rollbackToMark()` + `in.readRawValAsBytes()` to capture the raw input,
* but for a value like "1.0-----", `in.readBigDecimal(null)` will read "1.0",
* then `in.readRawValAsBytes()` will return the whole string, including the unwanted
* trailing hyphens.
*
*/
require(ValidJsonNum.pattern.matcher(cs).matches(), "invalid number")
v.visitFloat64String(cs)
val len = intDigits + fracDigits + expDigits + punct
if (cs.length != len) in.decodeError("invalid number")
v.visitFloat64StringParts(cs, decIndex, expIndex)
}
}

Expand All @@ -256,6 +210,4 @@ object WeePickleJsonValueCodecs {
throw new UnsupportedOperationException("only supports decoding")
}
}

private val ValidJsonNum = """-?(0|[1-9]\d*)(\.\d+)?([eE][-+]?\d+)?""".r // based on https://datatracker.ietf.org/doc/html/rfc7159#page-6
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,38 @@ package com.rallyhealth.weepickle.v1
import com.rallyhealth.weepickle.v1.NumberSoup.ValidJsonNum
import org.scalacheck.{Arbitrary, Gen}

/**
* @param value some random concatenation of number-related characters that may or may not be a valid number.
*/
case class NumberSoup(value: String) {

override def toString: String = value
override def toString: String = s""""$value""""

def isValid: Boolean = ValidJsonNum.pattern.matcher(value).matches()
def isValidJson: Boolean = ValidJsonNum.pattern.matcher(value).matches()

def isInvalid: Boolean = !isValid
def isInvalidJson: Boolean = !isValidJson
}
object NumberSoup {
val ValidJsonNum = """-?(0|[1-9]\d*)(\.\d+)?([eE][-+]?\d+)?""".r // based on https://datatracker.ietf.org/doc/html/rfc7159#page-6
val ValidJsonNum = """\s*-?(0|[1-9]\d*)(\.\d+)?([eE][-+]?\d+)?\s*""".r // based on https://datatracker.ietf.org/doc/html/rfc7159#page-6

implicit val gen: Gen[NumberSoup] = {
val maybeSpace = Gen.option(Gen.oneOf(" ", " ", "\n"))
val numberParts = Gen.frequency(
1 -> Gen.numStr,
1 -> ".",
1 -> "-",
1 -> "+",
1 -> "E",
1 -> "e"
1 -> "e",
1 -> "0",
1 -> " "
)
val numberString = for {
prefix <- maybeSpace
parts <- Gen.listOf(numberParts).map(_.mkString)
suffix <- maybeSpace
} yield s"$prefix$parts$suffix"

Gen.listOf(numberParts).map(_.mkString).map(NumberSoup(_))
}
implicit val arb: Arbitrary[NumberSoup] = Arbitrary(gen)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,11 +32,10 @@ abstract class ParserSpec(parse: Array[Byte] => FromInput, depthLimit: Int = 100
private implicit def toBytes(s: String): Array[Byte] = s.getBytes

"roundtrip" in testJson()
"roundtrip example" in testValue(NumDouble(1.3424780377262655E-5))
"deep arr" in testDepth(Arr(_))
"deep obj" in testDepth(b => Obj("k" -> b))
"number soup" in forAll { (soup: NumberSoup) =>
whenever(soup.isInvalid) {
whenever(soup.isInvalidJson) {
intercept[Exception] {
parse(soup.value.getBytes()).transform(NoOpVisitor)
}
Expand Down

0 comments on commit b649e46

Please sign in to comment.