From 600979c7969dd69f5f39f87577f4ae968bc70078 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sun, 19 Mar 2023 16:38:24 +0800 Subject: [PATCH] use checkSafeIndex for string parsing --- implicits/src/upickle/implicits/Readers.scala | 2 +- ujson/templates/ElemParser.scala | 58 +++++++++++-------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/implicits/src/upickle/implicits/Readers.scala b/implicits/src/upickle/implicits/Readers.scala index 3b6fd6432..2606b1d20 100644 --- a/implicits/src/upickle/implicits/Readers.scala +++ b/implicits/src/upickle/implicits/Readers.scala @@ -152,7 +152,7 @@ trait Readers extends upickle.core.Types implicit val CharReader: Reader[Char] = new NumericReader[Char] { override def expectedMsg = "expected char" - override def visitString(d: CharSequence, index: Int) = d.charAt(0) + override def visitString(d: CharSequence, index: Int) = d.toString.charAt(0) override def visitChar(d: Char, index: Int) = d override def visitInt32(d: Int, index: Int) = d.toChar override def visitInt64(d: Long, index: Int) = d.toChar diff --git a/ujson/templates/ElemParser.scala b/ujson/templates/ElemParser.scala index 115a72f3f..3aeda2802 100644 --- a/ujson/templates/ElemParser.scala +++ b/ujson/templates/ElemParser.scala @@ -512,61 +512,71 @@ abstract class ElemParser[J] extends upickle.core.BufferingElemParser{ * This method expects the data to be in UTF-16 and accesses it as * chars. */ - protected[this] final def parseStringSimple(i: Int): Int = { + protected[this] final def parseStringSimple(i: Int, safeIndex0: Int = 0): Int = { var j = i - var c = elemOps.toUnsignedInt(getElemSafe(j)) - while (c != '"') { - if (c < ' ') die(j, s"control char (${c}) in string") - if (c == '\\' || c > 127) return -1 - j - j += 1 - c = elemOps.toUnsignedInt(getElemSafe(j)) + var safeIndex = safeIndex0 + while (true) { + // request batches of `Elem` at a time, so within each batch we can use + // `getElemUnsafe` to avoid making a `requestUntil` for every single + // element we fetch + if (j >= safeIndex) safeIndex = checkSafeIndex(j) + val c = getElemUnsafe(j) + (c: @switch) match { + case '"' => return j + 1 + case 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | + 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | + 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | + 30 | 31 => die(j, s"control char (${upickle.core.ElemOps.toUnsignedInt(c)}) in string") + case '\\' => return -1 - j + case _ => j += 1 + } } - j + 1 + ??? } /** * Parse a string that is known to have escape sequences. */ - protected[this] final def parseStringComplex(i0: Int): Int = { + protected[this] final def parseStringComplex(i0: Int, safeIndex0: Int = 0): Int = { + val elemOps = upickle.core.ElemOps var i = i0 - var c = elemOps.toUnsignedInt(getElemSafe(i)) - while (c != '"') { - - if (c < ' ') die(i, s"control char (${c}) in string") + var safeIndex = safeIndex0 + while (true) { + val c = elemOps.toUnsignedInt(getElemSafe(i)) + if (c == '"') return i + 1 + else if (c < ' ') die(i, s"control char (${c}) in string") else if (c == '\\') { - (getElemSafe(i + 1): @switch) match { + if (i + 1 >= safeIndex) safeIndex = checkSafeIndex(i + 1) + (getElemUnsafe(i + 1): @switch) match { case 'b' => { outputBuilder.append('\b'); i += 2 } case 'f' => { outputBuilder.append('\f'); i += 2 } case 'n' => { outputBuilder.append('\n'); i += 2 } case 'r' => { outputBuilder.append('\r'); i += 2 } case 't' => { outputBuilder.append('\t'); i += 2 } - case '"' => { outputBuilder.append('"'); i += 2 } case '/' => { outputBuilder.append('/'); i += 2 } case '\\' => { outputBuilder.append('\\'); i += 2 } // if there's a problem then descape will explode case 'u' => - val d = descape(i) - outputBuilder.appendC(d) - + outputBuilder.appendC(descape(i)) i += 6 case c => die(i + 1, s"illegal escape sequence after \\") } - } else { + }else{ // this case is for "normal" code points that are just one Char. // // we don't have to worry about surrogate pairs, since those // will all be in the ranges D800–DBFF (high surrogates) or // DC00–DFFF (low surrogates). - outputBuilder.append(c) - i += 1 + val k = parseStringSimple(i, safeIndex) + val normalizedK = if (k >= 0) k else -k + appendElemsToBuilder(outputBuilder, i, normalizedK - i - 1) + i = normalizedK - 1 } - c = elemOps.toUnsignedInt(getElemSafe(i)) } - - i + 1 + ??? } /**