Skip to content

Commit

Permalink
use checkSafeIndex for string parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
lihaoyi committed Mar 19, 2023
1 parent 6329d73 commit 600979c
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 25 deletions.
2 changes: 1 addition & 1 deletion implicits/src/upickle/implicits/Readers.scala
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ trait Readers extends upickle.core.Types

implicit val CharReader: Reader[Char] = new NumericReader[Char] {
override def expectedMsg = "expected char"
override def visitString(d: CharSequence, index: Int) = d.charAt(0)
override def visitString(d: CharSequence, index: Int) = d.toString.charAt(0)
override def visitChar(d: Char, index: Int) = d
override def visitInt32(d: Int, index: Int) = d.toChar
override def visitInt64(d: Long, index: Int) = d.toChar
Expand Down
58 changes: 34 additions & 24 deletions ujson/templates/ElemParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -512,61 +512,71 @@ abstract class ElemParser[J] extends upickle.core.BufferingElemParser{
* This method expects the data to be in UTF-16 and accesses it as
* chars.
*/
protected[this] final def parseStringSimple(i: Int): Int = {
protected[this] final def parseStringSimple(i: Int, safeIndex0: Int = 0): Int = {
var j = i
var c = elemOps.toUnsignedInt(getElemSafe(j))
while (c != '"') {
if (c < ' ') die(j, s"control char (${c}) in string")
if (c == '\\' || c > 127) return -1 - j
j += 1
c = elemOps.toUnsignedInt(getElemSafe(j))
var safeIndex = safeIndex0
while (true) {
// request batches of `Elem` at a time, so within each batch we can use
// `getElemUnsafe` to avoid making a `requestUntil` for every single
// element we fetch
if (j >= safeIndex) safeIndex = checkSafeIndex(j)
val c = getElemUnsafe(j)
(c: @switch) match {
case '"' => return j + 1
case 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | 31 => die(j, s"control char (${upickle.core.ElemOps.toUnsignedInt(c)}) in string")
case '\\' => return -1 - j
case _ => j += 1
}
}
j + 1
???
}

/**
* Parse a string that is known to have escape sequences.
*/
protected[this] final def parseStringComplex(i0: Int): Int = {
protected[this] final def parseStringComplex(i0: Int, safeIndex0: Int = 0): Int = {
val elemOps = upickle.core.ElemOps
var i = i0
var c = elemOps.toUnsignedInt(getElemSafe(i))
while (c != '"') {

if (c < ' ') die(i, s"control char (${c}) in string")
var safeIndex = safeIndex0
while (true) {
val c = elemOps.toUnsignedInt(getElemSafe(i))
if (c == '"') return i + 1
else if (c < ' ') die(i, s"control char (${c}) in string")
else if (c == '\\') {
(getElemSafe(i + 1): @switch) match {
if (i + 1 >= safeIndex) safeIndex = checkSafeIndex(i + 1)
(getElemUnsafe(i + 1): @switch) match {
case 'b' => { outputBuilder.append('\b'); i += 2 }
case 'f' => { outputBuilder.append('\f'); i += 2 }
case 'n' => { outputBuilder.append('\n'); i += 2 }
case 'r' => { outputBuilder.append('\r'); i += 2 }
case 't' => { outputBuilder.append('\t'); i += 2 }

case '"' => { outputBuilder.append('"'); i += 2 }
case '/' => { outputBuilder.append('/'); i += 2 }
case '\\' => { outputBuilder.append('\\'); i += 2 }

// if there's a problem then descape will explode
case 'u' =>
val d = descape(i)
outputBuilder.appendC(d)

outputBuilder.appendC(descape(i))
i += 6

case c => die(i + 1, s"illegal escape sequence after \\")
}
} else {
}else{
// this case is for "normal" code points that are just one Char.
//
// we don't have to worry about surrogate pairs, since those
// will all be in the ranges D800–DBFF (high surrogates) or
// DC00–DFFF (low surrogates).
outputBuilder.append(c)
i += 1
val k = parseStringSimple(i, safeIndex)
val normalizedK = if (k >= 0) k else -k
appendElemsToBuilder(outputBuilder, i, normalizedK - i - 1)
i = normalizedK - 1
}
c = elemOps.toUnsignedInt(getElemSafe(i))
}

i + 1
???
}

/**
Expand Down

0 comments on commit 600979c

Please sign in to comment.