Skip to content

Commit

Permalink
Add test case for latest fuzzer timeout issue and fix the pathologica…
Browse files Browse the repository at this point in the history
…l performance (#343)

The pathological test case was taking ~10 seconds (!!). Now it's just
200ms.

The issue was that it was relying on `ast.FileInfo` to provide things
like the line numbers for tokens. But those are all computed on-demand.
They require allocations, a O(log n) search through line-ending
offsets (n is number of lines in file), then an O(n) scan to compute
column (n is length of line). This operation was being done frequently
enough that it induced pathologically bad performance.

So now we have just a _little_ bit more book-keeping in the lexer, so
that we can decide when to donate a comment to prior token as a trailing
comment without having to ask the file info to compute these things.
  • Loading branch information
jhump authored Sep 25, 2024
1 parent 6707954 commit bb0828d
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 21 deletions.
62 changes: 41 additions & 21 deletions parser/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ func (rr *runeReader) getMark() string {
return string(rr.data[rr.mark:rr.pos])
}

type comment struct {
tok ast.Token
isBlock bool
}

type protoLex struct {
input *runeReader
info *ast.FileInfo
Expand All @@ -86,7 +91,10 @@ type protoLex struct {
prevOffset int
eof ast.Token

comments []ast.Token
prevLine, curLine int

maybeDonateComment int
comments []comment
}

var utf8Bom = []byte{0xEF, 0xBB, 0xBF}
Expand Down Expand Up @@ -160,6 +168,12 @@ var keywords = map[string]int{
func (l *protoLex) maybeNewLine(r rune) {
if r == '\n' {
l.info.AddLine(l.input.offset())
l.curLine++
if len(l.comments) > 0 && l.comments[0].isBlock && l.maybeDonateComment > 0 {
// Newline after trailing block comment? Increment the signal that
// we may be able to donate comment to previous token.
l.maybeDonateComment++
}
}
}

Expand Down Expand Up @@ -305,13 +319,15 @@ func (l *protoLex) Lex(lval *protoSymType) int {
return int(c)
}
if cn == '/' {
startLine := l.curLine
if hasErr := l.skipToEndOfLineComment(lval); hasErr {
return _ERROR
}
l.comments = append(l.comments, l.newToken())
l.addComment(false, startLine)
continue
}
if cn == '*' {
startLine := l.curLine
ok, hasErr := l.skipToEndOfBlockComment(lval)
if hasErr {
return _ERROR
Expand All @@ -320,7 +336,7 @@ func (l *protoLex) Lex(lval *protoSymType) int {
l.setError(lval, errors.New("block comment never terminates, unexpected EOF"))
return _ERROR
}
l.comments = append(l.comments, l.newToken())
l.addComment(true, startLine)
continue
}
l.input.unreadRune(szn)
Expand Down Expand Up @@ -366,34 +382,37 @@ func (l *protoLex) newToken() ast.Token {
return l.info.AddToken(offset, length)
}

func (l *protoLex) addComment(isBlock bool, startLine int) {
if len(l.comments) == 0 && startLine == l.prevLine {
l.maybeDonateComment++
}
l.comments = append(l.comments, comment{l.newToken(), isBlock})
}

func (l *protoLex) setPrevAndAddComments(n ast.TerminalNode) {
comments := l.comments
l.comments = nil
var prevTrailingComments []ast.Token
comments, maybeDonateComment := l.comments, l.maybeDonateComment
l.comments, l.maybeDonateComment = nil, 0
var prevTrailingComments []comment
if l.prevSym != nil && len(comments) > 0 {
prevEnd := l.info.NodeInfo(l.prevSym).End().Line
info := l.info.NodeInfo(n)
nStart := info.Start().Line
if nStart == prevEnd {
cur := l.curLine
if cur == l.prevLine {
if rn, ok := n.(*ast.RuneNode); ok && rn.Rune == 0 {
// if current token is EOF, pretend its on separate line
// if current token is EOF, pretend it's on separate line
// so that the logic below can attribute a final trailing
// comment to the previous token
nStart++
cur++
}
}
c := comments[0]
commentInfo := l.info.TokenInfo(c)
commentStart := commentInfo.Start().Line
if nStart > prevEnd && commentStart == prevEnd {
if cur > l.prevLine && maybeDonateComment > 0 {
// Comment starts right after the previous token. If it's a
// line comment, we record that as a trailing comment.
//
// But if it's a block comment, it is only a trailing comment
// if there are multiple comments or if the block comment ends
// on a line before n.
canDonate := strings.HasPrefix(commentInfo.RawText(), "//") ||
len(comments) > 1 || commentInfo.End().Line < nStart
// on a line before n. This lattermost condition is signaled
// via l.maybeDonateComment > 1.
canDonate := !comments[0].isBlock ||
len(comments) > 1 || maybeDonateComment > 1

if canDonate {
prevTrailingComments = comments[:1]
Expand All @@ -404,13 +423,14 @@ func (l *protoLex) setPrevAndAddComments(n ast.TerminalNode) {

// now we can associate comments
for _, c := range prevTrailingComments {
l.info.AddComment(c, l.prevSym.Token())
l.info.AddComment(c.tok, l.prevSym.Token())
}
for _, c := range comments {
l.info.AddComment(c, n.Token())
l.info.AddComment(c.tok, n.Token())
}

l.prevSym = n
l.prevLine = l.curLine
}

func (l *protoLex) setString(lval *protoSymType, val string) {
Expand Down
2 changes: 2 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -984,9 +984,11 @@ func TestPathological(t *testing.T) {
// adequate performance.
// https://oss-fuzz.com/testcase-detail/4766256800858112
// https://oss-fuzz.com/testcase-detail/4952577018298368
// https://oss-fuzz.com/testcase-detail/5539164995518464
testCases := map[string]bool{
"pathological.proto": true,
"pathological2.proto": false,
"pathological3.proto": false,
}
for fileName := range testCases {
fileName, canParse := fileName, testCases[fileName] // don't want test func below to capture loop var
Expand Down
Loading

0 comments on commit bb0828d

Please sign in to comment.