-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
start writing lexer tests; fix some unicode issues
- Loading branch information
Showing
24 changed files
with
459 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
// Copyright 2020-2024 Buf Technologies, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package parser_test | ||
|
||
import ( | ||
"fmt" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/bufbuild/protocompile/experimental/ast" | ||
"github.com/bufbuild/protocompile/experimental/parser" | ||
"github.com/bufbuild/protocompile/experimental/report" | ||
"github.com/bufbuild/protocompile/experimental/token" | ||
"github.com/bufbuild/protocompile/internal/golden" | ||
) | ||
|
||
func TestRender(t *testing.T) { | ||
t.Parallel() | ||
|
||
corpus := golden.Corpus{ | ||
Root: "testdata/lexer", | ||
Refresh: "PROTOCOMPILE_REFRESH", | ||
Extension: "proto", | ||
Outputs: []golden.Output{ | ||
{Extension: "tokens.tsv"}, | ||
{Extension: "stderr.txt"}, | ||
}, | ||
} | ||
|
||
corpus.Run(t, func(t *testing.T, path, text string, outputs []string) { | ||
errs := &report.Report{Tracing: 10} | ||
ctx := ast.NewContext(report.File{Path: path, Text: text}) | ||
parser.Lex(ctx, errs) | ||
|
||
stderr, _, _ := report.Renderer{ | ||
Colorize: true, | ||
ShowDebug: true, | ||
}.RenderString(errs) | ||
t.Log(stderr) | ||
outputs[1], _, _ = report.Renderer{}.RenderString(errs) | ||
|
||
var tsv strings.Builder | ||
tsv.WriteString("#\t\tkind\t\toffsets\t\tlinecol\t\ttext\n") | ||
ctx.Stream().All()(func(tok token.Token) bool { | ||
sp := tok.Span() | ||
start := ctx.Stream().IndexedFile.Search(sp.Start) | ||
fmt.Fprintf( | ||
&tsv, "%v\t\t%v\t\t%03d:%03d\t\t%03d:%03d\t\t%q", | ||
int32(tok.ID())-1, tok.Kind(), | ||
sp.Start, sp.End, | ||
start.Line, start.Column, | ||
tok.Text(), | ||
) | ||
|
||
if v, ok := tok.AsInt(); ok { | ||
fmt.Fprintf(&tsv, "\tint:%d", v) | ||
} else if v, ok := tok.AsFloat(); ok { | ||
fmt.Fprintf(&tsv, "\tfloat:%g", v) | ||
} else if v, ok := tok.AsString(); ok { | ||
fmt.Fprintf(&tsv, "\tstring:%q", v) | ||
} | ||
|
||
tsv.WriteByte('\n') | ||
return true | ||
}) | ||
outputs[0] = tsv.String() | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
// This comment does not end in a newline. |
2 changes: 2 additions & 0 deletions
2
experimental/parser/testdata/lexer/comments/eof.proto.tokens.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# kind offsets linecol text | ||
0 Comment 000:042 001:001 "// This comment does not end in a newline." |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
/* | ||
Nesting | ||
/* is not allowed */ | ||
*/ |
8 changes: 8 additions & 0 deletions
8
experimental/parser/testdata/lexer/comments/nested.proto.stderr.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
error: encountered unterminated `*/` delimiter | ||
--> testdata/lexer/comments/nested.proto:4:1 | ||
| | ||
4 | */ | ||
| ^^ expected to be opened by `/*` | ||
= note: Protobuf does not support nested block comments | ||
|
||
encountered 1 error |
4 changes: 4 additions & 0 deletions
4
experimental/parser/testdata/lexer/comments/nested.proto.tokens.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# kind offsets linecol text | ||
0 Comment 000:039 001:001 "/*\n Nesting\n /* is not allowed */" | ||
1 Space 039:040 003:025 "\n" | ||
2 Unrecognized 040:042 004:001 "*/" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
// Single line comment. | ||
//go:style-intrinsic | ||
/* | ||
Multiline comment | ||
*/ | ||
|
||
// | ||
/**/ // Empty |
9 changes: 9 additions & 0 deletions
9
experimental/parser/testdata/lexer/comments/ok.proto.tokens.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# kind offsets linecol text | ||
0 Comment 000:024 001:001 "// Single line comment.\n" | ||
1 Comment 024:045 002:001 "//go:style-intrinsic\n" | ||
2 Comment 045:072 003:001 "/*\n Multiline comment\n*/" | ||
3 Space 072:074 005:003 "\n\n" | ||
4 Comment 074:077 007:001 "//\n" | ||
5 Comment 077:081 008:001 "/**/" | ||
6 Space 081:082 008:005 " " | ||
7 Comment 082:091 008:006 "// Empty\n" |
2 changes: 2 additions & 0 deletions
2
experimental/parser/testdata/lexer/comments/unterminated.proto
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
/* | ||
Oops I dropped my * / |
7 changes: 7 additions & 0 deletions
7
experimental/parser/testdata/lexer/comments/unterminated.proto.stderr.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
error: encountered unterminated `/*` delimiter | ||
--> testdata/lexer/comments/unterminated.proto:1:1 | ||
| | ||
1 | /* | ||
| ^^ expected to be closed by `*/` | ||
|
||
encountered 1 error |
2 changes: 2 additions & 0 deletions
2
experimental/parser/testdata/lexer/comments/unterminated.proto.tokens.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# kind offsets linecol text | ||
0 Comment 000:028 001:001 "/*\n Oops I dropped my * /" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
kitty_🐈⬛ | ||
黑猫 | ||
काली बिल्ली | ||
黑猫_suffix |
31 changes: 31 additions & 0 deletions
31
experimental/parser/testdata/lexer/idents/non-ascii.proto.stderr.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
error: unrecongnized token | ||
--> testdata/lexer/idents/non-ascii.proto:1:7 | ||
| | ||
1 | kitty_🐈<U+200D>⬛ | ||
| ^^^^^^^^^^^^ | ||
|
||
error: non-ASCII identifiers are not allowed | ||
--> testdata/lexer/idents/non-ascii.proto:2:1 | ||
| | ||
2 | 黑猫 | ||
| ^^^^ | ||
|
||
error: non-ASCII identifiers are not allowed | ||
--> testdata/lexer/idents/non-ascii.proto:3:1 | ||
| | ||
3 | काली बिल्ली | ||
| ^^^^ | ||
|
||
error: non-ASCII identifiers are not allowed | ||
--> testdata/lexer/idents/non-ascii.proto:3:6 | ||
| | ||
3 | काली बिल्ली | ||
| ^^^^^ | ||
|
||
error: non-ASCII identifiers are not allowed | ||
--> testdata/lexer/idents/non-ascii.proto:4:1 | ||
| | ||
4 | 黑猫_suffix | ||
| ^^^^^^^^^^^ | ||
|
||
encountered 5 errors |
11 changes: 11 additions & 0 deletions
11
experimental/parser/testdata/lexer/idents/non-ascii.proto.tokens.tsv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# kind offsets linecol text | ||
0 Ident 000:006 001:001 "kitty_" | ||
1 Unrecognized 006:016 001:007 "🐈\u200d⬛" | ||
2 Space 016:017 001:009 "\n" | ||
3 Ident 017:023 002:001 "黑猫" | ||
4 Space 023:025 002:005 " \n" | ||
5 Ident 025:037 003:001 "काली" | ||
6 Space 037:038 003:005 " " | ||
7 Ident 038:056 003:006 "बिल्ली" | ||
8 Space 056:057 003:011 "\n" | ||
9 Ident 057:070 004:001 "黑猫_suffix" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
a b c | ||
string message | ||
foo_bar fooBar | ||
_ __ ____ | ||
__snake__ | ||
SCREAMING | ||
abcdefghijklmnopqrstuvwxyz | ||
ABCDEFGHIJKLMNOPQRSTUVWXYZ | ||
_0123456789 | ||
_0_ |
Oops, something went wrong.