Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
feat(search): Enable improved symbol parsing for large repos (when us…
Browse files Browse the repository at this point in the history
…ing Rockskip) (#63988)

During an investigation, we saw that Rockskip was not using scip-ctags
for symbol parsing when applicable. This means that
1. Rockskip is getting less than optimal symbols for certain languages
(like Go)
2. Rockskip is getting no symbols for languages not in universal ctags
(Magik)

This PR attempts to solve this problem but updating Rockskip to re-use
the ctags parser pool logic from symbol service.

### Key Changes
- Update parser pool to be re-usable
- Push common logic for parser type detection into the parser pool
module
- Update rockskip service config to take a parser pool 
- Update and add unit/integration tests


## Questions
- What performance impact will using this pooled parser have compared to
its previous behavior of spawning a new ctags process each time?



## Test plan
- [x] Add unit tests
- [x] Update integration tests
- [x] Manually test rockskip
- [x] Manually test symbolservice (in case of regression)

---------

Co-authored-by: Keegan Carruthers-Smith <[email protected]>
  • Loading branch information
mmanela and keegancsmith authored Jul 31, 2024
1 parent 60d450b commit b2e550c
Show file tree
Hide file tree
Showing 18 changed files with 333 additions and 146 deletions.
1 change: 1 addition & 0 deletions cmd/symbols/internal/api/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ go_test(
"//cmd/symbols/internal/gitserver",
"//cmd/symbols/internal/parser",
"//internal/api",
"//internal/conf",
"//internal/ctags_config",
"//internal/database/dbmocks",
"//internal/diskcache",
Expand Down
108 changes: 74 additions & 34 deletions cmd/symbols/internal/api/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package api
import (
"context"
"net/http/httptest"
"sort"
"testing"
"time"

Expand All @@ -17,6 +18,7 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/fetcher"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/gitserver"
"github.com/sourcegraph/sourcegraph/cmd/symbols/internal/parser"
"github.com/sourcegraph/sourcegraph/internal/conf"
"github.com/sourcegraph/sourcegraph/internal/ctags_config"
"github.com/sourcegraph/sourcegraph/internal/database/dbmocks"
"github.com/sourcegraph/sourcegraph/internal/diskcache"
Expand All @@ -38,42 +40,68 @@ func TestHandler(t *testing.T) {
tmpDir := t.TempDir()

cache := diskcache.NewStore(tmpDir, "symbols", diskcache.WithBackgroundTimeout(20*time.Minute))

// This ensures the ctags config is initialized properly
conf.MockAndNotifyWatchers(&conf.Unified{})
parserFactory := func(source ctags_config.ParserType) (ctags.Parser, error) {
pathToEntries := map[string][]*ctags.Entry{
"a.js": {
{
Name: "x",
Path: "a.js",
Language: "JavaScript",
Line: 1, // ctags line numbers are 1-based
var pathToEntries map[string][]*ctags.Entry
if source == ctags_config.UniversalCtags {
pathToEntries = map[string][]*ctags.Entry{
"a.pl": {
{
Name: "x",
Path: "a.pl",
Language: "Perl",
Line: 1, // ctags line numbers are 1-based
},
{
Name: "y",
Path: "a.pl",
Language: "Perl",
Line: 2,
},
},
{
Name: "y",
Path: "a.js",
Language: "JavaScript",
Line: 2,
".zshrc": {
{
Name: "z",
Path: ".zshrc",
Language: "Zsh",
Line: 1,
},
},
},
".zshrc": {
{
Name: "z",
Path: ".zshrc",
Language: "Zsh",
Line: 1,
}
} else if source == ctags_config.ScipCtags {
pathToEntries = map[string][]*ctags.Entry{
"b.magik": {
{
Name: "v",
Path: "b.magik",
Language: "Magik",
Line: 1, // ctags line numbers are 1-based
},
{
Name: "w",
Path: "b.magik",
Language: "Magik",
Line: 2,
},
},
},
}
} else {
t.Errorf("Invalid ctags type %d", source)
}

return newMockParser(pathToEntries), nil
}
parserPool, err := parser.NewParserPool(parserFactory, 15, parser.DefaultParserTypes)

parserPool, err := parser.NewParserPool(observation.TestContextTB(t), "test", parserFactory, 15, parser.DefaultParserTypes)
if err != nil {
t.Fatal(err)
}

files := map[string]string{
"a.js": "var x = 1\nvar y = 2",
".zshrc": "z=42",
"a.pl": "$x = 1\n$y = 2",
".zshrc": "z=42",
"b.magik": "v << 1\nw<<2",
}
gitserverClient := NewMockGitserverClient()
gitserverClient.FetchTarFunc.SetDefaultHook(gitserver.CreateTestFetchTarFunc(files))
Expand All @@ -94,16 +122,18 @@ func TestHandler(t *testing.T) {
GRPCConnectionCache: connectionCache,
}

x := result.Symbol{Name: "x", Path: "a.js", Language: "JavaScript", Line: 0, Character: 4}
y := result.Symbol{Name: "y", Path: "a.js", Language: "JavaScript", Line: 1, Character: 4}
x := result.Symbol{Name: "x", Path: "a.pl", Language: "Perl", Line: 0, Character: 1}
y := result.Symbol{Name: "y", Path: "a.pl", Language: "Perl", Line: 1, Character: 1}
z := result.Symbol{Name: "z", Path: ".zshrc", Language: "Zsh", Line: 0, Character: 0}
v := result.Symbol{Name: "v", Path: "b.magik", Language: "Magik", Line: 0, Character: 0}
w := result.Symbol{Name: "w", Path: "b.magik", Language: "Magik", Line: 1, Character: 0}

testCases := map[string]struct {
args search.SymbolsParameters
expected result.Symbols
}{
"simple": {
args: search.SymbolsParameters{IncludePatterns: []string{"^a.js$"}, First: 10},
args: search.SymbolsParameters{IncludePatterns: []string{"^a.pl$"}, First: 10},
expected: []result.Symbol{x, y},
},
"onematch": {
Expand All @@ -127,38 +157,48 @@ func TestHandler(t *testing.T) {
expected: nil,
},
"caseinsensitiveexactpathmatch": {
args: search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, First: 10},
args: search.SymbolsParameters{IncludePatterns: []string{"^A.pl$"}, First: 10},
expected: []result.Symbol{x, y},
},
"casesensitiveexactpathmatch": {
args: search.SymbolsParameters{IncludePatterns: []string{"^a.js$"}, IsCaseSensitive: true, First: 10},
args: search.SymbolsParameters{IncludePatterns: []string{"^a.pl$"}, IsCaseSensitive: true, First: 10},
expected: []result.Symbol{x, y},
},
"casesensitivenoexactpathmatch": {
args: search.SymbolsParameters{IncludePatterns: []string{"^A.js$"}, IsCaseSensitive: true, First: 10},
args: search.SymbolsParameters{IncludePatterns: []string{"^A.pl$"}, IsCaseSensitive: true, First: 10},
expected: nil,
},
"exclude": {
args: search.SymbolsParameters{ExcludePattern: "a.js", IsCaseSensitive: true, First: 10},
expected: []result.Symbol{z},
args: search.SymbolsParameters{ExcludePattern: "a.pl", IsCaseSensitive: true, First: 10},
expected: []result.Symbol{z, v, w},
},
"include lang filters": {
args: search.SymbolsParameters{Query: ".*", IncludeLangs: []string{"Javascript"}, IsCaseSensitive: true, First: 10},
args: search.SymbolsParameters{Query: ".*", IncludeLangs: []string{"Perl"}, IsCaseSensitive: true, First: 10},
expected: []result.Symbol{x, y},
},
"include lang filters with ctags conversion": {
args: search.SymbolsParameters{Query: ".*", IncludeLangs: []string{"Shell"}, IsCaseSensitive: true, First: 10},
expected: []result.Symbol{z},
},
"exclude lang filters": {
args: search.SymbolsParameters{Query: ".*", ExcludeLangs: []string{"Javascript"}, IsCaseSensitive: true, First: 10},
args: search.SymbolsParameters{Query: ".*", ExcludeLangs: []string{"Perl", "Magik"}, IsCaseSensitive: true, First: 10},
expected: []result.Symbol{z},
},
"scip-ctags only language": {
args: search.SymbolsParameters{Query: ".*", IncludeLangs: []string{"Magik"}, IsCaseSensitive: true, First: 10},
expected: []result.Symbol{v, w},
},
}

for label, testCase := range testCases {
t.Run(label, func(t *testing.T) {
resultSymbols, limitHit, err := client.Search(context.Background(), testCase.args)

// Sort to ensure consistent comparisons
sort.Slice(resultSymbols, func(i, j int) bool {
return resultSymbols[i].Path < resultSymbols[j].Path
})

if err != nil {
t.Fatalf("unexpected error performing search: %s", err)
}
Expand Down
18 changes: 0 additions & 18 deletions cmd/symbols/internal/parser/observability.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ import (

type operations struct {
parsing prometheus.Gauge
parseQueueSize prometheus.Gauge
parseQueueTimeouts prometheus.Counter
parseFailed prometheus.Counter
parseCanceled prometheus.Counter
parse *observation.Operation
Expand All @@ -29,20 +27,6 @@ func newOperations(observationCtx *observation.Context) *operations {
})
observationCtx.Registerer.MustRegister(parsing)

parseQueueSize := prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "src",
Name: "codeintel_symbols_parse_queue_size",
Help: "The number of parse jobs enqueued.",
})
observationCtx.Registerer.MustRegister(parseQueueSize)

parseQueueTimeouts := prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "src",
Name: "codeintel_symbols_parse_queue_timeouts_total",
Help: "The total number of parse jobs that timed out while enqueued.",
})
observationCtx.Registerer.MustRegister(parseQueueTimeouts)

parseFailed := prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "src",
Name: "codeintel_symbols_parse_failed_total",
Expand Down Expand Up @@ -87,8 +71,6 @@ func newOperations(observationCtx *observation.Context) *operations {

return &operations{
parsing: parsing,
parseQueueSize: parseQueueSize,
parseQueueTimeouts: parseQueueTimeouts,
parseFailed: parseFailed,
parseCanceled: parseCanceled,
parse: observationCtx.Operation(op("Parse")),
Expand Down
48 changes: 13 additions & 35 deletions cmd/symbols/internal/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"github.com/sourcegraph/sourcegraph/internal/observation"
"github.com/sourcegraph/sourcegraph/internal/search"
"github.com/sourcegraph/sourcegraph/internal/search/result"
"github.com/sourcegraph/sourcegraph/lib/codeintel/languages"
"github.com/sourcegraph/sourcegraph/lib/errors"
)

Expand All @@ -33,7 +32,7 @@ type SymbolOrError struct {
}

type parser struct {
parserPool *parserPool
parserPool *ParserPool
repositoryFetcher fetcher.RepositoryFetcher
requestBufferSize int
numParserProcesses int
Expand All @@ -42,7 +41,7 @@ type parser struct {

func NewParser(
observationCtx *observation.Context,
parserPool *parserPool,
parserPool *ParserPool,
repositoryFetcher fetcher.RepositoryFetcher,
requestBufferSize int,
numParserProcesses int,
Expand Down Expand Up @@ -144,20 +143,20 @@ func (p *parser) handleParseRequest(
}})
defer endObservation(1, observation.Args{})

language, found := languages.GetMostLikelyLanguage(parseRequest.Path, string(parseRequest.Data))
if !found {
return nil
parser, parserType, err := p.parserPool.GetParser(ctx, parseRequest.Path, parseRequest.Data)

// If the language has a parser but we cannot retrieve it, we get an error
if err != nil {
return err
}

source := GetParserType(language)
if ctags_config.ParserIsNoop(source) {
// If we cannot determine type of ctags it means we don't support symbols for
// this file type so we bail out early. This is not considered an error since
// many file types may not be supported
if ctags_config.ParserIsNoop(parserType) {
return nil
}

parser, err := p.parserFromPool(ctx, source)
if err != nil {
return err
}
trace.AddEvent("parser", attribute.String("event", "acquired parser from pool"))

defer func() {
Expand All @@ -168,7 +167,7 @@ func (p *parser) handleParseRequest(
}

if err == nil {
p.parserPool.Done(parser, source)
p.parserPool.Done(parser, parserType)
} else {
// If we are canceled we still kill the parser just in case, but
// we do not record as failure nor logspam since this is a more
Expand All @@ -182,7 +181,7 @@ func (p *parser) handleParseRequest(
// Close parser and return nil to pool, indicating that the next
// receiver should create a new parser
parser.Close()
p.parserPool.Done(nil, source)
p.parserPool.Done(nil, parserType)
}
}()

Expand Down Expand Up @@ -240,27 +239,6 @@ func (p *parser) handleParseRequest(
return nil
}

func (p *parser) parserFromPool(ctx context.Context, source ctags_config.ParserType) (ctags.Parser, error) {
if ctags_config.ParserIsNoop(source) {
return nil, errors.New("Should not pass Noop ParserType to this function")
}

p.operations.parseQueueSize.Inc()
defer p.operations.parseQueueSize.Dec()

parser, err := p.parserPool.Get(ctx, source)
if err != nil {
if err == context.DeadlineExceeded {
p.operations.parseQueueTimeouts.Inc()
}
if err != ctx.Err() {
err = errors.Wrap(err, "failed to create parser")
}
}

return parser, err
}

func shouldPersistEntry(e *ctags.Entry) bool {
if e.Name == "" {
return false
Expand Down
Loading

0 comments on commit b2e550c

Please sign in to comment.