Skip to content
This repository has been archived by the owner on Sep 30, 2024. It is now read-only.

Commit

Permalink
[Backport 5.2] Search: fix delimiting escape sequences (#58935)
Browse files Browse the repository at this point in the history
Search: fix delimiting escape sequences (#57877)

For code insights, we wrap regex regex patterns with slashes when we're rebuilding the query string from the parsed query. However, we cannot just wrap a regex pattern in /.../ because the query scanning logic respects escape sequences, so anything that would be interpreted as an escape sequence by the query scanner would break the intent of the original regex.

This fixes the StringHuman method by correctly escaping regex patterns when delimiting them with /.../.

(cherry picked from commit 807c357)
  • Loading branch information
camdencheek authored Dec 13, 2023
1 parent da5f914 commit fb0a625
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 25 deletions.
10 changes: 10 additions & 0 deletions internal/insights/query/querybuilder/builder_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,16 @@ func TestPointDiffQuery(t *testing.T) {
SearchQuery: BasicQuery(`content:"TEST" patternType:regexp`),
},
autogold.Expect(BasicQuery(`after:2022-01-01T01:01:00Z before:2022-02-01T01:01:00Z type:diff patterntype:regexp content:"TEST"`)),
}, {
// Test for #57877. Previously, a slash in a regex pattern would not be escaped when we wrapped it with slashes.
"no mangle slashes",
PointDiffQueryOpts{
Before: before,
After: &after,
RepoList: []string{},
SearchQuery: BasicQuery(`patterntype:regexp <tag>value</tag>`),
},
autogold.Expect(BasicQuery("after:2022-01-01T01:01:00Z before:2022-02-01T01:01:00Z type:diff patterntype:regexp /<tag>value<\\/tag>/")),
},
}
for _, test := range tests {
Expand Down
23 changes: 4 additions & 19 deletions internal/insights/query/querybuilder/regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,7 @@ type PatternReplacer interface {
HasCaptureGroups() bool
}

var ptn = regexp.MustCompile(`[^\\]\/`)

func (r *regexpReplacer) replaceContent(replacement string) (BasicQuery, error) {
if r.needsSlashEscape {
replacement = strings.ReplaceAll(replacement, `/`, `\/`)
}

modified := searchquery.MapPattern(r.original.ToQ(), func(patternValue string, negated bool, annotation searchquery.Annotation) searchquery.Node {
return searchquery.Pattern{
Value: replacement,
Expand All @@ -136,10 +130,9 @@ func (r *regexpReplacer) replaceContent(replacement string) (BasicQuery, error)
}

type regexpReplacer struct {
original searchquery.Plan
pattern string
groups []group
needsSlashEscape bool
original searchquery.Plan
pattern string
groups []group
}

func (r *regexpReplacer) Replace(replacement string) (BasicQuery, error) {
Expand Down Expand Up @@ -188,19 +181,11 @@ func NewPatternReplacer(query BasicQuery, searchType searchquery.SearchType) (Pa
return nil, UnsupportedPatternTypeErr
}

needsSlashEscape := true
pattern := patterns[0]
if !pattern.Annotation.Labels.IsSet(searchquery.Regexp) {
return nil, UnsupportedPatternTypeErr
} else if !ptn.MatchString(pattern.Value) {
// because regexp annotated patterns implicitly escapes slashes in the regular expression we need to translate the pattern into
// a compatible pattern with `patternType:standard`, ie. escape the slashes `/`. We need to do this _before_ the replacement
// otherwise we may accidentally double escape in places we don't intend. However, if the string was already escaped we don't
// want to re-escape because it will break the semantic of the query. This means the only time we _don't_ escape slashes
// is if we detect a pattern that has an escaped slash.
needsSlashEscape = false
}

regexpGroups := findGroups(pattern.Value)
return &regexpReplacer{original: plan, groups: regexpGroups, pattern: pattern.Value, needsSlashEscape: needsSlashEscape}, nil
return &regexpReplacer{original: plan, groups: regexpGroups, pattern: pattern.Value}, nil
}
8 changes: 4 additions & 4 deletions internal/insights/query/querybuilder/regexp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,25 +212,25 @@ func TestReplace_Valid(t *testing.T) {
{
query: `\/insi(g)ht[s]\/`,
replacement: "ggg",
want: autogold.Expect(BasicQuery(`/\/insi(?:ggg)ht[s]\//`)),
want: autogold.Expect(BasicQuery("/\\\\\\/insi(?:ggg)ht[s]\\\\\\//")),
searchType: query.SearchTypeRegex,
},
{
query: `<title>(.*)</title>`,
replacement: "findme",
want: autogold.Expect(BasicQuery(`/<title>(?:findme)<\/title>/`)),
want: autogold.Expect(BasicQuery("/<title>(?:findme)<\\/title>/")),
searchType: query.SearchTypeRegex,
},
{
query: `(/\w+/)`,
replacement: `/sourcegraph/`,
want: autogold.Expect(BasicQuery(`/(?:\/sourcegraph\/)/`)),
want: autogold.Expect(BasicQuery("/(?:\\/sourcegraph\\/)/")),
searchType: query.SearchTypeRegex,
},
{
query: `/<title>(.*)<\/title>/`,
replacement: "findme",
want: autogold.Expect(BasicQuery(`/<title>(?:findme)<\/title>/`)),
want: autogold.Expect(BasicQuery("/<title>(?:findme)<\\/title>/")),
searchType: query.SearchTypeStandard,
},
}
Expand Down
19 changes: 19 additions & 0 deletions internal/search/query/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,25 @@ loop:
return string(result), count, nil
}

// Delimit inverts the process of ScanDelimiter, escaping any special
// characters or delimiters in s.
//
// NOTE: this does not provide a clean roundtrip with ScanDelimited because
// ScanDelimited is lossy. We cannot know whether a backslash was passed
// through because it was escaped or because its successor rune was not
// escapable.
func Delimit(s string, delimiter rune) string {
ds := string(delimiter)
delimitReplacer := strings.NewReplacer(
"\n", "\\n",
"\r", "\\r",
"\t", "\\t",
"\\", "\\\\",
ds, "\\"+ds,
)
return ds + delimitReplacer.Replace(s) + ds
}

// ScanField scans an optional '-' at the beginning of a string, and then scans
// one or more alphabetic characters until it encounters a ':'. The prefix
// string is checked against valid fields. If it is valid, the function returns
Expand Down
31 changes: 31 additions & 0 deletions internal/search/query/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,37 @@ func TestScanDelimited(t *testing.T) {
_ = test(`a"`, '"')
}

func TestDelimited(t *testing.T) {
inputs := []string{
"test",
"test\nabc",
"test\r\nabc",
"test\a\fabc",
"test\t\tabc",
"'test'",
"\"test\"",
"\"/test/\"",
"/test/",
"/test\\/abc/",
"\\\\",
"\\",
"\\/",
}
delimiters := []rune{'/', '"', '\''}

for _, input := range inputs {
for _, delimiter := range delimiters {
delimited := Delimit(input, delimiter)
undelimited, _, err := ScanDelimited([]byte(delimited), false, delimiter)
if err != nil {
t.Fatal(err)
}
redelimited := Delimit(undelimited, delimiter)
require.Equal(t, delimited, redelimited)
}
}
}

func TestMergePatterns(t *testing.T) {
test := func(input string) string {
p := &parser{buf: []byte(input), heuristics: parensAsPatterns}
Expand Down
2 changes: 1 addition & 1 deletion internal/search/query/printer.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ func stringHumanPattern(nodes []Node) string {
v = strconv.Quote(v)
}
if n.Annotation.Labels.IsSet(Regexp) {
v = fmt.Sprintf("/%s/", v)
v = Delimit(v, '/')
}
if _, _, ok := ScanBalancedPattern([]byte(v)); !ok && !n.Annotation.Labels.IsSet(IsAlias) && n.Annotation.Labels.IsSet(Literal) {
v = fmt.Sprintf(`content:%s`, strconv.Quote(v))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"Input": "/abcd\\// patterntype:regexp",
"Result": "patterntype:regexp /abcd//"
"Result": "patterntype:regexp /abcd\\//"
}

0 comments on commit fb0a625

Please sign in to comment.