Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C#: Fix glob pattern processing: allow **/ to match empty string #16895

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 36 additions & 5 deletions csharp/extractor/Semmle.Extraction.Tests/FilePathFilter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,9 @@ public void TestFiltersWithIncludeExcludeComplexPatterns1()

var expectedRegexMessages = new[]
{
"Filtering in files matching '^c/.*/i\\.[^/]*.*'. Original glob filter: 'include:c/**/i.*'",
"Filtering in files matching '^c/d/.*/[^/]*\\.cs.*'. Original glob filter: 'include:c/d/**/*.cs'",
"Filtering out files matching '^.*/z/i\\.cs.*'. Original glob filter: 'exclude:**/z/i.cs'"
"Filtering in files matching '^c/(.*/|)i\\.[^/]*.*'. Original glob filter: 'include:c/**/i.*'",
"Filtering in files matching '^c/d/(.*/|)[^/]*\\.cs.*'. Original glob filter: 'include:c/d/**/*.cs'",
"Filtering out files matching '^(.*/|)z/i\\.cs.*'. Original glob filter: 'exclude:**/z/i.cs'"
};
Assert.Equivalent(expectedRegexMessages, logger.Messages, strict: false);
}
Expand All @@ -244,10 +244,41 @@ public void TestFiltersWithIncludeExcludeComplexPatterns2()

var expectedRegexMessages = new[]
{
"Filtering in files matching '^.*/i\\.[^/]*.*'. Original glob filter: 'include:**/i.*'",
"Filtering out files matching '^.*/z/i\\.cs.*'. Original glob filter: 'exclude:**/z/i.cs'"
"Filtering in files matching '^(.*/|)i\\.[^/]*.*'. Original glob filter: 'include:**/i.*'",
"Filtering out files matching '^(.*/|)z/i\\.cs.*'. Original glob filter: 'exclude:**/z/i.cs'"
};
Assert.Equivalent(expectedRegexMessages, logger.Messages, strict: false);
}

[Fact]
public void TestFiltersWithIncludeExcludeComplexPatternsRelativeRoot()
{
(var testSubject, var logger, var files) = TestSetup();

// 'c' is the start of the relative path so we want to ensure the `**/` glob can match start
Environment.SetEnvironmentVariable("LGTM_INDEX_FILTERS", """
include:**/c/**/i.*
exclude:**/c/**/z/i.cs
exclude:**/**/c/**/z/i.cs
""");

var filtered = testSubject.Filter(files);

var expected = GetExpected(
[
"/a/b/c/x/y/i.cs",
]);

AssertFileInfoEquivalence(expected, filtered);
var expectedRegexMessages = new[]
{
"Filtering in files matching '^(.*/|)c/(.*/|)i\\.[^/]*.*'. Original glob filter: 'include:**/c/**/i.*'",
"Filtering out files matching '^(.*/|)c/(.*/|)z/i\\.cs.*'. Original glob filter: 'exclude:**/c/**/z/i.cs'",
"Filtering out files matching '^(.*/|)(.*/|)c/(.*/|)z/i\\.cs.*'. Original glob filter: 'exclude:**/**/c/**/z/i.cs'"
};


Assert.Equivalent(expectedRegexMessages, logger.Messages, strict: false);
}
}
}
6 changes: 4 additions & 2 deletions csharp/extractor/Semmle.Extraction.Tests/FilePattern.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@ public void TestRegexCompilation()
var fp = new FilePattern("/hadoop*");
Assert.Equal("^hadoop[^/]*.*", fp.RegexPattern);
fp = new FilePattern("**/org/apache/hadoop");
Assert.Equal("^.*/org/apache/hadoop.*", fp.RegexPattern);
Assert.Equal("^(.*/|)org/apache/hadoop.*", fp.RegexPattern);
fp = new FilePattern("hadoop-common/**/test// ");
Assert.Equal("^hadoop-common/.*/test(?<doubleslash>/).*", fp.RegexPattern);
Assert.Equal("^hadoop-common/(.*/|)test(?<doubleslash>/).*", fp.RegexPattern);
fp = new FilePattern(@"-C:\agent\root\asdf//");
Assert.Equal("^C:/agent/root/asdf(?<doubleslash>/).*", fp.RegexPattern);
fp = new FilePattern(@"-C:\agent+\[root]\asdf//");
Assert.Equal(@"^C:/agent\+/\[root]/asdf(?<doubleslash>/).*", fp.RegexPattern);
fp = new FilePattern(@"**/**/abc/**/def/**");
Assert.Equal(@"^(.*/|)(.*/|)abc/(.*/|)def/.*", fp.RegexPattern);
}

[Fact]
Expand Down
16 changes: 14 additions & 2 deletions csharp/extractor/Semmle.Extraction/FilePattern.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,20 @@ bool HasCharAt(int i, Predicate<char> p) =>
throw new InvalidFilePatternException(pattern, "'**' preceeded by non-`/` character.");
if (HasCharAt(i + 2, c => c != '/'))
throw new InvalidFilePatternException(pattern, "'**' succeeded by non-`/` character");
sb.Append(".*");
i += 2;

if (i + 2 < pattern.Length)
{
// Processing .../**/...
sb.Append("(.*/|)");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had took look at this a while for parsing it: It is an alternation construct that matches any sequence of characters that ends with / OR "nothing".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's the idea.

i += 3;
}
else
Copy link
Contributor

@michaelnebel michaelnebel Jul 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need this case?
What about

if (i + 2 < pattern.Length)
{
    // Processing .../**/...
    sb.Append("(.*/|)");
}
i += 3;

If adding three to the index puts the pointer at the end of the pattern we are parsing then we break out of the loop and append .*

Copy link
Contributor Author

@tamasvajk tamasvajk Jul 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this would also work, but I slightly prefer to handle this case explicitly. I think it's going to be easier to understand and maintain going forward.

{
// Processing .../**
sb.Append(".*");
// There's no need to add another .* to the end outside the loop, we can return early.
return sb;
}
}
else
{
Expand Down
Loading