Skip to content

Commit

Permalink
Merge pull request #17712 from yoff/python/re-finditer-match
Browse files Browse the repository at this point in the history
Python: model that `re.finditer` returns an iterable of `re.Match` objects
  • Loading branch information
yoff authored Oct 11, 2024
2 parents ac8b973 + 6bd4614 commit 2af60f1
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 20 deletions.
4 changes: 4 additions & 0 deletions python/ql/lib/change-notes/2024-10-09-finditer-match.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: minorAnalysis
---
* Modelled that `re.finditer` returns an iterable of `re.Match` objects. This is now understood by the API graph in many cases.
60 changes: 40 additions & 20 deletions python/ql/lib/semmle/python/frameworks/Stdlib.qll
Original file line number Diff line number Diff line change
Expand Up @@ -3284,6 +3284,18 @@ module StdlibPrivate {
}
}

/**
* A base API node for regular expression functions.
* Either the `re` module or a compiled regular expression.
*/
private API::Node re(boolean compiled) {
result = API::moduleImport("re") and
compiled = false
or
result = any(RePatternSummary c).getACall().(API::CallNode).getReturn() and
compiled = true
}

/**
* A flow summary for methods returning a `re.Match` object
*
Expand All @@ -3293,17 +3305,18 @@ module StdlibPrivate {
ReMatchSummary() { this = ["re.Match", "compiled re.Match"] }

override DataFlow::CallCfgNode getACall() {
this = "re.Match" and
result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall()
or
this = "compiled re.Match" and
result =
any(RePatternSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(["match", "search", "fullmatch"])
.getACall()
exists(API::Node re, boolean compiled |
re = re(compiled) and
(
compiled = false and
this = "re.Match"
or
compiled = true and
this = "compiled re.Match"
)
|
result = re.getMember(["match", "search", "fullmatch"]).getACall()
)
}

override DataFlow::ArgumentNode getACallback() { none() }
Expand Down Expand Up @@ -3340,6 +3353,13 @@ module StdlibPrivate {
}
}

/** An API node for a `re.Match` object */
private API::Node match() {
result = any(ReMatchSummary c).getACall().(API::CallNode).getReturn()
or
result = re(_).getMember("finditer").getReturn().getASubscript()
}

/**
* A flow summary for methods on a `re.Match` object
*
Expand All @@ -3353,15 +3373,7 @@ module StdlibPrivate {
methodName in ["expand", "group", "groups", "groupdict"]
}

override DataFlow::CallCfgNode getACall() {
result =
any(ReMatchSummary c)
.getACall()
.(API::CallNode)
.getReturn()
.getMember(methodName)
.getACall()
}
override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() }

override DataFlow::ArgumentNode getACallback() { none() }

Expand Down Expand Up @@ -3463,6 +3475,14 @@ module StdlibPrivate {
) and
preservesValue = false
)
or
// flow from input string to attribute on match object
exists(int arg | arg = methodName.(RegexExecutionMethod).getStringArgIndex() - offset |
input in ["Argument[" + arg + "]", "Argument[string:]"] and
methodName = "finditer" and
output = "ReturnValue.ListElement.Attribute[string]" and
preservesValue = true
)
)
}
}
Expand Down
6 changes: 6 additions & 0 deletions python/ql/test/library-tests/frameworks/stdlib/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@

compiled_pat.match(ts).string, # $ tainted
re.compile(ts).match("safe").re.pattern, # $ tainted

list(re.finditer(pat, ts))[0].string, # $ tainted
[m.string for m in re.finditer(pat, ts)], # $ tainted

list(re.finditer(pat, ts))[0].groups()[0], # $ MISSING: tainted // this requires list content in type tracking
[m.groups()[0] for m in re.finditer(pat, ts)], # $ tainted
)
ensure_not_tainted(
safe_match.expand("Hello \1"),
Expand Down

0 comments on commit 2af60f1

Please sign in to comment.