-
Notifications
You must be signed in to change notification settings - Fork 1.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added detection for untrusted domains in script content - edited existing library and added new query and tests ("the Polyfill PR") #16886
Changes from all commits
a1b0703
ceda46e
1744a98
c985c9a
b4d8c48
73fc6bc
d289fb4
e2b37f9
1fe14e2
86afd54
dae2aeb
0aab2ae
01ec7c2
d71be8a
3f37fe6
040f948
00d91dc
61df4d2
c9af53f
11249e7
79980a9
de5ec1f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
/** | ||
* Provides classes for finding functionality that is loaded from untrusted sources and used in script or frame elements. | ||
*/ | ||
|
||
import javascript | ||
|
||
/** A location that adds a reference to an untrusted source. */ | ||
abstract class AddsUntrustedUrl extends Locatable { | ||
/** Gets an explanation why this source is untrusted. */ | ||
abstract string getProblem(); | ||
|
||
/** Gets the URL of the untrusted source. */ | ||
abstract string getUrl(); | ||
} | ||
|
||
/** Looks for static creation of an element and source. */ | ||
module StaticCreation { | ||
/** Holds if `host` is an alias of localhost. */ | ||
bindingset[host] | ||
predicate isLocalhostPrefix(string host) { | ||
host.toLowerCase() | ||
.regexpMatch([ | ||
"(?i)localhost(:[0-9]+)?/.*", "127.0.0.1(:[0-9]+)?/.*", "::1/.*", "\\[::1\\]:[0-9]+/.*" | ||
]) | ||
} | ||
|
||
/** Holds if `url` is a url that is vulnerable to a MITM attack. */ | ||
bindingset[url] | ||
predicate isUntrustedSourceUrl(string url) { | ||
exists(string hostPath | hostPath = url.regexpCapture("(?i)http://(.*)", 1) | | ||
not isLocalhostPrefix(hostPath) | ||
) | ||
} | ||
|
||
/** Holds if `url` refers to a CDN that needs an integrity check - even with https. */ | ||
bindingset[url] | ||
predicate isCdnUrlWithCheckingRequired(string url) { | ||
// Some CDN URLs are required to have an integrity attribute. We only add CDNs to that list | ||
// that recommend integrity-checking. | ||
exists(string hostname, string requiredCheckingHostname | | ||
hostname = url.regexpCapture("(?i)^(?:https?:)?//([^/]+)/.*\\.js$", 1) and | ||
isCdnDomainWithCheckingRequired(requiredCheckingHostname) and | ||
hostname = requiredCheckingHostname | ||
) | ||
} | ||
|
||
/** A script element that refers to untrusted content. */ | ||
class ScriptElementWithUntrustedContent extends AddsUntrustedUrl instanceof HTML::ScriptElement { | ||
ScriptElementWithUntrustedContent() { | ||
not exists(string digest | not digest = "" | super.getIntegrityDigest() = digest) and | ||
isUntrustedSourceUrl(super.getSourcePath()) | ||
} | ||
|
||
override string getUrl() { result = super.getSourcePath() } | ||
|
||
override string getProblem() { result = "Script loaded using unencrypted connection." } | ||
} | ||
|
||
/** A script element that refers to untrusted content. */ | ||
class CdnScriptElementWithUntrustedContent extends AddsUntrustedUrl, HTML::ScriptElement { | ||
CdnScriptElementWithUntrustedContent() { | ||
not exists(string digest | not digest = "" | this.getIntegrityDigest() = digest) and | ||
( | ||
isCdnUrlWithCheckingRequired(this.getSourcePath()) | ||
or | ||
isUrlWithUntrustedDomain(super.getSourcePath()) | ||
) | ||
} | ||
|
||
override string getUrl() { result = this.getSourcePath() } | ||
|
||
override string getProblem() { | ||
result = "Script loaded from content delivery network with no integrity check." | ||
} | ||
} | ||
|
||
/** An iframe element that includes untrusted content. */ | ||
class IframeElementWithUntrustedContent extends AddsUntrustedUrl instanceof HTML::IframeElement { | ||
IframeElementWithUntrustedContent() { isUntrustedSourceUrl(super.getSourcePath()) } | ||
|
||
override string getUrl() { result = super.getSourcePath() } | ||
|
||
override string getProblem() { result = "Iframe loaded using unencrypted connection." } | ||
} | ||
} | ||
|
||
/** Holds if `url` refers to an URL that uses an untrusted domain. */ | ||
bindingset[url] | ||
predicate isUrlWithUntrustedDomain(string url) { | ||
exists(string hostname | | ||
hostname = url.regexpCapture("(?i)^(?:https?:)?//([^/]+)/.*", 1) and | ||
isUntrustedHostname(hostname) | ||
) | ||
} | ||
|
||
/** Holds if `hostname` refers to a domain or subdomain that is untrusted. */ | ||
bindingset[hostname] | ||
predicate isUntrustedHostname(string hostname) { | ||
exists(string domain | | ||
(hostname = domain or hostname.matches("%." + domain)) and | ||
isUntrustedDomain(domain) | ||
) | ||
} | ||
|
||
// The following predicates are extended in data extensions under javascript/ql/lib/semmle/javascript/security/domains/ | ||
// and can be extended with custom model packs as necessary. | ||
/** Holds for hostnames defined in data extensions */ | ||
extensible predicate isCdnDomainWithCheckingRequired(string hostname); | ||
|
||
/** Holds for domains defined in data extensions */ | ||
extensible predicate isUntrustedDomain(string domain); | ||
|
||
/** Looks for dyanmic creation of an element and source. */ | ||
module DynamicCreation { | ||
/** Holds if `call` creates a tag of kind `name`. */ | ||
predicate isCreateElementNode(DataFlow::CallNode call, string name) { | ||
call = DataFlow::globalVarRef("document").getAMethodCall("createElement") and | ||
call.getArgument(0).getStringValue().toLowerCase() = name | ||
} | ||
|
||
/** Get the right-hand side of an assignment to a named attribute. */ | ||
DataFlow::Node getAttributeAssignmentRhs(DataFlow::CallNode createCall, string name) { | ||
result = createCall.getAPropertyWrite(name).getRhs() | ||
or | ||
exists(DataFlow::InvokeNode inv | inv = createCall.getAMemberInvocation("setAttribute") | | ||
inv.getArgument(0).getStringValue() = name and | ||
result = inv.getArgument(1) | ||
) | ||
} | ||
|
||
/** | ||
* Holds if `createCall` creates a `<script ../>` element which never | ||
* has its `integrity` attribute set locally. | ||
*/ | ||
predicate isCreateScriptNodeWoIntegrityCheck(DataFlow::CallNode createCall) { | ||
isCreateElementNode(createCall, "script") and | ||
not exists(getAttributeAssignmentRhs(createCall, "integrity")) | ||
} | ||
|
||
/** Holds if `t` tracks a URL that is loaded from an untrusted source. */ | ||
DataFlow::Node urlTrackedFromUnsafeSourceLiteral(DataFlow::TypeTracker t) { | ||
t.start() and result.getStringValue().regexpMatch("(?i)http:.*") | ||
or | ||
exists(DataFlow::TypeTracker t2, DataFlow::Node prev | | ||
prev = urlTrackedFromUnsafeSourceLiteral(t2) | ||
| | ||
not exists(string httpsUrl | httpsUrl.toLowerCase() = "https:" + any(string rest) | | ||
// when the result may have a string value starting with https, | ||
// we're most likely with an assignment like: | ||
// e.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js' | ||
// these assignments, we don't want to fix - once the browser is using http, | ||
// MITM attacks are possible anyway. | ||
result.mayHaveStringValue(httpsUrl) | ||
) and | ||
( | ||
t2 = t.smallstep(prev, result) | ||
or | ||
TaintTracking::sharedTaintStep(prev, result) and | ||
t = t2 | ||
) | ||
) | ||
} | ||
|
||
/** Holds a dataflow node is traked from an untrusted source. */ | ||
DataFlow::Node urlTrackedFromUnsafeSourceLiteral() { | ||
result = urlTrackedFromUnsafeSourceLiteral(DataFlow::TypeTracker::end()) | ||
} | ||
|
||
/** Holds if `sink` is assigned to the attribute `name` of any HTML element. */ | ||
predicate isAssignedToSrcAttribute(string name, DataFlow::Node sink) { | ||
exists(DataFlow::CallNode createElementCall | | ||
sink = getAttributeAssignmentRhs(createElementCall, "src") and | ||
( | ||
name = "script" and | ||
isCreateScriptNodeWoIntegrityCheck(createElementCall) | ||
or | ||
name = "iframe" and | ||
isCreateElementNode(createElementCall, "iframe") | ||
) | ||
) | ||
} | ||
|
||
/** A script or iframe element that refers to untrusted content. */ | ||
class IframeOrScriptSrcAssignment extends AddsUntrustedUrl { | ||
string name; | ||
|
||
IframeOrScriptSrcAssignment() { | ||
name = ["script", "iframe"] and | ||
exists(DataFlow::Node n | n.asExpr() = this | | ||
isAssignedToSrcAttribute(name, n) and | ||
n = urlTrackedFromUnsafeSourceLiteral() | ||
) | ||
} | ||
|
||
override string getUrl() { | ||
exists(DataFlow::Node n | n.asExpr() = this | | ||
isAssignedToSrcAttribute(name, n) and | ||
result = n.getStringValue() | ||
) | ||
} | ||
|
||
override string getProblem() { | ||
name = "script" and result = "Script loaded using unencrypted connection." | ||
or | ||
name = "iframe" and result = "Iframe loaded using unencrypted connection." | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
extensions: | ||
- addsTo: | ||
pack: codeql/javascript-all | ||
extensible: isCdnDomainWithCheckingRequired | ||
data: | ||
- ["code.jquery.com"] | ||
- ["cdnjs.cloudflare.com"] | ||
- ["cdnjs.com"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
extensions: | ||
- addsTo: | ||
pack: codeql/javascript-all | ||
extensible: isUntrustedDomain | ||
data: | ||
- ["polyfill.io"] |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
extensions: | ||
- addsTo: | ||
pack: codeql/javascript-all | ||
extensible: isUntrustedDomain | ||
data: | ||
# new location of the polyfill.io CDN, which was used to serve malware. See: https://www.cside.dev/blog/the-polyfill-attack-explained | ||
- ["polyfill.com"] | ||
- ["polyfillcache.com"] | ||
|
||
# domains operated by the same owner as polyfill.io, which was used to serve malware. See: https://www.cside.dev/blog/the-polyfill-attack-explained | ||
- ["bootcdn.net"] | ||
- ["bootcss.com"] | ||
- ["staticfile.net"] | ||
- ["staticfile.org"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suggest that we move this information into the Customizing Library Models for JavaScript — CodeQL article as a practical example. Alternatively, we could create a new article in https://github.com/github/codeql/tree/main/docs/codeql/codeql-language-guides, which could be extended with future examples. This would require converting the format to RST, but the content is fairly simple so this shouldn't be too much of an overhead and we can provide support if needed. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# Extending the library list of untrusted sources and domains | ||
|
||
You can expand the list of untrusted domains in the CodeQL library used by the `js/functionality-from-untrusted-source` and `js/functionality-from-untrusted-domain` queries using [CodeQL data extensions](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/). | ||
|
||
This allows you to add additional domains to warn users about and to require Subresource Integrity (SRI) checks on specific content delivery network (CDN) hostnames. | ||
|
||
For example, this YAML model can be used inside a CodeQL model pack to alert on uses of `example.com` in imported functionality, extending the `js/functionality-from-untrusted-domain` query: | ||
|
||
```yaml | ||
extensions: | ||
- addsTo: | ||
pack: codeql/javascript-all | ||
extensible: untrustedDomain | ||
data: | ||
- ["example.com"] | ||
``` | ||
|
||
To add new hostnames that always require SRI checking, this YAML model can be used to require SRI on `cdn.example.com`, extending the `js/functionality-from-untrusted-source` query: | ||
|
||
```yaml | ||
extensions: | ||
- addsTo: | ||
pack: codeql/javascript-all | ||
extensible: isCdnDomainWithCheckingRequired | ||
data: | ||
- ["cdn.example.com"] | ||
``` | ||
|
||
You would create a model pack with this information using metadata similar to that in the example below: | ||
|
||
```yaml | ||
name: my-org/javascript-untrusted-functionality-model-pack | ||
version: 1.0.0 | ||
extensionTargets: | ||
codeql/java-all: '*' | ||
dataExtensions: | ||
- models/**/*.yml | ||
``` | ||
|
||
## References | ||
|
||
- [Customizing library models for javascript](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/) | ||
- [Creating and working with CodeQL packs](https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
<!DOCTYPE qhelp PUBLIC | ||
"-//Semmle//qhelp//EN" | ||
"qhelp.dtd"> | ||
<qhelp> | ||
<overview> | ||
<p> | ||
Content Delivery Networks (CDNs) are used to deliver content to users quickly and efficiently. | ||
|
||
However, they can change hands or be operated by untrustworthy owners, risking the security of the sites that use them. | ||
|
||
Some CDN domains are operated by entities that have used CDNs to deliver malware, which this query identifies. | ||
</p> | ||
|
||
<p> | ||
For example, <code>polyfill.io</code> was a popular JavaScript CDN, | ||
used to support new web browser standards on older browsers. | ||
|
||
In February 2024 the domain was sold, and in June 2024 it was publicised that the domain | ||
had been used to serve malicious scripts. It was taken down later in that month, leaving a window | ||
where sites that used the service could have been compromised. | ||
|
||
The same operator runs several other CDNs, undermining trust in those too. | ||
</p> | ||
|
||
<p> | ||
Including a resource from an untrusted source or using an untrusted channel may | ||
allow an attacker to include arbitrary code in the response. | ||
When including an external resource (for example, a <code>script</code> element) on a page, | ||
it is important to ensure that the received data is not malicious. | ||
</p> | ||
|
||
<p> | ||
Even when <code>https</code> is used, an untrustworthy operator might deliver malware. | ||
</p> | ||
|
||
<p> | ||
See the [`CUSTOMIZING.md`](https://github.com/github/codeql/blob/main/javascript/ql/src/Security/CWE-830/CUSTOMIZING.md) file in the source code for this query for information on how to extend the list of untrusted domains used by this query. | ||
</p> | ||
</overview> | ||
|
||
<recommendation> | ||
<p> | ||
Carefully research the ownership of a Content Delivery Network (CDN) before using it in your application. | ||
</p> | ||
|
||
<p> | ||
If you find code that originated from an untrusted domain in your application, you should review your logs to check for compromise. | ||
</p> | ||
|
||
<p> | ||
To help mitigate the risk of including a script that could be compromised in the future, consider whether you need to | ||
use polyfill or another library at all. Modern browsers do not require a polyfill, and other popular libraries were made redundant by enhancements to HTML 5. | ||
</p> | ||
|
||
<p> | ||
If you do need a polyfill service or library, move to using a CDN that you trust. | ||
</p> | ||
|
||
<p> | ||
When you use a <code>script</code> or <code>link</code> element, | ||
you should check for <a href="https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity">subresource integrity (SRI)</a>, | ||
and pin to a hash of a version of the service that you can trust (for example, because you have audited it for security and unwanted features). | ||
|
||
A dynamic service cannot be easily used with SRI. Nevertheless, | ||
it is possible to list multiple acceptable SHA hashes in the <code>integrity</code> attribute, | ||
such as hashes for the content required for the major browsers used by your users. | ||
</p> | ||
|
||
<p> | ||
You can also choose to self-host an uncompromised version of the service or library. | ||
</p> | ||
</recommendation> | ||
|
||
<example> | ||
<p> | ||
The following example loads the Polyfill.io library from the <code>polyfill.io</code> CDN. This use was open to malicious scripts being served by the CDN. | ||
</p> | ||
|
||
<sample src="polyfill-compromised.html" /> | ||
|
||
<p> | ||
Instead, load the Polyfill library from a trusted CDN, as in the next example: | ||
</p> | ||
|
||
<sample src="polyfill-trusted.html" /> | ||
|
||
<p> | ||
If you know which browsers are used by the majority of your users, you can list the hashes of the polyfills for those browsers: | ||
</p> | ||
|
||
<sample src="polyfill-sri.html" /> | ||
|
||
</example> | ||
|
||
<references> | ||
<li>Sansec: <a href="https://sansec.io/research/polyfill-supply-chain-attack">Polyfill supply chain attack hits 100K+ sites</a></li> | ||
<li>Cloudflare: <a href="https://cdnjs.cloudflare.com/polyfill">Upgrade the web. Automatically. Delivers only the polyfills required by the user's web browser.</a></li> | ||
<li>Fastly: <a href="https://community.fastly.com/t/new-options-for-polyfill-io-users/2540">New options for Polyfill.io users</a></li> | ||
<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Polyfill_(programming)">Polyfill (programming)</a></li> | ||
<li>MDN Web Docs: <a href="https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity">Subresource Integrity</a></li> | ||
</references> | ||
</qhelp> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice, those are still active.
That removes my argument for keeping the new query in the
experimental/
folder, you can move the new query to the default suite.