github · sidshank · Jul 12, 2024 · Jul 1, 2024 · Jul 1, 2024 · Jul 1, 2024
@@ -16,4 +16,5 @@ dependencies:
 dataExtensions:
   - semmle/javascript/frameworks/**/model.yml
   - semmle/javascript/frameworks/**/*.model.yml
+  - semmle/javascript/security/domains/**/*.model.yml
 warnOnImplicitThis: true
@@ -0,0 +1,208 @@
+/**
+ * Provides classes for finding functionality that is loaded from untrusted sources and used in script or frame elements.
+ */
+
+import javascript
+
+/** A location that adds a reference to an untrusted source. */
+abstract class AddsUntrustedUrl extends Locatable {
+  /** Gets an explanation why this source is untrusted. */
+  abstract string getProblem();
+
+  /** Gets the URL of the untrusted source. */
+  abstract string getUrl();
+}
+
+/** Looks for static creation of an element and source. */
+module StaticCreation {
+  /** Holds if `host` is an alias of localhost. */
+  bindingset[host]
+  predicate isLocalhostPrefix(string host) {
+    host.toLowerCase()
+        .regexpMatch([
+            "(?i)localhost(:[0-9]+)?/.*", "127.0.0.1(:[0-9]+)?/.*", "::1/.*", "\\[::1\\]:[0-9]+/.*"
+          ])
+  }
+
+  /** Holds if `url` is a url that is vulnerable to a MITM attack. */
+  bindingset[url]
+  predicate isUntrustedSourceUrl(string url) {
+    exists(string hostPath | hostPath = url.regexpCapture("(?i)http://(.*)", 1) |
+      not isLocalhostPrefix(hostPath)
+    )
+  }
+
+  /** Holds if `url` refers to a CDN that needs an integrity check - even with https. */
+  bindingset[url]
+  predicate isCdnUrlWithCheckingRequired(string url) {
+    // Some CDN URLs are required to have an integrity attribute. We only add CDNs to that list
+    // that recommend integrity-checking.
+    exists(string hostname, string requiredCheckingHostname |
+      hostname = url.regexpCapture("(?i)^(?:https?:)?//([^/]+)/.*\\.js$", 1) and
+      isCdnDomainWithCheckingRequired(requiredCheckingHostname) and
+      hostname = requiredCheckingHostname
+    )
+  }
+
+  /** A script element that refers to untrusted content. */
+  class ScriptElementWithUntrustedContent extends AddsUntrustedUrl instanceof HTML::ScriptElement {
+    ScriptElementWithUntrustedContent() {
+      not exists(string digest | not digest = "" | super.getIntegrityDigest() = digest) and
+      isUntrustedSourceUrl(super.getSourcePath())
+    }
+
+    override string getUrl() { result = super.getSourcePath() }
+
+    override string getProblem() { result = "Script loaded using unencrypted connection." }
+  }
+
+  /** A script element that refers to untrusted content. */
+  class CdnScriptElementWithUntrustedContent extends AddsUntrustedUrl, HTML::ScriptElement {
+    CdnScriptElementWithUntrustedContent() {
+      not exists(string digest | not digest = "" | this.getIntegrityDigest() = digest) and
+      (
+        isCdnUrlWithCheckingRequired(this.getSourcePath())
+        or
+        isUrlWithUntrustedDomain(super.getSourcePath())
+      )
+    }
+
+    override string getUrl() { result = this.getSourcePath() }
+
+    override string getProblem() {
+      result = "Script loaded from content delivery network with no integrity check."
+    }
+  }
+
+  /** An iframe element that includes untrusted content. */
+  class IframeElementWithUntrustedContent extends AddsUntrustedUrl instanceof HTML::IframeElement {
+    IframeElementWithUntrustedContent() { isUntrustedSourceUrl(super.getSourcePath()) }
+
+    override string getUrl() { result = super.getSourcePath() }
+
+    override string getProblem() { result = "Iframe loaded using unencrypted connection." }
+  }
+}
+
+/** Holds if `url` refers to an URL that uses an untrusted domain. */
+bindingset[url]
+predicate isUrlWithUntrustedDomain(string url) {
+  exists(string hostname |
+    hostname = url.regexpCapture("(?i)^(?:https?:)?//([^/]+)/.*", 1) and
+    isUntrustedHostname(hostname)
+  )
+}
+
+/** Holds if `hostname` refers to a domain or subdomain that is untrusted. */
+bindingset[hostname]
+predicate isUntrustedHostname(string hostname) {
+  exists(string domain |
+    (hostname = domain or hostname.matches("%." + domain)) and
+    isUntrustedDomain(domain)
+  )
+}
+
+// The following predicates are extended in data extensions under javascript/ql/lib/semmle/javascript/security/domains/
+// and can be extended with custom model packs as necessary.
+/** Holds for hostnames defined in data extensions */
+extensible predicate isCdnDomainWithCheckingRequired(string hostname);
+
+/** Holds for domains defined in data extensions */
+extensible predicate isUntrustedDomain(string domain);
+
+/** Looks for dyanmic creation of an element and source. */
+module DynamicCreation {
+  /** Holds if `call` creates a tag of kind `name`. */
+  predicate isCreateElementNode(DataFlow::CallNode call, string name) {
+    call = DataFlow::globalVarRef("document").getAMethodCall("createElement") and
+    call.getArgument(0).getStringValue().toLowerCase() = name
+  }
+
+  /** Get the right-hand side of an assignment to a named attribute. */
+  DataFlow::Node getAttributeAssignmentRhs(DataFlow::CallNode createCall, string name) {
+    result = createCall.getAPropertyWrite(name).getRhs()
+    or
+    exists(DataFlow::InvokeNode inv | inv = createCall.getAMemberInvocation("setAttribute") |
+      inv.getArgument(0).getStringValue() = name and
+      result = inv.getArgument(1)
+    )
+  }
+
+  /**
+   * Holds if `createCall` creates a `<script ../>` element which never
+   * has its `integrity` attribute set locally.
+   */
+  predicate isCreateScriptNodeWoIntegrityCheck(DataFlow::CallNode createCall) {
+    isCreateElementNode(createCall, "script") and
+    not exists(getAttributeAssignmentRhs(createCall, "integrity"))
+  }
+
+  /** Holds if `t` tracks a URL that is loaded from an untrusted source. */
+  DataFlow::Node urlTrackedFromUnsafeSourceLiteral(DataFlow::TypeTracker t) {
+    t.start() and result.getStringValue().regexpMatch("(?i)http:.*")
+    or
+    exists(DataFlow::TypeTracker t2, DataFlow::Node prev |
+      prev = urlTrackedFromUnsafeSourceLiteral(t2)
+    |
+      not exists(string httpsUrl | httpsUrl.toLowerCase() = "https:" + any(string rest) |
+        // when the result may have a string value starting with https,
+        // we're most likely with an assignment like:
+        // e.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js'
+        // these assignments, we don't want to fix - once the browser is using http,
+        // MITM attacks are possible anyway.
+        result.mayHaveStringValue(httpsUrl)
+      ) and
+      (
+        t2 = t.smallstep(prev, result)
+        or
+        TaintTracking::sharedTaintStep(prev, result) and
+        t = t2
+      )
+    )
+  }
+
+  /** Holds a dataflow node is traked from an untrusted source. */
+  DataFlow::Node urlTrackedFromUnsafeSourceLiteral() {
+    result = urlTrackedFromUnsafeSourceLiteral(DataFlow::TypeTracker::end())
+  }
+
+  /** Holds if `sink` is assigned to the attribute `name` of any HTML element. */
+  predicate isAssignedToSrcAttribute(string name, DataFlow::Node sink) {
+    exists(DataFlow::CallNode createElementCall |
+      sink = getAttributeAssignmentRhs(createElementCall, "src") and
+      (
+        name = "script" and
+        isCreateScriptNodeWoIntegrityCheck(createElementCall)
+        or
+        name = "iframe" and
+        isCreateElementNode(createElementCall, "iframe")
+      )
+    )
+  }
+
+  /** A script or iframe element that refers to untrusted content. */
+  class IframeOrScriptSrcAssignment extends AddsUntrustedUrl {
+    string name;
+
+    IframeOrScriptSrcAssignment() {
+      name = ["script", "iframe"] and
+      exists(DataFlow::Node n | n.asExpr() = this |
+        isAssignedToSrcAttribute(name, n) and
+        n = urlTrackedFromUnsafeSourceLiteral()
+      )
+    }
+
+    override string getUrl() {
+      exists(DataFlow::Node n | n.asExpr() = this |
+        isAssignedToSrcAttribute(name, n) and
+        result = n.getStringValue()
+      )
+    }
+
+    override string getProblem() {
+      name = "script" and result = "Script loaded using unencrypted connection."
+      or
+      name = "iframe" and result = "Iframe loaded using unencrypted connection."
+    }
+  }
+}
@@ -0,0 +1,8 @@
+extensions:
+  - addsTo: 
+      pack: codeql/javascript-all
+      extensible: isCdnDomainWithCheckingRequired
+    data:
+      - ["code.jquery.com"]
+      - ["cdnjs.cloudflare.com"]
+      - ["cdnjs.com"]
@@ -0,0 +1,6 @@
+extensions:
+  - addsTo: 
+      pack: codeql/javascript-all
+      extensible: isUntrustedDomain
+    data:
+      - ["polyfill.io"]
@@ -0,0 +1,14 @@
+extensions:
+  - addsTo: 
+      pack: codeql/javascript-all
+      extensible: isUntrustedDomain
+    data:
+      # new location of the polyfill.io CDN, which was used to serve malware. See: https://www.cside.dev/blog/the-polyfill-attack-explained
+      - ["polyfill.com"]
+      - ["polyfillcache.com"]
+
+      # domains operated by the same owner as polyfill.io, which was used to serve malware. See: https://www.cside.dev/blog/the-polyfill-attack-explained
+      - ["bootcdn.net"]
+      - ["bootcss.com"]
+      - ["staticfile.net"]
+      - ["staticfile.org"]
@@ -0,0 +1,43 @@
+# Extending the library list of untrusted sources and domains
+
+You can expand the list of untrusted domains in the CodeQL library used by the `js/functionality-from-untrusted-source` and `js/functionality-from-untrusted-domain` queries using [CodeQL data extensions](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/).
+
+This allows you to add additional domains to warn users about and to require Subresource Integrity (SRI) checks on specific content delivery network (CDN) hostnames.
+
+For example, this YAML model can be used inside a CodeQL model pack to alert on uses of `example.com` in imported functionality, extending the `js/functionality-from-untrusted-domain` query:
+
+```yaml
+extensions:
+  - addsTo:
+      pack: codeql/javascript-all
+      extensible: untrustedDomain
+    data:
+      - ["example.com"]
+```
+
+To add new hostnames that always require SRI checking, this YAML model can be used to require SRI on `cdn.example.com`, extending the `js/functionality-from-untrusted-source` query:
+
+```yaml
+extensions:
+  - addsTo:
+      pack: codeql/javascript-all
+      extensible: isCdnDomainWithCheckingRequired
+    data:
+      - ["cdn.example.com"]
+```
+
+You would create a model pack with this information using metadata similar to that in the example below:
+
+```yaml
+name: my-org/javascript-untrusted-functionality-model-pack
+version: 1.0.0
+extensionTargets:
+  codeql/java-all: '*'
+dataExtensions:
+  - models/**/*.yml
+```
+
+## References
+
+- [Customizing library models for javascript](https://codeql.github.com/docs/codeql-language-guides/customizing-library-models-for-javascript/)
+- [Creating and working with CodeQL packs](https://docs.github.com/en/code-security/codeql-cli/using-the-advanced-functionality-of-the-codeql-cli/creating-and-working-with-codeql-packs#creating-a-codeql-model-pack)
@@ -0,0 +1,102 @@
+<!DOCTYPE qhelp PUBLIC
+"-//Semmle//qhelp//EN"
+"qhelp.dtd">
+<qhelp>
+	<overview>
+		<p>
+			Content Delivery Networks (CDNs) are used to deliver content to users quickly and efficiently.
+
+			However, they can change hands or be operated by untrustworthy owners, risking the security of the sites that use them.
+
+			Some CDN domains are operated by entities that have used CDNs to deliver malware, which this query identifies.
+		</p>
+
+		<p>
+		    For example, <code>polyfill.io</code> was a popular JavaScript CDN,
+			used to support new web browser standards on older browsers.
+
+		    In February 2024 the domain was sold, and in June 2024 it was publicised that the domain
+		    had been used to serve malicious scripts. It was taken down later in that month, leaving a window
+			where sites that used the service could have been compromised.
+
+			The same operator runs several other CDNs, undermining trust in those too.
+		</p>
+
+		<p>
+			Including a resource from an untrusted source or using an untrusted channel may
+			allow an attacker to include arbitrary code in the response.
+			When including an external resource (for example, a <code>script</code> element) on a page,
+			it is important to ensure that the received data is not malicious.
+		</p>
+
+		<p>
+			Even when <code>https</code> is used, an untrustworthy operator might deliver malware.
+		</p>
+
+		<p>
+			See the [`CUSTOMIZING.md`](https://github.com/github/codeql/blob/main/javascript/ql/src/Security/CWE-830/CUSTOMIZING.md) file in the source code for this query for information on how to extend the list of untrusted domains used by this query.
+		</p>
+	</overview>
+
+	<recommendation>
+		<p>
+			Carefully research the ownership of a Content Delivery Network (CDN) before using it in your application.
+		</p>
+
+		<p>
+			If you find code that originated from an untrusted domain in your application, you should review your logs to check for compromise.
+		</p>
+
+		<p>
+			To help mitigate the risk of including a script that could be compromised in the future, consider whether you need to
+			use polyfill or another library at all. Modern browsers do not require a polyfill, and other popular libraries were made redundant by enhancements to HTML 5.
+		</p>
+
+		<p>
+			If you do need a polyfill service or library, move to using a CDN that you trust.
+		</p>
+
+		<p>
+			When you use a <code>script</code> or <code>link</code> element,
+			you should check for <a href="https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity">subresource integrity (SRI)</a>,
+			and pin to a hash of a version of the service that you can trust (for example, because you have audited it for security and unwanted features).
+
+			A dynamic service cannot be easily used with SRI. Nevertheless,
+			it is possible to list multiple acceptable SHA hashes in the <code>integrity</code> attribute,
+			such as hashes for the content required for the major browsers used by your users.
+		</p>
+
+		<p>
+			You can also choose to self-host an uncompromised version of the service or library.
+		</p>
+	</recommendation>
+
+	<example>
+		<p>
+			The following example loads the Polyfill.io library from the <code>polyfill.io</code> CDN. This use was open to malicious scripts being served by the CDN.
+		</p>
+
+		<sample src="polyfill-compromised.html" />
+
+		<p>
+			Instead, load the Polyfill library from a trusted CDN, as in the next example:
+		</p>
+
+		<sample src="polyfill-trusted.html" />
+
+		<p>
+			If you know which browsers are used by the majority of your users, you can list the hashes of the polyfills for those browsers:
+		</p>
+
+		<sample src="polyfill-sri.html" />
+
+	</example>
+
+	<references>
+		<li>Sansec: <a href="https://sansec.io/research/polyfill-supply-chain-attack">Polyfill supply chain attack hits 100K+ sites</a></li>
+		<li>Cloudflare: <a href="https://cdnjs.cloudflare.com/polyfill">Upgrade the web. Automatically. Delivers only the polyfills required by the user's web browser.</a></li>
+		<li>Fastly: <a href="https://community.fastly.com/t/new-options-for-polyfill-io-users/2540">New options for Polyfill.io users</a></li>
+		<li>Wikipedia: <a href="https://en.wikipedia.org/wiki/Polyfill_(programming)">Polyfill (programming)</a></li>
+		<li>MDN Web Docs: <a href="https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity">Subresource Integrity</a></li>
+	</references>
+</qhelp>