From 5ec8e5dd301ebab4291b3830434c26635d7fcf2c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 11:21:27 +0200 Subject: [PATCH 01/21] Python: Setup support for threat-models Naming in other languages: - `SourceNode` (for QL only modeling) - `ThreatModelFlowSource` (for active sources from QL or data-extensions) However, since we use `LocalSourceNode` in Python, and `SourceNode` in JS (for local source nodes), it seems a bit confusing to follow the same naming convention as other languages, and instead I came up with new names. --- python/ql/lib/qlpack.yml | 1 + python/ql/lib/semmle/python/Concepts.qll | 48 +++++++++++++++++++ .../python/dataflow/new/RemoteFlowSources.qll | 10 ++-- .../python/frameworks/data/ModelsAsData.qll | 13 +++-- 4 files changed, 61 insertions(+), 11 deletions(-) diff --git a/python/ql/lib/qlpack.yml b/python/ql/lib/qlpack.yml index 81d09c13b5d3..2abc452ef503 100644 --- a/python/ql/lib/qlpack.yml +++ b/python/ql/lib/qlpack.yml @@ -9,6 +9,7 @@ dependencies: codeql/dataflow: ${workspace} codeql/mad: ${workspace} codeql/regex: ${workspace} + codeql/threat-models: ${workspace} codeql/tutorial: ${workspace} codeql/util: ${workspace} codeql/xml: ${workspace} diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 75b884a9dd4d..4703249ab93d 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -10,6 +10,54 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.TaintTracking private import semmle.python.Frameworks private import semmle.python.security.internal.EncryptionKeySizes +private import codeql.threatmodels.ThreatModels + +/** + * A data flow source, for a specific threat-model. + * + * Extend this class to refine existing API models. If you want to model new APIs, + * extend `ThreatModelSource::Range` instead. + */ +class ThreatModelSource extends DataFlow::Node instanceof ThreatModelSource::Range { + /** + * Gets a string that represents the source kind with respect to threat modeling. + */ + string getThreatModel() { result = super.getThreatModel() } + + /** Gets a string that describes the type of this threat-model source. */ + string getSourceType() { result = super.getSourceType() } +} + +/** Provides a class for modeling new sources for specific threat-models. */ +module ThreatModelSource { + /** + * A data flow source, for a specific threat-model. + * + * Extend this class to model new APIs. If you want to refine existing API models, + * extend `ThreatModelSource` instead. + */ + abstract class Range extends DataFlow::Node { + /** + * Gets a string that represents the source kind with respect to threat modeling. + */ + abstract string getThreatModel(); + + /** Gets a string that describes the type of this threat-model source. */ + abstract string getSourceType(); + } +} + +/** + * A data flow source that is enabled in the current threat model configuration. + */ +class ActiveThreatModelSource extends DataFlow::Node { + ActiveThreatModelSource() { + exists(string kind | + currentThreatModel(kind) and + this.(ThreatModelSource).getThreatModel() = kind + ) + } +} /** * A data-flow node that executes an operating system command, diff --git a/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll b/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll index 4ad0aee1f313..8975b967c813 100644 --- a/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll +++ b/python/ql/lib/semmle/python/dataflow/new/RemoteFlowSources.qll @@ -15,10 +15,7 @@ private import semmle.python.Concepts * Extend this class to refine existing API models. If you want to model new APIs, * extend `RemoteFlowSource::Range` instead. */ -class RemoteFlowSource extends DataFlow::Node instanceof RemoteFlowSource::Range { - /** Gets a string that describes the type of this remote flow source. */ - string getSourceType() { result = super.getSourceType() } -} +class RemoteFlowSource extends ThreatModelSource instanceof RemoteFlowSource::Range { } /** Provides a class for modeling new sources of remote user input. */ module RemoteFlowSource { @@ -28,8 +25,7 @@ module RemoteFlowSource { * Extend this class to model new APIs. If you want to refine existing API models, * extend `RemoteFlowSource` instead. */ - abstract class Range extends DataFlow::Node { - /** Gets a string that describes the type of this remote flow source. */ - abstract string getSourceType(); + abstract class Range extends ThreatModelSource::Range { + override string getThreatModel() { result = "remote" } } } diff --git a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll index c2176c0644b9..11c6b285f2aa 100644 --- a/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll +++ b/python/ql/lib/semmle/python/frameworks/data/ModelsAsData.qll @@ -18,14 +18,19 @@ private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.dataflow.new.DataFlow private import semmle.python.ApiGraphs private import semmle.python.dataflow.new.FlowSummary +private import semmle.python.Concepts /** - * A remote flow source originating from a CSV source row. + * A threat-model flow source originating from a data extension. */ -private class RemoteFlowSourceFromCsv extends RemoteFlowSource::Range { - RemoteFlowSourceFromCsv() { this = ModelOutput::getASourceNode("remote").asSource() } +private class ThreatModelSourceFromDataExtension extends ThreatModelSource::Range { + ThreatModelSourceFromDataExtension() { this = ModelOutput::getASourceNode(_).asSource() } - override string getSourceType() { result = "Remote flow (from model)" } + override string getThreatModel() { this = ModelOutput::getASourceNode(result).asSource() } + + override string getSourceType() { + result = "Source node (" + this.getThreatModel() + ") [from data-extension]" + } } private class SummarizedCallableFromModel extends SummarizedCallable { From 766dcc4dd601e2e112de8c3a2f49ebdb4df7903b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 11:21:51 +0200 Subject: [PATCH 02/21] ThreatModels: Expose `knownThreatModel` Without, it's impossible to write test showing what threat-models are active by default... unless I provide a hardcoded list in the test itself, which is not any fun. --- shared/threat-models/codeql/threatmodels/ThreatModels.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shared/threat-models/codeql/threatmodels/ThreatModels.qll b/shared/threat-models/codeql/threatmodels/ThreatModels.qll index d12139ef28ea..19dfd0d1a656 100644 --- a/shared/threat-models/codeql/threatmodels/ThreatModels.qll +++ b/shared/threat-models/codeql/threatmodels/ThreatModels.qll @@ -29,7 +29,7 @@ extensible predicate threatModelConfiguration(string kind, boolean enable, int p extensible private predicate threatModelGrouping(string kind, string group); /** Holds if the specified threat model kind is mentioned in either the configuration or grouping table. */ -private predicate knownThreatModel(string kind) { +predicate knownThreatModel(string kind) { threatModelConfiguration(kind, _, _) or threatModelGrouping(kind, _) or threatModelGrouping(_, kind) or From 617ab27c7502060d611224ef771b554ae1e65828 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 11:24:37 +0200 Subject: [PATCH 03/21] Python: Add test showing default active threat-models --- .../threat-models/default/ActiveKinds.expected | 4 ++++ .../library-tests/threat-models/default/ActiveKinds.ql | 7 +++++++ 2 files changed, 11 insertions(+) create mode 100644 python/ql/test/library-tests/threat-models/default/ActiveKinds.expected create mode 100644 python/ql/test/library-tests/threat-models/default/ActiveKinds.ql diff --git a/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected b/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected new file mode 100644 index 000000000000..c471a7cc9129 --- /dev/null +++ b/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected @@ -0,0 +1,4 @@ +| default | +| remote | +| request | +| response | diff --git a/python/ql/test/library-tests/threat-models/default/ActiveKinds.ql b/python/ql/test/library-tests/threat-models/default/ActiveKinds.ql new file mode 100644 index 000000000000..93a1354b7af8 --- /dev/null +++ b/python/ql/test/library-tests/threat-models/default/ActiveKinds.ql @@ -0,0 +1,7 @@ +private import codeql.threatmodels.ThreatModels + +from string kind +where + knownThreatModel(kind) and + currentThreatModel(kind) +select kind From 8f7dec07b8c61b0cd493511688a3d0c6eddcc600 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 11:29:16 +0200 Subject: [PATCH 04/21] Python: Remove 'response' from default threat-models I didn't want to put the configuration file in `semmle/python/frameworks/**/*.model.yml`, so created `ext/` as in other languages --- python/ql/lib/ext/default-threat-models-fixup.model.yml | 8 ++++++++ python/ql/lib/qlpack.yml | 1 + .../threat-models/default/ActiveKinds.expected | 1 - 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 python/ql/lib/ext/default-threat-models-fixup.model.yml diff --git a/python/ql/lib/ext/default-threat-models-fixup.model.yml b/python/ql/lib/ext/default-threat-models-fixup.model.yml new file mode 100644 index 000000000000..cc1cb20517ec --- /dev/null +++ b/python/ql/lib/ext/default-threat-models-fixup.model.yml @@ -0,0 +1,8 @@ +extensions: + - addsTo: + pack: codeql/threat-models + extensible: threatModelConfiguration + data: + # Since responses are enabled by default in the shared threat-models configuration, + # we need to disable it here to keep existing behavior for the python analysis. + - ["response", false, -2147483647] diff --git a/python/ql/lib/qlpack.yml b/python/ql/lib/qlpack.yml index 2abc452ef503..36b1f7242be0 100644 --- a/python/ql/lib/qlpack.yml +++ b/python/ql/lib/qlpack.yml @@ -16,4 +16,5 @@ dependencies: codeql/yaml: ${workspace} dataExtensions: - semmle/python/frameworks/**/*.model.yml + - ext/*.model.yml warnOnImplicitThis: true diff --git a/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected b/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected index c471a7cc9129..892f0fa5f6c3 100644 --- a/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected +++ b/python/ql/test/library-tests/threat-models/default/ActiveKinds.expected @@ -1,4 +1,3 @@ | default | | remote | | request | -| response | From 528f08fb8345e74a8f4f93a7f5e1d8866c9fba7d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 11:52:54 +0200 Subject: [PATCH 05/21] Python: Make queries use ActiveThreatModelSource --- .../security/dataflow/CodeInjectionCustomizations.qll | 9 +++++++-- .../dataflow/CommandInjectionCustomizations.qll | 9 +++++++-- .../dataflow/CookieInjectionCustomizations.qll | 9 +++++++-- .../dataflow/HttpHeaderInjectionCustomizations.qll | 9 +++++++-- .../security/dataflow/LdapInjectionCustomizations.qll | 9 +++++++-- .../security/dataflow/LogInjectionCustomizations.qll | 9 +++++++-- .../dataflow/PamAuthorizationCustomizations.qll | 10 ++++++++-- .../security/dataflow/PathInjectionCustomizations.qll | 9 +++++++-- .../dataflow/PolynomialReDoSCustomizations.qll | 9 +++++++-- .../security/dataflow/ReflectedXSSCustomizations.qll | 9 +++++++-- .../security/dataflow/RegexInjectionCustomizations.qll | 9 +++++++-- .../ServerSideRequestForgeryCustomizations.qll | 9 +++++++-- .../security/dataflow/SqlInjectionCustomizations.qll | 9 +++++++-- .../dataflow/UnsafeDeserializationCustomizations.qll | 9 +++++++-- .../security/dataflow/UrlRedirectCustomizations.qll | 9 +++++++-- .../security/dataflow/XpathInjectionCustomizations.qll | 9 +++++++-- .../CWE-074/TemplateInjectionCustomizations.qll | 9 +++++++-- .../Security/CWE-091/XsltInjectionCustomizations.qll | 9 +++++++-- python/ql/src/experimental/Security/CWE-094/Js2Py.ql | 2 +- 19 files changed, 128 insertions(+), 37 deletions(-) diff --git a/python/ql/lib/semmle/python/security/dataflow/CodeInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CodeInjectionCustomizations.qll index 294b3c63fdad..354d098186f6 100644 --- a/python/ql/lib/semmle/python/security/dataflow/CodeInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/CodeInjectionCustomizations.qll @@ -33,9 +33,14 @@ module CodeInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A code execution, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll index a8d17026e40c..f3786700967b 100644 --- a/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll @@ -33,9 +33,14 @@ module CommandInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A command execution, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/CookieInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/CookieInjectionCustomizations.qll index dd3792182de8..cee2b7b98e73 100644 --- a/python/ql/lib/semmle/python/security/dataflow/CookieInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/CookieInjectionCustomizations.qll @@ -31,9 +31,14 @@ module CookieInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A write to a cookie, considered as a sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/HttpHeaderInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/HttpHeaderInjectionCustomizations.qll index e529d3f29e0f..92cd46a3408a 100644 --- a/python/ql/lib/semmle/python/security/dataflow/HttpHeaderInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/HttpHeaderInjectionCustomizations.qll @@ -32,9 +32,14 @@ module HttpHeaderInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A HTTP header write, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/LdapInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/LdapInjectionCustomizations.qll index 6c2b664bd965..f0229bc0c824 100644 --- a/python/ql/lib/semmle/python/security/dataflow/LdapInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/LdapInjectionCustomizations.qll @@ -42,9 +42,14 @@ module LdapInjection { abstract class FilterSanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A logging operation, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll index f92b87c73faf..71648b2c6281 100644 --- a/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/LogInjectionCustomizations.qll @@ -33,9 +33,14 @@ module LogInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A logging operation, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/PamAuthorizationCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/PamAuthorizationCustomizations.qll index afba208e0e45..eb858be8f95d 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PamAuthorizationCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PamAuthorizationCustomizations.qll @@ -7,6 +7,7 @@ import python import semmle.python.ApiGraphs import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.RemoteFlowSources +import semmle.python.Concepts /** * Provides default sources, sinks and sanitizers for detecting @@ -39,9 +40,14 @@ module PamAuthorizationCustomizations { abstract class Sink extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A vulnerable `pam_authenticate` call considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll index cda71df2f905..eff7d715c268 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll @@ -43,9 +43,14 @@ module PathInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A file system access, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSCustomizations.qll index 23cd45312300..2cd8b0a64bd7 100644 --- a/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/PolynomialReDoSCustomizations.qll @@ -47,9 +47,14 @@ module PolynomialReDoS { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A regex execution, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/ReflectedXSSCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/ReflectedXSSCustomizations.qll index ee2dec407d14..3a7177eb4c74 100644 --- a/python/ql/lib/semmle/python/security/dataflow/ReflectedXSSCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/ReflectedXSSCustomizations.qll @@ -33,9 +33,14 @@ module ReflectedXss { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A data flow sink for "reflected cross-site scripting" vulnerabilities. diff --git a/python/ql/lib/semmle/python/security/dataflow/RegexInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/RegexInjectionCustomizations.qll index 72dc66430b6d..559b1f66e7e6 100644 --- a/python/ql/lib/semmle/python/security/dataflow/RegexInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/RegexInjectionCustomizations.qll @@ -40,9 +40,14 @@ module RegexInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A regex escaping, considered as a sanitizer. diff --git a/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll index a4e3ecc9ee18..552d022c98ec 100644 --- a/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/ServerSideRequestForgeryCustomizations.qll @@ -45,9 +45,14 @@ module ServerSideRequestForgery { abstract class FullUrlControlSanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** The URL of an HTTP request, considered as a sink. */ class HttpRequestUrlAsSink extends Sink { diff --git a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll index 7e0969d854f0..e2a5b4d624a2 100644 --- a/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/SqlInjectionCustomizations.qll @@ -32,9 +32,14 @@ module SqlInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A SQL statement of a SQL construction, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/UnsafeDeserializationCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/UnsafeDeserializationCustomizations.qll index 27b6a44580d0..ca909e5de1a0 100644 --- a/python/ql/lib/semmle/python/security/dataflow/UnsafeDeserializationCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/UnsafeDeserializationCustomizations.qll @@ -33,9 +33,14 @@ module UnsafeDeserialization { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * An insecure decoding, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll index fa913b28fbdf..0c05a36eaebb 100644 --- a/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/UrlRedirectCustomizations.qll @@ -77,9 +77,14 @@ module UrlRedirect { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A HTTP redirect response, considered as a flow sink. diff --git a/python/ql/lib/semmle/python/security/dataflow/XpathInjectionCustomizations.qll b/python/ql/lib/semmle/python/security/dataflow/XpathInjectionCustomizations.qll index ef30b3f81ce0..10b47f174390 100644 --- a/python/ql/lib/semmle/python/security/dataflow/XpathInjectionCustomizations.qll +++ b/python/ql/lib/semmle/python/security/dataflow/XpathInjectionCustomizations.qll @@ -30,9 +30,14 @@ module XpathInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A construction of an XPath expression, considered as a sink. diff --git a/python/ql/src/experimental/Security/CWE-074/TemplateInjectionCustomizations.qll b/python/ql/src/experimental/Security/CWE-074/TemplateInjectionCustomizations.qll index dcd6c1848616..e5c5013f3ba6 100644 --- a/python/ql/src/experimental/Security/CWE-074/TemplateInjectionCustomizations.qll +++ b/python/ql/src/experimental/Security/CWE-074/TemplateInjectionCustomizations.qll @@ -33,9 +33,14 @@ module TemplateInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * A SQL statement of a SQL construction, considered as a flow sink. diff --git a/python/ql/src/experimental/Security/CWE-091/XsltInjectionCustomizations.qll b/python/ql/src/experimental/Security/CWE-091/XsltInjectionCustomizations.qll index bda2fe646c95..4cacf1f85dee 100644 --- a/python/ql/src/experimental/Security/CWE-091/XsltInjectionCustomizations.qll +++ b/python/ql/src/experimental/Security/CWE-091/XsltInjectionCustomizations.qll @@ -33,9 +33,14 @@ module XsltInjection { abstract class Sanitizer extends DataFlow::Node { } /** - * A source of remote user input, considered as a flow source. + * DEPRECATED: Use `ActiveThreatModelSource` from Concepts instead! */ - class RemoteFlowSourceAsSource extends Source, RemoteFlowSource { } + deprecated class RemoteFlowSourceAsSource = ActiveThreatModelSourceAsSource; + + /** + * An active threat-model source, considered as a flow source. + */ + private class ActiveThreatModelSourceAsSource extends Source, ActiveThreatModelSource { } /** * An XSLT construction, considered as a flow sink. diff --git a/python/ql/src/experimental/Security/CWE-094/Js2Py.ql b/python/ql/src/experimental/Security/CWE-094/Js2Py.ql index 5dc160077873..f5d6e3a6c10e 100644 --- a/python/ql/src/experimental/Security/CWE-094/Js2Py.ql +++ b/python/ql/src/experimental/Security/CWE-094/Js2Py.ql @@ -18,7 +18,7 @@ import semmle.python.dataflow.new.RemoteFlowSources import semmle.python.Concepts module Js2PyFlowConfig implements DataFlow::ConfigSig { - predicate isSource(DataFlow::Node node) { node instanceof RemoteFlowSource } + predicate isSource(DataFlow::Node node) { node instanceof ActiveThreatModelSource } predicate isSink(DataFlow::Node node) { API::moduleImport("js2py").getMember(["eval_js", "eval_js6", "EvalJs"]).getACall().getArg(_) = From b9239d7101d05ba98531c3b52e21e7ca9476d845 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 15:04:53 +0200 Subject: [PATCH 06/21] Python: Add basic support for environment/commandargs threat-models --- .../semmle/python/frameworks/Stdlib.model.yml | 16 ++++++ .../test/experimental/meta/ConceptsTest.qll | 19 ++++++- .../frameworks/stdlib/threat_models.py | 49 +++++++++++++++++++ .../stdlib/wsgiref_simple_server_test.py | 2 +- 4 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 python/ql/lib/semmle/python/frameworks/Stdlib.model.yml create mode 100644 python/ql/test/library-tests/frameworks/stdlib/threat_models.py diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml new file mode 100644 index 000000000000..757aedf60ed1 --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -0,0 +1,16 @@ +extensions: + - addsTo: + pack: codeql/python-all + extensible: sourceModel + data: + - ['os', 'Member[getenv].ReturnValue', 'environment'] + - ['os', 'Member[getenvb].ReturnValue', 'environment'] + - ['os', 'Member[environ].DictionaryElementAny', 'environment'] + - ['os', 'Member[environb].DictionaryElementAny', 'environment'] + - ['posix', 'Member[environ].DictionaryElementAny', 'environment'] + + - ['sys', 'Member[argv].DictionaryElementAny', 'commandargs'] + - ['sys', 'Member[orig_argv].DictionaryElementAny', 'commandargs'] + + # TODO: argparse + # TODO: input / read from stdin diff --git a/python/ql/test/experimental/meta/ConceptsTest.qll b/python/ql/test/experimental/meta/ConceptsTest.qll index a53171de88ac..87addb8a1fc3 100644 --- a/python/ql/test/experimental/meta/ConceptsTest.qll +++ b/python/ql/test/experimental/meta/ConceptsTest.qll @@ -3,6 +3,7 @@ import semmle.python.dataflow.new.DataFlow import semmle.python.Concepts import TestUtilities.InlineExpectationsTest private import semmle.python.dataflow.new.internal.PrintNode +private import codeql.threatmodels.ThreatModels module SystemCommandExecutionTest implements TestSig { string getARelevantTag() { result = "getCommand" } @@ -632,6 +633,22 @@ module XmlParsingTest implements TestSig { } } +module ThreatModelSourceTest implements TestSig { + string getARelevantTag() { + exists(string kind | knownThreatModel(kind) | result = "threatModelSource" + "[" + kind + "]") + } + + predicate hasActualResult(Location location, string element, string tag, string value) { + exists(location.getFile().getRelativePath()) and + exists(ThreatModelSource src | not src.getThreatModel() = "remote" | + location = src.getLocation() and + element = src.toString() and + value = prettyNodeForInlineTest(src) and + tag = "threatModelSource[" + src.getThreatModel() + "]" + ) + } +} + import MakeTest, MergeTests5, MergeTests5>> + CsrfLocalProtectionSettingTest, MergeTests>>> diff --git a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py new file mode 100644 index 000000000000..41249967eb99 --- /dev/null +++ b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py @@ -0,0 +1,49 @@ +import os +import sys +import posix + +os.getenv("foo") # $ threatModelSource[environment]=os.getenv(..) +os.getenvb("bar") # $ threatModelSource[environment]=os.getenvb(..) + +os.environ["foo"] # $ threatModelSource[environment]=os.environ["foo"] +os.environ.get("foo") # $ MISSING: threatModelSource[environment]=os.environ.get(..) + +os.environb["bar"] # $ threatModelSource[environment]=os.environb["bar"] +posix.environ[b"foo"] # $ threatModelSource[environment]=posix.environ[b"foo"] + + +sys.argv[1] # $ threatModelSource[commandargs]=sys.argv[1] +sys.orig_argv[1] # $ threatModelSource[commandargs]=sys.orig_argv[1] + +######################################## +# argparse +######################################## + +import argparse +parser = argparse.ArgumentParser() +parser.add_argument("foo") + +args = parser.parse_args() +args.foo # $ MISSING: threatModelSource[commandargs]=args.foo + +explicit_argv_parsing = parser.parse_args(sys.argv) +explicit_argv_parsing.foo # $ MISSING: threatModelSource[commandargs]=explicit_argv_parsing.foo + +fake_args = parser.parse_args([""]) +fake_args.foo + +######################################## +# reading input from stdin +######################################## + +sys.stdin.readline() # $ MISSING: threatModelSource +input() # $ MISSING: threatModelSource + +######################################## +# socket +######################################## + +import socket +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +s.connect(("example.com", 1234)) +s.recv(1024) # $ MISSING: threatModelSource[socket] diff --git a/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py b/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py index 7327385c0647..161968799a55 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py +++ b/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py @@ -45,7 +45,7 @@ def func2(environ, start_response): # $ requestHandler start_response(status, headers) # $ headerWriteBulk=headers headerWriteBulkUnsanitized=name,value return [b"Hello"] # $ HttpResponse responseBody=List -case = sys.argv[1] +case = sys.argv[1] # $ threatModelSource[commandargs]=sys.argv[1] if case == "1": server = wsgiref.simple_server.WSGIServer(ADDRESS, wsgiref.simple_server.WSGIRequestHandler) server.set_app(func) From 56c85ffe549209ea83274418bf7ab2442103840b Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 15:19:42 +0200 Subject: [PATCH 07/21] Python: Fixup threat-models for `os.environ.get()` Since using `.DictionaryElementAny` doesn't actually do a store on the source, (so we can later follow any dict read-steps). I added the ensure_tainted steps to highlight that the result of the WHOLE expression ends up "tainted", and that we don't just mark `os.environ` as the source without further flow. --- .../semmle/python/frameworks/Stdlib.model.yml | 10 ++--- .../experimental/meta/InlineTaintTest.qll | 3 +- .../frameworks/stdlib/threat_models.py | 41 +++++++++++-------- .../stdlib/wsgiref_simple_server_test.py | 2 +- 4 files changed, 33 insertions(+), 23 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index 757aedf60ed1..f3bfd5ac8890 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -5,12 +5,12 @@ extensions: data: - ['os', 'Member[getenv].ReturnValue', 'environment'] - ['os', 'Member[getenvb].ReturnValue', 'environment'] - - ['os', 'Member[environ].DictionaryElementAny', 'environment'] - - ['os', 'Member[environb].DictionaryElementAny', 'environment'] - - ['posix', 'Member[environ].DictionaryElementAny', 'environment'] + - ['os', 'Member[environ]', 'environment'] + - ['os', 'Member[environb]', 'environment'] + - ['posix', 'Member[environ]', 'environment'] - - ['sys', 'Member[argv].DictionaryElementAny', 'commandargs'] - - ['sys', 'Member[orig_argv].DictionaryElementAny', 'commandargs'] + - ['sys', 'Member[argv]', 'commandargs'] + - ['sys', 'Member[orig_argv]', 'commandargs'] # TODO: argparse # TODO: input / read from stdin diff --git a/python/ql/test/experimental/meta/InlineTaintTest.qll b/python/ql/test/experimental/meta/InlineTaintTest.qll index 24f67bcf2a45..a09cc9aabc19 100644 --- a/python/ql/test/experimental/meta/InlineTaintTest.qll +++ b/python/ql/test/experimental/meta/InlineTaintTest.qll @@ -15,6 +15,7 @@ import semmle.python.dataflow.new.TaintTracking import semmle.python.dataflow.new.RemoteFlowSources import TestUtilities.InlineExpectationsTest private import semmle.python.dataflow.new.internal.PrintNode +private import semmle.python.Concepts DataFlow::Node shouldBeTainted() { exists(DataFlow::CallCfgNode call | @@ -45,7 +46,7 @@ module Conf { source.(DataFlow::CfgNode).getNode() = call.getAnArg() ) or - source instanceof RemoteFlowSource + source instanceof ThreatModelSource } predicate isSink(DataFlow::Node sink) { diff --git a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py index 41249967eb99..cac276eacad0 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py +++ b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py @@ -2,18 +2,25 @@ import sys import posix -os.getenv("foo") # $ threatModelSource[environment]=os.getenv(..) -os.getenvb("bar") # $ threatModelSource[environment]=os.getenvb(..) +ensure_tainted( + os.getenv("foo"), # $ tainted threatModelSource[environment]=os.getenv(..) + os.getenvb("bar"), # $ tainted threatModelSource[environment]=os.getenvb(..) -os.environ["foo"] # $ threatModelSource[environment]=os.environ["foo"] -os.environ.get("foo") # $ MISSING: threatModelSource[environment]=os.environ.get(..) + os.environ["foo"], # $ tainted threatModelSource[environment]=os.environ + os.environ.get("foo"), # $ tainted threatModelSource[environment]=os.environ -os.environb["bar"] # $ threatModelSource[environment]=os.environb["bar"] -posix.environ[b"foo"] # $ threatModelSource[environment]=posix.environ[b"foo"] + os.environb["bar"], # $ tainted threatModelSource[environment]=os.environb + posix.environ[b"foo"], # $ tainted threatModelSource[environment]=posix.environ -sys.argv[1] # $ threatModelSource[commandargs]=sys.argv[1] -sys.orig_argv[1] # $ threatModelSource[commandargs]=sys.orig_argv[1] + sys.argv[1], # $ tainted threatModelSource[commandargs]=sys.argv + sys.orig_argv[1], # $ tainted threatModelSource[commandargs]=sys.orig_argv +) + +for k,v in os.environ.items(): # $ threatModelSource[environment]=os.environ + ensure_tainted(k) # $ tainted + ensure_tainted(v) # $ tainted + ######################################## # argparse @@ -23,21 +30,23 @@ parser = argparse.ArgumentParser() parser.add_argument("foo") -args = parser.parse_args() -args.foo # $ MISSING: threatModelSource[commandargs]=args.foo +args = parser.parse_args() # $ MISSING: threatModelSource[commandargs]=parser.parse_args() +ensure_tainted(args.foo) # $ MISSING: tainted -explicit_argv_parsing = parser.parse_args(sys.argv) -explicit_argv_parsing.foo # $ MISSING: threatModelSource[commandargs]=explicit_argv_parsing.foo +explicit_argv_parsing = parser.parse_args(sys.argv) # $ threatModelSource[commandargs]=sys.argv +ensure_tainted(explicit_argv_parsing.foo) # $ MISSING: tainted fake_args = parser.parse_args([""]) -fake_args.foo +ensure_not_tainted(fake_args.foo) ######################################## # reading input from stdin ######################################## -sys.stdin.readline() # $ MISSING: threatModelSource -input() # $ MISSING: threatModelSource +ensure_tainted( + sys.stdin.readline(), # $ MISSING: tainted threatModelSource + input(), # $ MISSING: tainted threatModelSource +) ######################################## # socket @@ -46,4 +55,4 @@ import socket s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect(("example.com", 1234)) -s.recv(1024) # $ MISSING: threatModelSource[socket] +ensure_tainted(s.recv(1024)) # $ MISSING: tainted threatModelSource[socket] diff --git a/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py b/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py index 161968799a55..fd852337aba3 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py +++ b/python/ql/test/library-tests/frameworks/stdlib/wsgiref_simple_server_test.py @@ -45,7 +45,7 @@ def func2(environ, start_response): # $ requestHandler start_response(status, headers) # $ headerWriteBulk=headers headerWriteBulkUnsanitized=name,value return [b"Hello"] # $ HttpResponse responseBody=List -case = sys.argv[1] # $ threatModelSource[commandargs]=sys.argv[1] +case = sys.argv[1] # $ threatModelSource[commandargs]=sys.argv if case == "1": server = wsgiref.simple_server.WSGIServer(ADDRESS, wsgiref.simple_server.WSGIRequestHandler) server.set_app(func) From e1801f3a297aaec8b3eaa585f128a279cdcbb419 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 16:20:41 +0200 Subject: [PATCH 08/21] Python: Proper threat-model handling for argparse --- .../semmle/python/frameworks/Stdlib.model.yml | 10 +++++++++- .../lib/semmle/python/frameworks/Stdlib.qll | 20 +++++++++++++++++++ .../frameworks/stdlib/threat_models.py | 8 ++++---- 3 files changed, 33 insertions(+), 5 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index f3bfd5ac8890..ea5d810d8db8 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -12,5 +12,13 @@ extensions: - ['sys', 'Member[argv]', 'commandargs'] - ['sys', 'Member[orig_argv]', 'commandargs'] - # TODO: argparse + # if no argument is given, the default is to use sys.argv[1:] + - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs'] + - addsTo: + pack: codeql/python-all + extensible: summaryModel + data: + - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args]', 'Argument[0,args:]', 'ReturnValue', 'taint'] + # note: taint of attribute lookups is handled in QL + # TODO: input / read from stdin diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 3c23b3929911..db20a25567c8 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -4989,6 +4989,26 @@ module StdlibPrivate { override string getKind() { result = Escaping::getHtmlKind() } } + + // --------------------------------------------------------------------------- + // argparse + // --------------------------------------------------------------------------- + /** + * if result of `parse_args` is tainted (because it uses command-line arguments), + * then the parsed values accesssed on any attribute lookup is also tainted. + */ + private class ArgumentParserAnyAttributeStep extends TaintTracking::AdditionalTaintStep { + override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + nodeFrom = + API::moduleImport("argparse") + .getMember("ArgumentParser") + .getReturn() + .getMember("parse_args") + .getReturn() + .getAValueReachableFromSource() and + nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom + } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py index cac276eacad0..ecaf981849f3 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py +++ b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py @@ -30,14 +30,14 @@ parser = argparse.ArgumentParser() parser.add_argument("foo") -args = parser.parse_args() # $ MISSING: threatModelSource[commandargs]=parser.parse_args() -ensure_tainted(args.foo) # $ MISSING: tainted +args = parser.parse_args() # $ threatModelSource[commandargs]=parser.parse_args() +ensure_tainted(args.foo) # $ tainted explicit_argv_parsing = parser.parse_args(sys.argv) # $ threatModelSource[commandargs]=sys.argv -ensure_tainted(explicit_argv_parsing.foo) # $ MISSING: tainted +ensure_tainted(explicit_argv_parsing.foo) # $ tainted fake_args = parser.parse_args([""]) -ensure_not_tainted(fake_args.foo) +ensure_not_tainted(fake_args.foo) # $ SPURIOUS: tainted ######################################## # reading input from stdin From 66f389a4b630e5de8f97c3f06537fb5c6d60a99f Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 9 Aug 2024 16:36:19 +0200 Subject: [PATCH 09/21] Python: Model stdin thread-model --- .../lib/semmle/python/frameworks/Stdlib.model.yml | 5 +++-- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 13 +++++++++++++ .../frameworks/stdlib/threat_models.py | 4 ++-- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index ea5d810d8db8..b934be067757 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -12,6 +12,9 @@ extensions: - ['sys', 'Member[argv]', 'commandargs'] - ['sys', 'Member[orig_argv]', 'commandargs'] + - ['sys', 'Member[stdin]', 'stdin'] + - ['builtins', 'Member[input].ReturnValue', 'stdin'] + # if no argument is given, the default is to use sys.argv[1:] - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs'] - addsTo: @@ -20,5 +23,3 @@ extensions: data: - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args]', 'Argument[0,args:]', 'ReturnValue', 'taint'] # note: taint of attribute lookups is handled in QL - - # TODO: input / read from stdin diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index db20a25567c8..44e050df272b 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -5009,6 +5009,19 @@ module StdlibPrivate { nodeTo.(DataFlow::AttrRead).getObject() = nodeFrom } } + + // --------------------------------------------------------------------------- + // sys + // --------------------------------------------------------------------------- + /** + * An access of `sys.stdin`/`sys.stdout`/`sys.stderr`, to get additional FileLike + * modeling. + */ + private class SysStandardStreams extends Stdlib::FileLikeObject::InstanceSource, DataFlow::Node { + SysStandardStreams() { + this = API::moduleImport("sys").getMember(["stdin", "stdout", "stderr"]).asSource() + } + } } // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py index ecaf981849f3..919fded1169c 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py +++ b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py @@ -44,8 +44,8 @@ ######################################## ensure_tainted( - sys.stdin.readline(), # $ MISSING: tainted threatModelSource - input(), # $ MISSING: tainted threatModelSource + sys.stdin.readline(), # $ tainted threatModelSource[stdin]=sys.stdin + input(), # $ tainted threatModelSource[stdin]=input() ) ######################################## From d245db54a1e0c63a06f71c06488bbd0d2549968a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 12 Aug 2024 15:18:18 +0200 Subject: [PATCH 10/21] Python: Model `file` threat-model --- .../semmle/python/frameworks/Stdlib.model.yml | 2 ++ .../lib/semmle/python/frameworks/Stdlib.qll | 6 ++++- .../frameworks/stdlib/FileSystemAccess.py | 22 +++++++++---------- .../frameworks/stdlib/threat_models.py | 13 +++++++++++ 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index b934be067757..34b9dceb8b28 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -17,6 +17,8 @@ extensions: # if no argument is given, the default is to use sys.argv[1:] - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs'] + + - ['os', 'Member[read].ReturnValue', 'file'] - addsTo: pack: codeql/python-all extensible: summaryModel diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 44e050df272b..1e1575c18ded 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -1499,13 +1499,17 @@ module StdlibPrivate { * See https://docs.python.org/3/library/functions.html#open */ private class OpenCall extends FileSystemAccess::Range, Stdlib::FileLikeObject::InstanceSource, - DataFlow::CallCfgNode + ThreatModelSource::Range, DataFlow::CallCfgNode { OpenCall() { this = getOpenFunctionRef().getACall() } override DataFlow::Node getAPathArgument() { result in [this.getArg(0), this.getArgByName("file")] } + + override string getThreatModel() { result = "file" } + + override string getSourceType() { result = "open()" } } /** diff --git a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py index 197ccd8eb919..75b84d9fd51c 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py +++ b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py @@ -5,25 +5,25 @@ import tempfile import shutil -open("file") # $ getAPathArgument="file" -open(file="file") # $ getAPathArgument="file" +open("file") # $ getAPathArgument="file" threatModelSource[file]=open(..) +open(file="file") # $ getAPathArgument="file" threatModelSource[file]=open(..) o = open -o("file") # $ getAPathArgument="file" -o(file="file") # $ getAPathArgument="file" +o("file") # $ getAPathArgument="file" threatModelSource[file]=o(..) +o(file="file") # $ getAPathArgument="file" threatModelSource[file]=o(..) -builtins.open("file") # $ getAPathArgument="file" -builtins.open(file="file") # $ getAPathArgument="file" +builtins.open("file") # $ getAPathArgument="file" threatModelSource[file]=builtins.open(..) +builtins.open(file="file") # $ getAPathArgument="file" threatModelSource[file]=builtins.open(..) -io.open("file") # $ getAPathArgument="file" -io.open(file="file") # $ getAPathArgument="file" +io.open("file") # $ getAPathArgument="file" threatModelSource[file]=io.open(..) +io.open(file="file") # $ getAPathArgument="file" threatModelSource[file]=io.open(..) io.open_code("file") # $ getAPathArgument="file" io.FileIO("file") # $ getAPathArgument="file" -f = open("path") # $ getAPathArgument="path" +f = open("path") # $ getAPathArgument="path" threatModelSource[file]=open(..) f.write("foo") # $ getAPathArgument="path" fileWriteData="foo" lines = ["foo"] f.writelines(lines) # $ getAPathArgument="path" fileWriteData=lines @@ -87,8 +87,8 @@ def test_fspath(): os.fspath(path=TAINTED_STRING), # $ tainted ) -os.open("path", os.O_RDONLY) # $ getAPathArgument="path" -os.open(path="path", flags=os.O_RDONLY) # $ getAPathArgument="path" +os.open("path", os.O_RDONLY) # $ getAPathArgument="path" SPURIOUS: threatModelSource[file]=os.open(..) +os.open(path="path", flags=os.O_RDONLY) # $ getAPathArgument="path" SPURIOUS: threatModelSource[file]=os.open(..) os.access("path", os.R_OK) # $ getAPathArgument="path" os.access(path="path", mode=os.R_OK) # $ getAPathArgument="path" diff --git a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py index 919fded1169c..8ff50b0f211e 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py +++ b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py @@ -48,6 +48,19 @@ input(), # $ tainted threatModelSource[stdin]=input() ) +######################################## +# reading data from files +######################################## + +ensure_tainted( + open("foo"), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo" + open("foo").read(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo" + open("foo").readline(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo" + open("foo").readlines(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo" + + os.read(os.open("foo"), 1024), # $ tainted threatModelSource[file]=os.read(..) SPURIOUS: threatModelSource[file]=os.open(..) getAPathArgument="foo" +) + ######################################## # socket ######################################## From 7483075b7eeb577c5bedb43a9ce3b6a37e427dc2 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 13 Aug 2024 14:57:24 +0200 Subject: [PATCH 11/21] Python: Fixup modeling of `os.open` --- python/ql/lib/semmle/python/frameworks/Stdlib.qll | 11 ++++++++--- .../frameworks/stdlib/FileSystemAccess.py | 4 ++-- .../library-tests/frameworks/stdlib/threat_models.py | 2 +- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index 1e1575c18ded..876691648e7d 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -338,7 +338,7 @@ module StdlibPrivate { * Modeling of path related functions in the `os` module. * Wrapped in QL module to make it easy to fold/unfold. */ - private module OsFileSystemAccessModeling { + module OsFileSystemAccessModeling { /** * A call to the `os.fsencode` function. * @@ -395,7 +395,7 @@ module StdlibPrivate { * * See https://docs.python.org/3/library/os.html#os.open */ - private class OsOpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode { + class OsOpenCall extends FileSystemAccess::Range, DataFlow::CallCfgNode { OsOpenCall() { this = os().getMember("open").getACall() } override DataFlow::Node getAPathArgument() { @@ -1501,7 +1501,12 @@ module StdlibPrivate { private class OpenCall extends FileSystemAccess::Range, Stdlib::FileLikeObject::InstanceSource, ThreatModelSource::Range, DataFlow::CallCfgNode { - OpenCall() { this = getOpenFunctionRef().getACall() } + OpenCall() { + this = getOpenFunctionRef().getACall() and + // when analyzing stdlib code for os.py we wrongly assume that `os.open` is an + // alias of the builtins `open` function + not this instanceof OsFileSystemAccessModeling::OsOpenCall + } override DataFlow::Node getAPathArgument() { result in [this.getArg(0), this.getArgByName("file")] diff --git a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py index 75b84d9fd51c..7d52f53858cf 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py +++ b/python/ql/test/library-tests/frameworks/stdlib/FileSystemAccess.py @@ -87,8 +87,8 @@ def test_fspath(): os.fspath(path=TAINTED_STRING), # $ tainted ) -os.open("path", os.O_RDONLY) # $ getAPathArgument="path" SPURIOUS: threatModelSource[file]=os.open(..) -os.open(path="path", flags=os.O_RDONLY) # $ getAPathArgument="path" SPURIOUS: threatModelSource[file]=os.open(..) +os.open("path", os.O_RDONLY) # $ getAPathArgument="path" +os.open(path="path", flags=os.O_RDONLY) # $ getAPathArgument="path" os.access("path", os.R_OK) # $ getAPathArgument="path" os.access(path="path", mode=os.R_OK) # $ getAPathArgument="path" diff --git a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py index 8ff50b0f211e..23b800ce576e 100644 --- a/python/ql/test/library-tests/frameworks/stdlib/threat_models.py +++ b/python/ql/test/library-tests/frameworks/stdlib/threat_models.py @@ -58,7 +58,7 @@ open("foo").readline(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo" open("foo").readlines(), # $ tainted threatModelSource[file]=open(..) getAPathArgument="foo" - os.read(os.open("foo"), 1024), # $ tainted threatModelSource[file]=os.read(..) SPURIOUS: threatModelSource[file]=os.open(..) getAPathArgument="foo" + os.read(os.open("foo"), 1024), # $ tainted threatModelSource[file]=os.read(..) getAPathArgument="foo" ) ######################################## From 8d8cd05b94bed31a24542155f2796de926dd6b84 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 16 Aug 2024 10:49:49 +0200 Subject: [PATCH 12/21] Python: Add basic support for `database` threat-model --- .../lib/semmle/python/frameworks/PEP249.qll | 18 ++++++++++++++++ .../frameworks/psycopg/pep249.py | 21 +++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/PEP249.qll b/python/ql/lib/semmle/python/frameworks/PEP249.qll index 2425f4514f84..1eecc12b3d1e 100644 --- a/python/ql/lib/semmle/python/frameworks/PEP249.qll +++ b/python/ql/lib/semmle/python/frameworks/PEP249.qll @@ -81,6 +81,24 @@ module PEP249 { } } + /** A call to a method that fetches rows from a previous execution. */ + private class FetchMethodCall extends ThreatModelSource::Range, API::CallNode { + FetchMethodCall() { + exists(API::Node start | + start instanceof DatabaseCursor or start instanceof DatabaseConnection + | + // note: since we can't currently provide accesspaths for sources, these are all + // lumped together, although clearly the fetchmany/fetchall returns a + // list/iterable with rows. + this = start.getMember(["fetchone", "fetchmany", "fetchall"]).getACall() + ) + } + + override string getThreatModel() { result = "database" } + + override string getSourceType() { result = "cursor.fetch*()" } + } + // --------------------------------------------------------------------------- // asyncio implementations // --------------------------------------------------------------------------- diff --git a/python/ql/test/library-tests/frameworks/psycopg/pep249.py b/python/ql/test/library-tests/frameworks/psycopg/pep249.py index 0336facb079e..e61700d16ed6 100644 --- a/python/ql/test/library-tests/frameworks/psycopg/pep249.py +++ b/python/ql/test/library-tests/frameworks/psycopg/pep249.py @@ -12,3 +12,24 @@ with conn.cursor() as cursor: cursor.execute("some sql", (42,)) # $ getSql="some sql" cursor.executemany("some sql", [(42,)]) # $ getSql="some sql" + + + ### test of threat-model sources + row = cursor.fetchone() # $ threatModelSource[database]=cursor.fetchone() + rows_many = cursor.fetchmany(10) # $ threatModelSource[database]=cursor.fetchmany(..) + rows_all = cursor.fetchall() # $ threatModelSource[database]=cursor.fetchall() + + ensure_tainted( + row[0], # $ tainted + rows_many[0][0], # $ tainted + rows_all[0][0], # $ tainted + + # pretending we created cursor to return dictionary results + row["column"], # $ tainted + rows_many[0]["column"], # $ tainted + rows_all[0]["column"], # $ tainted + ) + for row in rows_many: + ensure_tainted(row[0], row["column"]) # $ tainted + for row in rows_all: + ensure_tainted(row[0], row["column"]) # tainted From a0b24d6194b86cbc9f79145acab2f52ebf430a09 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 16 Aug 2024 11:07:37 +0200 Subject: [PATCH 13/21] Python: Add e2e threat-model test --- .../SqlInjection.expected | 8 ++++++++ .../SqlInjection.ext.yml | 6 ++++++ .../SqlInjection.qlref | 1 + .../CWE-089-SqlInjection-local-threat-model/test.py | 6 ++++++ 4 files changed, 21 insertions(+) create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.expected create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.ext.yml create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.qlref create mode 100644 python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/test.py diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.expected b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.expected new file mode 100644 index 000000000000..1e4ba8b95305 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.expected @@ -0,0 +1,8 @@ +edges +| test.py:6:14:6:21 | ControlFlowNode for Attribute | test.py:6:14:6:24 | ControlFlowNode for Subscript | provenance | Src:MaD:17 | +nodes +| test.py:6:14:6:21 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute | +| test.py:6:14:6:24 | ControlFlowNode for Subscript | semmle.label | ControlFlowNode for Subscript | +subpaths +#select +| test.py:6:14:6:24 | ControlFlowNode for Subscript | test.py:6:14:6:21 | ControlFlowNode for Attribute | test.py:6:14:6:24 | ControlFlowNode for Subscript | This SQL query depends on a $@. | test.py:6:14:6:21 | ControlFlowNode for Attribute | user-provided value | diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.ext.yml b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.ext.yml new file mode 100644 index 000000000000..63507f477386 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.ext.yml @@ -0,0 +1,6 @@ +extensions: + - addsTo: + pack: codeql/threat-models + extensible: threatModelConfiguration + data: + - ["local", true, 0] diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.qlref b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.qlref new file mode 100644 index 000000000000..d1d02cbe8d37 --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/SqlInjection.qlref @@ -0,0 +1 @@ +Security/CWE-089/SqlInjection.ql diff --git a/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/test.py b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/test.py new file mode 100644 index 000000000000..97bfa393cedf --- /dev/null +++ b/python/ql/test/query-tests/Security/CWE-089-SqlInjection-local-threat-model/test.py @@ -0,0 +1,6 @@ +# test that enabling local threat-model works end-to-end +import sys +import psycopg + +conn = psycopg.connect(...) +conn.execute(sys.argv[1]) From 0ccb5b198a426c792428745de4ae3879f6fa99d0 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 16 Aug 2024 11:11:11 +0200 Subject: [PATCH 14/21] Python: Add change-note --- python/ql/lib/change-notes/2024-08-16-threat-models.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 python/ql/lib/change-notes/2024-08-16-threat-models.md diff --git a/python/ql/lib/change-notes/2024-08-16-threat-models.md b/python/ql/lib/change-notes/2024-08-16-threat-models.md new file mode 100644 index 000000000000..ba01e6f6fbda --- /dev/null +++ b/python/ql/lib/change-notes/2024-08-16-threat-models.md @@ -0,0 +1,4 @@ +--- +category: feature +--- +* Added support for custom threat-models, which can be used in most of our taint-tracking queries, see our [documentation](https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#extending-codeql-coverage-with-threat-models) for more details. From 7d3793e7181917bfaad10c71dcc284f18ee79a3a Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 19 Aug 2024 10:57:21 +0200 Subject: [PATCH 15/21] Docs: Update threat-model list to include Python --- docs/codeql/reusables/beta-note-threat-models.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/codeql/reusables/beta-note-threat-models.rst b/docs/codeql/reusables/beta-note-threat-models.rst index 80c97d93376c..9fcca40975a1 100644 --- a/docs/codeql/reusables/beta-note-threat-models.rst +++ b/docs/codeql/reusables/beta-note-threat-models.rst @@ -2,4 +2,4 @@ Note - Threat models are currently in beta and subject to change. During the beta, threat models are supported only by Java and C# analysis. + Threat models are currently in beta and subject to change. During the beta, threat models are supported only by Java, C# and Python analysis. From 333367c07d10e69cc2df68dfa52bc094c583662c Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 16 Aug 2024 11:15:51 +0200 Subject: [PATCH 16/21] Python: Add threat-modeling of `raw_input` --- python/ql/lib/semmle/python/frameworks/Stdlib.model.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index 34b9dceb8b28..53d918d07ac3 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -14,6 +14,8 @@ extensions: - ['sys', 'Member[stdin]', 'stdin'] - ['builtins', 'Member[input].ReturnValue', 'stdin'] + - ['builtins', 'Member[raw_input].ReturnValue', 'stdin'] # python 2 only + # if no argument is given, the default is to use sys.argv[1:] - ['argparse.ArgumentParser', 'Member[parse_args,parse_known_args].WithArity[0].ReturnValue', 'commandargs'] From cbebf7b3922dbf39eca2d3538dcf75d8f1c78065 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 16 Aug 2024 11:51:07 +0200 Subject: [PATCH 17/21] Python: Additional threatModelSource annotations --- .../ql/test/library-tests/frameworks/django-v2-v3/manage.py | 4 ++-- .../library-tests/frameworks/django-v2-v3/testproj/asgi.py | 2 +- .../library-tests/frameworks/django-v2-v3/testproj/wsgi.py | 2 +- .../ql/test/library-tests/frameworks/rest_framework/manage.py | 4 ++-- .../library-tests/frameworks/stdlib-py3/FileSystemAccess.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/ql/test/library-tests/frameworks/django-v2-v3/manage.py b/python/ql/test/library-tests/frameworks/django-v2-v3/manage.py index 0e1a0b64a6e6..9fa5846c9551 100755 --- a/python/ql/test/library-tests/frameworks/django-v2-v3/manage.py +++ b/python/ql/test/library-tests/frameworks/django-v2-v3/manage.py @@ -6,7 +6,7 @@ def main(): """Run administrative tasks.""" - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ try: from django.core.management import execute_from_command_line except ImportError as exc: @@ -15,7 +15,7 @@ def main(): "available on your PYTHONPATH environment variable? Did you " "forget to activate a virtual environment?" ) from exc - execute_from_command_line(sys.argv) + execute_from_command_line(sys.argv) # $ threatModelSource[commandargs]=sys.argv if __name__ == '__main__': diff --git a/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/asgi.py b/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/asgi.py index 33b113ce911d..4906268e4161 100644 --- a/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/asgi.py +++ b/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/asgi.py @@ -11,6 +11,6 @@ from django.core.asgi import get_asgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ application = get_asgi_application() diff --git a/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/wsgi.py b/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/wsgi.py index b466637895b7..a3f803658abc 100644 --- a/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/wsgi.py +++ b/python/ql/test/library-tests/frameworks/django-v2-v3/testproj/wsgi.py @@ -11,6 +11,6 @@ from django.core.wsgi import get_wsgi_application -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ application = get_wsgi_application() diff --git a/python/ql/test/library-tests/frameworks/rest_framework/manage.py b/python/ql/test/library-tests/frameworks/rest_framework/manage.py index 0e1a0b64a6e6..9fa5846c9551 100755 --- a/python/ql/test/library-tests/frameworks/rest_framework/manage.py +++ b/python/ql/test/library-tests/frameworks/rest_framework/manage.py @@ -6,7 +6,7 @@ def main(): """Run administrative tasks.""" - os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'testproj.settings') # $ threatModelSource[environment]=os.environ try: from django.core.management import execute_from_command_line except ImportError as exc: @@ -15,7 +15,7 @@ def main(): "available on your PYTHONPATH environment variable? Did you " "forget to activate a virtual environment?" ) from exc - execute_from_command_line(sys.argv) + execute_from_command_line(sys.argv) # $ threatModelSource[commandargs]=sys.argv if __name__ == '__main__': diff --git a/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py b/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py index 4de7f3a3c329..45eff39d82cb 100644 --- a/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py +++ b/python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py @@ -17,7 +17,7 @@ name = windows.parent.name o = open -o(name) # $ getAPathArgument=name +o(name) # $ getAPathArgument=name threatModelSource[file]=o(..) wb = p.write_bytes wb(b"hello") # $ getAPathArgument=p fileWriteData=b"hello" From 5ff7b6557f523392c66db52f867a23e705c65839 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 30 Aug 2024 11:56:44 +0200 Subject: [PATCH 18/21] Python: Add links to threat-model docs --- python/ql/lib/semmle/python/Concepts.qll | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index 4703249ab93d..a5a7b8f5b9ac 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -21,6 +21,10 @@ private import codeql.threatmodels.ThreatModels class ThreatModelSource extends DataFlow::Node instanceof ThreatModelSource::Range { /** * Gets a string that represents the source kind with respect to threat modeling. + * + * See + * - https://github.com/github/codeql/blob/main/docs/codeql/reusables/threat-model-description.rst + * - https://github.com/github/codeql/blob/main/shared/threat-models/ext/threat-model-grouping.model.yml */ string getThreatModel() { result = super.getThreatModel() } @@ -39,6 +43,10 @@ module ThreatModelSource { abstract class Range extends DataFlow::Node { /** * Gets a string that represents the source kind with respect to threat modeling. + * + * See + * - https://github.com/github/codeql/blob/main/docs/codeql/reusables/threat-model-description.rst + * - https://github.com/github/codeql/blob/main/shared/threat-models/ext/threat-model-grouping.model.yml */ abstract string getThreatModel(); From e35c2b243a5cb101f3d97d89aca34cef222a79c3 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 10 Sep 2024 16:44:03 +0200 Subject: [PATCH 19/21] Docs: Include 'Threat models' for Python --- .../customizing-library-models-for-python.rst | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-python.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-python.rst index a244bd00f547..cba9226ccb0f 100644 --- a/docs/codeql/codeql-language-guides/customizing-library-models-for-python.rst +++ b/docs/codeql/codeql-language-guides/customizing-library-models-for-python.rst @@ -427,7 +427,7 @@ Kinds Source kinds ~~~~~~~~~~~~ -- **remote**: A generic source of remote flow. Most taint-tracking queries will use such a source. Currently this is the only supported source kind. +See documentation below for :ref:`Threat models `. Sink kinds ~~~~~~~~~~ @@ -449,3 +449,8 @@ Summary kinds - **taint**: A summary that propagates taint. This means the output is not necessarily equal to the input, but it was derived from the input in an unrestrictive way. An attacker who controls the input will have significant control over the output as well. - **value**: A summary that preserves the value of the input or creates a copy of the input such that all of its object properties are preserved. + +Threat models +------------- + +.. include:: ../reusables/threat-model-description.rst From e11bfc27bd20271c1b1c69f7b5ff0e23040b86a4 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Tue, 10 Sep 2024 16:53:52 +0200 Subject: [PATCH 20/21] Docs: Fix link --- .../customizing-library-models-for-python.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/codeql/codeql-language-guides/customizing-library-models-for-python.rst b/docs/codeql/codeql-language-guides/customizing-library-models-for-python.rst index cba9226ccb0f..30888f7b6092 100644 --- a/docs/codeql/codeql-language-guides/customizing-library-models-for-python.rst +++ b/docs/codeql/codeql-language-guides/customizing-library-models-for-python.rst @@ -427,7 +427,7 @@ Kinds Source kinds ~~~~~~~~~~~~ -See documentation below for :ref:`Threat models `. +See documentation below for :ref:`Threat models `. Sink kinds ~~~~~~~~~~ @@ -450,6 +450,8 @@ Summary kinds - **taint**: A summary that propagates taint. This means the output is not necessarily equal to the input, but it was derived from the input in an unrestrictive way. An attacker who controls the input will have significant control over the output as well. - **value**: A summary that preserves the value of the input or creates a copy of the input such that all of its object properties are preserved. +.. _threat-models-python: + Threat models ------------- From 535db988239430f8fa6d9209117351d8ea2686a6 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Mon, 23 Sep 2024 11:21:55 +0200 Subject: [PATCH 21/21] Python: Minor simplification of `ActiveThreatModelSource` Co-authored-by: Taus --- python/ql/lib/semmle/python/Concepts.qll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/ql/lib/semmle/python/Concepts.qll b/python/ql/lib/semmle/python/Concepts.qll index a5a7b8f5b9ac..50506439dfba 100644 --- a/python/ql/lib/semmle/python/Concepts.qll +++ b/python/ql/lib/semmle/python/Concepts.qll @@ -58,11 +58,11 @@ module ThreatModelSource { /** * A data flow source that is enabled in the current threat model configuration. */ -class ActiveThreatModelSource extends DataFlow::Node { +class ActiveThreatModelSource extends ThreatModelSource { ActiveThreatModelSource() { exists(string kind | currentThreatModel(kind) and - this.(ThreatModelSource).getThreatModel() = kind + this.getThreatModel() = kind ) } }