diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll index 581e85182a81..cab164846e2d 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll @@ -13,6 +13,7 @@ private import semmle.code.csharp.Unification private import semmle.code.csharp.controlflow.Guards private import semmle.code.csharp.dispatch.Dispatch private import semmle.code.csharp.frameworks.EntityFramework +private import semmle.code.csharp.frameworks.system.linq.Expressions private import semmle.code.csharp.frameworks.NHibernate private import semmle.code.csharp.frameworks.Razor private import semmle.code.csharp.frameworks.system.Collections @@ -1146,7 +1147,11 @@ private module Cached { TPrimaryConstructorParameterContent(Parameter p) { p.getCallable() instanceof PrimaryConstructor } or - TCapturedVariableContent(VariableCapture::CapturedVariable v) + TCapturedVariableContent(VariableCapture::CapturedVariable v) or + TDelegateCallArgumentContent(int i) { + i = [0 .. max(any(DelegateLikeCall dc).getNumberOfArguments()) - 1] + } or + TDelegateCallReturnContent() cached newtype TContentSet = @@ -1162,7 +1167,9 @@ private module Cached { TPrimaryConstructorParameterApproxContent(string firstChar) { firstChar = approximatePrimaryConstructorParameterContent(_) } or - TCapturedVariableContentApprox(VariableCapture::CapturedVariable v) + TCapturedVariableContentApprox(VariableCapture::CapturedVariable v) or + TDelegateCallArgumentApproxContent() or + TDelegateCallReturnApproxContent() pragma[nomagic] private predicate commonSubTypeGeneral(DataFlowTypeOrUnifiable t1, RelevantGvnType t2) { @@ -2273,6 +2280,21 @@ private predicate recordProperty(RecordType t, ContentSet c, string name) { ) } +/** + * Holds if data can flow from `node1` to `node2` via an assignment to + * the content set `c` of a delegate call. + * + * If there is a delegate call f(x), then we store "x" on "f" + * using a delegate argument content set. + */ +private predicate storeStepDelegateCall(ExplicitArgumentNode node1, ContentSet c, Node node2) { + exists(ExplicitDelegateLikeDataFlowCall call, int i | + node1.argumentOf(call, TPositionalArgumentPosition(i)) and + lambdaCall(call, _, node2.(PostUpdateNode).getPreUpdateNode()) and + c.isDelegateCallArgument(i) + ) +} + /** * Holds if data can flow from `node1` to `node2` via an assignment to * content `c`. @@ -2305,6 +2327,8 @@ predicate storeStep(Node node1, ContentSet c, Node node2) { or FlowSummaryImpl::Private::Steps::summaryStoreStep(node1.(FlowSummaryNode).getSummaryNode(), c, node2.(FlowSummaryNode).getSummaryNode()) + or + storeStepDelegateCall(node1, c, node2) } private class ReadStepConfiguration extends ControlFlowReachabilityConfiguration { @@ -2425,6 +2449,21 @@ private predicate readContentStep(Node node1, Content c, Node node2) { VariableCapture::readStep(node1, c, node2) } +/** + * Holds if data can flow from `node1` to `node2` via an assignment to + * the content set `c` of a delegate call. + * + * If there is a delegate call f(x), then we read the return of the delegate + * call. + */ +private predicate readStepDelegateCall(Node node1, ContentSet c, OutNode node2) { + exists(ExplicitDelegateLikeDataFlowCall call | + lambdaCall(call, _, node1) and + node2.getCall(TNormalReturnKind()) = call and + c.isDelegateCallReturn() + ) +} + /** * Holds if data can flow from `node1` to `node2` via a read of content `c`. */ @@ -2443,6 +2482,8 @@ predicate readStep(Node node1, ContentSet c, Node node2) { or FlowSummaryImpl::Private::Steps::summaryReadStep(node1.(FlowSummaryNode).getSummaryNode(), c, node2.(FlowSummaryNode).getSummaryNode()) + or + readStepDelegateCall(node1, c, node2) } private predicate clearsCont(Node n, Content c) { @@ -3037,6 +3078,12 @@ class ContentApprox extends TContentApprox { exists(VariableCapture::CapturedVariable v | this = TCapturedVariableContentApprox(v) and result = "captured " + v ) + or + this = TDelegateCallArgumentApproxContent() and + result = "approximated delegate call argument" + or + this = TDelegateCallReturnApproxContent() and + result = "approximated delegate call return" } } @@ -3073,6 +3120,12 @@ ContentApprox getContentApprox(Content c) { TPrimaryConstructorParameterApproxContent(approximatePrimaryConstructorParameterContent(c)) or result = TCapturedVariableContentApprox(VariableCapture::getCapturedVariableContent(c)) + or + c instanceof DelegateCallArgumentContent and + result = TDelegateCallArgumentApproxContent() + or + c instanceof DelegateCallReturnContent and + result = TDelegateCallReturnApproxContent() } /** diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll index 6717f8659eab..c5460652746b 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPublic.qll @@ -3,6 +3,7 @@ private import DataFlowDispatch private import DataFlowPrivate private import semmle.code.csharp.controlflow.Guards private import semmle.code.csharp.Unification +private import semmle.code.csharp.frameworks.system.linq.Expressions /** * An element, viewed as a node in a data flow graph. Either an expression @@ -238,6 +239,30 @@ class PropertyContent extends Content, TPropertyContent { override Location getLocation() { result = p.getLocation() } } +/** + * A reference to the index of an argument of a delegate call. + */ +class DelegateCallArgumentContent extends Content, TDelegateCallArgumentContent { + private int i; + + DelegateCallArgumentContent() { this = TDelegateCallArgumentContent(i) } + + override string toString() { result = "delegate argument at position " + i } + + override Location getLocation() { result instanceof EmptyLocation } +} + +/** + * A reference to the return of a delegate call. + */ +class DelegateCallReturnContent extends Content, TDelegateCallReturnContent { + DelegateCallReturnContent() { this = TDelegateCallReturnContent() } + + override string toString() { result = "delegate return" } + + override Location getLocation() { result instanceof EmptyLocation } +} + /** * A reference to a synthetic field corresponding to a * primary constructor parameter. @@ -299,6 +324,16 @@ class ContentSet extends TContentSet { */ predicate isProperty(Property p) { this = TPropertyContentSet(p) } + /** + * Holds if this content set represents the `i`th argument of a delegate call. + */ + predicate isDelegateCallArgument(int i) { this.isSingleton(TDelegateCallArgumentContent(i)) } + + /** + * Holds if this content set represents the return of a delegate call. + */ + predicate isDelegateCallReturn() { this.isSingleton(TDelegateCallReturnContent()) } + /** Holds if this content set represents the field `f`. */ predicate isField(Field f) { this.isSingleton(TFieldContent(f)) } diff --git a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll index fb2bfafbf258..aa456fe2c790 100644 --- a/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/csharp/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -101,7 +101,9 @@ module ModelGeneratorInput implements ModelGeneratorInputSig MyFunction; + // summary=Models;BasicFlow;false;ApplyMyFunction;(System.Object);;Argument[0];Argument[this];taint;df-generated + // summary=Models;BasicFlow;false;ApplyMyFunction;(System.Object);;Argument[this];ReturnValue;taint;df-generated + // No content based flow as MaD doesn't support callback logic in fields and properties. + public object ApplyMyFunction(object o) + { + return MyFunction(o); + } } public class CollectionFlow @@ -497,18 +506,55 @@ public Type M6(Type t) } } -// No models as higher order methods are excluded -// from model generation. +// Methods in this class are "neutral" with respect to the heuristic model generation, but +// the content based model generation is able to produce flow summaries for them. public class HigherOrderParameters { + // neutral=Models;HigherOrderParameters;M1;(System.String,System.Func);summary;df-generated + // contentbased-summary=Models;HigherOrderParameters;false;M1;(System.String,System.Func);;Argument[0];ReturnValue;value;dfc-generated public string M1(string s, Func map) { return s; } - public object M2(Func map, object o) + // neutral=Models;HigherOrderParameters;Apply;(System.Func,System.Object);summary;df-generated + // contentbased-summary=Models;HigherOrderParameters;false;Apply;(System.Func,System.Object);;Argument[1];Argument[0].Parameter[0];value;dfc-generated + // contentbased-summary=Models;HigherOrderParameters;false;Apply;(System.Func,System.Object);;Argument[0].ReturnValue;ReturnValue;value;dfc-generated + public object Apply(Func f, object o) + { + return f(o); + } + + // neutral=Models;HigherOrderParameters;Apply2;(System.Object,System.Func);summary;df-generated + // contentbased-summary=Models;HigherOrderParameters;false;Apply2;(System.Object,System.Func);;Argument[0];Argument[1].Parameter[1];value;dfc-generated + // contentbased-summary=Models;HigherOrderParameters;false;Apply2;(System.Object,System.Func);;Argument[1].ReturnValue;ReturnValue;value;dfc-generated + public object Apply2(object o, Func f) { - return map(o); + var x = f(null, o); + return x; + } + + // neutral=Models;HigherOrderParameters;Apply;(System.Action,System.Object);summary;df-generated + // contentbased-summary=Models;HigherOrderParameters;false;Apply;(System.Action,System.Object);;Argument[1];Argument[0].Parameter[0];value;dfc-generated + public void Apply(Action a, object o) + { + a(o); + } +} + +public static class HigherOrderExtensionMethods +{ + // neutral=Models;HigherOrderExtensionMethods;Select;(System.Collections.Generic.IEnumerable,System.Func);summary;df-generated + // contentbased-summary=Models;HigherOrderExtensionMethods;false;Select;(System.Collections.Generic.IEnumerable,System.Func);;Argument[0].Element;Argument[1].Parameter[0];value;dfc-generated + // contentbased-summary=Models;HigherOrderExtensionMethods;false;Select;(System.Collections.Generic.IEnumerable,System.Func);;Argument[1].ReturnValue;ReturnValue.Element;value;dfc-generated + public static IEnumerable Select( + this IEnumerable source, + Func selector) + { + foreach (var item in source) + { + yield return selector(item); + } } } diff --git a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll index 6239b535c986..3e8859be9326 100644 --- a/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll +++ b/java/ql/src/utils/modelgenerator/internal/CaptureModels.qll @@ -88,6 +88,8 @@ module ModelGeneratorInput implements ModelGeneratorInputSig */ predicate isField(Lang::ContentSet c); + /** + * Holds if the content set `c` is callback like. + */ + predicate isCallback(Lang::ContentSet c); + /** * Gets the MaD synthetic name string representation for the content set `c`, if any. */ @@ -223,6 +228,15 @@ signature module ModelGeneratorInputSig */ predicate isUninterestingForDataFlowModels(Callable api); + /** + * Holds if it is irrelevant to generate models for `api` based on the heuristic + * (non-content) flow analysis. + * + * This serves as an extra filter for the `relevant` + * and `isUninterestingForDataFlowModels` predicates. + */ + predicate isUninterestingForHeuristicDataFlowModels(Callable api); + /** * Holds if `namespace`, `type`, `extensible`, `name` and `parameters` are string representations * for the corresponding MaD columns for `api`. @@ -300,7 +314,7 @@ module MakeModelGenerator< } } - string getOutput(ReturnNodeExt node) { + private string getOutput(ReturnNodeExt node) { result = PrintReturnNodeExt::getOutput(node) } @@ -432,7 +446,11 @@ module MakeModelGenerator< predicate isSource(DataFlow::Node source, FlowState state) { source instanceof DataFlow::ParameterNode and - source.(NodeExtended).getEnclosingCallable() instanceof DataFlowSummaryTargetApi and + exists(Callable c | + c = source.(NodeExtended).getEnclosingCallable() and + c instanceof DataFlowSummaryTargetApi and + not isUninterestingForHeuristicDataFlowModels(c) + ) and state.(TaintRead).getStep() = 0 } @@ -605,6 +623,24 @@ module MakeModelGenerator< isField(ap.getAtIndex(_)) } + /** + * Holds if this access path `ap` mentions a callback. + */ + private predicate mentionsCallback(PropagateContentFlow::AccessPath ap) { + isCallback(ap.getAtIndex(_)) + } + + /** + * Holds if the access path `ap` is not a parameter or returnvalue of a callback + * stored in a field. + * + * That is, we currently don't include summaries that rely on parameters or return values + * of callbacks stored in fields. + */ + private predicate validateAccessPath(PropagateContentFlow::AccessPath ap) { + not (mentionsField(ap) and mentionsCallback(ap)) + } + private predicate apiFlow( DataFlowSummaryTargetApi api, DataFlow::ParameterNode p, PropagateContentFlow::AccessPath reads, ReturnNodeExt returnNodeExt, @@ -846,6 +882,8 @@ module MakeModelGenerator< input = parameterNodeAsContentInput(p) + printReadAccessPath(reads) and output = getContentOutput(returnNodeExt) + printStoreAccessPath(stores) and input != output and + validateAccessPath(reads) and + validateAccessPath(stores) and ( if mentionsField(reads) or mentionsField(stores) then lift = false and api.isRelevant()