Skip to content

Commit

Permalink
Merge pull request #17871 from paldepind/rust-data-flow-skeleton
Browse files Browse the repository at this point in the history
Rust: Add basic skeleton setup for data flow
  • Loading branch information
paldepind authored Oct 31, 2024
2 parents c936468 + efa59fd commit b96698a
Show file tree
Hide file tree
Showing 10 changed files with 441 additions and 0 deletions.
36 changes: 36 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/DataFlow.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/**
* Provides a module for performing local (intra-procedural) and global
* (inter-procedural) data flow analyses.
*/

private import rust
private import codeql.dataflow.DataFlow
private import internal.DataFlowImpl as DataFlowImpl
private import DataFlowImpl::Node as Node

/**
 * Provides the classes and predicates used to perform local
 * (intra-procedural) and global (inter-procedural) data flow analyses.
 */
module DataFlow {
  /** An element, viewed as a node in a data flow graph. */
  final class Node = Node::Node;

  /**
   * The value of a parameter at function entry, viewed as a node in a data
   * flow graph.
   */
  final class ParameterNode = Node::ParameterNode;

  /**
   * A node associated with an object after an operation that might have
   * changed its state.
   */
  final class PostUpdateNode = Node::PostUpdateNode;

  /**
   * Holds if there is a single local (intra-procedural) data flow step from
   * `nodeFrom` to `nodeTo`.
   */
  predicate localFlowStep = DataFlowImpl::localFlowStepImpl/2;

  /**
   * Holds if `sink` is reachable from `source` through zero or more local
   * (intra-procedural) data flow steps.
   */
  pragma[inline]
  predicate localFlow(Node::Node source, Node::Node sink) {
    // Reflexive, transitive closure of the single-step relation.
    localFlowStep*(source, sink)
  }

  // Instantiate the shared data flow library with the Rust-specific input.
  import DataFlowMake<Location, DataFlowImpl::RustDataFlow>
}
17 changes: 17 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/TaintTracking.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Provides the module `TaintTracking` for performing local (intra-procedural)
* and global (inter-procedural) taint-tracking analyses.
*/

private import rust

/**
 * Provides the classes and predicates used to perform local
 * (intra-procedural) and global (inter-procedural) taint-tracking analyses.
 */
module TaintTracking {
  private import codeql.dataflow.TaintTracking
  private import internal.DataFlowImpl
  private import internal.TaintTrackingImpl
  // Instantiate the shared taint-tracking library with the Rust-specific
  // data flow and taint-tracking inputs.
  import TaintFlowMake<Location, RustDataFlow, RustTaintTracking>
}
308 changes: 308 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
/**
* Provides Rust-specific definitions for use in the data flow library.
*/

private import codeql.util.Void
private import codeql.util.Unit
private import codeql.dataflow.DataFlow
private import codeql.dataflow.internal.DataFlowImpl
private import rust
private import codeql.rust.controlflow.ControlFlowGraph
private import codeql.rust.dataflow.Ssa

/** Provides the classes that represent nodes in the Rust data flow graph. */
module Node {
  /**
   * An element, viewed as a node in a data flow graph.
   *
   * In this file the only concrete subclasses are `ParameterNode` and the
   * empty placeholder `NaNode`; no class for expression nodes is defined here
   * yet, even though `TExprNode` is part of the underlying `TNode` type.
   */
  abstract class Node extends TNode {
    /** Gets the location of this node. */
    abstract Location getLocation();

    /** Gets a textual representation of this node. */
    abstract string toString();

    /**
     * Gets the expression that corresponds to this node, if any.
     */
    Expr asExpr() { none() }

    /**
     * Gets the control flow node that corresponds to this data flow node, if
     * any.
     */
    CfgNode getCfgNode() { none() }

    /**
     * Gets this node's underlying SSA definition, if any.
     */
    Ssa::Definition asDefinition() { none() }

    /**
     * Gets the parameter that corresponds to this node, if any.
     */
    Param asParameter() { none() }
  }

  /**
   * A node type that is not implemented.
   *
   * The characteristic predicate is `none()`, so this class has no values. It
   * serves as a placeholder for node kinds that are not yet modelled.
   */
  final class NaNode extends Node {
    NaNode() { none() }

    override string toString() { result = "N/A" }

    override Location getLocation() { none() }
  }

  /**
   * The value of a parameter at function entry, viewed as a node in a data
   * flow graph.
   */
  final class ParameterNode extends Node {
    Param param;

    ParameterNode() { this = TSourceParameterNode(param) }

    override Location getLocation() { result = param.getLocation() }

    override string toString() { result = param.toString() }
  }

  /** An argument node. Not yet implemented; currently the empty `NaNode`. */
  final class ArgumentNode = NaNode;

  /** A return node. Not yet implemented; currently has no values. */
  final class ReturnNode extends NaNode {
    /** Gets the kind of this return node. Not yet implemented. */
    RustDataFlow::ReturnKind getKind() { none() }
  }

  /** An out node. Not yet implemented; currently the empty `NaNode`. */
  final class OutNode = NaNode;

  /**
   * A node associated with an object after an operation that might have
   * changed its state.
   *
   * This can be either the argument to a callable after the callable returns
   * (which might have mutated the argument), or the qualifier of a field after
   * an update to the field.
   *
   * Nodes corresponding to AST elements, for example `ExprNode`, usually refer
   * to the value before the update.
   *
   * Not yet implemented; currently has no values.
   */
  final class PostUpdateNode extends NaNode {
    /** Gets the node before the state update. */
    Node getPreUpdateNode() { none() }
  }

  /** A cast node. Not yet implemented; currently the empty `NaNode`. */
  final class CastNode = NaNode;
}

/**
 * The Rust-specific input to the shared data flow library.
 *
 * This is a skeleton: most predicates are stubbed out with `none()` and
 * several types are `Void`/`Unit` placeholders.
 */
module RustDataFlow implements InputSig<Location> {
  /**
   * An element, viewed as a node in a data flow graph. Either an expression
   * (`ExprNode`) or a parameter (`ParameterNode`).
   */
  final class Node = Node::Node;

  /** The value of a parameter at function entry, viewed as a data flow node. */
  final class ParameterNode = Node::ParameterNode;

  /** An argument node. Not yet implemented. */
  final class ArgumentNode = Node::ArgumentNode;

  /** A return node. Not yet implemented. */
  final class ReturnNode = Node::ReturnNode;

  /** An out node. Not yet implemented. */
  final class OutNode = Node::OutNode;

  /**
   * A node associated with an object after an operation that might have
   * changed its state. Not yet implemented.
   */
  final class PostUpdateNode = Node::PostUpdateNode;

  /** A cast node. Not yet implemented. */
  final class CastNode = Node::CastNode;

  /**
   * Holds if `p` is a parameter node of callable `c` at position `pos`.
   * Not yet implemented.
   */
  predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) { none() }

  /**
   * Holds if `n` is an argument node of call `call` at position `pos`.
   * Not yet implemented.
   */
  predicate isArgumentNode(ArgumentNode n, DataFlowCall call, ArgumentPosition pos) { none() }

  /** Gets the callable that encloses `node`. Not yet implemented. */
  DataFlowCallable nodeGetEnclosingCallable(Node node) { none() }

  /** Gets the data flow type of `node`. Not yet implemented. */
  DataFlowType getNodeType(Node node) { none() }

  /** Holds if `node` is hidden. Currently holds for no node. */
  predicate nodeIsHidden(Node node) { none() }

  /** A data flow expression. Placeholder type. */
  class DataFlowExpr = Void;

  /** Gets the node corresponding to `e`. */
  Node exprNode(DataFlowExpr e) { none() }

  /** A call in the data flow graph, backed by a `CallExpr`. */
  final class DataFlowCall extends TNormalCall {
    private CallExpr c;

    DataFlowCall() { this = TNormalCall(c) }

    /** Gets the callable that encloses this call. Not yet implemented. */
    DataFlowCallable getEnclosingCallable() { none() }

    /** Gets a textual representation of this call. */
    string toString() { result = c.toString() }

    /** Gets the location of this call. */
    Location getLocation() { result = c.getLocation() }
  }

  /** A callable in the data flow graph, represented by a `CfgScope`. */
  final class DataFlowCallable = CfgScope;

  /** A return kind. Placeholder type. */
  final class ReturnKind = Void;

  /** Gets a viable implementation of the target of the given `Call`. */
  DataFlowCallable viableCallable(DataFlowCall c) { none() }

  /**
   * Gets a node that can read the value returned from `call` with return kind
   * `kind`. Not yet implemented.
   */
  OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { none() }

  /** A data flow type. Placeholder type. */
  final class DataFlowType = Unit;

  /** Holds if `t1` and `t2` are compatible. Holds for every pair of types. */
  predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }

  /** Holds if `t1` is strictly stronger than `t2`. Never holds. */
  predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }

  /** A content. Placeholder type. */
  final class Content = Void;

  /** Holds if high precision should be used for `c`. Never holds. */
  predicate forceHighPrecision(Content c) { none() }

  /** A set of contents. */
  class ContentSet extends TContentSet {
    /** Gets a textual representation of this element. */
    string toString() { result = "ContentSet" }

    /** Gets a content that may be stored into when storing into this set. */
    Content getAStoreContent() { none() }

    /** Gets a content that may be read from when reading from this set. */
    Content getAReadContent() { none() }
  }

  /** An approximation of a content. Placeholder type. */
  final class ContentApprox = Void;

  /** Gets an approximation of content `c`. */
  ContentApprox getContentApprox(Content c) { any() }

  /** A parameter position. Placeholder: the only value is the string `"pos"`. */
  class ParameterPosition extends string {
    ParameterPosition() { this = "pos" }
  }

  /** An argument position. Placeholder: the only value is the string `"pos"`. */
  class ArgumentPosition extends string {
    ArgumentPosition() { this = "pos" }
  }

  /**
   * Holds if the parameter position `ppos` matches the argument position
   * `apos`.
   */
  predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { none() }

  /**
   * Holds if there is a simple local flow step from `node1` to `node2`. These
   * are the value-preserving intra-callable flow steps.
   */
  predicate simpleLocalFlowStep(Node node1, Node node2, string model) { none() }

  /**
   * Holds if data can flow from `node1` to `node2` through a non-local step
   * that does not follow a call edge. For example, a step through a global
   * variable.
   */
  predicate jumpStep(Node node1, Node node2) { none() }

  /**
   * Holds if data can flow from `node1` to `node2` via a read of `c`. Thus,
   * `node1` references an object with a content `c.getAReadContent()` whose
   * value ends up in `node2`.
   */
  predicate readStep(Node node1, ContentSet c, Node node2) { none() }

  /**
   * Holds if data can flow from `node1` to `node2` via a store into `c`. Thus,
   * `node2` references an object with a content `c.getAStoreContent()` that
   * contains the value of `node1`.
   */
  predicate storeStep(Node node1, ContentSet c, Node node2) { none() }

  /**
   * Holds if values stored inside content `c` are cleared at node `n`. For example,
   * any value stored inside `f` is cleared at the pre-update node associated with `x`
   * in `x.f = newValue`.
   */
  predicate clearsContent(Node n, ContentSet c) { none() }

  /**
   * Holds if the value that is being tracked is expected to be stored inside content `c`
   * at node `n`.
   */
  predicate expectsContent(Node n, ContentSet c) { none() }

  /** A region of nodes. Placeholder built on `Void`. */
  class NodeRegion instanceof Void {
    /** Gets a textual representation of this region. */
    string toString() { result = "NodeRegion" }

    /** Holds if this region contains `n`. Never holds. */
    predicate contains(Node n) { none() }
  }

  /**
   * Holds if the nodes in `nr` are unreachable when the call context is `call`.
   */
  predicate isUnreachableInCall(NodeRegion nr, DataFlowCall call) { none() }

  /**
   * Holds if flow is allowed to pass from parameter `p` and back to itself as a
   * side-effect, resulting in a summary from `p` to itself.
   *
   * One example would be to allow flow like `p.foo = p.bar;`, which is disallowed
   * by default as a heuristic.
   */
  predicate allowParameterReturnInSelf(ParameterNode p) { none() }

  /**
   * Holds if the value of `node2` is given by `node1`.
   *
   * This predicate is combined with type information in the following way: If
   * the data flow library is able to compute an improved type for `node1` then
   * it will also conclude that this type applies to `node2`. Vice versa, if
   * `node2` must be visited along a flow path, then any type known for `node2`
   * must also apply to `node1`.
   */
  predicate localMustFlowStep(Node node1, Node node2) { none() }

  /** A kind of lambda call. Placeholder type. */
  class LambdaCallKind = Void;

  /** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
  predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() }

  /** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */
  predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() }

  /** Extra data-flow steps needed for lambda flow analysis. */
  predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() }

  /** Holds if `source` is a source described by `model`. Never holds. */
  predicate knownSourceModel(Node source, string model) { none() }

  /** Holds if `sink` is a sink described by `model`. Never holds. */
  predicate knownSinkModel(Node sink, string model) { none() }

  /** A second-level scope. Placeholder type. */
  class DataFlowSecondLevelScope = Void;
}

/** A set of contents; a file-level alias for `RustDataFlow::ContentSet`. */
final class ContentSet = RustDataFlow::ContentSet;

import MakeImpl<Location, RustDataFlow>

/**
 * Cached types and predicates, grouped in one module so that they are
 * evaluated in the same stage.
 */
cached
private module Cached {
  /** The underlying representation of data flow nodes. */
  cached
  newtype TNode =
    TExprNode(CfgNode n, Expr e) { e = n.getAstNode() } or
    TSourceParameterNode(Param param)

  /** The underlying representation of data flow calls. */
  cached
  newtype TDataFlowCall = TNormalCall(CallExpr c)

  /** The underlying representation of content sets. */
  cached
  newtype TOptionalContentSet =
    TAnyElementContent() or
    TAnyContent()

  /** The union of the branches of `TOptionalContentSet` that form a content set. */
  cached
  class TContentSet = TAnyElementContent or TAnyContent;

  /**
   * The local flow predicate that is exposed publicly. Currently a stub that
   * holds for no pair of nodes.
   */
  cached
  predicate localFlowStepImpl(Node::Node nodeFrom, Node::Node nodeTo) { none() }
}

import Cached
20 changes: 20 additions & 0 deletions rust/ql/lib/codeql/rust/dataflow/internal/TaintTrackingImpl.qll
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
private import rust
private import codeql.dataflow.TaintTracking
private import DataFlowImpl

/**
 * The Rust-specific input to the shared taint-tracking library. All predicates
 * are currently stubs that never hold.
 */
module RustTaintTracking implements InputSig<Location, RustDataFlow> {
  /**
   * Holds if `node` should be a sanitizer in all global taint flow
   * configurations.
   */
  predicate defaultTaintSanitizer(Node::Node node) { none() }

  /**
   * Holds if the step from `src` to `sink` is an additional taint step that
   * should be part of every global taint flow configuration.
   */
  predicate defaultAdditionalTaintStep(Node::Node src, Node::Node sink, string model) {
    none()
  }

  /**
   * Holds if taint flow configurations should permit implicit reads of `c` at
   * sinks and at inputs to additional taint steps.
   */
  bindingset[node]
  predicate defaultImplicitTaintRead(Node::Node node, ContentSet c) { none() }
}
Empty file.
Loading

0 comments on commit b96698a

Please sign in to comment.