From 12e11b2b3afca1b70cda5ba4210ceb873a554dc6 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Wed, 3 Jul 2024 11:35:24 +0200 Subject: [PATCH] SSA: Add data-flow integration layer --- shared/ssa/codeql/ssa/Ssa.qll | 504 ++++++++++++++++++++++++++++++++++ 1 file changed, 504 insertions(+) diff --git a/shared/ssa/codeql/ssa/Ssa.qll b/shared/ssa/codeql/ssa/Ssa.qll index 4e61d61efa717..92c2dfc237a7a 100644 --- a/shared/ssa/codeql/ssa/Ssa.qll +++ b/shared/ssa/codeql/ssa/Ssa.qll @@ -1163,4 +1163,508 @@ module Make Input> { ) } } + + /** Provides the input to `DataFlowIntegration`. */ + signature module DataFlowIntegrationInputSig { + /** + * An expression with a value. That is, we expect these expressions to be + * represented in the data flow graph. + */ + class Expr { + /** Gets a textual representation of this expression. */ + string toString(); + + /** Holds if the `i`th node of basic block `bb` evaluates this expression. */ + predicate hasCfgNode(BasicBlock bb, int i); + } + + /** Holds is SSA definition `def` assigns `value` to the underlying variable. */ + predicate ssaDefAssigns(WriteDefinition def, Expr value); + + /** A parameter. */ + class Parameter { + /** Gets a textual representation of this parameter. */ + string toString(); + + /** Gets the location of this parameter. */ + Location getLocation(); + } + + /** Holds if SSA definition `def` initializes parameter `p` at function entry. */ + predicate ssaDefInitializesParam(WriteDefinition def, Parameter p); + + /** + * Holds if flow should be allowed into uncertain SSA definition `def` from + * previous definitions or reads. + */ + default predicate allowFlowIntoUncertainDef(UncertainWriteDefinition def) { none() } + } + + /** + * Constructs the type `Node` and associated value step relation, which are + * intended to be included in the `DataFlow::Node` type and local step relation. + * + * Additionally, this module also provides a barrier guards implementation. + */ + module DataFlowIntegration { + private import codeql.util.Boolean + + final private class DefinitionExtFinal = DefinitionExt; + + pragma[nomagic] + private predicate adjacentDefReachesReadExt( + DefinitionExt def, SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2 + ) { + adjacentDefReadExt(def, v, bb1, i1, bb2, i2) and + ( + def.definesAt(v, bb1, i1, _) + or + variableRead(bb1, i1, v, true) + ) + or + exists(BasicBlock bb3, int i3 | + adjacentDefReachesReadExt(def, v, bb1, i1, bb3, i3) and + variableRead(bb3, i3, v, false) and + adjacentDefReadExt(def, v, bb3, i3, bb2, i2) + ) + } + + pragma[nomagic] + private predicate adjacentDefReachesUncertainReadExt( + DefinitionExt def, SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2 + ) { + adjacentDefReachesReadExt(def, v, bb1, i1, bb2, i2) and + variableRead(bb2, i2, v, false) + } + + /** + * Holds if the reference to `def` at index `i` in basic block `bb` can reach + * another definition `next` of the same underlying source variable, without + * passing through another write or non-pseudo read. + * + * The reference is either a read of `def` or `def` itself. + */ + pragma[nomagic] + private predicate lastRefBeforeRedefExt( + DefinitionExt def, SourceVariable v, BasicBlock bb, int i, BasicBlock input, + DefinitionExt next + ) { + lastRefRedefExt(def, v, bb, i, input, next) and + not variableRead(bb, i, v, false) + or + exists(BasicBlock bb0, int i0 | + lastRefRedefExt(def, v, bb0, i0, input, next) and + adjacentDefReachesUncertainReadExt(def, v, bb, i, bb0, i0) + ) + } + + /** Same as `adjacentDefReadExt`, but skips uncertain reads. */ + pragma[nomagic] + private predicate adjacentDefSkipUncertainReadsExt( + DefinitionExt def, SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2 + ) { + adjacentDefReachesReadExt(def, v, bb1, i1, bb2, i2) and + variableRead(bb2, i2, v, true) + } + + pragma[nomagic] + private predicate adjacentReadPairExt(DefinitionExt def, ReadNode read1, ReadNode read2) { + exists(SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2 | + read1.readsAt(bb1, i1, v) and + adjacentDefSkipUncertainReadsExt(def, v, bb1, i1, bb2, i2) and + read2.readsAt(bb2, i2, v) + ) + } + + /** An SSA definition into which another SSA definition may flow. */ + private class SsaInputDefinitionExt extends DefinitionExtFinal { + SsaInputDefinitionExt() { + this instanceof PhiNode + or + this instanceof PhiReadNode + or + DfInput::allowFlowIntoUncertainDef(this) + } + + /** Holds if `def` may flow into this definition via basic block `input`. */ + predicate hasInputFromBlock( + DefinitionExt def, SourceVariable v, BasicBlock bb, int i, BasicBlock input + ) { + lastRefBeforeRedefExt(def, v, bb, i, input, this) + } + } + + private newtype TNode = + TParamNode(DfInput::Parameter p) or + TExprNode(DfInput::Expr e, Boolean isPost) { + exists(BasicBlock bb, int i | + variableRead(bb, i, _, true) and + e.hasCfgNode(bb, i) + ) + or + isPost = false + } or + TSsaDefinitionNode(DefinitionExt def) or + TSsaInputNode(SsaInputDefinitionExt def, BasicBlock input) { + def.hasInputFromBlock(_, _, _, _, input) + } + + /** + * A data flow node that we need to reference in the value step relation. + * + * Note that only the `SsaNode` subclass is expected to be added as additional + * nodes in `DataFlow::Node`. The other subclasses are expected to already be + * present and are included here in order to reference them in the step relation. + */ + abstract private class NodeImpl extends TNode { + /** Gets the location of this node. */ + abstract Location getLocation(); + + /** Gets a textual representation of this node. */ + abstract string toString(); + } + + final class Node = NodeImpl; + + /** A parameter node. */ + private class ParameterNodeImpl extends NodeImpl, TParamNode { + private DfInput::Parameter p; + + ParameterNodeImpl() { this = TParamNode(p) } + + /** Gets the underlying parameter. */ + DfInput::Parameter getParameter() { result = p } + + override string toString() { result = p.toString() } + + override Location getLocation() { result = p.getLocation() } + } + + final class ParameterNode = ParameterNodeImpl; + + /** A (post-update) expression node. */ + abstract private class ExprNodePreOrPostImpl extends NodeImpl, TExprNode { + DfInput::Expr e; + boolean isPost; + + ExprNodePreOrPostImpl() { this = TExprNode(e, isPost) } + + /** Gets the underlying expression. */ + DfInput::Expr getExpr() { result = e } + + override Location getLocation() { + exists(BasicBlock bb, int i | + e.hasCfgNode(bb, i) and + result = bb.getNode(i).getLocation() + ) + } + } + + final class ExprNodePreOrPost = ExprNodePreOrPostImpl; + + /** An expression node. */ + private class ExprNodeImpl extends ExprNodePreOrPostImpl { + ExprNodeImpl() { isPost = false } + + override string toString() { result = e.toString() } + } + + final class ExprNode = ExprNodeImpl; + + /** A post-update expression node. */ + private class ExprPostUpdateNodeImpl extends ExprNodePreOrPostImpl { + ExprPostUpdateNodeImpl() { isPost = true } + + /** Gets the pre-update expression node. */ + ExprNode getPreUpdateNode() { result = TExprNode(e, false) } + + override string toString() { result = e.toString() + " [postupdate]" } + } + + final class ExprPostUpdateNode = ExprPostUpdateNodeImpl; + + private class ReadNodeImpl extends ExprNodeImpl { + private BasicBlock bb_; + private int i_; + private SourceVariable v_; + + ReadNodeImpl() { + variableRead(bb_, i_, v_, true) and + this.getExpr().hasCfgNode(bb_, i_) + } + + SourceVariable getVariable() { result = v_ } + + pragma[nomagic] + predicate readsAt(BasicBlock bb, int i, SourceVariable v) { + bb = bb_ and + i = i_ and + v = v_ + } + } + + final private class ReadNode = ReadNodeImpl; + + /** A synthesized SSA data flow node. */ + abstract private class SsaNodeImpl extends NodeImpl { + /** Gets the underlying SSA definition. */ + abstract DefinitionExt getDefinitionExt(); + } + + final class SsaNode = SsaNodeImpl; + + /** An SSA definition, viewed as a node in a data flow graph. */ + private class SsaDefinitionExtNodeImpl extends SsaNodeImpl, TSsaDefinitionNode { + private DefinitionExt def; + + SsaDefinitionExtNodeImpl() { this = TSsaDefinitionNode(def) } + + override DefinitionExt getDefinitionExt() { result = def } + + override Location getLocation() { result = def.getLocation() } + + override string toString() { result = def.toString() } + } + + final class SsaDefinitionExtNode = SsaDefinitionExtNodeImpl; + + /** + * A node that represents an input to an SSA phi (read) definition. + * + * This allows for barrier guards to filter input to phi nodes. For example, in + * + * ```rb + * x = taint + * if x != "safe" then + * x = "safe" + * end + * sink x + * ``` + * + * the `false` edge out of `x != "safe"` guards the input from `x = taint` into the + * `phi` node after the condition. + * + * It is also relevant to filter input into phi read nodes: + * + * ```rb + * x = taint + * if b then + * if x != "safe1" then + * return + * end + * else + * if x != "safe2" then + * return + * end + * end + * + * sink x + * ``` + * + * both inputs into the phi read node after the outer condition are guarded. + */ + private class SsaInputNodeImpl extends SsaNodeImpl, TSsaInputNode { + private SsaInputDefinitionExt def_; + private BasicBlock input_; + + SsaInputNodeImpl() { this = TSsaInputNode(def_, input_) } + + /** Holds if this node represents input into SSA definition `def` via basic block `input`. */ + predicate isInputInto(DefinitionExt def, BasicBlock input) { + def = def_ and + input = input_ + } + + override SsaInputDefinitionExt getDefinitionExt() { result = def_ } + + override Location getLocation() { result = input_.getNode(input_.length() - 1).getLocation() } + + override string toString() { result = "[input] " + def_.toString() } + } + + final class SsaInputNode = SsaInputNodeImpl; + + /** + * Holds if `nodeFrom` is a node for SSA definition `def`, which can input + * node `nodeTo`. + */ + pragma[nomagic] + private predicate inputFromDef( + DefinitionExt def, SsaDefinitionExtNode nodeFrom, SsaInputNode nodeTo + ) { + exists(SourceVariable v, BasicBlock bb, int i, BasicBlock input, SsaInputDefinitionExt next | + next.hasInputFromBlock(def, v, bb, i, input) and + def = nodeFrom.getDefinitionExt() and + def.definesAt(v, bb, i, _) and + nodeTo = TSsaInputNode(next, input) + ) + } + + /** + * Holds if `nodeFrom` is a last read of SSA definition `def`, which + * can reach input node `nodeTo`. + */ + pragma[nomagic] + private predicate inputFromRead(DefinitionExt def, ReadNode nodeFrom, SsaInputNode nodeTo) { + exists(SourceVariable v, BasicBlock bb, int i, BasicBlock input, SsaInputDefinitionExt next | + next.hasInputFromBlock(def, v, bb, i, input) and + nodeFrom.readsAt(bb, i, v) and + nodeTo = TSsaInputNode(next, input) + ) + } + + pragma[nomagic] + private predicate firstReadExt(DefinitionExt def, ReadNode read) { + exists(SourceVariable v, BasicBlock bb1, int i1, BasicBlock bb2, int i2 | + def.definesAt(v, bb1, i1, _) and + adjacentDefSkipUncertainReadsExt(def, v, bb1, i1, bb2, i2) and + read.readsAt(bb2, i2, v) + ) + } + + /** Holds if there is a local flow step from `nodeFrom` to `nodeTo`. */ + predicate localFlowStep(DefinitionExt def, Node nodeFrom, Node nodeTo) { + ( + // Flow from assignment into SSA definition + DfInput::ssaDefAssigns(def, nodeFrom.(ExprNode).getExpr()) + or + // Flow from parameter into entry definition + DfInput::ssaDefInitializesParam(def, nodeFrom.(ParameterNode).getParameter()) + ) and + nodeTo.(SsaDefinitionExtNode).getDefinitionExt() = def + or + // Flow from SSA definition to first read + def = nodeFrom.(SsaDefinitionExtNode).getDefinitionExt() and + firstReadExt(def, nodeTo) + or + // Flow from (post-update) read to next read + adjacentReadPairExt(def, [nodeFrom, nodeFrom.(ExprPostUpdateNode).getPreUpdateNode()], nodeTo) and + nodeFrom != nodeTo + or + // Flow into phi (read) SSA definition node from def + inputFromDef(def, nodeFrom, nodeTo) + or + // Flow into phi (read) SSA definition node from (post-update) read + inputFromRead(def, [nodeFrom, nodeFrom.(ExprPostUpdateNode).getPreUpdateNode()], nodeTo) + or + // Flow from input node to def + nodeTo.(SsaDefinitionExtNode).getDefinitionExt() = def and + def = nodeFrom.(SsaInputNode).getDefinitionExt() + } + + pragma[nomagic] + private DfInput::Expr getARead(Definition def) { + exists(SourceVariable v, BasicBlock bb, int i | + ssaDefReachesRead(v, def, bb, i) and + variableRead(bb, i, v, true) and + result.hasCfgNode(bb, i) + ) + } + + /** Holds if the value of `nodeTo` is given by `nodeFrom`. */ + predicate localMustFlowStep(DefinitionExt def, Node nodeFrom, Node nodeTo) { + ( + // Flow from assignment into SSA definition + DfInput::ssaDefAssigns(def, nodeFrom.(ExprNode).getExpr()) + or + // Flow from parameter into entry definition + DfInput::ssaDefInitializesParam(def, nodeFrom.(ParameterNode).getParameter()) + ) and + nodeTo.(SsaDefinitionExtNode).getDefinitionExt() = def + or + // Flow from SSA definition to read + nodeFrom.(SsaDefinitionExtNode).getDefinitionExt() = def and + nodeTo.(ExprNode).getExpr() = getARead(def) + } + + /** Provides the input to `BarrierGuards`. */ + signature module BarrierGuardsInputSig { + /** A (potential) guard. */ + class Guard { + /** Gets a textual representation of this guard. */ + string toString(); + + /** Holds if the `i`th node of basic block `bb` evaluates this guard. */ + predicate hasCfgNode(BasicBlock bb, int i); + } + + /** Holds if the guard `guard` controls block `bb` upon evaluating to `branch`. */ + predicate guardControlsBlock(Guard guard, BasicBlock bb, boolean branch); + + /** Gets an immediate conditional successor of basic block `bb`, if any. */ + BasicBlock getAConditionalBasicBlockSuccessor(BasicBlock bb, boolean branch); + } + + /** Provides an implementatino of the `BarrierGuard` module. */ + module BarrierGuards { + /** + * Holds if the guard `g` validates the expression `e` upon evaluating to `branch`. + * + * The expression `e` is expected to be a syntactic part of the guard `g`. + * For example, the guard `g` might be a call `isSafe(x)` and the expression `e` + * the argument `x`. + */ + signature predicate guardChecksSig(BgInput::Guard g, DfInput::Expr e, boolean branch); + + /** + * Provides a set of barrier nodes for a guard that validates an expression. + * + * This is expected to be used in `isBarrier`/`isSanitizer` definitions + * in data flow and taint tracking. + */ + module BarrierGuard { + pragma[nomagic] + private predicate guardChecksSsaDef(BgInput::Guard g, boolean branch, Definition def) { + guardChecks(g, getARead(def), branch) + } + + pragma[nomagic] + private predicate guardControlsSsaRead( + BgInput::Guard g, boolean branch, Definition def, ExprNode n + ) { + exists(BasicBlock bb, DfInput::Expr e | + e = n.getExpr() and + getARead(def) = e and + BgInput::guardControlsBlock(g, bb, branch) and + e.hasCfgNode(bb, _) + ) + } + + pragma[nomagic] + private predicate guardControlsPhiInput( + BgInput::Guard g, boolean branch, Definition def, BasicBlock input, + SsaInputDefinitionExt phi + ) { + phi.hasInputFromBlock(def, _, _, _, input) and + ( + BgInput::guardControlsBlock(g, input, branch) + or + exists(int last | + last = input.length() - 1 and + g.hasCfgNode(input, last) and + BgInput::getAConditionalBasicBlockSuccessor(input, branch) = phi.getBasicBlock() + ) + ) + } + + /** Gets a node that is safely guarded by the given guard check. */ + pragma[nomagic] + Node getABarrierNode() { + exists(BgInput::Guard g, boolean branch, Definition def | + guardChecksSsaDef(g, branch, def) and + guardControlsSsaRead(g, branch, def, result) + ) + or + exists( + BgInput::Guard g, boolean branch, Definition def, BasicBlock input, + SsaInputDefinitionExt phi + | + guardChecksSsaDef(g, branch, def) and + guardControlsPhiInput(g, branch, def, input, phi) and + result.(SsaInputNode).isInputInto(phi, input) + ) + } + } + } + } }