github · owen-mc · Nov 10, 2023 · Nov 28, 2023 · Nov 28, 2023 · Nov 28, 2023
@@ -166,6 +166,13 @@ class SsaDefinition extends TSsaDefinition {
   ) {
     this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
   }
+
+  /**
+   * Gets the first instruction that the value of this `SsaDefinition` can
+   * reach without passing through any other instructions, but possibly through
+   * phi nodes.
+   */
+  IR::Instruction getAFirstUse() { firstUse(this, result) }
 }
 
 /**
@@ -410,3 +417,12 @@ DataFlow::Node getASimilarReadNode(DataFlow::Node node) {
     result = readFields.similar().getAUse()
   )
 }
+
+/**
+ * Gets an instruction such that  `pred` and `result` form an adjacent
+ * use-use-pair of the same`SsaSourceVariable`, that is, the value read in
+ * `pred` can reach `result` without passing through any other use or any SSA
+ * definition of the variable except for phi nodes and uncertain implicit
+ * updates.
+ */
+IR::Instruction getAnAdjacentUse(IR::Instruction pred) { adjacentUseUse(pred, result) }
@@ -199,6 +199,8 @@
   /**
    * Holds if the `i`th node of `bb` is a use or an SSA definition of variable `v`, with
    * `k` indicating whether it is the former or the latter.
+   *
+   * Note this includes phi nodes, whereas `ref` above only includes explicit writes and captures.
    */
   private predicate ssaRef(ReachableBasicBlock bb, int i, SsaSourceVariable v, RefKind k) {
     useAt(bb, i, v) and k = ReadRef()
@@ -290,6 +292,174 @@
     or
     rewindReads(bb, i, v) = 1 and result = getDefReachingEndOf(bb.getImmediateDominator(), v)
   }
+
+  private module AdjacentUsesImpl {
+    /** Holds if `v` is defined or used in `b`. */
+    private predicate varOccursInBlock(SsaSourceVariable v, ReachableBasicBlock b) {
+      ssaRef(b, _, v, _)
+    }
+
+    /** Holds if `v` occurs in `b` or one of `b`'s transitive successors. */
+    private predicate blockPrecedesVar(SsaSourceVariable v, ReachableBasicBlock b) {
+      varOccursInBlock(v, b)
+      or
+      exists(getDefReachingEndOf(b, v))
+    }
+
+    /**
+     * Holds if `v` occurs in `b1` and `b2` is one of `b1`'s successors.
+     *
+     * Factored out of `varBlockReaches` to force join order compared to the larger
+     * set `blockPrecedesVar(v, b2)`.
+     */
+    pragma[noinline]
+    private predicate varBlockReachesBaseCand(
+      SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
+    ) {
+      varOccursInBlock(v, b1) and
+      b2 = b1.getASuccessor()
+    }
+
+    /**
+     * Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
+     * in `b2` or one of its transitive successors but not in any block on the path
+     * between `b1` and `b2`. Unlike `varBlockReaches` this may include blocks `b2`
+     * where `v` is dead.
+     *
+     * Factored out of `varBlockReaches` to force join order compared to the larger
+     * set `blockPrecedesVar(v, b2)`.
+     */
+    pragma[noinline]
+    private predicate varBlockReachesRecCand(
+      SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock mid, ReachableBasicBlock b2
+    ) {
+      varBlockReaches(v, b1, mid) and
+      not varOccursInBlock(v, mid) and
+      b2 = mid.getASuccessor()
+    }
+
+    /**
+     * Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
+     * in `b2` or one of its transitive successors but not in any block on the path
+     * between `b1` and `b2`.
+     */
+    private predicate varBlockReaches(
+      SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
+    ) {
+      varBlockReachesBaseCand(v, b1, b2) and
+      blockPrecedesVar(v, b2)
+      or
+      exists(ReachableBasicBlock mid |
+        varBlockReachesRecCand(v, b1, mid, b2) and
+        blockPrecedesVar(v, b2)
+      )
+    }
+
+    /**
+     * Holds if `b2` is a transitive successor of `b1` and `v` occurs in `b1` and
+     * `b2` but not in any block on the path between `b1` and `b2`.
+     */
+    private predicate varBlockStep(
+      SsaSourceVariable v, ReachableBasicBlock b1, ReachableBasicBlock b2
+    ) {
+      varBlockReaches(v, b1, b2) and
+      varOccursInBlock(v, b2)
+    }
+
+    /**
+     * Gets the maximum rank among all SSA references to `v` in basic block `bb`.
+     */
+    private int maxSsaRefRank(ReachableBasicBlock bb, SsaSourceVariable v) {
+      result = max(ssaRefRank(bb, _, v, _))
+    }
+
+    /**
+     * Holds if `v` occurs at index `i1` in `b1` and at index `i2` in `b2` and
+     * there is a path between them without any occurrence of `v`.
+     */
+    pragma[nomagic]
+    predicate adjacentVarRefs(
+      SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2
+    ) {
+      exists(int rankix |
+        b1 = b2 and
+        ssaRefRank(b1, i1, v, _) = rankix and
+        ssaRefRank(b2, i2, v, _) = rankix + 1
+      )
+      or
+      maxSsaRefRank(b1, v) = ssaRefRank(b1, i1, v, _) and
+      varBlockStep(v, b1, b2) and
+      ssaRefRank(b2, i2, v, _) = 1
+    }
+
+    predicate variableUse(SsaSourceVariable v, IR::Instruction use, ReachableBasicBlock bb, int i) {
+      bb.getNode(i) = use and
+      exists(SsaVariable sv |
+        sv.getSourceVariable() = v and
+        use = sv.getAUse()
+      )
+    }
+  }
+
+  private import AdjacentUsesImpl
+
+  /**
+   * Holds if the value defined at `def` can reach `use` without passing through
+   * any other uses, but possibly through phi nodes.
+   */
+  cached
+  predicate firstUse(SsaDefinition def, IR::Instruction use) {
+    exists(SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2 |
+      adjacentVarRefs(v, b1, i1, b2, i2) and
+      def.definesAt(b1, i1, v) and
+      variableUse(v, use, b2, i2)
+    )
+    or
+    exists(
+      SsaSourceVariable v, SsaPhiNode redef, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2,
+      int i2
+    |
+      adjacentVarRefs(v, b1, i1, b2, i2) and
+      def.definesAt(b1, i1, v) and
+      redef.definesAt(b2, i2, v) and
+      firstUse(redef, use)
+    )
+  }
+
+  /**
+   * Holds if `use1` and `use2` form an adjacent use-use-pair of the same SSA
+   * variable, that is, the value read in `use1` can reach `use2` without passing
+   * through any other use or any SSA definition of the variable.
+   */
+  cached
+  predicate adjacentUseUseSameVar(IR::Instruction use1, IR::Instruction use2) {
+    exists(SsaSourceVariable v, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2, int i2 |
+      adjacentVarRefs(v, b1, i1, b2, i2) and
+      variableUse(v, use1, b1, i1) and
+      variableUse(v, use2, b2, i2)
+    )
+  }
+
+  /**
+   * Holds if `use1` and `use2` form an adjacent use-use-pair of the same
+   * `SsaSourceVariable`, that is, the value read in `use1` can reach `use2`
+   * without passing through any other use or any SSA definition of the variable
+   * except for phi nodes and uncertain implicit updates.
+   */
+  cached
+  predicate adjacentUseUse(IR::Instruction use1, IR::Instruction use2) {
+    adjacentUseUseSameVar(use1, use2)
+    or
+    exists(
+      SsaSourceVariable v, SsaPhiNode def, ReachableBasicBlock b1, int i1, ReachableBasicBlock b2,
+      int i2
+    |
+      adjacentVarRefs(v, b1, i1, b2, i2) and
+      variableUse(v, use1, b1, i1) and
+      def.definesAt(b2, i2, v) and
+      firstUse(def, use2)
+    )
+  }
 }
 
 import Internal
@@ -65,23 +65,34 @@ predicate basicLocalFlowStep(Node nodeFrom, Node nodeTo) {
     else nodeTo.asInstruction() = evalAssert
   )
   or
-  // Instruction -> SSA
+  // Instruction -> SSA defn
   exists(IR::Instruction pred, SsaExplicitDefinition succ |
     succ.getRhs() = pred and
     nodeFrom = instructionNode(pred) and
-    nodeTo = ssaNode(succ)
+    nodeTo = ssaNode(succ.getVariable())
   )
   or
-  // SSA -> SSA
-  exists(SsaDefinition pred, SsaPseudoDefinition succ | succ.getAnInput() = pred |
-    nodeFrom = ssaNode(pred) and
-    nodeTo = ssaNode(succ)
+  // SSA defn -> SSA capture
+  exists(SsaExplicitDefinition pred, SsaVariableCapture succ |
+    // Check: should these flow from PHIs as well? Perhaps they should be included
+    // in the use-use graph?
+    succ.getSourceVariable() = pred.getSourceVariable()
+  |
+    nodeFrom = ssaNode(pred.getVariable()) and
+    nodeTo = ssaNode(succ.getVariable())
   )
   or
-  // SSA -> Instruction
-  exists(SsaDefinition pred, IR::Instruction succ |
-    succ = pred.getVariable().getAUse() and
-    nodeFrom = ssaNode(pred) and
+  // SSA defn -> first SSA use
+  exists(SsaDefinition pred, IR::Instruction succ | succ = pred.getAFirstUse() |
+    (pred instanceof SsaExplicitDefinition or pred instanceof SsaVariableCapture) and
+    nodeFrom = ssaNode(pred.getVariable()) and
+    nodeTo = instructionNode(succ)
+  )
+  or
+  // SSA use -> successive SSA use
+  // Note this case includes Phi node traversal
+  exists(IR::Instruction pred, IR::Instruction succ | succ = getAnAdjacentUse(pred) |
+    nodeFrom = instructionNode(pred) and
     nodeTo = instructionNode(succ)
   )
   or

@@ -32,7 +32,10 @@ module AllocationSizeOverflow {
   /**
    * A data-flow node that is an operand to an operation that may overflow.
    */
-  abstract class OverflowProneOperand extends DataFlow::Node { }
+  abstract class OverflowProneOperand extends DataFlow::Node {
+    /** Gets the operation that may overflow that `this` is an operand of. */
+    abstract DataFlow::Node getOverflowProneOperation();
+  }
 
   /**
    * A data-flow node that represents the size argument of an allocation, such as the `n` in
@@ -91,8 +94,7 @@ module AllocationSizeOverflow {
     AllocationSize allocsz;
 
     DefaultSink() {
-      this instanceof OverflowProneOperand and
-      localStep*(this, allocsz) and
+      localStep*(this.(OverflowProneOperand).getOverflowProneOperation(), allocsz) and
       not allocsz instanceof AllocationSizeCheckBarrier
     }
 
@@ -134,15 +136,18 @@ module AllocationSizeOverflow {
 
   /** An operand of an arithmetic expression that could cause overflow. */
   private class DefaultOverflowProneOperand extends OverflowProneOperand {
+    OperatorExpr parent;
+
     DefaultOverflowProneOperand() {
-      exists(OperatorExpr parent | isOverflowProne(parent) |
-        this.asExpr() = parent.getAnOperand() and
-        // only consider outermost operands to avoid double reporting
-        not exists(OperatorExpr grandparent | parent = grandparent.getAnOperand().stripParens() |
-          isOverflowProne(grandparent)
-        )
+      isOverflowProne(parent) and
+      this.asExpr() = parent.getAnOperand() and
+      // only consider outermost operands to avoid double reporting
+      not exists(OperatorExpr grandparent | parent = grandparent.getAnOperand().stripParens() |
+        isOverflowProne(grandparent)
       )
     }
+
+    override DataFlow::Node getOverflowProneOperation() { result.asExpr() = parent }
   }
 
   /**

@@ -55,6 +55,8 @@ module CleartextLogging {
       |
         this.asExpr().(Ident).getName() = name
         or
+        this.(DataFlow::SsaNode).getSourceVariable().getName() = name
+        or
         this.(DataFlow::FieldReadNode).getFieldName() = name
         or
         this.(DataFlow::CallNode).getCalleeName() = name
@@ -143,7 +145,7 @@ module CleartextLogging {
       not this instanceof NonCleartextPassword and
       name.regexpMatch(maybePassword()) and
       (
-        this.asExpr().(Ident).getName() = name
+        this.(DataFlow::SsaNode).getSourceVariable().getName() = name
         or
         exists(DataFlow::FieldReadNode fn |
           fn = this and

@@ -80,6 +80,28 @@ module CommandInjection {
       node instanceof Sanitizer or
       node = any(ArgumentArrayWithDoubleDash array).getASanitizedElement()
     }
+
+    // Hack: with use-use flow, we might have x (use at line 1) -> x (use at line 2),
+    // x (use at line 1) -> array at line 1 and x (use at line 2) -> array at line 2,
+    // in the context
+    //
+    // array1 := {"--", x}
+    // array2 := {x, "--"}
+    //
+    // We want to taint array2 but not array1, which suggests excluding the edge x (use 1) -> array1
+    // However isSanitizer only allows us to remove nodes (isSanitizerIn/Out permit removing all outgoing
+    // or incoming edges); we can't remove an individual edge, so instead we supply extra edges connecting
+    // the definition with the next use.
+    predicate isAdditionalFlowStep(DataFlow::Node pred, DataFlow::Node succ) {
+      exists(
+        ArgumentArrayWithDoubleDash array, DataFlow::InstructionNode sanitized,
+        DataFlow::SsaNode defn
+      |
+        sanitized = array.getASanitizedElement() and sanitized = defn.getAUse()
+      |
+        pred = defn and succ = sanitized.getASuccessor()
+      )
+    }
   }
 
   /**

@@ -290,13 +290,17 @@ private predicate integerTypeBound(IntegerType it, int bitSize, int architecture
  * the type assertion succeeded. If it is not checked then there will be a
  * run-time panic if the type assertion fails, so we can assume it succeeded.
  */
-class TypeAssertionCheck extends DataFlow::ExprNode, FlowStateTransformer {
+class TypeAssertionCheck extends DataFlow::InstructionNode, FlowStateTransformer {
   IntegerType it;
 
   TypeAssertionCheck() {
-    exists(TypeAssertExpr tae |
-      this = DataFlow::exprNode(tae.getExpr()) and
-      it = tae.getTypeExpr().getType().getUnderlyingType()
+    exists(IR::Instruction evalAssert, TypeAssertExpr assert |
+      it = assert.getTypeExpr().getType().getUnderlyingType() and
+      evalAssert = IR::evalExprInstruction(assert)
+    |
+      if exists(IR::extractTupleElement(evalAssert, _))
+      then this.asInstruction() = IR::extractTupleElement(evalAssert, 0)
+      else this.asInstruction() = evalAssert
     )
   }