github · adityasharad · Dec 22, 2021 · Dec 22, 2021 · Dec 22, 2021 · Dec 22, 2021
@@ -53,7 +53,11 @@ predicate isSomeModeledArgument(DataFlow::Node n) {
 /**
  * Holds if `n` appears to be a numeric value.
  */
-predicate isNumeric(DataFlow::Node n) { isReadFrom(n, ".*index.*") }
+// Performance optimisation: This predicate operates on a large set of
+// starting nodes, so use binding hints to suggest computing that set last.
+predicate isNumeric(DataFlow::Node n) {
+  getAnAccessedName(pragma[only_bind_into](n)).regexpMatch(".*index.*")
+}
 
 /**
  * Holds if `n` is an argument to a library without sinks.

@@ -462,15 +462,15 @@ module AccessPath {
       ReachableBasicBlock bb, Root root, string path, int ranking, AccessPathKind type
     ) {
       result =
-        rank[ranking](ControlFlowNode ref |
+        rank[ranking](ControlFlowNode ref, int i |
           ref = getAccessTo(root, path, _) and
-          ref.getBasicBlock() = bb and
+          ref = bb.getNode(i) and
           // Prunes the accesses where there does not exists a read and write within the same basicblock.
           // This could be more precise, but doing it like this avoids massive joins.
           hasRead(bb) and
           hasWrite(bb)
         |
-          ref order by any(int i | ref = bb.getNode(i))
+          ref order by i
         ) and
       result = getAccessTo(root, path, type)
     }
@@ -492,7 +492,7 @@ module AccessPath {
      */
     pragma[noinline]
     private predicate hasWrite(ReachableBasicBlock bb) {
-      bb = getAccessTo(_, _, AccessPathRead()).getBasicBlock()
+      bb = getAccessTo(_, _, AccessPathWrite()).getBasicBlock()
     }
 
     /**
@@ -565,9 +565,12 @@ module AccessPath {
       )
       or
       // across basic blocks.
-      exists(Root root, string path |
+      exists(Root root, string path, ReachableBasicBlock readBlock |
         read.asExpr() = getAccessTo(root, path, AccessPathRead()) and
-        getAWriteBlock(root, path).strictlyDominates(read.getBasicBlock())
+        readBlock = read.getBasicBlock() and
+        // Performance optimisation: check that `read` is in a *reachable* basic block
+        // before looking for a dominating write block.
+        getAWriteBlock(root, path).strictlyDominates(pragma[only_bind_out](readBlock))
       )
     }
   }

@@ -217,7 +217,7 @@ private class AnalyzedImplicitInit extends AnalyzedSsaDefinition, SsaImplicitIni
  */
 private class AnalyzedVariableCapture extends AnalyzedSsaDefinition, SsaVariableCapture {
   override AbstractValue getAnRhsValue() {
-    exists(LocalVariable v | v = getSourceVariable() |
+    exists(LocalVariable v | v = this.getSourceVariable() |
       result = v.(AnalyzedCapturedVariable).getALocalValue()
       or
       result = any(AnalyzedExplicitDefinition def | def.getSourceVariable() = v).getAnRhsValue()

@@ -56,9 +56,7 @@ predicate isGeneratedCodeFile(File f) { isGenerated(f.getATopLevel()) }
 predicate isTestFile(File f) {
   exists(Test t | t.getFile() = f)
   or
-  exists(string stemExt | stemExt = "test" or stemExt = "spec" |
-    f = getTestFile(any(File orig), stemExt)
-  )
+  f = getATestFile(_)
   or
   f.getAbsolutePath().regexpMatch(".*/__(mocks|tests)__/.*")
 }

@@ -40,14 +40,41 @@ class BDDTest extends Test, @call_expr {
 
 /**
  * Gets the test file for `f` with stem extension `stemExt`.
- * That is, a file named file named `<base>.<stemExt>.<ext>` in the
+ * That is, a file named `<base>.<stemExt>.<ext>` in the
  * same directory as `f` which is named `<base>.<ext>`.
  */
 bindingset[stemExt]
 File getTestFile(File f, string stemExt) {
   result = f.getParentContainer().getFile(f.getStem() + "." + stemExt + "." + f.getExtension())
 }
 
+/**
+ * Gets a test file for `f`.
+ * That is, a file named `<base>.<stemExt>.<ext>` in the
+ * same directory as `f`, where `f` is named `<base>.<ext>` and
+ * `<stemExt>` is a well-known test file identifier, such as `test` or `spec`.
+ */
+File getATestFile(File f) {
+  result = f.getParentContainer().getFile(getATestFileName(f))
+}
+
+/**
+ * Gets a name of a test file for `f`.
+ * That is, `<base>.<stemExt>.<ext>` where
+ * `f` is named `<base>.<ext>` and `<stemExt>` is
+ * a well-known test file identifier, such as `test` or `spec`.
+ */
+// Helper predicate factored out for performance.
+// This predicate is linear in the size of f, and forces
+// callers to join only once against f rather than two separate joins
+// when computing the stem and the extension.
+// This loses some flexibility because callers cannot specify
+// an arbitrary stemExt.
+pragma[nomagic]
+private string getATestFileName(File f) {
+  result = f.getStem() + "." + ["test", "spec"] + "." + f.getExtension()
+}
+
 /**
  * A Jest test, that is, an invocation of a global function named
  * `test` where the first argument is a string and the second

@@ -16,15 +16,23 @@ import javascript
  */
 bindingset[regexp]
 predicate isReadFrom(DataFlow::Node read, string regexp) {
+  getAnAccessedName(read).regexpMatch(regexp)
+}
+
+/**
+ * Gets the "name" accessed by `read`. The "name" is one of:
+ * - the name of the read variable, if `read` is a variable read
+ * - the name of the read property, if `read` is a property read
+ * - the suffix of the getter-method name, if `read` is a getter invocation, for example "Number" in "getNumber"
+ */
+string getAnAccessedName(DataFlow::Node read) {
   exists(DataFlow::Node actualRead |
     actualRead = read.asExpr().getUnderlyingValue().(LogOrExpr).getAnOperand().flow() or // unfold `x || y` once
     actualRead = read
   |
-    exists(string name | name.regexpMatch(regexp) |
-      actualRead.asExpr().getUnderlyingValue().(VarAccess).getName() = name or
-      actualRead.(DataFlow::PropRead).getPropertyName() = name or
-      actualRead.(DataFlow::InvokeNode).getCalleeName() = "get" + name
-    )
+    actualRead.asExpr().getUnderlyingValue().(VarAccess).getName() = result or
+    actualRead.(DataFlow::PropRead).getPropertyName() = result or
+    actualRead.(DataFlow::InvokeNode).getCalleeName() = "get" + result
   )
 }
 

@@ -50,8 +50,12 @@ module CorsMisconfigurationForCredentials {
       |
         routeHandler.getAResponseHeader(_) = origin and
         routeHandler.getAResponseHeader(_) = credentials and
-        origin.definesExplicitly("access-control-allow-origin", this.asExpr()) and
-        credentials.definesExplicitly("access-control-allow-credentials", credentialsValue)
+        // Performance optimisation: start with the set of all route handlers
+        // rather than the set of all exprs.
+        pragma[only_bind_into](origin)
+            .definesExplicitly("access-control-allow-origin", this.asExpr()) and
+        pragma[only_bind_into](credentials)
+            .definesExplicitly("access-control-allow-credentials", credentialsValue)
       |
         credentialsValue.mayHaveBooleanValue(true) or
         credentialsValue.mayHaveStringValue("true")

@@ -437,8 +437,27 @@ module DomBasedXss {
         b = phi.getAnInput().getDefinition() and
         count(phi.getAnInput()) = 2 and
         not a = b and
-        sanitizer = DataFlow::valueNode(a.getDef().getSource()) and
-        sanitizer.getAnArgument().asExpr().(VarAccess).getVariable() = b.getSourceVariable()
+        /*
+         * Performance optimisation:
+         *
+         * When join-ordering and evaluating this conjunction,
+         * it is preferable to start with the relatively small set of
+         * `sanitizer` calls, then compute the set of SSA variables accessed
+         * as the arguments of those sanitizer calls, then reason about how
+         * those variables are used in phi nodes.
+         *
+         * Use directional binding pragmas to encourage this join order,
+         * starting with `sanitizer`.
+         *
+         * Without these pragmas, the join orderer may choose the opposite order:
+         * start with all `phi` nodes, then compute the set of SSA variables involved,
+         * then the (potentially large) set of accesses to those variables,
+         * then the set of accesses used as the argument of a sanitizer call.
+         */
+
+        pragma[only_bind_out](sanitizer) = DataFlow::valueNode(a.getDef().getSource()) and
+        pragma[only_bind_out](sanitizer.getAnArgument().asExpr()) =
+          b.getSourceVariable().getAnAccess()
       |
         pred = DataFlow::ssaDefinitionNode(b) and
         succ = DataFlow::ssaDefinitionNode(phi)