Skip to content

JS: Improve performance of ATM queries on large databases #7475

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ predicate isSomeModeledArgument(DataFlow::Node n) {
/**
* Holds if `n` appears to be a numeric value.
*/
predicate isNumeric(DataFlow::Node n) { isReadFrom(n, ".*index.*") }
// Performance optimisation: This predicate operates on a large set of
// starting nodes, so use binding hints to suggest computing that set last.
predicate isNumeric(DataFlow::Node n) {
getAnAccessedName(pragma[only_bind_into](n)).regexpMatch(".*index.*")
}

/**
* Holds if `n` is an argument to a library without sinks.
Expand Down
15 changes: 9 additions & 6 deletions javascript/ql/lib/semmle/javascript/GlobalAccessPaths.qll
Original file line number Diff line number Diff line change
Expand Up @@ -462,15 +462,15 @@ module AccessPath {
ReachableBasicBlock bb, Root root, string path, int ranking, AccessPathKind type
) {
result =
rank[ranking](ControlFlowNode ref |
rank[ranking](ControlFlowNode ref, int i |
ref = getAccessTo(root, path, _) and
ref.getBasicBlock() = bb and
ref = bb.getNode(i) and
// Prunes the accesses where there does not exists a read and write within the same basicblock.
// This could be more precise, but doing it like this avoids massive joins.
hasRead(bb) and
hasWrite(bb)
|
ref order by any(int i | ref = bb.getNode(i))
ref order by i
) and
result = getAccessTo(root, path, type)
}
Expand All @@ -492,7 +492,7 @@ module AccessPath {
*/
pragma[noinline]
private predicate hasWrite(ReachableBasicBlock bb) {
bb = getAccessTo(_, _, AccessPathRead()).getBasicBlock()
bb = getAccessTo(_, _, AccessPathWrite()).getBasicBlock()
}

/**
Expand Down Expand Up @@ -565,9 +565,12 @@ module AccessPath {
)
or
// across basic blocks.
exists(Root root, string path |
exists(Root root, string path, ReachableBasicBlock readBlock |
read.asExpr() = getAccessTo(root, path, AccessPathRead()) and
getAWriteBlock(root, path).strictlyDominates(read.getBasicBlock())
readBlock = read.getBasicBlock() and
// Performance optimisation: check that `read` is in a *reachable* basic block
// before looking for a dominating write block.
getAWriteBlock(root, path).strictlyDominates(pragma[only_bind_out](readBlock))
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ private class AnalyzedImplicitInit extends AnalyzedSsaDefinition, SsaImplicitIni
*/
private class AnalyzedVariableCapture extends AnalyzedSsaDefinition, SsaVariableCapture {
override AbstractValue getAnRhsValue() {
exists(LocalVariable v | v = getSourceVariable() |
exists(LocalVariable v | v = this.getSourceVariable() |
result = v.(AnalyzedCapturedVariable).getALocalValue()
or
result = any(AnalyzedExplicitDefinition def | def.getSourceVariable() = v).getAnRhsValue()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,7 @@ predicate isGeneratedCodeFile(File f) { isGenerated(f.getATopLevel()) }
predicate isTestFile(File f) {
exists(Test t | t.getFile() = f)
or
exists(string stemExt | stemExt = "test" or stemExt = "spec" |
f = getTestFile(any(File orig), stemExt)
)
f = getATestFile(_)
or
f.getAbsolutePath().regexpMatch(".*/__(mocks|tests)__/.*")
}
Expand Down
29 changes: 28 additions & 1 deletion javascript/ql/lib/semmle/javascript/frameworks/Testing.qll
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,41 @@ class BDDTest extends Test, @call_expr {

/**
* Gets the test file for `f` with stem extension `stemExt`.
* That is, a file named file named `<base>.<stemExt>.<ext>` in the
* That is, a file named `<base>.<stemExt>.<ext>` in the
* same directory as `f` which is named `<base>.<ext>`.
*/
bindingset[stemExt]
File getTestFile(File f, string stemExt) {
result = f.getParentContainer().getFile(f.getStem() + "." + stemExt + "." + f.getExtension())
}

/**
* Gets a test file for `f`.
* That is, a file named `<base>.<stemExt>.<ext>` in the
* same directory as `f`, where `f` is named `<base>.<ext>` and
* `<stemExt>` is a well-known test file identifier, such as `test` or `spec`.
*/
File getATestFile(File f) {
result = f.getParentContainer().getFile(getATestFileName(f))
}

/**
* Gets a name of a test file for `f`.
* That is, `<base>.<stemExt>.<ext>` where
* `f` is named `<base>.<ext>` and `<stemExt>` is
* a well-known test file identifier, such as `test` or `spec`.
*/
// Helper predicate factored out for performance.
// This predicate is linear in the size of f, and forces
// callers to join only once against f rather than two separate joins
// when computing the stem and the extension.
// This loses some flexibility because callers cannot specify
// an arbitrary stemExt.
pragma[nomagic]
private string getATestFileName(File f) {
result = f.getStem() + "." + ["test", "spec"] + "." + f.getExtension()
}

/**
* A Jest test, that is, an invocation of a global function named
* `test` where the first argument is a string and the second
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,23 @@ import javascript
*/
bindingset[regexp]
predicate isReadFrom(DataFlow::Node read, string regexp) {
getAnAccessedName(read).regexpMatch(regexp)
}

/**
* Gets the "name" accessed by `read`. The "name" is one of:
* - the name of the read variable, if `read` is a variable read
* - the name of the read property, if `read` is a property read
* - the suffix of the getter-method name, if `read` is a getter invocation, for example "Number" in "getNumber"
*/
string getAnAccessedName(DataFlow::Node read) {
exists(DataFlow::Node actualRead |
actualRead = read.asExpr().getUnderlyingValue().(LogOrExpr).getAnOperand().flow() or // unfold `x || y` once
actualRead = read
|
exists(string name | name.regexpMatch(regexp) |
actualRead.asExpr().getUnderlyingValue().(VarAccess).getName() = name or
actualRead.(DataFlow::PropRead).getPropertyName() = name or
actualRead.(DataFlow::InvokeNode).getCalleeName() = "get" + name
)
actualRead.asExpr().getUnderlyingValue().(VarAccess).getName() = result or
actualRead.(DataFlow::PropRead).getPropertyName() = result or
actualRead.(DataFlow::InvokeNode).getCalleeName() = "get" + result
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,12 @@ module CorsMisconfigurationForCredentials {
|
routeHandler.getAResponseHeader(_) = origin and
routeHandler.getAResponseHeader(_) = credentials and
origin.definesExplicitly("access-control-allow-origin", this.asExpr()) and
credentials.definesExplicitly("access-control-allow-credentials", credentialsValue)
// Performance optimisation: start with the set of all route handlers
// rather than the set of all exprs.
pragma[only_bind_into](origin)
.definesExplicitly("access-control-allow-origin", this.asExpr()) and
pragma[only_bind_into](credentials)
.definesExplicitly("access-control-allow-credentials", credentialsValue)
|
credentialsValue.mayHaveBooleanValue(true) or
credentialsValue.mayHaveStringValue("true")
Expand Down
23 changes: 21 additions & 2 deletions javascript/ql/lib/semmle/javascript/security/dataflow/Xss.qll
Original file line number Diff line number Diff line change
Expand Up @@ -437,8 +437,27 @@ module DomBasedXss {
b = phi.getAnInput().getDefinition() and
count(phi.getAnInput()) = 2 and
not a = b and
sanitizer = DataFlow::valueNode(a.getDef().getSource()) and
sanitizer.getAnArgument().asExpr().(VarAccess).getVariable() = b.getSourceVariable()
/*
* Performance optimisation:
*
* When join-ordering and evaluating this conjunction,
* it is preferable to start with the relatively small set of
* `sanitizer` calls, then compute the set of SSA variables accessed
* as the arguments of those sanitizer calls, then reason about how
* those variables are used in phi nodes.
*
* Use directional binding pragmas to encourage this join order,
* starting with `sanitizer`.
*
* Without these pragmas, the join orderer may choose the opposite order:
* start with all `phi` nodes, then compute the set of SSA variables involved,
* then the (potentially large) set of accesses to those variables,
* then the set of accesses used as the argument of a sanitizer call.
*/

pragma[only_bind_out](sanitizer) = DataFlow::valueNode(a.getDef().getSource()) and
pragma[only_bind_out](sanitizer.getAnArgument().asExpr()) =
b.getSourceVariable().getAnAccess()
|
pred = DataFlow::ssaDefinitionNode(b) and
succ = DataFlow::ssaDefinitionNode(phi)
Expand Down