Skip to content

Commit 13f7daf

Browse files
authored
Merge pull request #13982 from aschackmull/dataflow/typeflow-calledge-pruning
Dataflow: Add type-based call-edge pruning.
2 parents 3d8231b + 4205453 commit 13f7daf

File tree

19 files changed

+919
-195
lines changed

19 files changed

+919
-195
lines changed

cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ predicate expectsContent(Node n, ContentSet c) { none() }
208208

209209
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
210210

211+
predicate localMustFlowStep(Node node1, Node node2) { none() }
212+
211213
/** Gets the type of `n` used for type pruning. */
212214
Type getNodeType(Node n) {
213215
suppressUnusedNode(n) and

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,8 @@ predicate expectsContent(Node n, ContentSet c) { none() }
804804

805805
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
806806

807+
predicate localMustFlowStep(Node node1, Node node2) { none() }
808+
807809
/** Gets the type of `n` used for type pruning. */
808810
DataFlowType getNodeType(Node n) {
809811
suppressUnusedNode(n) and

csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -533,8 +533,40 @@ module LocalFlow {
533533
) and
534534
not exists(getALastEvalNode(result))
535535
}
536+
537+
/**
538+
* Holds if the value of `node2` is given by `node1`.
539+
*/
540+
predicate localMustFlowStep(Node node1, Node node2) {
541+
exists(Callable c, Expr e |
542+
node1.(InstanceParameterNode).getCallable() = c and
543+
node2.asExpr() = e and
544+
(e instanceof ThisAccess or e instanceof BaseAccess) and
545+
c = e.getEnclosingCallable()
546+
)
547+
or
548+
hasNodePath(any(LocalExprStepConfiguration x), node1, node2) and
549+
(
550+
node2 instanceof SsaDefinitionExtNode or
551+
node2.asExpr() instanceof Cast or
552+
node2.asExpr() instanceof AssignExpr
553+
)
554+
or
555+
exists(SsaImpl::Definition def |
556+
def = getSsaDefinitionExt(node1) and
557+
exists(SsaImpl::getAReadAtNode(def, node2.(ExprNode).getControlFlowNode()))
558+
)
559+
or
560+
node1 =
561+
unique(FlowSummaryNode n1 |
562+
FlowSummaryImpl::Private::Steps::summaryLocalStep(n1.getSummaryNode(),
563+
node2.(FlowSummaryNode).getSummaryNode(), true)
564+
)
565+
}
536566
}
537567

568+
predicate localMustFlowStep = LocalFlow::localMustFlowStep/2;
569+
538570
/**
539571
* This is the local flow predicate that is used as a building block in global
540572
* data flow. It excludes SSA flow through instance fields, as flow through fields

go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@ predicate expectsContent(Node n, ContentSet c) {
205205

206206
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
207207

208+
predicate localMustFlowStep(Node node1, Node node2) { none() }
209+
208210
/** Gets the type of `n` used for type pruning. */
209211
DataFlowType getNodeType(Node n) { result = TTodoDataFlowType() and exists(n) }
210212

java/ql/lib/semmle/code/java/Expr.qll

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,13 +1676,25 @@ abstract class InstanceAccess extends Expr {
16761676
/** Holds if this instance access is to an enclosing instance of type `t`. */
16771677
predicate isEnclosingInstanceAccess(RefType t) {
16781678
t = this.getQualifier().getType().(RefType).getSourceDeclaration() and
1679-
t != this.getEnclosingCallable().getDeclaringType()
1679+
t != this.getEnclosingCallable().getDeclaringType() and
1680+
not this.isSuperInterfaceAccess()
16801681
or
1681-
not exists(this.getQualifier()) and
1682+
(not exists(this.getQualifier()) or this.isSuperInterfaceAccess()) and
16821683
exists(LambdaExpr lam | lam.asMethod() = this.getEnclosingCallable() |
16831684
t = lam.getAnonymousClass().getEnclosingType()
16841685
)
16851686
}
1687+
1688+
// A default method on an interface, `I`, may be invoked using `I.super.m()`.
1689+
// This always refers to the implemented interfaces of `this`. This form of
1690+
// qualified `super` cannot be combined with accessing an enclosing instance.
1691+
// JLS 15.11.2. "Accessing Superclass Members using super"
1692+
// JLS 15.12. "Method Invocation Expressions"
1693+
// JLS 15.12.1. "Compile-Time Step 1: Determine Type to Search"
1694+
private predicate isSuperInterfaceAccess() {
1695+
this instanceof SuperAccess and
1696+
this.getQualifier().getType().(RefType).getSourceDeclaration() instanceof Interface
1697+
}
16861698
}
16871699

16881700
/**

java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,18 @@ predicate arrayInstanceOfGuarded(ArrayAccess aa, RefType t) {
440440
)
441441
}
442442

443+
/**
444+
* Holds if `t` is the type of the `this` value corresponding to the the
445+
* `SuperAccess`. As the `SuperAccess` expression has the type of the supertype,
446+
* the type `t` is a stronger type bound.
447+
*/
448+
private predicate superAccess(SuperAccess sup, RefType t) {
449+
sup.isEnclosingInstanceAccess(t)
450+
or
451+
sup.isOwnInstanceAccess() and
452+
t = sup.getEnclosingCallable().getDeclaringType()
453+
}
454+
443455
/**
444456
* Holds if `n` has type `t` and this information is discarded, such that `t`
445457
* might be a better type bound for nodes where `n` flows to. This might include
@@ -452,7 +464,8 @@ private predicate typeFlowBaseCand(TypeFlowNode n, RefType t) {
452464
downcastSuccessor(n.asExpr(), srctype) or
453465
instanceOfGuarded(n.asExpr(), srctype) or
454466
arrayInstanceOfGuarded(n.asExpr(), srctype) or
455-
n.asExpr().(FunctionalExpr).getConstructedType() = srctype
467+
n.asExpr().(FunctionalExpr).getConstructedType() = srctype or
468+
superAccess(n.asExpr(), srctype)
456469
|
457470
t = srctype.(BoundedType).getAnUltimateUpperBoundType()
458471
or

java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,13 +326,18 @@ string ppReprType(DataFlowType t) {
326326
else result = t.toString()
327327
}
328328

329+
pragma[nomagic]
330+
private predicate compatibleTypes0(DataFlowType t1, DataFlowType t2) {
331+
erasedHaveIntersection(t1, t2)
332+
}
333+
329334
/**
330335
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
331336
* a node of type `t1` to a node of type `t2`.
332337
*/
333338
bindingset[t1, t2]
334339
pragma[inline_late]
335-
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { erasedHaveIntersection(t1, t2) }
340+
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { compatibleTypes0(t1, t2) }
336341

337342
/** A node that performs a type cast. */
338343
class CastNode extends ExprNode {

java/ql/lib/semmle/code/java/dataflow/internal/DataFlowUtil.qll

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,39 @@ private module Cached {
133133
}
134134
}
135135

136+
/**
137+
* Holds if the value of `node2` is given by `node1`.
138+
*/
139+
predicate localMustFlowStep(Node node1, Node node2) {
140+
exists(Callable c | node1.(InstanceParameterNode).getCallable() = c |
141+
exists(InstanceAccess ia |
142+
ia = node2.asExpr() and ia.getEnclosingCallable() = c and ia.isOwnInstanceAccess()
143+
)
144+
or
145+
c =
146+
node2.(ImplicitInstanceAccess).getInstanceAccess().(OwnInstanceAccess).getEnclosingCallable()
147+
)
148+
or
149+
exists(SsaImplicitInit init |
150+
init.isParameterDefinition(node1.asParameter()) and init.getAUse() = node2.asExpr()
151+
)
152+
or
153+
exists(SsaExplicitUpdate upd |
154+
upd.getDefiningExpr().(VariableAssign).getSource() = node1.asExpr() and
155+
upd.getAUse() = node2.asExpr()
156+
)
157+
or
158+
node2.asExpr().(CastingExpr).getExpr() = node1.asExpr()
159+
or
160+
node2.asExpr().(AssignExpr).getSource() = node1.asExpr()
161+
or
162+
node1 =
163+
unique(FlowSummaryNode n1 |
164+
FlowSummaryImpl::Private::Steps::summaryLocalStep(n1.getSummaryNode(),
165+
node2.(FlowSummaryNode).getSummaryNode(), true)
166+
)
167+
}
168+
136169
import Cached
137170

138171
private predicate capturedVariableRead(Node n) {

java/ql/lib/semmle/code/java/frameworks/Thrift.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import java
88
* A file detected as generated by the Apache Thrift Compiler.
99
*/
1010
class ThriftGeneratedFile extends GeneratedFile {
11+
cached
1112
ThriftGeneratedFile() {
1213
exists(JavadocElement t | t.getFile() = this |
1314
exists(string msg | msg = t.getText() | msg.regexpMatch("(?i).*\\bAutogenerated by Thrift.*"))
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import java.util.*;
2+
import java.util.function.*;
3+
4+
public class A {
5+
static String source(String tag) { return null; }
6+
7+
static void sink(Object o) { }
8+
9+
interface MyConsumer {
10+
void run(Object o);
11+
}
12+
13+
void apply(MyConsumer f, Object x) {
14+
f.run(x);
15+
}
16+
17+
void apply_wrap(MyConsumer f, Object x) {
18+
apply(f, x);
19+
}
20+
21+
void testLambdaDispatch1() {
22+
apply_wrap(x -> { sink(x); }, source("A")); // $ hasValueFlow=A
23+
apply_wrap(x -> { sink(x); }, null); // no flow
24+
apply_wrap(x -> { }, source("B"));
25+
apply_wrap(x -> { }, null);
26+
}
27+
28+
void forEach_wrap(List<Object> l, Consumer<Object> f) {
29+
l.forEach(f);
30+
}
31+
32+
void testLambdaDispatch2() {
33+
List<Object> tainted = new ArrayList<>();
34+
tainted.add(source("L"));
35+
List<Object> safe = new ArrayList<>();
36+
forEach_wrap(safe, x -> { sink(x); }); // no flow
37+
forEach_wrap(tainted, x -> { sink(x); }); // $ hasValueFlow=L
38+
}
39+
40+
static class TaintedClass {
41+
public String toString() { return source("TaintedClass"); }
42+
}
43+
44+
static class SafeClass {
45+
public String toString() { return "safe"; }
46+
}
47+
48+
String convertToString(Object o) {
49+
return o.toString();
50+
}
51+
52+
String convertToString_wrap(Object o) {
53+
return convertToString(o);
54+
}
55+
56+
void testToString1() {
57+
String unused = convertToString_wrap(new TaintedClass());
58+
sink(convertToString_wrap(new SafeClass())); // no flow
59+
}
60+
}

java/ql/test/library-tests/dataflow/typeflow-dispatch/test.expected

Whitespace-only changes.
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import TestUtilities.InlineFlowTest
2+
import DefaultFlowTest

python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,8 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }
539539

540540
predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }
541541

542+
predicate localMustFlowStep(Node node1, Node node2) { none() }
543+
542544
/**
543545
* Gets the type of `node`.
544546
*/

ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1697,6 +1697,8 @@ private predicate mustHaveLambdaType(CfgNodes::ExprCfgNode e, Callable c) {
16971697
)
16981698
}
16991699

1700+
predicate localMustFlowStep(Node node1, Node node2) { none() }
1701+
17001702
/** Gets the type of `n` used for type pruning. */
17011703
DataFlowType getNodeType(Node n) {
17021704
result = TLambdaDataFlowType(n.(LambdaSelfReferenceNode).getCallable())
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
---
2+
category: majorAnalysis
3+
---
4+
* Added support for type-based call edge pruning. This removes data flow call edges that are incompatible with the set of flow paths that reach it based on type information. This improves dispatch precision for constructs like lambdas, `Object.toString()` calls, and the visitor pattern. For now this is only enabled for Java and C#.

shared/dataflow/codeql/dataflow/DataFlow.qll

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,11 @@ signature module InputSig {
197197
*/
198198
predicate allowParameterReturnInSelf(ParameterNode p);
199199

200+
/**
201+
* Holds if the value of `node2` is given by `node1`.
202+
*/
203+
predicate localMustFlowStep(Node node1, Node node2);
204+
200205
class LambdaCallKind;
201206

202207
/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */

0 commit comments

Comments
 (0)