Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 66 additions & 16 deletions cpp/ql/src/semmle/code/cpp/models/implementations/Pure.qll
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
import semmle.code.cpp.models.interfaces.ArrayFunction
import semmle.code.cpp.models.interfaces.Taint
import semmle.code.cpp.models.interfaces.Alias
import semmle.code.cpp.models.interfaces.SideEffect

class PureFunction extends ArrayFunction, TaintFunction {
PureFunction() {
class PureStrFunction extends AliasFunction, ArrayFunction, TaintFunction, SideEffectFunction {
PureStrFunction() {
exists(string name |
hasName(name) and
hasGlobalName(name) and
(
name = "abs"
or name = "atof"
name = "atof"
or name = "atoi"
or name = "atol"
or name = "atoll"
or name = "labs"
or name = "strcasestr"
or name = "strchnul"
or name = "strchr"
or name = "strchrnul"
or name = "strstr"
or name = "strpbrk"
or name = "strcmp"
or name = "strcspn"
or name = "strlen"
or name = "strncmp"
or name = "strnlen"
or name = "strrchr"
or name = "strspn"
or name = "strstr"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By strstr family I meant also strcasestr, strchr, strchrnul, and strrchr. Instead of enumerating them, you can consider matching them by signature: it's all the functions on this list that return a pointer type.

or name = "strtod"
or name = "strtof"
or name = "strtol"
Expand All @@ -40,18 +41,67 @@ class PureFunction extends ArrayFunction, TaintFunction {

override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
exists (ParameterIndex i |
input.isInParameter(i) or
(
input.isInParameterPointer(i) and
getParameter(i).getUnspecifiedType() instanceof PointerType
)
input.isInParameter(i) and
exists(getParameter(i))
or
input.isInParameterPointer(i) and
getParameter(i).getUnspecifiedType() instanceof PointerType
) and
(
(
output.isOutReturnPointer() and
getUnspecifiedType() instanceof PointerType
) or
output.isOutReturnPointer() and
getUnspecifiedType() instanceof PointerType
or
output.isOutReturnValue()
)
}

override predicate parameterNeverEscapes(int i) {
getParameter(i).getUnspecifiedType() instanceof PointerType and
not parameterEscapesOnlyViaReturn(i)
}

override predicate parameterEscapesOnlyViaReturn(int i) {
i = 0 and
getUnspecifiedType() instanceof PointerType
}

override predicate parameterIsAlwaysReturned(int i) {
none()
}

override predicate neverReadsMemory() {
none()
}

override predicate neverWritesMemory() {
any()
}
}

class PureFunction extends TaintFunction, SideEffectFunction {
PureFunction() {
exists(string name |
hasGlobalName(name) and
(
name = "abs" or
name = "labs"
)
)
}

override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) {
exists (ParameterIndex i |
input.isInParameter(i) and
exists(getParameter(i))
) and
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there's a function in the snapshot with 20 parameters, then this predicate claims that parameter 19 of abs flows to its return value. That seems wasteful and confusing.

I'd much prefer that we only define taint flow for parameters that actually exist. The same issue is in PureStrFunction.

To make this easier to detect in the future, we could make ParameterIndex private and put a bindingset annotation on isInParameter etc. But that's probably not for this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've narrowed it to only report parameters that actually exist

output.isOutReturnValue()
}

override predicate neverReadsMemory() {
any()
}

override predicate neverWritesMemory() {
any()
}
}
49 changes: 49 additions & 0 deletions cpp/ql/test/library-tests/ir/ssa/aliased_ssa_ir.expected
Original file line number Diff line number Diff line change
Expand Up @@ -758,3 +758,52 @@ ssa.cpp:
# 184| v0_22(void) = ReturnVoid :
# 184| v0_23(void) = UnmodeledUse : mu*
# 184| v0_24(void) = ExitFunction :

# 198| int PureFunctions(char*, char*, int)
# 198| Block 0
# 198| v0_0(void) = EnterFunction :
# 198| m0_1(unknown) = AliasedDefinition :
# 198| mu0_2(unknown) = UnmodeledDefinition :
# 198| r0_3(glval<char *>) = VariableAddress[str1] :
# 198| m0_4(char *) = InitializeParameter[str1] : &:r0_3
# 198| r0_5(glval<char *>) = VariableAddress[str2] :
# 198| m0_6(char *) = InitializeParameter[str2] : &:r0_5
# 198| r0_7(glval<int>) = VariableAddress[x] :
# 198| m0_8(int) = InitializeParameter[x] : &:r0_7
# 199| r0_9(glval<int>) = VariableAddress[ret] :
# 199| r0_10(glval<unknown>) = FunctionAddress[strcmp] :
# 199| r0_11(glval<char *>) = VariableAddress[str1] :
# 199| r0_12(char *) = Load : &:r0_11, m0_4
# 199| r0_13(char *) = Convert : r0_12
# 199| r0_14(glval<char *>) = VariableAddress[str2] :
# 199| r0_15(char *) = Load : &:r0_14, m0_6
# 199| r0_16(char *) = Convert : r0_15
# 199| r0_17(int) = Call : func:r0_10, 0:r0_13, 1:r0_16
# 199| v0_18(void) = ^CallReadSideEffect : ~m0_1
# 199| m0_19(int) = Store : &:r0_9, r0_17
# 200| r0_20(glval<unknown>) = FunctionAddress[strlen] :
# 200| r0_21(glval<char *>) = VariableAddress[str1] :
# 200| r0_22(char *) = Load : &:r0_21, m0_4
# 200| r0_23(char *) = Convert : r0_22
# 200| r0_24(int) = Call : func:r0_20, 0:r0_23
# 200| v0_25(void) = ^CallReadSideEffect : ~m0_1
# 200| r0_26(glval<int>) = VariableAddress[ret] :
# 200| r0_27(int) = Load : &:r0_26, m0_19
# 200| r0_28(int) = Add : r0_27, r0_24
# 200| m0_29(int) = Store : &:r0_26, r0_28
# 201| r0_30(glval<unknown>) = FunctionAddress[abs] :
# 201| r0_31(glval<int>) = VariableAddress[x] :
# 201| r0_32(int) = Load : &:r0_31, m0_8
# 201| r0_33(int) = Call : func:r0_30, 0:r0_32
# 201| r0_34(glval<int>) = VariableAddress[ret] :
# 201| r0_35(int) = Load : &:r0_34, m0_29
# 201| r0_36(int) = Add : r0_35, r0_33
# 201| m0_37(int) = Store : &:r0_34, r0_36
# 202| r0_38(glval<int>) = VariableAddress[#return] :
# 202| r0_39(glval<int>) = VariableAddress[ret] :
# 202| r0_40(int) = Load : &:r0_39, m0_37
# 202| m0_41(int) = Store : &:r0_38, r0_40
# 198| r0_42(glval<int>) = VariableAddress[#return] :
# 198| v0_43(void) = ReturnValue : &:r0_42, m0_41
# 198| v0_44(void) = UnmodeledUse : mu*
# 198| v0_45(void) = ExitFunction :
11 changes: 11 additions & 0 deletions cpp/ql/test/library-tests/ir/ssa/ssa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,4 +189,15 @@ static void AsmStmtWithOutputs(unsigned int& a, unsigned int& b, unsigned int& c
: "+a" (a), "+b" (b)
: "c" (c), "d" (d)
);
}

int strcmp(const char *, const char *);
int strlen(const char *);
int abs(int);

int PureFunctions(char *str1, char *str2, int x) {
int ret = strcmp(str1, str2);
ret += strlen(str1);
ret += abs(x);
return ret;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You wrote in #1542 (comment) that this PR was about the side effects of strlen, so shouldn't that be included in the test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added.

}
49 changes: 49 additions & 0 deletions cpp/ql/test/library-tests/ir/ssa/unaliased_ssa_ir.expected
Original file line number Diff line number Diff line change
Expand Up @@ -724,3 +724,52 @@ ssa.cpp:
# 184| v0_17(void) = ReturnVoid :
# 184| v0_18(void) = UnmodeledUse : mu*
# 184| v0_19(void) = ExitFunction :

# 198| int PureFunctions(char*, char*, int)
# 198| Block 0
# 198| v0_0(void) = EnterFunction :
# 198| mu0_1(unknown) = AliasedDefinition :
# 198| mu0_2(unknown) = UnmodeledDefinition :
# 198| r0_3(glval<char *>) = VariableAddress[str1] :
# 198| m0_4(char *) = InitializeParameter[str1] : &:r0_3
# 198| r0_5(glval<char *>) = VariableAddress[str2] :
# 198| m0_6(char *) = InitializeParameter[str2] : &:r0_5
# 198| r0_7(glval<int>) = VariableAddress[x] :
# 198| m0_8(int) = InitializeParameter[x] : &:r0_7
# 199| r0_9(glval<int>) = VariableAddress[ret] :
# 199| r0_10(glval<unknown>) = FunctionAddress[strcmp] :
# 199| r0_11(glval<char *>) = VariableAddress[str1] :
# 199| r0_12(char *) = Load : &:r0_11, m0_4
# 199| r0_13(char *) = Convert : r0_12
# 199| r0_14(glval<char *>) = VariableAddress[str2] :
# 199| r0_15(char *) = Load : &:r0_14, m0_6
# 199| r0_16(char *) = Convert : r0_15
# 199| r0_17(int) = Call : func:r0_10, 0:r0_13, 1:r0_16
# 199| v0_18(void) = ^CallReadSideEffect : ~mu0_2
# 199| m0_19(int) = Store : &:r0_9, r0_17
# 200| r0_20(glval<unknown>) = FunctionAddress[strlen] :
# 200| r0_21(glval<char *>) = VariableAddress[str1] :
# 200| r0_22(char *) = Load : &:r0_21, m0_4
# 200| r0_23(char *) = Convert : r0_22
# 200| r0_24(int) = Call : func:r0_20, 0:r0_23
# 200| v0_25(void) = ^CallReadSideEffect : ~mu0_2
# 200| r0_26(glval<int>) = VariableAddress[ret] :
# 200| r0_27(int) = Load : &:r0_26, m0_19
# 200| r0_28(int) = Add : r0_27, r0_24
# 200| m0_29(int) = Store : &:r0_26, r0_28
# 201| r0_30(glval<unknown>) = FunctionAddress[abs] :
# 201| r0_31(glval<int>) = VariableAddress[x] :
# 201| r0_32(int) = Load : &:r0_31, m0_8
# 201| r0_33(int) = Call : func:r0_30, 0:r0_32
# 201| r0_34(glval<int>) = VariableAddress[ret] :
# 201| r0_35(int) = Load : &:r0_34, m0_29
# 201| r0_36(int) = Add : r0_35, r0_33
# 201| m0_37(int) = Store : &:r0_34, r0_36
# 202| r0_38(glval<int>) = VariableAddress[#return] :
# 202| r0_39(glval<int>) = VariableAddress[ret] :
# 202| r0_40(int) = Load : &:r0_39, m0_37
# 202| m0_41(int) = Store : &:r0_38, r0_40
# 198| r0_42(glval<int>) = VariableAddress[#return] :
# 198| v0_43(void) = ReturnValue : &:r0_42, m0_41
# 198| v0_44(void) = UnmodeledUse : mu*
# 198| v0_45(void) = ExitFunction :