Skip to content

WIP: Smtlib Prelude Injection #340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions wp/lib/bap_wp/src/runner.ml
Original file line number Diff line number Diff line change
Expand Up @@ -229,13 +229,17 @@ let post_reg_values

(* Parses the user's smtlib queries for use in comparative analysis. *)
let smtlib
~orig:(sub1,env1)
~modif:(sub2,env2)
~precond:(precond : string)
~postcond:(postcond : string)
: (Comp.comparator * Comp.comparator) option =
if String.is_empty precond && String.is_empty postcond then
None
else
Some (Comp.compare_subs_smtlib ~smtlib_hyp:precond ~smtlib_post:postcond)
let prelude = Z3_utils.compare_prelude_smtlib2 sub1 sub2 env1 env2 in
info "Injected CBAT smtlib prelude: %s" prelude;
Some (Comp.compare_subs_smtlib ~smtlib_hyp:(prelude ^ precond) ~smtlib_post:(prelude ^ postcond))

(* obtain a set of general purpose registers
* based on their string names and architecture. *)
Expand Down Expand Up @@ -294,7 +298,8 @@ let comparators_of_flags
func_calls p.compare_func_calls;
post_reg_values p.compare_post_reg_values
~orig:(sub1, env1) ~modif:(sub2, env2);
smtlib ~precond:p.precond ~postcond:p.postcond;
smtlib ~orig:(sub1, env1) ~modif:(sub2, env2)
~precond:p.precond ~postcond:p.postcond;
gen_pointer_flag_comparators p.pointer_reg_list
env1 env2 pointer_env1_vars pointer_env2_vars;
mem_eq p.rewrite_addresses
Expand Down
109 changes: 108 additions & 1 deletion wp/lib/bap_wp/src/z3_utils.ml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,113 @@ let get_z3_name (map : Constr.z3_expr Var.Map.t) (name : string) (fmt : Var.t ->
else
None)

(** Sexp.t helper constructors for smtlib constructs *)
let define_fun name args retsort body =
let args = List.map ~f:(fun (v,sort) -> Sexp.List [v; sort]) args in
Sexp.List [Sexp.Atom "define-fun"; Sexp.Atom name; Sexp.List args; retsort; body]
let synonym name ret body = define_fun name [] ret body
Comment on lines +104 to +107
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be possible to annotate the types of the arguments to these functions?

let define_sort (name : string) s = Sexp.List [
Sexp.Atom "define-sort"; Sexp.Atom name; Sexp.List []; s]
let sexp_of_sort (s : Z3.Sort.sort) : Sexp.t = Sexp.of_string (Z3.Sort.to_string s)

(* [make_arg_synonyms] constructs smtlib formula for the arguments of a subroutine.
This make synonyms that for high level c-like names ot low level locations *)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is ot supposed to be at?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have to admit I still don't really get this sentence.

Maybe an example here?

let make_arg_synonyms
?postfix:(postfix = "")
(sub : Sub.t)
(init_var_map : Expr.expr Var.Map.t)
(var_map : Expr.expr Var.Map.t) : Sexp.t list =
let args = Term.enum arg_t sub |> Seq.to_list in
List.concat_map args ~f:(fun arg ->
let cname = Arg.lhs arg in
let syn prefix z3_reg =
let sort = Sexp.of_string (Z3.Sort.to_string (Z3.Expr.get_sort z3_reg)) in
synonym (prefix ^ Var.name cname ^ postfix) sort
(Sexp.Atom (Expr.to_string z3_reg))
in
let reg = Arg.rhs arg in
match reg with
| Var reg -> begin
match Arg.intent arg with
| Some In -> [syn "init_" (Var.Map.find_exn init_var_map reg)]
| Some Out -> [syn "" (Var.Map.find_exn var_map reg)]
| Some Both -> [syn "init_" (Var.Map.find_exn init_var_map reg);
syn "" (Var.Map.find_exn var_map reg)]
| None -> []
end
| _ -> [] (* We don't make synonym unless c variable is just held in a bap variable *)
)

(** [args_prelude_compare] builds smtlib synonyms in a comparison for the arguments of
two compared subroutines. *)
let args_prelude_compare
(main_sub1 : Sub.t)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This might need to change on rebase: there's a new type 'a code which represents a pair of an 'a term and an env.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(it's in compare.mli)

(main_sub2 : Sub.t)
(env1 : Env.t)
(env2 : Env.t) : Sexp.t list =
let var_map1 = Env.get_var_map env1 in
let var_map2 = Env.get_var_map env2 in
let init_var_map1 = Env.get_init_var_map env1 in
let init_var_map2 = Env.get_init_var_map env2 in
let syn1 = make_arg_synonyms ~postfix:"_orig" main_sub1 init_var_map1 var_map1 in
let syn2 = make_arg_synonyms ~postfix:"_mod" main_sub2 init_var_map2 var_map2 in
syn1 @ syn2



(** [def_pointer_sort] constructs an smtlib sort synonym of pointer sort.
Typically it will be `(define-sort pointer () BV64)` *)
let def_pointer_sort env =
let mem = Env.get_mem env in
let var_map = Env.get_var_map env in
let z3_mem = Var.Map.find_exn var_map mem in
let pointer_sort = Z3.Z3Array.get_domain (Expr.get_sort z3_mem) in
define_sort "pointer" (sexp_of_sort pointer_sort)

(** [def_mem_sort] constructs an smtlib sort synonym of the memory array.
Typically it will be `(define-sort memsort () (Array (BV64 BV8))` *)
let def_mem_sort env =
let mem = Env.get_mem env in
let var_map = Env.get_var_map env in
let z3_mem = Var.Map.find_exn var_map mem in
define_sort "memsort" (sexp_of_sort (Z3.Expr.get_sort z3_mem))

let compare_prelude_smtlib2 main_sub1 main_sub2 (env1 : Env.t) (env2 : Env.t) : string =
let std_prelude = "
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should have some sort of file that contains the prelude that we read in over here rather than having a hard-coded string.

Copy link
Contributor Author

@philzook58 philzook58 Sep 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I understand the impulse, but I'd rather not. A large amount of the string is programmatically generated and going to an external file adds a lot of complexity in terms of finding the file in question from wherever the wp is invoked. I think going to an external file was a mistake we made in vibes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I stand by the vibes decision though.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case, I feel like the string is short enough to allow minor sins.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We added an unnecessary and weird installation step that can easily go wrong or get stale just to avoid recompiling when we as developers edit the string. No user is ever going to edit this string and they can achieve the same functionality by writing define-fun in their own smtlib. There is the possibility of adding another command line feature of --wp-user-lib=mylib.smt2 to avoid duplication between the pre and post condition. I'm not entirely sure we have to have both pre and post condition flags anyway, since this can be achieved via =>

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How so?

Copy link
Contributor Author

@philzook58 philzook58 Sep 2, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(=> (preconds referring to init_ variables ) (post-conds referring to both init and non-init)). I feel like this is how I've seen Chris write specs in vibes

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True. I guess we have some redundancy between init_var and the pre flag.

(
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Sort synonyms
(define-sort BV8 () (_ BitVec 8))
(define-sort BV16 () (_ BitVec 16))
(define-sort BV32 () (_ BitVec 32))
(define-sort BV64 () (_ BitVec 64))
(define-sort char () (_ BitVec 8))
(define-sort byte () (_ BitVec 8))
(define-sort int16 () (_ BitVec 16))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the difference between a BV16 and an int16 in this case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nothing. I thought it was reasonable or at least unharmful to have C-esque looking types and something that is just a nice shorthand evocative of (_ BitVec 64). Maybe I should use int16_t instead

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Definitely int16_t!

(define-sort int32 () (_ BitVec 32))
(define-sort int64 () (_ BitVec 64))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Predefined predicates
(define-fun init-mem-equal () Bool (= init_mem_orig init_mem_mod))
(define-fun mem-equal () Bool (= mem_orig mem_mod))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
)" in
let std_prelude = match Sexp.of_string std_prelude with
| Sexp.List l -> l
| _ ->
failwith "compare_prelude_smtlib2: Improper constant sexp string"
in
let cvar_defs = args_prelude_compare main_sub1 main_sub2 env1 env2 in
let prelude_sexp = [
def_pointer_sort env1;
def_mem_sort env1;
] @
std_prelude @
cvar_defs
in
let prelude_sexp = List.map ~f:Sexp.to_string_hum prelude_sexp in
String.concat ~sep:"\n" prelude_sexp

(** [mk_smtlib2_compare] builds a constraint out of an smtlib2 string that can be used
as a comparison predicate between an original and modified binary. *)
let mk_smtlib2_compare (env1 : Env.t) (env2 : Env.t) (smtlib_str : string) : Constr.t =
Expand Down Expand Up @@ -130,7 +237,7 @@ let mk_smtlib2_compare (env1 : Env.t) (env2 : Env.t) (smtlib_str : string) : Con
let declsym = declsym1 @ declsym2 in
let ctx = Env.get_context env1 in
mk_smtlib2 ctx smtlib_str declsym

let mk_smtlib2_single ?(name = None) (env : Env.t) (smt_post : string) : Constr.t =
let var_map = Env.get_var_map env in
let init_var_map = Env.get_init_var_map env in
Expand Down
4 changes: 4 additions & 0 deletions wp/lib/bap_wp/src/z3_utils.mli
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,7 @@ val check_external : Z3.Solver.solver
(** [mk_smtlib2_compare] builds a constraint out of an smtlib2 string that can be used
as a comparison predicate between an original and modified binary. *)
val mk_smtlib2_compare : Env.t -> Env.t -> string -> Constr.t

(** [compare_prelude_smtlib2] constructs an smtlib2 string of useful definitions for
specification. *)
val compare_prelude_smtlib2 : Bap.Std.Sub.t -> Bap.Std.Sub.t -> Env.t -> Env.t -> string
13 changes: 13 additions & 0 deletions wp/resources/sample_binaries/prelude_test/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
SRC_DIR = src
BIN_DIR = bin

all: $(BIN_DIR)/main_1 $(BIN_DIR)/main_2

$(BIN_DIR):
mkdir -p $@

$(BIN_DIR)/%: $(SRC_DIR)/%.c $(BIN_DIR)
gcc -Wall -g -o $@ $<

clean:
rm -rf $(BIN_DIR)
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions wp/resources/sample_binaries/prelude_test/c/main_1.h
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
int foo(int a, int b, int c);
20 changes: 20 additions & 0 deletions wp/resources/sample_binaries/prelude_test/run_wp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# A test that gives RAX the value of RDI + 1. The two binaries differ in the
# order of the increment and move instructions, but have the same output.

# Runs WP with a postcondition that states that the end value of RAX in the
# modified binary is equal to the initial value of RDI in the original
# binary + 1.

# Should return UNSAT
#--compare-post-reg-values=RAX \

run () {
bap wp \
--func=main \
--precond="(assert (= init_main_argc_mod init_main_argc_orig))" \
--postcond="(assert (= main_result_mod main_result_orig))" \
--no-glibc-runtime \
-- ./bin/main_1 ./bin/main_2
}

run
19 changes: 19 additions & 0 deletions wp/resources/sample_binaries/prelude_test/run_wp_sat.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# A test that gives RAX the value of RDI + 1. The two binaries differ in the
# order of the increment and move instructions, but have the same output.

# Runs WP with a postcondition that states that the end value of RAX in the
# modified binary is equal to the initial value of RDI in the original
# binary + 2, which is impossible.

# Should return SAT

run () {
bap wp \
--func=main \
--compare-post-reg-values=RAX \
--postcond="(assert (= RAX_mod (bvadd init_RDI_orig #x0000000000000002)))" \
--no-glibc-runtime \
-- ./bin/main_1 ./bin/main_2
}

run
8 changes: 8 additions & 0 deletions wp/resources/sample_binaries/prelude_test/src/main_1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

int foo(int a, int b, int c){
return a + b * c;
}

int main(int argc, char* argv[]){
return foo(argc, 1, 2);
}
8 changes: 8 additions & 0 deletions wp/resources/sample_binaries/prelude_test/src/main_2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@

int foo(int a, int b, int c){
return a + b * c;
}

int main(int argc, char* argv[]){
return foo(argc, 1, 2);
}