diff --git a/conf/examples/very-precise.json b/conf/examples/very-precise.json index 84cbf53585..2197335eaf 100644 --- a/conf/examples/very-precise.json +++ b/conf/examples/very-precise.json @@ -61,7 +61,9 @@ "structs" : { "domain" : "combined-sk" }, - "limit-string-addresses": false + "strings": { + "domain": "disjoint" + } } }, "exp": { diff --git a/src/cdomains/addressDomain.ml b/src/cdomains/addressDomain.ml index 5981caf9ea..55b1aceefc 100644 --- a/src/cdomains/addressDomain.ml +++ b/src/cdomains/addressDomain.ml @@ -5,6 +5,7 @@ open IntOps module M = Messages module Mval_outer = Mval +module SD = StringDomain module AddressBase (Mval: Printable.S) = @@ -14,23 +15,14 @@ struct | Addr of Mval.t | NullPtr | UnknownPtr - | StrPtr of string option + | StrPtr of SD.t [@@deriving eq, ord, hash] (* TODO: StrPtr equal problematic if the same literal appears more than once *) let name () = Format.sprintf "address (%s)" (Mval.name ()) - let hash x = match x with - | StrPtr _ -> - if GobConfig.get_bool "ana.base.limit-string-addresses" then - 13859 - else - hash x - | _ -> hash x - let show = function | Addr m -> Mval.show m - | StrPtr (Some x) -> "\"" ^ x ^ "\"" - | StrPtr None -> "(unknown string)" + | StrPtr s -> StringDomain.show s | UnknownPtr -> "?" | NullPtr -> "NULL" @@ -42,31 +34,18 @@ struct ) (* strings *) - let of_string x = StrPtr (Some x) + let of_string x = StrPtr (SD.of_string x) let to_string = function - | StrPtr (Some x) -> Some x + | StrPtr s -> SD.to_string s | _ -> None - (* only keep part before first null byte *) let to_c_string = function - | StrPtr (Some x) -> - begin match String.split_on_char '\x00' x with - | s::_ -> Some s - | [] -> None - end + | StrPtr s -> SD.to_c_string s | _ -> None - let to_n_c_string n x = - match to_c_string x with - | Some x -> - if n > String.length x then - Some x - else if n < 0 then - None - else - Some (String.sub x 0 n) + let to_n_c_string n = function + | StrPtr s -> SD.to_n_c_string n s | _ -> None - let to_string_length x = - match to_c_string x with - | Some x -> Some (String.length x) + let to_string_length = function + | StrPtr s -> SD.to_string_length s | _ -> None let arbitrary () = QCheck.always UnknownPtr (* S TODO: non-unknown *) @@ -101,8 +80,7 @@ struct (* TODO: seems to be unused *) let to_exp = function | Addr m -> AddrOf (Mval.to_cil m) - | StrPtr (Some x) -> mkString x - | StrPtr None -> raise (Lattice.Unsupported "Cannot express unknown string pointer as expression.") + | StrPtr s -> SD.to_exp s | NullPtr -> integer 0 | UnknownPtr -> raise Lattice.TopValue (* TODO: unused *) @@ -123,9 +101,7 @@ struct let semantic_equal x y = match x, y with | Addr x, Addr y -> Mval.semantic_equal x y - | StrPtr None, StrPtr _ - | StrPtr _, StrPtr None -> Some true - | StrPtr (Some a), StrPtr (Some b) -> if a = b then None else Some false + | StrPtr s1, StrPtr s2 -> SD.semantic_equal s1 s2 | NullPtr, NullPtr -> Some true | UnknownPtr, UnknownPtr | UnknownPtr, Addr _ @@ -135,8 +111,7 @@ struct | _, _ -> Some false let leq x y = match x, y with - | StrPtr _, StrPtr None -> true - | StrPtr a, StrPtr b -> a = b + | StrPtr s1, StrPtr s2 -> SD.leq s1 s2 | Addr x, Addr y -> Mval.leq x y | _ -> x = y @@ -144,26 +119,6 @@ struct | Addr x -> Addr (Mval.top_indices x) | x -> x - let join_string_ptr x y = match x, y with - | None, _ - | _, None -> None - | Some a, Some b when a = b -> Some a - | Some a, Some b (* when a <> b *) -> - if GobConfig.get_bool "ana.base.limit-string-addresses" then - None - else - raise Lattice.Uncomparable - - let meet_string_ptr x y = match x, y with - | None, a - | a, None -> a - | Some a, Some b when a = b -> Some a - | Some a, Some b (* when a <> b *) -> - if GobConfig.get_bool "ana.base.limit-string-addresses" then - raise Lattice.BotValue - else - raise Lattice.Uncomparable - let merge mop sop x y = match x, y with | UnknownPtr, UnknownPtr -> UnknownPtr @@ -172,10 +127,10 @@ struct | Addr x, Addr y -> Addr (mop x y) | _ -> raise Lattice.Uncomparable - let join = merge Mval.join join_string_ptr - let widen = merge Mval.widen join_string_ptr - let meet = merge Mval.meet meet_string_ptr - let narrow = merge Mval.narrow meet_string_ptr + let join = merge Mval.join SD.join + let widen = merge Mval.widen SD.join + let meet = merge Mval.meet SD.meet + let narrow = merge Mval.narrow SD.meet include Lattice.NoBotTop @@ -194,8 +149,7 @@ struct let of_elt (x: elt): t = match x with | Addr (v, o) -> Addr v - | StrPtr _ when GobConfig.get_bool "ana.base.limit-string-addresses" -> StrPtr None (* all strings together if limited *) - | StrPtr x -> StrPtr x (* everything else is kept separate, including strings if not limited *) + | StrPtr s -> StrPtr (SD.repr s) | NullPtr -> NullPtr | UnknownPtr -> UnknownPtr end @@ -211,8 +165,7 @@ struct let of_elt (x: elt): t = match x with | Addr (v, o) -> Addr (v, Offset.Unit.of_offs o) (* addrs grouped by var and part of offset *) - | StrPtr _ when GobConfig.get_bool "ana.base.limit-string-addresses" -> StrPtr None (* all strings together if limited *) - | StrPtr x -> StrPtr x (* everything else is kept separate, including strings if not limited *) + | StrPtr s -> StrPtr (SD.repr s) | NullPtr -> NullPtr | UnknownPtr -> UnknownPtr end diff --git a/src/cdomains/addressDomain_intf.ml b/src/cdomains/addressDomain_intf.ml index 0ef3d6dd8d..f65b2977c4 100644 --- a/src/cdomains/addressDomain_intf.ml +++ b/src/cdomains/addressDomain_intf.ml @@ -7,7 +7,7 @@ sig | Addr of Mval.t (** Pointer to mvalue. *) | NullPtr (** NULL pointer. *) | UnknownPtr (** Unknown pointer. Could point to globals, heap and escaped variables. *) - | StrPtr of string option (** String literal pointer. [StrPtr None] abstracts any string pointer *) + | StrPtr of StringDomain.t (** String literal pointer. [StrPtr None] abstracts any string pointer *) include Printable.S with type t := t (** @closed *) val of_string: string -> t @@ -16,8 +16,6 @@ sig val to_string: t -> string option (** Convert {!StrPtr} to string if possible. *) - (** C strings are different from OCaml strings as they are not processed after the first [NUL] byte, even though the OCaml string (and a C string literal) may be longer. *) - val to_c_string: t -> string option (** Convert {!StrPtr} to C string if possible. *) @@ -71,7 +69,7 @@ sig - Each {!Addr}, modulo precise index expressions in the offset, is a sublattice with ordering induced by {!Mval}. - {!NullPtr} is a singleton sublattice. - {!UnknownPtr} is a singleton sublattice. - - If [ana.base.limit-string-addresses] is enabled, then all {!StrPtr} are together in one sublattice with flat ordering. If [ana.base.limit-string-addresses] is disabled, then each {!StrPtr} is a singleton sublattice. *) + - If [ana.base.strings.domain] is disjoint, then each {!StrPtr} is a singleton sublattice. Otherwise, all {!StrPtr} are together in one sublattice with flat ordering. *) module AddressLattice (Mval: Mval.Lattice): sig include module type of AddressPrintable (Mval) diff --git a/src/cdomains/stringDomain.ml b/src/cdomains/stringDomain.ml new file mode 100644 index 0000000000..0621f37eb6 --- /dev/null +++ b/src/cdomains/stringDomain.ml @@ -0,0 +1,114 @@ +include Printable.StdLeaf + +let name () = "string" + +type string_domain = Unit | Disjoint | Flat + +let string_domain: string_domain ResettableLazy.t = + ResettableLazy.from_fun (fun () -> + match GobConfig.get_string "ana.base.strings.domain" with + | "unit" -> Unit + | "disjoint" -> Disjoint + | "flat" -> Flat + | _ -> failwith "ana.base.strings.domain: illegal value" + ) + +let get_string_domain () = ResettableLazy.force string_domain + +let reset_lazy () = + ResettableLazy.reset string_domain + + +type t = string option [@@deriving eq, ord, hash] + +let hash x = + if get_string_domain () = Disjoint then + hash x + else + 13859 + +let show = function + | Some x -> "\"" ^ x ^ "\"" + | None -> "(unknown string)" + +include Printable.SimpleShow ( + struct + type nonrec t = t + let show = show + end + ) + +let of_string x = + if get_string_domain () = Unit then + None + else + Some x +let to_string x = x + +(* only keep part before first null byte *) +let to_c_string = function + | Some x -> + begin match String.split_on_char '\x00' x with + | s::_ -> Some s + | [] -> None + end + | None -> None + +let to_n_c_string n x = + match to_c_string x with + | Some x -> + if n > String.length x then + Some x + else if n < 0 then + None + else + Some (String.sub x 0 n) + | None -> None + +let to_string_length x = + match to_c_string x with + | Some x -> Some (String.length x) + | None -> None + +let to_exp = function + | Some x -> GoblintCil.mkString x + | None -> raise (Lattice.Unsupported "Cannot express unknown string pointer as expression.") + +let semantic_equal x y = + match x, y with + | None, _ + | _, None -> Some true + | Some a, Some b -> if a = b then None else Some false + +let leq x y = + match x, y with + | _, None -> true + | a, b -> a = b + +let join x y = + match x, y with + | None, _ + | _, None -> None + | Some a, Some b when a = b -> Some a + | Some a, Some b (* when a <> b *) -> + if get_string_domain () = Disjoint then + raise Lattice.Uncomparable + else + None + +let meet x y = + match x, y with + | None, a + | a, None -> a + | Some a, Some b when a = b -> Some a + | Some a, Some b (* when a <> b *) -> + if get_string_domain () = Disjoint then + raise Lattice.Uncomparable + else + raise Lattice.BotValue + +let repr x = + if get_string_domain () = Disjoint then + x (* everything else is kept separate, including strings if not limited *) + else + None (* all strings together if limited *) diff --git a/src/cdomains/stringDomain.mli b/src/cdomains/stringDomain.mli new file mode 100644 index 0000000000..66423caa0b --- /dev/null +++ b/src/cdomains/stringDomain.mli @@ -0,0 +1,40 @@ +(** String literals domain. *) + +include Printable.S + +val reset_lazy: unit -> unit +(** Reset the cached configuration of the string domain. *) + +val of_string: string -> t +(** Convert from string. *) + +val to_string: t -> string option +(** Convert to string if possible. *) + +(** C strings are different from OCaml strings as they are not processed after the first [NUL] byte, even though the OCaml string (and a C string literal) may be longer. *) + +val to_c_string: t -> string option +(** Convert to C string if possible. *) + +val to_n_c_string: int -> t -> string option +(** Convert to C string of given maximum length if possible. *) + +val to_string_length: t -> int option +(** Find length of C string if possible. *) + +val to_exp: t -> GoblintCil.exp +(** Convert to CIL expression. *) + +val semantic_equal: t -> t -> bool option +(** Check semantic equality of two strings. + + @return [Some true] if definitely equal, [Some false] if definitely not equal, [None] if unknown. *) + +(** Some {!Lattice.S} operations. *) + +val leq: t -> t -> bool +val join: t -> t -> t +val meet: t -> t -> t + +val repr : t -> t +(** Representative for address lattice. *) diff --git a/src/common/util/options.schema.json b/src/common/util/options.schema.json index 13ccd687be..7c921c4f53 100644 --- a/src/common/util/options.schema.json +++ b/src/common/util/options.schema.json @@ -655,11 +655,19 @@ }, "additionalProperties": false }, - "limit-string-addresses": { - "title": "ana.base.limit-string-addresses", - "description": "Limit abstract address sets to keep at most one distinct string pointer.", - "type": "boolean", - "default": true + "strings": { + "title": "ana.base.strings", + "type": "object", + "properties": { + "domain": { + "title": "ana.base.strings.domain", + "description": "Domain for string literals.", + "type": "string", + "enum": ["unit", "flat", "disjoint"], + "default": "flat" + } + }, + "additionalProperties": false }, "partition-arrays": { "title": "ana.base.partition-arrays", diff --git a/src/goblint_lib.ml b/src/goblint_lib.ml index cb18ad0dd7..70f331b5ac 100644 --- a/src/goblint_lib.ml +++ b/src/goblint_lib.ml @@ -213,6 +213,7 @@ module FloatDomain = FloatDomain module Mval = Mval module Offset = Offset +module StringDomain = StringDomain module AddressDomain = AddressDomain (** {5 Complex} *) diff --git a/src/util/server.ml b/src/util/server.ml index 22f5a03350..829ee92ee8 100644 --- a/src/util/server.ml +++ b/src/util/server.ml @@ -280,6 +280,7 @@ let analyze ?(reset=false) (s: t) = InvariantCil.reset_lazy (); WideningThresholds.reset_lazy (); IntDomain.reset_lazy (); + StringDomain.reset_lazy (); PrecisionUtil.reset_lazy (); ApronDomain.reset_lazy (); AutoTune.reset_lazy (); diff --git a/tests/regression/02-base/88-string-ptrs-limited.c b/tests/regression/02-base/88-string-ptrs-limited.c index ab8b2fefe8..c4f39dc711 100644 --- a/tests/regression/02-base/88-string-ptrs-limited.c +++ b/tests/regression/02-base/88-string-ptrs-limited.c @@ -1,4 +1,4 @@ -//PARAM: --enable ana.base.limit-string-addresses +//PARAM: --set ana.base.strings.domain flat #include #include diff --git a/tests/regression/02-base/89-string-ptrs-not-limited.c b/tests/regression/02-base/89-string-ptrs-not-limited.c index 96100d230d..ab30e21fd8 100644 --- a/tests/regression/02-base/89-string-ptrs-not-limited.c +++ b/tests/regression/02-base/89-string-ptrs-not-limited.c @@ -1,4 +1,4 @@ -//PARAM: --disable ana.base.limit-string-addresses +//PARAM: --set ana.base.strings.domain disjoint #include #include diff --git a/tests/regression/73-strings/01-string_literals.c b/tests/regression/73-strings/01-string_literals.c index 36e4ed121c..42086e07b6 100644 --- a/tests/regression/73-strings/01-string_literals.c +++ b/tests/regression/73-strings/01-string_literals.c @@ -1,4 +1,4 @@ -// PARAM: --disable ana.base.limit-string-addresses --enable ana.int.interval +// PARAM: --set ana.base.strings.domain disjoint --enable ana.int.interval #include #include @@ -21,7 +21,7 @@ int main() { char* s1 = "abcde"; char* s2 = "abcdfg"; char* s3 = hello_world(); - + int i = strlen(s1); __goblint_check(i == 5); @@ -96,10 +96,10 @@ int main() { #define STRNCAT strncat(s1, "hi", 1) #endif STRNCAT; // WARN - + #ifdef __APPLE__ // do nothing => no warning - #else + #else char s4[] = "hello"; strcpy(s4, s2); // NOWARN strncpy(s4, s3, 2); // NOWARN diff --git a/tests/regression/73-strings/02-string_literals_with_null.c b/tests/regression/73-strings/02-string_literals_with_null.c index 75d000bbb8..cc41e9e287 100644 --- a/tests/regression/73-strings/02-string_literals_with_null.c +++ b/tests/regression/73-strings/02-string_literals_with_null.c @@ -1,4 +1,4 @@ -// PARAM: --disable ana.base.limit-string-addresses --enable ana.int.interval +// PARAM: --set ana.base.strings.domain disjoint --enable ana.int.interval #include #include diff --git a/tests/regression/73-strings/03-string_basics.c b/tests/regression/73-strings/03-string_basics.c index db196c64b4..a9d02d5e8b 100644 --- a/tests/regression/73-strings/03-string_basics.c +++ b/tests/regression/73-strings/03-string_basics.c @@ -1,4 +1,4 @@ -// PARAM: --disable ana.base.limit-string-addresses --enable ana.int.interval +// PARAM: --set ana.base.strings.domain disjoint --enable ana.int.interval #include #include diff --git a/tests/regression/73-strings/05-string-unit-domain.c b/tests/regression/73-strings/05-string-unit-domain.c new file mode 100644 index 0000000000..70e6bed5bf --- /dev/null +++ b/tests/regression/73-strings/05-string-unit-domain.c @@ -0,0 +1,15 @@ +// PARAM: --set ana.base.strings.domain unit +#include +#include + +void foo(char *s) { + int l = strlen(s); + __goblint_check(l == 3 || l == 6); // UNKNOWN +} + +int main() { + foo("foo"); + foo("bar"); + foo("foobar"); + return 0; +}