diff --git a/claripy/ast/bv.py b/claripy/ast/bv.py index 6dff5de26..4de96dd55 100644 --- a/claripy/ast/bv.py +++ b/claripy/ast/bv.py @@ -134,7 +134,7 @@ def concat(self, *args): @staticmethod def _from_int(like, value): - return BVV(value, like.length) + return BVV(value, like.length or 64) @staticmethod def _from_Bool(like, value): diff --git a/claripy/ast/strings.py b/claripy/ast/strings.py index 8631a6f18..8d701c7b6 100644 --- a/claripy/ast/strings.py +++ b/claripy/ast/strings.py @@ -1,80 +1,23 @@ from __future__ import annotations from claripy import operations -from claripy.ast.base import _make_name -from .bits import Bits +from .base import Base, _make_name from .bool import Bool -from .bv import BV, BVS, BVV +from .bv import BV -class String(Bits): +class String(Base): """ - Base class that represent the AST of a String object and implements all the operation useful to create and modify the AST. + Base class that represent the AST of a String object and implements all the + operation useful to create and modify the AST. - Do not instantiate this class directly, instead use StringS or StringV to construct a symbol or value, and then use - operations to construct more complicated expressions. + Do not instantiate this class directly, instead use StringS or StringV to + construct a symbol or value, and then use operations to construct more + complicated expressions. """ - __slots__ = ("string_length",) - - # Identifier used by composite solver in order to identify if a certain constraints contains - # variables of type string... In this case cvc4 would handle the solving part. - # - # TODO: Find a smarter way to do this! - STRING_TYPE_IDENTIFIER = "STRING_" - GENERATED_BVS_IDENTIFIER = "BVS_" - MAX_LENGTH = 10000 - - def __init__(self, *args, length: int, **kwargs): - """ - :param length: The string bit length - """ - super().__init__(*args, length=length, **kwargs) - self.string_length: int = self.length // 8 - - def __getitem__(self, rng): - """ - This is a big endian indexer that takes its arguments as bits but returns bytes. - Returns the range of bits in self defined by: [rng.start, rng.end], indexed from the end - The bit range may not internally divide any bytes; i.e. low and (high+1) must both be divisible by 8 - Examples: - self[7:0] -- returns the last byte of self - self[15:0] -- returns the last two bytes of self - self[8:0] -- Error! [8:0] is 9 bits, it asks for individual bits of the second to last byte! - self[8:1] -- Error! [8:1] asks for 1 bit from the second to last byte and 7 from the last byte! - """ - if type(rng) is slice: - bits_low = rng.start if rng.start is not None else 0 - bits_high = rng.stop if rng.stop is not None else 8 * (self.string_length - 1) - if bits_high % 8 != 0 or (bits_low + 1) % 8 != 0: - raise ValueError("Bit indicies must not internally divide bytes!") - # high / low form a reverse-indexed byte index - high = bits_high // 8 - low = bits_low // 8 - if high < 0: - high = self.string_length + high - if low < 0: - low = self.string_length + low - # StrSubstr takes a front-indexed byte index as a starting point, and a length - start_idx = self.string_length - 1 - low - if high > low: - return StrSubstr(start_idx, 0, self) - return StrSubstr(start_idx, 1 + low - high, self) - raise ValueError("Only slices allowed for string extraction") - - @staticmethod - # TODO: Figure out what to convert here - # - # The use case that i found useful here is during the concretization - # of a symbolic address in memory. - # When angr has to do this it creates a claripy.If constraints in this form - # If(condition, , ). - # In some case can be MEM_UNINITIALIZED expressed as BVV - # - # What should we return in this case? - def _from_BV(like, value): # pylint: disable=unused-argument - return value + __slots__ = () @staticmethod def _from_str(like, value): # pylint: disable=unused-argument @@ -107,38 +50,22 @@ def indexOf(self, pattern, start_idx, bitlength): """ return StrIndexOf(self, pattern, start_idx, bitlength) - def raw_to_bv(self): - """ - A counterpart to FP.raw_to_bv - does nothing and returns itself. - """ - if self.symbolic: - return BVS( - next(iter(self.variables)).replace(self.STRING_TYPE_IDENTIFIER, self.GENERATED_BVS_IDENTIFIER), - self.length, - ) - return BVV(ord(self.args[0]), self.length) - - def raw_to_fp(self): - return self.raw_to_bv().raw_to_fp() - -def StringS(name, size, uninitialized=False, explicit_name=False, **kwargs): +def StringS(name, uninitialized=False, explicit_name=False, **kwargs): """ Create a new symbolic string (analogous to z3.String()) :param name: The name of the symbolic string (i. e. the name of the variable) - :param size: The size in bytes of the string (i. e. the length of the string) :param uninitialized: Whether this value should be counted as an "uninitialized" value in the course of an analysis. :param bool explicit_name: If False, an identifier is appended to the name to ensure uniqueness. :returns: The String object representing the symbolic string """ - n = _make_name(String.STRING_TYPE_IDENTIFIER + name, size, False if explicit_name is None else explicit_name) + n = _make_name(name, 0, False if explicit_name is None else explicit_name) return String( "StringS", - n, - length=8 * size, + (n,), symbolic=True, uninitialized=uninitialized, variables=frozenset((n,)), @@ -146,41 +73,28 @@ def StringS(name, size, uninitialized=False, explicit_name=False, **kwargs): ) -def StringV(value, length: int | None = None, **kwargs): +def StringV(value, **kwargs): """ Create a new Concrete string (analogous to z3.StringVal()) :param value: The constant value of the concrete string - :param length: The byte length of the string :returns: The String object representing the concrete string """ - if length is None: - length = len(value) - - if length < len(value): - raise ValueError("Can't make a concrete string value longer than the specified length!") - - return String("StringV", (value, length), length=8 * length, **kwargs) + return String("StringV", (value,), **kwargs) -StrConcat = operations.op("StrConcat", String, String, calc_length=operations.str_concat_length_calc) -StrSubstr = operations.op("StrSubstr", (BV, BV, String), String, calc_length=operations.substr_length_calc) +StrConcat = operations.op("StrConcat", String, String) +StrSubstr = operations.op("StrSubstr", (BV, BV, String), String) StrLen = operations.op("StrLen", (String), BV, calc_length=lambda *_: 64) -StrReplace = operations.op( - "StrReplace", - (String, String, String), - String, - extra_check=operations.str_replace_check, - calc_length=operations.str_replace_length_calc, -) +StrReplace = operations.op("StrReplace", (String, String, String), String) StrContains = operations.op("StrContains", (String, String), Bool) StrPrefixOf = operations.op("StrPrefixOf", (String, String), Bool) StrSuffixOf = operations.op("StrSuffixOf", (String, String), Bool) StrIndexOf = operations.op("StrIndexOf", (String, String, BV), BV, calc_length=lambda *_: 64) StrToInt = operations.op("StrToInt", (String), BV, calc_length=lambda *_: 64) -IntToStr = operations.op("IntToStr", (BV,), String, calc_length=operations.int_to_str_length_calc) +IntToStr = operations.op("IntToStr", (BV,), String) StrIsDigit = operations.op("StrIsDigit", (String,), Bool) # Equality / inequality check diff --git a/claripy/backends/backend_concrete/backend_concrete.py b/claripy/backends/backend_concrete/backend_concrete.py index 2e677e6bf..117b03cc4 100644 --- a/claripy/backends/backend_concrete/backend_concrete.py +++ b/claripy/backends/backend_concrete/backend_concrete.py @@ -61,7 +61,7 @@ def BVV(value, size): return bv.BVV(value, size) @staticmethod - def StringV(value, size): # pylint: disable=unused-argument + def StringV(value): return strings.StringV(value) @staticmethod diff --git a/claripy/backends/backend_z3.py b/claripy/backends/backend_z3.py index 309a22c5b..9e9c772d3 100644 --- a/claripy/backends/backend_z3.py +++ b/claripy/backends/backend_z3.py @@ -377,12 +377,10 @@ def BoolV(self, ast): # pylint:disable=unused-argument @condom def StringV(self, ast): - return z3.StringVal(ast.args[0].ljust(ast.args[1], "\0"), ctx=self._context) + return z3.StringVal(ast.args[0], ctx=self._context) @condom def StringS(self, ast): - # Maybe this should be an error? Warning for now to support reliant code - log.warning("Converting claripy StringS' to z3 looses length information.") return z3.String(ast.args[0], ctx=self._context) # diff --git a/claripy/frontend/frontend.py b/claripy/frontend/frontend.py index 15baeee41..0aab44059 100644 --- a/claripy/frontend/frontend.py +++ b/claripy/frontend/frontend.py @@ -61,8 +61,7 @@ def eval_to_ast(self, e, n, extra_constraints=(), exact=None): if isinstance(e, BV): return [BVV(v, e.size()) for v in values] if isinstance(e, String): - return [StringV(v, length=e.string_length) for v in values] - + return [StringV(v) for v in values] # TODO: Implement support for other types raise NotImplementedError diff --git a/claripy/operations.py b/claripy/operations.py index 8f6fdccca..cfee13564 100644 --- a/claripy/operations.py +++ b/claripy/operations.py @@ -169,48 +169,10 @@ def extract_length_calc(high, low, _): return high + 1 - low -def str_basic_length_calc(str_1): - return str_1.length - - -def int_to_str_length_calc(int_val): # pylint: disable=unused-argument - return 8 * claripy.ast.String.MAX_LENGTH - - -def str_replace_check(*args): - str_1, str_2, _ = args - if str_1.length < str_2.length: - return False, "The pattern that has to be replaced is longer than the string itself" - return True, "" - - -def substr_length_calc(start_idx, count, strval): # pylint: disable=unused-argument - # FIXME: How can I get the value of a concrete object without a solver - return strval.length if not count.concrete else 8 * count.args[0] - - def ext_length_calc(ext, orig): return orig.length + ext -def str_concat_length_calc(*args): - return sum(arg.length for arg in args) - - -def str_replace_length_calc(*args): - str_1, str_2, str_3 = args - # Return the maximum length that the string can assume after the replace - # operation - # - # If the part that has to be replaced if greater than - # the replacement than the we have the maximum length possible - # when the part that has to be replaced is not found inside the string - if str_2.length >= str_3.length: - return str_1.length - # Otherwise We have the maximum length when teh replacement happens - return str_1.length - str_2.length + str_3.length - - # # Operation lists # diff --git a/tests/test_solver.py b/tests/test_solver.py index 2b0d34486..97926b4bd 100644 --- a/tests/test_solver.py +++ b/tests/test_solver.py @@ -455,7 +455,7 @@ def test_composite_solver_with_strings(self): x = claripy.BVS("x", 32) y = claripy.BVS("y", 32) z = claripy.BVS("z", 32) - str_1 = claripy.StringS("sym_str_1", 1024) + str_1 = claripy.StringS("sym_str_1") c = claripy.And(x == 1, y == 2, z == 3, str_1 == claripy.StringV("cavallo")) s.add(c) assert len(s._solver_list) == 4 diff --git a/tests/test_strings.py b/tests/test_strings.py index 8e4939e12..a8435b35a 100644 --- a/tests/test_strings.py +++ b/tests/test_strings.py @@ -14,7 +14,7 @@ def get_solver(self): def test_concat(self): str_concrete = claripy.StringV("conc") - str_symbol = claripy.StringS("symb_concat", 4, explicit_name=True) + str_symbol = claripy.StringS("symb_concat", explicit_name=True) solver = self.get_solver() res = str_concrete + str_symbol solver.add(res == claripy.StringV("concrete")) @@ -38,7 +38,7 @@ def test_concat_simplification(self): self.assertEqual(["conc"], list(result)) def test_substr(self): - str_symbol = claripy.StringS("symb_subst", 4, explicit_name=True) + str_symbol = claripy.StringS("symb_subst", explicit_name=True) solver = self.get_solver() solver.add(claripy.StrSubstr(1, 2, str_symbol) == claripy.StringV("o")) self.assertTrue(solver.satisfiable()) @@ -57,7 +57,7 @@ def test_substr_simplification(self): self.assertEqual(list(result), ["concrete"]) def test_replace(self): - str_to_replace_symb = claripy.StringS("symb_repl", 4, explicit_name=True) + str_to_replace_symb = claripy.StringS("symb_repl", explicit_name=True) sub_str_to_repl = claripy.StringV("a") replacement = claripy.StringV("b") solver = self.get_solver() @@ -88,7 +88,7 @@ def test_replace_simplification(self): self.assertEqual(["cane"], list(result)) def test_ne(self): - str_symb = claripy.StringS("symb_ne", 12, explicit_name=True) + str_symb = claripy.StringS("symb_ne", explicit_name=True) solver = self.get_solver() solver.add(str_symb != claripy.StringV("concrete")) self.assertTrue(solver.satisfiable()) @@ -97,7 +97,7 @@ def test_ne(self): self.assertTrue("concrete" not in result) def test_length(self): - str_symb = claripy.StringS("symb_length", 12, explicit_name=True) + str_symb = claripy.StringS("symb_length", explicit_name=True) solver = self.get_solver() # TODO: How do we want to deal with the size of a symbolic string? solver.add(claripy.StrLen(str_symb) == 14) @@ -119,7 +119,7 @@ def test_length_simplification(self): self.assertTrue(len(r) == 8) def test_or(self): - str_symb = claripy.StringS("Symb_or", 4, explicit_name=True) + str_symb = claripy.StringS("Symb_or", explicit_name=True) solver = self.get_solver() res = claripy.Or((str_symb == claripy.StringV("abc")), (str_symb == claripy.StringV("ciao"))) solver.add(res) @@ -129,7 +129,7 @@ def test_or(self): self.assertEqual({"ciao", "abc"}, set(result)) def test_lt_etc(self): - str_symb = claripy.StringS("Symb_2", 4) + str_symb = claripy.StringS("Symb_2") solver = self.get_solver() c1 = claripy.StrLen(str_symb) <= 4 c2 = claripy.StrLen(str_symb) < 4 @@ -142,7 +142,7 @@ def test_lt_etc(self): self.assertFalse(solver.satisfiable()) def test_substr_BV_concrete_index(self): - str_symbol = claripy.StringS("symb_subst", 4, explicit_name=True) + str_symbol = claripy.StringS("symb_subst", explicit_name=True) solver = self.get_solver() bv1 = claripy.BVV(1, 32) bv2 = claripy.BVV(2, 32) @@ -152,7 +152,7 @@ def test_substr_BV_concrete_index(self): self.assertEqual("on", solver.eval(str_symbol, 1)[0][1:3]) def test_substr_BV_symbolic_index(self): - str_symbol = claripy.StringS("symb_subst", 4, explicit_name=True) + str_symbol = claripy.StringS("symb_subst", explicit_name=True) solver = self.get_solver() start = claripy.BVS("start_idx", 32) count = claripy.BVS("count", 32) @@ -170,7 +170,7 @@ def test_substr_BV_symbolic_index(self): self.assertEqual("on", solver.eval(str_symbol, 1, extra_constraints=(start == 1, count == 4))[0][1:]) def test_substr_BV_mixed_index(self): - str_symbol = claripy.StringS("symb_subst", 4, explicit_name=True) + str_symbol = claripy.StringS("symb_subst", explicit_name=True) solver = self.get_solver() start = claripy.BVS("symb_subst_start_idx", 32, explicit_name=True) count = claripy.BVV(2, 32) @@ -182,7 +182,7 @@ def test_substr_BV_mixed_index(self): self.assertEqual("on", solver.eval(str_symbol, 1, extra_constraints=(start == 2,))[0][2:4]) def test_contains(self): - str_symb = claripy.StringS("symb_contains", 4, explicit_name=True) + str_symb = claripy.StringS("symb_contains", explicit_name=True) res = claripy.StrContains(str_symb, claripy.StringV("an")) solver = self.get_solver() solver.add(res) @@ -202,7 +202,7 @@ def test_contains_simplification(self): self.assertEqual((True,), solver.eval(res, 2)) def test_prefix(self): - str_symb = claripy.StringS("symb_prefix", 4, explicit_name=True) + str_symb = claripy.StringS("symb_prefix", explicit_name=True) res = claripy.StrPrefixOf(claripy.StringV("an"), str_symb) solver = self.get_solver() solver.add(res) @@ -213,7 +213,7 @@ def test_prefix(self): self.assertTrue(sol.startswith("an")) def test_suffix(self): - str_symb = claripy.StringS("symb_suffix", 4, explicit_name=True) + str_symb = claripy.StringS("symb_suffix", explicit_name=True) res = claripy.StrSuffixOf(claripy.StringV("an"), str_symb) solver = self.get_solver() solver.add(res) @@ -244,7 +244,7 @@ def test_suffix_simplification(self): self.assertEqual((True,), solver.eval(res, 2)) def test_index_of(self): - str_symb = claripy.StringS("symb_suffix", 4, explicit_name=True) + str_symb = claripy.StringS("symb_suffix", explicit_name=True) res = claripy.StrIndexOf(str_symb, claripy.StringV("an"), 0) solver = self.get_solver() @@ -270,7 +270,7 @@ def test_index_of_simplification(self): @unittest.skip("Usually hangs") def test_index_of_symbolic_start_idx(self): - str_symb = claripy.StringS("symb_index_of", 4, explicit_name=True) + str_symb = claripy.StringS("symb_index_of", explicit_name=True) start_idx = claripy.BVS("symb_start_idx", 32, explicit_name=True) solver = self.get_solver()