Skip to content

Commit

Permalink
Remove length from String (#514)
Browse files Browse the repository at this point in the history
* Remove length from String

* Fix StringS name being iterated for args

* Remove some cruft in strings.py

* Improve lint
  • Loading branch information
twizmwazin authored Sep 24, 2024
1 parent f123448 commit 06eb310
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 165 deletions.
2 changes: 1 addition & 1 deletion claripy/ast/bv.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def concat(self, *args):

@staticmethod
def _from_int(like, value):
return BVV(value, like.length)
return BVV(value, like.length or 64)

@staticmethod
def _from_Bool(like, value):
Expand Down
122 changes: 18 additions & 104 deletions claripy/ast/strings.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,23 @@
from __future__ import annotations

from claripy import operations
from claripy.ast.base import _make_name

from .bits import Bits
from .base import Base, _make_name
from .bool import Bool
from .bv import BV, BVS, BVV
from .bv import BV


class String(Bits):
class String(Base):
"""
Base class that represent the AST of a String object and implements all the operation useful to create and modify the AST.
Base class that represent the AST of a String object and implements all the
operation useful to create and modify the AST.
Do not instantiate this class directly, instead use StringS or StringV to construct a symbol or value, and then use
operations to construct more complicated expressions.
Do not instantiate this class directly, instead use StringS or StringV to
construct a symbol or value, and then use operations to construct more
complicated expressions.
"""

__slots__ = ("string_length",)

# Identifier used by composite solver in order to identify if a certain constraints contains
# variables of type string... In this case cvc4 would handle the solving part.
#
# TODO: Find a smarter way to do this!
STRING_TYPE_IDENTIFIER = "STRING_"
GENERATED_BVS_IDENTIFIER = "BVS_"
MAX_LENGTH = 10000

def __init__(self, *args, length: int, **kwargs):
"""
:param length: The string bit length
"""
super().__init__(*args, length=length, **kwargs)
self.string_length: int = self.length // 8

def __getitem__(self, rng):
"""
This is a big endian indexer that takes its arguments as bits but returns bytes.
Returns the range of bits in self defined by: [rng.start, rng.end], indexed from the end
The bit range may not internally divide any bytes; i.e. low and (high+1) must both be divisible by 8
Examples:
self[7:0] -- returns the last byte of self
self[15:0] -- returns the last two bytes of self
self[8:0] -- Error! [8:0] is 9 bits, it asks for individual bits of the second to last byte!
self[8:1] -- Error! [8:1] asks for 1 bit from the second to last byte and 7 from the last byte!
"""
if type(rng) is slice:
bits_low = rng.start if rng.start is not None else 0
bits_high = rng.stop if rng.stop is not None else 8 * (self.string_length - 1)
if bits_high % 8 != 0 or (bits_low + 1) % 8 != 0:
raise ValueError("Bit indicies must not internally divide bytes!")
# high / low form a reverse-indexed byte index
high = bits_high // 8
low = bits_low // 8
if high < 0:
high = self.string_length + high
if low < 0:
low = self.string_length + low
# StrSubstr takes a front-indexed byte index as a starting point, and a length
start_idx = self.string_length - 1 - low
if high > low:
return StrSubstr(start_idx, 0, self)
return StrSubstr(start_idx, 1 + low - high, self)
raise ValueError("Only slices allowed for string extraction")

@staticmethod
# TODO: Figure out what to convert here
#
# The use case that i found useful here is during the concretization
# of a symbolic address in memory.
# When angr has to do this it creates a claripy.If constraints in this form
# If(condition, <value_read_if_condition_true>, <value_read_if_condition_false>).
# In some case <value_read_if_condition_false> can be MEM_UNINITIALIZED expressed as BVV
#
# What should we return in this case?
def _from_BV(like, value): # pylint: disable=unused-argument
return value
__slots__ = ()

@staticmethod
def _from_str(like, value): # pylint: disable=unused-argument
Expand Down Expand Up @@ -107,80 +50,51 @@ def indexOf(self, pattern, start_idx, bitlength):
"""
return StrIndexOf(self, pattern, start_idx, bitlength)

def raw_to_bv(self):
"""
A counterpart to FP.raw_to_bv - does nothing and returns itself.
"""
if self.symbolic:
return BVS(
next(iter(self.variables)).replace(self.STRING_TYPE_IDENTIFIER, self.GENERATED_BVS_IDENTIFIER),
self.length,
)
return BVV(ord(self.args[0]), self.length)

def raw_to_fp(self):
return self.raw_to_bv().raw_to_fp()


def StringS(name, size, uninitialized=False, explicit_name=False, **kwargs):
def StringS(name, uninitialized=False, explicit_name=False, **kwargs):
"""
Create a new symbolic string (analogous to z3.String())
:param name: The name of the symbolic string (i. e. the name of the variable)
:param size: The size in bytes of the string (i. e. the length of the string)
:param uninitialized: Whether this value should be counted as an "uninitialized" value in the course of an
analysis.
:param bool explicit_name: If False, an identifier is appended to the name to ensure uniqueness.
:returns: The String object representing the symbolic string
"""
n = _make_name(String.STRING_TYPE_IDENTIFIER + name, size, False if explicit_name is None else explicit_name)
n = _make_name(name, 0, False if explicit_name is None else explicit_name)
return String(
"StringS",
n,
length=8 * size,
(n,),
symbolic=True,
uninitialized=uninitialized,
variables=frozenset((n,)),
**kwargs,
)


def StringV(value, length: int | None = None, **kwargs):
def StringV(value, **kwargs):
"""
Create a new Concrete string (analogous to z3.StringVal())
:param value: The constant value of the concrete string
:param length: The byte length of the string
:returns: The String object representing the concrete string
"""

if length is None:
length = len(value)

if length < len(value):
raise ValueError("Can't make a concrete string value longer than the specified length!")

return String("StringV", (value, length), length=8 * length, **kwargs)
return String("StringV", (value,), **kwargs)


StrConcat = operations.op("StrConcat", String, String, calc_length=operations.str_concat_length_calc)
StrSubstr = operations.op("StrSubstr", (BV, BV, String), String, calc_length=operations.substr_length_calc)
StrConcat = operations.op("StrConcat", String, String)
StrSubstr = operations.op("StrSubstr", (BV, BV, String), String)
StrLen = operations.op("StrLen", (String), BV, calc_length=lambda *_: 64)
StrReplace = operations.op(
"StrReplace",
(String, String, String),
String,
extra_check=operations.str_replace_check,
calc_length=operations.str_replace_length_calc,
)
StrReplace = operations.op("StrReplace", (String, String, String), String)
StrContains = operations.op("StrContains", (String, String), Bool)
StrPrefixOf = operations.op("StrPrefixOf", (String, String), Bool)
StrSuffixOf = operations.op("StrSuffixOf", (String, String), Bool)
StrIndexOf = operations.op("StrIndexOf", (String, String, BV), BV, calc_length=lambda *_: 64)
StrToInt = operations.op("StrToInt", (String), BV, calc_length=lambda *_: 64)
IntToStr = operations.op("IntToStr", (BV,), String, calc_length=operations.int_to_str_length_calc)
IntToStr = operations.op("IntToStr", (BV,), String)
StrIsDigit = operations.op("StrIsDigit", (String,), Bool)

# Equality / inequality check
Expand Down
2 changes: 1 addition & 1 deletion claripy/backends/backend_concrete/backend_concrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def BVV(value, size):
return bv.BVV(value, size)

@staticmethod
def StringV(value, size): # pylint: disable=unused-argument
def StringV(value):
return strings.StringV(value)

@staticmethod
Expand Down
4 changes: 1 addition & 3 deletions claripy/backends/backend_z3.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,12 +377,10 @@ def BoolV(self, ast): # pylint:disable=unused-argument

@condom
def StringV(self, ast):
return z3.StringVal(ast.args[0].ljust(ast.args[1], "\0"), ctx=self._context)
return z3.StringVal(ast.args[0], ctx=self._context)

@condom
def StringS(self, ast):
# Maybe this should be an error? Warning for now to support reliant code
log.warning("Converting claripy StringS' to z3 looses length information.")
return z3.String(ast.args[0], ctx=self._context)

#
Expand Down
3 changes: 1 addition & 2 deletions claripy/frontend/frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ def eval_to_ast(self, e, n, extra_constraints=(), exact=None):
if isinstance(e, BV):
return [BVV(v, e.size()) for v in values]
if isinstance(e, String):
return [StringV(v, length=e.string_length) for v in values]

return [StringV(v) for v in values]
# TODO: Implement support for other types
raise NotImplementedError

Expand Down
38 changes: 0 additions & 38 deletions claripy/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,48 +169,10 @@ def extract_length_calc(high, low, _):
return high + 1 - low


def str_basic_length_calc(str_1):
return str_1.length


def int_to_str_length_calc(int_val): # pylint: disable=unused-argument
return 8 * claripy.ast.String.MAX_LENGTH


def str_replace_check(*args):
str_1, str_2, _ = args
if str_1.length < str_2.length:
return False, "The pattern that has to be replaced is longer than the string itself"
return True, ""


def substr_length_calc(start_idx, count, strval): # pylint: disable=unused-argument
# FIXME: How can I get the value of a concrete object without a solver
return strval.length if not count.concrete else 8 * count.args[0]


def ext_length_calc(ext, orig):
return orig.length + ext


def str_concat_length_calc(*args):
return sum(arg.length for arg in args)


def str_replace_length_calc(*args):
str_1, str_2, str_3 = args
# Return the maximum length that the string can assume after the replace
# operation
#
# If the part that has to be replaced if greater than
# the replacement than the we have the maximum length possible
# when the part that has to be replaced is not found inside the string
if str_2.length >= str_3.length:
return str_1.length
# Otherwise We have the maximum length when teh replacement happens
return str_1.length - str_2.length + str_3.length


#
# Operation lists
#
Expand Down
2 changes: 1 addition & 1 deletion tests/test_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def test_composite_solver_with_strings(self):
x = claripy.BVS("x", 32)
y = claripy.BVS("y", 32)
z = claripy.BVS("z", 32)
str_1 = claripy.StringS("sym_str_1", 1024)
str_1 = claripy.StringS("sym_str_1")
c = claripy.And(x == 1, y == 2, z == 3, str_1 == claripy.StringV("cavallo"))
s.add(c)
assert len(s._solver_list) == 4
Expand Down
Loading

0 comments on commit 06eb310

Please sign in to comment.