From 280c435ecabb25f6f3731a30010c49c993720104 Mon Sep 17 00:00:00 2001 From: Sonya Schriner Date: Wed, 22 Apr 2020 17:17:03 -0400 Subject: [PATCH 1/6] Start of model for basic_string::append(char *) needs debugging --- manticore/native/models.py | 79 +++++++++++++++++++++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/manticore/native/models.py b/manticore/native/models.py index 4cbfa3951..ab4bcb01a 100644 --- a/manticore/native/models.py +++ b/manticore/native/models.py @@ -2,11 +2,13 @@ Models here are intended to be passed to :meth:`~manticore.native.state.State.invoke_model`, not invoked directly. """ -from .cpu.abstractcpu import ConcretizeArgument +from .cpu.abstractcpu import ConcretizeArgument, Cpu from ..core.smtlib import issymbolic from ..core.smtlib.solver import Z3Solver from ..core.smtlib.operators import ITEBV, ZEXTEND +from dataclasses import dataclass, field + VARIADIC_FUNC_ATTR = "_variadic" @@ -140,3 +142,78 @@ def strlen(state, s): ret = ITEBV(cpu.address_bit_size, byt == 0, offset, ret) return ret + + +@dataclass +class basic_string_class: + _cpu: Cpu + addr: int + _M_dataplus__M_p_addr: int = field(init=False) + _M_string_length_addr: int = field(init=False) + _M_local_buf_addr: int = field(init=False) + _M_allocated_capacity_addr: int = field(init=False) + + def __post_init__(self): + """ + Represents basic_string object information + Naming conventions below preserve basic_string variables for easy reference. + See: https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/basic_string.h + """ + # FIXME: automate memory size info and remove the hardcoded memory values below + # FIXME: symbolic input completely unsupported - make sure concrete input functions correctly first + self._M_dataplus__M_p_addr = self.addr # address of c_str address + self._M_string_length_addr = self.addr + 8 # string length address + # These two values are contained in a union. + # See: https://github.com/gcc-mirror/gcc/blob/2930bb321794c241d8df5591a5bf447bf89c6e82/libstdc%2B%2B-v3/include/bits/basic_string.h#L171 + self._M_local_buf_addr = self.addr + 16 + self._M_allocated_capacity_addr = self.addr + 16 + + print(f"Length = {self.len}\n{self.addr:016x}") + + def update_len(self, new_len): + """ + :param new_length: integer of desired new length + """ + self._cpu.write_int(self._M_string_length_addr, new_len, 64) + + def update_c_str(self, new_str): + """ + :param new_str: address of the start of new string + """ + self._cpu.write_int(self._M_dataplus__M_p_addr, new_str, 64) + + @property + def star_this(self): + return self._cpu.read_int(self.objref, 256) + + @property + def c_str(self): + return self._cpu.read_int(self._M_dataplus__M_p_addr, 64) + + @property + def len(self): + return self._cpu.read_int(self._M_string_length_addr, 64) + + +def basic_string_append_c_str(state, objref, s): + """ + Extends the basic_string by appending additional characters at the end of its current value + + :param State state: current program state + :param int objref: Address of basic_string object (this) + :param int s: Address of char * string to append + :return: *this + :rtype: std::basic_string + """ + cpu = state.cpu + b_string = basic_string_class(cpu, objref) + # TODO: add support for when c_str() there is out of space + zero_idx = _find_zero(cpu, state.constraints, s) + for i in range(0, zero_idx - 1): + src_addr = s + i + dest_addr = b_string.c_str + b_string.len + i + c = cpu.read_int(src_addr, 8) + cpu.write_int(char_addr, c, 8) + b_string.update_len(b_string.c_str + b_string.len) + cpu.write_int(b_string.c_str + b_string.len, 0, 8) + return b_string.star_this From 21258660b7ee7b275a896fd30c683def1c1b719f Mon Sep 17 00:00:00 2001 From: Sonya Schriner Date: Thu, 23 Apr 2020 11:12:27 -0400 Subject: [PATCH 2/6] Corrected several typos --- manticore/native/models.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/manticore/native/models.py b/manticore/native/models.py index ab4bcb01a..040a87370 100644 --- a/manticore/native/models.py +++ b/manticore/native/models.py @@ -184,7 +184,7 @@ def update_c_str(self, new_str): @property def star_this(self): - return self._cpu.read_int(self.objref, 256) + return self._cpu.read_int(self.addr, 256) @property def c_str(self): @@ -208,12 +208,18 @@ def basic_string_append_c_str(state, objref, s): cpu = state.cpu b_string = basic_string_class(cpu, objref) # TODO: add support for when c_str() there is out of space + # TODO: implement capacity & resize then finish this zero_idx = _find_zero(cpu, state.constraints, s) - for i in range(0, zero_idx - 1): + for i in range(0, zero_idx): src_addr = s + i - dest_addr = b_string.c_str + b_string.len + i + dst_addr = b_string.c_str + b_string.len + i c = cpu.read_int(src_addr, 8) - cpu.write_int(char_addr, c, 8) - b_string.update_len(b_string.c_str + b_string.len) + print(i, ":", c) + cpu.write_int(dst_addr, c, 8) + new_len = zero_idx + b_string.len + print("New length", new_len) + b_string.update_len(new_len) cpu.write_int(b_string.c_str + b_string.len, 0, 8) return b_string.star_this + + From 5ead1de2f852bb138894744280198557f088e651 Mon Sep 17 00:00:00 2001 From: Sonya Schriner Date: Thu, 23 Apr 2020 13:42:27 -0400 Subject: [PATCH 3/6] Beginning of work on capacity() --- manticore/native/models.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/manticore/native/models.py b/manticore/native/models.py index 040a87370..e471f3ecd 100644 --- a/manticore/native/models.py +++ b/manticore/native/models.py @@ -167,6 +167,7 @@ def __post_init__(self): # See: https://github.com/gcc-mirror/gcc/blob/2930bb321794c241d8df5591a5bf447bf89c6e82/libstdc%2B%2B-v3/include/bits/basic_string.h#L171 self._M_local_buf_addr = self.addr + 16 self._M_allocated_capacity_addr = self.addr + 16 + self._S_local_capacity = 15 # 15 chars + '\000' print(f"Length = {self.len}\n{self.addr:016x}") @@ -184,16 +185,42 @@ def update_c_str(self, new_str): @property def star_this(self): + """ + :return *this: return dereferenced object + """ return self._cpu.read_int(self.addr, 256) @property def c_str(self): + """ + :return int: internal c_str address + """ return self._cpu.read_int(self._M_dataplus__M_p_addr, 64) @property def len(self): + """ + :return int: length of string + """ return self._cpu.read_int(self._M_string_length_addr, 64) + @property + def is_local(self): + """ + :return bool: whether the string is stored in local buffer + """ + return self.c_str == self._M_local_buf_addr + + @property + def capacity(self): + """ + :return: The size of the storage capacity + """ + if self.is_local: + return self._S_local_capacity + else: + return self._cpu.read_int(self._M_allocated_capacity_addr, 64) + def basic_string_append_c_str(state, objref, s): """ @@ -223,3 +250,12 @@ def basic_string_append_c_str(state, objref, s): return b_string.star_this +def basic_string_capacity(state, objref): + """ + The size of the storage capacity currently allocated for the basic_string. + + Member type size_type is an unsigned integral type. + """ + cpu = state.cpu + b_string = basic_string_class(cpu, objref) + return b_string.capacity From a819fb2826cf3075ec7d415d0d8a17b3c99155ee Mon Sep 17 00:00:00 2001 From: Sonya Schriner Date: Thu, 23 Apr 2020 14:47:31 -0400 Subject: [PATCH 4/6] Separated string modeling into it's own file --- manticore/native/basic_string_models.py | 133 ++++++++++++++++++++++++ manticore/native/models.py | 120 +-------------------- 2 files changed, 134 insertions(+), 119 deletions(-) create mode 100644 manticore/native/basic_string_models.py diff --git a/manticore/native/basic_string_models.py b/manticore/native/basic_string_models.py new file mode 100644 index 000000000..2f54a3141 --- /dev/null +++ b/manticore/native/basic_string_models.py @@ -0,0 +1,133 @@ +""" +Models here are intended to be passed to :meth:`~manticore.native.state.State.invoke_model`, not invoked directly. +""" + +from .cpu.abstractcpu import ConcretizeArgument, Cpu +from ..core.smtlib import issymbolic +from ..core.smtlib.solver import Z3Solver +from ..core.smtlib.operators import ITEBV, ZEXTEND +from .models import _find_zero + +from dataclasses import dataclass, field + + +@dataclass +class basic_string_class: + _cpu: Cpu + addr: int + _M_dataplus__M_p_addr: int = field(init=False) + _M_string_length_addr: int = field(init=False) + _M_local_buf_addr: int = field(init=False) + _M_allocated_capacity_addr: int = field(init=False) + + def __post_init__(self): + """ + Represents basic_string object information + Naming conventions below preserve basic_string variables for easy reference. + See: https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/basic_string.h + """ + # FIXME: automate memory size info and remove the hardcoded memory values below + # FIXME: symbolic input completely unsupported - make sure concrete input functions correctly first + self._M_dataplus__M_p_addr = self.addr # address of c_str address + self._M_string_length_addr = self.addr + 8 # string length address + # These two values are contained in a union. + # See: https://github.com/gcc-mirror/gcc/blob/2930bb321794c241d8df5591a5bf447bf89c6e82/libstdc%2B%2B-v3/include/bits/basic_string.h#L171 + self._M_local_buf_addr = self.addr + 16 + self._M_allocated_capacity_addr = self.addr + 16 + self._S_local_capacity = 15 # 15 chars + '\000' + + print(f"Length = {self.len}\n{self.addr:016x}") + + def update_len(self, new_len): + """ + :param new_length: integer of desired new length + """ + self._cpu.write_int(self._M_string_length_addr, new_len, 64) + + def update_c_str(self, new_str): + """ + :param new_str: address of the start of new string + """ + self._cpu.write_int(self._M_dataplus__M_p_addr, new_str, 64) + + def resize(self, new_size): + """ + """ + return None + + @property + def star_this(self): + """ + :return *this: return dereferenced object + """ + return self._cpu.read_int(self.addr, 256) + + @property + def c_str(self): + """ + :return int: internal c_str address + """ + return self._cpu.read_int(self._M_dataplus__M_p_addr, 64) + + @property + def len(self): + """ + :return int: length of string + """ + return self._cpu.read_int(self._M_string_length_addr, 64) + + @property + def is_local(self): + """ + :return bool: whether the string is stored in local buffer + """ + return self.c_str == self._M_local_buf_addr + + @property + def capacity(self): + """ + :return: The size of the storage capacity + """ + if self.is_local: + return self._S_local_capacity + else: + return self._cpu.read_int(self._M_allocated_capacity_addr, 64) + + +def basic_string_append_c_str(state, objref, s): + """ + Extends the basic_string by appending additional characters at the end of its current value + + :param State state: current program state + :param int objref: Address of basic_string object (this) + :param int s: Address of char * string to append + :return: *this + :rtype: std::basic_string + """ + cpu = state.cpu + b_string = basic_string_class(cpu, objref) + # TODO: add support for when c_str() there is out of space + # TODO: implement capacity & resize then finish this + zero_idx = _find_zero(cpu, state.constraints, s) + for i in range(0, zero_idx): + src_addr = s + i + dst_addr = b_string.c_str + b_string.len + i + c = cpu.read_int(src_addr, 8) + print(i, ":", c) + cpu.write_int(dst_addr, c, 8) + new_len = zero_idx + b_string.len + print("New length", new_len) + b_string.update_len(new_len) + cpu.write_int(b_string.c_str + b_string.len, 0, 8) + return b_string.star_this + + +def basic_string_capacity(state, objref): + """ + The size of the storage capacity currently allocated for the basic_string. + + Member type size_type is an unsigned integral type. + """ + cpu = state.cpu + b_string = basic_string_class(cpu, objref) + return b_string.capacity diff --git a/manticore/native/models.py b/manticore/native/models.py index e471f3ecd..f4136f1be 100644 --- a/manticore/native/models.py +++ b/manticore/native/models.py @@ -2,12 +2,11 @@ Models here are intended to be passed to :meth:`~manticore.native.state.State.invoke_model`, not invoked directly. """ -from .cpu.abstractcpu import ConcretizeArgument, Cpu +from .cpu.abstractcpu import ConcretizeArgument from ..core.smtlib import issymbolic from ..core.smtlib.solver import Z3Solver from ..core.smtlib.operators import ITEBV, ZEXTEND -from dataclasses import dataclass, field VARIADIC_FUNC_ATTR = "_variadic" @@ -142,120 +141,3 @@ def strlen(state, s): ret = ITEBV(cpu.address_bit_size, byt == 0, offset, ret) return ret - - -@dataclass -class basic_string_class: - _cpu: Cpu - addr: int - _M_dataplus__M_p_addr: int = field(init=False) - _M_string_length_addr: int = field(init=False) - _M_local_buf_addr: int = field(init=False) - _M_allocated_capacity_addr: int = field(init=False) - - def __post_init__(self): - """ - Represents basic_string object information - Naming conventions below preserve basic_string variables for easy reference. - See: https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/basic_string.h - """ - # FIXME: automate memory size info and remove the hardcoded memory values below - # FIXME: symbolic input completely unsupported - make sure concrete input functions correctly first - self._M_dataplus__M_p_addr = self.addr # address of c_str address - self._M_string_length_addr = self.addr + 8 # string length address - # These two values are contained in a union. - # See: https://github.com/gcc-mirror/gcc/blob/2930bb321794c241d8df5591a5bf447bf89c6e82/libstdc%2B%2B-v3/include/bits/basic_string.h#L171 - self._M_local_buf_addr = self.addr + 16 - self._M_allocated_capacity_addr = self.addr + 16 - self._S_local_capacity = 15 # 15 chars + '\000' - - print(f"Length = {self.len}\n{self.addr:016x}") - - def update_len(self, new_len): - """ - :param new_length: integer of desired new length - """ - self._cpu.write_int(self._M_string_length_addr, new_len, 64) - - def update_c_str(self, new_str): - """ - :param new_str: address of the start of new string - """ - self._cpu.write_int(self._M_dataplus__M_p_addr, new_str, 64) - - @property - def star_this(self): - """ - :return *this: return dereferenced object - """ - return self._cpu.read_int(self.addr, 256) - - @property - def c_str(self): - """ - :return int: internal c_str address - """ - return self._cpu.read_int(self._M_dataplus__M_p_addr, 64) - - @property - def len(self): - """ - :return int: length of string - """ - return self._cpu.read_int(self._M_string_length_addr, 64) - - @property - def is_local(self): - """ - :return bool: whether the string is stored in local buffer - """ - return self.c_str == self._M_local_buf_addr - - @property - def capacity(self): - """ - :return: The size of the storage capacity - """ - if self.is_local: - return self._S_local_capacity - else: - return self._cpu.read_int(self._M_allocated_capacity_addr, 64) - - -def basic_string_append_c_str(state, objref, s): - """ - Extends the basic_string by appending additional characters at the end of its current value - - :param State state: current program state - :param int objref: Address of basic_string object (this) - :param int s: Address of char * string to append - :return: *this - :rtype: std::basic_string - """ - cpu = state.cpu - b_string = basic_string_class(cpu, objref) - # TODO: add support for when c_str() there is out of space - # TODO: implement capacity & resize then finish this - zero_idx = _find_zero(cpu, state.constraints, s) - for i in range(0, zero_idx): - src_addr = s + i - dst_addr = b_string.c_str + b_string.len + i - c = cpu.read_int(src_addr, 8) - print(i, ":", c) - cpu.write_int(dst_addr, c, 8) - new_len = zero_idx + b_string.len - print("New length", new_len) - b_string.update_len(new_len) - cpu.write_int(b_string.c_str + b_string.len, 0, 8) - return b_string.star_this - - -def basic_string_capacity(state, objref): - """ - The size of the storage capacity currently allocated for the basic_string. - - Member type size_type is an unsigned integral type. - """ - cpu = state.cpu - b_string = basic_string_class(cpu, objref) - return b_string.capacity From d2a3dcfdcafbfdf775f81e594780cd56d95203e9 Mon Sep 17 00:00:00 2001 From: Sonya Schriner Date: Thu, 23 Apr 2020 14:52:16 -0400 Subject: [PATCH 5/6] Removed trailing whitespace for codeclimate --- manticore/native/basic_string_models.py | 11 +++++++---- manticore/native/models.py | 1 - 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/manticore/native/basic_string_models.py b/manticore/native/basic_string_models.py index 2f54a3141..56794cd1f 100644 --- a/manticore/native/basic_string_models.py +++ b/manticore/native/basic_string_models.py @@ -10,7 +10,6 @@ from dataclasses import dataclass, field - @dataclass class basic_string_class: _cpu: Cpu @@ -22,7 +21,7 @@ class basic_string_class: def __post_init__(self): """ - Represents basic_string object information + Represents basic_string object information Naming conventions below preserve basic_string variables for easy reference. See: https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/include/bits/basic_string.h """ @@ -53,7 +52,11 @@ def update_c_str(self, new_str): def resize(self, new_size): """ """ - return None + allocate new mem + and copy to it + if not self.is_local: + delete the old memory + return @property def star_this(self): @@ -86,7 +89,7 @@ def is_local(self): @property def capacity(self): """ - :return: The size of the storage capacity + :return: The size of the storage capacity """ if self.is_local: return self._S_local_capacity diff --git a/manticore/native/models.py b/manticore/native/models.py index f4136f1be..4cbfa3951 100644 --- a/manticore/native/models.py +++ b/manticore/native/models.py @@ -7,7 +7,6 @@ from ..core.smtlib.solver import Z3Solver from ..core.smtlib.operators import ITEBV, ZEXTEND - VARIADIC_FUNC_ATTR = "_variadic" From 29e3d866370365632568dc28efef9a8ab80ccbf3 Mon Sep 17 00:00:00 2001 From: Sonya Schriner Date: Thu, 23 Apr 2020 16:17:53 -0400 Subject: [PATCH 6/6] Updated reserve moving to solve strcpy and malloc needs --- manticore/native/basic_string_models.py | 56 ++++++++++++++++++------- 1 file changed, 40 insertions(+), 16 deletions(-) diff --git a/manticore/native/basic_string_models.py b/manticore/native/basic_string_models.py index 56794cd1f..0bada0820 100644 --- a/manticore/native/basic_string_models.py +++ b/manticore/native/basic_string_models.py @@ -6,10 +6,11 @@ from ..core.smtlib import issymbolic from ..core.smtlib.solver import Z3Solver from ..core.smtlib.operators import ITEBV, ZEXTEND -from .models import _find_zero +from .models import _find_zero # , strcpy from dataclasses import dataclass, field + @dataclass class basic_string_class: _cpu: Cpu @@ -43,20 +44,29 @@ def update_len(self, new_len): """ self._cpu.write_int(self._M_string_length_addr, new_len, 64) - def update_c_str(self, new_str): + def reserve(self, new_size): """ - :param new_str: address of the start of new string + :param n: Planned length for the basic_string. + :return: none + + Note: the resulting string capacity may be equal or greater than n. """ - self._cpu.write_int(self._M_dataplus__M_p_addr, new_str, 64) + if new_size != self.capacity: + new_str = malloc(state, new_size) # TODO: this needs to be implemented in models + strcpy(state, new_str, self.c_str) # TODO: this needs to be implemented in models + if not self.is_local: + dealloc(state, self.c_str()) # TODO: this needs to be implemented in models + self._update_c_str(new_str) + self._update_capacity(new_size) - def resize(self, new_size): + def _update_c_str(self, new_str): """ + :param new_str: address of the start of new string """ - allocate new mem - and copy to it - if not self.is_local: - delete the old memory - return + self._cpu.write_int(self._M_dataplus__M_p_addr, new_str, 64) + + def _update_capacity(self, new_capacity): + self._cpu.write_int(self._M_allocated_capacity_addr, new_capacity, 64) @property def star_this(self): @@ -111,15 +121,17 @@ def basic_string_append_c_str(state, objref, s): b_string = basic_string_class(cpu, objref) # TODO: add support for when c_str() there is out of space # TODO: implement capacity & resize then finish this - zero_idx = _find_zero(cpu, state.constraints, s) + new_len = zero_idx + b_string.len + # if new_len > self.capacity: + # reserve(some_new_size) # FIXME: figure out what heuristic is used when the string is resized + zero_idx = _find_zero( + cpu, state.constraints, s + ) # FIXME this prob needs to be strlen but this should be determined when adding symbolic values for i in range(0, zero_idx): src_addr = s + i dst_addr = b_string.c_str + b_string.len + i c = cpu.read_int(src_addr, 8) - print(i, ":", c) cpu.write_int(dst_addr, c, 8) - new_len = zero_idx + b_string.len - print("New length", new_len) b_string.update_len(new_len) cpu.write_int(b_string.c_str + b_string.len, 0, 8) return b_string.star_this @@ -131,6 +143,18 @@ def basic_string_capacity(state, objref): Member type size_type is an unsigned integral type. """ - cpu = state.cpu - b_string = basic_string_class(cpu, objref) + b_string = basic_string_class(state.cpu, objref) return b_string.capacity + + +def basic_string_reserve(state, objref, size): + """ + Request a change in capacity + Requests that the string capacity be adapted to a planned change in size to a length of up to n characters. + """ + b_string = basic_string_class(state.cpu, objref) + # This function has no effect on the string length and cannot alter its content. + # Thus make sure the string is not shrunk less than it's current length + if size < basic_string.length: + size = self.length + b_string.reserve(size)