From 6d6a5d946e2198de47099878373adf6eb64617b0 Mon Sep 17 00:00:00 2001 From: James Gross <45212823+rakuy0@users.noreply.github.com> Date: Thu, 16 Nov 2023 15:19:10 -0500 Subject: [PATCH] Deal with uninitialized data sections in PE files and tackle some recursion issues (#622) --- envi/codeflow.py | 83 +++++++++++++++++++----- envi/const.py | 1 + vivisect/__init__.py | 6 +- vivisect/analysis/generic/codeblocks.py | 10 +++ vivisect/analysis/generic/funcentries.py | 2 + vivisect/base.py | 8 +-- vivisect/parsers/pe.py | 2 + vivisect/tests/testvivisect.py | 1 - vivisect/tools/graphutil.py | 14 +++- 9 files changed, 104 insertions(+), 23 deletions(-) diff --git a/envi/codeflow.py b/envi/codeflow.py index 795cbd456..db287d80f 100644 --- a/envi/codeflow.py +++ b/envi/codeflow.py @@ -49,11 +49,17 @@ def __init__(self, mem, persist=False, exptable=True, recurse=True): self._cf_recurse = recurse self._cf_exptable = exptable self._cf_blocks = [] + + self._cf_blocked = collections.OrderedDict() + self._cf_delaying = collections.defaultdict(set) + self._cf_delayed = collections.defaultdict(set) + self._calls_from = {} + self._dynamic_branch_handlers = [] def _cb_opcode(self, va, op, branches): ''' - Extend CodeFlowContext and implement this method to recieve + Extend CodeFlowContext and implement this method to receive a callback for every newly discovered opcode. ''' return branches @@ -70,7 +76,7 @@ def _cb_noflow(self, va, tva): ''' Implement this method to receive a callback when a given code branch is skipped due to being in the noflow dictionary. - ( likely due to prodedural branch to noreturn address ) + ( likely due to procedural branch to noreturn address ) ''' pass @@ -89,8 +95,8 @@ def _cb_branchtable(self, tableva, ptrva, destva): def _cb_dynamic_branch(self, va, op, bflags, branches): ''' - if codeflow finds a branch to a non-discrete value (eg. to a register) - we handle it here. 
by default, we simply track the dynamic branch in a global + if codeflow finds a branch to a non-discrete value (eg: to a register) + we handle it here. By default, we simply track the dynamic branch in a global VaSet which is added to every workspace. ''' ''' @@ -128,7 +134,7 @@ def addFunctionDef(self, fva, calls_from): def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT): ''' - Do code flow disassembly from the specified address. Returnes a list + Do code flow disassembly from the specified address. Returns a list of the procedural branch targets discovered during code flow... Set persist=True to store 'opdone' and never disassemble the same thing twice @@ -179,7 +185,8 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT): bva, bflags = branches.pop() - # look for dynamic branches (ie. branches which don't have a known target). assume at least one branch + # look for dynamic branches (ie. branches which don't have a known target). + # Assume at least one branch if bva is None: self._cb_dynamic_branch(va, op, bflags, branches) @@ -217,6 +224,9 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT): if not self._mem.probeMemory(bva, 1, e_const.MM_EXEC): continue + if self._mem.probeMemory(bva, 1, e_const.MM_UNINIT): + continue + if bflags & envi.BR_PROC: # Record that the current code flow has a call from it @@ -225,18 +235,18 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT): if bva != nextva: # NOTE: avoid call 0 constructs - # Now we decend so we do deepest func callbacks first! + # Now we descend so we do deepest func callbacks first! if self._cf_recurse: # descend into functions, but make sure we don't descend into # recursive functions if bva in self._cf_blocks: - logger.debug("not recursing to function 0x%x (at 0x%x): it's already in analysis call path (ie. it called *this* func)", + logger.debug("not recursing to function 0x%x (at 0x%x): it's already in analysis call path (ie. 
it called *this* func)", bva, va) logger.debug("call path: \t" + ", ".join([hex(x) for x in self._cf_blocks])) - # the function that we want to make prodcedural + # the function that we want to make procedural # called us so we can't call to make it procedural # until it's done - cf_eps[bva] = bflags + cf_eps[bva] = (startva, bflags) else: logger.debug("descending into function 0x%x (from 0x%x)", bva, va) self.addEntryPoint(bva, arch=bflags) @@ -249,6 +259,17 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT): # We only go up to procedural branches, not across continue + + # we're jumping to a function we're in the middle of + # it's effectively a call from, but we should block + # until the other finishes processing to avoid some...odd + # issues with noret detection + if bva in self._cf_blocks and op.iflags & envi.IF_BRANCH: + if self._cf_recurse and startva != bva: + self._cf_delayed[startva].add(bva) + self._cf_delaying[bva].add(startva) + + continue except Exception as e: logger.warning("codeflow: %r", e, exc_info=True) @@ -257,10 +278,23 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT): # remove our local blocks from global block stack self._cf_blocks.pop() - while cf_eps: - fva, arch = cf_eps.popitem() - if not self._mem.isFunction(fva): - self.addEntryPoint(fva, arch=arch) + for fva, (pva, othrarch) in cf_eps.items(): + if fva in self._cf_blocks: + self._cf_blocked[fva] = (pva, othrarch) + else: + if not self._mem.isFunction(fva): + self.addEntryPoint(fva, arch=othrarch) + + fallback = collections.OrderedDict() + items = list(self._cf_blocked.items()) + for fva, (pva, othrarch) in items: + if fva not in self._cf_blocks and not self._mem.isFunction(fva): + self._funcs.pop(fva, None) + self._cf_blocked.pop(fva, None) + self.addEntryPoint(fva, arch=othrarch) + else: + fallback[fva] = (pva, othrarch) + self._cf_blocked = fallback return list(calls_from.keys()) @@ -291,7 +325,26 @@ def addEntryPoint(self, va, arch=envi.ARCH_DEFAULT): # logger.debug('addEntryPoint(0x%x): 
calls_from: %r', va, calls_from) # Finally, notify the callback of a new function - self._cb_function(va, {'CallsFrom': calls_from}) + # we gotta hold some of these off for a bit + if va not in self._cf_delayed: + self._cb_function(va, {'CallsFrom': calls_from}) + # remove this function from any blocking lists + if va in self._cf_delaying: + todo = [] + for blocked in self._cf_delaying[va]: + self._cf_delayed[blocked].discard(va) + if len(self._cf_delayed[blocked]) == 0: + todo.append(blocked) + + self._cf_delaying.pop(va, None) + for ova in todo: + self._cf_delayed.pop(ova, None) + calls = self._calls_from.pop(ova, {}) + self._cb_function(ova, {'CallsFrom': calls}) + else: + # stash these off for later + self._calls_from[va] = calls_from + return va def flushFunction(self, fva): diff --git a/envi/const.py b/envi/const.py index 28d146ab8..d12412dba 100644 --- a/envi/const.py +++ b/envi/const.py @@ -18,6 +18,7 @@ MM_WRITE = 0x2 MM_EXEC = 0x1 MM_SHARED = 0x08 +MM_UNINIT = 0x10 MM_READ_WRITE = MM_READ | MM_WRITE MM_READ_EXEC = MM_READ | MM_EXEC diff --git a/vivisect/__init__.py b/vivisect/__init__.py index 8214b1914..a6d8e1a76 100644 --- a/vivisect/__init__.py +++ b/vivisect/__init__.py @@ -21,8 +21,9 @@ import envi import envi.exc as e_exc import envi.bits as e_bits -import envi.common as e_common import envi.memory as e_mem +import envi.const as e_const +import envi.common as e_common import envi.config as e_config import envi.bytesig as e_bytesig import envi.symstore.resolver as e_resolv @@ -964,6 +965,9 @@ def findPointers(self, cache=True): for mva, msize, mperm, mname in self.getMemoryMaps(): + if mperm & e_const.MM_UNINIT: + continue + offset, bytes = self.getByteDef(mva) maxsize = len(bytes) - size diff --git a/vivisect/analysis/generic/codeblocks.py b/vivisect/analysis/generic/codeblocks.py index 707e835e6..70a85a547 100644 --- a/vivisect/analysis/generic/codeblocks.py +++ b/vivisect/analysis/generic/codeblocks.py @@ -8,6 +8,7 @@ import collections import 
envi +import envi.const as e_const from vivisect.const import REF_CODE, LOC_POINTER, LOC_OP @@ -97,6 +98,12 @@ def analyzeFunction(vw, funcva): if rflags & envi.BR_DEREF: continue + mmap = vw.getMemoryMap(tova) + if mmap: + mva, msize, mperm, mname = mmap + if mperm & e_const.MM_UNINIT: + continue + branch = True todo.append(tova) @@ -136,6 +143,9 @@ def analyzeFunction(vw, funcva): # (like during dynamic branch analysis) try: bsize = blocks[bva] + if bsize == 0: + continue + tmpcb = vw.getCodeBlock(bva) # sometimes codeblocks can be deleted if owned by multiple functions if bva not in oldblocks or tmpcb is None: diff --git a/vivisect/analysis/generic/funcentries.py b/vivisect/analysis/generic/funcentries.py index 180372cfe..fdc2ba93c 100644 --- a/vivisect/analysis/generic/funcentries.py +++ b/vivisect/analysis/generic/funcentries.py @@ -31,6 +31,8 @@ def analyze(vw): # Segment permissions check for likely code stuff at all if not mapflags & e_const.MM_EXEC: continue + if mapflags & e_const.MM_UNINIT: + continue i = 0 maxsize = mapsize - 4 diff --git a/vivisect/base.py b/vivisect/base.py index 394d9b019..f191c83c6 100644 --- a/vivisect/base.py +++ b/vivisect/base.py @@ -728,8 +728,8 @@ def _mcb_WorkspaceServer(self, name, wshost): def _fmcb_Thunk(self, funcva, th, thunkname): # If the function being made a thunk is registered # in NoReturnApis, update codeflow... 
- if self.getMeta('NoReturnApis').get( thunkname.lower() ): - self.cfctx.addNoReturnAddr( funcva ) + if self.getMeta('NoReturnApis').get(thunkname.lower()): + self.cfctx.addNoReturnAddr(funcva) def _fmcb_CallsFrom(self, funcva, th, callsfrom): for va in callsfrom: @@ -823,7 +823,7 @@ def _cb_function(self, fva, fmeta): fname = vw.getName( fva ) if vw.getMeta('NoReturnApis').get( fname.lower() ): - self._cf_noret[ fva ] = True + self._cf_noret[fva] = True if len( vw.getFunctionBlocks( fva )) == 1: return @@ -833,7 +833,7 @@ def _cb_function(self, fva, fmeta): va = lva[0] ctup = vw.getCodeBlock(va) if ctup and fva == ctup[2] and vw.getFunctionMeta(fva, 'BlockCount', default=0) == 1: - self._cf_noret[ fva ] = True + self._cf_noret[fva] = True break def _cb_branchtable(self, tablebase, tableva, destva): diff --git a/vivisect/parsers/pe.py b/vivisect/parsers/pe.py index f55547bfa..11ca1a023 100644 --- a/vivisect/parsers/pe.py +++ b/vivisect/parsers/pe.py @@ -286,6 +286,8 @@ def loadPeIntoWorkspace(vw, pe, filename=None, baseaddr=None): mapflags |= e_const.MM_EXEC if chars & PE.IMAGE_SCN_CNT_CODE: mapflags |= e_const.MM_EXEC + if chars & PE.IMAGE_SCN_CNT_UNINITIALIZED_DATA: + mapflags |= e_const.MM_UNINIT secrva = sec.VirtualAddress secvsize = sec.VirtualSize diff --git a/vivisect/tests/testvivisect.py b/vivisect/tests/testvivisect.py index c9a1fe1fd..3cfa43933 100644 --- a/vivisect/tests/testvivisect.py +++ b/vivisect/tests/testvivisect.py @@ -373,7 +373,6 @@ def test_cli_xrefs(self): self.assertIn("From: 0x0804fe94, To: 0x080490d0, Type: Code, Flags: 0x00010001\n", output) self.chgrp_vw.canvas.clearCanvas() - def test_loc_types(self): ''' Test that we have data consistency in locations diff --git a/vivisect/tools/graphutil.py b/vivisect/tools/graphutil.py index b0e556ad1..1a1ecb286 100644 --- a/vivisect/tools/graphutil.py +++ b/vivisect/tools/graphutil.py @@ -3,13 +3,17 @@ Some glue code to do workspace related things based on visgraph ''' import time -import envi 
import logging -import vivisect import collections + +import envi +import envi.const as e_const + import visgraph.pathcore as vg_pathcore import visgraph.graphcore as vg_graphcore +import vivisect + xrskip = envi.BR_PROC | envi.BR_DEREF logger = logging.getLogger(__name__) @@ -475,6 +479,12 @@ def buildFunctionGraph(vw, fva, revloop=False, g=None): if xrflags & xrskip: continue + mmap = vw.getMemoryMap(xrto) + if mmap: + mva, msize, mperm, mname = mmap + if mperm & e_const.MM_UNINIT: + continue + if not g.hasNode(xrto): cblock = vw.getCodeBlock(xrto) if cblock is None: