Skip to content

Commit

Permalink
Deal with uninitialized data sections in PE files and tackle some recursion issues (vivisect#622)
Browse files Browse the repository at this point in the history
  • Loading branch information
rakuy0 authored Nov 16, 2023
1 parent 1567510 commit 6d6a5d9
Show file tree
Hide file tree
Showing 9 changed files with 104 additions and 23 deletions.
83 changes: 68 additions & 15 deletions envi/codeflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,17 @@ def __init__(self, mem, persist=False, exptable=True, recurse=True):
self._cf_recurse = recurse
self._cf_exptable = exptable
self._cf_blocks = []

self._cf_blocked = collections.OrderedDict()
self._cf_delaying = collections.defaultdict(set)
self._cf_delayed = collections.defaultdict(set)
self._calls_from = {}

self._dynamic_branch_handlers = []

def _cb_opcode(self, va, op, branches):
'''
Extend CodeFlowContext and implement this method to recieve
Extend CodeFlowContext and implement this method to receive
a callback for every newly discovered opcode.
'''
return branches
Expand All @@ -70,7 +76,7 @@ def _cb_noflow(self, va, tva):
'''
Implement this method to receive a callback when a given code
branch is skipped due to being in the noflow dictionary.
( likely due to prodedural branch to noreturn address )
( likely due to procedural branch to noreturn address )
'''
pass

Expand All @@ -89,8 +95,8 @@ def _cb_branchtable(self, tableva, ptrva, destva):

def _cb_dynamic_branch(self, va, op, bflags, branches):
'''
if codeflow finds a branch to a non-discrete value (eg. to a register)
we handle it here. by default, we simply track the dynamic branch in a global
if codeflow finds a branch to a non-discrete value (eg: to a register)
we handle it here. By default, we simply track the dynamic branch in a global
VaSet which is added to every workspace.
'''
'''
Expand Down Expand Up @@ -128,7 +134,7 @@ def addFunctionDef(self, fva, calls_from):

def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):
'''
Do code flow disassembly from the specified address. Returnes a list
Do code flow disassembly from the specified address. Returns a list
of the procedural branch targets discovered during code flow...
Set persist=True to store 'opdone' and never disassemble the same thing twice
Expand Down Expand Up @@ -179,7 +185,8 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):

bva, bflags = branches.pop()

# look for dynamic branches (ie. branches which don't have a known target). assume at least one branch
# look for dynamic branches (ie. branches which don't have a known target).
# Assume at least one branch
if bva is None:
self._cb_dynamic_branch(va, op, bflags, branches)

Expand Down Expand Up @@ -217,6 +224,9 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):
if not self._mem.probeMemory(bva, 1, e_const.MM_EXEC):
continue

if self._mem.probeMemory(bva, 1, e_const.MM_UNINIT):
continue

if bflags & envi.BR_PROC:

# Record that the current code flow has a call from it
Expand All @@ -225,18 +235,18 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):

if bva != nextva: # NOTE: avoid call 0 constructs

# Now we decend so we do deepest func callbacks first!
# Now we descend so we do deepest func callbacks first!
if self._cf_recurse:
# descend into functions, but make sure we don't descend into
# recursive functions
if bva in self._cf_blocks:
logger.debug("not recursing to function 0x%x (at 0x%x): it's already in analysis call path (ie. it called *this* func)",
logger.debug("not recursing to function 0x%x (at 0x%x): it's already in analysis call path (ie. it called *this* func)",
bva, va)
logger.debug("call path: \t" + ", ".join([hex(x) for x in self._cf_blocks]))
# the function that we want to make prodcedural
# the function that we want to make procedural
# called us so we can't call to make it procedural
# until it's done
cf_eps[bva] = bflags
cf_eps[bva] = (startva, bflags)
else:
logger.debug("descending into function 0x%x (from 0x%x)", bva, va)
self.addEntryPoint(bva, arch=bflags)
Expand All @@ -249,6 +259,17 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):

# We only go up to procedural branches, not across
continue

# we're jumping to a function we're in the middle of
# it's effectively a call from, but we should block
# until the other finishes processing to avoid some...odd
# issues with noret detection
if bva in self._cf_blocks and op.iflags & envi.IF_BRANCH:
if self._cf_recurse and startva != bva:
self._cf_delayed[startva].add(bva)
self._cf_delaying[bva].add(startva)

continue
except Exception as e:
logger.warning("codeflow: %r", e, exc_info=True)

Expand All @@ -257,10 +278,23 @@ def addCodeFlow(self, va, arch=envi.ARCH_DEFAULT):

# remove our local blocks from global block stack
self._cf_blocks.pop()
while cf_eps:
fva, arch = cf_eps.popitem()
if not self._mem.isFunction(fva):
self.addEntryPoint(fva, arch=arch)
for fva, (pva, othrarch) in cf_eps.items():
if fva in self._cf_blocks:
self._cf_blocked[fva] = (pva, othrarch)
else:
if not self._mem.isFunction(fva):
self.addEntryPoint(fva, arch=othrarch)

fallback = collections.OrderedDict()
items = list(self._cf_blocked.items())
for fva, othrarch in items:
if fva not in self._cf_blocks and not self._mem.isFunction(fva):
self._funcs.pop(fva, None)
self._cf_blocked.pop(fva, None)
self.addEntryPoint(fva, arch=othrarch)
else:
fallback[fva] = arch
self._cf_blocked = fallback

return list(calls_from.keys())

Expand Down Expand Up @@ -291,7 +325,26 @@ def addEntryPoint(self, va, arch=envi.ARCH_DEFAULT):
# logger.debug('addEntryPoint(0x%x): calls_from: %r', va, calls_from)

# Finally, notify the callback of a new function
self._cb_function(va, {'CallsFrom': calls_from})
# we gotta hold some of these off for a bit
if va not in self._cf_delayed:
self._cb_function(va, {'CallsFrom': calls_from})
# remove this function from any blocking lists
if va in self._cf_delaying:
todo = []
for blocked in self._cf_delaying[va]:
self._cf_delayed[blocked].discard(va)
if len(self._cf_delayed[blocked]) == 0:
todo.append(blocked)

self._cf_delaying.pop(va, None)
for ova in todo:
self._cf_delayed.pop(ova, None)
calls = self._calls_from.pop(ova, {})
self._cb_function(ova, {'CallsFrom': calls})
else:
# stash these off for later
self._calls_from[va] = calls_from

return va

def flushFunction(self, fva):
Expand Down
1 change: 1 addition & 0 deletions envi/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
MM_WRITE = 0x2
MM_EXEC = 0x1
MM_SHARED = 0x08
MM_UNINIT = 0x10

MM_READ_WRITE = MM_READ | MM_WRITE
MM_READ_EXEC = MM_READ | MM_EXEC
Expand Down
6 changes: 5 additions & 1 deletion vivisect/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@
import envi
import envi.exc as e_exc
import envi.bits as e_bits
import envi.common as e_common
import envi.memory as e_mem
import envi.const as e_const
import envi.common as e_common
import envi.config as e_config
import envi.bytesig as e_bytesig
import envi.symstore.resolver as e_resolv
Expand Down Expand Up @@ -964,6 +965,9 @@ def findPointers(self, cache=True):

for mva, msize, mperm, mname in self.getMemoryMaps():

if mperm & e_const.MM_UNINIT:
continue

offset, bytes = self.getByteDef(mva)
maxsize = len(bytes) - size

Expand Down
10 changes: 10 additions & 0 deletions vivisect/analysis/generic/codeblocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import collections

import envi
import envi.const as e_const

from vivisect.const import REF_CODE, LOC_POINTER, LOC_OP

Expand Down Expand Up @@ -97,6 +98,12 @@ def analyzeFunction(vw, funcva):
if rflags & envi.BR_DEREF:
continue

mmap = vw.getMemoryMap(tova)
if mmap:
mva, msize, mperm, mname = mmap
if mperm & e_const.MM_UNINIT:
continue

branch = True
todo.append(tova)

Expand Down Expand Up @@ -136,6 +143,9 @@ def analyzeFunction(vw, funcva):
# (like during dynamic branch analysis)
try:
bsize = blocks[bva]
if bsize == 0:
continue

tmpcb = vw.getCodeBlock(bva)
# sometimes codeblocks can be deleted if owned by multiple functions
if bva not in oldblocks or tmpcb is None:
Expand Down
2 changes: 2 additions & 0 deletions vivisect/analysis/generic/funcentries.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ def analyze(vw):
# Segment permissions check for likely code stuff at all
if not mapflags & e_const.MM_EXEC:
continue
if mapflags & e_const.MM_UNINIT:
continue

i = 0
maxsize = mapsize - 4
Expand Down
8 changes: 4 additions & 4 deletions vivisect/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,8 +728,8 @@ def _mcb_WorkspaceServer(self, name, wshost):
def _fmcb_Thunk(self, funcva, th, thunkname):
# If the function being made a thunk is registered
# in NoReturnApis, update codeflow...
if self.getMeta('NoReturnApis').get( thunkname.lower() ):
self.cfctx.addNoReturnAddr( funcva )
if self.getMeta('NoReturnApis').get(thunkname.lower()):
self.cfctx.addNoReturnAddr(funcva)

def _fmcb_CallsFrom(self, funcva, th, callsfrom):
for va in callsfrom:
Expand Down Expand Up @@ -823,7 +823,7 @@ def _cb_function(self, fva, fmeta):

fname = vw.getName( fva )
if vw.getMeta('NoReturnApis').get( fname.lower() ):
self._cf_noret[ fva ] = True
self._cf_noret[fva] = True

if len( vw.getFunctionBlocks( fva )) == 1:
return
Expand All @@ -833,7 +833,7 @@ def _cb_function(self, fva, fmeta):
va = lva[0]
ctup = vw.getCodeBlock(va)
if ctup and fva == ctup[2] and vw.getFunctionMeta(fva, 'BlockCount', default=0) == 1:
self._cf_noret[ fva ] = True
self._cf_noret[fva] = True
break

def _cb_branchtable(self, tablebase, tableva, destva):
Expand Down
2 changes: 2 additions & 0 deletions vivisect/parsers/pe.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,8 @@ def loadPeIntoWorkspace(vw, pe, filename=None, baseaddr=None):
mapflags |= e_const.MM_EXEC
if chars & PE.IMAGE_SCN_CNT_CODE:
mapflags |= e_const.MM_EXEC
if chars & PE.IMAGE_SCN_CNT_UNINITIALIZED_DATA:
mapflags |= e_const.MM_UNINIT

secrva = sec.VirtualAddress
secvsize = sec.VirtualSize
Expand Down
1 change: 0 additions & 1 deletion vivisect/tests/testvivisect.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,6 @@ def test_cli_xrefs(self):
self.assertIn("From: 0x0804fe94, To: 0x080490d0, Type: Code, Flags: 0x00010001\n", output)
self.chgrp_vw.canvas.clearCanvas()


def test_loc_types(self):
'''
Test that we have data consistency in locations
Expand Down
14 changes: 12 additions & 2 deletions vivisect/tools/graphutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
Some glue code to do workspace related things based on visgraph
'''
import time
import envi
import logging
import vivisect
import collections

import envi
import envi.const as e_const

import visgraph.pathcore as vg_pathcore
import visgraph.graphcore as vg_graphcore

import vivisect

xrskip = envi.BR_PROC | envi.BR_DEREF

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -475,6 +479,12 @@ def buildFunctionGraph(vw, fva, revloop=False, g=None):
if xrflags & xrskip:
continue

mmap = vw.getMemoryMap(xrto)
if mmap:
mva, msize, mperm, mname = mmap
if mperm & e_const.MM_UNINIT:
continue

if not g.hasNode(xrto):
cblock = vw.getCodeBlock(xrto)
if cblock is None:
Expand Down

0 comments on commit 6d6a5d9

Please sign in to comment.