Skip to content

Commit

Permalink
Merge branch 'unittest-automation'
Browse files Browse the repository at this point in the history
  • Loading branch information
christian-intra2net committed Dec 22, 2023
2 parents 859a3c6 + e9d2543 commit f827e31
Show file tree
Hide file tree
Showing 28 changed files with 414 additions and 171 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/unittests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# CI workflow: run pylint (errors only) and the unit tests for every push to
# master and every pull request that targets master.
name: Python package

on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  check:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: ["ubuntu-latest", "windows-latest", "macos-latest"]
        python-version: ["2.x", "3.x", "pypy-3.9"]
        include:
          # Adds `runlint` to the combinations whose python-version is 3.x.
          # All other combinations leave it unset, so the pylint step below
          # is skipped for them. The value must match the matrix entry.
          - python-version: "3.x"
            runlint: 1

    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          # print interpreter and platform details to ease debugging of failures
          python -c "import sys; import platform; print(sys.version); print(sys.platform); print(platform.python_implementation()); print(platform.system())"
          cat requirements.txt
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install pylint
      - name: Run pylint
        if: ${{ matrix.runlint }}
        # -E: report errors only; the bundled third-party code is not checked
        run: pylint -E --ignore=thirdparty oletools tests
      - name: Run unittests
        # -f: stop at the first failing test
        run: python -m unittest discover -f
2 changes: 1 addition & 1 deletion oletools/common/io_encoding.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@
if PY3:
from builtins import open as builtin_open
else:
from __builtin__ import open as builtin_open
from __builtin__ import open as builtin_open # pylint: disable=import-error

# -- CONSTANTS ----------------------------------------------------------------
#: encoding to use for redirection if no good encoding can be found
Expand Down
37 changes: 32 additions & 5 deletions oletools/common/log_helper/_json_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,52 @@
class JsonFormatter(logging.Formatter):
"""
Format every message to be logged as a JSON object
Uses the standard :py:class:`logging.Formatter` with standard arguments
to do the actual formatting; a user-supplied formatter could be stored and
used instead.
"""
_is_first_line = True

def __init__(self, other_logger_has_first_line=False):
    """
    Create the formatter.

    :param bool other_logger_has_first_line: if true, `_is_first_line` is
        cleared on this instance, so this formatter will not treat the
        first record it formats as the first line of the output.
    """
    # Initialise the base class so that the attributes it normally sets
    # (format string, style, date format) exist; inherited helpers such as
    # usesTime() would otherwise raise AttributeError.
    logging.Formatter.__init__(self)
    if other_logger_has_first_line:
        # instance attribute that shadows the class-level default (True)
        self._is_first_line = False
    # plain formatter that turns a LogRecord into text; could adjust this
    self.msg_formatter = logging.Formatter()

def format(self, record):
"""
Since we don't buffer messages, we always prepend messages with a comma to make
the output JSON-compatible. The only exception is when printing the first line,
so we need to keep track of it.
We assume that all input comes from the OletoolsLoggerAdapter which
ensures that there is a `type` field in the record. Otherwise will have
to add a try-except around the access to `record.type`.
The actual conversion from :py:class:`logging.LogRecord` to a text message
(i.e. %-formatting, adding exception information, etc.) is delegated to the
standard :py:class:`logging.Formatter`.
The dumped json structure contains fields `msg` with the formatted message,
`level` with the log-level of the message and `type`, which is created by
:py:class:`oletools.common.log_helper.OletoolsLoggerAdapter` or added here
(for input from e.g. captured warnings, third-party libraries)
"""
json_dict = dict(msg=record.msg.replace('\n', ' '), level=record.levelname)
json_dict['type'] = record.type
json_dict = dict(msg='', level='', type='')
try:
msg = self.msg_formatter.format(record)
json_dict['msg'] = msg.replace('\n', ' ')
json_dict['level'] = record.levelname
json_dict['type'] = record.type
except AttributeError: # most probably: record has no "type" field
if record.name == 'py.warnings': # this is from python's warning-capture logger
json_dict['type'] = 'warning'
else:
json_dict['type'] = 'msg' # message of unknown origin
except Exception as exc:
try:
json_dict['msg'] = "Ignore {0} when formatting '{1}': {2}".format(type(exc), record.msg, exc)
except Exception as exc2:
json_dict['msg'] = 'Caught {0} in logging'.format(str(exc2))
json_dict['type'] = 'log-warning'
json_dict['level'] = 'warning'

formatted_message = ' ' + json.dumps(json_dict)

if self._is_first_line:
Expand Down
12 changes: 11 additions & 1 deletion oletools/common/log_helper/_logger_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ class OletoolsLoggerAdapter(logging.LoggerAdapter):
Adapter class for all loggers returned by the logging module.
"""
_json_enabled = None
_is_warn_logger = False # this is always False

def print_str(self, message, **kwargs):
"""
Expand Down Expand Up @@ -44,7 +45,10 @@ def process(self, msg, kwargs):
kwargs['extra']['type'] = kwargs['type']
del kwargs['type'] # downstream loggers cannot deal with this
if 'type' not in kwargs['extra']:
kwargs['extra']['type'] = 'msg' # type will be added to LogRecord
if self._is_warn_logger:
kwargs['extra']['type'] = 'warning' # this will add field
else:
kwargs['extra']['type'] = 'msg' # 'type' to LogRecord
return msg, kwargs

def set_json_enabled_function(self, json_enabled):
Expand All @@ -53,6 +57,12 @@ def set_json_enabled_function(self, json_enabled):
"""
self._json_enabled = json_enabled

def set_warnings_logger(self):
    """Mark this adapter as the logger that is used for warnings."""
    # Assigning through `self` creates an object attribute that hides the
    # class attribute of the same name (which stays False) for this
    # instance only.
    self._is_warn_logger = True

def level(self):
    """Give back the level that is currently set on the wrapped logger."""
    wrapped = self.logger
    return wrapped.level
Expand Down
6 changes: 6 additions & 0 deletions oletools/common/log_helper/log_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@ def enable_logging(self, use_json=False, level='warning', log_format=DEFAULT_MES
self._use_json = use_json
sys.excepthook = self._get_except_hook(sys.excepthook)

# make sure warnings do not mess up our output
logging.captureWarnings(True)
warn_logger = self.get_or_create_silent_logger('py.warnings')
warn_logger.set_warnings_logger()

# since there could be loggers already created we go through all of them
# and set their levels to 0 so they will use the root logger's level
for name in self._all_names:
Expand All @@ -174,6 +179,7 @@ def end_logging(self):

# end logging
self._all_names = set()
logging.captureWarnings(False)
logging.shutdown()

# end json list
Expand Down
2 changes: 2 additions & 0 deletions oletools/doc/Contribute.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ to **send feedback**.

The code is available in [a repository on GitHub](https://github.com/decalage2/oletools).
You may use it to **submit enhancements** using forks and pull requests.
When you submit a PR, GitHub automatically checks that the unit tests pass and
that `pylint -E` reports no errors for the code files you changed.

--------------------------------------------------------------------------

Expand Down
8 changes: 4 additions & 4 deletions oletools/mraptor_milter.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@

# --- IMPORTS ----------------------------------------------------------------

import Milter
import Milter # not part of requirements, therefore: # pylint: disable=import-error
import io
import time
import email
Expand All @@ -78,7 +78,7 @@
import logging
import logging.handlers
import datetime
import StringIO
import StringIO # not part of requirements, therefore: # pylint: disable=import-error

from socket import AF_INET6

Expand All @@ -96,7 +96,7 @@

from oletools import olevba, mraptor

from Milter.utils import parse_addr
from Milter.utils import parse_addr # not part of requirements, therefore: # pylint: disable=import-error

from zipfile import is_zipfile

Expand Down Expand Up @@ -389,7 +389,7 @@ def main():

# Using daemonize:
# See http://daemonize.readthedocs.io/en/latest/
from daemonize import Daemonize
from daemonize import Daemonize # not part of requirements, therefore: # pylint: disable=import-error
daemon = Daemonize(app="mraptor_milter", pid=PIDFILE, action=main)
daemon.start()

Expand Down
92 changes: 46 additions & 46 deletions oletools/msodde.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,69 +149,69 @@
# switches_with_args, switches_without_args, format_switches)
FIELD_BLACKLIST = (
    # Each field is listed exactly once; every entry is a 6-tuple whose first
    # element is the field name and whose last three elements are
    # switches_with_args, switches_without_args and format_switches.
    # NOTE(review): the two integers presumably count required and optional
    # arguments -- confirm against the code that consumes this table.
    # date and time:
    ('CREATEDATE', 0, 0, '', 'hs', 'datetime'),
    ('DATE', 0, 0, '', 'hls', 'datetime'),
    ('EDITTIME', 0, 0, '', '', 'numeric'),
    ('PRINTDATE', 0, 0, '', 'hs', 'datetime'),
    ('SAVEDATE', 0, 0, '', 'hs', 'datetime'),
    ('TIME', 0, 0, '', '', 'datetime'),
    # exclude document automation (we hate the "auto" in "automation")
    # (COMPARE, DOCVARIABLE, GOTOBUTTON, IF, MACROBUTTON, PRINT)
    # document information
    ('AUTHOR', 0, 1, '', '', 'string'),
    ('COMMENTS', 0, 1, '', '', 'string'),
    ('DOCPROPERTY', 1, 0, '', '', 'string/numeric/datetime'),
    ('FILENAME', 0, 0, '', 'p', 'string'),
    ('FILESIZE', 0, 0, '', 'km', 'numeric'),
    ('KEYWORDS', 0, 1, '', '', 'string'),
    ('LASTSAVEDBY', 0, 0, '', '', 'string'),
    ('NUMCHARS', 0, 0, '', '', 'numeric'),
    ('NUMPAGES', 0, 0, '', '', 'numeric'),
    ('NUMWORDS', 0, 0, '', '', 'numeric'),
    ('SUBJECT', 0, 1, '', '', 'string'),
    ('TEMPLATE', 0, 0, '', 'p', 'string'),
    ('TITLE', 0, 1, '', '', 'string'),
    # equations and formulas
    # exclude '=' formulae because they have different syntax (and can be bad)
    ('ADVANCE', 0, 0, 'dlruxy', '', ''),
    ('SYMBOL', 1, 0, 'fs', 'ahju', ''),
    # form fields
    ('FORMCHECKBOX', 0, 0, '', '', ''),
    ('FORMDROPDOWN', 0, 0, '', '', ''),
    ('FORMTEXT', 0, 0, '', '', ''),
    # index and tables
    ('INDEX', 0, 0, 'bcdefghklpsz', 'ry', ''),
    # exclude RD since that imports data from other files
    ('TA', 0, 0, 'clrs', 'bi', ''),
    ('TC', 1, 0, 'fl', 'n', ''),
    ('TOA', 0, 0, 'bcdegls', 'fhp', ''),
    ('TOC', 0, 0, 'abcdflnopst', 'huwxz', ''),
    ('XE', 1, 0, 'frty', 'bi', ''),
    # links and references
    # exclude AUTOTEXT and AUTOTEXTLIST since we do not like stuff with 'AUTO'
    ('BIBLIOGRAPHY', 0, 0, 'lfm', '', ''),
    ('CITATION', 1, 0, 'lfspvm', 'nty', ''),
    # exclude HYPERLINK since we are allergic to URLs
    # exclude INCLUDEPICTURE and INCLUDETEXT (other file or maybe even URL?)
    # exclude LINK and REF (could reference other files)
    ('NOTEREF', 1, 0, '', 'fhp', ''),
    ('PAGEREF', 1, 0, '', 'hp', ''),
    ('QUOTE', 1, 0, '', '', 'datetime'),
    ('STYLEREF', 1, 0, '', 'lnprtw', ''),
    # exclude all Mail Merge commands since they import data from other files
    # (ADDRESSBLOCK, ASK, COMPARE, DATABASE, FILLIN, GREETINGLINE, IF,
    # MERGEFIELD, MERGEREC, MERGESEQ, NEXT, NEXTIF, SET, SKIPIF)
    # Numbering
    ('LISTNUM', 0, 1, 'ls', '', ''),
    ('PAGE', 0, 0, '', '', 'numeric'),
    ('REVNUM', 0, 0, '', '', ''),
    ('SECTION', 0, 0, '', '', 'numeric'),
    ('SECTIONPAGES', 0, 0, '', '', 'numeric'),
    ('SEQ', 1, 1, 'rs', 'chn', 'numeric'),
    # user information
    ('USERADDRESS', 0, 1, '', '', 'string'),
    ('USERINITIALS', 0, 1, '', '', 'string'),
    ('USERNAME', 0, 1, '', '', 'string'),
)

FIELD_DDE_REGEX = re.compile(r'^\s*dde(auto)?\s+', re.I)
Expand Down
2 changes: 1 addition & 1 deletion oletools/oleobj.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def enable_logging():
NULL_CHAR = '\x00'
else:
# Python 3.x
NULL_CHAR = 0 # pylint: disable=redefined-variable-type
NULL_CHAR = 0
xrange = range # pylint: disable=redefined-builtin, invalid-name

OOXML_RELATIONSHIP_TAG = '{http://schemas.openxmlformats.org/package/2006/relationships}Relationship'
Expand Down
2 changes: 1 addition & 1 deletion oletools/olevba.py
Original file line number Diff line number Diff line change
Expand Up @@ -3087,7 +3087,7 @@ def open_ppt(self):
log.info('Check whether OLE file is PPT')
try:
ppt = ppt_parser.PptParser(self.ole_file, fast_fail=True)
for vba_data in ppt.iter_vba_data():
for vba_data in ppt.iter_vba_data(): # pylint: disable=no-value-for-parameter
self.append_subfile(None, vba_data, container='PptParser')
log.info('File is PPT')
self.ole_file.close() # just in case
Expand Down
2 changes: 1 addition & 1 deletion oletools/ooxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def debug_str(elem):
def isstr(some_var):
    """
    Version-independent test for isinstance(some_var, (str, unicode)).

    :param some_var: any object
    :returns: True if `some_var` is a text string for the running python
              version: `str` or `unicode` on python 2, `str` on python 3
    :rtype: bool
    """
    if sys.version_info.major == 2:
        # basestring only exists on python 2, where it is the common base
        # of str and unicode; pylint on python 3 cannot resolve the name
        return isinstance(some_var, basestring)  # pylint: disable=undefined-variable
    return isinstance(some_var, str)    # there is no unicode


Expand Down
14 changes: 7 additions & 7 deletions oletools/ppt_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1377,7 +1377,7 @@ def parse_document_persist_object(self, stream):
# first identified in step 3 of Part 1, that is, the UserEditAtom
# record closest to the end of the stream.
if self.persist_object_directory is None:
self.parse_persist_object_directory()
self.parse_persist_object_directory() # pylint: disable=no-value-for-parameter

# Step 2: Lookup the value of the docPersistIdRef field in the persist
# object directory constructed in step 8 of Part 1 to find the stream
Expand Down Expand Up @@ -1462,7 +1462,7 @@ def search_vba_info(self, stream):
rec_len=VBAInfoAtom.RECORD_LENGTH)

# try parse
for idx in self.search_pattern(pattern):
for idx in self.search_pattern(pattern): # pylint: disable=no-value-for-parameter
# assume that in stream at idx there is a VBAInfoContainer
stream.seek(idx)
log.debug('extracting at idx {0}'.format(idx))
Expand Down Expand Up @@ -1515,7 +1515,7 @@ def search_vba_storage(self, stream):
pattern = obj_type.generate_pattern()

# try parse
for idx in self.search_pattern(pattern):
for idx in self.search_pattern(pattern): # pylint: disable=no-value-for-parameter
# assume a ExternalObjectStorage in stream at idx
stream.seek(idx)
log.debug('extracting at idx {0}'.format(idx))
Expand Down Expand Up @@ -1589,21 +1589,21 @@ def iter_vba_data(self, stream):

n_infos = 0
n_macros = 0
for info in self.search_vba_info():
for info in self.search_vba_info(stream):
n_infos += 1
if info.vba_info_atom.f_has_macros > 0:
n_macros += 1
# TODO: does it make sense at all to continue if n_macros == 0?
# --> no vba-info, so all storages probably ActiveX or other OLE
n_storages = 0
n_compressed = 0
for storage in self.search_vba_storage():
for storage in self.search_vba_storage(): # pylint: disable=no-value-for-parameter
n_storages += 1
if storage.is_compressed:
n_compressed += 1
yield self.decompress_vba_storage(storage)
yield self.decompress_vba_storage(storage) # pylint: disable=no-value-for-parameter
else:
yield self.read_vba_storage_data(storage)
yield self.read_vba_storage_data(storage) # pylint: disable=no-value-for-parameter

log.info('found {0} infos ({1} with macros) and {2} storages '
'({3} compressed)'
Expand Down
Loading

0 comments on commit f827e31

Please sign in to comment.