Skip to content

Commit

Permalink
Merge pull request #446 from TeamMsgExtractor/next-release
Browse files Browse the repository at this point in the history
Version 0.52.0
  • Loading branch information
TheElementalOfDestruction authored Oct 22, 2024
2 parents 1302d6f + d2e321e commit 373f6c1
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 14 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
**v0.52.0**
* [[TeamMsgExtractor #444](https://github.com/TeamMsgExtractor/msg-extractor/issues/444)] Fix typo in string that prevented HTML body from generating from the plain text body properly.
* Adjusted the behavior of `MSGFile.areStringsUnicode` to prioritize the property specified by the parent MSG files for MSG files that are embedded. Additionally, added a fallback to rely on whether or not there is a stream using the `001F` type to determine the property value if it is entirely missing.
* Adjusted `OleWriter.fromMsg()` and `MSGFile.export()` to add the argument `allowBadEmbed` which helps to correct a few different issues that may appear in embedded MSG files. These corrections allow the embedded file to still be extracted and to open properly in Outlook.
* In addition to the above, the errors that some of those corrections will suppress are now significantly more informative about what went wrong.

**v0.51.1**
* Add class type added in last version to known class types.

Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ your access to the newest major version of extract-msg.
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt

.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.51.1-blue.svg
:target: https://pypi.org/project/extract-msg/0.51.1/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.52.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.52.0/

.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-3810/
Expand Down
4 changes: 2 additions & 2 deletions extract_msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = 'Destiny Peterson & Matthew Walker'
__date__ = '2024-10-11'
__version__ = '0.51.1'
__date__ = '2024-10-22'
__version__ = '0.52.0'

__all__ = [
# Modules:
Expand Down
2 changes: 1 addition & 1 deletion extract_msg/msg_classes/message_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1170,7 +1170,7 @@ def htmlBody(self) -> Optional[bytes]:
# Convert the plain text body to html.
logger.info('HTML body was not found, attempting to generate from plain text body.')
correctedBody = html.escape(self.body).replace('\r', '').replace('\n', '<br />')
htmlBody = f'<html><body>{correctedBody}</body></head>'.encode('ascii', 'xmlreplace')
htmlBody = f'<html><body>{correctedBody}</body></head>'.encode('ascii', 'xmlcharrefreplace')

if not htmlBody:
logger.info('HTML body could not be found nor generated.')
Expand Down
24 changes: 19 additions & 5 deletions extract_msg/msg_classes/msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,7 @@ def existsTypedProperty(self, _id: str, location = None, _type = None, prefix: b
foundNumber += 1
return (foundNumber > 0), foundNumber

def export(self, path) -> None:
def export(self, path, allowBadEmbed: bool = False) -> None:
"""
Exports the contents of this MSG file to a new MSG file specified by
the path given.
Expand All @@ -492,21 +492,26 @@ def export(self, path) -> None:
:param path: A path-like object (including strings and ``pathlib.Path``
objects) or an IO device with a write method which accepts bytes.
:param allowBadEmbed: If True, attempts to skip steps that will fail if
the embedded MSG file violates standards. It will also attempt to repair the data to try to ensure it can open in Outlook.
"""
from ..ole_writer import OleWriter

# Create an instance of the class used for writing a new OLE file.
writer = OleWriter()
# Add all file and directory entries to it.
writer.fromMsg(self)
writer.fromMsg(self, allowBadEmbed = allowBadEmbed)
writer.write(path)

def exportBytes(self) -> bytes:
def exportBytes(self, allowBadEmbed: bool = False) -> bytes:
"""
Saves a new copy of the MSG file, returning the bytes.
:param allowBadEmbed: If True, attempts to skip steps that will fail if
the embedded MSG file violates standards. It will also attempt to repair the data to try to ensure it can open in Outlook.
"""
out = io.BytesIO()
self.export(out)
self.export(out, allowBadEmbed)
return out.getvalue()

def fixPath(self, inp: MSG_PATH, prefix: bool = True) -> str:
Expand Down Expand Up @@ -843,7 +848,16 @@ def areStringsUnicode(self) -> bool:
"""
Whether the strings are Unicode encoded or not.
"""
return (self.getPropertyVal('340D0003', 0) & 0x40000) != 0
val = self.getPropertyVal('340D0003')
if val is None:
# Try to get this value from the parent.
if self.prefix:
if self.__parentMsg and (msg := self.__parentMsg()) is not None:
return msg.areStringsUnicode

# Final attempt: check the actual streams.
return any(x[-1].upper().endswith('001F') for x in self.listDir())
return (val & 0x40000) != 0

@functools.cached_property
def attachments(self) -> Union[List[AttachmentBase], List[SignedAttachment]]:
Expand Down
31 changes: 27 additions & 4 deletions extract_msg/ole_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from . import constants
from .constants import MSG_PATH
from .enums import Color, DirectoryEntryType
from .exceptions import TooManySectorsError
from .exceptions import StandardViolationError, TooManySectorsError
from .utils import ceilDiv, dictGetCasedKey, inputToMsgPath
from olefile.olefile import OleDirectoryEntry, OleFileIO
from red_black_dict_mod import RedBlackTree
Expand Down Expand Up @@ -804,9 +804,15 @@ def editEntry(self, path: MSG_PATH, **kwargs) -> None:
# Send it to be modified using the arguments given.
self.__modifyEntry(entry, **kwargs)

def fromMsg(self, msg: MSGFile) -> None:
def fromMsg(self, msg: MSGFile, allowBadEmbed: bool = False) -> None:
"""
Copies the streams and stream information necessary from the MSG file.
:param allowBadEmbed: If True, attempts to skip steps that will fail if
the embedded MSG file violates standards. It will also attempt to repair the data to try to ensure it can open in Outlook.
:raises StandardViolationError: Something about the embedded data has a
fundamental issue that violates the standard.
"""
# Get the root OLE entry's CLSID.
self.__rootEntry.clsid = _unClsid(msg._getOleEntry('/').clsid)
Expand All @@ -825,7 +831,17 @@ def fromMsg(self, msg: MSGFile) -> None:
# specific place. So let's check if we are doing the properties
# stream and then if we are embedded.
if x[0] == '__properties_version1.0' and msg.prefixLen > 0:
data = data[:24] + b'\x00\x00\x00\x00\x00\x00\x00\x00' + data[24:]
if len(data) % 16 != 0:
data = data[:24] + b'\x00\x00\x00\x00\x00\x00\x00\x00' + data[24:]
elif not allowBadEmbed:
# If we are not allowing bad data, throw an error.
raise StandardViolationError('Embedded msg file attempted to be extracted that contains a top level properties stream.')
if allowBadEmbed:
# See if we need to fix the properties stream at all.
if msg.getPropertyVal('340D0003') is None:
if msg.areStringsUnicode:
# We need to add a property to allow this file to open:
data += b'\x03\x00\x0D\x34\x02\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00'
self.addOleEntry(x, entry, data)

# Now check if it is an embedded file. If so, we need to copy the named
Expand All @@ -834,7 +850,14 @@ def fromMsg(self, msg: MSGFile) -> None:
# Get the entry for the named properties directory and add it
# immediately if it exists. If it doesn't exist, this whole
# section will be skipped.
self.addOleEntry('__nameid_version1.0', msg._getOleEntry('__nameid_version1.0', False), None)
try:
self.addOleEntry('__nameid_version1.0', msg._getOleEntry('__nameid_version1.0', False), None)
except OSError as e:
if str(e).startswith('Cannot add an entry'):
if allowBadEmbed:
return
raise StandardViolationError('Embedded msg file attempted to be extracted that contains it\'s own named streams.')
raise

# Now that we know it exists, grab all the file inside and copy
# them to our root.
Expand Down

0 comments on commit 373f6c1

Please sign in to comment.