From ea8bc0f066694c6c84f724e7ed2d23c020337190 Mon Sep 17 00:00:00 2001 From: Kees Date: Sat, 9 Sep 2023 17:18:06 +0200 Subject: [PATCH] Modernize code, fix typos and add ALT tag --- MsgReader.sln.DotSettings | 9 ++ MsgReaderCore/Mime/Decode/Base64.cs | 8 +- MsgReaderCore/Mime/Decode/EncodedWord.cs | 2 +- MsgReaderCore/Mime/Decode/EncodingFinder.cs | 10 +- MsgReaderCore/Mime/Decode/QuotedPrintable.cs | 94 +++++++------ MsgReaderCore/Mime/Decode/Rfc2231Decoder.cs | 6 +- MsgReaderCore/Mime/Decode/Rfc2822DateTime.cs | 4 +- MsgReaderCore/Mime/Decode/SizeParser.cs | 2 +- MsgReaderCore/Mime/Decode/UUEncode.cs | 121 ++++++++--------- .../Mime/Header/ContentTransferEncoding.cs | 1 + MsgReaderCore/Mime/Header/HeaderExtractor.cs | 123 +++++++++--------- .../Mime/Header/HeaderFieldParser.cs | 15 ++- MsgReaderCore/Mime/Header/Received.cs | 9 +- MsgReaderCore/Mime/Header/RfcMailAddress.cs | 36 +---- MsgReaderCore/Mime/Message.cs | 5 +- MsgReaderCore/Mime/MessagePart.cs | 26 ++-- .../Mime/Traverse/AttachmentFinder.cs | 42 +++--- 17 files changed, 245 insertions(+), 268 deletions(-) diff --git a/MsgReader.sln.DotSettings b/MsgReader.sln.DotSettings index 3c7a7125..77b754f5 100644 --- a/MsgReader.sln.DotSettings +++ b/MsgReader.sln.DotSettings @@ -3,22 +3,26 @@ MR NL RDN + True True True True True True True + True True True True True True + True True True True True True + True True True True @@ -71,6 +75,7 @@ True True True + True True True True @@ -81,6 +86,7 @@ True True True + True True True @@ -91,6 +97,7 @@ True True True + True True True True @@ -102,6 +109,8 @@ True True True + True + True True True \ No newline at end of file diff --git a/MsgReaderCore/Mime/Decode/Base64.cs b/MsgReaderCore/Mime/Decode/Base64.cs index 7766502a..6f0ba145 100644 --- a/MsgReaderCore/Mime/Decode/Base64.cs +++ b/MsgReaderCore/Mime/Decode/Base64.cs @@ -34,7 +34,7 @@ public static byte[] Decode(string base64Encoded) catch (Exception) { Logger.WriteToLog("Base64 decoding still failed returning empty byte array"); - return new byte[0]; + return Array.Empty(); } } } @@ -82,9 +82,9 @@ private static string RemoveInvalidBase64Chars(string base64Encoded) foreach (var chr in base64Encoded) { var val = (int)chr; - if ((val >= 65 && val <= 90) || // 'A'..'Z' - (val >= 97 && val <= 122) || // 'a'..'z' - (val >= 48 && val <= 57) || // '0'..'9' + if (val is >= 65 and <= 90 || // 'A'..'Z' + val is >= 97 and <= 122 || // 'a'..'z' + val is >= 48 and <= 57 || // '0'..'9' val == 43 || val == 47) // '+' and '/' result.Append(chr); else diff --git a/MsgReaderCore/Mime/Decode/EncodedWord.cs b/MsgReaderCore/Mime/Decode/EncodedWord.cs index e906283c..3c3ee9f7 100644 --- a/MsgReaderCore/Mime/Decode/EncodedWord.cs +++ b/MsgReaderCore/Mime/Decode/EncodedWord.cs @@ -122,7 +122,7 @@ public static string Decode(string encodedWords) break; default: - throw new ArgumentException("The encoding " + encoding + " was not recognized"); + throw new ArgumentException($"The encoding {encoding} was not recognized"); } // Replace our encoded value with our decoded value diff --git a/MsgReaderCore/Mime/Decode/EncodingFinder.cs b/MsgReaderCore/Mime/Decode/EncodingFinder.cs index d7f20a68..e78e8bd5 100644 --- a/MsgReaderCore/Mime/Decode/EncodingFinder.cs +++ b/MsgReaderCore/Mime/Decode/EncodingFinder.cs @@ -52,7 +52,7 @@ public static class EncodingFinder public static FallbackDecoderDelegate FallbackDecoder { private get; set; } /// - /// Mapping from charactersets to encodings. + /// Mapping from character sets to encodings. /// private static Dictionary EncodingMap { get; set; } #endregion @@ -69,7 +69,7 @@ static EncodingFinder() #region Reset /// - /// Used to reset this static class to facilite isolated unit testing. + /// Used to reset this static class to facilitate isolated unit testing. /// internal static void Reset() { @@ -97,8 +97,8 @@ internal static Encoding FindEncoding(string characterSet) var charSetUpper = characterSet.ToUpperInvariant(); // Check if the characterSet is explicitly mapped to an encoding - if (EncodingMap.ContainsKey(charSetUpper)) - return EncodingMap[charSetUpper]; + if (EncodingMap.TryGetValue(charSetUpper, out var encoding)) + return encoding; // Try to generally find the encoding try @@ -133,7 +133,7 @@ internal static Encoding FindEncoding(string characterSet) if (fallbackDecoderResult != null) return fallbackDecoderResult; - // If no solution was found, throw catched exception + // If no solution was found, throw exception throw; } } diff --git a/MsgReaderCore/Mime/Decode/QuotedPrintable.cs b/MsgReaderCore/Mime/Decode/QuotedPrintable.cs index 8d92b6b2..9b22a4f3 100644 --- a/MsgReaderCore/Mime/Decode/QuotedPrintable.cs +++ b/MsgReaderCore/Mime/Decode/QuotedPrintable.cs @@ -79,52 +79,50 @@ private static byte[] Rfc2047QuotedPrintableDecode(string toDecode, bool encoded throw new ArgumentNullException(nameof(toDecode)); // Create a byte array builder which is roughly equivalent to a StringBuilder - using (var byteArrayBuilder = StreamHelpers.Manager.GetStream()) - { - // Remove illegal control characters - toDecode = RemoveIllegalControlCharacters(toDecode); + using var byteArrayBuilder = StreamHelpers.Manager.GetStream(); + // Remove illegal control characters + toDecode = RemoveIllegalControlCharacters(toDecode); - // Run through the whole string that needs to be decoded - for (var i = 0; i < toDecode.Length; i++) + // Run through the whole string that needs to be decoded + for (var i = 0; i < toDecode.Length; i++) + { + var currentChar = toDecode[i]; + if (currentChar == '=') { - var currentChar = toDecode[i]; - if (currentChar == '=') + // Check that there is at least two characters behind the equal sign + if (toDecode.Length - i < 3) { - // Check that there is at least two characters behind the equal sign - if (toDecode.Length - i < 3) - { - // We are at the end of the toDecode string, but something is missing. Handle it the way RFC 2045 states - WriteAllBytesToStream(byteArrayBuilder, DecodeEqualSignNotLongEnough(toDecode.Substring(i))); - - // Since it was the last part, we should stop parsing anymore - break; - } - - // Decode the Quoted-Printable part - var quotedPrintablePart = toDecode.Substring(i, 3); - WriteAllBytesToStream(byteArrayBuilder, DecodeEqualSign(quotedPrintablePart)); - - // We now consumed two extra characters. Go forward two extra characters - i += 2; + // We are at the end of the toDecode string, but something is missing. Handle it the way RFC 2045 states + WriteAllBytesToStream(byteArrayBuilder, DecodeEqualSignNotLongEnough(toDecode.Substring(i))); + + // Since it was the last part, we should stop parsing anymore + break; } + + // Decode the Quoted-Printable part + var quotedPrintablePart = toDecode.Substring(i, 3); + WriteAllBytesToStream(byteArrayBuilder, DecodeEqualSign(quotedPrintablePart)); + + // We now consumed two extra characters. Go forward two extra characters + i += 2; + } + else + { + // This character is not quoted printable hex encoded. + + // Could it be the _ character, which represents space + // and are we using the encoded word variant of QuotedPrintable + if (currentChar == '_' && encodedWordVariant) + // The RFC specifies that the "_" always represents hexadecimal 20 even if the + // SPACE character occupies a different code position in the character set in use. + byteArrayBuilder.WriteByte(0x20); else - { - // This character is not quoted printable hex encoded. - - // Could it be the _ character, which represents space - // and are we using the encoded word variant of QuotedPrintable - if (currentChar == '_' && encodedWordVariant) - // The RFC specifies that the "_" always represents hexadecimal 20 even if the - // SPACE character occupies a different code position in the character set in use. - byteArrayBuilder.WriteByte(0x20); - else - // This is not encoded at all. This is a literal which should just be included into the output. - byteArrayBuilder.WriteByte((byte)currentChar); - } + // This is not encoded at all. This is a literal which should just be included into the output. + byteArrayBuilder.WriteByte((byte)currentChar); } - - return byteArrayBuilder.ToArray(); } + + return byteArrayBuilder.ToArray(); } #endregion @@ -162,7 +160,7 @@ private static string RemoveIllegalControlCharacters(string input) throw new ArgumentNullException(nameof(input)); // First we remove any \r or \n which is not part of a \r\n pair - input = RemoveCarriageReturnAndNewLinewIfNotInPair(input); + input = RemoveCarriageReturnAndNewLineIfNotInPair(input); // Here only legal \r\n is left over // We now simply keep them, and the \t which is also allowed @@ -173,14 +171,14 @@ private static string RemoveIllegalControlCharacters(string input) } #endregion - #region RemoveCarriageReturnAndNewLinewIfNotInPair + #region RemoveCarriageReturnAndNewLineIfNotInPair /// /// This method will remove any \r and \n which is not paired as \r\n /// /// String to remove lonely \r and \n's from /// A string without lonely \r and \n's /// If is - private static string RemoveCarriageReturnAndNewLinewIfNotInPair(string input) + private static string RemoveCarriageReturnAndNewLineIfNotInPair(string input) { if (input == null) throw new ArgumentNullException(nameof(input)); @@ -263,7 +261,7 @@ private static byte[] DecodeEqualSignNotLongEnough(string decode) throw new ArgumentException(@"First part of decode must be an equal sign", nameof(decode)); // We will now believe that the string sent to us, was actually not encoded - // Therefore it must be in US-ASCII and we will return the bytes it corrosponds to + // Therefore it must be in US-ASCII and we will return the bytes it corresponds to return Encoding.ASCII.GetBytes(decode); } #endregion @@ -297,14 +295,14 @@ private static byte[] DecodeEqualSign(string decode) // It might be a // - hex-string like =3D, denoting the character with hex value 3D // - it might be the last character on the line before a CRLF - // pair, denoting a soft linebreak, which simply + // pair, denoting a soft line break, which simply // splits the text up, because of the 76 chars per line restriction if (decode.Contains("\r\n")) // Soft break detected // We want to return string.Empty which is equivalent to a zero-length byte array - return new byte[0]; + return Array.Empty(); - // Hex string detected. Convertion needed. + // Hex string detected. Conversion needed. // It might be that the string located after the equal sign is not hex characters // An example: =JU // In that case we would like to catch the FormatException and do something else @@ -338,12 +336,12 @@ private static byte[] DecodeEqualSign(string decode) // the data. // So we choose to believe this is actually an un-encoded string - // Therefore it must be in US-ASCII and we will return the bytes it corrosponds to + // Therefore it must be in US-ASCII and we will return the bytes it corresponds to return Encoding.ASCII.GetBytes(decode); } catch (Exception) { - return new byte[0]; + return Array.Empty(); } } #endregion diff --git a/MsgReaderCore/Mime/Decode/Rfc2231Decoder.cs b/MsgReaderCore/Mime/Decode/Rfc2231Decoder.cs index 5ff24373..3f3931b1 100644 --- a/MsgReaderCore/Mime/Decode/Rfc2231Decoder.cs +++ b/MsgReaderCore/Mime/Decode/Rfc2231Decoder.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text; using System.Text.RegularExpressions; +// ReSharper disable CommentTypo namespace MsgReader.Mime.Decode; @@ -213,7 +214,7 @@ private static List> DecodePairs(IList> DecodePairs(IList(key, value)); diff --git a/MsgReaderCore/Mime/Decode/Rfc2822DateTime.cs b/MsgReaderCore/Mime/Decode/Rfc2822DateTime.cs index 0cf781ae..b9d533e3 100644 --- a/MsgReaderCore/Mime/Decode/Rfc2822DateTime.cs +++ b/MsgReaderCore/Mime/Decode/Rfc2822DateTime.cs @@ -305,12 +305,12 @@ private static DateTime ExtractDateTime(string dateInput) // Needs to find: 21 Nov 1997 09:55:06 // Seconds does not need to be specified - // Even though it is illigal, sometimes hours, minutes or seconds are only specified with one digit + // Even though it is illegal, sometimes hours, minutes or seconds are only specified with one digit // Year with 2 or 4 digits (1922 or 22) const string year = @"(\d\d\d\d|\d\d)"; - // Time with one or two digits for hour and minute and optinal seconds (06:04:06 or 6:4:6 or 06:04 or 6:4) + // Time with one or two digits for hour and minute and optional seconds (06:04:06 or 6:4:6 or 06:04 or 6:4) const string time = @"\d?\d:\d?\d(:\d?\d)?"; // Correct format is 21 Nov 1997 09:55:06 diff --git a/MsgReaderCore/Mime/Decode/SizeParser.cs b/MsgReaderCore/Mime/Decode/SizeParser.cs index 57ec6a37..65d896f0 100644 --- a/MsgReaderCore/Mime/Decode/SizeParser.cs +++ b/MsgReaderCore/Mime/Decode/SizeParser.cs @@ -66,7 +66,7 @@ private static string ExtractUnit(string sizeWithUnit) private static bool IsDigit(char value) { // we don't want to use char.IsDigit since it would accept esoterical unicode digits - return value >= '0' && value <= '9'; + return value is >= '0' and <= '9'; } #endregion diff --git a/MsgReaderCore/Mime/Decode/UUEncode.cs b/MsgReaderCore/Mime/Decode/UUEncode.cs index 479b33d0..817d8290 100644 --- a/MsgReaderCore/Mime/Decode/UUEncode.cs +++ b/MsgReaderCore/Mime/Decode/UUEncode.cs @@ -1,11 +1,13 @@ using System; using System.IO; using MsgReader.Helpers; +// ReSharper disable InconsistentNaming namespace MsgReader.Mime.Decode; internal static class UUEncode { + #region Fields private static readonly byte[] UUDecMap = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -25,80 +27,81 @@ internal static class UUEncode 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + #endregion + #region Decode public static byte[] Decode(byte[] encodeBytes) { - using (Stream input = StreamHelpers.Manager.GetStream("UUEncode.cs", encodeBytes, 0, encodeBytes.Length)) - using (var output = StreamHelpers.Manager.GetStream()) + using Stream input = StreamHelpers.Manager.GetStream("UUEncode.cs", encodeBytes, 0, encodeBytes.Length); + using var output = StreamHelpers.Manager.GetStream(); + try { - try - { - if (input == null) - throw new ArgumentNullException("input"); - - var len = input.Length; - if (len == 0) - return new byte[0]; + if (input == null) + throw new ArgumentNullException(nameof(encodeBytes)); - long didx = 0; - var nextByte = input.ReadByte(); - while (nextByte >= 0) - { - // Get line length (in number of encoded octets) - int line_len = UUDecMap[nextByte]; + var len = input.Length; + if (len == 0) + return Array.Empty(); - // Ascii printable to 0-63 and 4-byte to 3-byte conversion - var end = didx + line_len; - byte A, B, C, D; - if (end > 2) - while (didx < end - 2) - { - A = UUDecMap[input.ReadByte()]; - B = UUDecMap[input.ReadByte()]; - C = UUDecMap[input.ReadByte()]; - D = UUDecMap[input.ReadByte()]; - - output.WriteByte((byte)(((A << 2) & 255) | ((B >> 4) & 3))); - output.WriteByte((byte)(((B << 4) & 255) | ((C >> 2) & 15))); - output.WriteByte((byte)(((C << 6) & 255) | (D & 63))); - didx += 3; - } + long didX = 0; + var nextByte = input.ReadByte(); + while (nextByte >= 0) + { + // Get line length (in number of encoded octets) + int lineLen = UUDecMap[nextByte]; - if (didx < end) + // Ascii printable to 0-63 and 4-byte to 3-byte conversion + var end = didX + lineLen; + byte a, b, c; + if (end > 2) + while (didX < end - 2) { - A = UUDecMap[input.ReadByte()]; - B = UUDecMap[input.ReadByte()]; - output.WriteByte((byte)(((A << 2) & 255) | ((B >> 4) & 3))); - didx++; - } + a = UUDecMap[input.ReadByte()]; + b = UUDecMap[input.ReadByte()]; + c = UUDecMap[input.ReadByte()]; + var d = UUDecMap[input.ReadByte()]; - if (didx < end) - { - B = UUDecMap[input.ReadByte()]; - C = UUDecMap[input.ReadByte()]; - output.WriteByte((byte)(((B << 4) & 255) | ((C >> 2) & 15))); - didx++; + output.WriteByte((byte)(((a << 2) & 255) | ((b >> 4) & 3))); + output.WriteByte((byte)(((b << 4) & 255) | ((c >> 2) & 15))); + output.WriteByte((byte)(((c << 6) & 255) | (d & 63))); + didX += 3; } - // Skip padding - do - { - nextByte = input.ReadByte(); - } while (nextByte >= 0 && nextByte != '\n' && nextByte != '\r'); + if (didX < end) + { + a = UUDecMap[input.ReadByte()]; + b = UUDecMap[input.ReadByte()]; + output.WriteByte((byte)(((a << 2) & 255) | ((b >> 4) & 3))); + didX++; + } - // Skip end of line - do - { - nextByte = input.ReadByte(); - } while (nextByte >= 0 && (nextByte == '\n' || nextByte == '\r')); + if (didX < end) + { + b = UUDecMap[input.ReadByte()]; + c = UUDecMap[input.ReadByte()]; + output.WriteByte((byte)(((b << 4) & 255) | ((c >> 2) & 15))); + didX++; } - return output.ToArray(); - } - catch (Exception) - { - return new byte[0]; + // Skip padding + do + { + nextByte = input.ReadByte(); + } while (nextByte >= 0 && nextByte != '\n' && nextByte != '\r'); + + // Skip end of line + do + { + nextByte = input.ReadByte(); + } while (nextByte >= 0 && nextByte is '\n' or '\r'); } + + return output.ToArray(); + } + catch (Exception) + { + return Array.Empty(); } } + #endregion } \ No newline at end of file diff --git a/MsgReaderCore/Mime/Header/ContentTransferEncoding.cs b/MsgReaderCore/Mime/Header/ContentTransferEncoding.cs index bf1ad926..8bf78a38 100644 --- a/MsgReaderCore/Mime/Header/ContentTransferEncoding.cs +++ b/MsgReaderCore/Mime/Header/ContentTransferEncoding.cs @@ -1,4 +1,5 @@ using System; +// ReSharper disable InconsistentNaming namespace MsgReader.Mime.Header; diff --git a/MsgReaderCore/Mime/Header/HeaderExtractor.cs b/MsgReaderCore/Mime/Header/HeaderExtractor.cs index 53ba08aa..974492d7 100644 --- a/MsgReaderCore/Mime/Header/HeaderExtractor.cs +++ b/MsgReaderCore/Mime/Header/HeaderExtractor.cs @@ -41,21 +41,18 @@ private static int FindHeaderEndPosition(byte[] messageContent) throw new ArgumentNullException(nameof(messageContent)); // Convert the byte array into a stream - using (Stream stream = - StreamHelpers.Manager.GetStream("HeaderExtractor,cs", messageContent, 0, messageContent.Length)) + using Stream stream = StreamHelpers.Manager.GetStream("HeaderExtractor,cs", messageContent, 0, messageContent.Length); + while (true) { - while (true) - { - // Read a line from the stream. We know headers are in US-ASCII - // therefore it is not problem to read them as such - var line = StreamUtility.ReadLineAsAscii(stream); - - // The end of headers is signaled when a blank line is found - // or if the line is null - in which case the email is actually an email with - // only headers but no body - if (string.IsNullOrEmpty(line)) - return (int)stream.Position; - } + // Read a line from the stream. We know headers are in US-ASCII + // therefore it is not problem to read them as such + var line = StreamUtility.ReadLineAsAscii(stream); + + // The end of headers is signaled when a blank line is found + // or if the line is null - in which case the email is actually an email with + // only headers but no body + if (string.IsNullOrEmpty(line)) + return (int)stream.Position; } } #endregion @@ -83,8 +80,8 @@ public static void ExtractHeadersAndBody(byte[] fullRawMessage, out MessageHeade // using US-ASCII encoding //var headersString = Encoding.ASCII.GetString(fullRawMessage, 0, endOfHeaderLocation); - // MIME headers should aways be ASCII encoded, but sometimes they don't so we read them as UTF8. - // It should not make any difference if we do it this way because UTF-8 superseeds ASCII encoding + // MIME headers should always be ASCII encoded, but sometimes they don't so we read them as UTF8. + // It should not make any difference if we do it this way because UTF-8 super seeds ASCII encoding var headersString = Encoding.UTF8.GetString(fullRawMessage, 0, endOfHeaderLocation); // Now parse the headers to a NameValueCollection @@ -119,59 +116,55 @@ private static NameValueCollection ExtractHeaders(string messageContent) var headers = new NameValueCollection(); - using (var messageReader = new StringReader(messageContent)) + using var messageReader = new StringReader(messageContent); + // Read until all headers have ended. + // The headers ends when an empty line is encountered + // An empty message might actually not have an empty line, in which + // case the headers end with null value. + string line; + + while (!string.IsNullOrEmpty(line = messageReader.ReadLine())) { - // Read until all headers have ended. - // The headers ends when an empty line is encountered - // An empty message might actually not have an empty line, in which - // case the headers end with null value. - string line; + // Split into name and value + var header = SeparateHeaderNameAndValue(line); + + // First index is header name + var headerName = header.Key; + + // Second index is the header value. + // Use a StringBuilder since the header value may be continued on the next line + var headerValue = new StringBuilder(header.Value); + + // Keep reading until we would hit next header + // This if for handling multi line headers + while (IsMoreLinesInHeaderValue(messageReader)) + { + // Unfolding is accomplished by simply removing any CRLF + // that is immediately followed by WSP + // This was done using ReadLine (it discards CRLF) + // See http://tools.ietf.org/html/rfc822#section-3.1.1 for more information + var moreHeaderValue = messageReader.ReadLine(); + + // If this exception is ever raised, there is an serious algorithm failure + // IsMoreLinesInHeaderValue does not return true if the next line does not exist + // This check is only included to stop the nagging "possibly null" code analysis hint + if (moreHeaderValue == null) + throw new ArgumentException("This will never happen"); + + // Simply append the line just read to the header value + headerValue.Append(moreHeaderValue); + } + + // Now we have the name and full value. Add it - while (!string.IsNullOrEmpty(line = messageReader.ReadLine())) + if (headers.AllKeys.Contains(headerName)) { - // Split into name and value - var header = SeparateHeaderNameAndValue(line); - - // First index is header name - var headerName = header.Key; - - // Second index is the header value. - // Use a StringBuilder since the header value may be continued on the next line - var headerValue = new StringBuilder(header.Value); - - // Keep reading until we would hit next header - // This if for handling multi line headers - while (IsMoreLinesInHeaderValue(messageReader)) - { - // Unfolding is accomplished by simply removing any CRLF - // that is immediately followed by WSP - // This was done using ReadLine (it discards CRLF) - // See http://tools.ietf.org/html/rfc822#section-3.1.1 for more information - var moreHeaderValue = messageReader.ReadLine(); - - // If this exception is ever raised, there is an serious algorithm failure - // IsMoreLinesInHeaderValue does not return true if the next line does not exist - // This check is only included to stop the nagging "possibly null" code analysis hint - if (moreHeaderValue == null) - throw new ArgumentException("This will never happen"); - - // Simply append the line just read to the header value - headerValue.Append(moreHeaderValue); - } - - // Now we have the name and full value. Add it - - if (headers.AllKeys.Contains(headerName)) - { - var value = headers[headerName]; - value += "," + headerValue; - headers[headerName] = value; - } - else - { - headers.Add(headerName, headerValue.ToString()); - } + var value = headers[headerName]; + value += "," + headerValue; + headers[headerName] = value; } + else + headers.Add(headerName, headerValue.ToString()); } return headers; diff --git a/MsgReaderCore/Mime/Header/HeaderFieldParser.cs b/MsgReaderCore/Mime/Header/HeaderFieldParser.cs index d5fe634e..2bf5cdff 100644 --- a/MsgReaderCore/Mime/Header/HeaderFieldParser.cs +++ b/MsgReaderCore/Mime/Header/HeaderFieldParser.cs @@ -204,7 +204,7 @@ public static ContentDisposition ParseContentDisposition(string headerValue) switch (key) { case "": - // This is the DispisitionType - it has no key since it is the first one + // This is the Disposition type - it has no key since it is the first one // and has no = in it. contentDisposition.DispositionType = value; break; @@ -214,7 +214,7 @@ public static ContentDisposition ParseContentDisposition(string headerValue) case "NAME": case "FILENAME": case "REMOTE-IMAGE": - // The filename might be in qoutes, and it might be encoded-word encoded + // The filename might be in quotes, and it might be encoded-word encoded contentDisposition.FileName = EncodedWord.Decode(value); break; @@ -231,8 +231,8 @@ public static ContentDisposition ParseContentDisposition(string headerValue) case "MODIFICATION-DATE": case "MODIFICATION-DATE-PARM": - var midificationDate = new DateTime(Rfc2822DateTime.StringToDate(value).Ticks); - contentDisposition.ModificationDate = midificationDate; + var modificationDate = new DateTime(Rfc2822DateTime.StringToDate(value).Ticks); + contentDisposition.ModificationDate = modificationDate; break; case "READ-DATE": @@ -244,14 +244,17 @@ public static ContentDisposition ParseContentDisposition(string headerValue) contentDisposition.Size = SizeParser.Parse(value); break; + case "ALT": + contentDisposition.Parameters.Add(key, value); + break; + case "CHARSET": // ignoring invalid parameter in Content-Disposition case "VOICE": break; default: if (!key.StartsWith("X-")) - throw new ArgumentException( - "Unknown parameter in Content-Disposition. Ask developer to fix! Parameter: " + key); + throw new ArgumentException("Unknown parameter in Content-Disposition. Ask developer to fix! Parameter: " + key); contentDisposition.Parameters.Add(key, value); break; } diff --git a/MsgReaderCore/Mime/Header/Received.cs b/MsgReaderCore/Mime/Header/Received.cs index af31b8e3..3fab2f59 100644 --- a/MsgReaderCore/Mime/Header/Received.cs +++ b/MsgReaderCore/Mime/Header/Received.cs @@ -55,16 +55,13 @@ public class Received /// public Received(string headerValue) { - if (headerValue == null) - throw new ArgumentNullException(nameof(headerValue)); - - // Remember the raw input if someone whishes to use it - Raw = headerValue; + // Remember the raw input if someone wishes to use it + Raw = headerValue ?? throw new ArgumentNullException(nameof(headerValue)); // Default Date value Date = DateTime.MinValue; - // The date part is the last part of the string, and is preceeded by a semicolon + // The date part is the last part of the string, and is preceded by a semicolon // Some emails forgets to specify the date, therefore we need to check if it is there if (headerValue.Contains(";")) { diff --git a/MsgReaderCore/Mime/Header/RfcMailAddress.cs b/MsgReaderCore/Mime/Header/RfcMailAddress.cs index 7a0b991f..0dea9206 100644 --- a/MsgReaderCore/Mime/Header/RfcMailAddress.cs +++ b/MsgReaderCore/Mime/Header/RfcMailAddress.cs @@ -86,16 +86,10 @@ public class RfcMailAddress /// private RfcMailAddress(MailAddress mailAddress, string raw) { - if (mailAddress == null) - throw new ArgumentNullException(nameof(mailAddress)); - - if (raw == null) - throw new ArgumentNullException(nameof(raw)); - - MailAddress = mailAddress; + MailAddress = mailAddress ?? throw new ArgumentNullException(nameof(mailAddress)); Address = mailAddress.Address; DisplayName = mailAddress.DisplayName; - Raw = raw; + Raw = raw ?? throw new ArgumentNullException(nameof(raw)); } /// @@ -106,12 +100,9 @@ private RfcMailAddress(MailAddress mailAddress, string raw) /// If is private RfcMailAddress(string raw) { - if (raw == null) - throw new ArgumentNullException(nameof(raw)); - MailAddress = null; Address = string.Empty; - DisplayName = raw; + DisplayName = raw ?? throw new ArgumentNullException(nameof(raw)); Raw = raw; } #endregion @@ -130,13 +121,6 @@ public override string ToString() #region Parsing /// /// Parses an email address from a MIME header
- ///
- /// Examples of input: - /// Eksperten mailrobot <noreply@mail.eksperten.dk>
- /// "Eksperten mailrobot" <noreply@mail.eksperten.dk>
- /// <noreply@mail.eksperten.dk>
- /// noreply@mail.eksperten.dk
- ///
/// It might also contain encoded text, which will then be decoded. ///
/// The value to parse out and email and/or a username @@ -200,33 +184,27 @@ internal static RfcMailAddress ParseMailAddress(string input) var emailLength = indexEndEmail - indexStartEmail; var emailAddress = input.Substring(indexStartEmail, emailLength).Trim(); - // There has been cases where there was no emailaddress between the < and > + // There has been cases where there was no email address between the < and > if (!string.IsNullOrEmpty(emailAddress)) // If the username is quoted, MailAddress' constructor will remove them for us return new RfcMailAddress(new MailAddress(emailAddress, username), input); } // This might be on the form noreply@mail.eksperten.dk - // Check if there is an email, if notm there is no need to try + // Check if there is an email, if not there is no need to try if (input.Contains("@")) return new RfcMailAddress(new MailAddress(input), input); } catch (FormatException) { - // Sometimes invalid emails are sent, like sqlmap-user@sourceforge.net. (last period is illigal) + // Sometimes invalid emails are sent, like sqlmap-user@sourceforge.net. (last period is illegal) } - // It could be that the format used was simply a name - // which is indeed valid according to the RFC - // Example: - // Eksperten mailrobot return new RfcMailAddress(input); } /// - /// Parses input of the form
- /// Eksperten mailrobot <noreply@mail.eksperten.dk>, ...
- /// to a list of RFCMailAddresses + /// Parses input to a list of RFCMailAddresses ///
/// The input that is a comma-separated list of EmailAddresses to parse /// A List of objects extracted from the parameter. diff --git a/MsgReaderCore/Mime/Message.cs b/MsgReaderCore/Mime/Message.cs index 169563af..1e0901e5 100644 --- a/MsgReaderCore/Mime/Message.cs +++ b/MsgReaderCore/Mime/Message.cs @@ -270,8 +270,7 @@ private void ProcessSignedContent(byte[] data) } // Get the decoded attachment - using (var memoryStream = StreamHelpers.Manager.GetStream("Message.cs", signedCms.ContentInfo.Content, 0, - signedCms.ContentInfo.Content.Length)) + using (var memoryStream = StreamHelpers.Manager.GetStream("Message.cs", signedCms.ContentInfo.Content, 0, signedCms.ContentInfo.Content.Length)) { var eml = Load(memoryStream); if (eml.TextBody != null) @@ -279,8 +278,6 @@ private void ProcessSignedContent(byte[] data) if (eml.HtmlBody != null) HtmlBody = eml.HtmlBody; - - //foreach (var emlAttachment in eml.Attachments) } Logger.WriteToLog("Signed content processed"); diff --git a/MsgReaderCore/Mime/MessagePart.cs b/MsgReaderCore/Mime/MessagePart.cs index 9a46b4c0..32ff2199 100644 --- a/MsgReaderCore/Mime/MessagePart.cs +++ b/MsgReaderCore/Mime/MessagePart.cs @@ -9,6 +9,7 @@ using MsgReader.Mime.Header; using MsgReader.Tnef; using MsgReader.Tnef.Enums; +// ReSharper disable UnusedMember.Global // ReSharper disable UnusedAutoPropertyAccessor.Global @@ -354,17 +355,20 @@ private static string FindFileName(byte[] rawBody, MessageHeader headers, string if (!string.IsNullOrEmpty(headers.Subject)) return FileManager.RemoveInvalidFileNameChars(headers.Subject) + extensionFromContentType; - if (extensionFromContentType.Equals(".eml", StringComparison.OrdinalIgnoreCase)) - try - { - var message = new Message(rawBody); - if (!string.IsNullOrEmpty(message.Headers?.Subject)) - return FileManager.RemoveInvalidFileNameChars(message.Headers.Subject) + extensionFromContentType; - } - // ReSharper disable once EmptyGeneralCatchClause - catch - { - } + if (!extensionFromContentType.Equals(".eml", StringComparison.OrdinalIgnoreCase)) + return !string.IsNullOrEmpty(contentTypeName) + ? FileManager.RemoveInvalidFileNameChars(contentTypeName) + : FileManager.RemoveInvalidFileNameChars(defaultName + extensionFromContentType); + try + { + var message = new Message(rawBody); + if (!string.IsNullOrEmpty(message.Headers?.Subject)) + return FileManager.RemoveInvalidFileNameChars(message.Headers.Subject) + extensionFromContentType; + } + catch + { + // Ignore + } return !string.IsNullOrEmpty(contentTypeName) ? FileManager.RemoveInvalidFileNameChars(contentTypeName) diff --git a/MsgReaderCore/Mime/Traverse/AttachmentFinder.cs b/MsgReaderCore/Mime/Traverse/AttachmentFinder.cs index 61884843..8df8375c 100644 --- a/MsgReaderCore/Mime/Traverse/AttachmentFinder.cs +++ b/MsgReaderCore/Mime/Traverse/AttachmentFinder.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Linq; using MsgReader.Helpers; using MsgReader.Tnef; @@ -19,39 +20,32 @@ protected override List CaseLeaf(MessagePart messagePart) // Maximum space needed is one var leafAnswer = new List(1); - if (messagePart.IsAttachment) + if (!messagePart.IsAttachment) return leafAnswer; + if (messagePart.FileName.ToLowerInvariant() == "winmail.dat") { - if (messagePart.FileName.ToLowerInvariant() == "winmail.dat") + try { - try + Logger.WriteToLog("Found winmail.dat attachment, trying to get attachments from it"); + var stream = StreamHelpers.Manager.GetStream("AttachmentFinder.CaseLeaf", messagePart.Body, 0, messagePart.Body.Length); + using var tnefReader = new TnefReader(stream); { - Logger.WriteToLog("Found winmail.dat attachment, trying to get attachments from it"); - var stream = StreamHelpers.Manager.GetStream("AttachmentFinder.CaseLeaf", messagePart.Body, 0, messagePart.Body.Length); - using var tnefReader = new TnefReader(stream); + var attachments = Part.ExtractAttachments(tnefReader); + var count = attachments.Count; + if (count > 0) { - var attachments = Part.ExtractAttachments(tnefReader); - var count = attachments.Count; - if (count > 0) - { - Logger.WriteToLog($"Found {count} attachment{(count == 1 ? string.Empty : "s")}, removing winmail.dat and adding {(count == 1 ? "this attachment" : "these attachments")}"); - - foreach (var attachment in attachments) - { - var temp = new MessagePart(attachment); - leafAnswer.Add(temp); - } - } + Logger.WriteToLog($"Found {count} attachment{(count == 1 ? string.Empty : "s")}, removing winmail.dat and adding {(count == 1 ? "this attachment" : "these attachments")}"); + leafAnswer.AddRange(attachments.Select(attachment => new MessagePart(attachment))); } } - catch (Exception exception) - { - Logger.WriteToLog($"Could not parse winmail.dat attachment, error: {ExceptionHelpers.GetInnerException(exception)}"); - leafAnswer.Add(messagePart); - } } - else + catch (Exception exception) + { + Logger.WriteToLog($"Could not parse winmail.dat attachment, error: {ExceptionHelpers.GetInnerException(exception)}"); leafAnswer.Add(messagePart); + } } + else + leafAnswer.Add(messagePart); return leafAnswer; }