From c9046a2448bb6f4e08a8024d2b6637e2c12c23e5 Mon Sep 17 00:00:00 2001 From: Farid Bagishev Date: Wed, 7 Oct 2020 16:08:16 +0500 Subject: [PATCH] Convert RTF to HTML respecting original code page --- lib/mapi/mime.rb | 10 +++++++--- lib/mapi/rtf.rb | 16 +++++++++++----- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/lib/mapi/mime.rb b/lib/mapi/mime.rb index 3271de8..3cf5e6f 100644 --- a/lib/mapi/mime.rb +++ b/lib/mapi/mime.rb @@ -69,6 +69,10 @@ def initialize str, ignore_body=false end end + def encode(x) + x.encoding == Encoding::UTF_8 ? x : x.encode('utf-8', 'cp1252') + end + def multipart? @content_type && @content_type =~ /^multipart/ ? true : false end @@ -97,7 +101,7 @@ def to_s opts={} opts = {:boundary_counter => 0}.merge opts if multipart? boundary = Mime.make_boundary opts[:boundary_counter] += 1, self - @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue]. + @body = [encode(preamble), parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + encode(epilogue)]. flatten.join("\r\n--" + boundary) content_type, attrs = Mime.split_header @headers['Content-Type'][0] attrs['boundary'] = boundary @@ -106,9 +110,9 @@ def to_s opts={} str = '' @headers.each do |key, vals| - vals.each { |val| str << "#{key}: #{val}\r\n" } + vals.each { |val| str << "#{encode(key)}: #{encode(val)}\r\n" } end - str << "\r\n" + @body + str << "\r\n" + encode(@body) end def self.split_header header diff --git a/lib/mapi/rtf.rb b/lib/mapi/rtf.rb index 4130066..e406049 100644 --- a/lib/mapi/rtf.rb +++ b/lib/mapi/rtf.rb @@ -122,7 +122,7 @@ def self.rtf2text str, format=:text end end - RTF_PREBUF = + RTF_PREBUF = "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \ "{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \ "\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \ @@ -175,7 +175,7 @@ def rtfdecompr data else # unknown magic number raise "Unknown compression type (magic number 0x%08x)" % magic end - + # not sure if its due to a bug in the above code. doesn't seem to be # in my tests, but sometimes there's a trailing null. we chomp it here, # which actually makes the resultant rtf smaller than its advertised @@ -189,7 +189,7 @@ def rtfdecompr data # # Returns +nil+ if it doesn't look like an rtf encapsulated rtf. # - # Some cases that the original didn't deal with have been patched up, eg from + # Some cases that the original didn't deal with have been patched up, eg from # this chunk, where there are tags outside of the htmlrtf ignore block. # # "{\\*\\htmltag116
}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84