Skip to content

Commit

Permalink
Convert RTF to HTML respecting original code page
Browse files Browse the repository at this point in the history
  • Loading branch information
faridco committed Nov 10, 2020
1 parent 34fb32e commit c9046a2
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 8 deletions.
10 changes: 7 additions & 3 deletions lib/mapi/mime.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def initialize str, ignore_body=false
end
end

# Returns +x+ as a UTF-8 string.
#
# A string already tagged as UTF-8 is returned unchanged (the same object).
# Any other string has its bytes interpreted as Windows-1252 (cp1252),
# regardless of the encoding it is tagged with, and is transcoded to UTF-8.
#
# cp1252 leaves a few byte values unassigned (0x81, 0x8D, 0x8F, 0x90, 0x9D).
# Without the replace options a single such byte raises
# Encoding::UndefinedConversionError; here those bytes become U+FFFD so the
# rest of the text is still converted.
def encode(x)
  return x if x.encoding == Encoding::UTF_8

  x.encode('utf-8', 'cp1252', invalid: :replace, undef: :replace)
end

# True when a content type is set and some line of it starts with
# "multipart"; false otherwise. Always returns a strict boolean.
def multipart?
  return false unless @content_type

  if @content_type =~ /^multipart/
    true
  else
    false
  end
end
Expand Down Expand Up @@ -97,7 +101,7 @@ def to_s opts={}
opts = {:boundary_counter => 0}.merge opts
if multipart?
boundary = Mime.make_boundary opts[:boundary_counter] += 1, self
@body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue].
@body = [encode(preamble), parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + encode(epilogue)].
flatten.join("\r\n--" + boundary)
content_type, attrs = Mime.split_header @headers['Content-Type'][0]
attrs['boundary'] = boundary
Expand All @@ -106,9 +110,9 @@ def to_s opts={}

str = ''
@headers.each do |key, vals|
vals.each { |val| str << "#{key}: #{val}\r\n" }
vals.each { |val| str << "#{encode(key)}: #{encode(val)}\r\n" }
end
str << "\r\n" + @body
str << "\r\n" + encode(@body)
end

def self.split_header header
Expand Down
16 changes: 11 additions & 5 deletions lib/mapi/rtf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def self.rtf2text str, format=:text
end
end

RTF_PREBUF =
RTF_PREBUF =
"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
"{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
"\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \
Expand Down Expand Up @@ -175,7 +175,7 @@ def rtfdecompr data
else # unknown magic number
raise "Unknown compression type (magic number 0x%08x)" % magic
end

# not sure if it's due to a bug in the above code. doesn't seem to be
# in my tests, but sometimes there's a trailing null. we chomp it here,
# which actually makes the resultant rtf smaller than its advertised
Expand All @@ -189,7 +189,7 @@ def rtfdecompr data
#
# Returns +nil+ if it doesn't look like an rtf encapsulated rtf.
#
# Some cases that the original didn't deal with have been patched up, eg from
# Some cases that the original didn't deal with have been patched up, eg from
# this chunk, where there are tags outside of the htmlrtf ignore block.
#
# "{\\*\\htmltag116 <br />}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84 <a href..."
Expand Down Expand Up @@ -229,8 +229,14 @@ def rtfdecompr data
def rtf2html rtf
scan = StringScanner.new rtf
# require \fromhtml. is this worth keeping? apparently you see \\fromtext if it
# was converted from plain text.
# was converted from plain text.
return nil unless rtf["\\fromhtml"]
if scan.scan_until(/\\ansicpg/)
code_page = "cp" + scan.scan(/\d+/)
scan.pos = 0
else
code_page = 'ascii'
end
html = ''
ignore_tag = nil
# skip up to the first htmltag. return nil if we don't ever find one
Expand Down Expand Up @@ -270,7 +276,7 @@ def rtf2html rtf
p :wtf
end
end
html.strip.empty? ? nil : html
html.strip.empty? ? nil : html.encode('utf-8', code_page)
end

module_function :rtf2html, :rtfdecompr
Expand Down

0 comments on commit c9046a2

Please sign in to comment.