Skip to content

Commit

Permalink
testregexmarkdown.py OK
Browse files Browse the repository at this point in the history
  • Loading branch information
alexandre-mbm committed Jul 15, 2015
1 parent 8816553 commit d12c9ee
Showing 1 changed file with 3 additions and 22 deletions.
25 changes: 3 additions & 22 deletions paicemana/testregexmarkdown.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
import re
import html2text # TODO in setup.py
import lxml.html # TODO in setup.py
import markdown2 # TODO in setup.py


src = """23.06.‒29.06.2015
Expand All @@ -22,30 +19,14 @@
## Mapeamento"""


# NOTE(review): this fragment appears to come from a rendered commit diff, so
# removed and added statements may be interleaved here -- confirm against the
# actual file before relying on the exact statement sequence.

# Extract the first "[caption ... caption]" span from `src`.
# With re.DOTALL the greedy `.*` also crosses newlines, so the match runs from
# the first "[caption" to the last "caption]" in the text. re.MULTILINE has no
# effect on this pattern (it contains no ^ or $ anchors). Indexing with [0]
# raises IndexError when `src` contains no caption at all.
out = re.findall(
r'\[caption.*caption\]',
src,
flags = re.MULTILINE + re.DOTALL
)[0]

# Render the extracted Markdown as HTML, then parse it into an lxml tree.
html = markdown2.markdown(out)
html = lxml.html.fromstring(html)

# Serialize the tree again; the result is decoded as UTF-8 to get a str.
out = lxml.html.tostring(html).decode("utf-8")

# Remove literal "%0A" sequences (URL-encoded line feeds).
out = re.sub(r'\%0A', '', out)
# Flatten the text onto one line by turning every newline into a space.
out = re.sub(r'\n', ' ', out)
# Drop one space inside each "(http..." run: `[^\)]*` is greedy, so the space
# removed is the last one before the next ")". Presumably this rejoins URLs
# that were split by the newline replacement above -- confirm.
out = re.sub(r'(\(http[^\)]*) ', r'\1', out)

# Original author's note: this setting does not work in Python 3.
# Presumably meant to disable html2text's line wrapping -- TODO confirm.
html2text.BODY_WIDTH = 0

# Patterns for the caption shortcode delimiters: the opening tag (an id that
# starts with "attachment_", align "alignnone", width "640") and the closing
# "[/caption]" tag. In `a2` the unescaped "]" is a literal character because
# it is not closing a character class.
a1 = r'\[caption id="attachment_.*" align="alignnone" width="640"\]'
a2 = r'\[\/caption]'

# Strip both shortcode delimiters, keeping whatever sits between them.
out = re.sub(a1, '', out)
out = re.sub(a2, '', out)

# Debug output of the intermediate string, left disabled.
#print(out)

# Convert the cleaned string to text with html2text.
markdown = html2text.html2text(out)

# Print the html2text result, then the string it was produced from.
print(markdown)
print(out)

0 comments on commit d12c9ee

Please sign in to comment.