Skip to content

Commit

Permalink
wordpressrxmlrpc.py: MarkdownDownload() ‒ ref. #13
Browse files Browse the repository at this point in the history
  • Loading branch information
alexandre-mbm committed Jul 15, 2015
1 parent d12c9ee commit eaf2a65
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 33 deletions.
32 changes: 0 additions & 32 deletions paicemana/testregexmarkdown.py

This file was deleted.

55 changes: 54 additions & 1 deletion paicemana/wordpressrxmlrpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from wordpress_xmlrpc.methods import posts

import re
import html2text


def test():
Expand Down Expand Up @@ -89,7 +90,54 @@ def do_title(self):
return ExtractorPosting.do(self.post.title, self.lang)


if __name__ == "__main__":
# TODO a class MarkdownDownload() here, using the ExtractorPosting()

class MarkdownDownload(object):
    """Download one weeklyosm.eu post and save it as a Markdown file.

    Instantiating the class performs the whole job: the post is fetched
    over XML-RPC, its content is converted to Markdown with html2text,
    the Markdown is cleaned up and finally written to disk.

    Attributes set by the constructor:
        post     - the object returned by ``posts.GetPost`` for the archive
        markdown - the cleaned-up Markdown text
        filename - name of the file written, ``archive-<archive>.md``
    """

    def __init__(self, user, password, archive, sync=False):
        """
        @params
        user - login name used for the XML-RPC client
        password - password used for the XML-RPC client
        archive - number in permalink like www.weeklyosm.eu/archives/4205
        sync - True for downloading the brazilian version already published
        """

        client = Client('http://www.weeklyosm.eu/xmlrpc.php', user, password)
        self.post = client.call(posts.GetPost(archive))

        # 'pt' selects the already published Brazilian text, 'en' otherwise.
        lang = 'pt' if sync else 'en'
        extractor = ExtractorPosting(self.post, lang)
        markdown = html2text.html2text(extractor.do_content())

        markdown = self._tidy_markdown(markdown)
        markdown = self._join_captions(markdown)

        self.filename = 'archive-%s.md' % archive
        self.markdown = markdown

        # Explicit UTF-8: the text may hold non-ASCII characters and the
        # platform default encoding is not guaranteed to represent them.
        with open(self.filename, 'w', encoding='utf-8') as text_file:
            text_file.write(markdown)

    @staticmethod
    def _tidy_markdown(markdown):
        """Normalise bullet lists and typographic characters in *markdown*."""
        # Drop the indentation in front of a bullet at the start of a line.
        text = re.sub(r'^ *\*', '*', markdown, flags=re.MULTILINE)
        # Make sure every bullet is preceded by an empty line.
        text = re.sub(r'([^\n]\n)\*', r'\1\n*', text)
        # Replace the ellipsis and curly double quotes by ASCII equivalents.
        for fancy, plain in (('…', '...'), ('“', '"'), ('”', '"')):
            text = text.replace(fancy, plain)
        return text

    @staticmethod
    def _join_captions(markdown):
        """Put every ``[caption ...caption]`` shortcode on a single line.

        Text without any caption shortcode is returned unchanged. Each
        shortcode is handled on its own (non-greedy match), so the text
        between two captions keeps its line breaks.
        """
        def flatten(match):
            one_line = match.group(0).replace('\n', ' ')
            # Joining lines can leave a space inside a "(http...)" link
            # target (presumably where the URL had been wrapped); remove it.
            return re.sub(r'(\(http[^\)]*) ', r'\1', one_line)

        return re.sub(
            r'\[caption.*?caption\]',
            flatten,
            markdown,
            flags=re.DOTALL
        )


def test_a():
#test()
post = MockPost()
#print(post)
Expand All @@ -103,3 +151,8 @@ def do_title(self):
print(extractor.do_title())
print(extractor.do_content())


if __name__ == "__main__":
    # Script entry point.
    # Usage: wordpressrxmlrpc.py [user] [archive]
    # The password is taken from the WEEKLYOSM_PASSWORD environment variable
    # or asked for interactively, so that a real password never has to be
    # written into (and committed with) this file.
    import getpass
    import os
    import sys

    cli_args = sys.argv[1:]
    user = cli_args[0] if len(cli_args) > 0 else 'alexandre'
    archive = int(cli_args[1]) if len(cli_args) > 1 else 4391
    password = os.environ.get('WEEKLYOSM_PASSWORD') or getpass.getpass(
        'Password for %s: ' % user
    )

    MarkdownDownload(user, password, archive, True)

0 comments on commit eaf2a65

Please sign in to comment.