Skip to content

Commit

Permalink
Option -s/--sync
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexandre-mbm committed Jul 10, 2015
1 parent 28df87a commit 6dd0b22
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 10 deletions.
17 changes: 10 additions & 7 deletions paicemana/scripts/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,17 +14,20 @@
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option('-g', '--archive', type=int,
help='Number in permalink like www.weeklyosm.eu/archives/4205')
def cli(archive):
@click.option('-s', '--sync', is_flag=True,
help='Downloads the brazilian version already published')
def cli(archive, sync):
"""A helper script for works at OSMBrasil/semanario"""
if not archive:
raise click.UsageError('try the -h/--help option')
try:
download = MarkdownDownload(archive)
analyzer = MarkdownAnalyzer(download.filename)
organizer = analyzer.getOrganizer()
translators = ['alexandre-mbm', 'jgpacker', 'vgeorge']
organizer.distribute_for(translators)
print('\n%s\n\n%s\n' % (organizer, organizer.scores()))
download = MarkdownDownload(archive, sync)
if not sync:
analyzer = MarkdownAnalyzer(download.filename)
organizer = analyzer.getOrganizer()
translators = ['alexandre-mbm', 'jgpacker', 'vgeorge']
organizer.distribute_for(translators)
print('\n%s\n\n%s\n' % (organizer, organizer.scores()))
except HTTPError as e:
click.echo(e)

12 changes: 9 additions & 3 deletions paicemana/textdownload.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,18 +8,20 @@
class MarkdownDownload(object):
"""Class to download text weeklyosm.eu"""

def __init__(self, archive):
def __init__(self, archive, sync=False):
"""
@params
archive - number in permalink like www.weeklyosm.eu/archives/4205
sync - True for downloading the brazilian version already published
"""

self.url = 'http://www.weeklyosm.eu/archives/%s' % archive
lang = 'en' if not sync else 'pt'
self.url = 'http://www.weeklyosm.eu/%s/archives/%s' % (lang, archive)
self.page = html.fromstring(urllib.request.urlopen(self.url).read())

root = self.page.xpath('//article')[0]
etree.strip_tags(root,'div','span')
root_html = etree.tostring(root, pretty_print=True)
root_html = etree.tostring(root, encoding='utf-8', pretty_print=True, method='html')

markdown = html2text.html2text(root_html.decode('utf-8'))

Expand All @@ -29,7 +31,11 @@ def __init__(self, archive):
s = re.sub(r'\n', '\n\n', s)
s = re.sub(r'\n\n\n\n?', '\n\n', s)
s = re.sub(r'…', '...', s)
s = re.sub(r'“', '"', s)
s = re.sub(r'”', '"', s)
s = re.sub(r' \[\]\(.*\/OSMBrasil\/semanario.*\n\n.*\)', '', s)
s = s.split('### Share this:')[0]
s = s.split('### Compartilhe isso:')[0]
markdown = s
#print(markdown)

Expand Down

0 comments on commit 6dd0b22

Please sign in to comment.