Skip to content

Commit

Permalink
Fix issue #109
Browse files (browse the repository at this point in the history)
  • Loading branch information
Nandaka committed Mar 19, 2016
1 parent 7a0350f commit c5a88be
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 13 deletions.
2 changes: 1 addition & 1 deletion PixivConstant.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# pylint: disable=I0011, C, C0302


PIXIVUTIL_VERSION = '20160308-beta1'
PIXIVUTIL_VERSION = '20160319'
PIXIVUTIL_LINK = 'https://nandaka.wordpress.com/tag/pixiv-downloader/'
PIXIV_URL = 'http://www.pixiv.net'
PIXIV_URL_SSL = 'https://www.secure.pixiv.net/login.php'
Expand Down
43 changes: 33 additions & 10 deletions PixivModel.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,8 @@ def IsNeedPermission(self, page):
def IsDeleted(self, page):
errorMessages = ['該当イラストは削除されたか、存在しないイラストIDです。|該当作品は削除されたか、存在しない作品IDです。',
'The following work is either deleted, or the ID does not exist.',
'This work was deleted.']
'This work was deleted.',
'Work has been deleted or the ID does not exist.']
return PixivHelper.HaveStrings(page, errorMessages)

def IsGuroDisabled(self, page):
Expand Down Expand Up @@ -317,16 +318,38 @@ def ParseInfo(self, page):
# remove premium-introduction-modal so we can get caption from work-info
# somehow selecting section doesn't work
premium_introduction_modal = page.findAll('div', attrs={'id': 'premium-introduction-modal'})
premium_introduction_modal.extend(page.findAll('div', attrs={'id': 'popular-search-trial-end-introduction-modal'}))
for modal in premium_introduction_modal:
modal.extract()

meta_data = page.findAll('meta')
for meta in meta_data:
if meta.has_key("property"):
if "og:title" == meta["property"]:
self.imageTitle = meta["content"].split("|")[0].strip()
if "og:description" in meta["property"]:
self.imageCaption = meta["content"]
if modal is not None:
modal.extract()

#meta_data = page.findAll('meta')
#for meta in meta_data:
# if meta.has_key("property"):
# if "og:title" == meta["property"]:
# self.imageTitle = meta["content"].split("|")[0].strip()
# if "og:description" in meta["property"]:
# self.imageCaption = meta["content"]

# new layout on 20160319
tempTitles = page.findAll('h1', attrs={'class':'title'})
for tempTitle in tempTitles:
if tempTitle is None or tempTitle.string is None:
continue
elif len(tempTitle.string) == 0:
continue
else:
self.imageTitle = tempTitle.string
break
tempCaptions = page.findAll('p', attrs={'class':'caption'})
for tempCaption in tempCaptions:
if tempCaption is None or tempCaption.string is None:
continue
elif len(tempCaption.string) == 0:
continue
else:
self.imageCaption = tempCaption.string
break

self.jd_rtv = int(page.find(attrs={'class': 'view-count'}).string)
self.jd_rtc = int(page.find(attrs={'class': 'rated-count'}).string)
Expand Down
3 changes: 2 additions & 1 deletion changelog.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
20160308-beta1
20160319
- Fix unicode decode error when downloading image.
- Implement #107: add option to specify different database.
- Update readme.txt
- Fix Issue #109: update parser.

20160211:
- Fix Issue #98: Fix error message when download fail.
Expand Down
2 changes: 1 addition & 1 deletion readme.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ Q6: httperror_seek_wrapper: HTTP Error 403: request disallowed by robots.txt
1 - Download by member_id
(required: followed by member_ids separated by space)
2 - Download by image_id
(required: folled by image_ids separated by space)
(required: followed by image_ids separated by space)
3 - Download by tags
(required: [y/n] for wildcard, start page, end page,
followed by tags)
Expand Down

0 comments on commit c5a88be

Please sign in to comment.