Fix issue #109

Nandaka · Mar 19, 2016 · c5a88be
1 parent 7a0350f
commit c5a88be
Show file tree

Hide file tree

Showing 4 changed files with 37 additions and 13 deletions.
diff --git a/PixivConstant.py b/PixivConstant.py
@@ -2,7 +2,7 @@
 # pylint: disable=I0011, C, C0302
 
 
-PIXIVUTIL_VERSION = '20160308-beta1'
+PIXIVUTIL_VERSION = '20160319'
 PIXIVUTIL_LINK = 'https://nandaka.wordpress.com/tag/pixiv-downloader/'
 PIXIV_URL = 'http://www.pixiv.net'
 PIXIV_URL_SSL = 'https://www.secure.pixiv.net/login.php'

diff --git a/PixivModel.py b/PixivModel.py
@@ -271,7 +271,8 @@ def IsNeedPermission(self, page):
     def IsDeleted(self, page):
         errorMessages = ['該当イラストは削除されたか、存在しないイラストIDです。|該当作品は削除されたか、存在しない作品IDです。',
                          'The following work is either deleted, or the ID does not exist.',
-                         'This work was deleted.']
+                         'This work was deleted.',
+                         'Work has been deleted or the ID does not exist.']
         return PixivHelper.HaveStrings(page, errorMessages)
 
     def IsGuroDisabled(self, page):
@@ -317,16 +318,38 @@ def ParseInfo(self, page):
         # remove premium-introduction-modal so we can get caption from work-info
         # somehow selecting section doesn't works
         premium_introduction_modal = page.findAll('div', attrs={'id': 'premium-introduction-modal'})
+        premium_introduction_modal.extend(page.findAll('div', attrs={'id': 'popular-search-trial-end-introduction-modal'}))
         for modal in premium_introduction_modal:
-            modal.extract()
-
-        meta_data = page.findAll('meta')
-        for meta in meta_data:
-            if meta.has_key("property"):
-                if "og:title" == meta["property"]:
-                    self.imageTitle = meta["content"].split("|")[0].strip()
-                if "og:description" in meta["property"]:
-                    self.imageCaption = meta["content"]
+            if modal is not None:
+                modal.extract()
+
+        #meta_data = page.findAll('meta')
+        #for meta in meta_data:
+        #    if meta.has_key("property"):
+        #        if "og:title" == meta["property"]:
+        #            self.imageTitle = meta["content"].split("|")[0].strip()
+        #        if "og:description" in meta["property"]:
+        #            self.imageCaption = meta["content"]
+
+        # new layout on 20160319
+        tempTitles = page.findAll('h1', attrs={'class':'title'})
+        for tempTitle in tempTitles:
+            if tempTitle is None or tempTitle.string is None:
+                continue
+            elif len(tempTitle.string) == 0:
+                continue
+            else:
+                self.imageTitle = tempTitle.string
+                break
+        tempCaptions = page.findAll('p', attrs={'class':'caption'})
+        for tempCaption in tempCaptions:
+            if tempCaption is None or tempCaption.string is None:
+                continue
+            elif len(tempCaption.string) == 0:
+                continue
+            else:
+                self.imageCaption = tempCaption.string
+                break
 
         self.jd_rtv = int(page.find(attrs={'class': 'view-count'}).string)
         self.jd_rtc = int(page.find(attrs={'class': 'rated-count'}).string)

diff --git a/changelog.txt b/changelog.txt
@@ -1,7 +1,8 @@
-20160308-beta1
+20160319
 - Fix unicode decode error when downloading image.
 - Implement #107: add option to specify different database.
 - Update readme.txt
+- Fix Issue #109: update parser.
 
 20160211:
 - Fix Issue #98: Fix error message when download fail.

diff --git a/readme.txt b/readme.txt
@@ -169,7 +169,7 @@ Q6: httperror_seek_wrapper: HTTP Error 403: request disallowed by robots.txt
                         1 - Download by member_id 
                             (required: followed by member_ids separated by space)
                         2 - Download by image_id  
-                            (required: folled by image_ids separated by space)
+                            (required: followed by image_ids separated by space)
                         3 - Download by tags      
                             (required: [y/n] for wildcard, start page, end page, 
                              followed by tags)