Skip to content

Commit

Permalink
Fix image caption parsing
Browse files (browse the repository at this point in the history)
  • Loading branch information
Nandaka committed Aug 6, 2016
1 parent c2424e9 commit 0c6845a
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions PixivModel.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -346,21 +346,21 @@ def ParseInfo(self, page):
else:
self.imageTitle = tempTitle.string
break
tempCaptions = page.findAll('p', attrs={'class':'caption'})
for tempCaption in tempCaptions:
if tempCaption is None or tempCaption.string is None:

descriptionPara = page.findAll("p", attrs={'class': 'caption'})
for tempCaption in descriptionPara:
if tempCaption is None or tempCaption.text is None:
continue
elif len(tempCaption.string) == 0:
elif len(tempCaption.text.strip()) == 0:
continue
else:
self.imageCaption = tempCaption.string
break
self.imageCaption = tempCaption.text
#break

self.jd_rtv = int(page.find(attrs={'class': 'view-count'}).string)
self.jd_rtc = int(page.find(attrs={'class': 'rated-count'}).string)
self.jd_rtt = int(page.find(attrs={'class': 'score-count'}).string)

descriptionPara = page.findAll("p", attrs={'class': 'caption'})
if descriptionPara is not None and len(descriptionPara) > 0:
for para in descriptionPara:
links = para.findAll("a")
Expand Down

0 comments on commit 0c6845a

Please sign in to comment.