Skip to content

Commit

Permalink
fix exception handling on error 404
Browse files Browse the repository at this point in the history
  • Loading branch information
safirex committed Aug 13, 2022
1 parent 449366f commit acf035b
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 4 deletions.
5 changes: 3 additions & 2 deletions archive_updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@

def dev_tests():
# x = Novel('n6912eh', 'My Skills Are Too Strong to Be a Heroine')
x = Novel("1177354054882979595", "She Is a Quiet Girl, But a Noisy Telepath")
x = x.updateObject()
# x = Novel("1177354054882979595", "She Is a Quiet Girl, But a Noisy Telepath")
x = NovelPia(Novel('49942',"Omniscient First Person View "))
# x = x.updateObject()
print(type(x))
x.setLastChapter(0)
x.processNovel()
Expand Down
38 changes: 36 additions & 2 deletions src/Downloaders.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# coding: utf-8
from abc import ABC, abstractmethod
from urllib.error import HTTPError
import requests
import re
from bs4 import BeautifulSoup
Expand Down Expand Up @@ -174,6 +175,7 @@ def fetchTOCPage(self):
print()
rep = requests.get(url, headers=headers)
rep.encoding = 'utf-8'
rep.raise_for_status()
html = rep.text
self.html = html
return html
Expand All @@ -189,8 +191,11 @@ def parseTocResume(self, html=''):
def processNovel(self):
print("novel " + self.titre)
print('last chapter: ' + str(self.getLastChapter()))

html = self.fetchTOCPage();
try:
html = self.fetchTOCPage();
except requests.HTTPError :
print("can't acces the novel TOC page")
return ''
# get the number of chapters (solely for user feedback)
online_chapter_list = self.parseOnlineChapterList(html)
if (self.getLastChapter() == 0):
Expand Down Expand Up @@ -624,3 +629,32 @@ def connectViaMechanize(self, url):
content = resp.get_data()
soup = BeautifulSoup(content, 'html.parser')
return str(soup)

class NovelPia(Novel):
    """Novel whose table-of-contents page lives on novelpia.com.

    The TOC page is fetched through ``requests_html`` and rendered before
    its markup is handed to the parsers inherited from ``Novel``.
    """

    def __init__(self, novel):
        """Build a NovelPia from an existing ``Novel``.

        Copies ``code``, ``titre`` and ``keep_text_format`` from *novel*.
        """
        super().__init__(novel.code, novel.titre, novel.keep_text_format)

    def setUrl(self):
        """Set ``self.url`` to the novelpia.com page for ``self.code``."""
        # Single slash before the code: the previous template produced
        # ".../novel//<code>".
        self.url = f'https://novelpia.com/novel/{self.code}'

    def fetchTOCPage(self):
        """Fetch and render the TOC page, returning its HTML.

        The HTML is also stored on ``self.html``, like the requests-based
        ``fetchTOCPage`` elsewhere in this module, so ``processNovel`` can
        pass the result straight to ``parseOnlineChapterList``.

        Returns:
            str: the page markup after rendering.

        Raises:
            requests.HTTPError: when the server answers with a 4xx/5xx
                status (``processNovel`` catches this to report an
                unreachable TOC page).
        """
        # Imported lazily so requests_html is only needed when a NovelPia
        # page is actually fetched.
        from requests_html import HTMLSession

        # The context manager closes the session when we are done with it.
        with HTMLSession() as session:
            r = session.get(self.url)
            # Fail before rendering: there is no point rendering an error page.
            r.raise_for_status()
            r.html.render()
            html = r.html.html
        self.html = html
        return html

    def parseOnlineChapterList(self, html) -> list:
        """Delegate to the inherited chapter-list parser (placeholder override)."""
        return super().parseOnlineChapterList(html)

    def parseTitle(self, TocHTML) -> str:
        """Delegate to the inherited title parser (placeholder override)."""
        return super().parseTitle(TocHTML)

    def parseTocResume(self, html=''):
        """Delegate to the inherited summary parser (placeholder override)."""
        return super().parseTocResume(html)


0 comments on commit acf035b

Please sign in to comment.