add 顶点小说网 23us.la支持

Quanwei1992 · Jan 6, 2020 · 107c4e9 · 107c4e9
1 parent f952aad
commit 107c4e9
Show file tree

Hide file tree

Showing 3 changed files with 312 additions and 9 deletions.
diff --git a/23us.la.go b/23us.la.go
@@ -0,0 +1,274 @@
+package main
+
+import (
+	"fmt"
+	"strings"
+	"sync"
+
+	"github.com/Aiicy/htmlquery"
+	pool "github.com/dgrr/goslaves"
+	"gopkg.in/schollz/progressbar.v2"
+)
+
+// Reference pages used to work out the URL rule:
+// https://www.23us.la/html/151/151850/ -> 罪域的骨终为王
+// https://www.23us.la/html/209/209550/ -> 文娱万岁
+// https://www.23us.la/html/113/113444/ -> 不朽凡人
+//
+// Ebook23US is the downloader for 顶点小说网 (23us.la).
+type Ebook23US struct {
+	Url string // site base URL; book and chapter links are built by appending to it
+}
+
+func New23US() Ebook23US {
+	return Ebook23US{
+		Url: "https://www.23us.la",
+	}
+}
+
+// GetBookInfo loads the index page of the book identified by bookid and
+// collects the book name, author, description, volume list and chapter list.
+//
+// handleBookid turns bookid into the path segment of the page URL. When proxy
+// is not empty the page is fetched through that proxy, otherwise directly.
+// If the page cannot be loaded, the error is printed and an empty BookInfo is
+// returned instead of querying a missing document.
+func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
+	// The first dd entries of the chapter list show the newest chapters and
+	// repeat entries that appear again further down, so they are skipped.
+	const latestChapterCount = 12
+
+	var (
+		bi          BookInfo
+		volumes     []Volume
+		chapters    []Chapter
+		chapterPos  []int // position of each kept chapter among all dd entries
+		hasVolume   bool
+		bookName    string
+		author      string
+		description string
+	)
+	pollURL := this.Url + "/" + "html/" + handleBookid(bookid) + "/"
+
+	// The two branches differ only in how the page is fetched; the extraction
+	// steps are intentionally identical.
+	if proxy != "" {
+		doc, err := htmlquery.LoadURLWithProxy(pollURL, proxy)
+		if err != nil {
+			fmt.Println(err.Error())
+			return bi
+		}
+
+		// Book name, author and description come from the page's og: meta tags.
+		if node, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:book_name']"); node != nil {
+			bookName = htmlquery.SelectAttr(node, "content")
+		}
+		if node, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:author']"); node != nil {
+			author = htmlquery.SelectAttr(node, "content")
+		}
+		if node, _ := htmlquery.FindOne(doc, "//meta[@property='og:description']"); node != nil {
+			description = htmlquery.SelectAttr(node, "content")
+		}
+
+		// Volume headings are the dt entries; the first dt belongs to the
+		// "latest chapters" section and is never a volume.
+		dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt")
+		if len(dtNode) >= 2 && TestContainVolume(htmlquery.InnerText(dtNode[1])) {
+			hasVolume = true
+			for index := 1; index < len(dtNode); index++ {
+				var tmp Volume
+				tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])
+				if index > 1 { // the first volume has no preceding chapter
+					// Look up the dd entry right before this heading.
+					if prev, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]"); prev != nil {
+						if aNode, _ := htmlquery.Find(prev, "//a"); len(aNode) > 0 {
+							tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
+							tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
+						}
+					}
+				}
+				volumes = append(volumes, tmp)
+			}
+		}
+
+		// Chapters are the dd entries after the "latest chapters" section.
+		ddNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dd")
+		for i := latestChapterCount; i < len(ddNode); i++ {
+			aNode, _ := htmlquery.Find(ddNode[i], "//a")
+			if len(aNode) == 0 {
+				continue // dd entry without a link carries no chapter
+			}
+			chapters = append(chapters, Chapter{
+				Link:  this.Url + htmlquery.SelectAttr(aNode[0], "href"),
+				Title: htmlquery.InnerText(aNode[0]),
+			})
+			chapterPos = append(chapterPos, i)
+		}
+	} else {
+		doc, err := htmlquery.LoadURL(pollURL)
+		if err != nil {
+			fmt.Println(err.Error())
+			return bi
+		}
+
+		// Book name, author and description come from the page's og: meta tags.
+		if node, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:book_name']"); node != nil {
+			bookName = htmlquery.SelectAttr(node, "content")
+		}
+		if node, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:author']"); node != nil {
+			author = htmlquery.SelectAttr(node, "content")
+		}
+		if node, _ := htmlquery.FindOne(doc, "//meta[@property='og:description']"); node != nil {
+			description = htmlquery.SelectAttr(node, "content")
+		}
+
+		// Volume headings are the dt entries; the first dt belongs to the
+		// "latest chapters" section and is never a volume.
+		dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt")
+		if len(dtNode) >= 2 && TestContainVolume(htmlquery.InnerText(dtNode[1])) {
+			hasVolume = true
+			for index := 1; index < len(dtNode); index++ {
+				var tmp Volume
+				tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])
+				if index > 1 { // the first volume has no preceding chapter
+					// Look up the dd entry right before this heading.
+					if prev, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]"); prev != nil {
+						if aNode, _ := htmlquery.Find(prev, "//a"); len(aNode) > 0 {
+							tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
+							tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
+						}
+					}
+				}
+				volumes = append(volumes, tmp)
+			}
+		}
+
+		// Chapters are the dd entries after the "latest chapters" section.
+		ddNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dd")
+		for i := latestChapterCount; i < len(ddNode); i++ {
+			aNode, _ := htmlquery.Find(ddNode[i], "//a")
+			if len(aNode) == 0 {
+				continue // dd entry without a link carries no chapter
+			}
+			chapters = append(chapters, Chapter{
+				Link:  this.Url + htmlquery.SelectAttr(aNode[0], "href"),
+				Title: htmlquery.InnerText(aNode[0]),
+			})
+			chapterPos = append(chapterPos, i)
+		}
+	}
+
+	// For every volume after the first, record the dd position of the chapter
+	// listed right before its heading. The first volume keeps PrevChapterId 0
+	// and an empty PrevChapter.
+	// NOTE(review): the id is the position among all dd entries (including the
+	// skipped ones), not the index into Chapters — confirm which one consumers expect.
+	if hasVolume && len(volumes) >= 2 {
+		for ci := range chapters {
+			for vi := 1; vi < len(volumes); vi++ {
+				if volumes[vi].PrevChapter.Link == chapters[ci].Link {
+					volumes[vi].PrevChapterId = chapterPos[ci]
+				}
+			}
+		}
+	}
+
+	fmt.Println("书名 = ", bookName)
+	fmt.Println("作者 = ", author)
+	fmt.Println("简介 = ", description)
+
+	// NOTE(review): HasVolume stays at its zero value on the returned value,
+	// matching the previous behaviour where the flag was set and then
+	// overwritten by this literal — confirm whether it should be propagated.
+	bi = BookInfo{
+		Name:        bookName,
+		Author:      author,
+		Description: description,
+		Volumes:     volumes,
+		Chapters:    chapters,
+	}
+	return bi
+}
+
+// GetChapterContent downloads the page at pc.C.Link (through pc.Proxy when it
+// is not empty) and returns a Chapter that carries the title and link of pc.C
+// plus the text of the page's div with id "content".
+//
+// If the page cannot be loaded the error is printed, and if the page has no
+// such div nothing is extracted; in both cases Content is returned empty.
+func (this Ebook23US) GetChapterContent(pc ProxyChapter) Chapter {
+	pollURL := pc.C.Link
+	var contentText string
+
+	// The two branches differ only in how the page is fetched.
+	if proxy := pc.Proxy; proxy != "" {
+		doc, err := htmlquery.LoadURLWithProxy(pollURL, proxy)
+		if err != nil {
+			fmt.Println(err.Error())
+		} else if node, _ := htmlquery.FindOne(doc, "//div[@id='content']"); node != nil {
+			contentText = htmlquery.InnerText(node)
+		}
+	} else {
+		doc, err := htmlquery.LoadURL(pollURL)
+		if err != nil {
+			fmt.Println(err.Error())
+		} else if node, _ := htmlquery.FindOne(doc, "//div[@id='content']"); node != nil {
+			contentText = htmlquery.InnerText(node)
+		}
+	}
+
+	// "\xE3\x80\x80" is the UTF-8 encoding of the ideographic space U+3000;
+	// every pair of them is replaced by a line break.
+	content := strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
+
+	return Chapter{
+		Title:   pc.C.Title,
+		Link:    pc.C.Link,
+		Content: content,
+	}
+}
+
+//根据每个章节的 url连接，下载每章对应的内容Content当中
+func (this Ebook23US) DownloadChapters(Bi BookInfo, proxy string) BookInfo {
+	chapters := Bi.Chapters
+	NumChapter := len(chapters)
+	ch := make(chan Chapter, 1)
+	locker := sync.Mutex{}
+	var bar *progressbar.ProgressBar
+
+	sp := pool.NewPool(0, func(obj interface{}) {
+		locker.Lock()
+		tmp := obj.(ProxyChapter)
+		content := this.GetChapterContent(tmp)
+		locker.Unlock()
+		ch <- content
+
+	})
+
+	go excuteServe(&sp, chapters, proxy)
+
+	//下载章节的时候显示进度条
+	bar = progressbar.New(NumChapter)
+	bar.RenderBlank()
+
+	for i := 0; i < len(chapters); {
+		select {
+		case c := <-ch:
+			chapters[i].Content = c.Content
+			i++
+		}
+		bar.Add(1)
+	}
+	sp.Close()
+
+	result := BookInfo{
+		Name:        Bi.Name,
+		Author:      Bi.Author,
+		Description: Bi.Description,
+		Chapters:    chapters,
+	}
+
+	return result
+}
+
+// TestContainVolume reports whether src is free of the marker "正文卷"; it
+// returns false only when src contains that marker.
+func TestContainVolume(src string) bool {
+	return strings.Index(src, "正文卷") == -1
+}
diff --git a/README.md b/README.md
@@ -14,10 +14,11 @@
   .\ebookdownloader.exe --bookid=0_642 --txt #只生成txt文本
   .\ebookdownloader.exe --bookid=0_642 --mobi #只生成mobi电子书
   .\ebookdownloader.exe --bookid=0_642 --txt --mobi #生成txt 和 mobi
-    .\ebookdownloader.exe --bookid=0_642 --txt --awz3 #生成txt 和 awz3
+  .\ebookdownloader.exe --bookid=0_642 --txt --awz3 #生成txt 和 awz3
   .\ebookdownloader.exe --proxy="http://proxyip:proxyport" --bookid=0_642 --mobi #生成mobi电子书，在下载章节的过程中使用 Proxy
   .\ebookdownloader.exe --ebhost=xsbiquge.com --bookid=0_642 --txt --mobi #使用xsbiquge.com做为下载源，生成txt 和 mobi
-    .\ebookdownloader.exe --ebhost=999xs.com --bookid=0_642 --txt --mobi #使用999xs.com做为下载源，生成txt 和 mobi
+  .\ebookdownloader.exe --ebhost=999xs.com --bookid=0_642 --txt --mobi #使用999xs.com做为下载源，生成txt 和 mobi
+  .\ebookdownloader.exe --ebhost=23us.la --bookid=127064 --pv #新功能，用于打印小说的分卷信息，此时不下载小说任何内容
   .\ebookdownloader.exe --help #显示帮助信息
   ```
 
@@ -32,6 +33,10 @@
 
   ## 更新日志
 
+      2020.01.06 go版本 更新
+                 1. 添加顶点小说 23us.la支持
+                 2. 初始支持把分卷信息写入相应的volumes结构体当中（还没有正式测试生成二级目录功能）
+                 
       2020.01.05 go版本 更新
                  1. 实现二级目录直接写入 tpl_*.html文件当中
                  2. 添加tpl/tpl_volume.html 用于生成目录分卷

diff --git a/ebookdl.go b/ebookdl.go
@@ -27,6 +27,7 @@ type BookInfo struct {
 
+// Volume describes one volume heading found in a book's chapter list.
 type Volume struct {
+	// PrevChapterId is the list position of the chapter that precedes this
+	// volume heading, as filled in by the 23us.la downloader; 0 for the first volume.
 	PrevChapterId int
+	// PrevChapter holds the link and title of the chapter listed right before
+	// this volume heading; empty for the first volume.
+	PrevChapter   Chapter
+	// CurrentVolume is the text of the volume heading.
 	CurrentVolume string
+	// NextChapterId is not assigned anywhere in the code shown here — TODO
+	// confirm its intended use.
 	NextChapterId int
 }
@@ -72,6 +73,16 @@ func (this *BookInfo) ChangeVolumeState(hasVolume bool) {
 	this.HasVolume = hasVolume
 }
 
+// PrintVolumeInfo prints, for every entry of this.Volumes, its index, its
+// PrevChapterId, the title of its PrevChapter and its volume name.
+func (this BookInfo) PrintVolumeInfo() {
+	for idx, vol := range this.Volumes {
+		fmt.Printf("index = %d\n", idx)
+		fmt.Printf("PrevChapterId= %d\n", vol.PrevChapterId)
+		fmt.Printf("PrevChapter.Title = %s\n", vol.PrevChapter.Title)
+		fmt.Printf("CurrentVolume = %s\n", vol.CurrentVolume)
+	}
+}
+
 //生成txt电子书
 func (this BookInfo) GenerateTxt() {
 	chapters := this.Chapters //小说的章节信息
@@ -292,18 +303,22 @@ func EbookDownloader(c *cli.Context) error {
 	isTxt := c.Bool("txt")
 	isMobi := c.Bool("mobi")
 	isAwz3 := c.Bool("awz3")
+	isPV := c.Bool("printvolume") //打印分卷信息，只用做调试时使用
 
 	var bookinfo BookInfo              //初始化变量
 	var EBDLInterface EBookDLInterface //初始化接口
 	//isTxt 或者 isMobi必须一个为真，或者两个都为真
-	if (isTxt || isMobi || isAwz3) || (isTxt && isMobi) || (isTxt && isAwz3) {
+	if (isTxt || isMobi || isAwz3) || (isTxt && isMobi) || (isTxt && isAwz3) || isPV {
 
 		if ebhost == "xsbiquge.com" {
 			xsbiquge := NewXSBiquge()
 			EBDLInterface = xsbiquge //实例化接口
 		} else if ebhost == "999xs.com" {
 			xs999 := New999XS()
 			EBDLInterface = xs999 //实例化接口
+		} else if ebhost == "23us.la" {
+			xs23 := New23US()
+			EBDLInterface = xs23 //实例化接口
 		} else {
 			cli.ShowAppHelpAndExit(c, 0)
 			return nil
@@ -315,9 +330,14 @@ func EbookDownloader(c *cli.Context) error {
 		}
 		bookinfo = EBDLInterface.GetBookInfo(bookid, proxy)
 
-		//下载章节内容
-		fmt.Printf("正在下载电子书的相应章节，请耐心等待！\n")
-		bookinfo = EBDLInterface.DownloadChapters(bookinfo, proxy)
+		//打印分卷信息，只用于调试
+		if isPV {
+			bookinfo.PrintVolumeInfo()
+		} else {
+			//下载章节内容
+			fmt.Printf("正在下载电子书的相应章节，请耐心等待！\n")
+			bookinfo = EBDLInterface.DownloadChapters(bookinfo, proxy)
+		}
 		//生成txt文件
 		if isTxt {
 			fmt.Printf("\n正在生成txt版本的电子书，请耐心等待！\n")
@@ -358,17 +378,17 @@ func main() {
 		},
 	}
 	app.Copyright = "(c) 2019 - 2020 Jimes Yang<[email protected]>"
-	app.Usage = "用于下载 笔趣阁(https://www.xsbiquge.com),999小说网(https://www.999xs.com/) 上面的电子书，并保存为txt格式或者(mobi格式,awz3格式)的电子书"
+	app.Usage = "用于下载 笔趣阁(https://www.xsbiquge.com),999小说网(https://www.999xs.com/) ,顶点小说网(https://www.23us.la) 上面的电子书，并保存为txt格式或者(mobi格式,awz3格式)的电子书"
 	app.Action = EbookDownloader
 	app.Flags = []cli.Flag{
 		cli.StringFlag{
 			Name:  "ebhost",
 			Value: "xsbiquge.com",
-			Usage: "定义下载ebook的网站地址(可选择xsbiquge.com,999xs.com)",
+			Usage: "定义下载ebook的网站地址(可选择xsbiquge.com,999xs.com,23us.la)",
 		},
 		cli.StringFlag{
 			Name:  "bookid,id",
-			Usage: "对应 笔趣阁id(https://www.xsbiquge.com/0_642/),其中0_642就是book_id;对应999小说网id(https://www.999xs.com/files/article/html/0/591/),其中591为book_id",
+			Usage: "对应笔趣阁id(https://www.xsbiquge.com/0_642/),其中0_642就是book_id;\n对应999小说网id(https://www.999xs.com/files/article/html/0/591/),其中591为book_id;\n对应顶点小说网id(https://www.23us.la/html/113/113444/),其中113444为bookid",
 		},
 		cli.StringFlag{
 			Name:  "proxy,p",
@@ -386,6 +406,10 @@ func main() {
 			Name:  "awz3",
 			Usage: "当使用的时候，生成awz3文件(不可与--mobi同时使用)",
 		},
+		cli.BoolFlag{
+			Name:  "printvolume,pv",
+			Usage: "打印分卷信息，只于调试时使用！(使用此功能的时候，不会下载章节内容)",
+		},
 	}
 
 	err := app.Run(os.Args)