From 107c4e91285c4187321820dd8f44e1f3172a17bf Mon Sep 17 00:00:00 2001
From: sndnvaps
Date: Mon, 6 Jan 2020 23:51:17 +0800
Subject: [PATCH] =?UTF-8?q?add=20=E9=A1=B6=E7=82=B9=E5=B0=8F=E8=AF=B4?=
 =?UTF-8?q?=E7=BD=91=2023us.la=E6=94=AF=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 23us.la.go | 277 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 README.md  |   9 +-
 ebookdl.go |  39 ++++++--
 3 files changed, 316 insertions(+), 9 deletions(-)
 create mode 100644 23us.la.go

diff --git a/23us.la.go b/23us.la.go
new file mode 100644
index 0000000..ad7bd70
--- /dev/null
+++ b/23us.la.go
@@ -0,0 +1,277 @@
+package main
+
+import (
+	"fmt"
+	"strings"
+	"sync"
+
+	"github.com/Aiicy/htmlquery"
+	pool "github.com/dgrr/goslaves"
+	"gopkg.in/schollz/progressbar.v2"
+)
+
+// Reference pages used to work out the scraping rules:
+// https://www.23us.la/html/151/151850/ -> 罪域的骨终为王
+// https://www.23us.la/html/209/209550/ -> 文娱万岁
+// https://www.23us.la/html/113/113444/ -> 不朽凡人
+
+// latestChapters23US is the number of leading <dd> entries on a book page
+// that only repeat the newest chapters. They are listed again further down,
+// so the chapter list starts after them.
+const latestChapters23US = 12
+
+// Ebook23US downloads books from 顶点小说网 (23us.la).
+type Ebook23US struct {
+	Url string
+}
+
+// New23US returns an Ebook23US that points at https://www.23us.la.
+func New23US() Ebook23US {
+	return Ebook23US{
+		Url: "https://www.23us.la",
+	}
+}
+
+// GetBookInfo loads the index page of the book identified by bookid and
+// returns its name, author, description, volumes and chapters. A non-empty
+// proxy is used to load the page. If the page cannot be loaded, the error is
+// printed and an empty BookInfo is returned.
+func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {
+	var bi BookInfo
+	var volumes []Volume
+	var chapters []Chapter
+	pollURL := this.Url + "/" + "html/" + handleBookid(bookid) + "/"
+
+	// The two branches differ only in how the page is loaded; keep them in sync.
+	if proxy != "" {
+		doc, err := htmlquery.LoadURLWithProxy(pollURL, proxy)
+		if err != nil {
+			fmt.Println(err.Error())
+			return bi
+		}
+
+		// Book name.
+		bookNameMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:book_name']")
+		bi.Name = htmlquery.SelectAttr(bookNameMeta, "content")
+		fmt.Println("书名 = ", bi.Name)
+
+		// Author.
+		AuthorMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:author']")
+		bi.Author = htmlquery.SelectAttr(AuthorMeta, "content")
+		fmt.Println("作者 = ", bi.Author)
+
+		// Description.
+		DescriptionMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:description']")
+		bi.Description = htmlquery.SelectAttr(DescriptionMeta, "content")
+		fmt.Println("简介 = ", bi.Description)
+
+		// Volume headings. The first <dt> heads the latest-chapters list, so
+		// the real headings start at index 1.
+		dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt")
+		if len(dtNode) >= 2 && TestContainVolume(htmlquery.InnerText(dtNode[1])) {
+			bi.ChangeVolumeState(true)
+			for index := 1; index < len(dtNode); index++ {
+				var tmp Volume
+				tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])
+				// The first volume has no previous chapter. For the others,
+				// record the chapter listed right before the heading.
+				if index > 1 {
+					PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]")
+					if PrevChapter != nil {
+						if aNode, _ := htmlquery.Find(PrevChapter, "//a"); len(aNode) > 0 {
+							tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
+							tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
+						}
+					}
+				}
+				volumes = append(volumes, tmp)
+			}
+		}
+
+		// Chapter list.
+		ddNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dd")
+		for i := latestChapters23US; i < len(ddNode); i++ {
+			aNode, _ := htmlquery.Find(ddNode[i], "//a")
+			if len(aNode) == 0 {
+				continue // entry without a link
+			}
+			var tmp Chapter
+			tmp.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
+			tmp.Title = htmlquery.InnerText(aNode[0])
+			// NOTE(review): i counts <dd> entries, including the skipped
+			// ones; confirm that this is what PrevChapterId should hold.
+			for index := 1; index < len(volumes); index++ {
+				if volumes[index].PrevChapter.Link == tmp.Link {
+					volumes[index].PrevChapterId = i
+				}
+			}
+			chapters = append(chapters, tmp)
+		}
+	} else {
+		doc, err := htmlquery.LoadURL(pollURL)
+		if err != nil {
+			fmt.Println(err.Error())
+			return bi
+		}
+
+		// Book name.
+		bookNameMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:book_name']")
+		bi.Name = htmlquery.SelectAttr(bookNameMeta, "content")
+		fmt.Println("书名 = ", bi.Name)
+
+		// Author.
+		AuthorMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:author']")
+		bi.Author = htmlquery.SelectAttr(AuthorMeta, "content")
+		fmt.Println("作者 = ", bi.Author)
+
+		// Description.
+		DescriptionMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:description']")
+		bi.Description = htmlquery.SelectAttr(DescriptionMeta, "content")
+		fmt.Println("简介 = ", bi.Description)
+
+		// Volume headings. The first <dt> heads the latest-chapters list, so
+		// the real headings start at index 1.
+		dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt")
+		if len(dtNode) >= 2 && TestContainVolume(htmlquery.InnerText(dtNode[1])) {
+			bi.ChangeVolumeState(true)
+			for index := 1; index < len(dtNode); index++ {
+				var tmp Volume
+				tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])
+				// The first volume has no previous chapter. For the others,
+				// record the chapter listed right before the heading.
+				if index > 1 {
+					PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]")
+					if PrevChapter != nil {
+						if aNode, _ := htmlquery.Find(PrevChapter, "//a"); len(aNode) > 0 {
+							tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
+							tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
+						}
+					}
+				}
+				volumes = append(volumes, tmp)
+			}
+		}
+
+		// Chapter list.
+		ddNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dd")
+		for i := latestChapters23US; i < len(ddNode); i++ {
+			aNode, _ := htmlquery.Find(ddNode[i], "//a")
+			if len(aNode) == 0 {
+				continue // entry without a link
+			}
+			var tmp Chapter
+			tmp.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
+			tmp.Title = htmlquery.InnerText(aNode[0])
+			// NOTE(review): i counts <dd> entries, including the skipped
+			// ones; confirm that this is what PrevChapterId should hold.
+			for index := 1; index < len(volumes); index++ {
+				if volumes[index].PrevChapter.Link == tmp.Link {
+					volumes[index].PrevChapterId = i
+				}
+			}
+			chapters = append(chapters, tmp)
+		}
+	}
+
+	// Fill bi field by field so that the volume flag set above is kept.
+	bi.Volumes = volumes
+	bi.Chapters = chapters
+	return bi
+}
+
+// GetChapterContent downloads the page of the chapter in pc, through
+// pc.Proxy when it is set, and returns the chapter with Content filled in.
+// If the page cannot be loaded or has no content block, Content stays empty;
+// a load error is printed.
+func (this Ebook23US) GetChapterContent(pc ProxyChapter) Chapter {
+	result := Chapter{
+		Title: pc.C.Title,
+		Link:  pc.C.Link,
+	}
+
+	var contentText string
+	if pc.Proxy != "" {
+		doc, err := htmlquery.LoadURLWithProxy(pc.C.Link, pc.Proxy)
+		if err != nil {
+			fmt.Println(err.Error())
+			return result
+		}
+		contentNode, _ := htmlquery.FindOne(doc, "//div[@id='content']")
+		if contentNode == nil {
+			return result
+		}
+		contentText = htmlquery.InnerText(contentNode)
+	} else {
+		doc, err := htmlquery.LoadURL(pc.C.Link)
+		if err != nil {
+			fmt.Println(err.Error())
+			return result
+		}
+		contentNode, _ := htmlquery.FindOne(doc, "//div[@id='content']")
+		if contentNode == nil {
+			return result
+		}
+		contentText = htmlquery.InnerText(contentNode)
+	}
+
+	// Each pair of ideographic spaces (U+3000 U+3000) becomes a line break.
+	result.Content = strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
+	return result
+}
+
+// DownloadChapters fetches the content of every chapter in Bi through a
+// worker pool while showing a progress bar. It returns a BookInfo holding
+// Bi's name, author and description together with the filled chapters.
+func (this Ebook23US) DownloadChapters(Bi BookInfo, proxy string) BookInfo {
+	chapters := Bi.Chapters
+	NumChapter := len(chapters)
+	ch := make(chan Chapter, 1)
+	locker := sync.Mutex{}
+
+	// Results are matched to chapters by link instead of by arrival order,
+	// so a chapter cannot receive the text of another one.
+	// NOTE(review): assumes excuteServe submits each chapter with its Link unchanged.
+	pending := make(map[string][]int, NumChapter)
+	for i := range chapters {
+		pending[chapters[i].Link] = append(pending[chapters[i].Link], i)
+	}
+
+	sp := pool.NewPool(0, func(obj interface{}) {
+		// The lock lets only one worker fetch a page at a time.
+		locker.Lock()
+		tmp := obj.(ProxyChapter)
+		content := this.GetChapterContent(tmp)
+		locker.Unlock()
+		ch <- content
+	})
+
+	go excuteServe(&sp, chapters, proxy)
+
+	// Show a progress bar while the chapters are downloaded.
+	bar := progressbar.New(NumChapter)
+	bar.RenderBlank()
+
+	for received := 0; received < NumChapter; received++ {
+		c := <-ch
+		if slots := pending[c.Link]; len(slots) > 0 {
+			chapters[slots[0]].Content = c.Content
+			pending[c.Link] = slots[1:]
+		}
+		bar.Add(1)
+	}
+	sp.Close()
+
+	return BookInfo{
+		Name:        Bi.Name,
+		Author:      Bi.Author,
+		Description: Bi.Description,
+		Chapters:    chapters,
+	}
+}
+
+// TestContainVolume reports whether src, the text of the first heading after
+// the latest-chapters list, names a real volume. A heading containing
+// "正文卷" is taken to mean that the book is not split into volumes.
+func TestContainVolume(src string) bool {
+	return !strings.Contains(src, "正文卷")
+}
diff --git a/README.md b/README.md
index aabcd2e..bada679 100644
--- a/README.md
+++ b/README.md
@@ -14,10 +14,11 @@
  .\ebookdownloader.exe --bookid=0_642 --txt #只生成txt文本
  .\ebookdownloader.exe --bookid=0_642 --mobi #只生成mobi电子书
  .\ebookdownloader.exe --bookid=0_642 --txt --mobi #生成txt 和 mobi
- .\ebookdownloader.exe --bookid=0_642 --txt --awz3 #生成txt 和 awz3
+ .\ebookdownloader.exe --bookid=0_642 --txt --awz3 #生成txt 和 awz3
  .\ebookdownloader.exe --proxy="http://proxyip:proxyport" --bookid=0_642 --mobi #生成mobi电子书,在下载章节的过程中使用 Proxy
  .\ebookdownloader.exe --ebhost=xsbiquge.com --bookid=0_642 --txt --mobi #使用xsbiquge.com做为下载源,生成txt 和 mobi
- .\ebookdownloader.exe --ebhost=999xs.com --bookid=0_642 --txt --mobi #使用999xs.com做为下载源,生成txt 和 mobi
+ .\ebookdownloader.exe --ebhost=999xs.com --bookid=0_642 --txt --mobi #使用999xs.com做为下载源,生成txt 和 mobi
+ .\ebookdownloader.exe --ebhost=23us.la --bookid=127064 --pv #新功能,用于打印小说的分卷信息,此时不下载小说任何内容
  .\ebookdownloader.exe --help #显示帮助信息
 ```
 
@@ -32,6 +33,10 @@
 
 ## 更新日志
 
+ 2020.01.06 go版本 更新
+ 1. 添加顶点小说 23us.la支持
+ 2. 初始支持把分卷信息写入相应的volumes结构体当中(还没有正式测试生成二级目录功能)
+
  2020.01.05 go版本 更新
  1. 实现二级目录直接写入 tpl_*.html文件当中
  2. 添加tpl/tpl_volume.html 用于生成目录分卷
diff --git a/ebookdl.go b/ebookdl.go
index 05440d3..27b5905 100644
--- a/ebookdl.go
+++ b/ebookdl.go
@@ -27,6 +27,7 @@ type BookInfo struct {
 
 type Volume struct {
 	PrevChapterId int
+	PrevChapter   Chapter
 	CurrentVolume string
 	NextChapterId int
 }
@@ -72,6 +73,17 @@ func (this *BookInfo) ChangeVolumeState(hasVolume bool) {
 	this.HasVolume = hasVolume
 }
 
+// PrintVolumeInfo prints every volume's index, PrevChapterId, previous chapter title and volume name.
+func (this BookInfo) PrintVolumeInfo() {
+	volumes := this.Volumes
+	for index := 0; index < len(volumes); index++ {
+		fmt.Printf("index = %d\n", index)
+		fmt.Printf("PrevChapterId= %d\n", volumes[index].PrevChapterId)
+		fmt.Printf("PrevChapter.Title = %s\n", volumes[index].PrevChapter.Title)
+		fmt.Printf("CurrentVolume = %s\n", volumes[index].CurrentVolume)
+	}
+}
+
 //生成txt电子书
 func (this BookInfo) GenerateTxt() {
 	chapters := this.Chapters //小说的章节信息
@@ -292,11 +304,12 @@ func EbookDownloader(c *cli.Context) error {
 	isTxt := c.Bool("txt")
 	isMobi := c.Bool("mobi")
 	isAwz3 := c.Bool("awz3")
+	isPV := c.Bool("printvolume") // print the volume information; debugging only
 
 	var bookinfo BookInfo              //初始化变量
 	var EBDLInterface EBookDLInterface //初始化接口
 	//isTxt 或者 isMobi必须一个为真,或者两个都为真
-	if (isTxt || isMobi || isAwz3) || (isTxt && isMobi) || (isTxt && isAwz3) {
+	if (isTxt || isMobi || isAwz3) || (isTxt && isMobi) || (isTxt && isAwz3) || isPV {
 
 		if ebhost == "xsbiquge.com" {
 			xsbiquge := NewXSBiquge()
@@ -304,6 +317,9 @@ func EbookDownloader(c *cli.Context) error {
 		} else if ebhost == "999xs.com" {
 			xs999 := New999XS()
 			EBDLInterface = xs999 //实例化接口
+		} else if ebhost == "23us.la" {
+			xs23 := New23US()
+			EBDLInterface = xs23 // instantiate the interface
 		} else {
 			cli.ShowAppHelpAndExit(c, 0)
 			return nil
@@ -315,9 +331,14 @@ func EbookDownloader(c *cli.Context) error {
 		}
 		bookinfo = EBDLInterface.GetBookInfo(bookid, proxy)
 
-		//下载章节内容
-		fmt.Printf("正在下载电子书的相应章节,请耐心等待!\n")
-		bookinfo = EBDLInterface.DownloadChapters(bookinfo, proxy)
+		// Print the volume information (debugging only) instead of downloading.
+		if isPV {
+			bookinfo.PrintVolumeInfo()
+		} else {
+			// Download the chapter contents.
+			fmt.Printf("正在下载电子书的相应章节,请耐心等待!\n")
+			bookinfo = EBDLInterface.DownloadChapters(bookinfo, proxy)
+		}
 		//生成txt文件
 		if isTxt {
 			fmt.Printf("\n正在生成txt版本的电子书,请耐心等待!\n")
@@ -358,17 +379,17 @@ func main() {
 		},
 	}
 	app.Copyright = "(c) 2019 - 2020 Jimes Yang"
-	app.Usage = "用于下载 笔趣阁(https://www.xsbiquge.com),999小说网(https://www.999xs.com/) 上面的电子书,并保存为txt格式或者(mobi格式,awz3格式)的电子书"
+	app.Usage = "用于下载 笔趣阁(https://www.xsbiquge.com),999小说网(https://www.999xs.com/) ,顶点小说网(https://www.23us.la) 上面的电子书,并保存为txt格式或者(mobi格式,awz3格式)的电子书"
 	app.Action = EbookDownloader
 	app.Flags = []cli.Flag{
 		cli.StringFlag{
 			Name:  "ebhost",
 			Value: "xsbiquge.com",
-			Usage: "定义下载ebook的网站地址(可选择xsbiquge.com,999xs.com)",
+			Usage: "定义下载ebook的网站地址(可选择xsbiquge.com,999xs.com,23us.la)",
 		},
 		cli.StringFlag{
 			Name:  "bookid,id",
-			Usage: "对应 笔趣阁id(https://www.xsbiquge.com/0_642/),其中0_642就是book_id;对应999小说网id(https://www.999xs.com/files/article/html/0/591/),其中591为book_id",
+			Usage: "对应笔趣阁id(https://www.xsbiquge.com/0_642/),其中0_642就是book_id;\n对应999小说网id(https://www.999xs.com/files/article/html/0/591/),其中591为book_id;\n对应顶点小说网id(https://www.23us.la/html/113/113444/),其中113444为bookid",
 		},
 		cli.StringFlag{
 			Name:  "proxy,p",
@@ -386,6 +407,10 @@ func main() {
 			Name:  "awz3",
 			Usage: "当使用的时候,生成awz3文件(不可与--mobi同时使用)",
 		},
+		cli.BoolFlag{
+			Name:  "printvolume,pv",
+			Usage: "打印分卷信息,只于调试时使用!(使用此功能的时候,不会下载章节内容)",
+		},
 	}
 
 	err := app.Run(os.Args)