Skip to content

Commit

Permalink
add 顶点小说网 23us.la支持
Browse files Browse the repository at this point in the history
  • Loading branch information
sndnvaps committed Jan 6, 2020
1 parent f952aad commit 107c4e9
Show file tree
Hide file tree
Showing 3 changed files with 312 additions and 9 deletions.
274 changes: 274 additions & 0 deletions 23us.la.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
package main

import (
"fmt"
"strings"
"sync"

"github.com/Aiicy/htmlquery"
pool "github.com/dgrr/goslaves"
"gopkg.in/schollz/progressbar.v2"
)

// Ebook23US is the downloader for 顶点小说网 (23us.la).
//
// Reference index pages used while writing the scraping rules:
//   https://www.23us.la/html/151/151850/ -> 罪域的骨终为王
//   https://www.23us.la/html/209/209550/ -> 文娱万岁
//   https://www.23us.la/html/113/113444/ -> 不朽凡人
type Ebook23US struct {
// Url is the base address of the site; New23US sets it without a trailing
// slash and GetBookInfo appends the "/html/<bookid>/" path to it.
Url string
}

// New23US returns an Ebook23US whose Url is the 23us.la site root.
func New23US() Ebook23US {
	var site Ebook23US
	site.Url = "https://www.23us.la"
	return site
}

func (this Ebook23US) GetBookInfo(bookid string, proxy string) BookInfo {

var bi BookInfo
var volumes []Volume
var chapters []Chapter
pollURL := this.Url + "/" + "html/" + handleBookid(bookid) + "/"

//当 proxy 不为空的时候,表示设置代理
if proxy != "" {
doc, err := htmlquery.LoadURLWithProxy(pollURL, proxy)
if err != nil {
fmt.Println(err.Error())
}

//获取书名字
bookNameMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:book_name']")
bookName := htmlquery.SelectAttr(bookNameMeta, "content")
fmt.Println("书名 = ", bookName)

//获取书作者
AuthorMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:author']")
author := htmlquery.SelectAttr(AuthorMeta, "content")
fmt.Println("作者 = ", author)

//获取书的描述信息
DescriptionMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:description']")
description := htmlquery.SelectAttr(DescriptionMeta, "content")
fmt.Println("简介 = ", description)

//获取书分卷信息
dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt") //获取书分卷信息
testVolStr := htmlquery.InnerText(dtNode[1])
if TestContainVolume(testVolStr) {
bi.ChangeVolumeState(true)
if len(dtNode) == 2 { //就是说刚好两个节点,我们要去除第一个,只保留第二个
var tmp Volume
tmp.CurrentVolume = htmlquery.InnerText(dtNode[1])
volumes = append(volumes, tmp)
} else { //当len(dtNode) >= 3
for index := 1; index < len(dtNode); index++ { //因为第一个为 最新章节部分,需要去掉
var tmp Volume
//tmp.PrevChapterId =
PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]") // 根据当前节点,查找上一个dd节点
aNode, _ := htmlquery.Find(PrevChapter, "//a")
tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])
volumes = append(volumes, tmp)
}
}
volumes[0].PrevChapterId = 0 //第一分卷,前面的章节,设置为0
volumes[0].PrevChapter.Link = "" //第一分卷,前面的章节,连接设置为空
volumes[0].PrevChapter.Title = "" //第一分卷,前面的章节,标题设置为空
}
//获取书章节列表
ddNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dd")
for i := 0; i < len(ddNode); i++ {
var tmp Chapter
aNode, _ := htmlquery.Find(ddNode[i], "//a")
tmp.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.Title = htmlquery.InnerText(aNode[0])
if bi.HasVolume && len(volumes) >= 2 { //正式写入 PrevChapterId
for index := 1; index < len(volumes); index++ { //第二个分卷开始,前面就有章节内容了
if volumes[index].PrevChapter.Link == tmp.Link {
volumes[index].PrevChapterId = i
}
}
}
chapters = append(chapters, tmp)
}

//导入信息
bi = BookInfo{
Name: bookName,
Author: author,
Description: description,
Volumes: volumes,
Chapters: chapters,
}
} else { //没有设置代理
doc, err := htmlquery.LoadURL(pollURL)
if err != nil {
fmt.Println(err.Error())
}

//获取书名字
bookNameMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:book_name']")
bookName := htmlquery.SelectAttr(bookNameMeta, "content")
fmt.Println("书名 = ", bookName)

//获取书作者
AuthorMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:novel:author']")
author := htmlquery.SelectAttr(AuthorMeta, "content")
fmt.Println("作者 = ", author)

//获取书的描述信息
DescriptionMeta, _ := htmlquery.FindOne(doc, "//meta[@property='og:description']")
description := htmlquery.SelectAttr(DescriptionMeta, "content")
fmt.Println("简介 = ", description)

//获取书分卷信息
dtNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dt") //获取书分卷信息
testVolStr := htmlquery.InnerText(dtNode[1])
if TestContainVolume(testVolStr) {
bi.ChangeVolumeState(true)
if len(dtNode) == 2 { //就是说刚好两个节点,我们要去除第一个,只保留第二个
var tmp Volume
tmp.CurrentVolume = htmlquery.InnerText(dtNode[1])
volumes = append(volumes, tmp)
} else { //当len(dtNode) >= 3
for index := 1; index < len(dtNode); index++ { //因为第一个为 最新章节部分,需要去掉
var tmp Volume
//tmp.PrevChapterId =
PrevChapter, _ := htmlquery.FindOne(dtNode[index], "//preceding-sibling::dd[1]") // 根据当前节点,查找上一个dd节点
aNode, _ := htmlquery.Find(PrevChapter, "//a")
tmp.PrevChapter.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.PrevChapter.Title = htmlquery.InnerText(aNode[0])
tmp.CurrentVolume = htmlquery.InnerText(dtNode[index])
volumes = append(volumes, tmp)
}
}
volumes[0].PrevChapterId = 0 //第一分卷,前面的章节,设置为0
volumes[0].PrevChapter.Link = "" //第一分卷,前面的章节,连接设置为空
volumes[0].PrevChapter.Title = "" //第一分卷,前面的章节,标题设置为空
}
//获取书章节列表
ddNode, _ := htmlquery.Find(doc, "//dl[@class='chapterlist']//dd")
for i := 12; i < len(ddNode); i++ { //因为前面的12个ddNode为显示最新的12章,与后面的会重复,所以直接Drop
var tmp Chapter
aNode, _ := htmlquery.Find(ddNode[i], "//a")
tmp.Link = this.Url + htmlquery.SelectAttr(aNode[0], "href")
tmp.Title = htmlquery.InnerText(aNode[0])

if bi.HasVolume && len(volumes) >= 2 { //正式写入 PrevChapterId
for index := 1; index < len(volumes); index++ { //第二个分卷开始,前面就有章节内容了
if volumes[index].PrevChapter.Link == tmp.Link {
volumes[index].PrevChapterId = i
}
}
}
chapters = append(chapters, tmp)
}

//导入信息
bi = BookInfo{
Name: bookName,
Author: author,
Description: description,
Volumes: volumes,
Chapters: chapters,
}
}
return bi
}

// GetChapterContent downloads the page behind pc.C.Link, through pc.Proxy when
// that is non-empty, and returns a Chapter that carries the title and link of
// pc.C together with the text of the page's <div id="content"> element.
//
// If the page cannot be loaded or has no content element, the error (if any)
// is printed and the returned Chapter has an empty Content.
func (this Ebook23US) GetChapterContent(pc ProxyChapter) Chapter {
	pollURL := pc.C.Link
	proxy := pc.Proxy
	result := Chapter{
		Title: pc.C.Title,
		Link:  pc.C.Link,
	}

	// The branches differ only in how the page is fetched; keep them in sync.
	var contentText string
	if proxy != "" {
		doc, err := htmlquery.LoadURLWithProxy(pollURL, proxy)
		if err != nil {
			fmt.Println(err.Error())
			return result
		}
		contentNode, err := htmlquery.FindOne(doc, "//div[@id='content']")
		if err != nil {
			fmt.Println(err.Error())
			return result
		}
		if contentNode == nil {
			return result
		}
		contentText = htmlquery.InnerText(contentNode)
	} else {
		doc, err := htmlquery.LoadURL(pollURL)
		if err != nil {
			fmt.Println(err.Error())
			return result
		}
		contentNode, err := htmlquery.FindOne(doc, "//div[@id='content']")
		if err != nil {
			fmt.Println(err.Error())
			return result
		}
		if contentNode == nil {
			return result
		}
		contentText = htmlquery.InnerText(contentNode)
	}

	// "\xE3\x80\x80" is the UTF-8 encoding of U+3000 (ideographic space);
	// every pair of them is turned into a CRLF line break.
	result.Content = strings.Replace(contentText, "\xE3\x80\x80\xE3\x80\x80", "\r\n", -1)
	return result
}

// DownloadChapters fetches the content of every chapter in Bi.Chapters through
// a worker pool, showing a progress bar while doing so, and returns a BookInfo
// holding Bi's name, author and description together with the filled-in
// chapters. proxy is handed on to the workers unchanged.
//
// Each downloaded chapter is stored on the entry of Bi.Chapters that has the
// same Link, so the result does not depend on the order in which the workers
// deliver their results.
func (this Ebook23US) DownloadChapters(Bi BookInfo, proxy string) BookInfo {
	chapters := Bi.Chapters
	NumChapter := len(chapters)
	ch := make(chan Chapter, 1)
	locker := sync.Mutex{}

	// pending maps a chapter link to the indices that still wait for their
	// content; a slice is used because the same link may be listed twice.
	pending := make(map[string][]int, NumChapter)
	for i := range chapters {
		link := chapters[i].Link
		pending[link] = append(pending[link], i)
	}

	sp := pool.NewPool(0, func(obj interface{}) {
		tmp := obj.(ProxyChapter)
		// The mutex keeps a single page request in flight at a time.
		locker.Lock()
		content := this.GetChapterContent(tmp)
		locker.Unlock()
		ch <- content
	})

	go excuteServe(&sp, chapters, proxy)

	// Show a progress bar while the chapters are being downloaded.
	bar := progressbar.New(NumChapter)
	bar.RenderBlank()

	for received := 0; received < NumChapter; received++ {
		c := <-ch
		if waiting := pending[c.Link]; len(waiting) > 0 {
			chapters[waiting[0]].Content = c.Content
			pending[c.Link] = waiting[1:]
		}
		bar.Add(1)
	}
	sp.Close()

	// NOTE(review): Volumes and the volume flag of Bi are not carried over,
	// exactly as before — confirm whether that is intended.
	result := BookInfo{
		Name:        Bi.Name,
		Author:      Bi.Author,
		Description: Bi.Description,
		Chapters:    chapters,
	}

	return result
}

// TestContainVolume reports whether src, the text of a chapter-list heading,
// lacks the marker "正文卷"; GetBookInfo treats such a heading as the start of
// a volume.
func TestContainVolume(src string) bool {
	const mainTextMarker = "正文卷"
	isMainText := strings.Contains(src, mainTextMarker)
	return !isMainText
}
9 changes: 7 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@
.\ebookdownloader.exe --bookid=0_642 --txt #只生成txt文本
.\ebookdownloader.exe --bookid=0_642 --mobi #只生成mobi电子书
.\ebookdownloader.exe --bookid=0_642 --txt --mobi #生成txt 和 mobi
.\ebookdownloader.exe --bookid=0_642 --txt --awz3 #生成txt 和 awz3
.\ebookdownloader.exe --bookid=0_642 --txt --awz3 #生成txt 和 awz3
.\ebookdownloader.exe --proxy="http://proxyip:proxyport" --bookid=0_642 --mobi #生成mobi电子书,在下载章节的过程中使用 Proxy
.\ebookdownloader.exe --ebhost=xsbiquge.com --bookid=0_642 --txt --mobi #使用xsbiquge.com做为下载源,生成txt 和 mobi
.\ebookdownloader.exe --ebhost=999xs.com --bookid=0_642 --txt --mobi #使用999xs.com做为下载源,生成txt 和 mobi
.\ebookdownloader.exe --ebhost=999xs.com --bookid=0_642 --txt --mobi #使用999xs.com做为下载源,生成txt 和 mobi
.\ebookdownloader.exe --ebhost=23us.la --bookid=127064 --pv #新功能,用于打印小说的分卷信息,此时不下载小说任何内容
.\ebookdownloader.exe --help #显示帮助信息
```

Expand All @@ -32,6 +33,10 @@

## 更新日志

2020.01.06 go版本 更新
1. 添加顶点小说 23us.la支持
2. 初始支持把分卷信息写入相应的volumes结构体当中(还没有正式测试生成二级目录功能)
2020.01.05 go版本 更新
1. 实现二级目录直接写入 tpl_*.html文件当中
2. 添加tpl/tpl_volume.html 用于生成目录分卷
Expand Down
38 changes: 31 additions & 7 deletions ebookdl.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ type BookInfo struct {

// Volume describes one volume heading found in a book's chapter list.
type Volume struct {
// PrevChapterId is the index recorded for the chapter listed directly before
// this volume heading; the first volume uses 0.
PrevChapterId int
// PrevChapter holds the link and title of the chapter listed directly before
// this volume heading; it is left empty for the first volume.
PrevChapter Chapter
// CurrentVolume is the text of the volume heading.
CurrentVolume string
// NextChapterId is presumably the index of the first chapter after the
// heading — TODO confirm; nothing visible here assigns it.
NextChapterId int
}
Expand Down Expand Up @@ -72,6 +73,16 @@ func (this *BookInfo) ChangeVolumeState(hasVolume bool) {
this.HasVolume = hasVolume
}

// PrintVolumeInfo writes, for every entry of Volumes, its position, the
// PrevChapterId, the title of PrevChapter and the volume heading to standard
// output. It is meant as a debugging aid.
func (this BookInfo) PrintVolumeInfo() {
	for index, volume := range this.Volumes {
		fmt.Printf("index = %d\n", index)
		fmt.Printf("PrevChapterId= %d\n", volume.PrevChapterId)
		fmt.Printf("PrevChapter.Title = %s\n", volume.PrevChapter.Title)
		fmt.Printf("CurrentVolume = %s\n", volume.CurrentVolume)
	}
}

//生成txt电子书
func (this BookInfo) GenerateTxt() {
chapters := this.Chapters //小说的章节信息
Expand Down Expand Up @@ -292,18 +303,22 @@ func EbookDownloader(c *cli.Context) error {
isTxt := c.Bool("txt")
isMobi := c.Bool("mobi")
isAwz3 := c.Bool("awz3")
isPV := c.Bool("printvolume") //打印分卷信息,只用做调试时使用

var bookinfo BookInfo //初始化变量
var EBDLInterface EBookDLInterface //初始化接口
//isTxt 或者 isMobi必须一个为真,或者两个都为真
if (isTxt || isMobi || isAwz3) || (isTxt && isMobi) || (isTxt && isAwz3) {
if (isTxt || isMobi || isAwz3) || (isTxt && isMobi) || (isTxt && isAwz3) || isPV {

if ebhost == "xsbiquge.com" {
xsbiquge := NewXSBiquge()
EBDLInterface = xsbiquge //实例化接口
} else if ebhost == "999xs.com" {
xs999 := New999XS()
EBDLInterface = xs999 //实例化接口
} else if ebhost == "23us.la" {
xs23 := New23US()
EBDLInterface = xs23 //实例化接口
} else {
cli.ShowAppHelpAndExit(c, 0)
return nil
Expand All @@ -315,9 +330,14 @@ func EbookDownloader(c *cli.Context) error {
}
bookinfo = EBDLInterface.GetBookInfo(bookid, proxy)

//下载章节内容
fmt.Printf("正在下载电子书的相应章节,请耐心等待!\n")
bookinfo = EBDLInterface.DownloadChapters(bookinfo, proxy)
//打印分卷信息,只用于调试
if isPV {
bookinfo.PrintVolumeInfo()
} else {
//下载章节内容
fmt.Printf("正在下载电子书的相应章节,请耐心等待!\n")
bookinfo = EBDLInterface.DownloadChapters(bookinfo, proxy)
}
//生成txt文件
if isTxt {
fmt.Printf("\n正在生成txt版本的电子书,请耐心等待!\n")
Expand Down Expand Up @@ -358,17 +378,17 @@ func main() {
},
}
app.Copyright = "(c) 2019 - 2020 Jimes Yang<[email protected]>"
app.Usage = "用于下载 笔趣阁(https://www.xsbiquge.com),999小说网(https://www.999xs.com/) 上面的电子书,并保存为txt格式或者(mobi格式,awz3格式)的电子书"
app.Usage = "用于下载 笔趣阁(https://www.xsbiquge.com),999小说网(https://www.999xs.com/) ,顶点小说网(https://www.23us.la) 上面的电子书,并保存为txt格式或者(mobi格式,awz3格式)的电子书"
app.Action = EbookDownloader
app.Flags = []cli.Flag{
cli.StringFlag{
Name: "ebhost",
Value: "xsbiquge.com",
Usage: "定义下载ebook的网站地址(可选择xsbiquge.com,999xs.com)",
Usage: "定义下载ebook的网站地址(可选择xsbiquge.com,999xs.com,23us.la)",
},
cli.StringFlag{
Name: "bookid,id",
Usage: "对应 笔趣阁id(https://www.xsbiquge.com/0_642/),其中0_642就是book_id;对应999小说网id(https://www.999xs.com/files/article/html/0/591/),其中591为book_id",
Usage: "对应笔趣阁id(https://www.xsbiquge.com/0_642/),其中0_642就是book_id;\n对应999小说网id(https://www.999xs.com/files/article/html/0/591/),其中591为book_id;\n对应顶点小说网id(https://www.23us.la/html/113/113444/),其中113444为bookid",
},
cli.StringFlag{
Name: "proxy,p",
Expand All @@ -386,6 +406,10 @@ func main() {
Name: "awz3",
Usage: "当使用的时候,生成awz3文件(不可与--mobi同时使用)",
},
cli.BoolFlag{
Name: "printvolume,pv",
Usage: "打印分卷信息,只于调试时使用!(使用此功能的时候,不会下载章节内容)",
},
}

err := app.Run(os.Args)
Expand Down

0 comments on commit 107c4e9

Please sign in to comment.