-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
58cbe30
commit d05f3f0
Showing
5 changed files
with
284 additions
and
2 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,237 @@ | ||
package com.github.catvod.spider; | ||
|
||
import com.github.catvod.bean.Class; | ||
import com.github.catvod.bean.Filter; | ||
import com.github.catvod.bean.Result; | ||
import com.github.catvod.bean.Vod; | ||
import com.github.catvod.crawler.Spider; | ||
import com.github.catvod.net.OkHttp; | ||
import com.github.catvod.utils.Utils; | ||
import okhttp3.FormBody; | ||
import okhttp3.Request; | ||
import okhttp3.RequestBody; | ||
import okhttp3.Response; | ||
import org.apache.commons.lang3.StringUtils; | ||
import org.jsoup.Jsoup; | ||
import org.jsoup.nodes.Document; | ||
import org.jsoup.nodes.Element; | ||
import org.jsoup.select.Elements; | ||
|
||
import java.util.*; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
|
||
/** | ||
* @author zhixc | ||
* 新版6V电影网 | ||
*/ | ||
public class Xb6v extends Spider { | ||
|
||
// Site root URL, stored Base64-encoded; the literal decodes to "https://www.xb6v.com" | ||
// (assuming Utils.base64Decode is a plain Base64 decode - TODO confirm). | ||
private final String siteUrl = Utils.base64Decode("aHR0cHM6Ly93d3cueGI2di5jb20="); | ||
// Part of the follow-up search URL that precedes the page number. | ||
// Written by searchContent(key, quick, pg) when pg is "1", read for later pages. | ||
private String nextSearchUrlPrefix; | ||
// Part of the follow-up search URL that follows the page number ("&searchid=..."). | ||
// Written and read together with nextSearchUrlPrefix. | ||
private String nextSearchUrlSuffix; | ||
|
||
private Map<String, String> getHeader() { | ||
Map<String, String> header = new HashMap<>(); | ||
header.put("User-Agent", Utils.CHROME); | ||
header.put("Referer", siteUrl + "/"); | ||
return header; | ||
} | ||
|
||
private Map<String, String> getDetailHeader() { | ||
Map<String, String> header = new HashMap<>(); | ||
header.put("User-Agent", Utils.CHROME); | ||
return header; | ||
} | ||
|
||
@Override | ||
public String homeContent(boolean filter) throws Exception { | ||
List<Class> classes = new ArrayList<>(); | ||
String html = OkHttp.string(siteUrl, getHeader()); | ||
Document doc = Jsoup.parse(html); | ||
Elements elements = doc.select("#menus > li > a"); | ||
LinkedHashMap<String, List<Filter>> filters = new LinkedHashMap<>(); | ||
for (int i = 0; i < elements.size(); i++) { | ||
if (i < 2 || i == elements.size() - 1) continue; | ||
Element e = elements.get(i); | ||
String typeId = e.attr("href"); | ||
String typeName = e.text(); | ||
if (typeName.equals("电视剧")) { | ||
List<Filter.Value> values = new ArrayList<>(); | ||
values.add(new Filter.Value("不限", "")); | ||
for (Element a : e.nextElementSibling().select("a")) { | ||
values.add(new Filter.Value(a.text(), a.attr("href").replaceAll(typeId, ""))); | ||
} | ||
List<Filter> filterList = new ArrayList<>(); | ||
filterList.add(new Filter("cateId", "类型", values)); | ||
filters.put(typeId, filterList); | ||
} | ||
classes.add(new Class(typeId, typeName)); | ||
} | ||
return Result.string(classes, parseVodListFromDoc(doc), filters); | ||
} | ||
|
||
private List<Vod> parseVodListFromDoc(String html) { | ||
return parseVodListFromDoc(Jsoup.parse(html)); | ||
} | ||
|
||
private List<Vod> parseVodListFromDoc(Document doc) { | ||
Elements items = doc.select("#post_container .post_hover"); | ||
List<Vod> list = new ArrayList<>(); | ||
for (Element item : items) { | ||
Element element = item.select("[class=zoom]").get(0); | ||
String vodId = element.attr("href"); | ||
String name = element.attr("title").replaceAll("</?[^>]+>", ""); | ||
String pic = element.select("img").attr("src"); | ||
String remark = item.select("[rel=category tag]").text(); | ||
list.add(new Vod(vodId, name, pic, remark)); | ||
} | ||
return list; | ||
} | ||
|
||
@Override | ||
public String categoryContent(String tid, String pg, boolean filter, HashMap<String, String> extend) throws Exception { | ||
String cateId = extend.get("cateId") == null ? "" : extend.get("cateId"); | ||
String cateUrl = siteUrl + tid + cateId; | ||
if (!pg.equals("1")) cateUrl += "index_" + pg + ".html"; | ||
String html = OkHttp.string(cateUrl, getHeader()); | ||
Document doc = Jsoup.parse(html); | ||
String href = doc.select(".pagination > a").last().attr("href"); | ||
int page = Integer.parseInt(pg); | ||
int count = Integer.parseInt(getStrByRegex(Pattern.compile("index_(.*?).html"), href)); | ||
int limit = 18; | ||
Elements items = doc.select("#post_container .post_hover"); | ||
int total = page == count ? (page - 1) * limit + items.size() : count * limit; | ||
return Result.get().vod(parseVodListFromDoc(doc)).page(page, count, limit, total).string(); | ||
} | ||
|
||
@Override | ||
public String detailContent(List<String> ids) throws Exception { | ||
String vodId = ids.get(0); | ||
String detailUrl = siteUrl + vodId; | ||
String html = OkHttp.string(detailUrl, getDetailHeader()); | ||
Document doc = Jsoup.parse(html); | ||
Elements sourceList = doc.select("#post_content"); | ||
|
||
String circuitName = "磁力线路"; | ||
Map<String, String> playMap = new LinkedHashMap<>(); | ||
int i = 0; | ||
for (Element source : sourceList) { | ||
Elements aList = source.select("table a"); | ||
List<String> vodItems = new ArrayList<>(); | ||
for (Element a : aList) { | ||
String episodeUrl = a.attr("href"); | ||
String episodeName = a.text(); | ||
if (!episodeUrl.toLowerCase().startsWith("magnet")) continue; | ||
vodItems.add(episodeName + "$" + episodeUrl); | ||
} | ||
if (vodItems.size() > 0) { | ||
i++; | ||
playMap.put(circuitName + i, StringUtils.join(vodItems, "#")); | ||
} | ||
} | ||
|
||
String partHTML = doc.select(".context").html(); | ||
String name = doc.select(".article_container > h1").text(); | ||
String pic = doc.select("#post_content img").attr("src"); | ||
String typeName = getStrByRegex(Pattern.compile("◎类 别 (.*?)<br>"), partHTML); | ||
if (typeName.equals("")) typeName = doc.select("[rel=category tag]").text(); | ||
String year = getStrByRegex(Pattern.compile("◎年 代 (.*?)<br>"), partHTML); | ||
if (year.equals("")) year = getStrByRegex(Pattern.compile("首播:(.*?)<br>"), partHTML); | ||
String area = getStrByRegex(Pattern.compile("◎产 地 (.*?)<br>"), partHTML); | ||
if (area.equals("")) area = getStrByRegex(Pattern.compile("地区:(.*?)<br>"), partHTML); | ||
String remark = getStrByRegex(Pattern.compile("◎上映日期 (.*?)<br>"), partHTML); | ||
String actor = getActorOrDirector(Pattern.compile("◎演 员 (.*?)</p>"), partHTML); | ||
if (actor.equals("")) actor = getActorOrDirector(Pattern.compile("◎主 演 (.*?)</p>"), partHTML); | ||
if (actor.equals("")) actor = getActorOrDirector(Pattern.compile("主演:(.*?)<br>"), partHTML); | ||
String director = getActorOrDirector(Pattern.compile("◎导 演 (.*?)<br>"), partHTML); | ||
if (director.equals("")) director = getActorOrDirector(Pattern.compile("导演:(.*?)<br>"), partHTML); | ||
String description = getDescription(Pattern.compile("◎简 介(.*?)<hr>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL), partHTML); | ||
if (description.equals("")) description = getDescription(Pattern.compile("简介(.*?)</p>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL), partHTML); | ||
|
||
Vod vod = new Vod(); | ||
vod.setVodId(ids.get(0)); | ||
vod.setVodName(name); | ||
vod.setVodPic(pic); | ||
vod.setTypeName(typeName); | ||
vod.setVodYear(year); | ||
vod.setVodArea(area); | ||
vod.setVodRemarks(remark); | ||
vod.setVodActor(actor); | ||
vod.setVodDirector(director); | ||
vod.setVodContent(description); | ||
vod.setVodPlayFrom(StringUtils.join(playMap.keySet(), "$$$")); | ||
vod.setVodPlayUrl(StringUtils.join(playMap.values(), "$$$")); | ||
|
||
return Result.string(vod); | ||
} | ||
|
||
private String getStrByRegex(Pattern pattern, String str) { | ||
Matcher matcher = pattern.matcher(str); | ||
if (matcher.find()) return matcher.group(1).trim(); | ||
return ""; | ||
} | ||
|
||
// Extracts a cast or director field: takes capture group 1 of the pattern via | ||
// getStrByRegex, then applies the literal clean-up replacements below in order. | ||
// Returns "" when getStrByRegex returns "". | ||
// NOTE(review): the whitespace-only literals below may be distinct Unicode space | ||
// characters (or runs of them) that look identical here - confirm the exact | ||
// characters before editing or reordering any of these calls. | ||
private String getActorOrDirector(Pattern pattern, String str) { | ||
return getStrByRegex(pattern, str) | ||
// Drop line-break tags. | ||
.replaceAll("<br>", "") | ||
.replaceAll(" ", "") | ||
// Remove ampersands first so that an entity such as "&middot;" is left as | ||
// "middot;", which the next call turns into a middle dot. | ||
.replaceAll("&", "") | ||
.replaceAll("middot;", "・") | ||
// Presumably separators between names, turned into commas - verify. | ||
.replaceAll(" ", ",") | ||
.replaceAll(" ", ",") | ||
.replaceAll(" ", ""); | ||
} | ||
|
||
// Extracts the synopsis: takes capture group 1 of the pattern via getStrByRegex, | ||
// then strips markup and rewrites leftover entity fragments, in the order below. | ||
// Returns "" when getStrByRegex returns "". | ||
private String getDescription(Pattern pattern, String str) { | ||
return getStrByRegex(pattern, str) | ||
// Remove every HTML tag, then newlines. | ||
.replaceAll("</?[^>]+>", "") | ||
.replaceAll("\n", "") | ||
// Remove ampersands first so that entities such as "&ldquo;" are left as | ||
// "ldquo;", which the following calls map to bracket characters. | ||
.replaceAll("&", "") | ||
.replaceAll("middot;", "・") | ||
.replaceAll("ldquo;", "【") | ||
.replaceAll("rdquo;", "】") | ||
// NOTE(review): this whitespace literal may be a non-ASCII space character - | ||
// confirm the exact character before editing. | ||
.replaceAll(" ", ""); | ||
} | ||
|
||
@Override | ||
public String searchContent(String key, boolean quick) throws Exception { | ||
return searchContent(key, quick, "1"); | ||
} | ||
|
||
@Override | ||
public String searchContent(String key, boolean quick, String pg) throws Exception { | ||
String searchUrl = siteUrl + Utils.base64Decode("L2Uvc2VhcmNoLzFpbmRleC5waHA="); | ||
if (pg.equals("1")) { | ||
RequestBody formBody = new FormBody.Builder() | ||
.add("show", "title") | ||
.add("tempid", "1") | ||
.add("tbname", "article") | ||
.add("mid", "1") | ||
.add("dopost", "search") | ||
.add("submit", "") | ||
.addEncoded("keyboard", key) | ||
.build(); | ||
Request request = new Request.Builder().url(searchUrl) | ||
.addHeader("User-Agent", Utils.CHROME) | ||
.addHeader("Origin", siteUrl) | ||
.addHeader("Referer", siteUrl + "/") | ||
.post(formBody) | ||
.build(); | ||
Response response = OkHttp.newCall(request); | ||
String[] split = String.valueOf(response.request().url()).split("\\?searchid="); | ||
nextSearchUrlPrefix = split[0] + "index.php?page="; | ||
nextSearchUrlSuffix = "&searchid=" + split[1]; | ||
return Result.string(parseVodListFromDoc(response.body().string())); | ||
} else { | ||
int page = Integer.parseInt(pg) - 1; | ||
searchUrl = nextSearchUrlPrefix + page + nextSearchUrlSuffix; | ||
return Result.string(parseVodListFromDoc(OkHttp.string(searchUrl, getHeader()))); | ||
} | ||
} | ||
|
||
@Override | ||
public String playerContent(String flag, String id, List<String> vipFlags) throws Exception { | ||
return Result.get().url(id).string(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import com.github.catvod.spider.Xb6v | ||
import common.TestInterface | ||
import org.junit.jupiter.api.Test | ||
|
||
/**
 * Smoke tests for the [Xb6v] spider. Each test calls one spider entry point and hands
 * the returned string to `assert`.
 */
class Xb6vTest : TestInterface<Xb6v> {
    // The getter builds a fresh spider on every access; the setter discards its argument.
    override var t: Xb6v
        get() {
            return Xb6v()
        }
        set(value) {
        }

    /** Home page: categories, filters and the front-page list. */
    @Test
    override fun homeTest() {
        assert(t.homeContent(false))
    }

    /** First page of the "/xijupian/" category with no filters selected. */
    @Test
    override fun cateTest() {
        assert(t.categoryContent("/xijupian/", "1", false, hashMapOf()))
    }

    /** Detail page of one fixed entry. */
    @Test
    override fun detailTest() {
        assert(t.detailContent(listOf("/xijupian/25112.html")))
    }

    /** Intentionally empty and not annotated as a test. */
    override fun playTest() {
    }

    /** Keyword search, first page. */
    @Test
    override fun searchTest() {
        assert(t.searchContent("阿凡达", false))
    }
}