From 96f098a5272f0dcb02916dd840bb2adfbe49e67f Mon Sep 17 00:00:00 2001
From: gzqx <24262751+gzqx@users.noreply.github.com>
Date: Wed, 6 Mar 2024 18:47:23 +0800
Subject: [PATCH] update makefile
---
.gitignore | 2 +
MYMETA.json | 4 +-
MYMETA.yml | 4 +-
Makefile | 76 ++++----
Makefile.PL | 6 -
blib/arch/.exists | 0
blib/arch/auto/stupid_rss_generate/.exists | 0
blib/bin/.exists | 0
blib/lib/.exists | 0
blib/lib/auto/stupid_rss_generate/.exists | 0
blib/lib/stupid_rss_generator.pl | 214 +++++++++++++++++++++
blib/man1/.exists | 0
blib/man3/.exists | 0
blib/script/.exists | 0
pm_to_blib | 0
15 files changed, 257 insertions(+), 49 deletions(-)
create mode 100644 blib/arch/.exists
create mode 100644 blib/arch/auto/stupid_rss_generate/.exists
create mode 100644 blib/bin/.exists
create mode 100644 blib/lib/.exists
create mode 100644 blib/lib/auto/stupid_rss_generate/.exists
create mode 100644 blib/lib/stupid_rss_generator.pl
create mode 100644 blib/man1/.exists
create mode 100644 blib/man3/.exists
create mode 100644 blib/script/.exists
create mode 100644 pm_to_blib
diff --git a/.gitignore b/.gitignore
index 18d4cb4..b887ece 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
record.yaml
rss/
rss/*
+MYMETA*
+Makefile
diff --git a/MYMETA.json b/MYMETA.json
index a320533..331e6d7 100644
--- a/MYMETA.json
+++ b/MYMETA.json
@@ -12,7 +12,7 @@
"url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",
"version" : 2
},
- "name" : "STUPID-RSS_GENERATOR",
+ "name" : "stupid_rss_generate",
"no_index" : {
"directory" : [
"t",
@@ -46,6 +46,6 @@
}
},
"release_status" : "stable",
- "version" : 0.01,
+ "version" : "",
"x_serialization_backend" : "JSON::PP version 4.16"
}
diff --git a/MYMETA.yml b/MYMETA.yml
index 318c443..a9e4823 100644
--- a/MYMETA.yml
+++ b/MYMETA.yml
@@ -12,7 +12,7 @@ license: unknown
meta-spec:
url: http://module-build.sourceforge.net/META-spec-v1.4.html
version: '1.4'
-name: STUPID-RSS_GENERATOR
+name: stupid_rss_generate
no_index:
directory:
- t
@@ -28,5 +28,5 @@ requires:
URI: '5.27'
XML::RSS: '1.63'
YAML::Tiny: '1.74'
-version: 0.01
+version: ''
x_serialization_backend: 'CPAN::Meta::YAML version 0.018'
diff --git a/Makefile b/Makefile
index 64b322b..4a95f86 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,4 @@
-# This Makefile is for the STUPID::RSS_GENERATOR extension to perl.
+# This Makefile is for the stupid_rss_generate extension to perl.
#
# It was generated automatically by MakeMaker version
# 7.70 (Revision: 77000) from the contents of
@@ -13,10 +13,8 @@
# BUILD_REQUIRES => { }
# CONFIGURE_REQUIRES => { }
-# NAME => q[STUPID::RSS_GENERATOR]
# PREREQ_PM => { Clone=>q[0.46], Getopt::Long=>q[2.54], HTML::TreeBuilder=>q[5.07], IO::Prompter=>q[0.005001], LWP::UserAgent=>q[6.76], Lingua::ZH::Numbers=>q[0.04], Time::Piece=>q[1.3401], URI=>q[5.27], XML::RSS=>q[1.63], YAML::Tiny=>q[1.74] }
# TEST_REQUIRES => { }
-# VERSION_FROM => q[stupid_rss_generator.pl]
# --- MakeMaker post_initialize section:
@@ -54,13 +52,13 @@ VENDORLIBEXP = /usr/share/perl5/vendor_perl
AR_STATIC_ARGS = cr
DIRFILESEP = /
DFSEP = $(DIRFILESEP)
-NAME = STUPID::RSS_GENERATOR
-NAME_SYM = STUPID_RSS_GENERATOR
-VERSION = 0.01
+NAME = stupid_rss_generate
+NAME_SYM = stupid_rss_generate
+VERSION =
VERSION_MACRO = VERSION
-VERSION_SYM = 0_01
+VERSION_SYM =
DEFINE_VERSION = -D$(VERSION_MACRO)=\"$(VERSION)\"
-XS_VERSION = 0.01
+XS_VERSION =
XS_VERSION_MACRO = XS_VERSION
XS_DEFINE_VERSION = -D$(XS_VERSION_MACRO)=\"$(XS_VERSION)\"
INST_ARCHLIB = blib/arch
@@ -146,11 +144,11 @@ MM_REVISION = 77000
# PARENT_NAME = NAME without BASEEXT and no trailing :: (eg Foo::Bar)
# DLBASE = Basename part of dynamic library. May be just equal BASEEXT.
MAKE = make
-FULLEXT = STUPID/RSS_GENERATOR
-BASEEXT = RSS_GENERATOR
-PARENT_NAME = STUPID
+FULLEXT = stupid_rss_generate
+BASEEXT = stupid_rss_generate
+PARENT_NAME =
DLBASE = $(BASEEXT)
-VERSION_FROM = stupid_rss_generator.pl
+VERSION_FROM =
OBJECT =
LDFROM = $(OBJECT)
LINKTYPE = dynamic
@@ -168,8 +166,8 @@ MAN3PODS =
CONFIGDEP = $(PERL_ARCHLIBDEP)$(DFSEP)Config.pm $(PERL_INCDEP)$(DFSEP)config.h
# Where to build things
-INST_LIBDIR = $(INST_LIB)/STUPID
-INST_ARCHLIBDIR = $(INST_ARCHLIB)/STUPID
+INST_LIBDIR = $(INST_LIB)
+INST_ARCHLIBDIR = $(INST_ARCHLIB)
INST_AUTODIR = $(INST_LIB)/auto/$(FULLEXT)
INST_ARCHAUTODIR = $(INST_ARCHLIB)/auto/$(FULLEXT)
@@ -254,8 +252,8 @@ CI = ci -u
RCS_LABEL = rcs -Nv$(VERSION_SYM): -q
DIST_CP = best
DIST_DEFAULT = tardist
-DISTNAME = STUPID-RSS_GENERATOR
-DISTVNAME = STUPID-RSS_GENERATOR-0.01
+DISTNAME = stupid_rss_generate
+DISTVNAME = stupid_rss_generate-
# --- MakeMaker macro section:
@@ -491,7 +489,7 @@ metafile : create_distdir
$(NOECHO) $(ECHO) 'meta-spec:' >> META_new.yml
$(NOECHO) $(ECHO) ' url: http://module-build.sourceforge.net/META-spec-v1.4.html' >> META_new.yml
$(NOECHO) $(ECHO) ' version: '\''1.4'\''' >> META_new.yml
- $(NOECHO) $(ECHO) 'name: STUPID-RSS_GENERATOR' >> META_new.yml
+ $(NOECHO) $(ECHO) 'name: stupid_rss_generate' >> META_new.yml
$(NOECHO) $(ECHO) 'no_index:' >> META_new.yml
$(NOECHO) $(ECHO) ' directory:' >> META_new.yml
$(NOECHO) $(ECHO) ' - t' >> META_new.yml
@@ -507,7 +505,7 @@ metafile : create_distdir
$(NOECHO) $(ECHO) ' URI: '\''5.27'\''' >> META_new.yml
$(NOECHO) $(ECHO) ' XML::RSS: '\''1.63'\''' >> META_new.yml
$(NOECHO) $(ECHO) ' YAML::Tiny: '\''1.74'\''' >> META_new.yml
- $(NOECHO) $(ECHO) 'version: 0.01' >> META_new.yml
+ $(NOECHO) $(ECHO) 'version: '\'''\''' >> META_new.yml
$(NOECHO) $(ECHO) 'x_serialization_backend: '\''CPAN::Meta::YAML version 0.018'\''' >> META_new.yml
-$(NOECHO) $(MV) META_new.yml $(DISTVNAME)/META.yml
$(NOECHO) $(ECHO) Generating META.json
@@ -525,7 +523,7 @@ metafile : create_distdir
$(NOECHO) $(ECHO) ' "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",' >> META_new.json
$(NOECHO) $(ECHO) ' "version" : 2' >> META_new.json
$(NOECHO) $(ECHO) ' },' >> META_new.json
- $(NOECHO) $(ECHO) ' "name" : "STUPID-RSS_GENERATOR",' >> META_new.json
+ $(NOECHO) $(ECHO) ' "name" : "stupid_rss_generate",' >> META_new.json
$(NOECHO) $(ECHO) ' "no_index" : {' >> META_new.json
$(NOECHO) $(ECHO) ' "directory" : [' >> META_new.json
$(NOECHO) $(ECHO) ' "t",' >> META_new.json
@@ -559,7 +557,7 @@ metafile : create_distdir
$(NOECHO) $(ECHO) ' }' >> META_new.json
$(NOECHO) $(ECHO) ' },' >> META_new.json
$(NOECHO) $(ECHO) ' "release_status" : "stable",' >> META_new.json
- $(NOECHO) $(ECHO) ' "version" : 0.01,' >> META_new.json
+ $(NOECHO) $(ECHO) ' "version" : "",' >> META_new.json
$(NOECHO) $(ECHO) ' "x_serialization_backend" : "JSON::PP version 4.16"' >> META_new.json
$(NOECHO) $(ECHO) '}' >> META_new.json
-$(NOECHO) $(MV) META_new.json $(DISTVNAME)/META.json
@@ -863,31 +861,31 @@ testdb_static test_static :: subdirs-test_static
# --- MakeMaker ppd section:
# Creates a PPD (Perl Package Description) for a binary distribution.
ppd :
- $(NOECHO) $(ECHO) '' > STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd
- $(NOECHO) $(ECHO) '' >> STUPID-RSS_GENERATOR.ppd
+ $(NOECHO) $(ECHO) '' > stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd
+ $(NOECHO) $(ECHO) '' >> stupid_rss_generate.ppd
# --- MakeMaker pm_to_blib section:
pm_to_blib : $(FIRST_MAKEFILE) $(TO_INST_PM)
$(NOECHO) $(ABSPERLRUN) -MExtUtils::Install -e 'pm_to_blib({@ARGV}, '\''$(INST_LIB)/auto'\'', q[$(PM_FILTER)], '\''$(PERM_DIR)'\'')' -- \
- 'stupid_rss_generator.pl' '$(INST_LIB)/STUPID/stupid_rss_generator.pl'
+ 'stupid_rss_generator.pl' '$(INST_LIB)/stupid_rss_generator.pl'
$(NOECHO) $(TOUCH) pm_to_blib
diff --git a/Makefile.PL b/Makefile.PL
index ac52d79..1cecb0b 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -1,12 +1,6 @@
-use Module::Metadata;
use ExtUtils::MakeMaker;
-my %prereqs;
-
-
WriteMakefile(
- NAME => 'STUPID::RSS_GENERATOR',
- VERSION_FROM => 'stupid_rss_generator.pl',
PREREQ_PM => {
'Getopt::Long' => 2.54,
'Clone' => 0.46,
diff --git a/blib/arch/.exists b/blib/arch/.exists
new file mode 100644
index 0000000..e69de29
diff --git a/blib/arch/auto/stupid_rss_generate/.exists b/blib/arch/auto/stupid_rss_generate/.exists
new file mode 100644
index 0000000..e69de29
diff --git a/blib/bin/.exists b/blib/bin/.exists
new file mode 100644
index 0000000..e69de29
diff --git a/blib/lib/.exists b/blib/lib/.exists
new file mode 100644
index 0000000..e69de29
diff --git a/blib/lib/auto/stupid_rss_generate/.exists b/blib/lib/auto/stupid_rss_generate/.exists
new file mode 100644
index 0000000..e69de29
diff --git a/blib/lib/stupid_rss_generator.pl b/blib/lib/stupid_rss_generator.pl
new file mode 100644
index 0000000..f50ed85
--- /dev/null
+++ b/blib/lib/stupid_rss_generator.pl
@@ -0,0 +1,214 @@
+#package STUPID::RSS_GENERATOR 0.01;
+$VERSION=0.01;
+
+use warnings;
+use strict;
+use utf8;
+use 5.027;
+use Getopt::Long;
+use HTML::TreeBuilder;
+use IO::Prompter;
+use LWP::UserAgent;
+use Time::Piece;
+use URI;
+use YAML::Tiny;
+use Clone qw(clone);
+use Lingua::ZH::Numbers;
+use XML::RSS;
+
+use constant DEFAULT_RECORD_NAME => 'record.yaml';
+use constant RECORD_TIME_FORMAT => '%Y-%m-%d-%H-%M-%S';
+use constant PRINT_HUMAN_TIME_FORMAT => '%Y-%m-%d %H:%M:%S';
+use constant GENESIS => Time::Piece->strptime('1970-01-01-00-00-00', RECORD_TIME_FORMAT);
+use constant RSS_FOLDER => './rss_folder';
+
+# Targets for the command-line options parsed below; $verbose defaults to on
+# and $rssFolderPath to './rss/', the others start undefined.
+my ($cliRecordFile, $automation, $help);
+my $verbose=1;
+my $rssFolderPath='./rss/';
+
+
+GetOptions(    # NOTE: must be (...), not {...}: a leading hash reference is taken as the option store, not as specs
+    'R|record=s'    => \$cliRecordFile,    # record file path
+    'f|feed-path=s' => \$rssFolderPath,    # directory the feeds are written to
+    'v|verbose!'    => \$verbose,          # on by default; '!' adds --no-verbose to switch it off
+    'h|help'        => \$help,
+    'a|auto'        => \$automation,
+) or die "Unknown option!\n";
+
+if (not -d $rssFolderPath){    # the feed directory must exist before any feed is saved into it
+    mkdir($rssFolderPath) or die "Failed to create $rssFolderPath.";
+    say "$rssFolderPath not exist. Created One.";
+}
+
+# Template for one record entry (one book); it is cloned before being filled in.
+# Both timestamps start at the GENESIS value (1970-01-01-00-00-00).
+my $genesisStamp=GENESIS->strftime(RECORD_TIME_FORMAT);
+my $bookTemplate={
+    Title => '', Author => '', ContentPageUrl => '',
+    LastChapterFetched => '0',
+    LastFetchTime => $genesisStamp,
+    LastCheckTime => $genesisStamp,
+    HashOfTitle => '',
+    RegrexForTitle => '',
+    RegrexForChapterLinkAndNumber => '',
+    RegrexForChapterNumber => '',
+    RegrexForChapterTitle => '',
+    RegrexForText => '',
+    RSSFeed => '',
+};
+
+
+# Pick the record file: a path given with -R/--record wins when it is a
+# true value; otherwise fall back to DEFAULT_RECORD_NAME.
+my $recordFile=$cliRecordFile;
+unless ($recordFile){
+    $recordFile=DEFAULT_RECORD_NAME;
+}
+
+# vsay(..., $message): print $message with a trailing newline, but only
+# while the file-level $verbose flag is true; earlier arguments are ignored.
+sub vsay{
+    my $message = $_[-1];
+    say $message if $verbose;
+}
+
+
+# HTTP client used for every page request in this script: 10-second timeout
+# and a desktop Firefox User-Agent string. TODO: customize agent
+my $userAgent=LWP::UserAgent->new(
+    timeout => 10,
+    agent   => 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0',
+);
+
+# Proxy: ask interactively; on a yes answer let the client pick up its proxy
+# settings via env_proxy. TODO: customized proxy config
+my $useSystemProxy=prompt -yn, 'Do you want to use system proxy?';
+$userAgent->env_proxy if $useSystemProxy =~/^(y|yes)$/i;
+
+# Main flow. With no record file: offer to create one, register a first book
+# via addNewBook() and save its feed. Otherwise refresh every recorded book.
+unless (-e $recordFile){
+    my $createRecrodFileInput = prompt "File '$recordFile' does not exist. Do you want to create a new one? (y/n)\n", -yn;
+    unless ($createRecrodFileInput =~/^(y|yes)$/i) {
+        say "No '$recordFile' found or created, exiting.";
+        exit;
+    }
+    my $yaml=YAML::Tiny->new();
+    my ($newBook,$rss)=addNewBook(clone($bookTemplate));
+    push @$yaml, $newBook;
+    $yaml->write($recordFile) or die ("Failed to save to $recordFile");
+    # Build the path first: a method call is not interpolated inside a string.
+    my $rssFile=$rssFolderPath.$rss->channel('title').'.xml';
+    $rss->save($rssFile) or die ("Failed to save to $rssFile");
+} else {
+    #update books if record file found
+    my $yaml=YAML::Tiny->read("$recordFile");
+    foreach my $targetBook (@{$yaml}){
+        # Quote the version: numeric 2.0 stringifies to "2", unlike the '2.0' passed elsewhere in this file.
+        my $rss=XML::RSS->new(version => '2.0');
+        my $rssFile=$rssFolderPath.$targetBook->{Title}.".xml";
+        if (-e $rssFile){
+            $rss->parsefile($rssFile);    # continue from the feed file already on disk
+        } else{
+            $rss->channel(
+                title => "$targetBook->{Title}",
+                link => "$targetBook->{ContentPageUrl}",
+            );
+        }
+        #TODO:support limit rss entries per file
+        ($targetBook, $rss)=updateBooks($targetBook,$rss);
+        vsay($rssFile);
+        vsay($rss->as_string);
+        $rss->save($rssFile) or die ("Failed to save to $rssFile");
+    }
+    $yaml->write($recordFile) or die ("Failed to save to $recordFile");
+}
+
+
+# addNewBook($newBook): interactively fill a book record (content page URL
+# plus extraction regexes) and fetch it once. Returns ($newBook, $rss).
+sub addNewBook{
+    #TODO: add new book to existing yaml
+    my $newBook=$_[-1];
+    my $contentUrlInput= prompt -v, "Input the link to the content page:\n";
+    # Prefix the text before parsing: calling ->scheme('https') on a scheme-less
+    # URI yields "https:host/path" (no "//"), which is not a fetchable address.
+    unless ($contentUrlInput =~ m{\A[A-Za-z][A-Za-z0-9+.-]*://}){
+        say ("Using https by default. If you want http connection, specify it in the link");
+        $contentUrlInput="https://$contentUrlInput";
+    }
+    my $contentUrl=URI->new($contentUrlInput);
+    #fill in the new book's record
+    $newBook->{ContentPageUrl}=$contentUrl->as_string;
+    #TODO Reuse regrex from same domain
+    $newBook->{RegrexForTitle}=prompt -v, "Input the regrex for extracting the book title from content page:\n";
+    $newBook->{RegrexForChapterLinkAndNumber}=prompt -v, "Input the regrex for extracting the Chapter Link and Number from content page:\n";
+    $newBook->{RegrexForChapterTitle}=prompt -v, "Input the regrex for extracting the Chapter Title from text page:\n";
+    $newBook->{RegrexForText}=prompt -v, "Input the regrex for extracting the text from text page:\n";
+
+    #a fresh RSS 2.0 feed; updateBooks sets its channel once the title is matched
+    return updateBooks($newBook, XML::RSS->new(version => '2.0'));
+}
+
+# updateBooks($targetBook, $rssBook): fetch the book's content page and add
+# every chapter numbered above LastChapterFetched to $rssBook, advancing that
+# counter as chapters are stored. Returns ($targetBook, $rssBook).
+sub updateBooks{
+    my ($targetBook,$rssBook)=@_;
+    #get content page
+    my $contentPageResponse=$userAgent->get($targetBook->{ContentPageUrl});
+    unless ($contentPageResponse->is_success) {
+        say ("Failed to fetch content page (".$contentPageResponse->status_line."), check your url or internet connection.");
+        return ($targetBook,$rssBook);
+    }
+    my $contentPageContent=$contentPageResponse->decoded_content;
+
+    # if it is a new book
+    if ($targetBook->{Title} eq "" && $contentPageContent =~/$targetBook->{RegrexForTitle}/){
+        $targetBook->{Title} = $1;
+        vsay("Title of book is $targetBook->{Title}");
+        $rssBook->channel(title => "$targetBook->{Title}", link => "$targetBook->{ContentPageUrl}");
+    }
+
+    # Compare against the counter as it stood before this run: bumping it once
+    # per listed chapter made later runs skip chapters that were really new.
+    my $lastFetched=$targetBook->{LastChapterFetched};
+    while($contentPageContent =~/$targetBook->{RegrexForChapterLinkAndNumber}/g){
+        #TODO: Handle Chinese number with Lingua::ZH::Numbers
+        #TODO: Handle situation when link and number is not in same line or link somehow managed to come after chapter number
+        my ($chapterLink,$chapterCounter)=($1,$2);
+        # resolve relative links against the content page address
+        $chapterLink=URI->new_abs($chapterLink,$targetBook->{ContentPageUrl})->as_string;
+        vsay("Chapter link is $chapterLink");
+        vsay("Chapter checked is No.$chapterCounter");
+        # chapters already fetched cost neither a request nor the 20 s pause
+        next unless $chapterCounter > $lastFetched;
+
+        my $textPageResponse=$userAgent->get($chapterLink);
+        unless ($textPageResponse->is_success){
+            # NOTE(review): assumes chapters are listed in ascending order, so
+            # stopping here lets the next run retry from this chapter - confirm.
+            say "Failed to fetch chapter No.$chapterCounter: ".$textPageResponse->status_line;
+            last;
+        }
+        my $textPageContent=$textPageResponse->decoded_content;
+        #get chapter title
+        my $chapterTitle='';
+        if ($textPageContent =~/$targetBook->{RegrexForChapterTitle}/){
+            $chapterTitle=$1;
+            vsay("Chapter Title is $chapterTitle");
+        }else{
+            say "Failed to get chapter title. Check your regrex.";
+        }
+        #get chapter text: concatenate the first capture of every match
+        my $text='';
+        $text.=$1 while $textPageContent=~/$targetBook->{RegrexForText}/g;
+        $rssBook->add_item(title => "$chapterTitle", link => "$chapterLink", description => "$text");
+        $targetBook->{LastChapterFetched}=$chapterCounter if $chapterCounter > $targetBook->{LastChapterFetched};
+        sleep(20); #Prevent been blocked for too much request
+    }
+    return ($targetBook,$rssBook);
+}
+
diff --git a/blib/man1/.exists b/blib/man1/.exists
new file mode 100644
index 0000000..e69de29
diff --git a/blib/man3/.exists b/blib/man3/.exists
new file mode 100644
index 0000000..e69de29
diff --git a/blib/script/.exists b/blib/script/.exists
new file mode 100644
index 0000000..e69de29
diff --git a/pm_to_blib b/pm_to_blib
new file mode 100644
index 0000000..e69de29