From 96f098a5272f0dcb02916dd840bb2adfbe49e67f Mon Sep 17 00:00:00 2001 From: gzqx <24262751+gzqx@users.noreply.github.com> Date: Wed, 6 Mar 2024 18:47:23 +0800 Subject: [PATCH] update makefile --- .gitignore | 2 + MYMETA.json | 4 +- MYMETA.yml | 4 +- Makefile | 76 ++++---- Makefile.PL | 6 - blib/arch/.exists | 0 blib/arch/auto/stupid_rss_generate/.exists | 0 blib/bin/.exists | 0 blib/lib/.exists | 0 blib/lib/auto/stupid_rss_generate/.exists | 0 blib/lib/stupid_rss_generator.pl | 214 +++++++++++++++++++++ blib/man1/.exists | 0 blib/man3/.exists | 0 blib/script/.exists | 0 pm_to_blib | 0 15 files changed, 257 insertions(+), 49 deletions(-) create mode 100644 blib/arch/.exists create mode 100644 blib/arch/auto/stupid_rss_generate/.exists create mode 100644 blib/bin/.exists create mode 100644 blib/lib/.exists create mode 100644 blib/lib/auto/stupid_rss_generate/.exists create mode 100644 blib/lib/stupid_rss_generator.pl create mode 100644 blib/man1/.exists create mode 100644 blib/man3/.exists create mode 100644 blib/script/.exists create mode 100644 pm_to_blib diff --git a/.gitignore b/.gitignore index 18d4cb4..b887ece 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ record.yaml rss/ rss/* +MYMETA* +Makefile diff --git a/MYMETA.json b/MYMETA.json index a320533..331e6d7 100644 --- a/MYMETA.json +++ b/MYMETA.json @@ -12,7 +12,7 @@ "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, - "name" : "STUPID-RSS_GENERATOR", + "name" : "stupid_rss_generate", "no_index" : { "directory" : [ "t", @@ -46,6 +46,6 @@ } }, "release_status" : "stable", - "version" : 0.01, + "version" : "", "x_serialization_backend" : "JSON::PP version 4.16" } diff --git a/MYMETA.yml b/MYMETA.yml index 318c443..a9e4823 100644 --- a/MYMETA.yml +++ b/MYMETA.yml @@ -12,7 +12,7 @@ license: unknown meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' -name: STUPID-RSS_GENERATOR +name: stupid_rss_generate no_index: directory: - t @@ -28,5 +28,5 @@ requires: URI: '5.27' XML::RSS: '1.63' YAML::Tiny: '1.74' -version: 0.01 +version: '' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' diff --git a/Makefile b/Makefile index 64b322b..4a95f86 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -# This Makefile is for the STUPID::RSS_GENERATOR extension to perl. +# This Makefile is for the stupid_rss_generate extension to perl. # # It was generated automatically by MakeMaker version # 7.70 (Revision: 77000) from the contents of @@ -13,10 +13,8 @@ # BUILD_REQUIRES => { } # CONFIGURE_REQUIRES => { } -# NAME => q[STUPID::RSS_GENERATOR] # PREREQ_PM => { Clone=>q[0.46], Getopt::Long=>q[2.54], HTML::TreeBuilder=>q[5.07], IO::Prompter=>q[0.005001], LWP::UserAgent=>q[6.76], Lingua::ZH::Numbers=>q[0.04], Time::Piece=>q[1.3401], URI=>q[5.27], XML::RSS=>q[1.63], YAML::Tiny=>q[1.74] } # TEST_REQUIRES => { } -# VERSION_FROM => q[stupid_rss_generator.pl] # --- MakeMaker post_initialize section: @@ -54,13 +52,13 @@ VENDORLIBEXP = /usr/share/perl5/vendor_perl AR_STATIC_ARGS = cr DIRFILESEP = / DFSEP = $(DIRFILESEP) -NAME = STUPID::RSS_GENERATOR -NAME_SYM = STUPID_RSS_GENERATOR -VERSION = 0.01 +NAME = stupid_rss_generate +NAME_SYM = stupid_rss_generate +VERSION = VERSION_MACRO = VERSION -VERSION_SYM = 0_01 +VERSION_SYM = DEFINE_VERSION = -D$(VERSION_MACRO)=\"$(VERSION)\" -XS_VERSION = 0.01 +XS_VERSION = XS_VERSION_MACRO = XS_VERSION XS_DEFINE_VERSION = -D$(XS_VERSION_MACRO)=\"$(XS_VERSION)\" INST_ARCHLIB = blib/arch @@ -146,11 +144,11 @@ MM_REVISION = 77000 # PARENT_NAME = NAME without BASEEXT and no trailing :: (eg Foo::Bar) # DLBASE = Basename part of dynamic library. May be just equal BASEEXT. MAKE = make -FULLEXT = STUPID/RSS_GENERATOR -BASEEXT = RSS_GENERATOR -PARENT_NAME = STUPID +FULLEXT = stupid_rss_generate +BASEEXT = stupid_rss_generate +PARENT_NAME = DLBASE = $(BASEEXT) -VERSION_FROM = stupid_rss_generator.pl +VERSION_FROM = OBJECT = LDFROM = $(OBJECT) LINKTYPE = dynamic @@ -168,8 +166,8 @@ MAN3PODS = CONFIGDEP = $(PERL_ARCHLIBDEP)$(DFSEP)Config.pm $(PERL_INCDEP)$(DFSEP)config.h # Where to build things -INST_LIBDIR = $(INST_LIB)/STUPID -INST_ARCHLIBDIR = $(INST_ARCHLIB)/STUPID +INST_LIBDIR = $(INST_LIB) +INST_ARCHLIBDIR = $(INST_ARCHLIB) INST_AUTODIR = $(INST_LIB)/auto/$(FULLEXT) INST_ARCHAUTODIR = $(INST_ARCHLIB)/auto/$(FULLEXT) @@ -254,8 +252,8 @@ CI = ci -u RCS_LABEL = rcs -Nv$(VERSION_SYM): -q DIST_CP = best DIST_DEFAULT = tardist -DISTNAME = STUPID-RSS_GENERATOR -DISTVNAME = STUPID-RSS_GENERATOR-0.01 +DISTNAME = stupid_rss_generate +DISTVNAME = stupid_rss_generate- # --- MakeMaker macro section: @@ -491,7 +489,7 @@ metafile : create_distdir $(NOECHO) $(ECHO) 'meta-spec:' >> META_new.yml $(NOECHO) $(ECHO) ' url: http://module-build.sourceforge.net/META-spec-v1.4.html' >> META_new.yml $(NOECHO) $(ECHO) ' version: '\''1.4'\''' >> META_new.yml - $(NOECHO) $(ECHO) 'name: STUPID-RSS_GENERATOR' >> META_new.yml + $(NOECHO) $(ECHO) 'name: stupid_rss_generate' >> META_new.yml $(NOECHO) $(ECHO) 'no_index:' >> META_new.yml $(NOECHO) $(ECHO) ' directory:' >> META_new.yml $(NOECHO) $(ECHO) ' - t' >> META_new.yml @@ -507,7 +505,7 @@ metafile : create_distdir $(NOECHO) $(ECHO) ' URI: '\''5.27'\''' >> META_new.yml $(NOECHO) $(ECHO) ' XML::RSS: '\''1.63'\''' >> META_new.yml $(NOECHO) $(ECHO) ' YAML::Tiny: '\''1.74'\''' >> META_new.yml - $(NOECHO) $(ECHO) 'version: 0.01' >> META_new.yml + $(NOECHO) $(ECHO) 'version: '\'''\''' >> META_new.yml $(NOECHO) $(ECHO) 'x_serialization_backend: '\''CPAN::Meta::YAML version 0.018'\''' >> META_new.yml -$(NOECHO) $(MV) META_new.yml $(DISTVNAME)/META.yml $(NOECHO) $(ECHO) Generating META.json @@ -525,7 +523,7 @@ metafile : create_distdir $(NOECHO) $(ECHO) ' "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec",' >> META_new.json $(NOECHO) $(ECHO) ' "version" : 2' >> META_new.json $(NOECHO) $(ECHO) ' },' >> META_new.json - $(NOECHO) $(ECHO) ' "name" : "STUPID-RSS_GENERATOR",' >> META_new.json + $(NOECHO) $(ECHO) ' "name" : "stupid_rss_generate",' >> META_new.json $(NOECHO) $(ECHO) ' "no_index" : {' >> META_new.json $(NOECHO) $(ECHO) ' "directory" : [' >> META_new.json $(NOECHO) $(ECHO) ' "t",' >> META_new.json @@ -559,7 +557,7 @@ metafile : create_distdir $(NOECHO) $(ECHO) ' }' >> META_new.json $(NOECHO) $(ECHO) ' },' >> META_new.json $(NOECHO) $(ECHO) ' "release_status" : "stable",' >> META_new.json - $(NOECHO) $(ECHO) ' "version" : 0.01,' >> META_new.json + $(NOECHO) $(ECHO) ' "version" : "",' >> META_new.json $(NOECHO) $(ECHO) ' "x_serialization_backend" : "JSON::PP version 4.16"' >> META_new.json $(NOECHO) $(ECHO) '}' >> META_new.json -$(NOECHO) $(MV) META_new.json $(DISTVNAME)/META.json @@ -863,31 +861,31 @@ testdb_static test_static :: subdirs-test_static # --- MakeMaker ppd section: # Creates a PPD (Perl Package Description) for a binary distribution. ppd : - $(NOECHO) $(ECHO) '' > STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) ' ' >> STUPID-RSS_GENERATOR.ppd - $(NOECHO) $(ECHO) '' >> STUPID-RSS_GENERATOR.ppd + $(NOECHO) $(ECHO) '' > stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) ' ' >> stupid_rss_generate.ppd + $(NOECHO) $(ECHO) '' >> stupid_rss_generate.ppd # --- MakeMaker pm_to_blib section: pm_to_blib : $(FIRST_MAKEFILE) $(TO_INST_PM) $(NOECHO) $(ABSPERLRUN) -MExtUtils::Install -e 'pm_to_blib({@ARGV}, '\''$(INST_LIB)/auto'\'', q[$(PM_FILTER)], '\''$(PERM_DIR)'\'')' -- \ - 'stupid_rss_generator.pl' '$(INST_LIB)/STUPID/stupid_rss_generator.pl' + 'stupid_rss_generator.pl' '$(INST_LIB)/stupid_rss_generator.pl' $(NOECHO) $(TOUCH) pm_to_blib diff --git a/Makefile.PL b/Makefile.PL index ac52d79..1cecb0b 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,12 +1,6 @@ -use Module::Metadata; use ExtUtils::MakeMaker; -my %prereqs; - - WriteMakefile( - NAME => 'STUPID::RSS_GENERATOR', - VERSION_FROM => 'stupid_rss_generator.pl', PREREQ_PM => { 'Getopt::Long' => 2.54, 'Clone' => 0.46, diff --git a/blib/arch/.exists b/blib/arch/.exists new file mode 100644 index 0000000..e69de29 diff --git a/blib/arch/auto/stupid_rss_generate/.exists b/blib/arch/auto/stupid_rss_generate/.exists new file mode 100644 index 0000000..e69de29 diff --git a/blib/bin/.exists b/blib/bin/.exists new file mode 100644 index 0000000..e69de29 diff --git a/blib/lib/.exists b/blib/lib/.exists new file mode 100644 index 0000000..e69de29 diff --git a/blib/lib/auto/stupid_rss_generate/.exists b/blib/lib/auto/stupid_rss_generate/.exists new file mode 100644 index 0000000..e69de29 diff --git a/blib/lib/stupid_rss_generator.pl b/blib/lib/stupid_rss_generator.pl new file mode 100644 index 0000000..f50ed85 --- /dev/null +++ b/blib/lib/stupid_rss_generator.pl @@ -0,0 +1,214 @@ +#package STUPID::RSS_GENERATOR 0.01; +$VERSION=0.01; + +use warnings; +use strict; +use utf8; +use 5.027; +use Getopt::Long; +use HTML::TreeBuilder; +use IO::Prompter; +use LWP::UserAgent; +use Time::Piece; +use URI; +use YAML::Tiny; +use Clone qw(clone); +use Lingua::ZH::Numbers; +use XML::RSS; + +use constant DEFAULT_RECORD_NAME => 'record.yaml'; +use constant RECORD_TIME_FORMAT => '%Y-%m-%d-%H-%M-%S'; +use constant PRINT_HUMAN_TIME_FORMAT => '%Y-%m-%d %H:%M:%S'; +use constant GENESIS => Time::Piece->strptime('1970-01-01-00-00-00', RECORD_TIME_FORMAT); +use constant RSS_FOLDER => './rss_folder'; + +my $cliRecordFile; +my $verbose=1; +my $automation; +my $help; +my $rssFolderPath='./rss/'; + + +GetOptions{ + 'R|record=s' => \$cliRecordFile, + 'f|feed-path=s' => \$rssFolderPath, + 'v|verbose' => \$verbose, + 'h|help' => \$help, + 'a|auto' => \$automation, +} or die "Unknown option!\n"; + +unless (-d $rssFolderPath){ + mkdir $rssFolderPath or die "Failed to create $rssFolderPath."; + say "$rssFolderPath not exist. Created One."; +} + +#book yaml template +my $bookTemplate={ + Title => '', + Author => '', + ContentPageUrl => '', + LastChapterFetched => '0', + LastFetchTime => GENESIS->strftime(RECORD_TIME_FORMAT), + LastCheckTime => GENESIS->strftime(RECORD_TIME_FORMAT), + HashOfTitle => '', + RegrexForTitle => '', + RegrexForChapterLinkAndNumber => '', + RegrexForChapterNumber => '', + RegrexForChapterTitle => '', + RegrexForText => '', + RSSFeed => '', +}; + + +my $recordFile=DEFAULT_RECORD_NAME; + +# If recordfile is passed through argument +if ($cliRecordFile){ + $recordFile=$cliRecordFile; +} + +sub vsay{ + my $string=pop @_; + if ($verbose) { + say $string; + } +} + + +#create user agent +#TODO: customize agent +my $userAgent=LWP::UserAgent->new(timeout => 10); +$userAgent->agent('Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0'); + +#Proxy +#TODO: customized proxy config +my $useSystemProxy=prompt -yn, 'Do you want to use system proxy?'; +if ($useSystemProxy =~/^(y|yes)$/i) { + $userAgent->env_proxy; #use proxy +} + +# create record.yaml if not exist, and trigger first book addition +unless (-e $recordFile){ + my $createRecrodFileInput = prompt "File '$recordFile' does not exist. Do you want to create a new one? (y/n)\n", -yn; + if ($createRecrodFileInput =~/^(y|yes)$/i) { + my $yaml=YAML::Tiny->new(); + my $rss=XML::RSS->new(version => '2.0'); + my $newBook=clone($bookTemplate); + ($newBook,$rss)=&addNewBook($newBook); + push @$yaml, $newBook; + $yaml->write($recordFile) or die ("Failed to save to $recordFile"); + $rss->save($rssFolderPath.$rss->channel('title').'.xml') or die ("Failed to save to $rssFolderPath$rss->channel('title').xml"); + } else { + say "No '$recordFile' found or created, exiting."; + exit; + } +} else { +#update books if record file found + my $yaml=YAML::Tiny->read("$recordFile"); + foreach my $targetBook (@{$yaml}){ + my $rss=XML::RSS->new(version => 2.0); + my $rssName=$targetBook->{Title}.".xml"; + if (-e $rssFolderPath.$rssName){ + $rss->parsefile($rssFolderPath.$rssName); + } else{ + $rss->channel( + title => "$targetBook->{Title}", + link => "$targetBook->{ContentPageUrl}", + ); + } + #TODO:support limit rss entries per file + my $updatedTargetBook; + ($targetBook, $rss)=&updateBooks($targetBook,$rss); + &vsay($rssFolderPath.$rssName); + &vsay($rss->as_string); + $rss->save($rssFolderPath.$rssName) or die ("Failed to save to $rssFolderPath$rssName"); + } + $yaml->write($recordFile); +} + + +sub addNewBook{ + #TODO: add new book to existing yaml + my $newBook=pop @_; + my $contentUrlInput= prompt -v, "Input the link to the content page:\n"; + #format uri + my $contentUrl=URI->new($contentUrlInput); + if (not $contentUrl->scheme) { + say ("Using https by default. If you want http connection, specify it in the link"); + $contentUrl->scheme('https'); #use https unless user specified http + } + #create RSS template + my $rssNewBook=XML::RSS->new(version => '2.0'); + + #file newbook content + $newBook->{ContentPageUrl}=$contentUrl->as_string; + #TODO Reuse regrex from same domain + $newBook->{RegrexForTitle}=prompt -v, "Input the regrex for extracting the book title from content page:\n"; + $newBook->{RegrexForChapterLinkAndNumber}=prompt -v, "Input the regrex for extracting the Chapter Link and Number from content page:\n"; + $newBook->{RegrexForChapterTitle}=prompt -v, "Input the regrex for extracting the Chapter Title from text page:\n"; + $newBook->{RegrexForText}=prompt -v, "Input the regrex for extracting the text from text page:\n"; + + ($newBook,$rssNewBook)=&updateBooks($newBook,$rssNewBook); + return ($newBook, $rssNewBook); +} + +sub updateBooks{ + my ($targetBook,$rssBook)=@_; + + #get content page + my $contentPageResponse=$userAgent->get($targetBook->{ContentPageUrl}); + if ($contentPageResponse->is_success) { + my $contentPageContent=$contentPageResponse->decoded_content; + # if it is a new book + if ($targetBook->{Title} eq "" && $contentPageContent =~/$targetBook->{RegrexForTitle}/){ + $targetBook->{Title} = $1; + &vsay("Title of book is $targetBook->{Title}"); + $rssBook->channel( + title => "$targetBook->{Title}", + link => "$targetBook->{ContentPageUrl}", + ); + } + while($contentPageContent =~/$targetBook->{RegrexForChapterLinkAndNumber}/g){ + #TODO: Handle Chinese number with Lingua::ZH::Numbers + #TODO: Handle situation when link and number is not in same line or link somehow managed to come after chapter number + my $chapterLink=$1; + &vsay("Chapter link is $chapterLink"); + my $chapterCounter = $2; + &vsay("Chapter checked is No.$chapterCounter"); + + #fetch new chapter if there is any + if ($chapterCounter > $targetBook->{LastChapterFetched}){ + my $textPageResponse=$userAgent->get($chapterLink); + if ($textPageResponse->is_success){ + my $textPageContent=$textPageResponse->decoded_content; + #get chapter title + my $chapterTitle=''; + if ($textPageContent =~/$targetBook->{RegrexForChapterTitle}/){ + $chapterTitle=$1; + &vsay("Chapter Title is $chapterTitle"); + }else{ + say "Failed to get chapter title. Check your regrex." + } + #get chapter text + my $text=''; + while ($textPageContent=~/$targetBook->{RegrexForText}/g){ + $text.=$1; + #&vsay("Get a new line of text as: \n $text"); + } + $rssBook->add_item( + title => "$chapterTitle", + link => "$chapterLink", + description => "$text", + ); + } + } + $targetBook->{LastChapterFetched}++; + #last; # for testing + sleep(20); #Prevent been blocked for too much request + } + }else{ + say ("Timeout when requesting content page, check your url or internet connection."); + } + return ($targetBook,$rssBook); +} + diff --git a/blib/man1/.exists b/blib/man1/.exists new file mode 100644 index 0000000..e69de29 diff --git a/blib/man3/.exists b/blib/man3/.exists new file mode 100644 index 0000000..e69de29 diff --git a/blib/script/.exists b/blib/script/.exists new file mode 100644 index 0000000..e69de29 diff --git a/pm_to_blib b/pm_to_blib new file mode 100644 index 0000000..e69de29