-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy patharxiv2md
executable file
·90 lines (83 loc) · 3.29 KB
/
arxiv2md
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl
use Getopt::Std;
use Switch 'Perl6'; use Sys::Hostname; use Time::HiRes qw(usleep);#usleep($microseconds)
use File::Basename; use File::Copy; use File::Find; use Cwd;use File::Path;
# --------------------- OPTIONS --------------------------------
# Remember the program name (used as a prefix in every message) and the
# directory the script was started from.
$prog = basename($0);
$cdir = cwd;
chomp($cdir);
# -T and -t are declared as value-taking options; any other single-letter
# switch ends up in %opts as a plain flag.
getopt('Tt', \%opts); #here we add only options with args
# Every option that was seen leads to usage(): -h prints the plain help text,
# anything else is reported as invalid.
foreach $opt (keys %opts) {
    if ($opt eq 'h') {
        usage();
        last;
    }
    else {
        usage("Not a valid option: $opt");
    }
}
# -------------------------------------------------------------------
# The arXiv identifier is a mandatory positional argument.
usage() unless @ARGV;
# Build the download locations for the requested paper.
# The identifier comes straight from the command line and is later handed to
# external commands, so refuse anything that does not look like an arXiv
# identifier (e.g. 1010.0957 or hep-lat/0402031).
$arxiv_no = $ARGV[0];
if ($arxiv_no !~ m{\A[A-Za-z0-9][A-Za-z0-9.\-]*(?:/[A-Za-z0-9][A-Za-z0-9.\-]*)?\z}) {
    usage("Not a valid arXiv number: $arxiv_no");
}
$arxiv_url = "http://arxiv.org/e-print/"; # url where source of papers is
$arxiv_abs = "http://arxiv.org/abs/";     # url where abstract is
$arxiv_url .= $arxiv_no;
# $arxiv_abs already ends in a slash; inserting another one here used to
# produce ".../abs//<id>".
$meta_url = "${arxiv_abs}${arxiv_no}"; # html file with meta information (title/authors)
# in case the paper number is in the arxiv/000000 format, then redefine $arxiv_no:
# $arxiv_ar keeps the archive part (before the first slash), $arxiv_no the rest.
$arxiv_ar = "$arxiv_no";
if ($arxiv_no =~ /(.*?)\/(.*?)$/) {
    $arxiv_ar = $1;
    $arxiv_no = $2;
}
print "${prog}:arxiv_ar = $arxiv_ar ; arxiv_no = $arxiv_no\n";
$mobi = "${arxiv_no}.mobi"; # final output file!
#--------------------------------------------------------------------
# Echo an external command in the "<prog>>command" form used throughout the
# log, then run it without going through the shell (list form of system), so
# that characters in the arguments are never interpreted by a shell.
# A failing command only produces a warning: e.g. tar is expected to fail for
# e-prints that are not tarballs, and the later checks report what is missing.
sub _run_logged {
    my @cmd = @_;
    print "${prog}>@cmd\n";
    system(@cmd) == 0
        or warn "${prog}:command failed: @cmd\n";
    return;
}
# Start from an empty scratch directory; everything below happens inside it.
$tmpdir = "/tmp/tmp-${prog}";
if (-d $tmpdir) {
    print "${prog}>rm -rf $tmpdir\n";
    rmtree($tmpdir);
}
# Stop here if the scratch directory is unusable: carrying on would download
# and unpack files into whatever directory the script was started from.
mkdir($tmpdir) or die "${prog}:cannot create $tmpdir: $!\n";
chdir($tmpdir) or die "${prog}:cannot chdir to $tmpdir: $!\n";
# Fetch the e-print and try to unpack the downloaded file named $arxiv_no.
_run_logged('wget', '--user-agent=Mozilla/1.2', $arxiv_url);
_run_logged('tar', 'xvf', $arxiv_no);
$meta_source = "${arxiv_no}.html"; # html file with meta information (title/authors)
_run_logged('wget', '--user-agent=Mozilla/1.2', '-O', $meta_source, $meta_url);
# Pick the main LaTeX file: the first *.tex file in the current directory that
# contains an uncommented \documentclass or \documentstyle.
# The files are read directly instead of shelling out to grep, because their
# names come from the downloaded archive and must not reach a shell.
$latex_source = "";
TEXFILE: foreach $f (<*.tex>) {
    my $tex;
    if (!open($tex, '<', $f)) {
        warn "${prog}:cannot read $f: $!\n";
        next TEXFILE;
    }
    my $is_main = 0;
    while (my $line = <$tex>) {
        # [^%]* keeps the match in front of any comment sign on the line.
        if ($line =~ /^[^%]*\\document(?:class|style)\b/) {
            $is_main = 1;
            last;
        }
    }
    close($tex);
    if ($is_main) {
        $latex_source = $f;
        last TEXFILE;    # "break" is not a Perl loop control; stop at the first hit
    }
}
if ($latex_source) {
    print "${prog}:Found LaTeX source: $latex_source\n";
}
else {
    die "${prog}:LaTeX source not found!";
}
# Determine title and authors:
# both are taken from the content="..." attribute of the lines mentioning
# citation_title / citation_author in the downloaded abstract page.
# Authors are joined with '&'.
$title = "[${arxiv_no}] ";
$authors = "";
my @author_names;
if (open(my $meta, '<', $meta_source)) {
    while (my $line = <$meta>) {
        # Use the capture only if the content="..." match itself succeeded;
        # otherwise $1 would not belong to this line.
        if ($line =~ /citation_title/ and $line =~ /content=\"(.*?)\"/) {
            $title .= $1;
        }
        if ($line =~ /citation_author/ and $line =~ /content=\"(.*?)\"/) {
            push @author_names, $1;
        }
    }
    close($meta);
}
else {
    # Best effort: without the page the title is just the bracketed number.
    warn "${prog}:cannot read $meta_source: $!\n";
}
$authors = join('&', @author_names);
print "${prog}:title= $title\n${prog}:authors= $authors\n";
print "--------------------------------------------------------\n";
print "${prog}:Temporary files can be found in $tmpdir\n";
print "${prog}:Title: $title\n";
print "${prog}:Authors: $authors\n";
print "${prog}:arXiv: $ARGV[0]\n";
# ----------------------- HELP MESSAGE ------------------------------
# Print the usage text on STDERR and terminate with exit status 1.
#   usage();          prints the usage text only
#   usage($message);  prints $message on its own line, then the usage text
# The sub deliberately has no prototype: an empty "()" prototype declares a
# sub without arguments, which contradicts the optional message.
sub usage {
    my $message = "";
    if (@_) { $message = shift(@_) . "\n"; }
    $message .= << "EOF";
Usage: ${prog} <arxiv number>
e.g. ${prog} 1010.0957
${prog} hep-lat/0402031
Downloads source files from arXiv (if they exist), converts them to html and then to the mobi format (Kindle friendly)
Uses htlatex and ebook-convert (assumes TeX4ht and Calibre exist on your system)
On Ubuntu: sudo apt-get install texlive-full calibre
K. N. Anagnostopoulos, NTUA 2013
EOF
    print STDERR $message;
    exit(1);
}
# Hook with the name Getopt::Std looks for on --help; it just shows the usage
# text.  NOTE(review): the original author doubted that this hook is actually
# triggered in this script - confirm.
sub main::HELP_MESSAGE() {
    usage();
}
# -------------------------------------------------------------------