-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathimport-urls.pl
137 lines (128 loc) · 2.95 KB
/
import-urls.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
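# Usage: import-urls.pl new-file imported-objects log-path
#
# new-file lists the URLs to import and imported-objects lists the URLs
# that have been imported up to now. Every URL in new-file that is not
# already imported is handed to the importer.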
use Digest::MD5 qw(md5_hex);
use POSIX qw(strftime);
# rewrite($url)
#
# Turn a Zenodo link into its DOI URL, "https://doi.org/10.5281/zenodo.<id>".
#
# Recognised inputs:
#   https://zenodo.org/doi/10.5281/zenodo.11298208   (DOI-style link)
#   https://zenodo.org/records/12811284              (record page)
#   https://zenodo.org/records/12811284/files/x.zip  (path below a record)
# Anything else falls back to the last path segment: the part after its
# first dot if it contains one, otherwise the whole segment.
#
# Parameter: $url - the link to convert (undef is treated as '').
# Returns:   the DOI URL as a string. The result is stable when fed back in.
#
# All working variables are lexical so that calling this sub never disturbs
# the caller's own $url / @elems / $id.
sub rewrite
{
    my ($url) = @_;
    $url = '' unless defined $url;

    my $id;
    if ($url =~ m{zenodo\.(\d+)})
    {
        # DOI-style: the numeric id follows "zenodo." directly.
        $id = $1;
    }
    elsif ($url =~ m{/records?/(\d+)})
    {
        # Record page, possibly followed by /files/..., ?query or #fragment.
        $id = $1;
    }
    else
    {
        # Fallback: derive the id from the last path segment.
        my @elems  = split m{/}, $url;
        my $lastel = @elems ? $elems[-1] : '';
        if ($lastel =~ /\./)
        {
            $id = (split /\./, $lastel)[1];
        }
        else
        {
            $id = $lastel;
        }
        $id = '' unless defined $id;
    }

    return "https://doi.org/10.5281/zenodo." . $id;
}
# ---------------------------------------------------------------------------
# Main program.
#
# Arguments: new-file imported-objects log-path
#
# Steps:
#   1. Load the already-imported URLs from imported-objects.
#   2. Read new-file and queue every URL that is neither imported itself nor
#      has an imported base URL (the URL cut at /commit, /tree, /release or
#      /archive).
#   3. Write <log-path>/logs/importer-script.sh with one import command per
#      queued URL followed by a single export command.
#   4. Restart the importer container and run the script inside it; lines
#      containing "Exported" go to exported.txt and the artifact listing is
#      appended to nowimported.txt.
#   5. Map exported ids back to URLs and rewrite imported.txt (in the current
#      directory) with the union of old and newly exported URLs.
# ---------------------------------------------------------------------------
use strict;
use warnings;

# Normalise one line of a URL list: remove all whitespace, drop one trailing
# slash and send Zenodo links through rewrite(). Returns '' for a blank line.
sub _canonical_url
{
    my ($line) = @_;
    (my $url = $line) =~ s/\s+//g;
    $url =~ s{/$}{};
    $url = rewrite($url) if $url =~ /zenodo\.org/;
    return $url;
}

# Cut a URL at the first /commit, /tree, /release or /archive component and
# drop a trailing slash, giving the base URL used for the "already imported"
# check.
sub _base_url
{
    my ($url) = @_;
    my $base = $url;
    for my $marker (qw(commit tree release archive))
    {
        $base =~ s{/\Q$marker\E.*$}{};
    }
    $base =~ s{/$}{};
    return $base;
}

# Quote a word for a POSIX shell. Words made only of characters the shell
# treats literally are returned unchanged, so ordinary URLs produce the same
# script text as before; anything else is wrapped in single quotes.
sub _shell_quote
{
    my ($word) = @_;
    return $word if $word =~ m{\A[A-Za-z0-9_./:\@%+=,-]+\z};
    (my $quoted = $word) =~ s/'/'\\''/g;
    return "'$quoted'";
}

# Run a command through the shell and warn (without aborting) when it cannot
# be started or exits non-zero. Returns the raw system() status.
sub _run_shell
{
    my ($command) = @_;
    my $status = system($command);
    if ($status == -1)
    {
        warn "Could not run '$command': $!\n";
    }
    elsif ($status != 0)
    {
        warn "Command '$command' exited with status " . ($status >> 8) . "\n";
    }
    return $status;
}

# Extract the id from a result line: the text between the first and second
# "=" of the first comma-separated field, with whitespace removed.
# Returns undef when the line has no such field.
sub _parse_id
{
    my ($line) = @_;
    my ($first) = split /,/, $line;
    return undef unless defined $first;
    my (undef, $id) = split /=/, $first;
    return undef unless defined $id;
    $id =~ s/\s+//g;
    return undef if $id eq '';
    return $id;
}

# Extract the URL from a listing line: everything after the first "=" of the
# third comma-separated field, with single quotes and whitespace removed.
# Splitting on the first "=" only keeps URLs that carry a query string intact.
# Returns undef when the line has no such field.
sub _parse_url
{
    my ($line) = @_;
    my @items = split /,/, $line;
    return undef unless defined $items[2];
    my (undef, $url) = split /=/, $items[2], 2;
    return undef unless defined $url;
    $url =~ s/'//g;
    $url =~ s/\s+//g;
    return undef if $url eq '';
    return $url;
}

if (@ARGV < 3)
{
    # Wrong invocation is an error: report on STDERR with a failing status.
    print STDERR "$0 new-file imported-objects log-path\n";
    exit 1;
}
my ($new_file, $imported_file, $log_path) = @ARGV;

# --- 1. URLs imported so far (value 0 = known before this run) -------------
my %imported;
if (open(my $imported_fh, '<', $imported_file))
{
    while (my $line = <$imported_fh>)
    {
        my $url = _canonical_url($line);
        next if $url eq '';    # blank line
        $imported{$url} = 0;
        print "Already imported $url\n";
    }
    close($imported_fh);
}
else
{
    # Keep going as before when the list is unreadable, but say so.
    warn "Cannot open $imported_file ($!); assuming nothing is imported yet\n";
}

# --- 2. Decide what to import ----------------------------------------------
open(my $new_fh, '<', $new_file)
    or die "Cannot open $new_file: $!\n";
my @toimport;
my %queued;
while (my $line = <$new_fh>)
{
    print "Read $line";
    my $url = _canonical_url($line);
    next if $url eq '';        # blank line: nothing to import
    if (exists $imported{$url})
    {
        print "Already exists $url\n";
        next;
    }
    print "Original $url\n";
    # A URL below an already-imported repository is not imported again.
    next if exists $imported{ _base_url($url) };
    # The same URL listed twice is queued once.
    next if $queued{$url}++;
    push @toimport, $url;
}
close($new_fh);

for my $target (@toimport)
{
    print "Will import $target\n";
}

# --- 3. Generate the importer script ---------------------------------------
my $myscript = '';
for my $target (@toimport)
{
    $myscript .= 'bash import-and-publish ' . _shell_quote($target) . "\n";
    $myscript .= "sleep 1\n";
}
$myscript .= "searcch-importer artifact.export -a -e searcch\n";

my $script_path = $log_path . '/logs/importer-script.sh';
open(my $script_fh, '>', $script_path)
    or die "Cannot write $script_path: $!\n";
print {$script_fh} $myscript;
close($script_fh)
    or die "Cannot finish writing $script_path: $!\n";
print "$myscript\n";
print "Printed into $script_path\n";

# --- 4. Run the script inside the importer container -----------------------
_run_shell("docker compose down");
_run_shell("docker compose up -d searcch-importer-prod");
sleep(10);    # give the container time to come up
# The status of this pipeline is grep's: 1 only means no line said "Exported".
_run_shell("docker exec -i searcch-importer-prod /bin/sh -c \"bash logs/importer-script.sh\" | grep Exported > exported.txt");
# NOTE(review): this appends, so nowimported.txt grows on every run; later
# lines win when an id repeats. Confirm whether ">" was intended.
_run_shell("docker exec -i searcch-importer-prod /bin/sh -c \"searcch-importer artifact.list -all\" >> nowimported.txt");

# --- 5. Record what was exported --------------------------------------------
# id -> URL for every artifact the importer lists.
# Presumably each line looks like "...id=<id>,<field>,url='<url>',..." --
# TODO confirm against the importer's output.
my %importednow;
if (open(my $list_fh, '<', 'nowimported.txt'))
{
    while (my $line = <$list_fh>)
    {
        my $id  = _parse_id($line);
        my $url = _parse_url($line);
        next unless defined $id && defined $url;
        $importednow{$id} = $url;
        print "Now imported $url ID $id\n";
    }
    close($list_fh);
}
else
{
    warn "Cannot open nowimported.txt: $!\n";
}

if (open(my $exported_fh, '<', 'exported.txt'))
{
    while (my $line = <$exported_fh>)
    {
        my $id = _parse_id($line);
        next unless defined $id;
        if (!exists $importednow{$id})
        {
            # Without this guard an undef key would end up in imported.txt.
            warn "Exported id $id is not in the artifact listing; skipped\n";
            next;
        }
        $imported{ $importednow{$id} } = 1;    # 1 = exported in this run
        print "Successfully exported $importednow{$id}\n";
    }
    close($exported_fh);
}
else
{
    warn "Cannot open exported.txt: $!\n";
}

# Old and new URLs together, sorted so the file is stable between runs.
open(my $out_fh, '>', 'imported.txt')
    or die "Cannot write imported.txt: $!\n";
for my $entry (sort keys %imported)
{
    print {$out_fh} "$entry\n";
}
close($out_fh)
    or die "Cannot finish writing imported.txt: $!\n";