-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgetdbpedia.sh
executable file
·66 lines (53 loc) · 2.01 KB
/
getdbpedia.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#!/usr/bin/env bash
#
# Naive script for crawling datasets from the DBpedia web servers
# (see http://wiki.dbpedia.org/Downloads2015-04).
#
# Usage: ./getdbpedia.sh
#
# Every dataset is fetched with its own crawl call, one per line, so the
# console output shows exactly which crawl failed and the run can be picked
# up again from that dataset. Files are saved into the current directory
# under their remote names (curl -O).
#
# Exit status: 0 when every download succeeded, 1 when at least one failed.

# Number of downloads that failed so far; incremented by crawl.
failed=0

#######################################
# Announce and download a single dataset.
# Globals:   failed (incremented when the download fails)
# Arguments: $1 - progress message printed before the download starts
#            $2 - URL of the file to download
# Outputs:   progress message and "done" on stdout; failure notice on stderr
# Returns:   0 always, so one failed download does not stop later ones
#######################################
crawl() {
  local message=$1
  local url=$2

  printf '%s\n' "$message"
  # Short pause, kept only so the console output is easier to follow.
  sleep 1
  # -f: treat HTTP errors as failures instead of saving the error page as
  #     if it were the dataset.
  # -L: follow redirects issued by the download server.
  # -O: save under the remote file name.
  if curl -f -L -O "$url"; then
    echo "done"
  else
    printf 'failed: %s\n' "$url" >&2
    failed=$((failed + 1))
  fi
  return 0
}

# test crawl for smaller datasets
echo "Crawl initiated"
echo "crawling one dataset at a time.."
sleep 1

crawl "currently crawling Mapping-based Types.." \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/instance-types_en.nq.bz2"
crawl "currently crawling Mapping-based Properties.." \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/mappingbased-properties_en.nq.bz2"
crawl "currently crawling Titles.." \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/labels_en.nq.bz2"
crawl "currently crawling Short Abstracts.." \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/short-abstracts_en.nq.bz2"
crawl "currently crawling Extended Abstracts.." \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/long-abstracts_en.nq.bz2"
crawl "currently crawling Articles Categories.." \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/article-categories_en.nq.bz2"
crawl "currently crawling Links to Wikipedia Article.." \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/wikipedia-links_en.nq.bz2"
crawl "currently crawling Wikipedia Pagelinks.." \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/page-links_en.nq.bz2"
# NOTE(review): this entry repeats the first dataset (same URL as
# "Mapping-based Types.." above); it may have been meant to point at a
# different file - confirm, or drop it to avoid downloading twice.
crawl "currently crawling Mapping-based Types" \
  "http://downloads.dbpedia.org/2015-04/core-i18n/en/instance-types_en.nq.bz2"
crawl "currently crawling DBpedia Ontology.." \
  "http://downloads.dbpedia.org/2015-04/dbpedia_2015-04.nt.bz2"
crawl "Currently crawling Links to RDF Bookmashup.." \
  "http://downloads.dbpedia.org/2015-04/links/bookmashup_links.nt.bz2"

# Report the overall result so callers can detect a partial crawl.
if [ "$failed" -ne 0 ]; then
  printf '%s crawl(s) failed\n' "$failed" >&2
  exit 1
fi