forked from JonathanReeve/corpus-list
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcorpus-list.yaml
99 lines (93 loc) · 3.56 KB
/
corpus-list.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Shakespeare His Contemporaries: TEI-Simple texts distributed as a git repository.
- shortname: shc
  title: Shakespeare His Contemporaries
  categories: literature
  authors: multiple
  centuries: 16th, 17th
  # Download descriptor: markup of the texts, where to fetch them, and how they are packaged.
  text:
    markup: TEI-Simple
    url: 'https://github.com/JonathanReeve/corpus-SHC.git'
    file-format: git
# Folger Shakespeare Library Digital Texts: one corpus offered in three markups.
- shortname: folger-shakespeare
  title: Folger Shakespeare Library Digital Texts
  categories: literature
  centuries: 16th, 17th
  authors: single
  homepage: http://www.folgerdigitaltexts.org/
  url-source: http://www.folgerdigitaltexts.org/download/
  # Unlike single-download entries, `text` here is a sequence: one mapping per
  # available markup, each with its own zip archive URL.
  text:
    - markup: TEI
      url: http://www.folgerdigitaltexts.org/download/xml/FolgerDigitalTexts_XML_Complete.zip
      file-format: zip
    - markup: HTML
      url: http://www.folgerdigitaltexts.org/download/html/FolgerDigitalTexts_HTML_Complete.zip
      file-format: zip
    - markup: TXT
      url: http://www.folgerdigitaltexts.org/download/txt/FolgerDigitalTexts_TXT_Complete.zip
      file-format: zip
# Perseus Canonical Greek: TEI texts distributed as a git repository.
# This entry declares no `centuries` key.
- shortname: perseus-c-greek
  title: Perseus Canonical Greek
  categories: classics
  authors: multiple
  text:
    markup: TEI
    url: https://github.com/PerseusDL/canonical-greekLit.git
    file-format: git
# Stanford Literary Lab 1880s fiction: a full corpus plus two author-gender subcorpora,
# each hosted in its own git repository.
- shortname: stanford-1880s
  title: 'Adult British Fiction of the 1880s, Assembled by the Stanford Literary Lab'
  categories: literature
  centuries: 19th
  text:
    markup: TXT
    url: https://github.com/JonathanReeve/corpus-1880s-all.git
    file-format: git
  # NOTE(review): `subcorpora` is placed as a sibling of `text` (entry level);
  # confirm against the consumer's schema that it is not meant to nest under `text`.
  # NOTE(review): the subcorpora use `markup: txt` while the parent uses `TXT`;
  # values are kept as-is — verify whether the consumer compares case-sensitively.
  subcorpora:
    - shortname: stanford-1880s-male
      title: 'Adult British fiction of the 1880s, male authors. Assembled by the Stanford Literary Lab'
      text:
        markup: txt
        url: https://github.com/JonathanReeve/corpus-1880s-male.git
        file-format: git
    - shortname: stanford-1880s-female
      title: 'Adult British fiction of the 1880s, female authors. Assembled by the Stanford Literary Lab'
      text:
        markup: txt
        url: https://github.com/JonathanReeve/corpus-1880s-female.git
        file-format: git
# Reuters-21578 test collection: plain-text data shipped as a single tar.gz archive.
# This entry declares no `centuries` or `authors` key.
- shortname: reuters-21578
  title: Reuters-21578
  homepage: http://www.daviddlewis.com/resources/testcollections/reuters21578/
  categories: history
  text:
    markup: txt
    url: http://www.daviddlewis.com/resources/testcollections/reuters21578/reuters21578.tar.gz
    file-format: tar.gz
# ECCO-TCP: XML texts split across several zip archives.
- shortname: ecco-tcp
  title: Eighteenth Century Collections Online / Text Creation Partnership ECCO-TCP
  homepage: http://www.textcreationpartnership.org/tcp-ecco/
  categories: literature
  centuries: 18th
  text:
    markup: xml
    file-format: zip
    # `url` is a sequence here (one archive per entry) rather than a single
    # string as in the single-download entries.
    url:
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200510.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200601.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200604.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200609.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200702.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200802.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200809.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200902.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-200909.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-201004.ecco.zip
      - http://www.lib.umich.edu/tcp/docs/texts/ecco/xml-201106.ecco.zip
# Deutsches Textarchiv (German Text Archive): TEI texts in one complete zip archive.
- shortname: dta
  title: Deutsches Textarchiv (German Text Archive)
  homepage: http://www.deutschestextarchiv.de/
  # `categories` and `centuries` are comma-separated plain strings, not YAML sequences.
  categories: literature, science, history, etc.
  centuries: 16th, 17th, 18th, 19th
  text:
    markup: TEI
    file-format: zip
    # A one-element sequence: `url` is written as a list even for a single archive.
    url:
      - http://media.dwds.de/dta/download/dta_komplett_2016-02-11.zip