This repository has been archived by the owner on Feb 12, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
paper.bib
477 lines (477 loc) · 20.2 KB
/
paper.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
% Stemle 2016: bot.zen PoS-tagger paper in the WAC-X / EmpiriST proceedings.
@inproceedings{stemle:2016:WAC-X,
  author    = {Stemle, Egon W.},
  title     = {{bot.zen @ EmpiriST 2015 - A minimally-deep learning PoS-tagger (trained for German CMC and Web data)}},
  booktitle = {Proceedings of the 10th Web as Corpus Workshop (WAC-X) and the EmpiriST Shared Task},
  publisher = {Association for Computational Linguistics},
  pages     = {115--119},
  year      = {2016},
  url       = {http://anthology.aclweb.org/W/W16/W16-2614}
}
% Amazon EC2 product page.
% NOTE(review): the arXiv identifier looks like reference-manager residue (a
% product web page is not an arXiv preprint) -- confirm and drop it if spurious.
% The identifier is stored bare so eprint-aware styles do not print the
% "arXiv:" prefix twice; the corporate author is brace-protected.
@misc{Amazon2011,
  author        = {{Amazon}},
  title         = {{Amazon Elastic Compute Cloud (Amazon EC2)}},
  booktitle     = {Amazon Web Services LLC},
  volume        = {2010},
  year          = {2011},
  archivePrefix = {arXiv},
  arxivId       = {1105.1408v1},
  eprint        = {1105.1408v1},
  url           = {http://aws.amazon.com/ec2/}
}
% Baroni, Dinu and Kruszewski 2014: count vs. predict semantic vectors (ACL 2014, long papers).
@inproceedings{baroni-dinu-kruszewski:2014:P14-1,
  author    = {Baroni, Marco and Dinu, Georgiana and Kruszewski, German},
  title     = {{Don't count, predict! A systematic comparison of context-counting vs. context-predicting semantic vectors}},
  booktitle = {Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  publisher = {Association for Computational Linguistics},
  pages     = {238--247},
  year      = {2014},
  doi       = {10.3115/v1/P14-1023},
  isbn      = {9781937284725},
  note      = {\url{http://www.aclweb.org/anthology/P14-1023}}
}
% Bastien et al. 2012: Theano feature/speed update (arXiv preprint).
% The venue follows the CoRR convention used by the other arXiv entries in this
% file, and the eprint identifier is stored without the "arXiv:" prefix.
@article{Bastien2012a,
  author        = {Bastien, Fr{\'{e}}d{\'{e}}ric and Lamblin, Pascal and Pascanu, Razvan and Bergstra, James and Goodfellow, Ian and Bergeron, Arnaud and Bouchard, Nicolas and Warde-Farley, David and Bengio, Yoshua},
  title         = {{Theano: new features and speed improvements}},
  journal       = {CoRR},
  volume        = {abs/1211.5590},
  pages         = {1--10},
  year          = {2012},
  archivePrefix = {arXiv},
  arxivId       = {1211.5590v1},
  eprint        = {1211.5590v1},
  note          = {\url{http://arxiv.org/abs/1211.5590}}
}
% Benello, Mackie and Anderson 1989: neural-network syntactic category disambiguation.
@article{Benello1989,
  author  = {Benello, Julian and Mackie, Andrew W. and Anderson, James A.},
  title   = {{Syntactic category disambiguation with neural networks}},
  journal = {Computer Speech {\&} Language},
  volume  = {3},
  number  = {3},
  pages   = {203--217},
  month   = jul,
  year    = {1989},
  issn    = {08852308},
  doi     = {10.1016/0885-2308(89)90018-1},
  note    = {\url{http://www.sciencedirect.com/science/article/pii/0885230889900181}}
}
% Bergstra et al. 2010: the original Theano paper (SciPy conference).
% Typed as a proceedings paper with the venue in `booktitle`. The duplicated
% given name of the third author is collapsed, and the placeholder issue
% "Scipy" is dropped.
@inproceedings{Bergstra2010a,
  author    = {Bergstra, James and Breuleux, Olivier and Bastien, Fr{\'{e}}d{\'{e}}ric and Lamblin, Pascal and Pascanu, Razvan and Desjardins, Guillaume and Turian, Joseph and Warde-Farley, David and Bengio, Yoshua},
  title     = {{Theano: a CPU and GPU math compiler in Python}},
  booktitle = {Proceedings of the Python for Scientific Computing Conference (SciPy)},
  pages     = {1--7},
  year      = {2010}
}
% Chen, Grangier and Auli 2015: training large-vocabulary neural LMs (arXiv preprint).
% First author's given name corrected to Wenlin. `journal`/`volume` are added
% in the CoRR form used by the other arXiv entries, because an article entry
% requires a journal.
% NOTE(review): the ISBN and the single-number `pages` value look like
% reference-manager residue -- confirm before relying on them.
@article{Chen2015,
  author        = {Chen, Wenlin and Grangier, David and Auli, Michael},
  title         = {{Strategies for Training Large Vocabulary Neural Language Models}},
  journal       = {CoRR},
  volume        = {abs/1512.04906},
  pages         = {12},
  year          = {2015},
  archivePrefix = {arXiv},
  arxivId       = {1512.04906},
  eprint        = {1512.04906},
  isbn          = {9781467303675},
  note          = {\url{http://arxiv.org/abs/1512.04906}}
}
% Chetlur et al. 2014: cuDNN (arXiv preprint).
% The author list is completed and the truncated "arXiv preprint" journal
% string is replaced by the CoRR form used by the other arXiv entries here.
@article{Chetlur2014a,
  author        = {Chetlur, Sharan and Woolley, Cliff and Vandermersch, Philippe and Cohen, Jonathan and Tran, John and Catanzaro, Bryan and Shelhamer, Evan},
  title         = {{cuDNN: Efficient Primitives for Deep Learning}},
  journal       = {CoRR},
  volume        = {abs/1410.0759},
  pages         = {1--9},
  year          = {2014},
  archivePrefix = {arXiv},
  arxivId       = {1410.0759},
  eprint        = {1410.0759},
  note          = {\url{http://arxiv.org/abs/1410.0759}}
}
% Habernal, Zayed and Gurevych 2016: C4Corpus (LREC 2016).
% The publication status is carried in `note` rather than in placeholder
% `pages`/`url` values. TODO: add the final page range and URL.
@inproceedings{Habernal.et.al.2016.LREC,
  author    = {Habernal, Ivan and Zayed, Omnia and Gurevych, Iryna},
  title     = {{C4Corpus: Multilingual Web-size corpus with free license}},
  booktitle = {Proceedings of the 10th International Conference on Language Resources and Evaluation (LREC 2016)},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Portoro{\v{z}}, Slovenia},
  month     = may,
  year      = {2016},
  note      = {To appear}
}
% Jozefowicz et al. 2016: Exploring the Limits of Language Modeling (arXiv preprint).
@article{DBLP:journals/corr/JozefowiczVSSW16,
  author        = {Rafal J{\'{o}}zefowicz and Oriol Vinyals and Mike Schuster and Noam Shazeer and Yonghui Wu},
  title         = {{Exploring the Limits of Language Modeling}},
  journal       = {CoRR},
  volume        = {abs/1602.02410},
  year          = {2016},
  archivePrefix = {arXiv},
  arxivId       = {1602.02410},
  eprint        = {1602.02410},
  note          = {\url{http://arxiv.org/abs/1602.02410}}
}
% Mikolov 2012: PhD thesis on neural-network language models (Brno University of Technology).
@phdthesis{mikolov2012,
  author = {Mikolov, Tom{\'{a}}{\v{s}}},
  title  = {{Statistical Language Models Based on Neural Networks}},
  school = {Brno University of Technology},
  year   = {2012},
  note   = {\url{http://www.fit.vutbr.cz/~imikolov/rnnlm/thesis.pdf}}
}
% Karimi, Yin and Baum 2015: evaluation methods for statistically dependent text.
% `month` uses the bare BibTeX macro so styles can expand and localise it.
@article{Karimi2015,
  author  = {Karimi, Sarvnaz and Yin, Jie and Baum, Jiri},
  title   = {{Evaluation Methods for Statistically Dependent Text}},
  journal = {Computational Linguistics},
  volume  = {41},
  number  = {3},
  pages   = {539--548},
  month   = sep,
  year    = {2015},
  doi     = {10.1162/COLI_a_00230},
  note    = {\url{http://www.mitpressjournals.org/doi/10.1162/COLI_a_00230}}
}
% Kim et al. 2015: Character-Aware Neural Language Models (arXiv preprint).
% The CoRR volume is the full arXiv identifier, matching `eprint`.
@article{DBLP:journals/corr/KimJSR15,
  author        = {Kim, Yoon and Jernite, Yacine and Sontag, David and Rush, Alexander M.},
  title         = {{Character-Aware Neural Language Models}},
  journal       = {CoRR},
  volume        = {abs/1508.06615},
  year          = {2015},
  archivePrefix = {arXiv},
  arxivId       = {1508.06615},
  eprint        = {1508.06615},
  note          = {\url{http://arxiv.org/abs/1508.06615}}
}
% Miller et al. 2016: How the World Changed Social Media (UCL Press).
% "Why We Post" is the book series, so it is stored in `series` rather than
% `edition`. `month` uses the bare macro, and the given name of author Sinanan
% is corrected to Jolynna.
@book{whyweposthtwc2016,
  author    = {Miller, Daniel and Costa, Elisabetta and Haynes, Nell and McDonald, Tom and Nicolescu, Razvan and Sinanan, Jolynna and Spyer, Juliano and Venkatraman, Shriram and Wang, Xinyuan},
  title     = {{How the World Changed Social Media}},
  series    = {Why We Post},
  publisher = {UCL Press},
  month     = feb,
  year      = {2016},
  isbn      = {9781910634493},
  doi       = {10.14324/111.9781910634493},
  note      = {\url{http://discovery.ucl.ac.uk/1474805/1/How-the-World-Changed-Social-Media.pdf}}
}
% Nogueira dos Santos and Zadrozny 2014: character-level representations for PoS tagging (ICML-14).
@inproceedings{santos2014learning,
  author    = {{Nogueira dos Santos}, C{\'{i}}cero and Zadrozny, Bianca},
  title     = {{Learning Character-level Representations for Part-of-Speech Tagging}},
  booktitle = {Proceedings of the 31st International Conference on Machine Learning (ICML-14)},
  pages     = {1818--1826},
  year      = {2014},
  note      = {\url{http://jmlr.org/proceedings/papers/v32/santos14.pdf}}
}
% Samardzic, Scherrer and Glaser 2015: normalising Swiss German variants (7th LTC).
@inproceedings{Samardzic2015,
  author    = {Samard{\v{z}}i{\'{c}}, Tanja and Scherrer, Yves and Glaser, Elvira},
  title     = {{Normalising orthographic and dialectal variants for the automatic processing of Swiss German}},
  booktitle = {Proceedings of the 7th Language and Technology Conference},
  year      = {2015},
  note      = {\url{http://archive-ouverte.unige.ch/unige:82397}}
}
% Sundermeyer, Schlueter and Ney 2012: LSTM networks for language modeling (Interspeech).
% The second author's surname is restored (its umlaut and the following letters
% had been lost in export), and the entry is typed as a proceedings paper with
% the venue in `booktitle`.
@inproceedings{Sundermeyer2012,
  author    = {Sundermeyer, Martin and Schl{\"{u}}ter, Ralf and Ney, Hermann},
  title     = {{LSTM Neural Networks for Language Modeling}},
  booktitle = {Proc. Interspeech},
  year      = {2012},
  isbn      = {9781622767595},
  keywords  = {LSTM neural networks,language modeling,recurrent neural networks}
}
% Sutskever, Vinyals and Le 2014: sequence-to-sequence learning (NIPS).
@article{Sutskever2014,
  author        = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V},
  title         = {{Sequence to sequence learning with neural networks}},
  journal       = {Advances in Neural Information Processing Systems (NIPS)},
  pages         = {3104--3112},
  year          = {2014},
  archivePrefix = {arXiv},
  arxivId       = {1409.3215},
  eprint        = {1409.3215},
  note          = {\url{http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural}}
}
% Weber 2015: bachelor's thesis on text normalisation for Italian Twitter microblogs.
% `type` overrides the default "PhD thesis" label, so it holds printable text.
% The school name spelling is corrected, `month` uses the bare macro, and the
% redundant `number = {November}` is dropped.
@phdthesis{Weber2015,
  author = {Weber, Daniel Julian},
  title  = {{Text Normalisierung f{\"{u}}r Italienische Twitter-Microblogs}},
  type   = {Bachelor's thesis},
  school = {Ludwig-Maximilians-Universit{\"{a}}t M{\"{u}}nchen},
  month  = nov,
  year   = {2015},
  note   = {\url{http://www.zhekova.net/Teaching_Supervision_files/Daniel_Weber_BSc.pdf}}
}
% Yang and Eisenstein 2013: log-linear model for unsupervised text normalization (EMNLP 2013).
% The month was stored in `number`, which styles print as an issue or series
% number; it is now in `month` as the bare macro.
@inproceedings{Yang2013,
  author    = {Yang, Yi and Eisenstein, Jacob},
  title     = {{A Log-Linear Model for Unsupervised Text Normalization}},
  booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Seattle, Washington, USA},
  pages     = {61--72},
  month     = oct,
  year      = {2013},
  isbn      = {9781937284978}
}
% Mikolov et al. 2013: efficient estimation of word representations (DBLP export).
@article{DBLP:journals/corr/abs-1301-3781,
  author    = {Tomas Mikolov and Kai Chen and Greg Corrado and Jeffrey Dean},
  title     = {{Efficient Estimation of Word Representations in Vector Space}},
  journal   = {CoRR},
  volume    = {abs/1301.3781},
  year      = {2013},
  note      = {\url{http://arxiv.org/abs/1301.3781}},
  timestamp = {Thu, 07 May 2015 20:02:01 +0200},
  biburl    = {http://dblp.uni-trier.de/rec/bib/journals/corr/abs-1301-3781},
  bibsource = {dblp computer science bibliography, http://dblp.org}
}
% Mikolov et al. 2013: distributed representations of words and phrases (arXiv preprint).
@article{arXiv:1310.4546,
  author        = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
  title         = {{Distributed Representations of Words and Phrases and their Compositionality}},
  journal       = {CoRR},
  volume        = {abs/1310.4546},
  month         = oct,
  year          = {2013},
  archivePrefix = {arXiv},
  arxivId       = {1310.4546},
  eprint        = {1310.4546},
  note          = {\url{http://arxiv.org/abs/1310.4546}}
}
% Theano Development Team 2016: Theano framework description (arXiv preprint).
% A truncated author list is marked with BibTeX's `and others` token, so the
% style renders "et al." instead of parsing it as a person's name.
@article{Theano2016,
  author        = {{The Theano Development Team} and Rami Al{-}Rfou and Guillaume Alain and Amjad Almahairi and others},
  title         = {{Theano: {A} Python framework for fast computation of mathematical expressions}},
  journal       = {CoRR},
  volume        = {abs/1605.02688},
  year          = {2016},
  archivePrefix = {arXiv},
  arxivId       = {1605.02688},
  eprint        = {1605.02688},
  note          = {\url{http://arxiv.org/abs/1605.02688}}
}
% Chollet 2015: Keras (GitHub repository, pinned to a commit).
% The cedilla is written as a LaTeX escape, like every other accented name in
% this file, so the entry also works with 8-bit BibTeX.
@misc{chollet2015,
  author       = {Chollet, Fran{\c{c}}ois},
  title        = {{Keras: Deep Learning library for Theano and TensorFlow}},
  howpublished = {\url{https://github.com/fchollet/keras}},
  publisher    = {GitHub},
  journal      = {GitHub repository},
  year         = {2015},
  urldate      = {2016-03-22},
  commit       = {657b9fb48e93b59083d2e0b8a5e4daf237179dbc}
}
% Giesbrecht and Evert 2009: evaluation of PoS taggers on German web data (WAC5).
% Typed as a workshop paper with the venue in `booktitle` rather than `journal`.
@inproceedings{Giesbrecht2009,
  author    = {Giesbrecht, Eugenie and Evert, Stefan},
  title     = {{Is Part-of-Speech Tagging a Solved Task? An Evaluation of POS Taggers for the German Web as Corpus}},
  booktitle = {Web as Corpus Workshop (WAC5)},
  year      = {2009},
  note      = {\url{http://sigwac.org.uk/raw-attachment/wiki/WAC5/WAC5_proceedings.pdf#page=27}}
}
% Brants et al. 2004: the TIGER corpus of German.
@article{Brants2004,
  author  = {Brants, Sabine and Dipper, Stefanie and Eisenberg, Peter and Hansen-Schirra, Silvia and K{\"o}nig, Esther and Lezius, Wolfgang and Rohrer, Christian and Smith, George and Uszkoreit, Hans},
  title   = {{TIGER: Linguistic Interpretation of a German Corpus}},
  journal = {Research on Language and Computation},
  volume  = {2},
  number  = {4},
  pages   = {597--620},
  year    = {2004},
  issn    = {1572-8706},
  doi     = {10.1007/s11168-004-7431-3},
  note    = {\url{http://dx.doi.org/10.1007/s11168-004-7431-3}}
}
% Schaefer and Bildhauer 2012: web corpus construction tool chain (LREC'12).
% The role annotation is removed from the first editor so the name parses as a
% person. Non-ASCII letters are written as LaTeX escapes, `month` uses the bare
% macro, and the ordinal in the proceedings title is spelled "Eighth".
@inproceedings{SchaeferBildhauer2012,
  author    = {Roland Sch{\"{a}}fer and Felix Bildhauer},
  title     = {Building Large Corpora from the Web Using a New Efficient Tool Chain},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  editor    = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Mehmet U{\u{g}}ur Do{\u{g}}an and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Istanbul, Turkey},
  month     = may,
  year      = {2012},
  date      = {2012-05-23/2012-05-25},
  isbn      = {978-2-9517408-7-7},
  language  = {english}
}
% Levy, Goldberg and Dagan 2015: lessons from word embeddings for distributional similarity (TACL).
@article{TACL570,
  author   = {Levy, Omer and Goldberg, Yoav and Dagan, Ido},
  title    = {Improving Distributional Similarity with Lessons Learned from Word Embeddings},
  journal  = {Transactions of the Association for Computational Linguistics},
  volume   = {3},
  pages    = {211--225},
  year     = {2015},
  issn     = {2307-387X},
  keywords = {},
  note     = {\url{https://tacl2013.cs.columbia.edu/ojs/index.php/tacl/article/view/570}}
}
% Hochreiter and Schmidhuber 1997: Long Short-Term Memory.
% The umlaut is written as a brace group starting with the accent command, the
% form BibTeX treats as a single letter when sorting and building labels.
@article{Hochreiter:1997:LSM:1246443.1246450,
  author     = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
  title      = {Long Short-Term Memory},
  journal    = {Neural Computation},
  issue_date = {November 15, 1997},
  volume     = {9},
  number     = {8},
  pages      = {1735--1780},
  numpages   = {46},
  month      = nov,
  year       = {1997},
  publisher  = {MIT Press},
  address    = {Cambridge, MA, USA},
  issn       = {0899-7667},
  doi        = {10.1162/neco.1997.9.8.1735},
  acmid      = {1246450},
  note       = {\url{http://dx.doi.org/10.1162/neco.1997.9.8.1735}}
}
% Jakubicek et al. 2013: the TenTen corpus family (CL 2013).
% The page range uses the double hyphen (en dash), and the surname Rychly
% carries an acute accent, not a grave.
@inproceedings{1120431,
  author       = {Jakub{\'{i}}{\v{c}}ek, Milo{\v{s}} and Kilgarriff, Adam and Kov{\'a}{\v{r}}, Vojt{\v{e}}ch and Rychl{\'{y}}, Pavel and Suchomel, V{\'{i}}t},
  title        = {The {TenTen} Corpus Family},
  booktitle    = {7th International Corpus Linguistics Conference ({CL} 2013)},
  address      = {Lancaster},
  location     = {Lancaster},
  howpublished = {online},
  pages        = {125--127},
  year         = {2013},
  note         = {\url{http://ucrel.lancs.ac.uk/cl2013/}}
}
% Beisswenger 2013: the Dortmund Chat Corpus (LINSE).
@article{Beisswenger2013,
  author  = {Bei{\ss}wenger, Michael},
  title   = {{Das Dortmunder Chat-Korpus: ein annotiertes Korpus zur Sprachverwendung und sprachlichen Variation in der deutschsprachigen Chat-Kommunikation}},
  journal = {LINSE - Linguistik Server Essen},
  pages   = {1--13},
  year    = {2013}
}
% Baldwin et al. 2013: noisiness of social media text (IJCNLP 2013).
% The spelling "Diffrnt" is kept exactly as it stands in the title field.
@inproceedings{baldwin-EtAl:2013:IJCNLP,
  author    = {Baldwin, Timothy and Cook, Paul and Lui, Marco and MacKinlay, Andrew and Wang, Li},
  title     = {How Noisy Social Media Text, How Diffrnt Social Media Sources?},
  booktitle = {Proceedings of the Sixth International Joint Conference on Natural Language Processing},
  publisher = {Asian Federation of Natural Language Processing},
  address   = {Nagoya, Japan},
  pages     = {356--364},
  month     = oct,
  year      = {2013},
  note      = {\url{http://aclweb.org/anthology/I13-1041}}
}
% Androutsopoulos 2007: new media and new forms of written language.
@article{Androutsopoulos2007,
  author  = {Androutsopoulos, Jannis K.},
  title   = {{Neue Medien -- neue Schriftlichkeit?}},
  journal = {Mitteilungen des Deutschen Germanistenverbandes},
  volume  = {1},
  pages   = {72--97},
  year    = {2007}
}
% Crystal 2001: Language and the Internet (Cambridge University Press).
@book{Crystal2001,
  author    = {Crystal, David},
  title     = {{Language and the Internet}},
  publisher = {Cambridge University Press},
  address   = {Cambridge, UK},
  year      = {2001},
  isbn      = {9781139164771},
  doi       = {10.1017/CBO9781139164771},
  note      = {\url{http://ebooks.cambridge.org/ref/id/CBO9781139164771}}
}
% Kilgarriff and Grefenstette 2003: introduction to the CL special issue on the web as corpus.
@article{KilgarriffGrefenstette03,
  author  = {Kilgarriff, Adam and Grefenstette, Gregory},
  title   = {{Introduction to the special issue on the web as corpus}},
  journal = {Computational Linguistics},
  volume  = {29},
  pages   = {333--347},
  year    = {2003}
}
% Bernardini, Baroni and Evert 2008: introductory chapter of the WaCky working papers.
@incollection{BernardiniBaroniEvert2008,
  author    = {Bernardini, Silvia and Baroni, Marco and Evert, Stefan},
  title     = {{A WaCky Introduction}},
  booktitle = {Wacky! Working papers on the Web as Corpus},
  publisher = {GEDIT},
  address   = {Bologna, Italy},
  pages     = {9--40},
  year      = {2008},
  note      = {\url{http://wackybook.sslmit.unibo.it/pdfs/bernardini.pdf}}
}
% Tran, Bisazza and Monz 2016: recurrent memory networks for language modeling (NAACL-HLT 2016).
% `month` uses the bare BibTeX macro so styles can expand and localise it.
@inproceedings{tran-bisazza-monz:2016:N16-1,
  author    = {Tran, Ke and Bisazza, Arianna and Monz, Christof},
  title     = {{Recurrent Memory Networks for Language Modeling}},
  booktitle = {Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
  publisher = {Association for Computational Linguistics},
  address   = {San Diego, California},
  pages     = {321--331},
  month     = jun,
  year      = {2016},
  note      = {\url{http://www.aclweb.org/anthology/N16-1036}}
}
% Collobert et al. 2011: NLP (almost) from scratch (JMLR).
@article{collobert:2011b,
  author  = {Collobert, Ronan and Weston, Jason and Bottou, L{\'e}on and Karlen, Michael and Kavukcuoglu, Koray and Kuksa, Pavel},
  title   = {{Natural Language Processing (almost) from Scratch}},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2493--2537},
  year    = {2011},
  note    = {\url{https://arxiv.org/abs/1103.0398}}
}
% Turian, Ratinov and Bengio 2010: word representations for semi-supervised learning (ACL '10).
@inproceedings{Turian:2010:WRS:1858681.1858721,
  author    = {Turian, Joseph and Ratinov, Lev and Bengio, Yoshua},
  title     = {Word Representations: A Simple and General Method for Semi-supervised Learning},
  booktitle = {Proceedings of the 48th Annual Meeting of the Association for Computational Linguistics},
  series    = {ACL '10},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Uppsala, Sweden},
  pages     = {384--394},
  numpages  = {11},
  year      = {2010},
  acmid     = {1858721},
  note      = {\url{http://dl.acm.org/citation.cfm?id=1858681.1858721}}
}
% O'Reilly and Munakata 2000: Computational Explorations in Cognitive Neuroscience (MIT Press).
% The ampersand inside \url is written raw, because \url typesets its argument
% verbatim and an escaped form would corrupt the link. A colon separates the
% title from the subtitle.
% NOTE(review): `pages` holds a single number, presumably the page count -- confirm.
@book{OReilly2000,
  author    = {O'Reilly, Randall C. and Munakata, Yuko},
  title     = {{Computational Explorations in Cognitive Neuroscience: Understanding the Mind by Simulating the Brain}},
  publisher = {MIT Press},
  pages     = {504},
  year      = {2000},
  isbn      = {0262650541},
  note      = {\url{http://books.google.com/books?id=BLf34BFTaIUC&pgis=1}}
}
% Hochreiter 1991: diploma thesis on dynamic neural networks (TU Muenchen).
@phdthesis{Hochreiter1991,
  author = {Hochreiter, Sepp},
  title  = {{Untersuchungen zu dynamischen neuronalen Netzen}},
  type   = {diploma thesis},
  school = {TU M{\"{u}}nchen},
  year   = {1991}
}
% Beisswenger et al. 2016: overview paper of the EmpiriST 2015 shared task (WAC-X).
@inproceedings{empirist:2016:WAC-X,
  author    = {Bei{\ss}wenger, Michael and Bartsch, Sabine and Evert, Stefan and W{\"{u}}rzner, Kay-Michael},
  title     = {{EmpiriST 2015: A Shared Task on the Automatic Linguistic Annotation of Computer-Mediated Communication, Social Media and Web Corpora}},
  booktitle = {Proceedings of the 10th Web as Corpus Workshop (WAC-X) and the EmpiriST Shared Task},
  publisher = {Association for Computational Linguistics},
  address   = {Berlin, Germany},
  pages     = {78--90},
  year      = {2016},
  url       = {http://anthology.aclweb.org/W/W16/W16-2610}
}
% Srivastava et al. 2014: Dropout (JMLR 15).
% NOTE(review): the exported DOI (10.1214/12-AOS1000, an Annals of Statistics
% prefix) and arXiv id (1102.4807) do not belong to this JMLR article and were
% removed. The value that sat in `isbn` was an ISSN, so only `issn` is kept.
% Curly quotes in the abstract are written as TeX quotes.
@article{Srivastava2014,
  author   = {Srivastava, Nitish and Hinton, Geoffrey E. and Krizhevsky, Alex and Sutskever, Ilya and Salakhutdinov, Ruslan},
  title    = {{Dropout : A Simple Way to Prevent Neural Networks from Overfitting}},
  journal  = {Journal of Machine Learning Research (JMLR)},
  volume   = {15},
  pages    = {1929--1958},
  year     = {2014},
  issn     = {1533-7928},
  keywords = {deep learning,model combination,neural networks,regularization},
  abstract = {Deep neural nets with a large number of parameters are very powerful machine learning systems. However, overfitting is a serious problem in such networks. Large networks are also slow to use, making it difficult to deal with overfitting by combining the predictions of many different large neural nets at test time. Dropout is a technique for addressing this problem. The key idea is to randomly drop units (along with their connections) from the neural network during training. This prevents units from co-adapting too much. During training, dropout samples from an exponential number of different ``thinned'' networks. At test time, it is easy to approximate the effect of averaging the predictions of all these thinned networks by simply using a single unthinned network that has smaller weights. This significantly reduces overfitting and gives major improvements over other regularization methods. We show that dropout improves the performance of neural networks on supervised learning tasks in vision, speech recognition, document classification and computational biology, obtaining state-of-the-art results on many benchmark data sets}
}
% Bosco et al. 2014: the EVALITA 2014 dependency parsing task.
@inproceedings{Bosco2014,
  author    = {Bosco, Cristina and Dell'Orletta, Felice and Montemagni, Simonetta and Sanguinetti, Manuela and Simi, Maria},
  title     = {{The EVALITA 2014 Dependency Parsing Task}},
  booktitle = {Proceedings of CLiC-it 2014 and EVALITA 2014},
  publisher = {Pisa University Press},
  pages     = {1--8},
  year      = {2014},
  isbn      = {978-886741-472-7}
}
% Frey, Glaznieks and Stemle 2016: the DiDi corpus of South Tyrolean CMC data (unpublished at time of entry).
@unpublished{frey-glaznieks-stemle:2016:didi,
  author   = {Frey, Jennifer-Carmen and Glaznieks, Aivars and Stemle, Egon W.},
  title    = {{The DiDi Corpus of South Tyrolean CMC Data: A multilingual corpus of Facebook texts}},
  year     = {2016},
  note     = {Upcoming},
  abstract = {The DiDi corpus of South Tyrolean data of computer-mediated communication (CMC) is a multilingual sociolinguistic language corpus. It consists of around 600,000 tokens collected from 136 profiles of Facebook users residing in South Tyrol, Italy. In conformity with the multilingual situation of the territory, the main languages of the corpus are German and Italian (followed by English). The data has been manually anonymised and provides manually corrected part-of-speech tags for the Italian language texts and manually normalised data for German texts. Moreover, it is annotated with user-provided socio-demographic data (among others L1, gender, age, education, and internet communication habits) from a questionnaire, and linguistic annotations regarding CMC phenomena, languages and varieties. The anonymised corpus is freely available for research purposes.}
}
% Lyding et al. 2014: the PAISA corpus of Italian web texts (WaC-9).
% The surname Dell'Orletta is restored; it had been split into surname
% "Orletta" and given names "Felice Dell", which breaks sorting and rendering.
@inproceedings{paisa2014,
  author    = {Lyding, Verena and Stemle, Egon and Borghetti, Claudia and Brunello, Marco and Castagnoli, Sara and Dell'Orletta, Felice and Dittmann, Henrik and Lenci, Alessandro and Pirrelli, Vito},
  title     = {{The PAIS{\`{A}} Corpus of Italian Web Texts}},
  booktitle = {Proceedings of the 9th Web as Corpus Workshop (WaC-9)},
  publisher = {Association for Computational Linguistics},
  address   = {Gothenburg, Sweden},
  pages     = {36--43},
  year      = {2014},
  url       = {http://aclweb.org/anthology/W14-0406},
  abstract  = {PAIS{\`{A}} is a Creative Commons licensed, large web corpus of contemporary Italian. We describe the design, harvesting, and processing steps involved in its creation.}
}