% cite.bib -- BibTeX references for the Docker Unified UIMA Interface (DUUI).
% Conference paper presenting DUUI (Findings of the ACL: EMNLP 2023).
@inproceedings{Leonhardt:et:al:2023,
  author    = {Leonhardt, Alexander and
               Abrami, Giuseppe and
               Baumartz, Daniel and
               Mehler, Alexander},
  title     = {Unlocking the Heterogeneous Landscape of Big Data {NLP} with {DUUI}},
  editor    = {Bouamor, Houda and
               Pino, Juan and
               Bali, Kalika},
  booktitle = {Findings of the Association for Computational Linguistics: EMNLP 2023},
  year      = {2023},
  month     = dec,
  address   = {Singapore},
  publisher = {Association for Computational Linguistics},
  pages     = {385--399},
  doi       = {10.18653/v1/2023.findings-emnlp.29},
  url       = {https://aclanthology.org/2023.findings-emnlp.29},
  abstract  = {Automatic analysis of large corpora is a complex task, especially in terms of time efficiency. This complexity is increased by the fact that flexible, extensible text analysis requires the continuous integration of ever new tools. Since there are no adequate frameworks for these purposes in the field of NLP, and especially in the context of UIMA, that are not outdated or unusable for security reasons, we present a new approach to address the latter task: Docker Unified UIMA Interface (DUUI), a scalable, flexible, lightweight, and feature-rich framework for automatic distributed analysis of text corpora that leverages Big Data experience and virtualization with Docker. We evaluate DUUI{'}s communication approach against a state-of-the-art approach and demonstrate its outstanding behavior in terms of time efficiency, enabling the analysis of big text data.},
}
% Journal article (SoftwareX, vol. 29, 2025) on DUUI and its Kubernetes driver.
% The doi field holds the bare identifier only: DOI-aware styles add the
% resolver prefix themselves, so a full URL here would be doubled in output.
% The proper name and the acronym in the title are brace-protected so that
% sentence-casing styles do not lowercase them.
@article{Abrami:et:al:2025:a,
  author   = {Abrami, Giuseppe and
              Genios, Markos and
              Fitzermann, Filip and
              Baumartz, Daniel and
              Mehler, Alexander},
  title    = {{Docker Unified UIMA Interface}: New perspectives for {NLP} on big data},
  journal  = {SoftwareX},
  volume   = {29},
  pages    = {102033},
  year     = {2025},
  issn     = {2352-7110},
  doi      = {10.1016/j.softx.2024.102033},
  url      = {https://www.sciencedirect.com/science/article/pii/S2352711024004047},
  abstract = {Processing large amounts of natural language text using machine
              learning-based models is becoming important in many disciplines.
              This demand is being met by a variety of approaches, resulting
              in the heterogeneous deployment of separate, partly incompatible,
              not natively scalable applications. To overcome the technological
              bottleneck involved, we have developed Docker Unified UIMA Interface,
              a system for the standardized, parallel, platform-independent,
              distributed and microservices-based solution for processing large
              and extensive text corpora with any NLP method. We present DUUI
              as a framework that enables automated orchestration of GPU-based
              NLP processes beyond the existing Docker Swarm cluster variant,
              and in addition to the adaptation to new runtime environments
              such as Kubernetes. Therefore, a new driver for DUUI is introduced,
              which enables the lightweight orchestration of DUUI processes
              within a Kubernetes environment in a scalable setup. In this way,
              the paper opens up novel text-technological perspectives for existing
              practices in disciplines that deal with the scientific analysis
              of large amounts of data based on NLP.},
}