Skip to content

Commit

Permalink
Merge pull request #63 from lfoppiano/feature/dropwizard_4
Browse files Browse the repository at this point in the history
Update to Dropwizard 4, improve docker build, update grobid-quantities and grobid to version 0.8.0
  • Loading branch information
lfoppiano authored Apr 5, 2024
2 parents 8b901e4 + 685e942 commit edaf280
Show file tree
Hide file tree
Showing 49 changed files with 712 additions and 271 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci-build-unstable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
id: docker_build
uses: mr-smithers-excellent/docker-build-push@v6
with:
dockerfile: Dockerfile.local
dockerfile: Dockerfile
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
image: lfoppiano/grobid-superconductors
Expand Down
42 changes: 23 additions & 19 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,35 +28,36 @@ USER root
# Build-time OS packages. Index refresh, install and index cleanup share one
# layer so the apt lists are neither reused stale nor left behind in the layer.
RUN apt-get update && \
    apt-get -y --no-install-recommends install \
        apt-utils \
        git \
        libxml2 \
        unzip && \
    rm -rf /var/lib/apt/lists/*

RUN mkdir -p /opt/grobid-source/grobid-home/models

WORKDIR /opt/grobid-source
COPY gradle.properties .

WORKDIR /opt/grobid-source

RUN mkdir -p grobid-superconductors_source/resources/config grobid-superconductors_source/resources/models grobid-superconductors_source/gradle grobid-superconductors_source/localLibs grobid-superconductors_source/resources/web grobid-superconductors_source/src
RUN mkdir -p grobid-home/models \
&& mkdir -p grobid-superconductors_source/resources/config grobid-superconductors_source/resources/models grobid-superconductors_source/gradle grobid-superconductors_source/localLibs grobid-superconductors_source/resources/web grobid-superconductors_source/src

COPY ./.git/ ./grobid-superconductors_source/.git
COPY resources/models/ ./grobid-superconductors_source/resources/models/
COPY resources/config/ ./grobid-superconductors_source/resources/config/
COPY gradle/ ./grobid-superconductors_source/gradle/
COPY src/ ./grobid-superconductors_source/src/
COPY localLibs/ ./grobid-superconductors_source/localLibs/
COPY build.gradle ./grobid-superconductors_source/
COPY settings.gradle ./grobid-superconductors_source/
COPY gradlew* ./grobid-superconductors_source/
COPY gradle.properties ./grobid-superconductors_source/
COPY ["gradlew*", "build.gradle", "settings.gradle", "gradle.properties", "./grobid-superconductors_source/"]
COPY .git/ ./grobid-superconductors_source/.git/

# Preparing models
RUN rm -rf /opt/grobid-source/grobid-home/models/*
WORKDIR /opt/grobid-source/grobid-superconductors_source
RUN ./gradlew clean assemble -x shadowJar --no-daemon --stacktrace --info
RUN ./gradlew downloadTransformers --no-daemon --info --stacktrace && rm -f /opt/grobid-source/grobid-home/models/*.zip
# Single build layer, chained with && so any failing step aborts the build:
#   1. compact the copied .git metadata (prune, repack, reflog expire, gc)
#   2. run the downloadTransformers Gradle task
#   3. delete downloaded *.zip archives plus the *.-with_ELMo and entityLinker*
#      entries under grobid-home/models
#   4. assemble the distribution (shadowJar excluded) and unpack it to
#      ../grobid-superconductors, removing the temporary unpack directory
#   5. delete the .git directory last
# NOTE(review): the glob "*.-with_ELMo" only matches names that contain a dot
# right before "-with_ELMo" — confirm the ELMo model directories are named so.
RUN git remote prune origin && git repack && git prune-packed && git reflog expire --expire=1.day.ago && git gc --aggressive \
&& ./gradlew downloadTransformers --no-daemon --info --stacktrace \
&& rm -f /opt/grobid-source/grobid-home/models/*.zip \
&& rm -rf /opt/grobid-source/grobid-home/models/*.-with_ELMo \
&& rm -rf /opt/grobid-source/grobid-home/models/entityLinker* \
&& ./gradlew clean assemble -x shadowJar --no-daemon --stacktrace --info \
&& unzip -o build/distributions/grobid-superconductors-*.zip -d ../grobid-superconductors_distribution \
&& mv ../grobid-superconductors_distribution/grobid-superconductors-* ../grobid-superconductors \
&& rm -rf ../grobid-superconductors_distribution \
&& rm -rf /opt/grobid-source/grobid-superconductors_source/.git


# Preparing distribution
WORKDIR /opt/grobid-source
RUN unzip -o /opt/grobid-source/grobid-superconductors_source/build/distributions/grobid-superconductors-*.zip -d grobid-superconductors_distribution && mv grobid-superconductors_distribution/grobid-superconductors-* grobid-superconductors
WORKDIR /opt/grobid-source/grobid-superconductors_source


WORKDIR /opt

Expand All @@ -70,10 +71,13 @@ FROM lfoppiano/grobid-quantities:0.8.0 as runtime
# Use the key=value form; the space-separated "ENV key value" syntax is legacy.
ENV LANG=C.UTF-8

WORKDIR /opt/grobid
RUN rm -rf /opt/grobid/grobid-quantities
RUN rm /opt/grobid/resources

RUN mkdir -p /opt/grobid/grobid-superconductors
# Remove grobid-quantities, the "resources" entry, the *.-with_ELMo model
# entries and grobid-service from /opt/grobid (presumably all inherited from
# the base image — verify), and create the grobid-superconductors directory.
# NOTE(review): `rm /opt/grobid/resources` has neither -f nor -r, so the build
# fails if that path is missing or is a real directory — confirm what the base
# image ships there.
# NOTE(review): deleting base-image files in a new layer presumably hides them
# without shrinking the image — confirm whether size reduction was the goal.
RUN rm -rf /opt/grobid/grobid-quantities \
&& rm /opt/grobid/resources \
&& mkdir -p /opt/grobid/grobid-superconductors \
&& rm -rf /opt/grobid/grobid-home/models/*.-with_ELMo \
&& rm -rf /opt/grobid/grobid-service

COPY --from=builder /opt/grobid-source/grobid-home/models ./grobid-home/models
COPY --from=builder /opt/grobid-source/grobid-superconductors ./grobid-superconductors/
COPY --from=builder /opt/grobid-source/grobid-superconductors_source/resources/config/config-docker.yml ./grobid-superconductors/resources/config/config.yml
Expand Down
87 changes: 54 additions & 33 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -47,21 +47,26 @@ description = """Grobid module for extraction of superconductor information from

dependencies {
//Tests
testImplementation(platform('org.junit:junit-bom:5.8.1'))
testImplementation(platform('org.junit:junit-bom:5.10.2'))
testRuntimeOnly("org.junit.platform:junit-platform-launcher") {
because("Only needed to run tests in a version of IntelliJ IDEA that bundles older versions")
}
testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine")
testImplementation('org.junit.jupiter:junit-jupiter')
testImplementation("junit:junit:4.13.2")
testRuntimeOnly("org.junit.vintage:junit-vintage-engine") {
because 'allows JUnit 3 and JUnit 4 tests to run'
}

testRuntimeOnly("org.junit.platform:junit-platform-launcher") {
because 'allows tests to run from IDEs that bundle older version of launcher'
}
testImplementation 'org.easymock:easymock:4.3'
testImplementation 'org.easymock:easymock:5.2.0'
testImplementation 'org.hamcrest:hamcrest-all:1.3'
testImplementation 'org.hamcrest:hamcrest-library:2.2'
testImplementation 'org.powermock:powermock-module-junit4:2.0.9'
testImplementation 'org.powermock:powermock-api-easymock:2.0.9'
testImplementation 'org.jetbrains.kotlin:kotlin-test'
testImplementation "io.mockk:mockk:1.13.9"

//GROBID and grobid-quantities
implementation 'org.grobid:grobid-quantities:0.8.0'
Expand All @@ -87,16 +92,17 @@ dependencies {
implementation 'org.apache.commons:commons-text:1.10.0'

//Dropwizard
implementation "io.dropwizard:dropwizard-core:1.3.29"
implementation 'io.dropwizard:dropwizard-jersey:1.3.29'
implementation "io.dropwizard:dropwizard-assets:1.3.29"
implementation "com.hubspot.dropwizard:dropwizard-guicier:1.3.5.2"
implementation "io.dropwizard:dropwizard-testing:1.3.29"
implementation "io.dropwizard:dropwizard-forms:1.3.29"
implementation "io.dropwizard:dropwizard-client:1.3.29"
implementation "io.dropwizard:dropwizard-auth:1.3.29"
implementation "io.dropwizard.metrics:metrics-core:4.0.0"
implementation "io.dropwizard.metrics:metrics-servlets:4.0.0"
implementation 'ru.vyarus:dropwizard-guicey:7.0.0'

// Import the Dropwizard BOM as a platform so its version constraints take part
// in dependency resolution; declared as a plain dependency it contributes nothing.
implementation platform('io.dropwizard:dropwizard-bom:4.0.0')
implementation 'io.dropwizard:dropwizard-core:4.0.0'
implementation 'io.dropwizard:dropwizard-assets:4.0.0'
implementation 'io.dropwizard:dropwizard-testing:4.0.0'
implementation 'io.dropwizard:dropwizard-forms:4.0.0'
implementation 'io.dropwizard:dropwizard-client:4.0.0'
implementation 'io.dropwizard:dropwizard-auth:4.0.0'
implementation 'io.dropwizard.metrics:metrics-core:4.2.22'
implementation 'io.dropwizard.metrics:metrics-servlets:4.2.22'

//Misc
implementation 'com.google.guava:guava:30.1.1-jre'
Expand Down Expand Up @@ -148,6 +154,9 @@ tasks.withType(JavaCompile) {
sourceSets.main.resources {
srcDirs = ["src/main/resources", "resources/config"]
}
sourceSets.test.resources {
srcDirs = ["src/test/resources"]
}

test {
exclude '**/**IntegrationTest**'
Expand All @@ -156,7 +165,7 @@ test {

def libraries = ""
if (Os.isFamily(Os.FAMILY_MAC)) {
if (Os.OS_ARCH.equals("aarch64")) {
if (Os.isArch("aarch64")) {
libraries = "${file("./grobid-home/lib/mac_arm-64").absolutePath}"
} else {
libraries = "${file("./grobid-home/lib/mac-64").absolutePath}"
Expand Down Expand Up @@ -190,7 +199,7 @@ run {

def libraries = ""
if (Os.isFamily(Os.FAMILY_MAC)) {
if (Os.OS_ARCH.equals("aarch64")) {
if (Os.isArch("aarch64")) {
libraries = "${file("../grobid-home/lib/mac_arm-64").absolutePath}"
} else {
libraries = "${file("../grobid-home/lib/mac-64").absolutePath}"
Expand Down Expand Up @@ -218,7 +227,7 @@ task integration(type: Test) {

def libraries = ""
if (Os.isFamily(Os.FAMILY_MAC)) {
if (Os.OS_ARCH.equals("aarch64")) {
if (Os.isArch("aarch64")) {
libraries = "${file("./grobid-home/lib/mac_arm-64").absolutePath}"
} else {
libraries = "${file("./grobid-home/lib/mac-64").absolutePath}"
Expand All @@ -233,12 +242,12 @@ task integration(type: Test) {
}

if (JavaVersion.current().compareTo(JavaVersion.VERSION_1_8) > 0) {
// jvmArgs "--add-opens", "java.base/java.util.stream=ALL-UNNAMED",
// "--add-opens", "java.base/java.io=ALL-UNNAMED",
// "--add-opens", "java.base/java.lang=ALL-UNNAMED",
// "--add-opens", "java.base/java.util.regex=ALL-UNNAMED",
// "--add-opens", "java.base/java.math=ALL-UNNAMED",
// "--add-opens", "java.base/java.text=ALL-UNNAMED"
jvmArgs "--add-opens", "java.base/java.util.stream=ALL-UNNAMED",
"--add-opens", "java.base/java.io=ALL-UNNAMED",
"--add-opens", "java.base/java.lang=ALL-UNNAMED",
"--add-opens", "java.base/java.util.regex=ALL-UNNAMED",
"--add-opens", "java.base/java.math=ALL-UNNAMED",
"--add-opens", "java.base/java.text=ALL-UNNAMED"
}
systemProperty "java.library.path", "${System.getProperty('java.library.path')}:" + libraries
}
Expand Down Expand Up @@ -299,12 +308,8 @@ jar {

artifacts {
archives shadowJar
// archives jar
}

//distZip.archiveClassifier = 'application'
//distTar.archiveClassifier = 'application'

distZip.enabled = true
distTar.enabled = false
shadowDistZip.enabled = false
Expand All @@ -323,6 +328,9 @@ publishing {

// Parse the service configuration. withInputStream closes the file handle as
// soon as the YAML has been loaded, so no stream is left open by the build.
def conf = new File("resources/config/config.yml").withInputStream { stream ->
    new org.yaml.snakeyaml.Yaml().load(stream)
}
// Drop the "$", "{", "GROBID_HOME:- " and "}" pieces of the configured value;
// presumably this leaves the default path of an env-placeholder — verify
// against config.yml.
def grobidHome = conf.grobidHome.replace("\$", "").replace('{', "").replace("GROBID_HOME:- ", "").replace("}", "")
// A "../"-relative value is anchored to the root project directory.
if (grobidHome.startsWith("../")) {
    grobidHome = "${rootProject.rootDir}/${grobidHome}"
}

/** Model management **/

Expand All @@ -337,31 +345,46 @@ task copyModels(type: Copy) {
include "**/preprocessor.json"
exclude "**/features-engineering/**"
exclude "**/result-logs/**"
into "${rootDir}/${grobidHome}/models/"
into "${grobidHome}/models/"

doLast {
print "Copy models under grobid-home: ${grobidHome}"
ant.gunzip(src: "${rootDir}/${grobidHome}/models/superconductors/model.wapiti.gz", dest: "${rootDir}/${grobidHome}/models/superconductors/")
ant.gunzip(src: "${grobidHome}/models/superconductors/model.wapiti.gz", dest: "${grobidHome}/models/superconductors/")
}
}

// Downloads the quantities transformers archive into ${grobidHome}/models and
// unpacks it there. Declared to run after copyModels.
task downloadTransformersQuantities(dependsOn: copyModels) {
doLast {
download {
src "https://transformers-data.s3.eu-central-1.amazonaws.com/quantities-transformers-240226.zip"
dest "${grobidHome}/models/quantities-transformers.zip"
// presumably keeps an already-downloaded archive instead of fetching it again —
// confirm against the download plugin's documentation
overwrite false
print "Download bulky transformers files under grobid-home: ${grobidHome}"
}
// Unpack next to the archive; this task does not delete the zip afterwards.
ant.unzip(src: "${grobidHome}/models/quantities-transformers.zip", dest: "${grobidHome}/models/")
}
}

task downloadTransformers(dependsOn: copyModels) {
doLast {
download {
src "https://transformers-data.s3.eu-central-1.amazonaws.com/superconductors-transformers-230222.zip"
dest "${rootDir}/${grobidHome}/models/superconductors-transformers.zip"
dest "${grobidHome}/models/superconductors-transformers.zip"
overwrite false
print "Download bulky transformers files under grobid-home: ${grobidHome}"
}
ant.unzip(src: "${rootDir}/${grobidHome}/models/superconductors-transformers.zip", dest: "${rootDir}/${grobidHome}/models/")
ant.unzip(src: "${grobidHome}/models/superconductors-transformers.zip", dest: "${grobidHome}/models/")
}
}


// Gradle version used when the wrapper scripts are (re)generated.
wrapper {
gradleVersion "7.2"
}

// Run the test task on the JUnit Platform.
test {
useJUnitPlatform()
}

jacocoTestReport {
reports {
xml.enabled = true // coveralls plugin depends on xml format report
Expand All @@ -382,5 +405,3 @@ release {
requireBranch.set('master')
}
}


8 changes: 3 additions & 5 deletions resources/config/config-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -124,17 +124,17 @@ models:

server:
type: custom
idleTimeout: 120 seconds
applicationConnectors:
- type: http
port: 8072
idleTimeout: 120 seconds
adminConnectors:
- type: http
port: 8073
registerDefaultExceptionMappers: false

logging:
level: DEBUG
level: WARN

# Logger-specific levels.
loggers:
Expand All @@ -146,6 +146,4 @@ logging:
appenders:
- type: console
threshold: INFO

version: 2
timeZone: UTC
timeZone: UTC
6 changes: 2 additions & 4 deletions resources/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,10 +123,10 @@ models:

server:
type: custom
idleTimeout: 120 seconds
applicationConnectors:
- type: http
port: 8072
idleTimeout: 120 seconds
adminConnectors:
- type: http
port: 8073
Expand All @@ -145,6 +145,7 @@ logging:
appenders:
- type: console
threshold: INFO
timeZone: UTC
# The following line is used to remove the logger in the docker image, please don't modify it
#Docker-ignore-log-start
- type: file
Expand All @@ -156,6 +157,3 @@ logging:
timeZone: UTC
maxFileSize: 50MB
#Docker-ignore-log-end

version: 2
timeZone: UTC
Loading

0 comments on commit edaf280

Please sign in to comment.