Skip to content
This repository has been archived by the owner on Apr 12, 2024. It is now read-only.

Commit

Permalink
xgboost4j4s
Browse files Browse the repository at this point in the history
  • Loading branch information
hirofumi committed Feb 1, 2018
1 parent de6b0b3 commit 6ba0811
Show file tree
Hide file tree
Showing 16 changed files with 359 additions and 1 deletion.
27 changes: 27 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Paths kept out of the Docker build context.

# Local cache directories that the Makefile mounts into the test containers.
/docker-cache

# Parts of the xgboost submodule that are excluded from the image.
/xgboost/R-package
/xgboost/build
/xgboost/lib
/xgboost/python-package
/xgboost/xgboost
/xgboost/xgboost.mock
/xgboost/xgboost.mpi

# Compiled and generated artifacts, matched by extension.
*.a
*.class
*.dll
*.dylib
*.la
*.lai
*.lo
*.o
*.page
*.slo
*.so

# Version-control metadata and build output directories.
.git/
target/

# OS and IDE files.
.DS_Store
.idea/
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
# Compiled classes and log files.
*.class
*.log
# OS and IDE files, and build output directories.
.DS_Store
.idea/
target/
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# XGBoost sources, tracked as a submodule checked out at ./xgboost.
[submodule "xgboost"]
path = xgboost
url = https://github.com/hirofumi/xgboost.git
# Do not report the submodule as modified when only its working tree is dirty;
# the Makefile writes generated files (e.g. resources/lib, create_jni.py~) into it.
ignore = dirty
11 changes: 11 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Build image for the Linux JNI library (the Makefile tags it xgboost4j4s-jni).
FROM openjdk:8-jdk

# Native toolchain used to compile the shared library.
RUN apt-get update && apt-get -y install cmake g++ libgomp1

# COPY rather than ADD: only a plain copy of the build context is wanted here.
COPY . /root
WORKDIR /root

# Strip the JAVA_JVM_LIBRARY reference from the xgboost CMake build file.
RUN sed -i -e 's/ \${JAVA_JVM_LIBRARY}//' ./xgboost/CMakeLists.txt

# Compiler flags for builds run in this image. The key=value form is used
# because the space-separated `ENV key value` form is legacy syntax.
ENV CFLAGS="-fvisibility=hidden -static-libgcc"
ENV CXXFLAGS="-fvisibility=hidden -fvisibility-inlines-hidden -static-libgcc -static-libstdc++"
12 changes: 12 additions & 0 deletions Dockerfile.test-fedora
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Test image: runs `sbt test` on Fedora 27.
FROM fedora:27

# JVM flags that let the heap be sized from the container's cgroup memory
# limit, plus a fixed numeric locale. key=value form replaces the legacy
# space-separated ENV syntax.
ENV JAVA_OPTS="-XX:MaxRAMFraction=1 -XX:MinRAMFraction=1 -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap"
ENV LC_NUMERIC=C

# Register the sbt RPM repository, then install the JDK, Python and sbt.
# `curl -f ... -o` makes an HTTP error fail the build; the previous
# `curl | tee` pipeline would have stored an error page as the repo file
# and carried on.
# NOTE(review): confirm that the Bintray repository URL is still served.
RUN curl -fsSL -o /etc/yum.repos.d/bintray-sbt-rpm.repo https://bintray.com/sbt/rpm/rpm \
 && yum -y install java-1.8.0-openjdk-devel python sbt

# COPY rather than ADD: only a plain copy of the build context is wanted here.
COPY . /root
WORKDIR /root

# Default command: run the test suite.
CMD ["sbt", "test"]
14 changes: 14 additions & 0 deletions Dockerfile.test-ubuntu
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Test image: runs `sbt test` on Ubuntu 16.04.
FROM ubuntu:16.04

# JVM flags that let the heap be sized from the container's cgroup memory
# limit, plus a fixed numeric locale. key=value form replaces the legacy
# space-separated ENV syntax.
ENV JAVA_OPTS="-XX:MaxRAMFraction=1 -XX:MinRAMFraction=1 -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap"
ENV LC_NUMERIC=C

# Register the sbt APT repository and its signing key, then install the JDK,
# Python and sbt.
RUN echo 'deb http://dl.bintray.com/sbt/debian /' > /etc/apt/sources.list.d/sbt.list \
 && apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823 \
 && apt-get update \
 && apt-get -y install openjdk-8-jdk python sbt

# COPY rather than ADD: only a plain copy of the build context is wanted here.
COPY . /root
WORKDIR /root

# Default command: run the test suite.
CMD ["sbt", "test"]
112 changes: 112 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# --- macOS toolchain (Homebrew GCC) used for the dylib build ---------------
GCC_HOME    := /usr/local/opt/gcc
GCC_VERSION := 7
# The values below carry their own double quotes; recipes paste them verbatim
# after `NAME=` on a shell command line.
CC       := "$(GCC_HOME)/bin/gcc-$(GCC_VERSION)"
CXX      := "$(GCC_HOME)/bin/g++-$(GCC_VERSION)"
CFLAGS   := "-fvisibility=hidden -static-libgcc -I$(GCC_HOME)/include"
CXXFLAGS := "-fvisibility=hidden -fvisibility-inlines-hidden -static-libgcc -static-libstdc++"
# Absolute path of libgomp.a as reported by the compiler; resolved by the
# shell when a recipe runs ($$ defers the command substitution to the shell).
LIBGOMP_A := "$$(cd $$(dirname $$($(CXX) -print-file-name=libgomp.a)); pwd)/libgomp.a"

# --- Resource locations inside the xgboost submodule -----------------------
RESOURCES_METAINF  := xgboost/jvm-packages/xgboost4j/src/main/resources/META-INF
RESOURCES_LIB      := xgboost/jvm-packages/xgboost4j/src/main/resources/lib
LIBXGBOOST4J_DYLIB := $(RESOURCES_LIB)/libxgboost4j.dylib
LIBXGBOOST4J_SO    := $(RESOURCES_LIB)/libxgboost4j.so

# Command prefix: build the xgboost4j4s-jni image if it does not exist yet,
# then run the rest of the line in a throw-away container with both resource
# directories bind-mounted from the host.
RUN_ON_DOCKER := (docker image inspect xgboost4j4s-jni > /dev/null || docker build . -t xgboost4j4s-jni) \
  && docker run --rm \
  -v "$$(pwd)/$(RESOURCES_METAINF):/root/$(RESOURCES_METAINF)" \
  -v "$$(pwd)/$(RESOURCES_LIB):/root/$(RESOURCES_LIB)" \
  -i xgboost4j4s-jni

# Host-side dependency caches shared with the test containers.
IVY2_CACHE := $$(pwd)/docker-cache/.ivy2/cache
SBT_CACHE  := $$(pwd)/docker-cache/.sbt

# `-v host:container` argument for each cache, or nothing when the
# corresponding variable is empty.
IVY2_CACHE_VOLUME := $$(if [ -n "$(IVY2_CACHE)" ]; then echo -v "$(IVY2_CACHE):/root/.ivy2/cache"; else echo -n ''; fi)
SBT_CACHE_VOLUME  := $$(if [ -n "$(SBT_CACHE)" ]; then echo -v "$(SBT_CACHE):/root/.sbt"; else echo -n ''; fi)

# Every target listed here is a command name, not a file, and must run even
# if a file of the same name exists. `inspect` and `clean-doc` are defined in
# this Makefile as command targets too, so they are declared here as well.
.PHONY: \
  test test-mac test-linux test-fedora test-ubuntu \
  release \
  publish-local publish-snapshot \
  inspect inspect-dylib inspect-so \
  clean clean-doc clean-dylib clean-so \
  doc \
  jni jni-dylib jni-so

# Full test run: natively on macOS and inside both Linux containers.
test: test-mac test-linux

# macOS: build the dylib, then run the sbt tests on the host.
test-mac: jni-dylib
	LC_NUMERIC=C sbt test

test-linux: test-fedora test-ubuntu

# Linux: one rule serves both distributions; $* is the distribution name
# (fedora or ubuntu). The image is rebuilt and then run with the cache
# volumes attached.
test-fedora test-ubuntu: test-%: jni-so
	docker build . -f Dockerfile.test-$* -t xgboost4j4s-test-$*
	docker run --rm $(IVY2_CACHE_VOLUME) $(SBT_CACHE_VOLUME) -i xgboost4j4s-test-$*

# Release from a clean tree with bundled docs and both native libraries.
release: clean doc jni
	sbt release

# Both publish targets need the bundled docs and the native libraries.
publish-local publish-snapshot: doc jni

publish-local:
	sbt publishLocal

publish-snapshot:
	sbt publishSigned

# Show size and dynamic-link information for the built native libraries.
inspect: inspect-dylib inspect-so

# macOS library: file listing plus the linked libraries reported by otool.
inspect-dylib: jni-dylib
	ls -lat "$(LIBXGBOOST4J_DYLIB)"
	otool -L "$(LIBXGBOOST4J_DYLIB)"

# Linux library: each command runs inside the build container. In the last
# line only `strings` runs in the container; `grep` filters its output on
# the host side of the pipe.
inspect-so: jni-so
	$(RUN_ON_DOCKER) ls -lat "$(LIBXGBOOST4J_SO)"
	$(RUN_ON_DOCKER) ldd "$(LIBXGBOOST4J_SO)"
	$(RUN_ON_DOCKER) strings "$(LIBXGBOOST4J_SO)" | grep ^GLIBC

# Remove everything the build produced, including the Docker images.
# `&&` (instead of `;`) makes a failing `rm` stop the recipe rather than be
# masked by the exit status of `git checkout`, which restores the tracked
# placeholder files under docker-cache.
# The `docker rmi` lines are allowed to fail: an image may not exist yet.
clean: clean-doc clean-dylib clean-so
	rm -rf docker-cache && git checkout HEAD -- docker-cache
	-docker rmi xgboost4j4s-jni
	-docker rmi xgboost4j4s-test-fedora
	-docker rmi xgboost4j4s-test-ubuntu

# Remove the bundled licence/copyright resources.
clean-doc:
	rm -rf "$(RESOURCES_METAINF)"

# Remove sbt output, the macOS library and the CMake build directory.
# $(RM) is `rm -f`: a missing file is not an error, so no `-` prefix is needed.
clean-dylib:
	sbt +clean
	$(RM) "$(LIBXGBOOST4J_DYLIB)"
	rm -rf xgboost/build

# Remove the Linux libraries copied or built through Docker.
clean-so:
	$(RM) "$(RESOURCES_LIB)/libgomp.so"
	$(RM) "$(LIBXGBOOST4J_SO)"

# Licence and copyright files placed under the META-INF resources directory.
doc: $(addprefix $(RESOURCES_METAINF)/,xgboost/LICENSE g++/copyright)

# Native libraries for both platforms.
jni: jni-dylib jni-so

# Convenience names for the two library files.
jni-dylib: $(LIBXGBOOST4J_DYLIB)

jni-so: $(LIBXGBOOST4J_SO)

# macOS JNI library. A patched copy of create_jni.py (create_jni.py~) is
# generated with OpenMP switched on and an extra -DOpenMP_..._LIBRARY argument
# that points at libgomp.a, then run with the Homebrew GCC toolchain.
#
# LDFLAGS is quoted so that a value containing spaces (for example
# `make LDFLAGS="-L/a -L/b"`) stays a single environment assignment instead
# of spilling into the command line. CC, CXX and CXXFLAGS already carry their
# own quotes in their definitions.
# NOTE(review): CFLAGS is defined at the top of this Makefile but is not
# passed to the build here — confirm whether that is intended.
$(LIBXGBOOST4J_DYLIB):
	cd xgboost/jvm-packages \
	&& sed \
		-e 's!CONFIG\["USE_OPENMP"\] = "OFF"!CONFIG["USE_OPENMP"] = "ON"!' \
		-e 's!join(args)!join(args + ["-DOpenMP_'$(LIBGOMP_A)'_LIBRARY='$(LIBGOMP_A)'"])!' \
		create_jni.py > create_jni.py~ \
	&& CC=$(CC) CXX=$(CXX) LDFLAGS="$(LDFLAGS)" CXXFLAGS=$(CXXFLAGS) \
		CMAKE_POLICY_DEFAULT_CMP0066=NEW \
		python create_jni.py~

# Linux JNI library: built inside the container once libgomp.so is in place.
$(LIBXGBOOST4J_SO): $(RESOURCES_LIB)/libgomp.so
	$(RUN_ON_DOCKER) bash -c "cd xgboost/jvm-packages && python create_jni.py"

# g++ copyright notice, copied out of the build container.
# $(@D) is the directory part of the target.
$(RESOURCES_METAINF)/g++/copyright:
	mkdir -p "$(@D)"
	$(RUN_ON_DOCKER) cp -p /usr/share/doc/g++-6/copyright "$(@D)"

# XGBoost licence, copied from the submodule.
$(RESOURCES_METAINF)/xgboost/LICENSE:
	mkdir -p "$(@D)"
	cp -p xgboost/LICENSE "$(@D)"

# libgomp.so as located by gcc inside the container; -L copies the file a
# symlink points to rather than the link itself.
$(RESOURCES_LIB)/libgomp.so:
	$(RUN_ON_DOCKER) bash -c 'cp -Lpv "$$(gcc --print-file-name libgomp.so)" "$(@D)"'
31 changes: 30 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,30 @@
# xgboost4j-build
# xgboost4j4s

XGBoost4J with cross-version suffixes

## Development

### Prerequisites

* macOS
* Docker for Mac
* `g++-7` (installed by Homebrew, i.e. `brew install gcc`)

### How to Build and Test

```
$ make test
```

### How to Release

```
$ make release
```

## Prior Work

* https://github.com/dmlc/xgboost/pull/2767
* https://github.com/criteo-forks/xgboost-jars
* https://github.com/nevillelyh/xgboost-dist
* https://github.com/myui/build-xgboost-jvm
132 changes: 132 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import PgpKeys._
import sbtrelease.ReleasePlugin.autoImport.ReleaseTransformations._
import scala.sys.process._
import xerial.sbt.Sonatype._

// Root project: aggregates the published modules, is not published itself,
// and exposes the Makefile targets as sbt tasks. Each task runs `make` in a
// child process and yields its exit code.
lazy val `xgboost-jvm` =
  project
    .in(file("."))
    .aggregate(
      xgboost4j,
      // `xgboost4j-example`,
      `xgboost4j-flink`,
      `xgboost4j-spark`
    )
    .settings(settings ++ notToPublish)
    .settings(
      makeClean := "make clean".!,
      makeDoc   := "make doc".!,
      // The makeJni key is declared in this file but previously had no
      // binding, so the task could not be run; bind it like its siblings.
      makeJni   := "make jni".!,
      makeTest  := "make test".!
    )

// Core module, built from the sources inside the xgboost submodule.
lazy val xgboost4j =
  (project in file("xgboost/jvm-packages/xgboost4j"))
    .settings(settings, toPublish)
    .settings(
      // Akka at runtime; Akka TestKit and JUnit for tests only.
      libraryDependencies += "com.typesafe.akka" %% "akka-actor" % "2.3.11",
      libraryDependencies += "com.typesafe.akka" %% "akka-testkit" % "2.3.11" % Test,
      libraryDependencies += "junit" % "junit" % "4.11" % Test
    )

/* pom.xml of flink-ml 0.10.2 seems to cause a "Conflicting cross-version suffixes" error
lazy val `xgboost4j-example` =
project
.in(file("xgboost/jvm-packages/xgboost4j-example"))
.dependsOn(`xgboost4j-flink`, `xgboost4j-spark`)
.settings(settings ++ notToPublish)
.settings(
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-mllib" % "2.1.0" % Provided
)
)
*/

// Flink integration. It depends on the core module for compilation and also
// maps its test configuration onto the core module's test configuration.
lazy val `xgboost4j-flink` =
  (project in file("xgboost/jvm-packages/xgboost4j-flink"))
    .dependsOn(xgboost4j % "compile;test->test")
    .settings(settings, toPublish)
    .settings(
      libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.4",
      // All Flink artifacts share one organization and version.
      libraryDependencies ++= Seq("flink-clients", "flink-ml", "flink-scala")
        .map(artifact => "org.apache.flink" %% artifact % "0.10.2")
    )

// Spark integration. It depends on the core module for compilation and also
// maps its test configuration onto the core module's test configuration.
lazy val `xgboost4j-spark` =
  (project in file("xgboost/jvm-packages/xgboost4j-spark"))
    .dependsOn(xgboost4j % "compile;test->test")
    .settings(settings, toPublish)
    .settings(
      // Tests in this module run one at a time.
      parallelExecution in Test := false,
      // Spark is declared Provided, so it is not a transitive runtime dependency.
      libraryDependencies += "org.apache.spark" %% "spark-mllib" % "2.1.0" % Provided
    )

// Settings shared by every project in this build, assembled from three groups.
lazy val settings = {
  // Compiler options, project metadata and test-reporter arguments.
  val buildSettings = Seq(
    crossScalaVersions := Seq("2.11.8"),
    javacOptions ++= Seq("-source", "1.7", "-target", "1.7"),
    licenses := Seq("Apache-2.0" -> url("https://www.apache.org/licenses/LICENSE-2.0.txt")),
    organization := "com.github.hirofumi",
    scalaVersion := "2.11.8",
    scalacOptions ++= Seq("-deprecation", "-encoding", "UTF-8", "-feature", "-target:jvm-1.7"),
    testOptions += Tests.Argument(TestFrameworks.ScalaTest, "-oDF")
  )

  // Tests run in a forked JVM whose working directory is xgboost/jvm-packages.
  val forkedTestSettings = Seq(
    baseDirectory in (Test, test) := (baseDirectory in ThisBuild).value / "xgboost" / "jvm-packages",
    fork in (Test, test) := true
  )

  // Common dependencies, the release step sequence and Sonatype hosting data.
  val releaseSettings = Seq(
    libraryDependencies ++= Seq(
      "com.esotericsoftware.kryo" % "kryo" % "2.21",
      "commons-logging" % "commons-logging" % "1.2",
      "org.scalatest" %% "scalatest" % "3.0.0" % Test
    ),
    releaseProcess := Seq[ReleaseStep](
      checkSnapshotDependencies,
      inquireVersions,
      runClean,
      runTest,
      setReleaseVersion,
      commitReleaseVersion,
      tagRelease,
      releaseStepCommand("publishSigned"),
      setNextVersion,
      commitNextVersion,
      releaseStepCommand("sonatypeReleaseAll"),
      pushChanges
    ),
    sonatypeProjectHosting := Some(
      GithubHosting("hirofumi", "xgboost4j4s", "[email protected]")
    )
  )

  buildSettings ++ forkedTestSettings ++ releaseSettings
}

// Settings for projects that must never be published: no artifact is
// produced and every publish task becomes a no-op.
lazy val notToPublish =
  Seq(
    publishArtifact := false,
    publish         := {},
    publishLocal    := {},
    publishSigned   := {}
  )

// Settings for published modules: Maven-style publishing to the Sonatype
// repository. Signed artifacts may be overwritten only for snapshot versions.
lazy val toPublish =
  Seq(
    publishTo := sonatypePublishTo.value,
    publishMavenStyle := true,
    publishSignedConfiguration :=
      publishSignedConfiguration.value.withOverwrite(isSnapshot.value)
  )

// Task keys for invoking Makefile targets from sbt. Each key is typed Int;
// the description string names the make invocation it stands for.
lazy val makeClean = taskKey[Int]("make clean")
lazy val makeDoc = taskKey[Int]("make doc")
lazy val makeJni = taskKey[Int]("make jni")
lazy val makeTest = taskKey[Int]("make test")

// Bare settings expression: applies the non-publishing settings at the top
// level of this build file as well.
// NOTE(review): presumably this targets the root project — confirm.
notToPublish
2 changes: 2 additions & 0 deletions docker-cache/.ivy2/cache/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignore everything in this directory except this file, so the otherwise
# empty cache directory stays tracked.
*
!.gitignore
2 changes: 2 additions & 0 deletions docker-cache/.sbt/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Ignore everything in this directory except this file, so the otherwise
# empty cache directory stays tracked.
*
!.gitignore
1 change: 1 addition & 0 deletions project/build.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# sbt launcher version used to build this project.
sbt.version=1.1.0
4 changes: 4 additions & 0 deletions project/build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Compiler flags for the build definition itself: report deprecated API
// usage and language features that need an explicit import.
scalacOptions ++= Seq("-deprecation", "-feature")
3 changes: 3 additions & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// sbt-release: provides the release process steps configured in build.sbt.
addSbtPlugin("com.github.gseitz" % "sbt-release" % "1.0.7")
// sbt-pgp: provides the PgpKeys / publishSigned tasks used in build.sbt.
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.0")
// sbt-sonatype: provides sonatypePublishTo and the sonatypeReleaseAll command.
addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "2.1" )
1 change: 1 addition & 0 deletions version.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Build-wide version; the release process in build.sbt includes steps that set the release and next versions.
version in ThisBuild := "0.7.0-SNAPSHOT"
1 change: 1 addition & 0 deletions xgboost
Submodule xgboost added at e5e4bd

0 comments on commit 6ba0811

Please sign in to comment.