diff --git a/.circleci/config.yml b/.circleci/config.yml
index 0a6e8dc..2c7d0fa 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -22,7 +22,7 @@ jobs:
           command: |
             bash <(curl -s https://codecov.io/bash)
 
-  # The publish job only gets scheduled for commits to master; see workflows section below
+  # This publish job only runs for builds triggered by a git tag; see workflows section below.
   publish:
     docker:
       - image: mozilla/sbt:8u171_0.13.13
@@ -38,9 +38,12 @@ workflows:
   test-publish:
     jobs:
       - test
+      # Publish only runs on builds triggered by a new git tag of the form vX.X.X
       - publish:
           requires:
             - test
           filters:
             branches:
-              only: master
+              ignore: /.*/
+            tags:
+              only: /^v.*/
diff --git a/.gitignore b/.gitignore
index 132642d..295b971 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,4 +3,8 @@ target/
 .idea/
 .idea_modules/
 .DS_Store
-*.pyc
\ No newline at end of file
+*.pyc
+venv/
+.tox/
+*.egg-info/
+.pytest_cache/
diff --git a/README.md b/README.md
index f76d9b4..a7cf05b 100644
--- a/README.md
+++ b/README.md
@@ -44,5 +44,16 @@ yields:
 ```
 
 ### Deployment
-Any commits to master should also trigger a circleci build that will do the sbt publishing for you
-to our local maven repo in s3 and to spark-packages.org.
+
+To publish a new version of the package, you need to
+[create a new release on GitHub](https://github.com/mozilla/spark-hyperloglog/releases/new)
+with a tag version starting with `v` like `v2.2.0`. The tag will trigger a CircleCI build
+that publishes to Mozilla's maven repo in S3.
+
+The CircleCI build will also attempt to publish the new tag to spark-packages.org,
+but due to
+[an outstanding bug in the sbt-spark-package plugin](https://github.com/databricks/sbt-spark-package/issues/31)
+that publish will likely fail. You can retry locally until it succeeds by creating a GitHub
+personal access token, exporting the environment variables `GITHUB_USERNAME` and
+`GITHUB_PERSONAL_ACCESS_TOKEN`, and then repeatedly running `sbt spPublish` until you get a
+non-404 response.
diff --git a/VERSION b/VERSION
deleted file mode 100644
index ccbccc3..0000000
--- a/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-2.2.0
diff --git a/build.sbt b/build.sbt
index 37f308d..bc8b62c 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,6 +1,6 @@
 name := "spark-hyperloglog"
 
-version := scala.io.Source.fromFile("VERSION").mkString.stripLineEnd
+version := sys.env.getOrElse("CIRCLE_TAG", "v2.2-SNAPSHOT").stripPrefix("v")
 
 scalaVersion := "2.11.8"
 
@@ -25,6 +25,25 @@ credentials += Credentials(
   sys.env.getOrElse("GITHUB_USERNAME", ""),
   sys.env.getOrElse("GITHUB_PERSONAL_ACCESS_TOKEN", ""))
 
+
+// Include the contents of the python/ directory at the root of our packaged jar;
+// `sbt spPublish` handles including python files for the zip sent to spark-packages.org,
+// but we also want the python bindings to be present in the jar we upload to S3 maven
+// via `sbt publish`.
+val pythonBesidesPyspark = new SimpleFileFilter({ f =>
+  val pythonDir = "/spark-hyperloglog/python"
+  val pyLibDir = pythonDir + "/pyspark_hyperloglog"
+  val p = f.getCanonicalPath
+  p match {
+    case _ if p.contains(pyLibDir) => false // Don't exclude contents of pyspark dir
+    case _ if p.contains(pythonDir + "/") => true // Exclude everything else under python/
+    case _ => false // Don't exclude other files not under python/
+  }
+})
+unmanagedResourceDirectories in Compile += baseDirectory.value / "python"
+excludeFilter in unmanagedResources :=
+  HiddenFileFilter || pythonBesidesPyspark || "*.pyc" || "*.egg*"
+
 publishMavenStyle := true
 
 publishTo := {
diff --git a/python/VERSION b/python/VERSION
deleted file mode 120000
index 6ff19de..0000000
--- a/python/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-../VERSION
\ No newline at end of file
diff --git a/python/setup.py b/python/setup.py
index a73ed67..ce6039b 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,11 +1,11 @@
 from setuptools import setup
+import os
 
-with open('VERSION', 'r') as f:
-    VERSION = f.read().strip()
+version = os.environ.get('CIRCLE_TAG', 'v2.2.snapshot').lstrip('v')
 
 setup(
     name='pyspark-hyperloglog',
-    version=VERSION.split('-')[0],
+    version=version,
     description='PySpark UDFs for HyperLogLog',
     keywords=['spark', 'udf', 'hyperloglog'],
     author='Anthony Miyaguchi',
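
A note on the mechanics above: CircleCI sets the `CIRCLE_TAG` environment variable only on tag-triggered builds, which is what lets both `build.sbt` and `python/setup.py` derive the published version from the git tag and fall back to a snapshot version everywhere else. Below is a minimal Scala sketch of that behavior; it is not part of the commit, and `deriveVersion` and the sample values are illustrative only.

// Mirrors the expression in build.sbt:
//   version := sys.env.getOrElse("CIRCLE_TAG", "v2.2-SNAPSHOT").stripPrefix("v")
object VersionDerivationSketch extends App {
  def deriveVersion(env: Map[String, String]): String =
    env.getOrElse("CIRCLE_TAG", "v2.2-SNAPSHOT").stripPrefix("v")

  // Tag-triggered CircleCI build: CIRCLE_TAG holds the pushed tag.
  assert(deriveVersion(Map("CIRCLE_TAG" -> "v2.2.0")) == "2.2.0")

  // Branch or local build: no CIRCLE_TAG, so the snapshot default applies.
  assert(deriveVersion(Map.empty) == "2.2-SNAPSHOT")
}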