Skip to content

Commit 9283aab

Browse files
authored
Switch to using setuptools to build tfdv.
* Update README.md * Update setup.py * Update RELEASE.md * Update install.md * Update BUILD * Update and rename build_pip_package.sh to move_generated_files.sh * Update build_manylinux.sh
1 parent 6044f27 commit 9283aab

File tree

7 files changed

+145
-113
lines changed

7 files changed

+145
-113
lines changed

README.md

+3-8
Original file line numberDiff line numberDiff line change
@@ -111,18 +111,13 @@ branch), pass `-b <branchname>` to the `git clone` command.
111111

112112
### 3. Build the pip package
113113

114-
TFDV uses Bazel to build the pip package from source. Before invoking the
115-
following commands, make sure the `python` in your `$PATH` is the one of the
116-
target version and has NumPy installed.
114+
The `TFDV` wheel is Python-version dependent -- to build the pip package that
115+
works for a specific Python version, use that Python binary to run:
117116

118117
```shell
119-
bazel run -c opt --cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0 tensorflow_data_validation:build_pip_package
118+
python setup.py bdist_wheel
120119
```
121120

122-
Note that we are assuming here that dependent packages (e.g. PyArrow) are built
123-
with a GCC older than 5.1 and use the flag `D_GLIBCXX_USE_CXX11_ABI=0` to be
124-
[compatible with the old std::string ABI](https://gcc.gnu.org/onlinedocs/libstdc++/manual/using_dual_abi.html).
125-
126121
You can find the generated `.whl` file in the `dist` subdirectory.
127122

128123
### 4. Install the pip package

g3doc/install.md

+5-6
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,14 @@ branch), pass `-b <branchname>` to the `git clone` command.
6464

6565
### 3. Build the pip package
6666

67-
Then, run the following at the project root:
67+
The `TFDV` wheel is Python-version dependent -- to build the pip package that
68+
works for a specific Python version, use that Python binary to run:
6869

69-
```bash
70-
sudo docker-compose build manylinux2010
71-
sudo docker-compose run -e PYTHON_VERSION=${PYTHON_VERSION} manylinux2010
70+
```shell
71+
python setup.py bdist_wheel
7272
```
73-
where `PYTHON_VERSION` is one of `{35, 36, 37}`.
7473

75-
A wheel will be produced under `dist/`.
74+
You can find the generated `.whl` file in the `dist` subdirectory.
7675

7776
### 4. Install the pip package
7877

setup.py

+75-4
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,84 @@
1313
# limitations under the License.
1414
"""Package Setup script for TensorFlow Data Validation."""
1515

16+
# pylint:disable=g-bad-import-order
17+
# setuptools must be imported prior to distutils.
18+
import setuptools
19+
from distutils import spawn
20+
from distutils.command import build
21+
# pylint:enable=g-bad-import-order
22+
import os
23+
import platform
24+
import subprocess
25+
1626
from setuptools import find_packages
1727
from setuptools import setup
1828
from setuptools.command.install import install
1929
from setuptools.dist import Distribution
2030

2131

32+
class _BuildCommand(build.build):
33+
"""Build everything that is needed to install.
34+
35+
This overrides the original distutils "build" command to run bazel_build
36+
command before any sub_commands.
37+
38+
build command is also invoked from bdist_wheel and install command, therefore
39+
this implementation covers the following commands:
40+
- pip install . (which invokes bdist_wheel)
41+
- python setup.py install (which invokes install command)
42+
- python setup.py bdist_wheel (which invokes bdist_wheel command)
43+
"""
44+
45+
def _build_cc_extensions(self):
46+
return True
47+
48+
# Add "bazel_build" command as the first sub_command of "build". Each
49+
# sub_command of "build" (e.g. "build_py", "build_ext", etc.) is executed
50+
# sequentially when running a "build" command, if the second item in the tuple
51+
# (predicate method) is evaluated to true.
52+
sub_commands = [
53+
('bazel_build', _build_cc_extensions),
54+
] + build.build.sub_commands
55+
56+
57+
class _BazelBuildCommand(setuptools.Command):
58+
"""Build TFDV C++ extensions and public protos with Bazel.
59+
60+
Running this command will populate foo_pb2.py file next to your foo.proto
61+
file.
62+
"""
63+
64+
def initialize_options(self):
65+
pass
66+
67+
def finalize_options(self):
68+
self._bazel_cmd = spawn.find_executable('bazel')
69+
if not self._bazel_cmd:
70+
raise RuntimeError(
71+
'Could not find "bazel" binary. Please visit '
72+
'https://docs.bazel.build/versions/master/install.html for '
73+
'installation instruction.')
74+
self._additional_build_options = []
75+
if platform.system() == 'Darwin':
76+
self._additional_build_options = ['--macos_minimum_os=10.9']
77+
elif platform.system() == 'Windows':
78+
self._additional_build_options = ['--copt=-DWIN32_LEAN_AND_MEAN']
79+
80+
def run(self):
81+
subprocess.check_call(
82+
[self._bazel_cmd, 'run', '-c', 'opt'] + self._additional_build_options +
83+
['//tensorflow_data_validation:move_generated_files'],
84+
# Bazel should be invoked in a directory containing bazel WORKSPACE
85+
# file, which is the root directory.
86+
cwd=os.path.dirname(os.path.realpath(__file__)),
87+
)
88+
89+
2290
# TFDV is not a purelib. However, because the extension module is not built
2391
# by setuptools, it will be incorrectly treated as a purelib. The following
2492
# works around that bug.
25-
class _InstallPlatlib(install):
93+
class _InstallPlatlibCommand(install):
2694

2795
def finalize_options(self):
2896
install.finalize_options(self)
@@ -57,7 +125,7 @@ def _make_all_extra_requirements():
57125
# Get version from version module.
58126
with open('tensorflow_data_validation/version.py') as fp:
59127
globals_dict = {}
60-
exec (fp.read(), globals_dict) # pylint: disable=exec-used
128+
exec(fp.read(), globals_dict) # pylint: disable=exec-used
61129
__version__ = globals_dict['__version__']
62130

63131
# Get the long description from the README file.
@@ -81,7 +149,6 @@ def _make_all_extra_requirements():
81149
'Operating System :: Microsoft :: Windows',
82150
'Programming Language :: Python',
83151
'Programming Language :: Python :: 3',
84-
'Programming Language :: Python :: 3.5',
85152
'Programming Language :: Python :: 3.6',
86153
'Programming Language :: Python :: 3.7',
87154
'Programming Language :: Python :: 3 :: Only',
@@ -129,5 +196,9 @@ def _make_all_extra_requirements():
129196
url='https://www.tensorflow.org/tfx/data_validation',
130197
download_url='https://github.com/tensorflow/data-validation/tags',
131198
requires=[],
132-
cmdclass={'install': _InstallPlatlib})
199+
cmdclass={
200+
'install': _InstallPlatlibCommand,
201+
'build': _BuildCommand,
202+
'bazel_build': _BazelBuildCommand,
203+
})
133204

tensorflow_data_validation/BUILD

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ config_setting(
88
)
99

1010
sh_binary(
11-
name = "build_pip_package",
12-
srcs = ["build_pip_package.sh"],
11+
name = "move_generated_files",
12+
srcs = ["move_generated_files.sh"],
1313
data = select({
1414
":windows": [
1515
"//tensorflow_data_validation/anomalies/proto:validation_config_pb2.py",

tensorflow_data_validation/build_pip_package.sh

-81
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
#!/bin/bash
2+
# Copyright 2020 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# Moves the bazel generated files needed for packaging the wheel to the source
17+
# tree.
18+
19+
function _is_windows() {
20+
[[ "$(uname -s | tr 'A-Z' 'a-z')" =~ (cygwin|mingw32|mingw64|msys)_nt* ]]
21+
}
22+
23+
function tfdv::move_generated_files() {
24+
if _is_windows; then
25+
# Newer bazel does not create bazel-genfiles any more (
26+
# https://github.com/bazelbuild/bazel/issues/6761). It's merged with bazel-bin.
27+
GENFILES=bazel-genfiles
28+
if [[ ! -d ${BUILD_WORKSPACE_DIRECTORY}/${GENFILES} ]]; then
29+
GENFILES=bazel-bin
30+
fi
31+
PYWRAP_TFDV="tensorflow_data_validation/pywrap/tensorflow_data_validation_extension.pyd"
32+
cp -f "${BUILD_WORKSPACE_DIRECTORY}/bazel-out/x64_windows-opt/bin/${PYWRAP_TFDV}" \
33+
"${BUILD_WORKSPACE_DIRECTORY}/${PYWRAP_TFDV}"
34+
35+
cp -f ${BUILD_WORKSPACE_DIRECTORY}/${GENFILES}/tensorflow_data_validation/anomalies/proto/validation_config_pb2.py \
36+
${BUILD_WORKSPACE_DIRECTORY}/tensorflow_data_validation/anomalies/proto
37+
cp -f ${BUILD_WORKSPACE_DIRECTORY}/${GENFILES}/tensorflow_data_validation/anomalies/proto/validation_metadata_pb2.py \
38+
${BUILD_WORKSPACE_DIRECTORY}/tensorflow_data_validation/anomalies/proto
39+
else
40+
PYWRAP_TFDV="tensorflow_data_validation/pywrap/tensorflow_data_validation_extension.so"
41+
cp -f "${BUILD_WORKSPACE_DIRECTORY}/bazel-bin/${PYWRAP_TFDV}" \
42+
"${BUILD_WORKSPACE_DIRECTORY}/${PYWRAP_TFDV}"
43+
44+
# If run by "bazel run", $(pwd) is the .runfiles dir that contains all the
45+
# data dependencies.
46+
RUNFILES_DIR=$(pwd)
47+
cp -f ${RUNFILES_DIR}/tensorflow_data_validation/anomalies/proto/validation_config_pb2.py \
48+
${BUILD_WORKSPACE_DIRECTORY}/tensorflow_data_validation/anomalies/proto
49+
cp -f ${RUNFILES_DIR}/tensorflow_data_validation/anomalies/proto/validation_metadata_pb2.py \
50+
${BUILD_WORKSPACE_DIRECTORY}/tensorflow_data_validation/anomalies/proto
51+
fi
52+
chmod +w "${BUILD_WORKSPACE_DIRECTORY}/${PYWRAP_TFDV}"
53+
}
54+
55+
tfdv::move_generated_files

tensorflow_data_validation/tools/docker_build/build_manylinux.sh

+5-12
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,9 @@ function install_numpy() {
5555
${PIP_BIN} install "numpy>=1.16,<2"
5656
}
5757

58-
function bazel_build() {
58+
function build_wheel() {
5959
rm -rf dist
60-
bazel run -c opt \
61-
--cxxopt=-D_GLIBCXX_USE_CXX11_ABI=0 \
62-
tensorflow_data_validation:build_pip_package \
63-
--\
64-
--python_bin_path "${PYTHON_BIN_PATH}"
60+
"${PYTHON_BIN_PATH}" setup.py bdist_wheel
6561
}
6662

6763
function stamp_wheel() {
@@ -72,11 +68,8 @@ function stamp_wheel() {
7268
rm "${WHEEL_PATH}"
7369
}
7470

75-
setup_environment
76-
set -e
7771
set -x
78-
install_numpy
79-
bazel_build
72+
setup_environment && \
73+
install_numpy && \
74+
build_wheel && \
8075
stamp_wheel
81-
set +e
82-
set +x

0 commit comments

Comments
 (0)