From 1e75c69339da2fbf2c5c5fbeb891243badae7ff8 Mon Sep 17 00:00:00 2001
From: "A. Unique TensorFlower"
Date: Thu, 15 Mar 2018 15:44:56 -0700
Subject: [PATCH] Automated g4 rollback of changelist 189231636

PiperOrigin-RevId: 189258641
---
 README.md | 4 -
 SECURITY.md | 14 +-
 configure.py | 7 +-
 .../xla/service/generic_transfer_manager.cc | 9 +-
 .../compiler/xla/tests/convolution_test.cc | 2 +-
 tensorflow/contrib/BUILD | 10 +-
 tensorflow/contrib/__init__.py | 6 +-
 .../boosted_trees/lib/utils/batch_features.h | 6 +-
 tensorflow/contrib/cmake/README.md | 12 +-
 tensorflow/contrib/cmake/external/grpc.cmake | 1 -
 .../contrib/cmake/external/protobuf.cmake | 2 +-
 tensorflow/contrib/cmake/tf_tests.cmake | 4 -
 tensorflow/contrib/data/__init__.py | 4 -
 .../contrib/data/python/kernel_tests/BUILD | 17 -
 .../kernel_tests/slide_dataset_op_test.py | 242 ---
 tensorflow/contrib/data/python/ops/BUILD | 1 -
 tensorflow/contrib/data/python/ops/sliding.py | 102 --
 tensorflow/contrib/distributions/BUILD | 2 -
 tensorflow/contrib/eager/python/BUILD | 5 +-
 .../python/examples/linear_regression/BUILD | 1 -
 tensorflow/contrib/factorization/BUILD | 5 +-
 .../contrib/ffmpeg/default/ffmpeg_lib.cc | 9 +-
 tensorflow/contrib/gan/BUILD | 1 -
 tensorflow/contrib/kafka/BUILD | 108 +-
 .../kafka/kernels/kafka_dataset_ops.cc | 4 +-
 tensorflow/contrib/kafka/ops/dataset_ops.cc | 44 -
 .../kafka/python/ops/kafka_dataset_ops.py | 9 +-
 .../kafka/python/ops/kafka_op_loader.py | 24 -
 .../contrib/kfac/python/kernel_tests/BUILD | 1 -
 tensorflow/contrib/labeled_tensor/BUILD | 1 -
 tensorflow/contrib/layers/BUILD | 2 -
 .../layers/python/layers/embedding_ops.py | 2 +-
 tensorflow/contrib/learn/BUILD | 12 +-
 .../learn/python/learn/ops/embeddings_ops.py | 2 +-
 tensorflow/contrib/lite/Makefile | 9 +-
 tensorflow/contrib/lite/arena_planner.h | 2 +-
 tensorflow/contrib/lite/build_rpi_lib.sh | 22 -
 tensorflow/contrib/lite/builtin_ops.h | 2 +-
 tensorflow/contrib/lite/error_reporter.h | 2 +-
 tensorflow/contrib/lite/g3doc/rpi.md | 50 -
 tensorflow/contrib/lite/interpreter.h | 2 +-
 tensorflow/contrib/lite/interpreter_test.cc | 2 +-
 tensorflow/contrib/lite/kernels/conv.cc | 2 +-
 .../contrib/lite/kernels/depthwise_conv.cc | 2 +-
 .../contrib/lite/kernels/fully_connected.cc | 2 +-
 tensorflow/contrib/lite/kernels/kernel_util.h | 2 +-
 .../contrib/lite/kernels/lsh_projection.cc | 2 +-
 tensorflow/contrib/lite/kernels/lstm.cc | 6 +-
 tensorflow/contrib/lite/kernels/reshape.cc | 12 +-
 .../contrib/lite/kernels/reshape_test.cc | 2 +-
 tensorflow/contrib/lite/kernels/test_util.cc | 4 +-
 .../kernels/unidirectional_sequence_lstm.cc | 2 +-
 tensorflow/contrib/lite/memory_planner.h | 4 +-
 tensorflow/contrib/lite/model.h | 2 +-
 .../contrib/lite/nnapi/NeuralNetworksShim.h | 2 +-
 tensorflow/contrib/lite/rpi_makefile.inc | 33 -
 .../schema/builtin_ops_header/generator.cc | 2 +-
 .../contrib/lite/simple_memory_arena.cc | 6 +-
 tensorflow/contrib/lite/simple_memory_arena.h | 6 +-
 tensorflow/contrib/lookup/BUILD | 1 -
 tensorflow/contrib/makefile/README.md | 2 -
 tensorflow/contrib/makefile/build_all_ios.sh | 3 +-
 tensorflow/contrib/mpi/mpi_utils.h | 2 -
 .../contrib/predictor/predictor_factories.py | 4 +-
 tensorflow/contrib/py2tf/converters/BUILD | 2 -
 .../contrib/py2tf/converters/single_return.py | 2 +-
 tensorflow/contrib/py2tf/utils/BUILD | 1 -
 .../quantize/python/fold_batch_norms.py | 4 +-
 .../contrib/quantize/python/quant_ops.py | 4 +-
 .../contrib/quantize/python/quantize.py | 2 +-
 .../contrib/quantize/python/quantize_graph.py | 2 +-
 .../python/quantize_parameterized_test.py | 8 +-
 .../contrib/quantize/python/quantize_test.py | 2 +-
 .../contrib/remote_fused_graph/pylib/BUILD | 1 +
 tensorflow/contrib/rnn/python/ops/rnn_cell.py | 6 +-
 tensorflow/contrib/saved_model/BUILD | 1 -
 .../seq2seq/python/ops/beam_search_decoder.py | 7 +-
 tensorflow/contrib/session_bundle/BUILD | 1 -
 .../contrib/slim/python/slim/data/BUILD | 1 -
 tensorflow/contrib/tensor_forest/BUILD | 1 +
 tensorflow/contrib/tensorboard/BUILD | 1 -
 tensorflow/contrib/tensorrt/BUILD | 2 -
 tensorflow/contrib/tensorrt/README.md | 23 +-
 tensorflow/contrib/tensorrt/__init__.py | 18 +-
 .../contrib/tensorrt/convert/convert_graph.cc | 256 +--
 .../contrib/tensorrt/convert/convert_graph.h | 8 +-
 .../contrib/tensorrt/convert/convert_nodes.cc | 1469 +++-------------
 .../contrib/tensorrt/convert/convert_nodes.h | 53 +-
 .../contrib/tensorrt/kernels/trt_calib_op.cc | 11 +-
 .../contrib/tensorrt/kernels/trt_engine_op.cc | 39 +-
 tensorflow/contrib/tensorrt/log/trt_logger.cc | 8 +-
 tensorflow/contrib/tensorrt/log/trt_logger.h | 4 +-
 .../contrib/tensorrt/python/__init__.py | 1 -
 .../contrib/tensorrt/python/trt_convert.py | 68 +-
 .../tensorrt/resources/trt_int8_calibrator.cc | 56 +-
 .../tensorrt/resources/trt_int8_calibrator.h | 15 +-
 .../contrib/tensorrt/test/test_tftrt.py | 57 +-
 tensorflow/contrib/tensorrt/trt_conversion.i | 63 +-
 tensorflow/contrib/timeseries/examples/BUILD | 5 +-
 .../timeseries/python/timeseries/BUILD | 5 +-
 .../timeseries/state_space_models/BUILD | 1 -
 tensorflow/contrib/tpu/BUILD | 1 -
 tensorflow/contrib/util/loader.py | 7 +-
 tensorflow/core/BUILD | 4 -
 .../base_api/api_def_SlideDataset.pbtxt | 18 -
 .../core/distributed_runtime/tensor_coding.cc | 4 +-
 tensorflow/core/framework/dataset.h | 4 +-
 tensorflow/core/graph/mkl_layout_pass.cc | 31 +-
 .../grappler/optimizers/loop_optimizer.cc | 8 +-
 tensorflow/core/kernels/BUILD | 12 +-
 tensorflow/core/kernels/data/BUILD | 14 -
 .../core/kernels/data/slide_dataset_op.cc | 252 ---
 tensorflow/core/kernels/depthtospace_op.cc | 3 -
 .../core/kernels/depthtospace_op_gpu.cu.cc | 6 -
 tensorflow/core/kernels/hexagon/BUILD | 1 -
 .../core/kernels/mkl_conv_grad_filter_ops.cc | 81 +-
 .../core/kernels/mkl_conv_grad_input_ops.cc | 18 +-
 tensorflow/core/kernels/mkl_conv_ops.cc | 146 +-
 tensorflow/core/kernels/mkl_conv_ops.h | 117 +-
 .../core/kernels/mkl_input_conversion_op.cc | 7 +-
 tensorflow/core/kernels/mkl_relu_op.cc | 23 +-
 .../core/kernels/segment_reduction_ops.h | 14 +-
 tensorflow/core/kernels/spacetodepth_op.cc | 3 -
 .../core/kernels/spacetodepth_op_gpu.cu.cc | 6 -
 tensorflow/core/lib/core/stringpiece.cc | 2 +
 tensorflow/core/lib/core/stringpiece.h | 2 +-
 tensorflow/core/lib/io/record_reader.cc | 2 -
 tensorflow/core/lib/io/record_reader.h | 4 +-
 tensorflow/core/ops/dataset_ops.cc | 12 +-
 tensorflow/core/ops/nn_ops.cc | 8 -
 tensorflow/core/platform/tracing.h | 2 +-
 .../platform/windows/windows_file_system.cc | 3 +-
 tensorflow/core/public/version.h | 2 +-
 tensorflow/docs_src/community/welcome.md | 4 +-
 tensorflow/docs_src/install/install_c.md | 2 +-
 tensorflow/docs_src/install/install_go.md | 2 +-
 tensorflow/docs_src/install/install_java.md | 22 +-
 tensorflow/docs_src/install/install_linux.md | 56 +-
 tensorflow/docs_src/install/install_mac.md | 23 +-
 .../docs_src/install/install_sources.md | 4 +-
 .../docs_src/install/install_windows.md | 5 +-
 tensorflow/docs_src/performance/xla/jit.md | 2 +-
 .../docs_src/programmers_guide/debugger.md | 3 +-
 tensorflow/docs_src/programmers_guide/faq.md | 4 +-
 .../summaries_and_tensorboard.md | 2 +-
 .../docs_src/programmers_guide/using_tpu.md | 7
+- tensorflow/docs_src/tutorials/layers.md | 12 +- .../docs_src/tutorials/recurrent_quickdraw.md | 3 +- tensorflow/docs_src/tutorials/wide.md | 16 +- .../examples/android/AndroidManifest.xml | 4 - .../org/tensorflow/demo/CameraActivity.java | 7 +- .../org/tensorflow/demo/StylizeActivity.java | 60 - tensorflow/python/BUILD | 84 +- tensorflow/python/debug/BUILD | 1 - tensorflow/python/estimator/estimator.py | 32 +- tensorflow/python/estimator/training.py | 26 +- tensorflow/python/keras/BUILD | 5 +- tensorflow/python/kernel_tests/BUILD | 4 + .../python/kernel_tests/conv_ops_test.py | 20 +- .../kernel_tests/depthtospace_op_test.py | 10 +- .../kernel_tests/spacetodepth_op_test.py | 10 +- tensorflow/python/lib/io/file_io_test.py | 5 - tensorflow/python/ops/nn_ops.py | 2 +- tensorflow/python/ops/rnn.py | 17 +- tensorflow/python/ops/special_math_ops.py | 4 +- .../python/ops/special_math_ops_test.py | 5 - tensorflow/python/tools/freeze_graph.py | 36 +- tensorflow/python/tools/saved_model_cli.py | 60 - .../python/tools/saved_model_cli_test.py | 22 - tensorflow/stream_executor/cuda/cuda_dnn.cc | 33 +- tensorflow/tensorflow.bzl | 18 +- .../tools/api/tests/api_compatibility_test.py | 2 - tensorflow/tools/ci_build/Dockerfile.cmake | 5 +- .../windows/cpu/pip/build_tf_windows.sh | 3 +- tensorflow/tools/def_file_filter/BUILD | 0 tensorflow/tools/def_file_filter/BUILD.tpl | 15 - .../def_file_filter/def_file_filter.py.tpl | 168 -- .../def_file_filter_configure.bzl | 56 - tensorflow/tools/dist_test/README.md | 8 - tensorflow/tools/dist_test/local_test.sh | 22 +- tensorflow/tools/git/gen_git_source.py | 7 - tensorflow/tools/graph_transforms/BUILD | 1 - .../graph_transforms/fold_old_batch_norms.cc | 67 - .../fold_old_batch_norms_test.cc | 95 -- tensorflow/tools/pip_package/BUILD | 129 +- tensorflow/tools/pip_package/setup.py | 4 +- .../tools/test/upload_test_benchmarks.py | 9 +- tensorflow/workspace.bzl | 8 +- third_party/jpeg/jpeg.BUILD | 4 +- third_party/kafka/BUILD | 13 +- third_party/py/BUILD.tpl | 22 +- 191 files changed, 920 insertions(+), 4244 deletions(-) delete mode 100644 tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py delete mode 100644 tensorflow/contrib/data/python/ops/sliding.py delete mode 100644 tensorflow/contrib/kafka/ops/dataset_ops.cc delete mode 100644 tensorflow/contrib/kafka/python/ops/kafka_op_loader.py delete mode 100755 tensorflow/contrib/lite/build_rpi_lib.sh delete mode 100644 tensorflow/contrib/lite/g3doc/rpi.md delete mode 100644 tensorflow/contrib/lite/rpi_makefile.inc delete mode 100644 tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt delete mode 100644 tensorflow/core/kernels/data/slide_dataset_op.cc delete mode 100644 tensorflow/tools/def_file_filter/BUILD delete mode 100644 tensorflow/tools/def_file_filter/BUILD.tpl delete mode 100644 tensorflow/tools/def_file_filter/def_file_filter.py.tpl delete mode 100644 tensorflow/tools/def_file_filter/def_file_filter_configure.bzl diff --git a/README.md b/README.md index 3cdb6e478ddf4f..ef5bdc66ef0313 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,6 @@ organization for the purposes of conducting machine learning and deep neural networks research. The system is general enough to be applicable in a wide variety of other domains, as well. -Keep up to date with release announcements and security updates by -subscribing to -[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). 
- ## Installation *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.* diff --git a/SECURITY.md b/SECURITY.md index 2aaa9202d52800..fea24b27392088 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -6,7 +6,7 @@ report vulnerabilities in TensorFlow. ## TensorFlow models are programs -TensorFlow's runtime system interprets and executes programs. What machine +TensorFlow's runtime system interprets and executes programs. What machine learning practitioners term [**models**](https://developers.google.com/machine-learning/glossary/#model) are expressed as programs that TensorFlow executes. TensorFlow programs are encoded @@ -28,12 +28,12 @@ data you supply to TensorFlow to train a model, or to use a model to run inference on the data. **TensorFlow models are programs, and need to be treated as such from a security -perspective.** +perspective.** ## Running untrusted models As a general rule: **Always** execute untrusted models inside a sandbox (e.g., -[nsjail](https://github.com/google/nsjail)). +[nsjail](https://github.com/google/nsjail)). There are several ways in which a model could become untrusted. Obviously, if an untrusted party supplies TensorFlow kernels, arbitrary code may be executed. @@ -109,11 +109,11 @@ graphs known to the `ModelServer`. This means that an attacker may run graphs using untrusted inputs as described above, but they would not be able to execute arbitrary graphs. It is possible to safely expose a `ModelServer` directly to an untrusted network, **but only if the graphs it is configured to -use have been carefully audited to be safe**. +use have been carefully audited to be safe**. Similar to best practices for other servers, we recommend running any `ModelServer` with appropriate privileges (i.e., using a separate user with -reduced permissions). In the spirit of defense in depth, we recommend +reduced permisisons). In the spirit of defense in depth, we recommend authenticating requests to any TensorFlow server connected to an untrusted network, as well as sandboxing the server to minimize the adverse effects of any breach. @@ -133,7 +133,7 @@ which exhibit unexpected or unwanted behaviors. The fact that TensorFlow models can perform arbitrary computations means that they may read and write files, communicate via the network, produce deadlocks and infinite loops, or run out of memory. It is only when these behaviors are outside the specifications of the -operations involved that such behavior is a vulnerability. +operations involved that such behavior is a vulnerability. A `FileWriter` writing a file is not unexpected behavior and therefore is not a vulnerability in TensorFlow. A `MatMul` allowing arbitrary binary code execution @@ -168,7 +168,7 @@ below). Please use a descriptive subject line for your report email. After the initial reply to your report, the security team will endeavor to keep you informed of -the progress being made towards a fix and announcement. +the progress being made towards a fix and announcement. If you believe that an existing (public) issue is security-related, please send an email to `security@tensorflow.org`. 
The email should include the issue ID and diff --git a/configure.py b/configure.py index d14edef1be9e31..97f46757ee241b 100644 --- a/configure.py +++ b/configure.py @@ -1048,10 +1048,7 @@ def is_compatible(tensorrt_lib, cuda_ver, cudnn_ver): for lib_file in possible_files: if is_compatible(lib_file, cuda_ver, cudnn_ver): - matches = nvinfer_pattern.search(lib_file) - if len(matches.groups()) == 0: - continue - ver_str = matches.group(1) + ver_str = nvinfer_pattern.search(lib_file).group(1) ver = convert_version_to_int(ver_str) if len(ver_str) else 0 if ver > highest_ver[0]: highest_ver = [ver, ver_str, lib_file] @@ -1380,7 +1377,7 @@ def main(): # environment variables. environ_cp = dict(os.environ) - check_bazel_version('0.10.0') + check_bazel_version('0.5.4') reset_tf_configure_bazelrc(args.workspace) cleanup_makefile() diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc index a99e2b7794a399..78dc0ad4fcd167 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc @@ -38,7 +38,14 @@ namespace xla { GenericTransferManager::GenericTransferManager(se::Platform::Id platform_id, size_t pointer_size) - : platform_id_(platform_id), pointer_size_(pointer_size) {} + : platform_id_(platform_id), pointer_size_(pointer_size) { + // We currently only support kHostPlatformId for CPU, kCudaPlatformId for + // GPU and kInterpreterPlatformId for Interpreter. Before supporting other + // platforms, we need to test this transfer manager on them. + CHECK(platform_id_ == se::host::kHostPlatformId || + platform_id_ == se::interpreter::kInterpreterPlatformId || + platform_id_ == se::cuda::kCudaPlatformId); +} se::Platform::Id GenericTransferManager::PlatformId() const { return platform_id_; diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 72715398dea468..99640f5bb561a4 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -723,7 +723,7 @@ INSTANTIATE_TEST_CASE_P( ); #endif -XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { +TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); Shape input_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); Shape filter_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 986b61b3eaba79..bab37e8906e5c6 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -8,7 +8,6 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) load("//third_party/mpi:mpi.bzl", "if_mpi") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt") -load("//tensorflow:tensorflow.bzl", "if_not_windows") py_library( name = "contrib_py", @@ -52,6 +51,7 @@ py_library( "//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", + "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -63,6 +63,7 @@ py_library( "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", "//tensorflow/contrib/linear_optimizer:sdca_ops_py", + 
"//tensorflow/contrib/lite/python:lite", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/contrib/losses:losses_py", "//tensorflow/contrib/losses:metric_learning_py", @@ -109,10 +110,6 @@ py_library( "//tensorflow/python:util", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", - ]) + if_not_windows([ - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", # unix dependency, need to fix code - "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code - "//tensorflow/contrib/kafka", # has some linking issue on opensssl. ]), ) @@ -124,7 +121,6 @@ cc_library( "//tensorflow/contrib/coder:all_kernels", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", - "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel", @@ -151,7 +147,7 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", - "//tensorflow/contrib/kafka:dataset_ops_op_lib", + "//tensorflow/contrib/kafka:kafka_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", "//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 669d611b01b585..4f6f539027b040 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -18,8 +18,6 @@ from __future__ import division from __future__ import print_function -import os - # Add projects here, they will show up under tf.contrib. 
from tensorflow.contrib import batching from tensorflow.contrib import bayesflow @@ -85,8 +83,7 @@ from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager -if os.name != 'nt': - from tensorflow.contrib.lite.python import lite +from tensorflow.contrib.lite.python import lite from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph from tensorflow.contrib.specs import python as specs @@ -95,7 +92,6 @@ from tensorflow.python.util.lazy_loader import LazyLoader ffmpeg = LazyLoader("ffmpeg", globals(), "tensorflow.contrib.ffmpeg") -del os del LazyLoader del absolute_import diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index 7815fa049aa165..da5e7448519cb7 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -48,9 +48,9 @@ class BatchFeatures { Status GetFeatureColumnSizes(int64* const num_dense_float_features, int64* const num_sparse_float_features, int64* const num_sparse_int_features) const { - QCHECK_NE(num_dense_float_features, (int64*) nullptr); - QCHECK_NE(num_sparse_float_features, (int64*) nullptr); - QCHECK_NE(num_sparse_int_features, (int64*) nullptr); + QCHECK_NE(num_dense_float_features, nullptr); + QCHECK_NE(num_sparse_float_features, nullptr); + QCHECK_NE(num_sparse_int_features, nullptr); *num_dense_float_features = dense_float_feature_columns_.size(); *num_sparse_float_features = sparse_float_feature_columns_.size(); *num_sparse_int_features = sparse_int_feature_columns_.size(); diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index fe83bb32046cd7..8f85a75ee466db 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -26,7 +26,7 @@ The CMake files in this directory can build the core TensorFlow runtime, an example C++ binary, and a PIP package containing the runtime and Python bindings. -### Prerequisites +### Pre-requisites * CMake version 3.5 or later. @@ -34,16 +34,14 @@ bindings. * [SWIG](http://www.swig.org/download.html) -* Additional prerequisites for Microsoft Windows: +* Additional pre-requisites for Microsoft Windows: - Visual Studio 2015 - Python 3.5 + - NumPy 1.11.0 or later -* Additional prerequisites for Linux: +* Additional pre-requisites for Linux: - Python 2.7 or later - [Docker](https://www.docker.com/) (for automated testing) - -* Python dependencies: - - wheel - NumPy 1.11.0 or later ### Known-good configurations @@ -104,7 +102,7 @@ ops or APIs. Step-by-step Windows build ========================== -1. Install the prerequisites detailed above, and set up your environment. +1. Install the pre-requisites detailed above, and set up your environment. * The following commands assume that you are using the Windows Command Prompt (`cmd.exe`). 
You will need to set up your environment to use the diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index 17f65999faaf5c..a9f43a3ecba483 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -35,7 +35,6 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a - ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) endif() diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index ab464bc99a4313..aba8a5244e17d7 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) +set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") diff --git a/tensorflow/contrib/cmake/tf_tests.cmake b/tensorflow/contrib/cmake/tf_tests.cmake index b3e5b30826097d..9f96a4b797508d 100644 --- a/tensorflow/contrib/cmake/tf_tests.cmake +++ b/tensorflow/contrib/cmake/tf_tests.cmake @@ -476,10 +476,6 @@ if (tensorflow_BUILD_CC_TESTS) "${tensorflow_source_dir}/tensorflow/core/profiler/internal/advisor/*_test.cc" ) - list(REMOVE_ITEM tf_test_src_simple - ${tf_core_profiler_test_srcs} - ) - set(tf_test_lib tf_test_lib) add_library(${tf_test_lib} STATIC ${tf_src_testlib}) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 9212b69700941c..f09d156832208b 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -40,7 +40,6 @@ @@rejection_resample @@scan @@shuffle_and_repeat -@@sliding_window_batch @@sloppy_interleave @@unbatch @@ -73,9 +72,6 @@ from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat -from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch -from tensorflow.python.data.ops.iterator_ops import Iterator -from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_single_example # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 2c4d4adfdad6d2..22418b38e3872f 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -498,23 +498,6 @@ py_test( ], ) -tf_py_test( - name = "slide_dataset_op_test", - size = "small", - srcs = ["slide_dataset_op_test.py"], - additional_deps = [ - "//tensorflow/contrib/data/python/ops:dataset_ops", - "//tensorflow/contrib/data/python/ops:transformation_ops", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:errors", - "//tensorflow/python:math_ops", - "//tensorflow/python:sparse_tensor", - "//third_party/py/numpy", - ], -) - filegroup( name = "all_files", srcs = glob( diff --git 
a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py deleted file mode 100644 index 33c48e20bea53b..00000000000000 --- a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py +++ /dev/null @@ -1,242 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for the experimental input pipeline ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.data.python.ops import sliding -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import sparse_tensor -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.platform import test - - -class SlideDatasetTest(test.TestCase): - - def testSlideDataset(self): - """Test an dataset that maps a TF function across its input elements.""" - components = (np.arange(7), - np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis], - np.array(37.0) * np.arange(7)) - - count = array_ops.placeholder(dtypes.int64, shape=[]) - window_size = array_ops.placeholder(dtypes.int64, shape=[]) - stride = array_ops.placeholder(dtypes.int64, shape=[]) - - def _map_fn(x, y, z): - return math_ops.square(x), math_ops.square(y), math_ops.square(z) - - # The pipeline is TensorSliceDataset -> MapDataset(square_3) -> - # RepeatDataset(count) -> _SlideDataset(window_size, stride). - iterator = (dataset_ops.Dataset.from_tensor_slices(components) - .map(_map_fn) - .repeat(count) - .apply(sliding.sliding_window_batch(window_size, stride)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - self.assertEqual([[None] + list(c.shape[1:]) for c in components], - [t.shape.as_list() for t in get_next]) - - with self.test_session() as sess: - # Slide over a finite input, where the window_size divides the - # total number of elements. - sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 7}) - # Same formula with convolution layer. - num_batches = (20 * 7 - 14) // 7 + 1 - for i in range(num_batches): - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range(14): - self.assertAllEqual(component[(i*7 + j) % 7]**2, - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over a finite input, where the window_size does not - # divide the total number of elements. 
- sess.run(init_op, feed_dict={count: 20, window_size: 17, stride: 9}) - - num_batches = (20 * 7 - 17) // 9 + 1 - for i in range(num_batches): - result = sess.run(get_next) - for component, result_component in zip(components, result): - for j in range(17): - self.assertAllEqual(component[(i*9 + j) % 7]**2, - result_component[j]) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over a finite input, which is less than window_size, - # should fail straight away. - sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 4}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 8}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Slide over an empty input should fail straight away. - sess.run(init_op, feed_dict={count: 0, window_size: 8, stride: 4}) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - # Empty window_size should be an initialization time error. - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 0, stride: 0}) - - # Invalid stride should be an initialization time error. - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 0}) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 3}) - with self.assertRaises(errors.InvalidArgumentError): - sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 5}) - - def assertSparseValuesEqual(self, a, b): - self.assertAllEqual(a.indices, b.indices) - self.assertAllEqual(a.values, b.values) - self.assertAllEqual(a.dense_shape, b.dense_shape) - - def testSlideSparse(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=[[0]], values=(i * [1]), dense_shape=[1]) - - iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( - sliding.sliding_window_batch(5, 3)).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - num_batches = (10 - 5) // 3 + 1 - for i in range(num_batches): - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]], - values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4], - dense_shape=[5, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSlideSparseWithDifferentDenseShapes(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=array_ops.expand_dims( - math_ops.range(i, dtype=dtypes.int64), 1), - values=array_ops.fill([math_ops.to_int32(i)], i), - dense_shape=[i]) - - iterator = dataset_ops.Dataset.range(10).map(_sparse).apply( - sliding.sliding_window_batch(5, 3)).make_initializable_iterator() - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - num_batches = (10 - 5) // 3 + 1 - for i in range(num_batches): - actual = sess.run(get_next) - expected_indices = [] - expected_values = [] - for j in range(5): - for k in range(i * 3 + j): - expected_indices.append([j, k]) - expected_values.append(i * 3 + j) - expected = sparse_tensor.SparseTensorValue( - indices=expected_indices, - values=expected_values, - dense_shape=[5, i * 3 + 5 - 
1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testNestedSlideSparse(self): - - def _sparse(i): - return sparse_tensor.SparseTensorValue( - indices=[[0]], values=(i * [1]), dense_shape=[1]) - - iterator = (dataset_ops.Dataset.range(10) - .map(_sparse) - .apply(sliding.sliding_window_batch(4, 2)) - .apply(sliding.sliding_window_batch(3, 1)) - .make_initializable_iterator()) - init_op = iterator.initializer - get_next = iterator.get_next() - - with self.test_session() as sess: - sess.run(init_op) - # Slide: 1st batch. - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], - [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], - [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], - values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7], - dense_shape=[3, 4, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - # Slide: 2nd batch. - actual = sess.run(get_next) - expected = sparse_tensor.SparseTensorValue( - indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0], - [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0], - [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]], - values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9], - dense_shape=[3, 4, 1]) - self.assertTrue(sparse_tensor.is_sparse(actual)) - self.assertSparseValuesEqual(actual, expected) - with self.assertRaises(errors.OutOfRangeError): - sess.run(get_next) - - def testSlideShapeError(self): - - def generator(): - yield [1.0, 2.0, 3.0] - yield [4.0, 5.0, 6.0] - yield [7.0, 8.0, 9.0, 10.0] - - iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32, - output_shapes=[None]) - .apply(sliding.sliding_window_batch(3, 1)) - .make_initializable_iterator()) - next_element = iterator.get_next() - - with self.test_session() as sess: - sess.run(iterator.initializer) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r"Cannot batch tensors with different shapes in component 0. " - r"First element had shape \[3\] and element 2 had shape \[4\]."): - sess.run(next_element) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index c3331e963602d6..f03430c5c5aaa9 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -106,7 +106,6 @@ py_library( "interleave_ops.py", "resampling.py", "scan_ops.py", - "sliding.py", "stats_ops.py", "threadpool.py", "unique.py", diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py deleted file mode 100644 index 19cc3cb89fc5c4..00000000000000 --- a/tensorflow/contrib/data/python/ops/sliding.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Sliding dataset transformations.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import gen_dataset_ops - - -class _SlideDataset(dataset_ops.Dataset): - """A `Dataset` that passes a sliding window over its input.""" - - def __init__(self, input_dataset, window_size, stride=1): - """See `sliding_window_batch` for details.""" - super(_SlideDataset, self).__init__() - self._input_dataset = input_dataset - self._window_size = ops.convert_to_tensor( - window_size, dtype=dtypes.int64, name="window_size") - self._stride = ops.convert_to_tensor( - stride, dtype=dtypes.int64, name="stride") - - def _as_variant_tensor(self): - return gen_dataset_ops.slide_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - window_size=self._window_size, - stride=self._stride, - output_shapes=nest.flatten( - sparse.as_dense_shapes(self.output_shapes, self.output_classes)), - output_types=nest.flatten( - sparse.as_dense_types(self.output_types, self.output_classes))) - - @property - def output_classes(self): - return self._input_dataset.output_classes - - @property - def output_shapes(self): - input_shapes = self._input_dataset.output_shapes - return nest.pack_sequence_as(input_shapes, [ - tensor_shape.vector(None).concatenate(s) - for s in nest.flatten(self._input_dataset.output_shapes) - ]) - - @property - def output_types(self): - return self._input_dataset.output_types - - -def sliding_window_batch(window_size, stride=1): - """A sliding window with size of `window_size` and step of `stride`. - - This transformation passes a sliding window over this dataset. The - window size is `window_size` and step size is `stride`. If the left - elements cannot fill up the sliding window, this transformation will - drop the final smaller element. For example: - - ```python - # NOTE: The following examples use `{ ... }` to represent the - # contents of a dataset. - a = { [1], [2], [3], [4], [5], [6] } - - a.apply(tf.contrib.data.sliding_window_batch(window_size=3, stride=2)) == - { - [[1], [2], [3]], - [[3], [4], [5]], - } - ``` - - Args: - window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of - elements in the sliding window. - stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the - steps moving the sliding window forward for one iteration. The default - is `1`. It must be in `[1, window_size)`. - - Returns: - A `Dataset` transformation function, which can be passed to - @{tf.data.Dataset.apply}. 
- """ - def _apply_fn(dataset): - return _SlideDataset(dataset, window_size, stride) - - return _apply_fn diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 1bd73ee7044de3..6bd3f5f09b1a65 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -454,7 +454,6 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( @@ -1144,7 +1143,6 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index eb810e06ddba51..32aa2c0a4a6063 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -267,10 +267,7 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = [ - "no_windows", # TODO: needs investigation on Windows - "notsan", - ], + tags = ["notsan"], ) filegroup( diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD index 2f6cfdf31e852d..f86331af6f7928 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD +++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD @@ -22,7 +22,6 @@ cuda_py_test( ":linear_regression", "//tensorflow:tensorflow_py", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index ad8568ad44ea84..90f10f1fa8a4f7 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -224,10 +224,7 @@ py_test( srcs = ["python/ops/kmeans_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = [ - "nomac", # b/73741358 - "notsan", # b/67512932 - ], + tags = ["notsan"], # b/67512932 deps = [ ":factorization_py", ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", diff --git a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc index 35341406a08dc6..e61221a6b0d343 100644 --- a/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc +++ b/tensorflow/contrib/ffmpeg/default/ffmpeg_lib.cc @@ -256,9 +256,6 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, if (p != std::string::npos) { string rgb24 = line.substr(p + 9, line.find(" ", p + 9)); rgb24 = rgb24.substr(0, rgb24.find(",")); - // Strip anything after " ", in case the format is - // `640x360 [SAR 1:1 DAR 16:9]` - rgb24 = rgb24.substr(0, rgb24.find(" ")); string rgb24_width = rgb24.substr(0, rgb24.find("x")); string rgb24_height = rgb24.substr(rgb24_width.length() + 1); if (strings::safe_strtou32(rgb24_width, &width_value) && @@ -273,10 +270,8 @@ Status ReadInfoFile(const string& filename, uint32* width, uint32* height, // We only look for the first stream mapping to have the number of the // frames. // Once processed we will not further process stream mapping section. 
- if (line.find("frame=") == 0) { - // The format might be `frame= 166 ` or `frame=12488 ` - string number = line.substr(6); - number = number.substr(number.find_first_not_of(" ")); + if (line.find("frame= ") == 0) { + string number = line.substr(8, line.find(" ", 8)); number = number.substr(0, number.find(" ")); if (strings::safe_strtou32(number, &frames_value)) { in_mapping = false; diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index ff6f3b744190c9..0eb0e3cbe20f58 100644 --- a/tensorflow/contrib/gan/BUILD +++ b/tensorflow/contrib/gan/BUILD @@ -354,7 +354,6 @@ py_test( name = "classifier_metrics_test", srcs = ["python/eval/python/classifier_metrics_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":classifier_metrics", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD index 1c3974871c6291..efb403462a6e5d 100644 --- a/tensorflow/contrib/kafka/BUILD +++ b/tensorflow/contrib/kafka/BUILD @@ -1,93 +1,66 @@ -package(default_visibility = ["//tensorflow:internal"]) +package( + default_visibility = ["//visibility:private"], +) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load( - "//tensorflow:tensorflow.bzl", - "tf_gen_op_wrapper_py", - "tf_kernel_library", - "tf_custom_op_library", - "tf_custom_op_py_library", - "tf_gen_op_libs", - "tf_py_test", -) - -py_library( - name = "kafka", - srcs = ["__init__.py"], - srcs_version = "PY2AND3", - deps = [ - ":dataset_ops", - ], -) - -tf_custom_op_library( - name = "_dataset_ops.so", - srcs = ["ops/dataset_ops.cc"], - deps = [":dataset_kernels"], -) - -tf_gen_op_libs( - op_lib_names = ["dataset_ops"], -) +load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") +load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") +load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("//tensorflow:tensorflow.bzl", "tf_py_test") -cc_library( - name = "dataset_kernels", +tf_kernel_library( + name = "kafka_kernels", srcs = ["kernels/kafka_dataset_ops.cc"], + visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core/kernels:bounds_check_lib", + "//tensorflow/core/kernels:dataset", "//third_party/eigen3", "@kafka", - "@protobuf_archive//:protobuf_headers", ], - alwayslink = 1, ) -py_library( - name = "dataset_ops", - srcs = [ - "python/ops/kafka_dataset_ops.py", - ], - srcs_version = "PY2AND3", +tf_gen_op_libs( + op_lib_names = ["kafka_ops"], deps = [ - ":kafka_op_loader", - "//tensorflow/python:dataset_ops_gen", - "//tensorflow/python:util", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/util:nest", + "//tensorflow/core:lib", ], ) tf_gen_op_wrapper_py( - name = "gen_dataset_ops", - out = "python/ops/gen_dataset_ops.py", - deps = ["//tensorflow/contrib/kafka:dataset_ops_op_lib"], -) - -tf_kernel_library( - name = "dataset_ops_kernels", - deps = [ - ":dataset_kernels", - "//tensorflow/core:framework", - ], - alwayslink = 1, + name = "gen_kafka_ops", + out = "python/ops/gen_kafka_ops.py", + require_shape_functions = True, + deps = [":kafka_ops_op_lib"], ) -tf_custom_op_py_library( - name = "kafka_op_loader", - srcs = ["python/ops/kafka_op_loader.py"], - dso = ["//tensorflow/contrib/kafka:_dataset_ops.so"], - kernels = [ - ":dataset_ops_kernels", - "//tensorflow/contrib/kafka:dataset_ops_op_lib", +py_library( + name = "kafka", + 
srcs = [ + "__init__.py", + "python/ops/kafka_dataset_ops.py", ], srcs_version = "PY2AND3", + visibility = ["//visibility:public"], deps = [ - ":gen_dataset_ops", + ":gen_kafka_ops", "//tensorflow/contrib/util:util_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework", + "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:platform", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", ], ) @@ -115,7 +88,6 @@ tf_py_test( ], tags = [ "manual", - "no_windows", "notap", ], ) @@ -123,9 +95,7 @@ tf_py_test( filegroup( name = "all_files", srcs = glob( - include = [ - "**/*", - ], + ["**/*"], exclude = [ "**/METADATA", "**/OWNERS", diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc index a4cd4a2cc4b99b..88ef5f35711337 100644 --- a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc +++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/framework/dataset.h" +#include "tensorflow/core/kernels/dataset.h" + +#include "tensorflow/core/framework/tensor.h" #include "src-cpp/rdkafkacpp.h" diff --git a/tensorflow/contrib/kafka/ops/dataset_ops.cc b/tensorflow/contrib/kafka/ops/dataset_ops.cc deleted file mode 100644 index 8cdf16103bab2b..00000000000000 --- a/tensorflow/contrib/kafka/ops/dataset_ops.cc +++ /dev/null @@ -1,44 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -REGISTER_OP("KafkaDataset") - .Input("topics: string") - .Input("servers: string") - .Input("group: string") - .Input("eof: bool") - .Input("timeout: int64") - .Output("handle: variant") - .SetIsStateful() - .SetShapeFn(shape_inference::ScalarShape) - .Doc(R"doc( -Creates a dataset that emits the messages of one or more Kafka topics. - -topics: A `tf.string` tensor containing one or more subscriptions, - in the format of [topic:partition:offset:length], - by default length is -1 for unlimited. -servers: A list of bootstrap servers. -group: The consumer group id. -eof: If True, the kafka reader will stop on EOF. -timeout: The timeout value for the Kafka Consumer to wait - (in millisecond). 
-)doc"); - -} // namespace tensorflow diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py index a1624614d1ab1b..8e51d27a342359 100644 --- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py +++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py @@ -17,9 +17,8 @@ from __future__ import division from __future__ import print_function -from tensorflow.contrib.kafka.python.ops import kafka_op_loader # pylint: disable=unused-import -from tensorflow.contrib.kafka.python.ops import gen_dataset_ops -from tensorflow.python.data.ops.dataset_ops import Dataset +from tensorflow.contrib.kafka.python.ops import gen_kafka_ops +from tensorflow.python.data.ops.readers import Dataset from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -59,8 +58,8 @@ def __init__(self, timeout, dtype=dtypes.int64, name="timeout") def _as_variant_tensor(self): - return gen_dataset_ops.kafka_dataset(self._topics, self._servers, - self._group, self._eof, self._timeout) + return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group, + self._eof, self._timeout) @property def output_classes(self): diff --git a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py deleted file mode 100644 index ec2fdea962ef94..00000000000000 --- a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Python helper for loading kafka ops and kernels.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.util import loader -from tensorflow.python.platform import resource_loader - -_dataset_ops = loader.load_op_library( - resource_loader.get_path_to_datafile("../../_dataset_ops.so")) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index d1c449402a697d..146ae8b7e2a3b2 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -114,7 +114,6 @@ py_test( name = "utils_test", srcs = ["utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ "//tensorflow/contrib/kfac/python/ops:utils", "//tensorflow/contrib/tpu", diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD index 544065dac6a100..894e6f6946bb59 100644 --- a/tensorflow/contrib/labeled_tensor/BUILD +++ b/tensorflow/contrib/labeled_tensor/BUILD @@ -70,7 +70,6 @@ py_test( "python/ops/core_test.py", ], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":_typecheck", ":core", diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index cc7bbabf210ded..852d06e1e3cc8f 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -188,7 +188,6 @@ py_test( size = "small", srcs = ["python/layers/normalization_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/contrib/framework:framework_py", @@ -354,7 +353,6 @@ py_test( size = "small", srcs = ["python/ops/sparse_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/layers/python/layers/embedding_ops.py b/tensorflow/contrib/layers/python/layers/embedding_ops.py index ffa208540dae97..b62e3050cd7003 100644 --- a/tensorflow/contrib/layers/python/layers/embedding_ops.py +++ b/tensorflow/contrib/layers/python/layers/embedding_ops.py @@ -470,7 +470,7 @@ def embedding_lookup_unique(params, ids, name=None): ids = ops.convert_to_tensor(ids) shape = array_ops.shape(ids) ids_flat = array_ops.reshape( - ids, math_ops.reduce_prod(shape, keepdims=True)) + ids, math_ops.reduce_prod(shape, keep_dims=True)) unique_ids, idx = array_ops.unique(ids_flat) unique_embeddings = embedding_ops.embedding_lookup(params, unique_ids) embeds_flat = array_ops.gather(unique_embeddings, idx) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index b05f5eeaeee8fb..f837ca3265b42a 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -5,8 +5,6 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "py_test") - package(default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", @@ -117,7 +115,6 @@ py_test( size = "small", srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/python:client_testlib", @@ -173,7 +170,6 @@ tf_py_test( "//tensorflow/python:variables", 
"//tensorflow/python/estimator", ], - tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( @@ -192,7 +188,6 @@ py_test( size = "small", srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", @@ -431,10 +426,7 @@ py_test( size = "medium", srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", - tags = [ - "noasan", # b/73741358 - "nomac", - ], + tags = ["noasan"], deps = [ ":learn", "//tensorflow/python:array_ops", @@ -593,7 +585,6 @@ py_test( size = "small", srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/learn/python/learn/datasets", @@ -823,7 +814,6 @@ py_test( size = "small", srcs = ["python/learn/utils/saved_model_export_utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index 8f9811cf251ae0..b3b067b8e1a4eb 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -61,7 +61,7 @@ def embedding_lookup(params, ids, name='embedding_lookup'): ids = ops.convert_to_tensor(ids) shape = array_ops_.shape(ids) ids_flat = array_ops_.reshape( - ids, math_ops.reduce_prod(shape, keepdims=True)) + ids, math_ops.reduce_prod(shape, keep_dims=True)) embeds_flat = nn.embedding_lookup(params, ids_flat, name) embed_shape = array_ops_.concat([shape, [-1]], 0) embeds = array_ops_.reshape(embeds_flat, embed_shape) diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index b4504f246a0f80..7f316292724ea0 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -27,10 +27,10 @@ LIBDIR := $(MAKEFILE_DIR)/gen/lib/ GENDIR := $(MAKEFILE_DIR)/gen/obj/ # Settings for the host compiler. -CXX := $(CC_PREFIX)gcc +CXX := $(CC_PREFIX) gcc CXXFLAGS := --std=c++11 -O3 -DNDEBUG -CC := $(CC_PREFIX)gcc -CFLAGS := -O3 -DNDEBUG +CC := $(CC_PREFIX) gcc +CFLAGS := LDOPTS := LDOPTS += -L/usr/local/lib ARFLAGS := -r @@ -57,11 +57,10 @@ LIBS := \ # If we're on Linux, also link in the dl library. ifeq ($(HOST_OS),LINUX) - LIBS += -ldl + LIBS += -ldl -lpthread endif include $(MAKEFILE_DIR)/ios_makefile.inc -include $(MAKEFILE_DIR)/rpi_makefile.inc # This library is the main target for this makefile. It will contain a minimal # runtime that can be linked in to other programs. diff --git a/tensorflow/contrib/lite/arena_planner.h b/tensorflow/contrib/lite/arena_planner.h index f84b3dad9550e7..58bc164619c2c0 100644 --- a/tensorflow/contrib/lite/arena_planner.h +++ b/tensorflow/contrib/lite/arena_planner.h @@ -33,7 +33,7 @@ class AllocationInfo; // each tensor needs to be allocated and deallocated, and preallocates all the // necessary memory (the PlanAllocations phase). It then assigns portions of // this memory buffer to each tensor (the ExecuteAllocations phase). Tensors may -// share some of the buffer if a tensor B is to be allocated after another tensor +// share some of the bufer if a tensor B is to be allocated after another tensor // A has been deallocated. 
// // If dynamic tensors are used the planning steps can be repeated during model diff --git a/tensorflow/contrib/lite/build_rpi_lib.sh b/tensorflow/contrib/lite/build_rpi_lib.sh deleted file mode 100755 index 3824b16412ed26..00000000000000 --- a/tensorflow/contrib/lite/build_rpi_lib.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -x -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -set -e - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -cd "$SCRIPT_DIR/../../.." - -CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/contrib/lite/Makefile TARGET=RPI TARGET_ARCH=armv7 diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index ea3ae3489ecf07..2218ea8eac5760 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -24,7 +24,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin +// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin // ops. typedef enum { kTfLiteBuiltinAdd = 0, diff --git a/tensorflow/contrib/lite/error_reporter.h b/tensorflow/contrib/lite/error_reporter.h index 3c5f805f12f6a1..da193d2586e912 100644 --- a/tensorflow/contrib/lite/error_reporter.h +++ b/tensorflow/contrib/lite/error_reporter.h @@ -30,7 +30,7 @@ namespace tflite { // va_list args; // foo.Report("test %d", args); // where args is va_list // -// Subclass ErrorReporter to provide another reporting destination. +// Sublclass ErrorReporter to provide another reporting destination. // For example, if you have a GUI program, you might redirect to a buffer // that drives a GUI error log box. class ErrorReporter { diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md deleted file mode 100644 index 7a3a231626d0e1..00000000000000 --- a/tensorflow/contrib/lite/g3doc/rpi.md +++ /dev/null @@ -1,50 +0,0 @@ -# TensorFlow Lite for Raspberry Pi - -## Cross compiling -### Installing toolchian -This has been tested on Ubuntu 16.04.3 64bit and Tensorflow devel docker image [tensorflow/tensorflow:nightly-devel](https://hub.docker.com/r/tensorflow/tensorflow/tags/). - -To cross compiling TensorFlow Lite. First you should install the toolchain and libs. -```bash -sudo apt-get update -sudo apt-get install crossbuild-essential-armhf -``` -> If you are using docker, you may not use `sudo` - -### Building -Clone this Tensorflow repository, Run this script at the root of the repository to download all the dependencies: -> The Tensorflow repository is in `/tensorflow` if you are using `tensorflow/tensorflow:nightly-devel` docker image, just try it. -```bash -./tensorflow/contrib/lite/download_dependencies.sh -``` -Note than you only need to to this once. 
- -You should then be able to compile: -```bash -./tensorflow/contrib/lite/build_rpi_lib.sh -``` - -This should compile a static library in: -`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. - -## Native compiling -This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc version 6.3.0 20170516 (Raspbian 6.3.0-18+rpi1). - -Log in to you RPI, install the toolchain. -```bash -sudo apt-get instal build-essential -``` - -First, clone this TensorFlow repository. Run this at the root of the repository: -```bash -./tensorflow/contrib/lite/download_dependencies.sh -``` -Note than you only need to to this once. - -You should then be able to compile: -```bash -./tensorflow/contrib/lite/build_rpi_lib.sh -``` - -This should compile a static library in: -`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 3749869f58c4ce..af143370ee2b27 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -481,7 +481,7 @@ class Interpreter { // During Invoke(), Interpreter will allocate input tensors first, which are // known to be fixed size. Then it will allocate outputs from nodes as many // as possible. When there is a node that produces dynamic sized tensor. - // Interpreter will stop allocating tensors, set the value of next allocate + // Intepreter will stop allocating tensors, set the value of next allocate // node id, and execute the node to generate the output tensor before continue // to allocate successors. This process repeats until all nodes are executed. // NOTE: this relies on the order of nodes that is in topological order. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 72d4acedbe72be..7a029c7df83890 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -40,7 +40,7 @@ TEST(BasicInterpreter, InvokeInvalidModel) { ASSERT_EQ(interpreter.Invoke(), kTfLiteOk); } -// Test size accessor functions. +// Test size accesser functions. TEST(BasicInterpreter, TestSizeFunctions) { Interpreter interpreter; int base_index; diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index e0cd12f1b4042c..b91ba1a03d67e5 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -64,7 +64,7 @@ struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/depthwise_conv.cc b/tensorflow/contrib/lite/kernels/depthwise_conv.cc index cad9ce114c8387..15dbfe08c82bef 100644 --- a/tensorflow/contrib/lite/kernels/depthwise_conv.cc +++ b/tensorflow/contrib/lite/kernels/depthwise_conv.cc @@ -52,7 +52,7 @@ enum KernelType { struct OpData { TfLitePaddingValues padding; // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. 
For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/fully_connected.cc b/tensorflow/contrib/lite/kernels/fully_connected.cc index 888e67966c0a40..a77fe94e499078 100644 --- a/tensorflow/contrib/lite/kernels/fully_connected.cc +++ b/tensorflow/contrib/lite/kernels/fully_connected.cc @@ -48,7 +48,7 @@ enum KernelType { struct OpData { // The scaling factor from input to output (aka the 'real multiplier') can - // be represented as a fixed point multiplier plus a left shift. + // be represented as a fixed point multipler plus a left shift. int32_t output_multiplier; int output_shift; // The range of the fused activation layer. For example for kNone and diff --git a/tensorflow/contrib/lite/kernels/kernel_util.h b/tensorflow/contrib/lite/kernels/kernel_util.h index 21da1daff7783b..28f53b9fbbc562 100644 --- a/tensorflow/contrib/lite/kernels/kernel_util.h +++ b/tensorflow/contrib/lite/kernels/kernel_util.h @@ -58,7 +58,7 @@ inline bool IsConstantTensor(TfLiteTensor* tensor) { } // Determines whether tensor is dynamic. Note that a tensor can be non-const and -// not dynamic. This function specifically checks for a dynamic tensor. +// not dynamic. This function specificially checks for a dynamic tensor. inline bool IsDynamicTensor(TfLiteTensor* tensor) { return tensor->allocation_type == kTfLiteDynamic; } diff --git a/tensorflow/contrib/lite/kernels/lsh_projection.cc b/tensorflow/contrib/lite/kernels/lsh_projection.cc index 0ee35775d50b87..5f73b56ed9790b 100644 --- a/tensorflow/contrib/lite/kernels/lsh_projection.cc +++ b/tensorflow/contrib/lite/kernels/lsh_projection.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// LSH Projection projects an input to a bit vector via locality sensitive +// LSH Projection projects an input to a bit vector via locality senstive // hashing. // // Options: diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index 8cf1165135bdb0..b9255b23a55737 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -213,9 +213,9 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context, // present. // 2) If projection weight is present, then projection bias is optional. // TODO(ghodrat): make sure this is correct. - const bool projection_tensors_consistent = + const bool projecton_tensors_consistent = ((projection_weights != nullptr) || (projection_bias == nullptr)); - TF_LITE_ENSURE(context, projection_tensors_consistent == true); + TF_LITE_ENSURE(context, projecton_tensors_consistent == true); return kTfLiteOk; } @@ -357,7 +357,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. + // check the existense of only one to the get the condition. 
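The conv, depthwise_conv, and fully_connected hunks above all carry the same comment: the real multiplier (typically input_scale * filter_scale / output_scale) is stored as a fixed-point multiplier plus a shift. Below is a sketch in the spirit of gemmlowp-style multiplier quantization, assuming a Q31 mantissa; the exact kernels here may differ, and a negative shift denotes a right shift:

```python
import math

def quantize_multiplier(real_multiplier):
    """Split a real multiplier into a Q31 fixed-point mantissa and a shift."""
    assert 0.0 < real_multiplier < 1.0
    mantissa, shift = math.frexp(real_multiplier)    # mantissa in [0.5, 1)
    q31 = int(round(mantissa * (1 << 31)))
    if q31 == (1 << 31):                             # rounding overflowed
        q31 //= 2
        shift += 1
    return q31, shift

# Hypothetical scales: input 0.5, filter 0.01, output 0.25.
real = (0.5 * 0.01) / 0.25                           # 0.02
q31, shift = quantize_multiplier(real)
print(q31, shift)                                    # 1374389535 -5
print(q31 / float(1 << 31) * 2.0 ** shift)           # ~0.02 round trip
```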
const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/kernels/reshape.cc b/tensorflow/contrib/lite/kernels/reshape.cc index 438f70d3115130..f3e6ddc9f480e3 100644 --- a/tensorflow/contrib/lite/kernels/reshape.cc +++ b/tensorflow/contrib/lite/kernels/reshape.cc @@ -49,20 +49,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArray* output_size = TfLiteIntArrayCreate(params->num_dimensions); int num_output_elements = 1; - int stretch_dim = -1; + int strech_dim = -1; for (int i = 0; i < params->num_dimensions; ++i) { int value = params->shape[i]; if (value == -1) { - TF_LITE_ENSURE_EQ(context, stretch_dim, -1); - stretch_dim = i; + TF_LITE_ENSURE_EQ(context, strech_dim, -1); + strech_dim = i; } else { num_output_elements *= value; output_size->data[i] = value; } } - if (stretch_dim != -1) { - output_size->data[stretch_dim] = num_input_elements / num_output_elements; - num_output_elements *= output_size->data[stretch_dim]; + if (strech_dim != -1) { + output_size->data[strech_dim] = num_input_elements / num_output_elements; + num_output_elements *= output_size->data[strech_dim]; } TF_LITE_ENSURE_EQ(context, num_input_elements, num_output_elements); diff --git a/tensorflow/contrib/lite/kernels/reshape_test.cc b/tensorflow/contrib/lite/kernels/reshape_test.cc index aecbd0399f7454..0fbcf6e6aa311d 100644 --- a/tensorflow/contrib/lite/kernels/reshape_test.cc +++ b/tensorflow/contrib/lite/kernels/reshape_test.cc @@ -60,7 +60,7 @@ TEST(ReshapeOpTest, TooManyDimensions) { TEST(ReshapeOpTest, TooManySpecialDimensions) { EXPECT_DEATH(ReshapeOpModel({1, 2, 4, 1}, {-1, -1, 2, 4}), - "stretch_dim != -1"); + "strech_dim != -1"); } TEST(ReshapeOpTest, SimpleTest) { diff --git a/tensorflow/contrib/lite/kernels/test_util.cc b/tensorflow/contrib/lite/kernels/test_util.cc index 0bb28b50b2a5e5..373310bd87370a 100644 --- a/tensorflow/contrib/lite/kernels/test_util.cc +++ b/tensorflow/contrib/lite/kernels/test_util.cc @@ -141,8 +141,8 @@ void SingleOpModel::SetBuiltinOp(BuiltinOperator type, void SingleOpModel::SetCustomOp( const string& name, const std::vector& custom_option, - const std::function& registration) { - custom_registrations_[name] = registration; + const std::function& registeration) { + custom_registrations_[name] = registeration; opcodes_.push_back( CreateOperatorCodeDirect(builder_, BuiltinOperator_CUSTOM, name.data())); operators_.push_back(CreateOperator( diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index 42941a97db70ad..508a570e2e5fd5 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -360,7 +360,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const int n_output = recurrent_to_output_weights->dims->data[1]; // Since we have already checked that weights are all there or none, we can - // check the existence of only one to get the condition. + // check the existense of only one to the get the condition. 
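The reshape.cc hunk above is the shape-inference step of Prepare(): at most one target dimension may be -1, and that dimension is stretched so the element counts match. The same logic in a few lines of Python, for reference:

```python
def infer_output_shape(input_shape, target_shape):
    """Mirror of the Prepare() logic above; -1 marks the stretch dim."""
    num_input_elements = 1
    for d in input_shape:
        num_input_elements *= d

    stretch_dim = -1
    num_output_elements = 1
    output = list(target_shape)
    for i, value in enumerate(target_shape):
        if value == -1:
            assert stretch_dim == -1, "at most one -1 dimension is allowed"
            stretch_dim = i
        else:
            num_output_elements *= value

    if stretch_dim != -1:
        output[stretch_dim] = num_input_elements // num_output_elements
        num_output_elements *= output[stretch_dim]

    assert num_input_elements == num_output_elements, "element count mismatch"
    return output

print(infer_output_shape([1, 2, 4, 1], [2, -1]))   # [2, 4]
```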
const bool use_cifg = (input_to_input_weights == nullptr); const bool use_peephole = (cell_to_output_weights != nullptr); diff --git a/tensorflow/contrib/lite/memory_planner.h b/tensorflow/contrib/lite/memory_planner.h index 0294ec815c4820..5cd6c208500f3e 100644 --- a/tensorflow/contrib/lite/memory_planner.h +++ b/tensorflow/contrib/lite/memory_planner.h @@ -34,8 +34,8 @@ class MemoryPlanner { // [first_node, last_node]. virtual TfLiteStatus ExecuteAllocations(int first_node, int last_node) = 0; - // Invalidates allocations made earlier. This is called when tensors sizes - // have changed. All planned allocations remain, but can't be used until + // Invalidates allocations made earliers. This is called when tensors sizes + // have change. All planned allocations remain, but can't be used until // ExecuteAllocations() is called. virtual TfLiteStatus ResetAllocations() = 0; }; diff --git a/tensorflow/contrib/lite/model.h b/tensorflow/contrib/lite/model.h index 38eea0e26bc97c..51a622a28de90c 100644 --- a/tensorflow/contrib/lite/model.h +++ b/tensorflow/contrib/lite/model.h @@ -81,7 +81,7 @@ class FlatBufferModel { const tflite::Model* model_spec, ErrorReporter* error_reporter = DefaultErrorReporter()); - // Releases memory or unmaps mmaped memory. + // Releases memory or unmaps mmaped meory. ~FlatBufferModel(); // Copying or assignment is disallowed to simplify ownership semantics. diff --git a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h index bd49d327c995ef..76032771af2c8e 100644 --- a/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h +++ b/tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h @@ -569,7 +569,7 @@ enum { ANEURALNETWORKS_LOGISTIC = 14, /** - * Projects an input to a bit vector via locality sensitive hashing. + * Projects an input to a bit vector via locality senstive hashing. * * Inputs: * * 0: Hash functions. Dim.size == 2, DataType: Float. diff --git a/tensorflow/contrib/lite/rpi_makefile.inc b/tensorflow/contrib/lite/rpi_makefile.inc deleted file mode 100644 index 832ef5824bea86..00000000000000 --- a/tensorflow/contrib/lite/rpi_makefile.inc +++ /dev/null @@ -1,33 +0,0 @@ -# Settings for Raspberry Pi. -ifeq ($(TARGET), RPI) - ifeq ($(TARGET_ARCH), armv7) - CXXFLAGS += \ - -march=armv7-a \ - -mfpu=neon-vfpv4 \ - -funsafe-math-optimizations \ - -ftree-vectorize - - CCFLAGS += \ - -march=armv7-a \ - -mfpu=neon-vfpv4 \ - -funsafe-math-optimizations \ - -ftree-vectorize - - LDFLAGS := \ - -Wl,--no-export-dynamic \ - -Wl,--exclude-libs,ALL \ - -Wl,--gc-sections \ - -Wl,--as-needed - endif - - LIBS := \ - -lstdc++ \ - -lpthread \ - -lm \ - -ldl - - OBJDIR := $(OBJDIR)rpi_$(TARGET_ARCH)/ - LIBDIR := $(LIBDIR)rpi_$(TARGET_ARCH)/ - BINDIR := $(BINDIR)rpi_$(TARGET_ARCH)/ - DEPDIR := $(DEPDIR)rpi_$(TARGET_ARCH)/ -endif diff --git a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc index 640972de777d2c..08bcfe451685f4 100644 --- a/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc +++ b/tensorflow/contrib/lite/schema/builtin_ops_header/generator.cc @@ -46,7 +46,7 @@ extern "C" { #endif // __cplusplus // The enum for builtin operators. -// Note: CUSTOM and DELEGATE are 2 special ops which are not real builtin +// Note: CUSTOM and DELEGATE are 2 special ops which are not real biultin // ops. 
typedef enum { )"; diff --git a/tensorflow/contrib/lite/simple_memory_arena.cc b/tensorflow/contrib/lite/simple_memory_arena.cc index 2f2004f56bcad5..4aab244989ca53 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.cc +++ b/tensorflow/contrib/lite/simple_memory_arena.cc @@ -113,21 +113,21 @@ TfLiteStatus SimpleMemoryArena::Commit(TfLiteContext* context) { underlying_buffer_size_ = required_size; underlying_buffer_aligned_ptr_ = new_underlying_buffer_aligned_ptr; } - committed_ = true; + commited_ = true; return underlying_buffer_ != nullptr ? kTfLiteOk : kTfLiteError; } TfLiteStatus SimpleMemoryArena::ResolveAlloc(TfLiteContext* context, const ArenaAlloc& alloc, char** output_ptr) { - TF_LITE_ENSURE(context, committed_); + TF_LITE_ENSURE(context, commited_); TF_LITE_ENSURE(context, output_ptr != nullptr); *output_ptr = underlying_buffer_aligned_ptr_ + alloc.offset; return kTfLiteOk; } TfLiteStatus SimpleMemoryArena::Clear() { - committed_ = false; + commited_ = false; high_water_mark_ = 0; allocs_.clear(); return kTfLiteOk; diff --git a/tensorflow/contrib/lite/simple_memory_arena.h b/tensorflow/contrib/lite/simple_memory_arena.h index 5faf78b59e3755..0535522374c634 100644 --- a/tensorflow/contrib/lite/simple_memory_arena.h +++ b/tensorflow/contrib/lite/simple_memory_arena.h @@ -22,7 +22,7 @@ limitations under the License. namespace tflite { // This little structure holds the offset and the size for a dynamic memory -// allocation in the memory arena. When the arena is committed and the +// allocation in the memory arena. When the arena is commited and the // underlying buffer is set, the alloc can be resolved into an actual memory // pointer. struct ArenaAlloc { @@ -43,7 +43,7 @@ struct ArenaAlloc { class SimpleMemoryArena { public: explicit SimpleMemoryArena(size_t arena_alignment) - : committed_(false), + : commited_(false), arena_alignment_(arena_alignment), high_water_mark_(0), underlying_buffer_size_(0), @@ -73,7 +73,7 @@ class SimpleMemoryArena { } private: - bool committed_; + bool commited_; size_t arena_alignment_; size_t high_water_mark_; std::unique_ptr underlying_buffer_; diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD index 0a6edc33c57435..8ca03f4193f260 100644 --- a/tensorflow/contrib/lookup/BUILD +++ b/tensorflow/contrib/lookup/BUILD @@ -46,7 +46,6 @@ tf_py_test( "//tensorflow/python:variables", ], grpc_enabled = True, - tags = ["no_windows"], # TODO: needs investigation on Windows ) filegroup( diff --git a/tensorflow/contrib/makefile/README.md b/tensorflow/contrib/makefile/README.md index 6c3b02e12b3082..995230dfa84853 100644 --- a/tensorflow/contrib/makefile/README.md +++ b/tensorflow/contrib/makefile/README.md @@ -194,8 +194,6 @@ with: srcs = glob(["libs/arm64-v8a/*.so"]), ``` -If you are building for Android TV (Shield TV devices), replace "portrait" with "landscape" for android:screenOrientation in all four activities in tensorflow/examples/android/AndroidManifest.xml - Then run: ```bash # Create dir for native libs diff --git a/tensorflow/contrib/makefile/build_all_ios.sh b/tensorflow/contrib/makefile/build_all_ios.sh index 9b148688c4c948..2d9979183975e6 100755 --- a/tensorflow/contrib/makefile/build_all_ios.sh +++ b/tensorflow/contrib/makefile/build_all_ios.sh @@ -80,9 +80,10 @@ if [[ ! -z "${OPTIMIZE_FOR_GRAPH}" ]]; then fi else echo "${PRNT_SLCTV_BIN} found. 
Using it" + ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h + fi - ${PRNT_SLCTV_BIN} --graphs=${OPTIMIZE_FOR_GRAPH} > ${TOP_SRCDIR}/tensorflow/core/framework/ops_to_register.h fi if [[ "${ONLY_MAKE_TENSORFLOW}" != "true" ]]; then diff --git a/tensorflow/contrib/mpi/mpi_utils.h b/tensorflow/contrib/mpi/mpi_utils.h index df055ff5673114..fa297c28cb47d4 100644 --- a/tensorflow/contrib/mpi/mpi_utils.h +++ b/tensorflow/contrib/mpi/mpi_utils.h @@ -24,8 +24,6 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" -// Skip MPI C++ bindings support, this matches the usage in other places -#define OMPI_SKIP_MPICXX #include "third_party/mpi/mpi.h" #define MPI_CHECK(cmd) \ do { \ diff --git a/tensorflow/contrib/predictor/predictor_factories.py b/tensorflow/contrib/predictor/predictor_factories.py index 6e77e934fe1985..04b5d5bdf158dc 100644 --- a/tensorflow/contrib/predictor/predictor_factories.py +++ b/tensorflow/contrib/predictor/predictor_factories.py @@ -53,7 +53,7 @@ def from_contrib_estimator(estimator, `Estimator`. """ if isinstance(estimator, core_estimator.Estimator): - raise TypeError('Expected estimator to be of type ' + raise TypeError('Espected estimator to be of type ' 'tf.contrib.learn.Estimator, but got type ' 'tf.python.estimator.Estimator. You likely want to call ' 'from_estimator.') @@ -88,7 +88,7 @@ def from_estimator(estimator, `Estimator`. """ if isinstance(estimator, contrib_estimator.Estimator): - raise TypeError('Expected estimator to be of type ' + raise TypeError('Espected estimator to be of type ' 'tf.python.estimator.Estimator, but got type ' 'tf.contrib.learn.Estimator. You likely want to call ' 'from_contrib_estimator.') diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 4bb6f76019739f..f624c426863964 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -81,7 +81,6 @@ py_test( name = "builtin_functions_test", srcs = ["builtin_functions_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/python:client_testlib", @@ -92,7 +91,6 @@ py_test( name = "call_trees_test", srcs = ["call_trees_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/contrib/py2tf/impl", diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py index 1194b98f5ebeff..90bc22008f0edb 100644 --- a/tensorflow/contrib/py2tf/converters/single_return.py +++ b/tensorflow/contrib/py2tf/converters/single_return.py @@ -212,7 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): def __init__(self): self.cant_return = False - super(DetectReturnInUnsupportedControlFlow, self).__init__() + super(gast.NodeVisitor, self).__init__() def visit_While(self, node): self.cant_return = True diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 8bc338e801aa28..d029289f5aea82 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -83,7 +83,6 @@ py_test( name = "py_func_test", srcs = ["py_func_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":utils", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py 
b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 1afcbb850496fa..b278265639ad8b 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -237,7 +237,7 @@ def _FindFusedBatchNorms(graph): # The batch variance used during forward and backward prop is biased, # i.e it is calculated as: V=sum(x(k)-mu)^2/N. For the moving average # calculation, the variance is corrected by the term N/N-1 (Bessel's - # correction). The variance tensor read from FuseBatchNorm has Bessel's + # correction). The variance tensor read from FuseBatchNorm has bessel's # correction applied, so we undo it here. scope, sep, _ = bn_op.name.rpartition('/') g = ops.get_default_graph() @@ -306,7 +306,7 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, Args: context: The scope under which we look for batch norm params - match: Object containing required batch norm tensors for correction + match: Object containg required batch norm tensors for correction computation. freeze_batch_norm_delay: Delay in steps at which computation switches from regular batch norm to frozen mean and variance. diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index a4f7b1b2213958..0a8e35080cb08f 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -282,8 +282,8 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, Args: inputs: a tensor containing values to be quantized. min_var: a variable containing quantization range lower end(s). - max_var: a variable containing quantization range upper end(s). - per_channel: a boolean specifying whether to use per-channel quantization. + max_var: a variable containing quantization range lupper end(s). + per_channel: a boolean specifying whether to use per-channel quantizatioh. num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index ec721afbc8d1be..0608ab93022a88 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -267,7 +267,7 @@ def _InsertQuantOp(context, """Inserts a quant op between a producer op and (multiple) consumer ops. Args: - context: Context where producer and consumer operations are nested. + context: Context w,here producer and consumer operations are nested. name: Name for the new quantization op within the context. producer: Producer operation of the pairs where quantization will be inserted. diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 5abdcd2475ce48..5a3a74cec4864a 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -158,7 +158,7 @@ def experimental_create_training_graph(input_graph=None, often fail. Args: - input_graph: The tf.Graph to be transformed, if None then defaults to the + input_graph: The tf.Graph to be transformed,if None then defaults to the default graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. 
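To make the Bessel's-correction remark in the fold_batch_norms.py hunk concrete: the moving-average variance read from FusedBatchNorm carries the N/(N-1) factor, and the folding code multiplies it back out. A numeric check with a hypothetical batch:

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])   # hypothetical batch of activations
n = x.size

biased = np.var(x)                    # sum((x - mu)**2) / N       -> 1.25
unbiased = np.var(x, ddof=1)          # with Bessel's correction   -> 5/3

# Undoing the correction is multiplying by (N - 1) / N:
assert np.isclose(unbiased * (n - 1) / n, biased)
```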
diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index db745aa56212af..0624cc878b8f7e 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -419,7 +419,7 @@ def _TestQuantize_Conv2dWithBatchNorm(self, activation, activation_op_name, normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -470,7 +470,7 @@ def _TestQuantize_FCWithBatchNorm(self, activation, activation_op_name, normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -526,7 +526,7 @@ def _TestQuantize_DepthwiseConv2dWithBatchNorm( normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optional) and an activation. + # Manually add a bypass (optionaly) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -565,7 +565,7 @@ def _WeightInit(self, stddev): stddev: Standard deviation of normal variable. Returns: - An initialized that initializes with a truncated normal variable. + An initialized that initialzes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index b2e5707a6d5bcc..ef59475167137e 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -144,7 +144,7 @@ def _WeightInit(self, stddev): stddev: Standard deviation of normal variable. Returns: - An initialized that initializes with a truncated normal variable. + An initialized that initialzes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD index 54c66271cd43c8..27f0a7f58f7813 100644 --- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD +++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD @@ -38,6 +38,7 @@ py_test( size = "small", srcs = ["python/ops/remote_fused_graph_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":remote_fused_graph_ops_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 358b2eb02be81d..73f2607d8442f2 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2133,7 +2133,7 @@ class Conv1DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_1d_lstm_cell", **kwargs): """Construct Conv1DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv1DLSTMCell, self).__init__(conv_ndims=1, name=name, **kwargs) + super(Conv1DLSTMCell, self).__init__(conv_ndims=1, **kwargs) class Conv2DLSTMCell(ConvLSTMCell): @@ -2144,7 +2144,7 @@ class Conv2DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_2d_lstm_cell", **kwargs): """Construct Conv2DLSTM. 
See `ConvLSTMCell` for more details.""" - super(Conv2DLSTMCell, self).__init__(conv_ndims=2, name=name, **kwargs) + super(Conv2DLSTMCell, self).__init__(conv_ndims=2, **kwargs) class Conv3DLSTMCell(ConvLSTMCell): @@ -2155,7 +2155,7 @@ class Conv3DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_3d_lstm_cell", **kwargs): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv3DLSTMCell, self).__init__(conv_ndims=3, name=name, **kwargs) + super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) def _conv(args, filter_size, num_features, bias, bias_start=0.0): diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index b10757df47a4c3..245fe07f2bcdad 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -53,7 +53,6 @@ py_test( size = "small", srcs = ["python/saved_model/reader_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":saved_model_py", diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 6adbb8be40fb18..03fe31abf736c0 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -299,13 +299,12 @@ def initialize(self, name=None): """ finished, start_inputs = self._finished, self._start_inputs - dtype = nest.flatten(self._initial_cell_state)[0].dtype log_probs = array_ops.one_hot( # shape(batch_sz, beam_sz) array_ops.zeros([self._batch_size], dtype=dtypes.int32), depth=self._beam_width, - on_value=ops.convert_to_tensor(0.0, dtype=dtype), - off_value=ops.convert_to_tensor(-np.Inf, dtype=dtype), - dtype=dtype) + on_value=0.0, + off_value=-np.Inf, + dtype=nest.flatten(self._initial_cell_state)[0].dtype) initial_state = BeamSearchDecoderState( cell_state=self._initial_cell_state, diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 3ad88a8a22966a..67011c8fef6c4f 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -165,7 +165,6 @@ py_test( name = "gc_test", srcs = ["gc_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":gc", diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD index 7aa16848391849..5daabbd62e7e63 100644 --- a/tensorflow/contrib/slim/python/slim/data/BUILD +++ b/tensorflow/contrib/slim/python/slim/data/BUILD @@ -61,7 +61,6 @@ py_test( name = "dataset_data_provider_test", srcs = ["dataset_data_provider_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":dataset", ":dataset_data_provider", diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 07b6b1f142ba4d..1e4cc3f0952ef7 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -553,6 +553,7 @@ py_test( srcs = ["client/random_forest_test.py"], srcs_version = "PY2AND3", tags = [ + "no_windows", "nomac", # b/63258195 "notsan", ], diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index db2e000ef875c3..d833744d0c7e85 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -9,7 +9,6 @@ 
exports_files(["LICENSE"]) # For platform specific build config load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") -load("//tensorflow:tensorflow.bzl", "py_test") tf_proto_library( name = "protos_all", diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 906cc3f0344e7c..c832c6f2e0cefe 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -83,7 +83,6 @@ cc_library( "kernels/trt_engine_op.h", ], copts = tf_copts(), - visibility = ["//visibility:public"], deps = [ ":trt_logging", ":trt_resources", @@ -155,7 +154,6 @@ py_library( deps = [ ":trt_convert_py", ":trt_ops_py", - "//tensorflow/python:errors", ], ) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index 461e627e99c38f..dfcce0fd00eedf 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -2,8 +2,7 @@ Using TensorRT in TensorFlow ============================ This module provides necessary bindings and introduces TRT_engine_op -operator that wraps a subgraph in TensorRT. This is still a work in progress -but should be useable with most common graphs. +operator that wraps a subgraph in TensorRT. Compilation ----------- @@ -16,10 +15,26 @@ configure script should find the necessary components from the system automatically. If installed from tar packages, user has to set path to location where the library is installed during configuration. -```shell + +``` bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use can be found in test/test_tftrt.py directory +will be available. An example use is shown below. + +```python +import tensorflow as tf +import tensorflow.contrib.tensorrt as trt +#... create and train or load model +gdef = sess.graph.as_graph_def() +trt_gdef = trt.create_inference_graph( + gdef, #original graph_def + ["output"], #name of output node(s) + max_batch_size, #maximum batch size to run the inference + max_workspace_size_bytes) # max memory for TensorRT to use +tf.reset_default_graph() +tf.import_graph_def(graph_def=trt_gdef) +#...... run inference +``` diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index 140ad4828208ae..fd551d70b4385b 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -18,18 +18,6 @@ from __future__ import division from __future__ import print_function -from tensorflow.python.framework import errors - -# pylint: disable=unused-import,wildcard-import,g-import-not-at-top -try: - from tensorflow.contrib.tensorrt.python import * -except errors.NotFoundError as e: - no_trt_message = ( - '**** Failed to initialize TensorRT. This is either because the TensorRT' - ' installation path is not in LD_LIBRARY_PATH, or because you do not have' - ' it installed. 
If not installed, please go to' - ' https://developer.nvidia.com/tensorrt to download and install' - ' TensorRT ****') - print(no_trt_message) - raise e -# pylint: enable=unused-import,wildcard-import,g-import-not-at-top +# pylint: disable=unused-import,wildcard-import +from tensorflow.contrib.tensorrt.python import * +# pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index eea8c8efa28c1b..970f8104736d95 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -15,7 +15,6 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/convert/convert_graph.h" -#include #include #include #include @@ -49,29 +48,13 @@ namespace tensorrt { namespace convert { namespace { -bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { +static bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { // LINT.IfChange // TODO(jie): Segmentation shouldn't associated with op name. // Split it into a registration for each kernel. static const std::set candidate_ops = { - "Identity", - "Const", - "Conv2D", - "MaxPool", - "BiasAdd", - "Relu", - "Add", - "Mul", - "Sub", - "Rsqrt", - "Pad", - "Mean", - "AvgPool", - "ConcatV2", - "DepthwiseConv2dNative", - "FusedBatchNorm", - "FusedBatchNormV2", - // TODO(ben,jie): ... + "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", + "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean" }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) return candidate_ops.count(node_def.op()); @@ -86,8 +69,6 @@ void GetSubGraphIncomingEdges(const tensorflow::Graph& graph, if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource()) { incoming_edges->insert(edge); - } else { - VLOG(2) << edge->src()->name() << " N, "; } } } @@ -101,10 +82,7 @@ void GetSubGraphOutgoingEdges(const tensorflow::Graph& graph, for (const tensorflow::Edge* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink()) { - VLOG(2) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); - } else { - VLOG(2) << edge->dst()->name() << " N, "; } } } @@ -131,150 +109,74 @@ std::unordered_map> BuildTensorNameMap( } return result; } -// TODO(sami): convert references to pointers -struct ConvertGraphParams { - ConvertGraphParams( - tensorflow::Graph& inp_graph, - const std::vector& output_node_names, - const std::set& subgraph_node_id_numbers, - size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& current_graph_properties, - std::unordered_map>* output_edges, - int engine_precision_mode) - : graph(inp_graph), - output_names(output_node_names), - subgraph_node_ids(subgraph_node_id_numbers), - max_batch_size(max_supported_batch_size), - max_workspace_size_bytes(max_consumed_workspace_size_bytes), - graph_properties(current_graph_properties), - output_edge_map(output_edges), - precision_mode(engine_precision_mode) {} - tensorflow::Graph& graph; - const std::vector& output_names; - const std::set& subgraph_node_ids; - size_t max_batch_size; - size_t max_workspace_size_bytes; - const tensorflow::grappler::GraphProperties& graph_properties; - std::unordered_map>* output_edge_map; - int precision_mode; - std::vector> subgraph_inputs; - std::vector> subgraph_outputs; + +tensorflow::Status ConvertSubGraphToTensorRT( + const std::vector& 
output_names, + const std::set& subgraph_node_ids, + size_t max_batch_size, // Max batch size that engine will be created for + // Max amount of memory that engine will be allowed to consume, in bytes + size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& graph_properties, + tensorflow::Graph* graph) { tensorflow::EdgeSet subgraph_incoming_edges; - tensorflow::EdgeSet subgraph_outgoing_edges; -}; + GetSubGraphIncomingEdges(*graph, subgraph_node_ids, &subgraph_incoming_edges); + + std::vector> subgraph_inputs; -static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { - GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, - &p->subgraph_incoming_edges); - for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { - p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); + // Collect inputs by looking for incoming edges + for (const tensorflow::Edge* edge : subgraph_incoming_edges) { + subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } - auto output_name_to_index_map = BuildTensorNameMap(p->output_names); std::set> subgraph_outputs_set; // Collect outputs referenced from output_names - for (int node_id : p->subgraph_node_ids) { - tensorflow::Node* node = p->graph.FindNodeId(node_id); + auto output_name_to_index_map = BuildTensorNameMap(output_names); + for (int node_id : subgraph_node_ids) { + tensorflow::Node* node = graph->FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, - &p->subgraph_outgoing_edges); - for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { + // Collect outputs referenced from outgoing edges + tensorflow::EdgeSet subgraph_outgoing_edges; + GetSubGraphOutgoingEdges(*graph, subgraph_node_ids, &subgraph_outgoing_edges); + for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - p->subgraph_outputs.reserve(subgraph_outputs_set.size()); - p->subgraph_outputs.insert(p->subgraph_outputs.begin(), - subgraph_outputs_set.begin(), - subgraph_outputs_set.end()); - return tensorflow::Status::OK(); -}; - -tensorflow::Status GetCalibNode(ConvertGraphParams* params) { - TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); + // Impose an ordering on the outputs + std::vector> subgraph_outputs( + subgraph_outputs_set.begin(), subgraph_outputs_set.end()); + // Build TensorRT node and add it to the graph tensorflow::NodeDef trt_node_def; - SubGraphParams s(params->graph, params->subgraph_node_ids, - params->subgraph_inputs, params->subgraph_outputs, - params->max_batch_size, params->max_workspace_size_bytes, - params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); - TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( + *graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, + max_batch_size, max_workspace_size_bytes, graph_properties, + &trt_node_def)); tensorflow::Status status; - tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); - - TF_RETURN_IF_ERROR(status); - - for (auto in_edge : - params->subgraph_incoming_edges) { // loop over incoming edges and - // attach them to calib node - // tensorflow::Node* src_node = in_edge->src(); - auto src_output = in_edge->src_output(); - auto dst_node = in_edge->dst(); - 
auto dst_input = in_edge->dst_input(); - VLOG(1) << " update edge " << trt_node->name() << ":" << src_output - << " -> " << dst_node->name() << ":" << dst_input; - TF_RETURN_IF_ERROR( - params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input)); - } - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { - TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); - tensorflow::NodeDef trt_node_def; - - SubGraphParams s(params->graph, params->subgraph_node_ids, - params->subgraph_inputs, params->subgraph_outputs, - params->max_batch_size, params->max_workspace_size_bytes, - params->graph_properties, params->output_edge_map, - &trt_node_def, params->precision_mode); - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); - tensorflow::Status status; - tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); - - // AddNode does not wire edges. - // Re-map incoming edges to use the new TRT node instead of the orig subgraph - std::map, int> subgraph_edge_to_input_map; - for (size_t i = 0; i < params->subgraph_inputs.size(); ++i) { - subgraph_edge_to_input_map.insert({params->subgraph_inputs.at(i), i}); - } - for (const tensorflow::Edge* edge : params->subgraph_incoming_edges) { - std::pair old_src = {edge->src()->id(), edge->src_output()}; - int new_src_output = subgraph_edge_to_input_map.at(old_src); - params->graph.AddEdge(edge->src(), edge->src_output(), trt_node, - new_src_output); - params->graph.RemoveEdge(edge); - } - - VLOG(2) << "new wiring edges: " << trt_node->in_edges().size(); - for (const tensorflow::Edge* edge : trt_node->in_edges()) { - VLOG(2) << edge->src()->name() << " port: " << edge->src_output(); - } - + tensorflow::Node* trt_node = graph->AddNode(trt_node_def, &status); TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph std::map, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); + for (size_t i = 0; i < subgraph_outputs.size(); ++i) { + subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); - for (const tensorflow::Edge* edge : params->subgraph_outgoing_edges) { + for (const tensorflow::Edge* edge : subgraph_outgoing_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = subgraph_edge_to_output_map.at(old_src); - TF_RETURN_IF_ERROR(params->graph.UpdateEdge( - trt_node, new_src_output, edge->dst(), edge->dst_input())); + TF_RETURN_IF_ERROR(graph->UpdateEdge(trt_node, new_src_output, edge->dst(), + edge->dst_input())); } // Remove the original subgraph - for (int node_id : params->subgraph_node_ids) { - tensorflow::Node* node = params->graph.FindNodeId(node_id); + for (int node_id : subgraph_node_ids) { + tensorflow::Node* node = graph->FindNodeId(node_id); // Don't remove the input placeholders if (node->type_string() == "Placeholder") { continue; } - params->graph.RemoveNode(node); + graph->RemoveNode(node); } return tensorflow::Status::OK(); } @@ -292,39 +194,12 @@ tensorflow::Status BuildNodeMap( } } // namespace -tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* infer_graph) { - VLOG(0) << "Starting Calib Conversion"; - tensorflow::Graph graph(tensorflow::OpRegistry::Global()); - TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), 
graph_def, &graph)); - // get calib nodes - std::vector calib_nodes; - for (auto node : graph.op_nodes()) { - if (node->type_string() == "TRTCalibOp") { - VLOG(1) << "Found Calib Node"; - calib_nodes.push_back(node); - } - } - VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size(); - if (calib_nodes.size() == 0) - return tensorflow::errors::FailedPrecondition( - "Graph doesn't contain any calibration nodes!." - " Please generate calibration graph and run calibration first"); - for (auto n : calib_nodes) { - TF_RETURN_IF_ERROR( - tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); - } - graph.ToGraphDef(infer_graph); - return tensorflow::Status::OK(); -} tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode = FP32MODE, int minimum_segment_size = 3) { - // optimization pass + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def) { + // Optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; tensorflow::GraphDef gdef; @@ -334,23 +209,16 @@ tensorflow::Status ConvertGraphDefToTensorRT( tensorflow::grappler::LayoutOptimizer optimizer; tensorflow::grappler::Cluster* cluster; - // virtual cluster + // Virtual cluster tensorflow::DeviceProperties device_properties; - device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); - // single machine - int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); - int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); - VLOG(2) << "cpu_cores: " << num_cpu_cores; - VLOG(2) << "gpus: " << num_gpus; - TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); - // constant folding + // Constant folding item.graph = gdef; tensorflow::grappler::ConstantFolding fold(nullptr); TF_RETURN_IF_ERROR(fold.Optimize(nullptr, item, &gdef)); @@ -358,6 +226,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); + // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -374,7 +243,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // TODO(sami): this should be passed as a knob!!!! 
- segment_options.minimum_segment_size = minimum_segment_size; + segment_options.minimum_segment_size = 2; tensorflow::tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -383,37 +252,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( } std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); - std::unordered_map> output_edge_map; - int count = 0; - float total_num_nodes_in_segments = 0.; - for (auto s : segments) { - total_num_nodes_in_segments += s.size(); - } for (const std::set& subgraph_node_names : segments) { std::set subgraph_node_ids; - size_t max_mem_per_engine = - max_workspace_size_bytes * - ((float)subgraph_node_names.size() / total_num_nodes_in_segments); - std::stringstream oss; for (const string& node_name : subgraph_node_names) { - oss << " " << node_name; subgraph_node_ids.insert(node_map.at(node_name)->id()); } - VLOG(2) << "Subgraph nodes" << oss.str(); - ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, - max_mem_per_engine, static_graph_properties, - &output_edge_map, precision_mode); - if (precision_mode == INT8MODE) { - TF_RETURN_IF_ERROR(GetCalibNode(&p)); - } else { - tensorflow::Status status = ConvertSubGraphToTensorRT(&p); - if (status != tensorflow::Status::OK()) { - LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count - << " due to: \n" - << status.ToString() << " SKIPPING......"; - } - count++; - } + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( + output_names, subgraph_node_ids, max_batch_size, + max_workspace_size_bytes, static_graph_properties, &graph)); } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index e1596e89e22981..154ad3f2e8fb0a 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -28,11 +28,6 @@ namespace tensorflow { namespace tensorrt { namespace convert { -// This method converts an already generated calibration graph which was used in -// calibration runs to an inference graph -tensorflow::Status ConvertCalibGraphToInferGraph( - const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def); - // max_batch_size: maximum batch size which can be used for inference for // optimization targets inference run with max batch size. // max_workspace_size_bytes: The upper bound of memory allowence for @@ -40,8 +35,7 @@ tensorflow::Status ConvertCalibGraphToInferGraph( tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode, int minimum_segment_size); + size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 75a3c3d034dff3..9ee717dd7fb1ef 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -24,10 +24,6 @@ limitations under the License. 
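The loop being removed above also apportioned the total TensorRT workspace across engines in proportion to segment size (max_mem_per_engine). Restated on its own:

```python
def workspace_shares(segment_sizes, max_workspace_size_bytes):
    """Per-engine workspace, proportional to segment node count."""
    total = float(sum(segment_sizes))
    return [int(max_workspace_size_bytes * s / total) for s in segment_sizes]

print(workspace_shares([30, 10], 1 << 30))   # [805306368, 268435456]
```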
#include #include -#include "tensorflow/contrib/tensorrt/log/trt_logger.h" -#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" -#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" -#include "tensorflow/core/framework/node_def.pb.h" // NOLINT #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor_shape.pb.h" // NOLINT #include "tensorflow/core/framework/types.h" @@ -36,7 +32,6 @@ limitations under the License. #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/tensor_coding.h" @@ -44,6 +39,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorrt/include/NvInfer.h" // Check if the types are equal. Cast to int first so that failure log message @@ -53,7 +49,6 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { -using ::tensorflow::strings::StrCat; namespace { @@ -70,8 +65,7 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, *trt_dtype = nvinfer1::DataType::kHALF; break; default: - return tensorflow::errors::InvalidArgument( - "Unsupported data type " + tensorflow::DataTypeString(tf_dtype)); + return tensorflow::errors::InvalidArgument("Unsupported data type"); } return tensorflow::Status::OK(); } @@ -118,18 +112,6 @@ static std::vector> CreateSamePadding( return padding; } -string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { - size_t last_scope_separator = 0; - for (size_t i = 0; i < std::min(op_name_a.size(), op_name_b.size()); ++i) { - if (op_name_a[i] != op_name_b[i]) { - break; - } else if (op_name_a[i] == '/') { - last_scope_separator = i + 1; - } - } - return op_name_a.substr(0, last_scope_separator); -} - class TRT_ShapedWeights { public: TRT_ShapedWeights(tensorflow::DataType type, const void* values, @@ -262,11 +244,6 @@ std::vector TFAttrs::get>(string key) const { return std::vector(attr.begin(), attr.end()); } -template <> -std::vector TFAttrs::get>(string key) const { - auto attr = this->at(key)->list().s(); - return std::vector(attr.begin(), attr.end()); -} template <> nvinfer1::Dims TFAttrs::get(string key) const { auto values = this->get>(key); @@ -289,17 +266,6 @@ tensorflow::DataType TFAttrs::get(string key) const { return this->at(key)->type(); } -template <> -float TFAttrs::get(string key) const { - return this->at(key)->f(); -} - -template <> -bool TFAttrs::get(string key) const { - return this->at(key)->b(); -} - -// TODO(jie): reorder4 & reorder2 should be merged? template void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, nvinfer1::DimsNCHW istrides, T* odata, @@ -317,87 +283,29 @@ void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, } } -template -void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides, - T* odata, nvinfer1::DimsHW ostrides) { - for (int h = 0; h < shape.h(); ++h) { - for (int w = 0; w < shape.w(); ++w) { - odata[h * ostrides.h() + w * ostrides.w()] = - idata[h * ostrides.h() + w * ostrides.w()]; - } - } -} - -// TODO(jie): fallback to tensorflow!! 
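Reorder4 above, and the ReorderRSCKToKCRS routine that follows, implement a layout change with explicit strides: TensorFlow stores Conv2D filters as RSCK (rows, cols, input channels, output channels) while TensorRT consumes KCRS. The equivalent NumPy transpose, as a sanity reference (shapes hypothetical):

```python
import numpy as np

# RSCK: 3x3 filter, 8 input channels, 16 output channels (hypothetical).
rsck = np.arange(3 * 3 * 8 * 16, dtype=np.float32).reshape(3, 3, 8, 16)
kcrs = np.transpose(rsck, (3, 2, 0, 1))         # reorder to K, C, R, S

assert kcrs.shape == (16, 8, 3, 3)
assert kcrs[5, 2, 1, 0] == rsck[1, 0, 2, 5]     # same element, new layout
```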
-void ReorderCKtoKC(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights) { - int c = iweights.shape_.d[0]; - int k = iweights.shape_.d[1]; - oweights->shape_.d[0] = k; - oweights->shape_.d[1] = c; - nvinfer1::DimsHW istrides = {1, k}; - nvinfer1::DimsHW ostrides = {c, 1}; - switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: { - Reorder2({k, c}, static_cast(iweights.GetValues()), - istrides, - static_cast(const_cast(oweights->GetValues())), - ostrides); - break; - } - case tensorflow::DataType::DT_HALF: { - Reorder2( - {k, c}, static_cast(iweights.GetValues()), - istrides, - static_cast(const_cast(oweights->GetValues())), - ostrides); - break; - } - default: - LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got " - << DataTypeString(iweights.type_); - } -} - void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights, int num_groups) { + TRT_ShapedWeights* oweights) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; int s = iweights.shape_.d[1]; - // TRT requires GKcRS, while TF depthwise has RSCK - // where c=1, C=G - VLOG(2) << "num_groups: " << num_groups; - int c = iweights.shape_.d[2] / num_groups; - VLOG(2) << "c" << iweights.shape_.d[2] << " then " << c; - int k = iweights.shape_.d[3] * num_groups; - VLOG(2) << "k" << iweights.shape_.d[3] << " then " << k; - oweights->shape_.d[0] = k / num_groups; - oweights->shape_.d[1] = c * num_groups; + int c = iweights.shape_.d[2]; + int k = iweights.shape_.d[3]; + oweights->shape_.d[0] = k; + oweights->shape_.d[1] = c; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: { + case tensorflow::DataType::DT_FLOAT: Reorder4({k, c, r, s}, static_cast(iweights.GetValues()), istrides, static_cast(const_cast(oweights->GetValues())), ostrides); break; - } - case tensorflow::DataType::DT_HALF: { - Reorder4( - {k, c, r, s}, static_cast(iweights.GetValues()), - istrides, - static_cast(const_cast(oweights->GetValues())), - ostrides); - break; - } - default: - LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got " - << DataTypeString(iweights.type_); + LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; } } @@ -415,11 +323,12 @@ inline std::shared_ptr infer_object(T* obj) { return std::shared_ptr(obj, InferDeleter()); } +// Logger for GIE info/warning/errors class Converter; using OpConverter = std::function&, + std::vector const&, std::vector*)>; class Converter { @@ -427,57 +336,34 @@ class Converter { std::unordered_map op_registry_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; - tensorflow::tensorrt::TRTWeightStore* weight_store_; - bool fp16_; + void register_op_converters(); + std::vector get_inputs( const tensorflow::NodeDef& node_def) { std::vector inputs; - for (auto const& input_name : node_def.input()) { - /************************************************************************* - * TODO(jie) handle case 1) here - * Normalizes the inputs and extracts associated metadata: - * 1) Inputs can contain a colon followed by a suffix of characters. - * That suffix may be a single number (e.g. inputName:1) or several - * word characters separated from a number by a colon - * (e.g. inputName:foo:1). The - * latter case is used to denote inputs and outputs of functions. 
- * 2) Control dependency inputs contain caret at the beginning and we - * remove this and annotate the edge as a control dependency. - ************************************************************************/ - string name = input_name[0] == '^' ? input_name.substr(1) : input_name; - auto first = name.find_first_of(':'); - if (first != string::npos && first + 2 == name.size() && - name[first + 1] == '0') - name.erase(first); - - VLOG(2) << "retrieve input: " << name; - if (trt_tensors_.count(name)) { - inputs.push_back(trt_tensors_.at(name)); - } else { - LOG(FATAL) << "input: " << name << " not availabled for node at, " - << node_def.name(); - } + for (const auto& input_name : node_def.input()) { + VLOG(2) << "Retrieve input: " << input_name; + inputs.push_back(trt_tensors_.at(input_name)); } return inputs; } public: - explicit Converter(nvinfer1::INetworkDefinition* trt_network, - tensorflow::tensorrt::TRTWeightStore* ws, bool fp16) - : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { + explicit Converter(nvinfer1::INetworkDefinition* trt_network) + : trt_network_(trt_network) { this->register_op_converters(); } - tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; } + TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 0 means type error - weight_store_->store_.push_back(std::vector(weights.size_bytes())); - weights.SetValues(weight_store_->store_.back().data()); + temp_bufs_.push_back(std::vector(weights.size_bytes())); + weights.SetValues(temp_bufs_.back().data()); return weights; } - bool isFP16() { return fp16_; }; + TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { return this->get_temp_weights(weights.type_, weights.shape_); } @@ -496,7 +382,7 @@ class Converter { TRT_TensorOrWeights output = outputs.at(i); // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); - if (i != 0) output_name = StrCat(output_name, ":", i); + if (i != 0) output_name = output_name + ":" + std::to_string(i); if (output.is_tensor()) { output.tensor()->setName(output_name.c_str()); } @@ -562,7 +448,7 @@ struct LambdaFactory { switch (op) { case OP_CATEGORY::RSQRT: { VLOG(2) << "RSQRT GETS DONE"; - return [](T t) -> T { return 1.0 / sqrt(t); }; + return [](T t) -> T { return 1.0 / std::sqrt(t); }; } case OP_CATEGORY::NEG: return [](T t) -> T { return -t; }; @@ -648,22 +534,6 @@ struct LambdaFactory { } }; -template <> -std::function LambdaFactory::unary() { - switch (op) { - case OP_CATEGORY::RSQRT: { - VLOG(2) << "RSQRT GETS DONE"; - return [](Eigen::half t) -> Eigen::half { - return Eigen::half(1.0 / sqrt(float(t))); - }; - } - case OP_CATEGORY::NEG: - return [](Eigen::half t) -> Eigen::half { return -t; }; - default: - VLOG(2) << "Not supported op for unary: " << static_cast(op); - return nullptr; - } -} tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, TRT_ShapedWeights* oweights, LambdaFactory unary_op) { @@ -675,14 +545,6 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, std::transform(inp, inp + iweights.count(), oup, unary_op.unary()); break; } - case tensorflow::DataType::DT_HALF: { - auto inp = static_cast(iweights.GetValues()); - auto oup = - static_cast(const_cast(oweights->GetValues())); - std::transform(inp, inp + iweights.count(), oup, - unary_op.unary()); - break; - } default: return tensorflow::errors::Unimplemented( "Data type 
not supported: " + @@ -726,32 +588,6 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, } break; } - case tensorflow::DataType::DT_HALF: { - auto inp_l = static_cast(iweights_l.GetValues()); - auto inp_r = static_cast(iweights_r.GetValues()); - auto oup = - static_cast(const_cast(oweights->GetValues())); - - if (iweights_l.count() != iweights_r.count()) { - // We only supports broadcast of RankZero - if (iweights_l.count() == 1) { - VLOG(2) << "I bet it is not working!" << (*inp_l); - std::transform(inp_r, inp_r + iweights_r.count(), oup, - binary_op.broadcast_l(*inp_l)); - } else if (iweights_r.count() == 1) { - VLOG(2) << "I bet it is not working!" << (*inp_r); - std::transform(inp_l, inp_l + iweights_l.count(), oup, - binary_op.broadcast_r(*inp_r)); - } else { - return tensorflow::errors::Unimplemented( - "Binary op with non-rankZero broadcast not supported"); - } - } else { - std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup, - binary_op.binary()); - } - break; - } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -763,7 +599,7 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, tensorflow::Status ConstantFoldUnary( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input = inputs.at(0).weights(); @@ -777,12 +613,13 @@ tensorflow::Status ConstantFoldUnary( CHECK_EQ(weights_input.type_, TFAttrs(node_def).get("T")); + // Maybe I should do a switch LambdaFactory unary_op; if (node_def.op() == "Rsqrt") { // Compute rsqrt unary_op.op = LambdaFactory::OP_CATEGORY::RSQRT; auto ret = UnaryCompute(weights_input, &weights_output, unary_op); - // Pass the output + // PAss the output if (ret == tensorflow::Status::OK()) { outputs->push_back(TRT_TensorOrWeights(weights_output)); } @@ -794,11 +631,11 @@ tensorflow::Status ConstantFoldUnary( } // TODO(jie,ben) broadcast is needed yet not implemented -// Let's get the simple stuff working first. Maybe we should fall back to TF +// Let's get the simple stuff working first. Maybe we should fall bakc to TF // approach for constant folding tensorflow::Status ConstantFoldBinary( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input_l = inputs.at(0).weights(); TRT_ShapedWeights weights_input_r = inputs.at(1).weights(); @@ -811,12 +648,12 @@ tensorflow::Status ConstantFoldBinary( "Binary op implicit broadcast not supported: " + node_def.op()); // TODO(jie): constant fold should really fall back to TF. 
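As the DT_HALF branch removed above makes explicit, BinaryCompute only handles the rank-zero broadcast case: the scalar side is captured into the functor and std::transform sweeps the other side. A minimal float-only sketch of that pattern (illustrative, not the file's LambdaFactory):

#include <algorithm>
#include <vector>

// Sketch: rank-zero broadcast of an elementwise binary op. The scalar
// operand is bound into the lambda, as broadcast_l / broadcast_r do.
std::vector<float> BroadcastSub(const std::vector<float>& lhs, float rhs) {
  std::vector<float> out(lhs.size());
  std::transform(lhs.begin(), lhs.end(), out.begin(),
                 [rhs](float l) { return l - rhs; });  // the broadcast_r case
  return out;
}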
- int num_dims = weights_input_l.shape_.nbDims; + int nb_dims = weights_input_l.shape_.nbDims; nvinfer1::Dims output_shape; - output_shape.nbDims = num_dims; - VLOG(2) << "nb_dims: " << num_dims + output_shape.nbDims = nb_dims; + VLOG(2) << "nb_dims: " << nb_dims << ", the other: " << weights_input_r.shape_.nbDims; - for (int i = 0; i < num_dims; i++) { + for (int i = 0; i < nb_dims; i++) { if (weights_input_l.shape_.d[i] == weights_input_r.shape_.d[i]) { output_shape.d[i] = weights_input_l.shape_.d[i]; } else if (weights_input_l.shape_.d[i] == 1 || @@ -841,6 +678,7 @@ tensorflow::Status ConstantFoldBinary( // Allocate output weights TRT_ShapedWeights weights_output = ctx.get_temp_weights(dtype, output_shape); + // Maybe I should do a switch LambdaFactory binary_op; if (node_def.op() == "Sub") { binary_op.op = LambdaFactory::OP_CATEGORY::SUB; @@ -874,90 +712,48 @@ tensorflow::Status BinaryTensorOpWeight( // Maybe this part has to be moved into the block of rsqrt later // Check type consistency + auto dtype = TFAttrs(node_def).get("T"); + CHECK_EQ_TYPE(tensor->getType(), dtype); // Cast to int for error messages nvinfer1::DataType ttype; TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); + CHECK_EQ_TYPE(ttype, dtype); // Cast to int for error message // Check scale mode auto dims_w = weights.shape_; auto dims_t = tensor->getDimensions(); - // default to element-wise + // Default to channel-wise auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - // TODO(jie): maybe use a permuatation instead to support more cases; - bool permutation_flag = false; - if (weights.count() == 1) { VLOG(2) << "UNIFORM"; scale_mode = nvinfer1::ScaleMode::kUNIFORM; } else { - // no broadcasting on Batch dimension; - VLOG(2) << "WEIGHTS DIM: " << dims_w.nbDims - << " tensor DIM: " << dims_t.nbDims; - if (dims_w.nbDims == dims_t.nbDims + 1) { - if (dims_w.d[0] == 1) { - for (int i = 1; i < dims_w.nbDims; i++) { - dims_w.d[i - 1] = dims_w.d[i]; - } - dims_w.nbDims--; - } else { - return tensorflow::errors::InvalidArgument( - "Binary op cannot operate on batch, " + node_def.name()); - } - } + // No broadcasting on Batch dimension; + assert(dims_w.d[0] == 1); - if (dims_w.nbDims == dims_t.nbDims && dims_w.d[0] == dims_t.d[0]) { - scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; - // default is element; - for (int i = 1; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != dims_t.d[i]) { - // if dimension does not match, switch back to channel; - VLOG(2) << "channel"; - scale_mode = nvinfer1::ScaleMode::kCHANNEL; - break; - } - } - // if channel as candidate, validate it - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) { - for (int i = 1; i < dims_w.nbDims; i++) { - if (dims_w.d[i] != 1) - return tensorflow::errors::InvalidArgument( - "Weight shape not compatible at, " + node_def.name()); - } - } else { - VLOG(2) << "elementwise"; + // Broadcasting on Channel dimension only allowed in kUNIFORM + assert(dims_w.d[1] == dims_t.d[0]); + assert(dims_w.nbDims == dims_t.nbDims); + + // Default is element; + for (int i = 2; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != dims_t.d[i - 1]) { + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + break; } - } else if (dims_w.nbDims == 1 && - dims_w.d[0] == dims_t.d[dims_t.nbDims - 1]) { - // channel wise and broadcast required; - permutation_flag = true; - scale_mode = nvinfer1::ScaleMode::kCHANNEL; - } else { - return tensorflow::errors::InvalidArgument( - "Weight shape not compatible at, " + node_def.name()); } - } - - // transpose last dimension - std::vector permutation(dims_t.nbDims + 
1); - if (permutation_flag) { - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { - // we swap the last dimension into channel for trt. - // because of tensorflow default broadcasting rules. - for (int i = 0; i < static_cast(permutation.size()); i++) { - permutation[i] = i; + if (scale_mode == nvinfer1::ScaleMode::kELEMENTWISE) { + scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + for (int i = 2; i < dims_w.nbDims; i++) { + if (dims_w.d[i] != 1) + return tensorflow::errors::InvalidArgument( + "Weight shape not compatible at, " + node_def.name()); } - permutation[1] = dims_t.nbDims; - permutation[dims_t.nbDims] = 1; - tensor = ctx.TransposeTensor(const_cast(tensor), - permutation); - } else { - return tensorflow::errors::InvalidArgument( - "Transpose cannot be applied, " + node_def.name()); } } - // prepare weights + // Prepare weights TRT_ShapedWeights shift_weights(weights.type_); TRT_ShapedWeights scale_weights(weights.type_); TRT_ShapedWeights power_weights(weights.type_); @@ -983,26 +779,88 @@ tensorflow::Status BinaryTensorOpWeight( scale_weights, power_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); - // transpose back dimension - if (permutation_flag) { - output_tensor = ctx.TransposeTensor(output_tensor, permutation); - } // Pass the output outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } -enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; +tensorflow::Status BinaryTensorOpTensor( + Converter& ctx, const tensorflow::NodeDef& node_def, + const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, + std::vector* outputs) { + static const std::unordered_map ops{ + {"Add", nvinfer1::ElementWiseOperation::kSUM}, + {"Mul", nvinfer1::ElementWiseOperation::kPROD}, + // {"max", nvinfer1::ElementWiseOperation::kMAX}, + // {"min", nvinfer1::ElementWiseOperation::kMIN}, + {"Sub", nvinfer1::ElementWiseOperation::kSUB}, + {"Div", nvinfer1::ElementWiseOperation::kDIV}, + }; + + // FIXME assume type matches input weights + // Get trt type & shape + TFAttrs attrs(node_def); + // Maybe this part has to be moved into the block of rsqrt later + nvinfer1::DataType dtype = attrs.get("T"); + + // Check type consistency + CHECK_EQ_TYPE(tensor_l->getType(), dtype); + CHECK_EQ_TYPE(tensor_r->getType(), dtype); + auto op_pair = ops.find(node_def.op()); + if (op_pair == ops.end()) + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); + + nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( + *const_cast(tensor_l), + *const_cast(tensor_r), op_pair->second); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + // Pass the output + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} -tensorflow::Status ConvertConv2DHelper( +tensorflow::Status ConvertPlaceholder( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs, - int group // group ==0 specifies depthwise conv -) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + std::vector const& inputs, + std::vector* outputs) { + VLOG(2) << "Placeholder should have been replace already"; + return tensorflow::errors::Unimplemented(", cannot convert Placeholder op"); + // OK this make sense since we are supposed to replace it with input + TFAttrs attrs(node_def); + nvinfer1::DataType dtype = attrs.get("dtype"); + nvinfer1::Dims dims = attrs.get("shape"); + + dims.nbDims--; + for 
(int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; + + nvinfer1::ITensor* output = + ctx.network()->addInput(node_def.name().c_str(), dtype, dims); + if (!output) { + return tensorflow::errors::InvalidArgument("Failed to create Input layer"); + } + outputs->push_back(TRT_TensorOrWeights(output)); + return tensorflow::Status::OK(); +} +tensorflow::Status ConvertConv2D(Converter& ctx, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, + std::vector* outputs) { + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + // TODO(jie): handle NHWC/NCHW transpose; + TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); + ReorderRSCKToKCRS(weights_rsck, &weights); + TRT_ShapedWeights biases(weights.type_); + int noutput = weights.shape_.d[0]; + nvinfer1::DimsHW kernel_size; + kernel_size.h() = weights.shape_.d[2]; + kernel_size.w() = weights.shape_.d[3]; TFAttrs attrs(node_def); int h_index = 2; @@ -1016,31 +874,11 @@ tensorflow::Status ConvertConv2DHelper( // TODO(jie): transpose it } - // tensor after transpose (NCHW) - auto tensor_dim = tensor->getDimensions(); - - int num_groups = group; - if (num_groups == 0) // depthwise convolution - num_groups = tensor_dim.d[0]; - VLOG(2) << "groups count: " << num_groups; - - TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); - ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); - TRT_ShapedWeights biases(weights.type_); - int noutput = weights.shape_.d[0] * num_groups; - nvinfer1::DimsHW kernel_size; - kernel_size.h() = weights.shape_.d[2]; - kernel_size.w() = weights.shape_.d[3]; - VLOG(2) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); - // TODO(jie): stride. (NHWC/NCHW) auto tf_stride = attrs.get>("strides"); - VLOG(2) << "h_INDEX" << h_index << ", w_index " << w_index; - VLOG(2) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2] - << tf_stride[3]; nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); + auto tensor_dim = tensor->getDimensions(); std::vector> padding; // TODO(jie): padding. 
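The SAME branch that follows feeds kernel, stride, and input extents into CreateSamePadding. TensorFlow's SAME rule, restated per spatial dimension, is equivalent to the helper kept earlier in this file (a standalone sketch):

#include <algorithm>
#include <utility>

// Sketch: SAME padding picks the smallest total padding such that
// ceil(input / stride) output positions fit; the odd pixel goes right.
std::pair<int, int> SamePadding1D(int input, int kernel, int stride) {
  int out = (input + stride - 1) / stride;  // ceil(input / stride)
  int total = std::max((out - 1) * stride + kernel - input, 0);
  int left = total / 2;
  int right = total - left;  // right-precedence, as in TensorFlow
  return {left, right};
}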
if (attrs.get("padding") == "SAME") { @@ -1081,11 +919,10 @@ tensorflow::Status ConvertConv2DHelper( layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); - layer->setNbGroups(num_groups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); - VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << ", " + VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << dim_after.d[2] << ", " << dim_after.d[3]; if (data_format == "NHWC") { @@ -1098,101 +935,11 @@ tensorflow::Status ConvertConv2DHelper( return tensorflow::Status::OK(); } -tensorflow::Status ConvertConv2DHelper( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs, ConvolutionType type) { - switch (type) { - case ConvolutionType::DEFAULT: - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1); - case ConvolutionType::DEPTHWISE_CONV: - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0); - } - return tensorflow::errors::Unimplemented("unsupported convolution type at, " + - node_def.name()); -} - -tensorflow::Status BinaryTensorOpTensor( - Converter& ctx, const tensorflow::NodeDef& node_def, - const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, - std::vector* outputs) { - static const std::unordered_map ops{ - {"Add", nvinfer1::ElementWiseOperation::kSUM}, - {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - {"Sub", nvinfer1::ElementWiseOperation::kSUB}, - {"Div", nvinfer1::ElementWiseOperation::kDIV}, - }; - - // FIXME assume type matches input weights - // get trt type & shape - TFAttrs attrs(node_def); - // maybe this part has to be moved into the block of rsqrt later - nvinfer1::DataType dtype = attrs.get("T"); - - // check type consistency - CHECK_EQ_TYPE(tensor_l->getType(), dtype); - CHECK_EQ_TYPE(tensor_r->getType(), dtype); - auto op_pair = ops.find(node_def.op()); - if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + - " not supported at: " + - node_def.name()); - - nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( - *const_cast(tensor_l), - *const_cast(tensor_r), op_pair->second); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - // pass the output - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertPlaceholder( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - VLOG(2) << "Placeholder should have been replace already"; - return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); - // OK this make sense since we are supposed to replace it with input - TFAttrs attrs(node_def); - nvinfer1::DataType dtype = attrs.get("dtype"); - nvinfer1::Dims dims = attrs.get("shape"); - - dims.nbDims--; - for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; - - nvinfer1::ITensor* output = - ctx.network()->addInput(node_def.name().c_str(), dtype, dims); - if (!output) { - return tensorflow::errors::InvalidArgument("Failed to create Input layer"); - } - outputs->push_back(TRT_TensorOrWeights(output)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertConv2D(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, - 
ConvolutionType::DEFAULT); -} - -tensorflow::Status ConvertConv2DDepthwise( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - return ConvertConv2DHelper(ctx, node_def, inputs, outputs, - ConvolutionType::DEPTHWISE_CONV); -} - tensorflow::Status ConvertPool(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); int h_index = 2; @@ -1210,8 +957,6 @@ tensorflow::Status ConvertPool(Converter& ctx, // TODO(jie): support other pooling type if (node_def.op() == "MaxPool") type = nvinfer1::PoolingType::kMAX; - else if (node_def.op() == "AvgPool") - type = nvinfer1::PoolingType::kAVERAGE; else return tensorflow::errors::Unimplemented("Only supports Max pool"); @@ -1274,9 +1019,9 @@ tensorflow::Status ConvertPool(Converter& ctx, tensorflow::Status ConvertActivation( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); nvinfer1::IActivationLayer* layer = ctx.network()->addActivation( *const_cast(tensor), nvinfer1::ActivationType::kRELU); nvinfer1::ITensor* output_tensor = layer->getOutput(0); @@ -1286,14 +1031,14 @@ tensorflow::Status ConvertActivation( tensorflow::Status ConvertScale(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) return tensorflow::errors::Unimplemented( "Only supports tensor op weight for now, at " + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); @@ -1310,26 +1055,9 @@ tensorflow::Status ConvertScale(Converter& ctx, } else { VLOG(2) << "NCHW !!!!"; } - - auto dims = tensor->getDimensions(); - VLOG(2) << "tensor dimensions: " << dims.nbDims; - for (int i = 0; i < dims.nbDims; i++) { - VLOG(2) << "i: " << dims.d[i]; - } - dims = weights.shape_; - VLOG(2) << "tensor dimensions: " << dims.nbDims; - for (int i = 0; i < dims.nbDims; i++) { - VLOG(2) << "i: " << dims.d[i]; - } - - nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL; - if (weights.shape_.d[0] == 1) { - mode = nvinfer1::ScaleMode::kUNIFORM; - } - - nvinfer1::IScaleLayer* layer = - ctx.network()->addScale(*const_cast(tensor), mode, - weights, empty_weights, empty_weights); + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( + *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, + weights, empty_weights, empty_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); if (data_format == "NHWC") { @@ -1344,7 +1072,7 @@ tensorflow::Status ConvertScale(Converter& ctx, tensorflow::Status ConvertConst(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { const auto& weights_tensor = node_def.attr().at("value").tensor(); @@ -1363,144 +1091,20 @@ tensorflow::Status ConvertConst(Converter& ctx, VLOG(2) << "SCALAR!!!" 
<< node_def.name(); nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { - VLOG(2) << "dimensions: " << tensor.dims(); - VLOG(2) << "size: " << weights_tensor.float_val_size(); - scalar_shape = GetTensorShape(tensor); - for (int i = 0; i < scalar_shape.nbDims; i++) - VLOG(2) << scalar_shape.d[i]; - if (GetShapeSize(scalar_shape) != weights_tensor.float_val_size()) { - if (weights_tensor.float_val_size() == 1 || - scalar_shape.d[0] == weights_tensor.float_val_size()) { - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.float_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); - } - } - } else { VLOG(2) << "Dimensions: " << tensor.dims(); - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.float_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { - scalar_shape.d[i] = 0; - scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; - } - } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - auto half_tensor = temp_tensor.flat(); - Eigen::DefaultDevice defd; - half_tensor.device(defd) = - tensor.flat().template cast(); - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.float_val().begin(), - weights_tensor.float_val() - .end()); // make a local copy first to flatten - memcpy(dst, tensor_data.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); - } - } else if (!weights_tensor.int_val().empty()) { - VLOG(2) << "int!!!" << node_def.name(); - nvinfer1::Dims scalar_shape; - if (tensor.dims() > 0) { - VLOG(2) << "dimensions: " << tensor.dims(); - scalar_shape = GetTensorShape(tensor); - if (GetShapeSize(scalar_shape) != weights_tensor.int_val_size()) { - if (weights_tensor.int_val_size() == 1 || - scalar_shape.d[0] == weights_tensor.int_val_size()) { - scalar_shape.nbDims = 1; - // no dimension provided. flatten it - scalar_shape.d[0] = weights_tensor.int_val_size(); - scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; - } else { - LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" - << " kUNIFORM, at: " << node_def.name(); - } - } + weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + GetTensorShape(tensor)); } else { - VLOG(2) << "dimensions: " << tensor.dims(); + VLOG(2) << "Dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - // no dimension provided. 
flatten it - scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.d[0] = 1; scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - } - if (ctx.isFP16()) { - auto dtype_new = tensorflow::DataType::DT_HALF; - size_t len_data = tensorflow::DataTypeSize(dtype_new); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); - TTypes::Flat half_tensor = temp_tensor.flat(); - Eigen::DefaultDevice defd; - switch (dtype) { - case (tensorflow::DT_INT32): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_INT16): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_INT8): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - case (tensorflow::DT_UINT8): { - half_tensor.device(defd) = - tensor.flat().template cast(); - break; - } - default: - return tensorflow::errors::InvalidArgument( - "Datatype " + tensorflow::DataTypeString(dtype) + - " for FP16 conversion"); - break; - }; - memcpy(dst, half_tensor.data(), len_data); // store into weight store - weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); - } else { - size_t len_data = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) - len_data *= scalar_shape.d[i]; - size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); - len_data = std::max(len_data, len_tensor); - ctx.weight_store()->store_.push_back(std::vector(len_data)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.int_val().begin(), - weights_tensor.int_val() - .end()); // make a local copy first to flatten - // doesn't have to be contigous - memcpy(dst, tensor_data.data(), len_tensor); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + scalar_shape); } } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << "TENSOR!!!" 
<< node_def.name(); @@ -1526,7 +1130,7 @@ tensorflow::Status ConvertConst(Converter& ctx, tensorflow::Status ConvertIdentity( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { outputs->push_back(inputs.at(0)); return tensorflow::Status::OK(); @@ -1534,7 +1138,7 @@ tensorflow::Status ConvertIdentity( tensorflow::Status ConvertBinary(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 2) return tensorflow::errors::FailedPrecondition( @@ -1561,7 +1165,7 @@ tensorflow::Status ConvertBinary(Converter& ctx, tensorflow::Status ConvertUnary(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 1) return tensorflow::errors::FailedPrecondition( @@ -1579,7 +1183,7 @@ tensorflow::Status ConvertUnary(Converter& ctx, tensorflow::Status ConvertReduce(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1587,7 +1191,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1625,7 +1229,6 @@ tensorflow::Status ConvertReduce(Converter& ctx, return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at" + node_def.name()); if (index_list_data[i] == 1) permuted_index = 1; - idx_set.emplace(index_list_data[i]); } @@ -1633,7 +1236,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, nvinfer1::DimsHW pool_kernel; if (permuted_index == 1) { for (int i = 2; i < nb_dims; i++) { - if (idx_set.count(i) == 0) { + if (idx_set.count(i)) { permuted_index = i; break; } @@ -1668,13 +1271,12 @@ tensorflow::Status ConvertReduce(Converter& ctx, output_tensor = ctx.TransposeTensor( const_cast(output_tensor), permutation_order); } - outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } tensorflow::Status ConvertPad(Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, + std::vector const& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1682,7 +1284,7 @@ tensorflow::Status ConvertPad(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1769,287 +1371,19 @@ tensorflow::Status ConvertPad(Converter& ctx, return tensorflow::Status::OK(); } -tensorflow::Status ConvertConcat(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - // not including the last input (axis) here - int input_size = static_cast(inputs.size()) - 1; - - if (!inputs.at(0).is_tensor()) - return tensorflow::errors::InvalidArgument( - "Concat in TRT support only Tensor input, at 
" + node_def.name()); - - // We are retrieving the axis - TRT_ShapedWeights axis = inputs.at(input_size).weights(); - - TFAttrs attrs(node_def); - // auto attr_size = attrs.at("N")->i(); - // auto data_type = attrs.get("T"); - auto index_type = attrs.get("Tidx"); - - // TODO(jie): handle data type - // Only expect to handle INT32 as index attributes for now - if (index_type != tensorflow::DataType::DT_INT32) - return tensorflow::errors::Unimplemented( - "Tidx supports only DT_INT32, at " + node_def.name()); - - int index = *(static_cast(const_cast(axis.GetValues()))); - - // TODO(jie): early termination with no-op (attr_size==1) - - auto dim = inputs.at(0).tensor()->getDimensions(); - // dimension check - if (index > dim.nbDims + 1) - return tensorflow::errors::InvalidArgument( - "Concatenate on axis out of dimension range, at " + node_def.name()); - - if (index == 0) - return tensorflow::errors::InvalidArgument( - "Concatenate on batch dimension not supported, at " + node_def.name()); - - // incase we need permutation; - std::vector permutation_order(dim.nbDims + 1); - - for (int i = 0; i < dim.nbDims + 1; i++) permutation_order[i] = i; - - if (index != 1) { - permutation_order[1] = index - 1; - permutation_order[index - 1] = 1; - } - - std::vector inputs_vec; - // Shap chack (all input tensor should have same shape) - // starting from 0 since we are probably also doing transpose here; - for (int i = 0; i < input_size; i++) { - auto tensor_i = inputs.at(i).tensor(); - auto dim_i = tensor_i->getDimensions(); - if (dim_i.nbDims != dim.nbDims) - return tensorflow::errors::InvalidArgument( - "Concatenate receives inputs with inconsistent dimensions, at " + - node_def.name()); - - for (int j = 0; j < dim.nbDims; j++) { - // check dimension consistency on non-concatenate axis - if (j != index - 1 && dim_i.d[j] != dim.d[j]) - return tensorflow::errors::InvalidArgument( - "Concatenate receives inputs with inconsistent shape, at" + - node_def.name()); - } - - // TRT does concatenation only on channel! 
- if (index != 1) - tensor_i = ctx.TransposeTensor(const_cast(tensor_i), - permutation_order); - - inputs_vec.push_back(tensor_i); - } - - // nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation( - const_cast(inputs_vec.data()), - inputs_vec.size()); - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - if (index != 1) { - output_tensor = ctx.TransposeTensor(output_tensor, permutation_order); - } - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertFusedBatchNorm( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - TFAttrs attrs(node_def); - float epsilon = attrs.get("epsilon"); - auto data_format = attrs.get("data_format"); - if (data_format != "NCHW") { - return tensorflow::errors::Unimplemented( - "only data_format=NCHW is supported, at " + node_def.name()); - } - bool is_training = attrs.get("is_training"); - if (is_training) { - return tensorflow::errors::Unimplemented( - "only is_training=false is supported, at " + node_def.name()); - } - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - TRT_ShapedWeights scale_weights = inputs.at(1).weights(); - TRT_ShapedWeights offset_weights = inputs.at(2).weights(); - TRT_ShapedWeights mean_weights = inputs.at(3).weights(); - TRT_ShapedWeights variance_weights = inputs.at(4).weights(); - TRT_ShapedWeights dummy_power_weights(scale_weights.type_); - TRT_ShapedWeights combined_scale_weights = - ctx.get_temp_weights_like(scale_weights); - TRT_ShapedWeights combined_offset_weights = - ctx.get_temp_weights_like(offset_weights); - size_t nweight = scale_weights.count(); - if ((scale_weights.type_ == offset_weights.type_) && - (mean_weights.type_ == variance_weights.type_) && - (scale_weights.type_ == variance_weights.type_)) { - if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && - (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { - return tensorflow::errors::Unimplemented( - "only float32 or float16 weight data type is supported, for node " + - node_def.name() + " got " + - tensorflow::DataTypeString(scale_weights.type_)); - } - if (scale_weights.type_ == tensorflow::DT_FLOAT) { - for (size_t i = 0; i < nweight; ++i) { - float scale = (static_cast(scale_weights.GetValues()))[i]; - float offset = - (static_cast(offset_weights.GetValues()))[i]; - float mean = (static_cast(mean_weights.GetValues()))[i]; - float variance = - (static_cast(variance_weights.GetValues()))[i]; - float& combined_scale_ref = const_cast( - static_cast(combined_scale_weights.GetValues()))[i]; - float& combined_offset_ref = const_cast( - static_cast(combined_offset_weights.GetValues()))[i]; - combined_scale_ref = scale / sqrtf(variance + epsilon); - combined_offset_ref = offset - mean * combined_scale_ref; - } - } else { - const Eigen::half* scale_vals = - (static_cast(scale_weights.GetValues())); - const Eigen::half* off_vals = - (static_cast(offset_weights.GetValues())); - const Eigen::half* mean_vals = - (static_cast(mean_weights.GetValues())); - const Eigen::half* variance_vals = - (static_cast(variance_weights.GetValues())); - Eigen::half* comb_scale_vals = const_cast( - static_cast(combined_scale_weights.GetValues())); - Eigen::half* comb_off_vals = const_cast( - static_cast(combined_offset_weights.GetValues())); - for (size_t i = 0; i < nweight; ++i) { - float scale(scale_vals[i]); - float offset(off_vals[i]); - float 
mean(mean_vals[i]); - float variance(variance_vals[i]); - float combined_scale_ref = scale / sqrtf(variance + epsilon); - comb_scale_vals[i] = Eigen::half(combined_scale_ref); - float combined_offset_ref = offset - mean * combined_scale_ref; - comb_off_vals[i] = Eigen::half(combined_offset_ref); - } - } - } - nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - combined_offset_weights.GetWeightsForTRT(), - combined_scale_weights.GetWeightsForTRT(), - dummy_power_weights.GetWeightsForTRT()); - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertMatMul(Converter& ctx, - const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - - // TODO(jie): transpose! - TFAttrs attrs(node_def); - - TRT_ShapedWeights weights_ck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck); - ReorderCKtoKC(weights_ck, &weights); - TRT_ShapedWeights biases(weights.type_); - - int noutput = weights.shape_.d[0]; - - nvinfer1::IFullyConnectedLayer* layer = ctx.network()->addFullyConnected( - *const_cast(tensor), noutput, weights, biases); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertReshape( - Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector& inputs, - std::vector* outputs) { - if (inputs.size() != 2 || !inputs.at(0).is_tensor() || - !inputs.at(1).is_weights()) - return tensorflow::errors::InvalidArgument( - "Input expects tensor and weights, at" + node_def.name()); - - // implement tensor binaryOp weight [channel wise] for now; - const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - auto dims = tensor->getDimensions(); - // restore implicit batch dimension - - TRT_ShapedWeights shape = inputs.at(1).weights(); - - TFAttrs attrs(node_def); - - auto padding_type = attrs.get("Tshape"); - - if (shape.shape_.nbDims != 1) - return tensorflow::errors::InvalidArgument( - "reshape new shape is not 1 dimensional, at " + node_def.name()); - - // Only expect to handle INT32 as attributes for now - if (padding_type != tensorflow::DataType::DT_INT32) - return tensorflow::errors::Unimplemented( - "reshape new shape supports only DT_INT32, at " + node_def.name()); - - auto shape_data = static_cast(const_cast(shape.GetValues())); - - if (shape_data[0] != -1) - return tensorflow::errors::InvalidArgument( - "reshape new shape first dimension is not -1, at " + node_def.name()); - - auto shape_num_dims = shape.shape_.d[0]; - VLOG(2) << "shape dimensions: " << shape_num_dims; - int volume_w = 1; - for (int i = 1; i < shape.shape_.d[0]; i++) volume_w *= shape_data[i]; - - int volume_t = 1; - for (int i = 0; i < dims.nbDims; i++) volume_t *= dims.d[i]; - - VLOG(2) << "volume: " << volume_t << " volume weights: " << volume_w; - if (volume_w != volume_t) - return tensorflow::errors::InvalidArgument( - "volume does not agree between tensor and new shape, at " + - node_def.name()); - - nvinfer1::IShuffleLayer* layer = - ctx.network()->addShuffle(*const_cast(tensor)); - - nvinfer1::Dims reshape_dims; - VLOG(2) << "new dimension: " << shape_num_dims - 1; - reshape_dims.nbDims = shape_num_dims - 1; - for (int32_t i = 0; i < reshape_dims.nbDims; ++i) { - 
reshape_dims.d[i] = shape_data[i + 1]; - } - layer->setReshapeDimensions(reshape_dims); - VLOG(2) << "new dimension: " << shape_num_dims - 1; - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - auto dims_output = output_tensor->getDimensions(); - VLOG(2) << "output tensor dimension:" << dims_output.nbDims; - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - void Converter::register_op_converters() { // vgg_16 slim implementation op_registry_["Placeholder"] = ConvertPlaceholder; op_registry_["Conv2D"] = ConvertConv2D; - op_registry_["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; op_registry_["Relu"] = ConvertActivation; op_registry_["MaxPool"] = ConvertPool; - op_registry_["AvgPool"] = ConvertPool; // This could be really handled as ConvertBinary op_registry_["BiasAdd"] = ConvertScale; op_registry_["Const"] = ConvertConst; + // op_registry_["MatMul"] = ConvertFullyConnected; // Not used in vgg // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed + // op_registry_["AvgPool"] = ConvertPool; // resnet_50_v1 slim implementation op_registry_["Add"] = ConvertBinary; @@ -2059,364 +1393,26 @@ void Converter::register_op_converters() { op_registry_["Mean"] = ConvertReduce; op_registry_["Pad"] = ConvertPad; // TODO(ben,jie): Add more ops - - op_registry_["ConcatV2"] = ConvertConcat; - op_registry_["MatMul"] = ConvertMatMul; - op_registry_["Reshape"] = ConvertReshape; - op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; - op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; } } // namespace -tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { - return tensorflow::errors::Unimplemented("Not implemented yet"); -} -tensorflow::Status ConvertCalibrationNodeToEngineNode( - tensorflow::Graph& graph, tensorflow::Node* c_node) { - const auto ndef = c_node->def(); - - TFAttrs attrs(ndef); - std::vector segment_nodes( - attrs.get>("segment_nodes")); - std::vector output_nodes( - attrs.get>("segment_output_names")); - std::vector input_names( - attrs.get>("input_names")); - string res_name = attrs.get("resource_name"); - VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; - string engine_name = "my_trt_op"; - { - const auto node_id = tensorflow::str_util::Split(res_name, "_"); - engine_name += node_id.back(); - } - std::map node_maps; - - for (auto n : graph.op_nodes()) { - node_maps.insert({n->name(), n}); - } - VLOG(1) << "Output Nodes:"; - std::vector out_types; - std::vector out_edges; - for (auto& i : output_nodes) { - auto node_port = tensorflow::str_util::Split(i, ":"); - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - auto out_node_name = node_port.at(0); - if (node_port.size() > 1) { - VLOG(1) << "Multi port output" << node_port.at(0) << " " - << node_port.at(1) << " size=" << node_port.size(); - } - auto node_it = node_maps.find(out_node_name); - if (node_it != node_maps.end()) { - tensorflow::Node* out_node = node_it->second; - int port = 0; - if (node_port.size() == 2) { - port = std::strtoul(node_port.at(1).c_str(), nullptr, 10); - out_types.push_back(out_node->output_type(port)); - } else { - out_types.push_back(out_node->output_type(0)); - } - for (auto out_edge : out_node->out_edges()) { - if (out_edge->src_output() == port) { - out_edges.push_back(out_edge); - break; - } - } - } else { - LOG(WARNING) << " couldn't find output node " << out_node_name; - } - } - VLOG(1) << "Input Nodes:"; - for (auto& i : 
input_names) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - } - auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); - auto resmgr = trt_rm->getManager("TRTCalibOps"); - tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; - auto status = resmgr->Lookup(res_name, res_name, &calib_res); - if (!status.ok() || !calib_res->calibrator_) { - return tensorflow::errors::FailedPrecondition( - "You must run calibration" - " and inference conversion in the same proces"); - } - - calib_res->calibrator_->setDone(); - calib_res->thr_->join(); - delete calib_res->thr_; - if (!calib_res->engine_) { - LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " - "calibration graph?"; - } - auto weight_rmgr = trt_rm->getManager("WeightStore"); - TF_CHECK_OK(weight_rmgr->Delete( - res_name, res_name)); - auto engine_plan = calib_res->engine_->serialize(); - calib_res->engine_->destroy(); - calib_res->network_->destroy(); - calib_res->builder_->destroy(); - calib_res->thr_ = nullptr; - calib_res->engine_ = nullptr; - calib_res->builder_ = nullptr; - tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); - std::vector income_edges; - for (const auto in_edge : c_node->in_edges()) { - auto src = in_edge->src(); - int dest_port = in_edge->dst_input(); - income_edges.emplace_back(src->name(), in_edge->src_output(), - c_node->input_type(dest_port)); - } - tensorflow::gtl::ArraySlice input_list( - income_edges); - op_builder.Input(input_list); - tensorflow::NodeDef engine_node; - const char* engine_plan_data = static_cast(engine_plan->data()); - string engine_plan_string(engine_plan_data, - engine_plan_data + engine_plan->size()); - status = op_builder.Attr("serialized_engine", engine_plan_string) - .Attr("input_nodes", input_names) - .Attr("output_nodes", output_nodes) - .Attr("OutT", out_types) - .Finalize(&engine_node); - if (!status.ok()) { - LOG(ERROR) << "Engine Node creation failed"; - return status; - } - auto trt_engine_node = graph.AddNode(engine_node, &status); - TF_CHECK_OK(status); - for (size_t i = 0; i < out_edges.size(); i++) { - VLOG(1) << "Connecting trt_engine_node output " << i << " with " - << out_edges.at(i)->dst()->name() << " port " - << out_edges.at(i)->dst_input(); - TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, - out_edges.at(i)->dst(), - out_edges.at(i)->dst_input())); - } - VLOG(1) << "Segment nodes:"; - for (auto& i : segment_nodes) { - VLOG(1) << " " << i << " in graph " << node_maps.count(i); - auto it = node_maps.find(i); - if (it != node_maps.end()) { - graph.RemoveNode(it->second); - } - } - graph.RemoveNode(c_node); - return tensorflow::Status::OK(); -} - -tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { - // Visit nodes in reverse topological order and construct the TRT network. - - // Toposort - std::vector order_vec; - tensorflow::GetPostOrder(s.graph, &order_vec); - // Select just the subgraph - std::list order; - for (tensorflow::Node* node : order_vec) { - if (s.subgraph_node_ids.count(node->id())) { - order.push_front(node); // we want topological order to contstruct the - // network layer by layer - } - } - // topological order is needed to build TRT network - static int static_id = 0; - string subgraph_name_scope; - if (!order.empty()) { - subgraph_name_scope = order.front()->name(); - } - for (const tensorflow::Node* node : order) { - subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); - } - // TODO(sami,ben,jie): proper naming! 
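GetCommonNameScope, removed in an earlier hunk but still used in the loop above, is a longest-common-prefix cut back to the last '/' on which both names agree. Restated standalone:

#include <algorithm>
#include <string>

// Sketch: longest shared name-scope prefix of two op names, ending at a
// scope separator, mirroring the removed GetCommonNameScope.
std::string CommonNameScope(const std::string& a, const std::string& b) {
  size_t last_scope_separator = 0;
  for (size_t i = 0; i < std::min(a.size(), b.size()); ++i) {
    if (a[i] != b[i]) break;
    if (a[i] == '/') last_scope_separator = i + 1;
  }
  return a.substr(0, last_scope_separator);
}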
- string calib_op_name = - StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); - string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); - static_id++; - auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); - auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); - auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); - TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); - op_res->logger_ = new tensorflow::tensorrt::Logger(); - op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); - - if (!op_res->builder_) { - return tensorflow::errors::Internal( - "failed to create TensorRT builder object"); - } - - op_res->network_ = op_res->builder_->createNetwork(); - if (!op_res->network_) { - return tensorflow::errors::Internal( - "failed to create TensorRT network object"); - } - - // Build the network - auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::tensorrt::TRTWeightStore(); - TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); - std::vector input_names; - std::vector input_dtypes; - for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input. Node id= " << input.first; - int node_id = input.first; - int output_idx = input.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - auto node_name = node->name(); - input_names.push_back(node_name); // insert original node name without port - // TODO(jie): alternative :) - if (!s.graph_properties.HasOutputProperties(node_name)) - return tensorflow::errors::Internal("failed to find input node: " + - node_name); - - auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); - if (static_cast(op_info_vec.size()) < output_idx) - return tensorflow::errors::Internal( - "accessing output index of: ", output_idx, ", at node: ", node_name, - "with output entry from shape_map: ", op_info_vec.size()); - - auto op_info = op_info_vec.at(output_idx); - - tensorflow::DataType tf_dtype = op_info.dtype(); - input_dtypes.push_back(tf_dtype); - - nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - - VLOG(2) << "accessing output index of: " << output_idx - << ", at node: " << node_name - << "with output entry from shape_map: " << op_info_vec.size(); - - // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_psuedo_chw; - for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - - for (int i = 1; i < op_info.shape().dim_size(); i++) { - VLOG(2) << "dimension: " << i - << " , size: " << op_info.shape().dim(i).size(); - input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); - } - - // TODO(ben,jie): proper way to restore input tensor name? 
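The naming TODO above is resolved the same way throughout this file: output 0 of a node keeps the bare node name, and any other output port gets an explicit ":idx" suffix, which is exactly what the StrCat call just below does. As a plain sketch:

#include <string>

// Sketch: tensor-name convention used for TRT bindings -- the ":0" suffix
// stays implicit, other output ports are spelled out.
std::string TensorName(const std::string& node_name, int output_idx) {
  if (output_idx == 0) return node_name;
  return node_name + ":" + std::to_string(output_idx);
}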
- auto input_tensor_name = node_name; - if (output_idx != 0) input_tensor_name = StrCat(node_name, ":", output_idx); - - nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); - - if (!input_tensor) - return tensorflow::errors::InvalidArgument( - "Failed to create Input layer"); - VLOG(2) << "input tensor name :" << input_tensor_name; - - if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) - return tensorflow::errors::AlreadyExists( - "output tensor already exists for op: " + input_tensor_name); - } - - VLOG(2) << "finished sorting"; - - for (const tensorflow::Node* node : order) { - const tensorflow::NodeDef& node_def = node->def(); - VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); - TF_RETURN_IF_ERROR(converter.convert_node(node_def)); - } - - VLOG(2) << "finished conversion"; - - // Gather output metadata - std::vector output_names; - std::vector output_dtypes; - int trt_engine_op_output_idx = 0; - for (const std::pair& output : s.output_inds) { - int node_id = output.first; - int output_idx = output.second; - tensorflow::Node* node = s.graph.FindNodeId(node_id); - string op_name = node->name(); - string tensor_name = op_name; - - s.output_edge_map->insert( - {trt_engine_op_output_idx == 0 - ? engine_name - : StrCat(engine_name, ":", trt_engine_op_output_idx), - {output_idx, tensor_name}}); - trt_engine_op_output_idx++; - if (output_idx != 0) { - tensor_name = StrCat(tensor_name, ":", output_idx); - } - VLOG(1) << "output tensor name: " << tensor_name; - output_names.push_back(tensor_name); - auto tensor_or_weights = converter.get_tensor(tensor_name); - if (!tensor_or_weights.is_tensor()) { - return tensorflow::errors::InvalidArgument( - "Output node is weights not tensor"); - } - nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); - if (!tensor) { - return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); - } - converter.network()->markOutput(*tensor); - tensorflow::DataType tf_dtype = node->output_type(output_idx); - output_dtypes.push_back(tf_dtype); - nvinfer1::DataType trt_dtype = nvinfer1::DataType::kFLOAT; - TF_RETURN_IF_ERROR(ConvertDType(tf_dtype, &trt_dtype)); - tensor->setType(trt_dtype); - } - - VLOG(2) << "finished output"; - - // Build the engine - op_res->builder_->setMaxBatchSize(s.max_batch_size); - op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes); - - // Build the TRT op - // TODO(sami,ben,jie): proper naming! 
-  // Build the TRT op
-  // TODO(sami,ben,jie): proper naming!
-  tensorflow::NodeDefBuilder op_builder(calib_op_name, "TRTCalibOp");
-  std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
-  for (size_t i = 0; i < input_names.size(); ++i) {
-    int output_idx = s.input_inds.at(i).second;
-    // we wired up the input here already, it is redundant to do it again in
-    // ConvertSubGraphToTensorRT(convert_graph.cc)
-    auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(
-        input_names.at(i), output_idx, input_dtypes.at(i));
-    VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i)
-            << ":" << output_idx
-            << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i));
-    income_edges.push_back(incoming_edge);
-  }
-  tensorflow::gtl::ArraySlice<tensorflow::NodeDefBuilder::NodeOut> input_list(
-      income_edges);
-  op_builder.Input(input_list);
-  std::vector<string> segment_names;
-  segment_names.reserve(s.subgraph_node_ids.size());
-  for (int i : s.subgraph_node_ids) {
-    auto node = s.graph.FindNodeId(i);
-    segment_names.push_back(node->name());
-  }
-  LOG(INFO) << "finished op preparation";
-
-  auto status = op_builder.Attr("segment_nodes", segment_names)
-                    .Attr("input_names", input_names)
-                    .Attr("segment_output_names", output_names)
-                    .Attr("resource_name", calib_op_name)
-                    .Finalize(s.trt_node);
-
-  LOG(INFO) << status.ToString();
-  LOG(INFO) << "finished op building";
-
-  return tensorflow::Status::OK();
-}
-
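The removed function above ends with the standard `NodeDefBuilder` pattern for assembling a NodeDef out of inputs and attributes. A self-contained sketch of that pattern, using a stock `Identity` op so it stands alone; the node and input names are illustrative:

```cpp
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/node_def_builder.h"
#include "tensorflow/core/framework/types.pb.h"

// Build a NodeDef the same way the removed code builds its TRTCalibOp:
// name the node, wire inputs, set attrs, then Finalize into the proto.
tensorflow::Status BuildExampleNode(tensorflow::NodeDef* node_def) {
  tensorflow::NodeDefBuilder builder("my_example_node", "Identity");
  builder.Input("some_input", 0, tensorflow::DT_FLOAT);
  return builder.Attr("T", tensorflow::DT_FLOAT).Finalize(node_def);
}
```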
 tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
-    tensorrt::convert::SubGraphParams& s) {
+    const tensorflow::Graph& graph, const std::set<int>& subgraph_node_ids,
+    const std::vector<std::pair<int, int>>& input_inds,
+    const std::vector<std::pair<int, int>>& output_inds, size_t max_batch_size,
+    size_t max_workspace_size_bytes,
+    const tensorflow::grappler::GraphProperties& graph_properties,
+    tensorflow::NodeDef* trt_node) {
   // Visit nodes in reverse topological order and construct the TRT network.
   // Toposort
   std::vector<tensorflow::Node*> order_vec;
-  tensorflow::GetPostOrder(s.graph, &order_vec);
+  tensorflow::GetPostOrder(graph, &order_vec);
   // Select just the subgraph
   std::list<tensorflow::Node*> order;
   for (tensorflow::Node* node : order_vec) {
-    if (s.subgraph_node_ids.count(node->id())) {
+    if (subgraph_node_ids.count(node->id())) {
       // We want topological order to construct the
       // network layer by layer
       order.push_front(node);
@@ -2438,86 +1434,46 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
         "Failed to create TensorRT network object");
   }
 
-  string subgraph_name_scope;
-  if (!order.empty()) {
-    subgraph_name_scope = order.front()->name();
-  }
-  for (const tensorflow::Node* node : order) {
-    subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name());
-  }
-  static int static_id = 0;
-  // TODO(sami,ben,jie): proper naming!
-  string engine_name = StrCat(subgraph_name_scope, "my_trt_op");
-  engine_name = StrCat(engine_name, static_id++);
-  auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance();
-  auto weight_rmgr = trt_rmgr->getManager("WeightStore");
-  auto ws = new tensorflow::tensorrt::TRTWeightStore();
-  TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws));
-
   // Build the network
-  Converter converter(trt_network.get(), ws, s.precision_mode == FP16MODE);
+  Converter converter(trt_network.get());
 
   std::vector<string> input_names;
   std::vector<tensorflow::DataType> input_dtypes;
-  for (const std::pair<int, int>& input : s.input_inds) {
-    VLOG(2) << "parsing input!!!!!";
+  for (std::pair<int, int> const& input : input_inds) {
     int node_id = input.first;
     int output_idx = input.second;
-    tensorflow::Node* node = s.graph.FindNodeId(node_id);
+    tensorflow::Node* node = graph.FindNodeId(node_id);
     auto node_name = node->name();
 
-    // input_names should use the node name in the graph
-    // here it should be the input tensor name -> matching the binding
-    // insert original node name without port
-    auto tensor_name = node_name;
-    if (output_idx != 0) {
-      tensor_name = StrCat(tensor_name, ":", output_idx);
-    }
-
-    VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name
-            << " idx: " << output_idx;
-
-    auto shape_inference_node_name = node_name;
-    auto shape_inference_output_idx = output_idx;
-    // rewire the shape inference to original node in the graph
-    if (s.output_edge_map->count(tensor_name)) {
-      shape_inference_node_name = s.output_edge_map->at(tensor_name).second;
-      shape_inference_output_idx = s.output_edge_map->at(tensor_name).first;
-    }
-    if (shape_inference_output_idx < 0) continue;
-    VLOG(2) << "shapeinference name: " << shape_inference_node_name
-            << " idx: " << shape_inference_output_idx;
-
-    if (!s.graph_properties.HasOutputProperties(shape_inference_node_name))
-      return tensorflow::errors::Internal("failed to find input node: " +
-                                          shape_inference_node_name);
+    input_names.push_back(node_name);  // Insert original node name without port
+    // TODO(jie): alternative :)
+    if (!graph_properties.HasOutputProperties(node_name))
+      return tensorflow::errors::Internal("Failed to find input node: " +
+                                          node_name);
 
-    auto op_info_vec =
-        s.graph_properties.GetOutputProperties(shape_inference_node_name);
-    if (static_cast<int>(op_info_vec.size()) <= shape_inference_output_idx)
+    auto op_info_vec = graph_properties.GetOutputProperties(node_name);
+    if (static_cast<int>(op_info_vec.size()) < output_idx)
       return tensorflow::errors::Internal(
-          "accessing output index of: ", shape_inference_output_idx,
-          ", at node: ", shape_inference_node_name,
-          " with output entry from shape_map: ", op_info_vec.size());
+          "Accessing output index of: " + std::to_string(output_idx) +
+          ", at node: " + node_name + " with output entry from shape_map: " +
+          std::to_string(op_info_vec.size()));
+
+    auto op_info = op_info_vec.at(output_idx);
 
-    auto op_info = op_info_vec.at(shape_inference_output_idx);
     tensorflow::DataType tf_dtype = op_info.dtype();
     input_dtypes.push_back(tf_dtype);
 
     nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT);
     TF_CHECK_OK(ConvertDType(tf_dtype, &dtype));
 
-    VLOG(2) << "Accessing output index of: " << output_idx
+    VLOG(2) << "Accessing output index of: " << std::to_string(output_idx)
             << ", at node: " << node_name
-            << " with output entry from shape_map: " << op_info_vec.size();
+            << " with output entry from shape_map: "
+            << std::to_string(op_info_vec.size());
+
     // TODO(ben,jie): update TRT input format/dimension
     nvinfer1::DimsCHW input_dim_psuedo_chw;
     for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1;
 
-    // TODO(jie): TRT 3.x only support 4 dimensional input tensor.
-    //            update the code once TRT 4.0 comes out.
-    if (op_info.shape().dim_size() != 4)
-      return tensorflow::errors::Unimplemented("require 4 dimensional input");
-
     for (int i = 1; i < op_info.shape().dim_size(); i++) {
       VLOG(2) << "dimension: " << i
               << " , size: " << op_info.shape().dim(i).size();
@@ -2526,11 +1482,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
 
     // TODO(ben,jie): proper way to restore input tensor name?
     auto input_tensor_name = node_name;
-    if (output_idx != 0) {
-      input_tensor_name = StrCat(node_name, ":", output_idx);
-    }
+    if (output_idx != 0)
+      input_tensor_name = node_name + ":" + std::to_string(output_idx);
 
-    input_names.push_back(input_tensor_name);
     nvinfer1::ITensor* input_tensor = converter.network()->addInput(
         input_tensor_name.c_str(), dtype, input_dim_psuedo_chw);
@@ -2557,22 +1511,14 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
   // Gather output metadata
   std::vector<string> output_names;
   std::vector<tensorflow::DataType> output_dtypes;
-  int trt_engine_op_output_idx = 0;
-  for (const std::pair<int, int>& output : s.output_inds) {
+  for (std::pair<int, int> const& output : output_inds) {
     int node_id = output.first;
     int output_idx = output.second;
-    tensorflow::Node* node = s.graph.FindNodeId(node_id);
+    tensorflow::Node* node = graph.FindNodeId(node_id);
     string op_name = node->name();
     string tensor_name = op_name;
-
-    s.output_edge_map->insert(
-        {trt_engine_op_output_idx == 0
-             ? engine_name
-             : StrCat(engine_name, ":", trt_engine_op_output_idx),
-         {output_idx, tensor_name}});
-    trt_engine_op_output_idx++;
     if (output_idx != 0)
-      tensorflow::strings::StrAppend(&tensor_name, ":", output_idx);
+      tensor_name = tensor_name + ":" + std::to_string(output_idx);
     VLOG(2) << "Output tensor name: " << tensor_name;
     output_names.push_back(tensor_name);
     auto tensor_or_weights = converter.get_tensor(tensor_name);
@@ -2594,25 +1540,19 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
   }
   VLOG(2) << "Finished output";
 
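The rolled-back code below builds the engine and serializes it into a plan string that is later stashed on the TRTEngineOp. A condensed sketch of that TRT 3.x build-and-serialize sequence, including the null check that the rolled-back version drops; names are illustrative:

```cpp
#include <string>
#include "NvInfer.h"

// Configure the builder, build a CUDA engine from the network, and copy the
// serialized plan into a host string. Caller owns builder and network.
std::string BuildPlan(nvinfer1::IBuilder* builder,
                      nvinfer1::INetworkDefinition* network,
                      int max_batch, size_t max_workspace) {
  builder->setMaxBatchSize(max_batch);
  builder->setMaxWorkspaceSize(max_workspace);
  nvinfer1::ICudaEngine* engine = builder->buildCudaEngine(*network);
  if (engine == nullptr) return "";  // engine building failed
  nvinfer1::IHostMemory* plan = engine->serialize();
  std::string result(static_cast<const char*>(plan->data()), plan->size());
  plan->destroy();
  engine->destroy();
  return result;
}
```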
+  // TODO(jie): static_id is not thread safe.
+  static int static_id = 0;
   // Build the engine
-  trt_builder->setMaxBatchSize(s.max_batch_size);
-  trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes);
-  VLOG(0) << "Max batch size= " << s.max_batch_size
-          << " max workspace size= " << s.max_workspace_size_bytes;
-  if (s.precision_mode == FP16MODE) {
-    trt_builder->setHalf2Mode(true);
-    VLOG(0) << "Using FP16 precision mode";
-  }
-  LOG(INFO) << "starting build engine";
+  trt_builder->setMaxBatchSize(max_batch_size);
+  trt_builder->setMaxWorkspaceSize(max_workspace_size_bytes);
+  VLOG(0) << "Starting build engine " << static_id;
+  // TODO(ben,jie): half2 and int8 mode support
   string engine_plan_string;
   {
     auto trt_engine =
         infer_object(trt_builder->buildCudaEngine(*converter.network()));
     VLOG(0) << "Built network";
-    if (trt_engine.get() == nullptr) {
-      return tensorflow::errors::Internal("Engine building failure");
-    }
     auto engine_plan = infer_object(trt_engine->serialize());
     VLOG(0) << "Serialized engine";
     const char* engine_plan_data =
@@ -2620,19 +1560,18 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
     engine_plan_string =
         string(engine_plan_data, engine_plan_data + engine_plan->size());
   }
-  TF_RETURN_IF_ERROR(weight_rmgr->Delete(
-      engine_name, engine_name));
-  LOG(INFO) << "finished engine " << engine_name;
+
+  VLOG(0) << "Finished engine";
 
   // Build the TRT op
-  tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp");
+  // TODO(sami,ben,jie): proper naming!
+  tensorflow::NodeDefBuilder op_builder(
+      tensorflow::strings::StrCat("my_trt_op", static_id++), "TRTEngineOp");
   std::vector<tensorflow::NodeDefBuilder::NodeOut> income_edges;
-  VLOG(2) << "input edge size: " << input_names.size();
   for (size_t i = 0; i < input_names.size(); ++i) {
-    VLOG(2) << "input edges: " << i << " " << input_names.at(i);
-    int output_idx = s.input_inds.at(i).second;
-    // we wired up the input here already, it is redundant to do it again in
-    // ConvertSubGraphToTensorRT(convert_graph.cc)
+    int output_idx = input_inds.at(i).second;
+    // We wired up the input here already, it is redundant to do it again in
+    // ConvertSubGraphToTensorRT(convert_graph.cc)
     auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(
         input_names.at(i), output_idx, input_dtypes.at(i));
     income_edges.push_back(incoming_edge);
@@ -2647,7 +1586,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
       .Attr("input_nodes", input_names)
       .Attr("output_nodes", output_names)
       .Attr("OutT", output_dtypes)
-      .Finalize(s.trt_node);
+      .Finalize(trt_node);
 
   VLOG(0) << status.ToString() << " finished op building";
 
diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
index 954a1e72f86043..2e7fd19566e1ed 100644
--- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h
+++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h
@@ -17,8 +17,6 @@ limitations under the License.
 #define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_
 
 #include <set>
-#include <string>
-#include <unordered_map>
 #include <utility>
 #include <vector>
@@ -34,49 +32,16 @@ namespace tensorflow {
 namespace tensorrt {
 namespace convert {
 
-const int FP32MODE = 0;
-const int FP16MODE = 1;
-const int INT8MODE = 2;
+tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
+    const tensorflow::Graph& graph, const std::set<int>& subgraph_node_ids,
+    const std::vector<std::pair<int, int>>&
+        input_inds,  // {node_id, output_idx}
+    const std::vector<std::pair<int, int>>&
+        output_inds,  // {node_id, output_idx}
+    size_t max_batch_size, size_t max_workspace_size_bytes,
+    const tensorflow::grappler::GraphProperties& graph_prop,
+    tensorflow::NodeDef* trt_node);
 
-struct SubGraphParams {
-  SubGraphParams(
-      tensorflow::Graph& inp_graph,
-      const std::set<int>& subgraph_node_id_numbers,
-      const std::vector<std::pair<int, int>>& input_indices,
-      const std::vector<std::pair<int, int>>& output_indices,
-      size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes,
-      const tensorflow::grappler::GraphProperties& current_graph_properties,
-      std::unordered_map<string, std::pair<int, string>>* output_edges,
-      tensorflow::NodeDef* constructed_trt_node,
-      int engine_precision_mode = FP32MODE)
-      : graph(inp_graph),
-        subgraph_node_ids(subgraph_node_id_numbers),
-        input_inds(input_indices),
-        output_inds(output_indices),
-        max_batch_size(max_supported_batch_size),
-        max_workspace_size_bytes(max_consumed_workspace_size_bytes),
-        graph_properties(current_graph_properties),
-        output_edge_map(output_edges),
-        trt_node(constructed_trt_node),
-        precision_mode(engine_precision_mode) {}
-
-  tensorflow::Graph& graph;
-  const std::set<int>& subgraph_node_ids;
-  const std::vector<std::pair<int, int>>& input_inds;    // {node_id, output_idx}
-  const std::vector<std::pair<int, int>>& output_inds;  // {node_id, output_idx}
-  size_t max_batch_size;
-  size_t max_workspace_size_bytes;
-  const tensorflow::grappler::GraphProperties& graph_properties;
-  std::unordered_map<string, std::pair<int, string>>* output_edge_map;
-  tensorflow::NodeDef* trt_node;
-  const int precision_mode;
-};
-
-// TODO(sami): Replace references with const reference or pointers
-tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params);
-tensorflow::Status InjectCalibrationNode(SubGraphParams& params);
-tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph,
-                                                      tensorflow::Node* c_node);
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
 
diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc
index aea44fd8a2fcc4..1dcb87e7683ad7 100644
--- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc
+++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc
@@ -21,11 +21,10 @@ limitations under the License.
#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" -#include "tensorflow/core/platform/stream_executor.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda/include/cuda_runtime_api.h" +#include "cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { @@ -114,13 +113,7 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { ctx->set_output(i, t); } VLOG(2) << "Filled map for sending"; - // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files - const cudaStream_t* stream = CHECK_NOTNULL( - reinterpret_cast(ctx->op_device_context() - ->stream() - ->implementation() - ->CudaStreamMemberHack())); - calib_res->calibrator_->setBatch(input_data, *stream); + calib_res->calibrator_->setBatch(input_data); VLOG(2) << "Passed calibration data"; // TODO(aaroey): make sure we wait for the completion of calibration on the // last batch in future PR. diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index b32371b642f38b..8efdf63ebebc4d 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -24,12 +24,8 @@ limitations under the License. #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { -static ::tensorflow::tensorrt::Logger logger; -namespace gpu = ::perftools::gputools; -using IRuntime = nvinfer1::IRuntime; -using Dims = nvinfer1::Dims; - namespace tensorrt { +static ::tensorflow::tensorrt::Logger logger; TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // read serialized_engine @@ -44,21 +40,10 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same - // gpu where the input/output is also located. - int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; - cudaSetDevice(gpu_id); - int device; - cudaGetDevice(&device); - if (gpu_id != device) LOG(FATAL) << "set device failed!"; - - // TODO(samikama) runtime should be taken from a resourcemanager as well. 
-  // TODO(samikama) runtime should be taken from a resourcemanager as well.
-  // Only engine should be in the op and context and runtime should be taken
-  // from resourcemanager
-
-  IRuntime* infer = nvinfer1::createInferRuntime(logger);
+  nvinfer1::IRuntime* infer = nvinfer1::createInferRuntime(logger);
   trt_engine_ptr_.reset(infer->deserializeCudaEngine(
       serialized_engine.c_str(), serialized_engine.size(), nullptr));
+  trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext());
   // Runtime is safe to delete after engine creation
   infer->destroy();
@@ -70,6 +55,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
   size_t binding_index;
   int num_batch = 0;
+  bool valid = true;
   for (int i = 0; i < context->num_inputs(); i++) {
     // Grab the input tensor
     binding_index = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str());
@@ -78,12 +64,8 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
     const TensorShape& input_shape = input_tensor.shape();
     if (i == 0) {
       num_batch = input_shape.dim_size(0);
-      if (num_batch > trt_engine_ptr_->getMaxBatchSize()) {
-        LOG(FATAL) << "input tensor batch larger than max_batch_size: "
-                   << trt_engine_ptr_->getMaxBatchSize();
-      }
     } else if (num_batch != input_shape.dim_size(0)) {
-      LOG(FATAL) << "input data inconsistent batch size";
+      valid = false;
       break;
     }
     switch (trt_engine_ptr_->getBindingDataType(binding_index)) {
@@ -99,6 +81,9 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
     }
   }
+  // Might want a different way to inform the user of batch size inconsistency
+  if (!valid) LOG(WARNING) << "input data inconsistent batch size";
+
   for (int i = 0; i < static_cast<int>(output_nodes_.size()); i++) {
     // This is bad that we have to reallocate output buffer every run.
     // Create an output tensor
@@ -141,11 +126,9 @@ void TRTEngineOp::Compute(OpKernelContext* context) {
               ->implementation()
               ->CudaStreamMemberHack()));
 
-  // TODO(jie): trt enqueue does not return error
-  auto ret = trt_execution_context_ptr_->enqueue(num_batch, &buffers[0],
-                                                 *stream, nullptr);
-  VLOG(2) << "enqueue returns: " << ret;
-  // sync should be done by TF.
+  // Execution is handled by TF since we are getting the stream from TF.
+  // It is safe for the CPU pointer array (buffers) to go out of scope after
+  // enqueue.
+  trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], *stream, nullptr);
 }
 
 REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp);
 
diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc
index dda0dc9e712eb7..7add8cb8b3d2a0 100644
--- a/tensorflow/contrib/tensorrt/log/trt_logger.cc
+++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc
@@ -27,19 +27,19 @@ void Logger::log(Severity severity, const char* msg) {
   // Suppress info-level messages
   switch (severity) {
     case Severity::kINFO: {  // Mark TRT info messages as debug!
-      VLOG(2) << name_ << " " << msg;
+      VLOG(2) << msg;
       break;
     }
     case Severity::kWARNING: {
-      LOG(WARNING) << name_ << " " << msg;
+      LOG(WARNING) << msg;
       break;
     }
    case Severity::kERROR: {
-      LOG(ERROR) << name_ << " " << msg;
+      LOG(ERROR) << msg;
      break;
    }
    case Severity::kINTERNAL_ERROR: {
-      LOG(FATAL) << name_ << " " << msg;
+      LOG(FATAL) << msg;
      break;
    }
    // This is useless for now. But would catch it in future if enum changes. It
diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h
index 7f3544f8cfda8d..d71f66b933a806 100644
--- a/tensorflow/contrib/tensorrt/log/trt_logger.h
+++ b/tensorflow/contrib/tensorrt/log/trt_logger.h
@@ -27,11 +27,9 @@ namespace tensorrt {
 
 // Logger for GIE info/warning/errors
 class Logger : public nvinfer1::ILogger {
- public:
-  Logger(string name = "DefaultLogger") : name_(name){};
+ private:
   void log(nvinfer1::ILogger::Severity severity, const char* msg) override;
 
- private:
   string name_;
 };
 
diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py
index 0b2321b5fc7bcb..7e050a768ce97a 100644
--- a/tensorflow/contrib/tensorrt/python/__init__.py
+++ b/tensorflow/contrib/tensorrt/python/__init__.py
@@ -20,6 +20,5 @@
 # pylint: disable=unused-import,line-too-long
 from tensorflow.contrib.tensorrt.python.ops import trt_engine_op
-from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph
 from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph
 # pylint: enable=unused-import,line-too-long
 
diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py
index 666220d78c7696..9454862f857ab7 100644
--- a/tensorflow/contrib/tensorrt/python/trt_convert.py
+++ b/tensorflow/contrib/tensorrt/python/trt_convert.py
@@ -20,17 +20,11 @@
 # pylint: disable=unused-import,line-too-long
 import six as _six
-from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert
 from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert
 from tensorflow.core.framework import graph_pb2
-from tensorflow.core.protobuf import rewriter_config_pb2
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import errors_impl as _impl
-from tensorflow.python.framework import meta_graph
 from tensorflow.python.framework import ops
-from tensorflow.python.grappler import tf_optimizer
-from tensorflow.python.util import compat
-# pylint: enable=unused-import,line-too-long
 
 # TODO(skama): get outputs from session when implemented as c++
@@ -38,33 +32,22 @@
 def create_inference_graph(input_graph_def,
                            outputs,
                            max_batch_size=1,
-                           max_workspace_size_bytes=2 << 20,
-                           precision_mode="FP32",
-                           minimum_segment_size=3):
+                           max_workspace_size_bytes=2 << 20):
   """Python wrapper for the TRT transformation.
+
   Args:
     input_graph_def: GraphDef object containing a model to be transformed.
-    outputs: list of tensors or node names for the model outputs.
+    outputs: List of tensors or node names for the model outputs.
     max_batch_size: max size for the input batch
    max_workspace_size_bytes: parameter to control memory allocation (in Bytes)
-    precision_mode: one of 'FP32', 'FP16' and 'INT8'
-    minimum_segment_size: the minimum number of nodes required for a subgraph
-      to be replaced by TRTEngineOp.
 
   Returns:
     New GraphDef with TRTEngineOps placed in graph replacing subgraphs.
 
   Raises:
-    ValueError: if the provided precision mode is invalid.
     RuntimeError: if the returned status message is malformed.
   """
-  supported_precision_modes = {"FP32": 0, "FP16": 1, "INT8": 2}
-  if precision_mode.upper() not in supported_precision_modes:
-    raise ValueError(("precision mode '{}' is not supported."
- "It should be one of {}").format( - precision_mode, "{'FP32', 'FP16', 'INT8'}")) - mode = supported_precision_modes[precision_mode.upper()] def py2bytes(inp): return inp @@ -100,7 +83,7 @@ def py3string(inp): # pair or strings where first one is encoded status and the second # one is the transformed graphs protobuf string. out = trt_convert(input_graph_def_str, out_names, max_batch_size, - max_workspace_size_bytes, mode, minimum_segment_size) + max_workspace_size_bytes) status = to_string(out[0]) output_graph_def_string = out[1] del input_graph_def_str # Save some memory @@ -118,46 +101,3 @@ def py3string(inp): output_graph_def.ParseFromString(output_graph_def_string) del output_graph_def_string # Save some memory return output_graph_def - - -def calib_graph_to_infer_graph(calibration_graph_def): - """Convert an existing calibration graph to inference graph. - - Args: - calibration_graph_def: the calibration GraphDef object with calibration data - Returns: - New GraphDef with TRTEngineOps placed in graph replacing calibration nodes. - Raises: - RuntimeError: if the returned status message is malformed. - """ - - def py2string(inp): - return inp - - def py3string(inp): - return inp.decode("utf-8") - - if _six.PY2: - to_string = py2string - else: - to_string = py3string - - graph_str = calibration_graph_def.SerializeToString() - out = calib_convert(graph_str) - status = to_string(out[0]) - output_graph_def_string = out[1] - del graph_str # Save some memory - if len(status) < 2: - raise _impl.UnknownError(None, None, status) - if status[:2] != "OK": - msg = status.split(";") - if len(msg) == 1: - raise RuntimeError("Status message is malformed {}".format(status)) - # pylint: disable=protected-access - raise _impl._make_specific_exception(None, None, ";".join(msg[1:]), - int(msg[0])) - # pylint: enable=protected-access - output_graph_def = graph_pb2.GraphDef() - output_graph_def.ParseFromString(output_graph_def_string) - del output_graph_def_string # Save some memory - return output_graph_def diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 74df75902ed4ba..3d5cc76c4256be 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -23,7 +23,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda/include/cuda_runtime_api.h" +#include "cuda_runtime_api.h" namespace tensorflow { namespace tensorrt { @@ -38,18 +38,22 @@ TRTInt8Calibrator::TRTInt8Calibrator( done_(false), dev_buffers_(dev_buffers), calib_running_(false), - batch_is_set_(false), engine_name_(engine_name) {} -bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, - const cudaStream_t stream) { - tensorflow::mutex_lock lock(cond_mtx_); - while ((calib_running_ || batch_is_set_) && - !done_) { // wait while calibration is running - cond_.wait(lock); - } +bool TRTInt8Calibrator::setBatch( + const std::unordered_map& data) { + // TODO(aaroey): make sure that in future PR: + // 1. the mutex_lock is outside of the loop + // 2. wait() is used instead of wait_for() + // 3. done_ is to be protected by the mutex + // 4. 
   if (done_) return false;
-  CHECK(!calib_running_ && !batch_is_set_);
+  while (calib_running_.load(
+      std::memory_order_acquire)) {  // wait while calibration is running
+    tensorflow::mutex_lock l(cond_mtx_);
+    cond_.wait_for(l, std::chrono::milliseconds(50));
+    if (done_) return false;
+  }
   VLOG(1) << "Set Batch Waiting finished";
   for (const auto it : data) {
     auto devptr = dev_buffers_.find(it.first);
@@ -61,32 +65,27 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map<string, void*>& data,
 
     // TODO(aaroey): we should not use sync copy on default stream. Make sure
     // stream->ThenMemcpy() is used in future PRs.
-    // TODO(sami,aaroey): Need to figure out a way to ensure synchronization
-    // between streams, perhaps using a tensor?
-    auto status = cudaMemcpyAsync(d.first, it.second, d.second,
-                                  cudaMemcpyDeviceToDevice, stream);
+    auto status =
+        cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice);
     if (status != cudaSuccess) {
       LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first
                  << "' failed with " << status;
     }
   }
-
-  // TODO(sami,aaroey): Find an alternative way!
-  cudaStreamSynchronize(
-      stream);  // we have to wait for the stream before returning!
-  batch_is_set_ = true;
+  calib_running_.store(true, std::memory_order_release);  // release builder
   cond_.notify_all();
   return true;
 }
 
 bool TRTInt8Calibrator::getBatch(void** bindings, const char** names,
                                  int num_bindings) {
-  tensorflow::mutex_lock lock(cond_mtx_);
-  calib_running_ = false;
+  calib_running_.store(false, std::memory_order_release);  // wait for new batch
   cond_.notify_all();
-  while ((!batch_is_set_ && !done_)) {  // wait until new batch arrives
-    cond_.wait(lock);
-
+  while (!calib_running_.load(
+      std::memory_order_acquire)) {  // wait until new batch arrives
+    tensorflow::mutex_lock l(cond_mtx_);
+    cond_.wait_for(l, std::chrono::milliseconds(50));
+    if (done_) return false;
   }
   if (done_) {
     return false;
@@ -101,8 +100,6 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names,
     bindings[i] = it->second.first;
   }
 
-  batch_is_set_ = false;
-  calib_running_ = true;
   return true;
 }
 
@@ -110,12 +107,6 @@ const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) {
   return nullptr;
 }
 
-void TRTInt8Calibrator::setDone() {
-  tensorflow::mutex_lock lock(cond_mtx_);
-  done_ = true;
-  cond_.notify_all();
-}
-
 void TRTInt8Calibrator::writeCalibrationCache(const void* ptr,
                                               std::size_t length) {}
 
 TRTInt8Calibrator::~TRTInt8Calibrator() {
@@ -124,6 +115,5 @@ TRTInt8Calibrator::~TRTInt8Calibrator() {
 }  // namespace tensorrt
 }  // namespace tensorflow
-
 #endif
 #endif
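The calibrator above implements a one-element queue between the TF op (producer, `setBatch`) and TensorRT's calibration thread (consumer, `getBatch`), with `calib_running_` as the handoff flag. A distilled, self-contained sketch of that handshake using standard-library primitives; the elided device-copy steps are marked in comments:

```cpp
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>

std::atomic<bool> calib_running{false};
std::atomic<bool> done{false};
std::mutex mtx;
std::condition_variable cv;

bool Produce() {  // analogous to setBatch
  while (calib_running.load(std::memory_order_acquire)) {
    std::unique_lock<std::mutex> l(mtx);
    cv.wait_for(l, std::chrono::milliseconds(50));
    if (done) return false;
  }
  // ... copy the batch into device buffers here ...
  calib_running.store(true, std::memory_order_release);  // release consumer
  cv.notify_all();
  return true;
}

bool Consume() {  // analogous to getBatch
  calib_running.store(false, std::memory_order_release);  // request a batch
  cv.notify_all();
  while (!calib_running.load(std::memory_order_acquire)) {
    std::unique_lock<std::mutex> l(mtx);
    cv.wait_for(l, std::chrono::milliseconds(50));
    if (done) return false;
  }
  // ... hand device buffer pointers to the caller here ...
  return true;
}
```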
diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h
index d77aa2c5ab1847..8830f7efe75b42 100644
--- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h
+++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h
@@ -24,10 +24,7 @@ limitations under the License.
 #if GOOGLE_CUDA
 #if GOOGLE_TENSORRT
-
-#include "cuda/include/cuda_runtime_api.h"
 #include "tensorrt/include/NvInfer.h"
-
 namespace tensorflow {
 namespace tensorrt {
 // This class provides a 1 element queue to match TFs push model to
@@ -42,9 +39,8 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator {
   int getBatchSize() const override;
   bool getBatch(void* bindings[], const char* names[],
                 int num_bindings) override;
-  bool setBatch(const std::unordered_map<string, void*>& data,
-                const cudaStream_t stream);
-  void setDone();
+  bool setBatch(const std::unordered_map<string, void*>& data);
+  void setDone() { done_ = true; }
   const void* readCalibrationCache(std::size_t& length) override;
   void writeCalibrationCache(const void* ptr, std::size_t length) override;
   ~TRTInt8Calibrator();
@@ -59,14 +55,11 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator {
   const std::unordered_map<string, std::pair<void*, size_t>>
       dev_buffers_;  // map to keep tensorrt input buffers and sizes keyed with
                      // buffer names
-  bool calib_running_;
-  bool batch_is_set_;
+  std::atomic_bool calib_running_;
   string engine_name_;
 };
-
 }  // namespace tensorrt
 }  // namespace tensorflow
-
+#endif  // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_
 #endif
 #endif
-#endif  // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_
 
diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py
index 0b661bd536c7c7..c78f6f222457a8 100644
--- a/tensorflow/contrib/tensorrt/test/test_tftrt.py
+++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py
@@ -60,7 +60,6 @@ def get_simple_graph_def():
 
 def run_graph(gdef, dumm_inp):
-  """Run given graphdef once."""
   gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
   ops.reset_default_graph()
   g = ops.Graph()
@@ -75,65 +74,15 @@ def run_graph(gdef, dumm_inp):
   return val
 
-# Use real data that is representative of the inference dataset
-# for calibration. For this test script it is random data.
-def run_calibration(gdef, dumm_inp):
-  """Run given calibration graph multiple times."""
-  gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50)
-  ops.reset_default_graph()
-  g = ops.Graph()
-  with g.as_default():
-    inp, out = importer.import_graph_def(
-        graph_def=gdef, return_elements=["input", "output"])
-    inp = inp.outputs[0]
-    out = out.outputs[0]
-  with csess.Session(
-      config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess:
-    # run over real calibration data here, we are mimicking a calibration
-    # set of 30 different batches. Use as much calibration data as you want
-    for _ in range(30):
-      val = sess.run(out, {inp: dumm_inp})
-  return val
-
-
 if "__main__" in __name__:
   inp_dims = (100, 24, 24, 2)
   dummy_input = np.random.random_sample(inp_dims)
-  orig_graph = get_simple_graph_def()  # use a frozen graph for inference
+  gdef = get_simple_graph_def()
   # Get optimized graph
-  trt_graph = trt.create_inference_graph(
-      input_graph_def=orig_graph,
-      outputs=["output"],
-      max_batch_size=inp_dims[0],
-      max_workspace_size_bytes=1 << 25,
-      precision_mode="FP32",  # TRT Engine precision "FP32","FP16" or "INT8"
-      minimum_segment_size=2  # minimum number of nodes in an engine
-  )
-  o1 = run_graph(orig_graph, dummy_input)
+  trt_graph = trt.create_inference_graph(gdef, ["output"], inp_dims[0])
+  o1 = run_graph(gdef, dummy_input)
   o2 = run_graph(trt_graph, dummy_input)
   o3 = run_graph(trt_graph, dummy_input)
   assert np.array_equal(o1, o2)
   assert np.array_equal(o3, o2)  # sanity check
-  fp16_graph = trt.create_inference_graph(
-      input_graph_def=orig_graph,
-      outputs=["output"],
-      max_batch_size=inp_dims[0],
-      max_workspace_size_bytes=1 << 25,
-      precision_mode="FP16",  # TRT Engine precision "FP32","FP16" or "INT8"
-      minimum_segment_size=2  # minimum number of nodes in an engine
-  )
-  int8_calib_gdef = trt.create_inference_graph(
-      input_graph_def=orig_graph,
-      outputs=["output"],
-      max_batch_size=inp_dims[0],
-      max_workspace_size_bytes=1 << 25,
-      precision_mode="INT8",  # TRT Engine precision "FP32","FP16" or "INT8"
-      minimum_segment_size=2  # minimum number of nodes in an engine
-  )
-  o4 = run_graph(fp16_graph, dummy_input)
-  _ = run_calibration(int8_calib_gdef, dummy_input)
-  int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef)
-  o5 = run_graph(int8_graph, dummy_input)
-  assert np.allclose(o1, o4)
-  assert np.allclose(o1, o5)
   print("Pass")
 
diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i
index 46480e99a113af..d679945d569c17 100644
--- a/tensorflow/contrib/tensorrt/trt_conversion.i
+++ b/tensorflow/contrib/tensorrt/trt_conversion.i
@@ -64,17 +64,13 @@ PyObject* pair_helper(std::pair<string, string>* in) {
 %ignoreall
 %unignore tensorflow;
 %unignore trt_convert;
-%unignore calib_convert;
 
 %{
-
 std::pair<string, string> trt_convert(
     string graph_def_string,  // The serialized GraphDef string.
     std::vector<string> output_names,
     size_t max_batch_size,
-    size_t max_workspace_size_bytes,
-    int precision_mode,
-    int minimum_segment_size
+    size_t max_workspace_size_bytes
     // Unfortunately we can't use TF_Status here since it
     // is in c/c_api and brings in a lot of other libraries
    // which in turn declare ops. These ops are included
@@ -94,64 +90,16 @@ std::pair<string, string> trt_convert(
     return std::pair<string, string>{out_status, ""};
   }
 
-  if (precision_mode < 0 || precision_mode > 2) {
-    out_status = "InvalidArgument;Invalid precision_mode";
-    return std::pair<string, string>{out_status, ""};
-  }
   if (!output_names.size()) {
     out_status = "InvalidArgument;Size of the output_names vector is 0";
     return std::pair<string, string>{out_status, ""};
+    // return "";
   }
   tensorflow::GraphDef outGraph;
   tensorflow::Status conversion_status =
       tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT(
           graph_def, output_names, max_batch_size, max_workspace_size_bytes,
-          &outGraph, precision_mode, minimum_segment_size);
-  if (!conversion_status.ok()) {
-    auto retCode = (int)conversion_status.code();
-    char buff[2000];
-    snprintf(buff, 2000, "%d;%s", retCode,
-             conversion_status.error_message().c_str());
-    out_status = buff;
-    return std::pair<string, string>{out_status, ""};
-  }
-  string result;
-  if (!outGraph.SerializeToString(&result)) {
-    out_status = "InvalidArgument;Couldn't serialize output as a GraphDef";
-    return std::pair<string, string>{out_status, ""};
-  }
-  out_status = "OK;All good!";
-  return std::pair<string, string>{out_status, result};
-#else
-  // Returns FAILED_PRECONDITION.
-  return std::pair<string, string>{"9;TensorRT is not enabled!", ""};
-#endif  // GOOGLE_CUDA && GOOGLE_TENSORRT
-}
-
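As the comments in the wrapper explain, a `TF_Status` cannot cross this SWIG boundary, so the status is smuggled to Python as a `"code;message"` string with `"OK;..."` on success; the Python side splits on the first `;` and re-raises. A minimal sketch of the encoder side of that protocol, mirroring the snprintf pattern above:

```cpp
#include <cstdio>
#include <string>

// Encode a status code and message into the "code;message" wire format used
// between trt_convert (C++) and trt_convert.py; code 0 maps to "OK".
std::string EncodeStatus(int code, const std::string& message) {
  if (code == 0) return "OK;" + message;
  char buff[2000];
  snprintf(buff, sizeof(buff), "%d;%s", code, message.c_str());
  return buff;
}
```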
-std::pair<string, string> calib_convert(string graph_def_string  // const tensorflow::GraphDef&
-    // unfortunately we can't use TF_Status here since it
-    // is in c/c_api and brings in a lot of other libraries
-    // which in turn declare ops. These ops are included
-    // statically in our library and cause an abort when
-    // module is loaded due to double registration
-    // until Tensorflow properly exposes these headers
-    // we have to work around this by returning a string
-    // and converting it to exception on python side.
-    //,TF_Status* out_status) {
-) {
-#if GOOGLE_CUDA && GOOGLE_TENSORRT
-  string out_status;
-
-  tensorflow::GraphDef graph_def;
-  if (!graph_def.ParseFromString(graph_def_string)) {
-    out_status = "InvalidArgument;Couldn't interpret input as a GraphDef";
-    return std::pair<string, string>{out_status, ""};
-  }
-
-  tensorflow::GraphDef outGraph;
-  tensorflow::Status conversion_status =
-      tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def,
-                                                                   &outGraph);
+          &outGraph);
   if (!conversion_status.ok()) {
     auto retCode = (int)conversion_status.code();
     char buff[2000];
@@ -174,13 +122,10 @@ std::pair<string, string> calib_convert(string graph_def_string  // const tenso
 }
 %}
 
-std::pair<string, string> calib_convert(string graph_def_string);
-
 std::pair<string, string> trt_convert(string graph_def_string,
                                       std::vector<string> output_names,
                                       size_t max_batch_size,
-                                      size_t max_workspace_size_bytes,
-                                      int precision_mode, int minimum_segment_size);
+                                      size_t max_workspace_size_bytes);
 %unignoreall
 
diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD
index 70bf67c7793ba9..bb86ecb2209f9b 100644
--- a/tensorflow/contrib/timeseries/examples/BUILD
+++ b/tensorflow/contrib/timeseries/examples/BUILD
@@ -25,10 +25,7 @@ py_test(
     srcs = ["predict_test.py"],
     data = ["data/period_trend.csv"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_windows",  # TODO: needs investigation on Windows
-        "notsan",  # b/67513579
-    ],
+    tags = ["notsan"],  # b/67513579
     deps = [
         ":predict",
         "//tensorflow/python:client_testlib",
 
diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD
index 64f5cd83575e1e..ed3ed4c0e1731d 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD
@@ -156,7 +156,9 @@ py_test(
         "head_test.py",
     ],
     srcs_version = "PY2AND3",
-    tags = ["no_pip_gpu"],  # b/63391119
+    tags = [
+        "no_pip_gpu",  # b/63391119
+    ],
     deps = [
         ":feature_keys",
         ":head",
@@ -425,7 +427,6 @@ py_test(
     srcs_version = "PY2AND3",
     tags = [
         "no_pip_gpu",  # b/63391119
-        "no_windows",  # TODO: needs investigation on Windows
     ],
     deps = [
         ":feature_keys",
 
diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD
index 07df7bc9a5cd45..c86d06e9236962 100644
--- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD
+++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD
@@ -40,7 +40,6 @@ py_test(
     timeout = "long",  # Moderate but for asan
     srcs = ["state_space_model_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["no_windows"],  # TODO: needs investigation on Windows
     deps = [
         ":state_space_model",
         "//tensorflow/contrib/layers:layers_py",
 
diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD
index f9d433a45b1263..ed930e44e8c0cc 100644
--- a/tensorflow/contrib/tpu/BUILD
+++ b/tensorflow/contrib/tpu/BUILD
@@ -225,7 +225,6 @@ tf_py_test(
         "//tensorflow/python:framework",
         "//tensorflow/python:layers",
     ],
-    tags = ["no_windows"],  # TODO: needs investigation on Windows
 )
 
 tf_py_test(
 
diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py
index dca01d26f4c629..f4283cd9ed6eb1 100644
--- a/tensorflow/contrib/util/loader.py
+++ b/tensorflow/contrib/util/loader.py
@@ -42,10 +42,9 @@ def load_op_library(path):
     plugin.
""" if os.name == 'nt': - # To avoid making every user_ops aware of windows, re-write - # the file extension from .so to .dll if .so file doesn't exist. - if not os.path.exists(path): - path = re.sub(r'\.so$', '.dll', path) + # To avoid makeing every user_ops aware of windows, re-write + # the file extension from .so to .dll. + path = re.sub(r'\.so$', '.dll', path) # Currently we have only some user_ops as dlls on windows - don't try # to load them if the dll is not found. diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 5c9fd2f406de20..352e183104983e 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3327,10 +3327,6 @@ tf_cc_test( size = "small", srcs = ["common_runtime/function_test.cc"], linkstatic = tf_kernel_tests_linkstatic(), - tags = [ - "manual", - "no_oss", - ], deps = [ ":core", ":core_cpu", diff --git a/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt deleted file mode 100644 index 9fabe7863e4bf8..00000000000000 --- a/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt +++ /dev/null @@ -1,18 +0,0 @@ -op { - graph_op_name: "SlideDataset" - in_arg { - name: "window_size" - description: <contents()); - input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX); // Unlimited // Pre-parse into local storage, then delegate to device. if (!meta_.ParseFromCodedStream(&input) || !input.ConsumedEntireMessage()) { @@ -217,7 +217,7 @@ bool TensorResponse::ParseTensorSubmessage( bool TensorResponse::ParseFast(Source* source) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX); // Unlimited while (true) { auto p = input.ReadTagWithCutoff(127); int tag = GetTagFieldNumber(p.first); diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index cfe23d1ffe47cc..beaf0adbc5e097 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -474,11 +474,11 @@ class GraphDatasetBase : public DatasetBase { } // Key for storing the Dataset graph in the serialized format. - TF_EXPORT static const char kDatasetGraphKey[]; + static const char kDatasetGraphKey[]; // Key for storing the output node of the Dataset graph in the serialized // format. 
-  TF_EXPORT static const char kDatasetGraphOutputNodeKey[];
+  static const char kDatasetGraphOutputNodeKey[];
 
  private:
   Status Serialize(OpKernelContext* ctx, string* serialized_graph_def,
 
diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc
index 1507b6eae26596..02038c5d77d010 100644
--- a/tensorflow/core/graph/mkl_layout_pass.cc
+++ b/tensorflow/core/graph/mkl_layout_pass.cc
@@ -2492,10 +2492,10 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
                       mkl_op_registry::GetMklOpName(csinfo_.identity),
                       CopyAttrsDataType, AlwaysRewrite});
     rinfo_.push_back({csinfo_.lrn, mkl_op_registry::GetMklOpName(csinfo_.lrn),
-                      CopyAttrsLRN, LrnRewrite});
+                      CopyAttrsLRN, AlwaysRewrite});
     rinfo_.push_back({csinfo_.lrn_grad,
                       mkl_op_registry::GetMklOpName(csinfo_.lrn_grad),
-                      CopyAttrsLRN, LrnRewrite});
+                      CopyAttrsLRN, AlwaysRewrite});
     rinfo_.push_back({csinfo_.max_pool,
                       mkl_op_registry::GetMklOpName(csinfo_.max_pool),
                       CopyAttrsPooling, NonDepthBatchWisePoolRewrite});
@@ -2865,28 +2865,6 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     return false;
   }
 
-  // If the depth_radius of LRN is not 2, then MKL DNN takes the unoptimized
-  // path. The unoptimized path is slow. Thus we don't rewrite the node
-  // and use default Eigen. But for depth_radius=2, the MKL DNN optimized
-  // path is taken, i.e., the Eigen node is rewritten by an MKL DNN node.
-  static bool LrnRewrite(const Node* n) {
-    CHECK_NOTNULL(n);
-
-    int depth_radius;
-    CHECK_EQ(GetNodeAttr(n->def(), "depth_radius", &depth_radius).ok(), true);
-
-    // if the depth_radius of LRN is not 2, don't rewrite the node by MKL DNN
-    // and use the Eigen node instead
-    if (depth_radius == 2) {
-      return true;
-    }
-    VLOG(1) << "LrnRewrite: The model sets depth_radius as not 2, which "
-            << "case is not optimized by Intel MKL, thus using the Eigen op "
-            << "for LRN";
-
-    return false;
-  }
-
   static bool AddNRewrite(const Node* n) {
     CHECK_NOTNULL(n);
@@ -3550,13 +3528,11 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
   string data_format;
   string padding;
   std::vector<int32> strides;
-  std::vector<int32> dilations;
   bool use_cudnn_on_gpu;
 
   // Get all attributes from old node.
   TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T));
   TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides));
-  TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations));
   TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding));
   TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format));
   TF_CHECK_OK(
@@ -3565,7 +3541,6 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node,
 
   // Add attributes to new node.
nb->Attr("T", T); nb->Attr("strides", strides); - nb->Attr("dilations", dilations); nb->Attr("padding", padding); nb->Attr("data_format", data_format); nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); @@ -3803,14 +3778,12 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, DataType T_pred, T_succ; string padding; std::vector strides; - std::vector dilations; string data_format_pred, data_format_succ; bool use_cudnn_on_gnu; TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides)); - TF_CHECK_OK(GetNodeAttr(pred->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu)); diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 8f13c4a7022165..244653504d1899 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -25,7 +25,6 @@ limitations under the License. #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -409,7 +408,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { frame_children_[frame_ids[0]].insert(frame_ids[1]); frame_parent_[frame_ids.back()] = frame_ids[frame_ids.size() - 2]; } - if (frame_ids.size() >= 1) { + if (!frame_ids.empty()) { frame_children_.insert(std::make_pair(frame_ids.back(), empty_set_)); if (node->op() == "LoopCond") { if (loop_cond_.count(frame_ids.back())) { @@ -428,7 +427,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { } for (auto it = frame_children_.begin(); it != frame_children_.end(); ++it) { - if (it->second.size() == 0) { + if (it->second.empty()) { worklist.push_back(it->first); } } @@ -441,7 +440,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { if (parent_it != frame_parent_.end()) { int parent_id = parent_it->second; frame_children_[parent_id].erase(frame_id); - if (frame_children_[parent_id].size() == 0) { + if (frame_children_[parent_id].empty()) { worklist.push_back(parent_id); } } @@ -465,6 +464,7 @@ Status LoopOptimizer::LoopInvariantNodeMotion() { Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { TF_RETURN_IF_ERROR(RemoveStackOps(item.graph, optimized_graph)); + optimized_graph_ = optimized_graph; // Set up helper data structures. 
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD
index 2e39f25fc15c3f..48d5955ad15ad4 100644
--- a/tensorflow/core/kernels/BUILD
+++ b/tensorflow/core/kernels/BUILD
@@ -5157,6 +5157,7 @@ tf_kernel_library(
     ],
     hdrs = [
         "meta_support.h",
+        "quantization_utils.h",
         "reference_gemm.h",
     ],
     deps = [
@@ -5231,7 +5232,6 @@ tf_cc_test(
     name = "quantization_utils_test",
     srcs = ["quantization_utils_test.cc"],
     deps = [
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:core_cpu",
@@ -5294,7 +5294,6 @@ tf_cc_test(
     deps = [
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:framework",
@@ -5356,7 +5355,6 @@ tf_cc_test(
         ":math",
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:client_session",
@@ -5379,7 +5377,6 @@ tf_cc_test(
     deps = [
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:client_session",
@@ -5444,7 +5441,6 @@ tf_cc_test(
     deps = [
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:framework",
@@ -5465,7 +5461,6 @@ tf_cc_test(
     deps = [
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:framework",
@@ -5505,7 +5500,6 @@ tf_cc_test(
     deps = [
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:framework",
@@ -5562,7 +5556,6 @@ tf_cc_test(
         ":math",
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/cc:cc_ops",
         "//tensorflow/cc:client_session",
@@ -5585,7 +5578,6 @@ tf_cc_test(
     deps = [
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:framework",
@@ -5622,7 +5614,6 @@ tf_cc_test(
     deps = [
         ":ops_testutil",
         ":ops_util",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:core_cpu",
@@ -5644,7 +5635,6 @@ tf_cc_test(
     deps = [
         ":batch_norm_op",
         ":ops_testutil",
-        ":quantization_utils",
         ":quantized_ops",
         "//tensorflow/core:array_ops_op_lib",
         "//tensorflow/core:core_cpu_internal",
 
diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD
index 01754ec21acd21..484d4f88d611c8 100644
--- a/tensorflow/core/kernels/data/BUILD
+++ b/tensorflow/core/kernels/data/BUILD
@@ -113,19 +113,6 @@ tf_kernel_library(
     ],
 )
 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/core/framework/partial_tensor_shape.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/kernels/batch_util.h" -#include "tensorflow/core/kernels/data/dataset.h" - -namespace tensorflow { - -namespace { - -// See documentation in ../ops/dataset_ops.cc for a high-level -// description of the following op. - -class SlideDatasetOp : public UnaryDatasetOpKernel { - public: - explicit SlideDatasetOp(OpKernelConstruction* ctx) - : UnaryDatasetOpKernel(ctx) {} - - void MakeDataset(OpKernelContext* ctx, DatasetBase* input, - DatasetBase** output) override { - int64 window_size = 0; - int64 stride = 1; - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "window_size", &window_size)); - OP_REQUIRES_OK(ctx, - ParseScalarArgument(ctx, "stride", &stride)); - OP_REQUIRES( - ctx, window_size > 0, - errors::InvalidArgument("Window size must be greater than zero.")); - OP_REQUIRES( - ctx, stride > 0 && stride < window_size, - errors::InvalidArgument("Stride must be in [1, window_size).")); - - *output = new Dataset(ctx, window_size, stride, input); - } - - private: - class Dataset : public GraphDatasetBase { - public: - Dataset(OpKernelContext* ctx, int64 window_size, int64 stride, const DatasetBase* input) - : GraphDatasetBase(ctx), window_size_(window_size), stride_(stride), input_(input) { - input_->Ref(); - - const auto& input_shapes = input_->output_shapes(); - output_shapes_.reserve(input_shapes.size()); - for (const auto& input_shape : input_shapes) { - output_shapes_.emplace_back( - PartialTensorShape({-1}).Concatenate(input_shape)); - } - } - - ~Dataset() override { input_->Unref(); } - - std::unique_ptr MakeIterator( - const string& prefix) const override { - return std::unique_ptr(new Iterator( - Iterator::Params{this, strings::StrCat(prefix, "::Slide")})); - } - - const DataTypeVector& output_dtypes() const override { - return input_->output_dtypes(); - } - - const std::vector& output_shapes() const override { - return output_shapes_; - } - - string DebugString() override { - return strings::StrCat("SlideDatasetOp(", window_size_, ", ", stride_, ")::Dataset"); - } - - protected: - Status AsGraphDefInternal(OpKernelContext* ctx, DatasetGraphDefBuilder* b, - Node** output) const override { - Node* input_graph_node = nullptr; - TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); - Node* window_size = nullptr; - Node* stride = nullptr; - TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size)); - TF_RETURN_IF_ERROR(b->AddScalar(stride_, &stride)); - TF_RETURN_IF_ERROR( - b->AddDataset(this, {input_graph_node, window_size, stride}, output)); - return Status::OK(); - } - - private: - - class Iterator : public DatasetIterator { - public: - explicit Iterator(const Params& params) - : DatasetIterator(params), - input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} - - Status 
-                             std::vector<Tensor>* out_tensors,
-                             bool* end_of_sequence) override {
-        const int64 window_size = dataset()->window_size_;
-        const int64 stride = dataset()->stride_;
-        std::vector<std::vector<Tensor>> batch_elements;
-        {
-          mutex_lock l(mu_);
-          if (!input_impl_) {
-            *end_of_sequence = true;
-            return Status::OK();
-          }
-          batch_elements.reserve(window_size);
-          const bool first_call = cache_.empty();
-          if (first_call) {
-            cache_.reserve(window_size);
-          } else {
-            // Reuse the cache from the previous iteration.
-            cache_.swap(batch_elements);
-          }
-          // Fill up with new elements.
-          *end_of_sequence = false;
-          for (size_t i = batch_elements.size();
-               i < window_size && !*end_of_sequence; ++i) {
-            std::vector<Tensor> batch_element_tuple;
-            TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple,
-                                                    end_of_sequence));
-            if (!*end_of_sequence) {
-              batch_elements.push_back(std::move(batch_element_tuple));
-            } else {
-              input_impl_.reset();
-            }
-          }
-          // Drop the final smaller blocks.
-          if (batch_elements.size() < window_size) {
-            DCHECK(*end_of_sequence);
-            return Status::OK();
-          }
-          // Cache the data used for the next iteration.
-          for (size_t i = stride; i < window_size; ++i) {
-            cache_.emplace_back(batch_elements[i]);
-          }
-        }
-
-        // Construct output tensors.
-        // The code below is copied from batch_dataset_op.cc.
-        const size_t num_tuple_components = batch_elements[0].size();
-        const int64 num_batch_elements = batch_elements.size();
-        for (size_t component_index = 0;
-             component_index < num_tuple_components; ++component_index) {
-          const Tensor& first_element = batch_elements[0][component_index];
-          TensorShape batch_component_shape({num_batch_elements});
-          batch_component_shape.AppendShape(first_element.shape());
-          Tensor batch_component(cpu_allocator(), first_element.dtype(),
-                                 batch_component_shape);
-          // Build the output tuple component by copying one slice
-          // from each input element in the batch.
-          for (size_t i = 0; i < num_batch_elements; ++i) {
-            if (batch_elements[i][component_index].shape() !=
-                first_element.shape()) {
-              return errors::InvalidArgument(
-                  "Cannot batch tensors with different shapes in component ",
-                  component_index, ". First element had shape ",
-                  first_element.shape().DebugString(), " and element ", i,
-                  " had shape ",
-                  batch_elements[i][component_index].shape().DebugString(),
-                  ".");
-            }
-            TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice(
-                std::move(batch_elements[i][component_index]), &batch_component,
-                i));
-          }
-          out_tensors->emplace_back(std::move(batch_component));
-        }
-        *end_of_sequence = false;
-        return Status::OK();
-      }
-
-     protected:
-      Status SaveInternal(IteratorStateWriter* writer) override {
-        mutex_lock l(mu_);
-        if (!input_impl_) {
-          TF_RETURN_IF_ERROR(
-              writer->WriteScalar(full_name("input_impl_empty"), ""));
-        } else {
-          TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_));
-        }
-        // Save cache.
- TF_RETURN_IF_ERROR( - writer->WriteScalar(strings::StrCat("cache_size"), cache_.size())); - for (int64 i = 0; i < cache_.size(); i++) { - TF_RETURN_IF_ERROR(writer->WriteScalar( - strings::StrCat("cache[", i, "]_size"), cache_[i].size())); - for (int64 j = 0; j < cache_[i].size(); j++) { - TF_RETURN_IF_ERROR(writer->WriteTensor( - strings::StrCat("cache[", i, "][", j, "]"), cache_[i][j])); - } - } - return Status::OK(); - } - - Status RestoreInternal(IteratorContext* ctx, - IteratorStateReader* reader) override { - mutex_lock l(mu_); - if (!reader->Contains(full_name("input_impl_empty"))) { - TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); - } else { - input_impl_.reset(); - } - // Restore cache. - int64 cache_size; - TF_RETURN_IF_ERROR( - reader->ReadScalar(strings::StrCat("cache_size"), &cache_size)); - cache_.resize(cache_size); - for (int64 i = 0; i < cache_size; i++) { - int64 vector_size; - TF_RETURN_IF_ERROR(reader->ReadScalar( - strings::StrCat("cache[", i, "]_size"), &vector_size)); - cache_[i].resize(vector_size); - for (int64 j = 0; j < vector_size; j++) { - TF_RETURN_IF_ERROR(reader->ReadTensor( - strings::StrCat("cache[", i, "][", j, "]"), &cache_[i][j])); - } - } - return Status::OK(); - } - - private: - mutex mu_; - std::vector> cache_ GUARDED_BY(mu_); - std::unique_ptr input_impl_ GUARDED_BY(mu_); - }; - - const int64 window_size_; - const int64 stride_; - const DatasetBase* const input_; - std::vector output_shapes_; - }; -}; - -REGISTER_KERNEL_BUILDER(Name("SlideDataset").Device(DEVICE_CPU), - SlideDatasetOp); - -} // namespace - -} // namespace tensorflow diff --git a/tensorflow/core/kernels/depthtospace_op.cc b/tensorflow/core/kernels/depthtospace_op.cc index b74a09e2cb8b83..39aa3e9eb07720 100644 --- a/tensorflow/core/kernels/depthtospace_op.cc +++ b/tensorflow/core/kernels/depthtospace_op.cc @@ -187,9 +187,6 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); -REGISTER_KERNEL_BUILDER( - Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), - DepthToSpaceOp); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc index 0656081177e867..184c703599d440 100644 --- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc @@ -238,12 +238,6 @@ struct DepthToSpaceOpFunctor { template struct functor::DepthToSpaceOpFunctor; template struct functor::DepthToSpaceOpFunctor; -// Instantiate the GPU implementations for Eigen::half. -template struct functor::DepthToSpaceOpFunctor; -template struct functor::DepthToSpaceOpFunctor; - // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. 
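For context on the removal above: each batch emitted by the deleted `SlideDataset` kernel is a window of `window_size` consecutive input elements, and successive windows advance by `stride`, so adjacent windows share `window_size - stride` elements. Those shared elements are exactly what the iterator's `cache_` carries across `GetNextInternal` calls instead of re-reading them from the input. A minimal standalone sketch of that indexing, illustrative only and not code from this patch (`SlideWindows` is a hypothetical helper):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Windows of `window_size` elements, starting every `stride` elements.
// Trailing partial windows are dropped, mirroring the
// "Drop the final smaller blocks" branch in the kernel above.
std::vector<std::vector<int64_t>> SlideWindows(
    const std::vector<int64_t>& input, int64_t window_size, int64_t stride) {
  std::vector<std::vector<int64_t>> windows;
  for (int64_t start = 0;
       start + window_size <= static_cast<int64_t>(input.size());
       start += stride) {
    windows.emplace_back(input.begin() + start,
                         input.begin() + start + window_size);
  }
  return windows;
}

int main() {
  // window_size=3, stride=1 over {0,1,2,3,4} -> {0,1,2}, {1,2,3}, {2,3,4}.
  // Each new window re-reads window_size - stride = 2 cached elements.
  for (const auto& window : SlideWindows({0, 1, 2, 3, 4}, 3, 1)) {
    for (int64_t v : window) std::cout << v << ' ';
    std::cout << '\n';
  }
  return 0;
}
```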
diff --git a/tensorflow/core/kernels/depthtospace_op.cc b/tensorflow/core/kernels/depthtospace_op.cc
index b74a09e2cb8b83..39aa3e9eb07720 100644
--- a/tensorflow/core/kernels/depthtospace_op.cc
+++ b/tensorflow/core/kernels/depthtospace_op.cc
@@ -187,9 +187,6 @@ TF_CALL_ALL_TYPES(REGISTER);
 REGISTER_KERNEL_BUILDER(
     Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint<float>("T"),
     DepthToSpaceOp<GPUDevice, float>);
-REGISTER_KERNEL_BUILDER(
-    Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
-    DepthToSpaceOp<GPUDevice, Eigen::half>);
 REGISTER_KERNEL_BUILDER(
     Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint<qint8>("T"),
     DepthToSpaceOp<GPUDevice, qint8>);
diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc
index 0656081177e867..184c703599d440 100644
--- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc
+++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc
@@ -238,12 +238,6 @@ struct DepthToSpaceOpFunctor<GPUDevice, T, FORMAT_NCHW> {
 template struct functor::DepthToSpaceOpFunctor<GPUDevice, float, FORMAT_NHWC>;
 template struct functor::DepthToSpaceOpFunctor<GPUDevice, float, FORMAT_NCHW>;
 
-// Instantiate the GPU implementations for Eigen::half.
-template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::half, FORMAT_NHWC>;
-template struct functor::DepthToSpaceOpFunctor<GPUDevice, Eigen::half, FORMAT_NCHW>;
-
 // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32.
 template struct functor::DepthToSpaceOpFunctor<GPUDevice, int32, FORMAT_NCHW>;
diff --git a/tensorflow/core/kernels/hexagon/BUILD b/tensorflow/core/kernels/hexagon/BUILD
index 7688305019cdbc..108d59db2c21ca 100644
--- a/tensorflow/core/kernels/hexagon/BUILD
+++ b/tensorflow/core/kernels/hexagon/BUILD
@@ -45,7 +45,6 @@ tf_cc_test(
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
         "//tensorflow/core/kernels:cwise_op",
-        "//tensorflow/core/kernels:quantization_utils",
         "//tensorflow/core/kernels:quantized_ops",
         "//tensorflow/core/kernels:reduction_ops",
         "//tensorflow/core/kernels:remote_fused_graph_execute_utils",
diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
index e0706568b15204..1401bc65a45bd8 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc
@@ -444,7 +444,6 @@ class MklConv2DCustomBackpropFilterOp
   ~MklConv2DCustomBackpropFilterOp() {}
 
  private:
-  const int kDilationH = 0, kDilationW = 1;
   void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
                          const MklDnnShape& filter_mkl_shape,
                          const MklDnnShape& obp_mkl_shape) {
@@ -493,9 +492,7 @@ class MklConv2DCustomBackpropFilterOp
       const convolution_forward::primitive_desc& conv_fwd_pd,
       MklDnnData<T>* input, MklDnnData<T>* filter,
       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
-      Tensor** output_tensor,
-      const memory::dims& strides,
-      const memory::dims& dilations,
+      Tensor** output_tensor, const memory::dims& strides,
       const memory::dims& padding_l, const memory::dims& padding_r,
       padding_kind padding,
       const memory::dims& bwd_output_dims,
@@ -521,32 +518,31 @@ class MklConv2DCustomBackpropFilterOp
       bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x);
     }
 
-    if (biasEnabled && (bias_grad != nullptr)) {
-      // Create convolution backward weights with bias primitive.
-      // Use dilated convolution in case dilate rates are greater than zero.
-      auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ?
-          convolution_backward_weights::desc(convolution_direct,
-              input->GetOpMemDesc(), output->GetOpMemDesc(),
-              bias_grad->GetOpMemDesc(),
-              outbackprop->GetOpMemDesc(), strides,
-              dilations, padding_l, padding_r, padding) :
-          convolution_backward_weights::desc(convolution_direct,
-              input->GetOpMemDesc(), output->GetOpMemDesc(),
-              bias_grad->GetOpMemDesc(),
-              outbackprop->GetOpMemDesc(),
-              strides, padding_l, padding_r, padding);
-      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
-                                                                 cpu_engine,
-                                                                 conv_fwd_pd);
-
-      // Allocate output tensor.
-      AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
-                           bwd_output_format, output_tensor);
-
-      CHECK_NOTNULL(*output_tensor);
-      // Set buffer handle using allocated output tensor.
-      output->SetUsrMemDataHandle(*output_tensor);
+    // Create convolution backward weights primitive.
+    auto bwd_desc =
+        (biasEnabled && (bias_grad != nullptr))
+            ? convolution_backward_weights::desc(
+                  convolution_direct, input->GetOpMemDesc(),
+                  output->GetOpMemDesc(), bias_grad->GetOpMemDesc(),
+                  outbackprop->GetOpMemDesc(), strides, padding_l, padding_r,
+                  padding)
+            : convolution_backward_weights::desc(
+                  convolution_direct, input->GetOpMemDesc(),
+                  output->GetOpMemDesc(), outbackprop->GetOpMemDesc(), strides,
+                  padding_l, padding_r, padding);
+
+    auto bwd_pd = convolution_backward_weights::primitive_desc(
+        bwd_desc, cpu_engine, conv_fwd_pd);
+
+    // Allocate output tensor.
+    AllocateOutputTensor(context, bwd_pd, bwd_output_dims, bwd_output_format,
+                         output_tensor);
+
+    CHECK_NOTNULL(*output_tensor);
+    // Set buffer handle using allocated output tensor.
+    output->SetUsrMemDataHandle(*output_tensor);
+    if (biasEnabled && (bias_grad != nullptr)) {
       // Allocate bias_grad tensor
       TensorShape bias_grad_shape({depth});
       Tensor* bias_grad_tensor = nullptr;
@@ -557,32 +553,11 @@ class MklConv2DCustomBackpropFilterOp
           memory::desc({bias_grad_dims}, MklDnnType<T>(), memory::format::x);
       bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor);
       bias_grad->SetUsrMemDataHandle(bias_grad_tensor);
+    }
 
-      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output,
-                                 bias_grad);
+    if (biasEnabled && (bias_grad != nullptr)) {
+      PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad);
     } else {
-      // Create convolution backward weights primitive.
-      // Use dilated convolution in case dilate rates are greater than zero.
-      auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ?
-          convolution_backward_weights::desc(convolution_direct,
-              input->GetOpMemDesc(), output->GetOpMemDesc(),
-              outbackprop->GetOpMemDesc(), strides,
-              dilations, padding_l, padding_r, padding) :
-          convolution_backward_weights::desc(convolution_direct,
-              input->GetOpMemDesc(), output->GetOpMemDesc(),
-              outbackprop->GetOpMemDesc(),
-              strides, padding_l, padding_r, padding);
-      auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc,
-                                                                 cpu_engine,
-                                                                 conv_fwd_pd);
-
-      // Allocate output tensor.
-      AllocateOutputTensor(context, bwd_pd, bwd_output_dims,
-                           bwd_output_format, output_tensor);
-
-      CHECK_NOTNULL(*output_tensor);
-      // Set buffer handle using allocated output tensor.
-      output->SetUsrMemDataHandle(*output_tensor);
       PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output);
     }
   }
diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
index d203c04934131e..eeed0095310280 100644
--- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc
@@ -369,7 +369,6 @@ class MklConv2DCustomBackpropInputOp
  private:
   const int kInputIndex_Filter = 1, kInputIndex_InputSizes = 0,
             kInputIndex_OutBackProp = 2;
-  const int kDilationH = 0, kDilationW = 1;
   void ValidateMklShapes(const MklDnnShape& input_mkl_shape,
                          const MklDnnShape& filter_mkl_shape,
                          const MklDnnShape& obp_mkl_shape) {
@@ -420,9 +419,7 @@ class MklConv2DCustomBackpropInputOp
       const convolution_forward::primitive_desc& conv_fwd_pd,
       MklDnnData<T>* input, MklDnnData<T>* filter,
       MklDnnData<T>* outbackprop, MklDnnData<T>* output,
-      Tensor** output_tensor,
-      const memory::dims& strides,
-      const memory::dims& dilations,
+      Tensor** output_tensor, const memory::dims& strides,
       const memory::dims& padding_l, const memory::dims& padding_r,
       padding_kind padding,
       const memory::dims& bwd_output_dims,
@@ -435,16 +432,9 @@ class MklConv2DCustomBackpropInputOp
     CHECK_NOTNULL(output_tensor);
 
     // Create convolution backward data primitive.
-    // Use dilated convolution in case dilate rates are greater than zero.
-    auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ?
-        convolution_backward_data::desc(convolution_direct,
-            output->GetOpMemDesc(), filter->GetOpMemDesc(),
-            outbackprop->GetOpMemDesc(), strides,
-            dilations, padding_l, padding_r, padding):
-        convolution_backward_data::desc(convolution_direct,
-            output->GetOpMemDesc(), filter->GetOpMemDesc(),
-            outbackprop->GetOpMemDesc(),
-            strides, padding_l, padding_r, padding);
+    auto bwd_desc = convolution_backward_data::desc(
+        convolution_direct, output->GetOpMemDesc(), filter->GetOpMemDesc(),
+        outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, padding);
 
     auto bwd_pd = convolution_backward_data::primitive_desc(
         bwd_desc, cpu_engine, conv_fwd_pd);
diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc
index f0818eb96daaab..1440da8f822116 100644
--- a/tensorflow/core/kernels/mkl_conv_ops.cc
+++ b/tensorflow/core/kernels/mkl_conv_ops.cc
@@ -493,7 +493,6 @@ class MklConv2DOp : public OpKernel {
   ~MklConv2DOp() {}
 
   explicit MklConv2DOp(OpKernelConstruction* context) : OpKernel(context) {
-    OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_));
     OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_));
     string data_format;
     OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format));
@@ -510,20 +509,6 @@ class MklConv2DOp : public OpKernel {
         errors::InvalidArgument("Current implementation does not yet support "
                                 "strides in the batch and depth dimensions."));
     OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_));
-    OP_REQUIRES(context, dilations_.size() == 4,
-                errors::InvalidArgument("Sliding window dilations field must "
-                                        "specify 4 dimensions"));
-    const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N');
-    const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C');
-    const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H');
-    const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W');
-    OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1,
-                errors::InvalidArgument(
-                    "Current implementation does not yet support "
-                    "dilations in the batch and depth dimensions."));
-    OP_REQUIRES(
-        context, dilation_h > 0 && dilation_w > 0,
-        errors::InvalidArgument("Dilated rates should be larger than 0."));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -545,19 +530,17 @@ class MklConv2DOp : public OpKernel {
       MklDnnData<T> filter(&cpu_engine);
       MklDnnData<T> output(&cpu_engine);
 
-      memory::dims src_dims, filter_dims, padding_l, padding_r,
-          dilations, strides;
+      memory::dims src_dims, filter_dims, padding_l, padding_r, strides;
       memory::dims output_dims_tf_order, output_dims_mkl_order;
 
       // Get shapes of input tensors in MKL-DNN order
-      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_,
-                              dilations_);
+      MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_);
       auto src_tf_shape = GetTfShape(context, kInputIndex_Src);
       auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter);
       conv_utl.GetConvFwdSizesInMklOrder(
           src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides,
-          &dilations, &output_dims_tf_order, &output_dims_mkl_order,
-          &padding_l, &padding_r);
+          &output_dims_tf_order, &output_dims_mkl_order, &padding_l,
+          &padding_r);
       if (!context->status().ok()) return;
 
       // Check for corner case - if there is nothing to compute, return.
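The hunks on either side of this point strip dilation support back out of the MKL convolution path, and the forward-size helper correspondingly drops from the dilation-aware `GetWindowedOutputSizeVerboseV2` to `GetWindowedOutputSizeVerbose`. The only arithmetic difference is the effective filter size: a filter of `k` taps at dilation rate `d` spans `(k - 1) * d + 1` inputs, which is also why the removed code decremented the TensorFlow dilation rates by one before handing them to MKL-DNN (MKL-DNN counts dilation from zero). A small self-contained sketch of the VALID-padding case, my own illustration under that assumption rather than code from the patch:

```cpp
#include <cstdint>
#include <iostream>

// Output length of a strided window over `input` elements, VALID padding.
// dilation == 1 reduces to the non-dilated formula used by the plain
// GetWindowedOutputSizeVerbose helper.
int64_t WindowedOutputSizeValid(int64_t input, int64_t filter,
                                int64_t dilation, int64_t stride) {
  const int64_t effective_filter = (filter - 1) * dilation + 1;
  return (input - effective_filter + stride) / stride;
}

int main() {
  std::cout << WindowedOutputSizeValid(32, 3, 1, 1) << '\n';  // 30
  // At dilation 2 a 3-tap filter spans 5 inputs, so two fewer outputs fit.
  std::cout << WindowedOutputSizeValid(32, 3, 2, 1) << '\n';  // 28
  return 0;
}
```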
@@ -570,7 +553,6 @@ class MklConv2DOp : public OpKernel { // Need semantics for Null MKL tensor MklDnnShape output_mkl_shape; output_mkl_shape.SetMklTensor(false); - AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor, src_tf_shape, output_mkl_shape); @@ -614,79 +596,55 @@ class MklConv2DOp : public OpKernel { filter.SetOpMemDesc(filter_dims, memory::format::any); output.SetOpMemDesc(output_dims_mkl_order, memory::format::any); - // MKLDNN dilation starts from 0. - dilations[kDilationH] -= 1; - dilations[kDilationW] -= 1; - + // If bias is enabled, then do the same steps as above for bias. if (biasEnabled) { - // Create convolution primitive with Bias. - MklDnnData bias(&cpu_engine); - memory::dims bias_size; - conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); - const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); - bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); - bias.SetOpMemDesc(bias_size, memory::format::any); - - // Create convolution primitive with Bias. - // Use MKLDNN dilated convolution in case of dilated rate (>0). - auto conv_desc = (dilations[kDilationH] > 0 || - dilations[kDilationW] > 0) ? - convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), - output.GetOpMemDesc(), strides, dilations, - padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)): - convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), - output.GetOpMemDesc(), strides, - padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, - cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, - output_dims_mkl_order, tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, - filter_out_tensor); + MklDnnData bias(&cpu_engine); + memory::dims bias_size; + conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); + const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); + bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); + bias.SetOpMemDesc(bias_size, memory::format::any); + + // Create convolution primitive with Bias. + auto conv_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), + strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = + convolution_forward::primitive_desc(conv_desc, cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, + tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, + filter_out_tensor); } else { - // Create convolution primitive without Bias. - // Use MKLDNN dilated convolution in case of dilated rate (>0). - auto conv_desc = (dilations[kDilationH] > 0 || - dilations[kDilationW] > 0) ? 
- convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), - strides, dilations, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)): - convolution_forward::desc(prop_kind::forward, - convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), - strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, - cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, - tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, - nullptr, &output, filter_out_tensor); + // Create convolution primitive without Bias. + auto conv_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = + convolution_forward::primitive_desc(conv_desc, cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, + tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output, + filter_out_tensor); } } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -700,12 +658,10 @@ class MklConv2DOp : public OpKernel { private: std::vector strides_; - std::vector dilations_; Padding padding_; TensorFormat data_format_; const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2; const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1; - const int kDilationH = 0, kDilationW = 1; // Allocate output tensor. void AllocateOutputTensor( diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 7ca10db895c222..9dd88221a84671 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -58,16 +58,13 @@ class MklDnnConvUtil { protected: OpKernelContext* context_; // We don't own this. std::vector strides_; - std::vector dilations_; Padding padding_; TensorFormat data_format_; public: MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, - Padding pad, TensorFormat fm, - const std::vector& dilations) : - context_(context), strides_(strides), padding_(pad), - data_format_(fm), dilations_(dilations) {} + Padding pad, TensorFormat fm) + : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } @@ -81,16 +78,6 @@ class MklDnnConvUtil { *strides = {stride_rows, stride_cols}; } - // Calculate Convolution dilations - virtual inline void GetDilationsInMklOrder(memory::dims *dilations) { - // For now we take the dilation from the second and third dimensions only - // (we do not support dilation on the batch or depth dimension). 
- CHECK_NOTNULL(dilations); - int dilations_rows = GetTensorDim(dilations_, data_format_, 'H'); - int dilations_cols = GetTensorDim(dilations_, data_format_, 'W'); - *dilations = {dilations_rows, dilations_cols}; - } - // Calculate Convolution input size in MKL-DNN order. MKL-DNN // requires input in NCHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's @@ -226,8 +213,7 @@ class MklDnnConvUtil { // TODO(nhasabni): Add similar function for input and filter in MklShape. virtual inline void GetOutputAndPadSizeInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, - const memory::dims& strides, const memory::dims& dilations, - memory::dims* output_dims_tf_order, + const memory::dims& strides, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -246,8 +232,6 @@ class MklDnnConvUtil { // Stride is vector of 2 elements: {s_r, s_c} int stride_rows = strides[0]; int stride_cols = strides[1]; - int dilation_rows = dilations[0]; - int dilation_cols = dilations[1]; // Output batch is same as input batch. int out_batch = GetTensorDim(input_shape, data_format_, 'N'); @@ -257,13 +241,11 @@ class MklDnnConvUtil { int64 out_rows = 0, out_cols = 0; int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; - OP_REQUIRES_OK(context_, - GetWindowedOutputSizeVerboseV2(input_rows, filter_rows, - dilation_rows, stride_rows, padding_, + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( + input_rows, filter_rows, stride_rows, padding_, &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, - GetWindowedOutputSizeVerboseV2(input_cols, filter_cols, - dilation_cols, stride_cols, padding_, + OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( + input_cols, filter_cols, stride_cols, padding_, &out_cols, &pad_left, &pad_right)); // Tensorflow output is in data_format order. (NHWC or NCHW) @@ -289,8 +271,7 @@ class MklDnnConvUtil { // // Function does not return anything, but sets error in context status. 
inline void GetOutputAndPadSizeInMklOrder( - size_t src_index, size_t filter_index, - const memory::dims& strides, const memory::dims& dilations, + size_t src_index, size_t filter_index, const memory::dims& strides, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -305,9 +286,9 @@ class MklDnnConvUtil { errors::InvalidArgument("input must be 4-dimensional", input_tf_shape.DebugString())); - GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, - strides, dilations, output_dims_tf_order, - output_dims_mkl_order, pad_l, pad_r); + GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, strides, + output_dims_tf_order, output_dims_mkl_order, + pad_l, pad_r); } // Wrapper function to calculate input, filter, and output sizes of @@ -319,14 +300,12 @@ class MklDnnConvUtil { inline void GetConvFwdSizesInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, memory::dims* input_dims, memory::dims* filter_dims, - memory::dims* strides, memory::dims *dilations, - memory::dims* output_dims_tf_order, + memory::dims* strides, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); - CHECK_NOTNULL(dilations); CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -337,9 +316,7 @@ class MklDnnConvUtil { GetFilterSizeInMklOrder(input_shape, filter_shape, filter_dims); if (!context_->status().ok()) return; GetStridesInMklOrder(strides); - GetDilationsInMklOrder(dilations); - GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, - *strides, *dilations, + GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, output_dims_tf_order, output_dims_mkl_order, pad_l, pad_r); if (!context_->status().ok()) return; @@ -367,21 +344,7 @@ class MklConv2DBackpropCommonOp : public OpKernel { context, (stride_n == 1 && stride_c == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); - OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); - OP_REQUIRES(context, dilations_.size() == 4, - errors::InvalidArgument("Sliding window dilations field must " - "specify 4 dimensions")); - int dilation_n = GetTensorDim(dilations_, data_format_, 'N'); - int dilation_c = GetTensorDim(dilations_, data_format_, 'C'); - int dilation_h = GetTensorDim(dilations_, data_format_, 'H'); - int dilation_w = GetTensorDim(dilations_, data_format_, 'W'); - OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1), - errors::InvalidArgument( - "Current implementation does not yet support " - "dilations in the batch and depth dimensions.")); - OP_REQUIRES( - context, dilation_h > 0 && dilation_w > 0, - errors::InvalidArgument("Dilated rates should be larger than 0.")); + OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } @@ -443,16 +406,15 @@ class MklConv2DBackpropCommonOp : public OpKernel { // By default, all dims are in MKL order. Only dims in TF order // are those with prefix tf_order. memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims; - memory::dims padding_l, padding_r, dilations, strides, fwd_output_dims; + memory::dims padding_l, padding_r, strides, fwd_output_dims; memory::dims fwd_output_dims_tf_order; // Get forward convolution parameters. 
- MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, - dilations_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); conv_utl.GetConvFwdSizesInMklOrder( input_tf_shape, filter_tf_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &dilations, &fwd_output_dims_tf_order, &fwd_output_dims, - &padding_l, &padding_r); + &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, + &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward @@ -475,21 +437,10 @@ class MklConv2DBackpropCommonOp : public OpKernel { memory::format::hwio); // Tensorflow Output of Conv2D is in data_format order. auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), tf_fmt); - - const int kDilationH = 0, kDilationW = 1; - dilations[kDilationH] -= 1; - dilations[kDilationW] -= 1; - auto fwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0)? - convolution_forward::desc(prop_kind::forward, - convolution_direct, fwd_input_md, - fwd_filter_md, fwd_out_md, - strides, dilations, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)) : - convolution_forward::desc(prop_kind::forward, - convolution_direct, fwd_input_md, - fwd_filter_md, fwd_out_md, - strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + auto fwd_desc = convolution_forward::desc( + prop_kind::forward, convolution_direct, fwd_input_md, fwd_filter_md, + fwd_out_md, strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Create memory for user data. Describe how the inputs and outputs of @@ -534,9 +485,8 @@ class MklConv2DBackpropCommonOp : public OpKernel { // Operator-specific call to create and execute primitive. CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter, - &outbackprop, &output, &output_tensor, - strides, dilations, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_), + &outbackprop, &output, &output_tensor, strides, padding_l, + padding_r, TFPaddingToMklDnnPadding(padding_), bwd_output_dims, bwd_output_format); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -585,21 +535,20 @@ class MklConv2DBackpropCommonOp : public OpKernel { virtual memory::format GetOutputFormat(const memory::format data_format) = 0; /// Create and execute the primitive storing output in the output_tensor. 
- virtual void CreatePrimitive(OpKernelContext* context, - const engine& cpu_engine, - const convolution_forward::primitive_desc& conv_fwd_pd, - MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, - MklDnnData* output, Tensor** output_tensor, const memory::dims& strides, - const memory::dims& dilations, const memory::dims& padding_l, - const memory::dims& padding_r, padding_kind padding, - const memory::dims& bwd_output_dims, - memory::format bwd_output_format) = 0; + virtual void CreatePrimitive( + OpKernelContext* context, const engine& cpu_engine, + const convolution_forward::primitive_desc& conv_fwd_pd, + MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, + MklDnnData* output, Tensor** output_tensor, + const memory::dims& strides, const memory::dims& padding_l, + const memory::dims& padding_r, padding_kind padding, + const memory::dims& bwd_output_dims, + memory::format bwd_output_format) = 0; // Get the data_format {NCHW, NHWC} TensorFormat GetTFDataFormat() { return data_format_; } private: - std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index d91f7107c5b1ef..e9a2376b545fce 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -442,11 +442,12 @@ class MklInputConversionOp : public OpKernel { auto input_tf_md = mkl_output_mkl_shape.GetTfLayout(); tf_input.SetUsrMem(input_tf_md, tf_tensor); - // Create reorder between tensorflow layout and Mkl layout if necessary + // Create reorder between tensorflow layout and Mkl layout. std::vector net; - tf_input.CheckReorderToOpMem( + CHECK_EQ(tf_input.CheckReorderToOpMem( memory::primitive_desc(output_mkl_md, cpu_engine), - tensor_out, &net); + tensor_out, &net), + true); stream(stream::kind::eager).submit(net).wait(); // -- The tensor in MKL format passes through -- diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 0a0f69522fad9a..267f4f8d12c171 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -392,7 +392,7 @@ class MklReluOpBase : public OpKernel { Tensor* dst_tensor = nullptr; if (src_tensor.dims() == 0) { - Compute_Scalar(context); // scalar case doesn't use in-place operation + Compute_Scalar(context); return; } @@ -437,15 +437,11 @@ class MklReluOpBase : public OpKernel { dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } - - // Allocate output and MklDnnShape tensors separately for possible - // in-place operation - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {src_index}, dst_index, tf_shape_dst, &dst_tensor)); - AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); + AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst, + dnn_shape_dst); // Destination memory descriptor is same as source memory descriptor. - auto &dst_md = src_md; + auto dst_md = src_md; dst.SetUsrMem(dst_md, dst_tensor); // execute net @@ -496,7 +492,7 @@ class MklReluGradOpBase : public OpKernel { int src_dims_size = src_tensor.dims(); if (src_dims_size == 0) { - Compute_Scalar(context); // scalar case doesn't use in-place operation + Compute_Scalar(context); return; } @@ -607,13 +603,8 @@ class MklReluGradOpBase : public OpKernel { // so it is ok to get TensorFlow shape. 
tf_shape_diff_src = src_tensor.shape(); } - - // Allocate diff_src and MklDnnShape tensors separately for possible - // in-place operation - OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( - {diff_dst_index}, diff_src_index, tf_shape_diff_src, - &diff_src_tensor)); - AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); + AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, + tf_shape_diff_src, dnn_shape_diff_src); // diff_src memory descriptor is same as memory descriptor for both // inputs. diff --git a/tensorflow/core/kernels/segment_reduction_ops.h b/tensorflow/core/kernels/segment_reduction_ops.h index d0703d7576932c..4abfbfb1a66c37 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.h +++ b/tensorflow/core/kernels/segment_reduction_ops.h @@ -13,16 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ -#define THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ - - -// This file requires the following include because it uses CudaAtomicMax: -// #include "tensorflow/core/util/cuda_kernel_helper.h" - -// Unfortunately we can't add the #include, since it breaks compilation for -// non-GPU targets. This only breaks in clang, because it's more strict for -// template code and CudaAtomicMax is used in template context. +#ifndef TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#define TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ // This file requires the following include because it uses CudaAtomicMax: // #include "tensorflow/core/util/cuda_kernel_helper.h" @@ -138,4 +130,4 @@ struct Highest { } // namespace functor } // namespace tensorflow -#endif // THIRD_PARTY_TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ +#endif // TENSORFLOW_CORE_KERNELS_SEGMENT_REDUCTION_OPS_H_ diff --git a/tensorflow/core/kernels/spacetodepth_op.cc b/tensorflow/core/kernels/spacetodepth_op.cc index e59adfc6acbeef..23df1c35e5205f 100644 --- a/tensorflow/core/kernels/spacetodepth_op.cc +++ b/tensorflow/core/kernels/spacetodepth_op.cc @@ -187,9 +187,6 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); -REGISTER_KERNEL_BUILDER( - Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), - SpaceToDepthOp); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index f38459724abcb5..db05ca1ed2b0a7 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -234,12 +234,6 @@ struct SpaceToDepthOpFunctor { template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; -// Instantiate the GPU implementations for Eigen::half. -template struct functor::SpaceToDepthOpFunctor; -template struct functor::SpaceToDepthOpFunctor; - // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. 
template struct functor::SpaceToDepthOpFunctor; diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 0b006fa2b46e57..5bd79778a66f65 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -55,4 +55,6 @@ StringPiece StringPiece::substr(size_t pos, size_t n) const { return StringPiece(data_ + pos, n); } +const StringPiece::size_type StringPiece::npos = size_type(-1); + } // namespace tensorflow diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index 2d00f717dcb26d..910e4d9e2aa47d 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -65,7 +65,7 @@ class StringPiece { iterator begin() const { return data_; } iterator end() const { return data_ + size_; } - static const size_t npos = size_type(-1); + static const size_t npos; // Return the ith byte in the referenced data. // REQUIRES: n < size() diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index 6de850bb20716e..254fdf115da132 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ b/tensorflow/core/lib/io/record_reader.cc @@ -205,9 +205,7 @@ Status RecordReader::SkipNBytes(uint64 offset) { if (options_.buffer_size > 0) { TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(offset)); } -#if !defined(IS_SLIM_BUILD) } -#endif return Status::OK(); } // namespace io diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index 26278e03284df7..62dd2efb792988 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -16,10 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_LIB_IO_RECORD_READER_H_ #define TENSORFLOW_LIB_IO_RECORD_READER_H_ -#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/lib/io/inputstream_interface.h" #if !defined(IS_SLIM_BUILD) +#include "tensorflow/core/lib/io/inputstream_interface.h" #include "tensorflow/core/lib/io/zlib_compression_options.h" #include "tensorflow/core/lib/io/zlib_inputstream.h" #endif // IS_SLIM_BUILD diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index 9a4b616e5ded32..bdbbf6d7c32014 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -265,16 +265,6 @@ REGISTER_OP("BatchDataset") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn(shape_inference::ScalarShape); -// TODO(mrry): move SlideDataset to contrib in the future. 
-REGISTER_OP("SlideDataset") - .Input("input_dataset: variant") - .Input("window_size: int64") - .Input("stride: int64") - .Output("handle: variant") - .Attr("output_types: list(type) >= 1") - .Attr("output_shapes: list(shape) >= 1") - .SetShapeFn(shape_inference::ScalarShape); - REGISTER_OP("PaddedBatchDataset") .Input("input_dataset: variant") .Input("batch_size: int64") diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index d6a0f380336f4e..910fbaca9e72d4 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1498,7 +1498,6 @@ REGISTER_OP("_MklConv2D") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn(shape_inference::Conv2DShape) .Doc(R"doc( MKL version of Conv2D operator. Uses MKL DNN APIs to perform 2D convolution. @@ -1517,7 +1516,6 @@ REGISTER_OP("__MklDummyConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node does not perform anything. It is just created as an intermediate output of @@ -1543,7 +1541,6 @@ REGISTER_OP("_MklConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2D and BiasAdd operator. Uses MKL DNN APIs to perform 2D convolution and add Bias to the output of convolution. @@ -1566,7 +1563,6 @@ REGISTER_OP("_MklConv2DBackpropFilter") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); @@ -1593,7 +1589,6 @@ REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1638,7 +1633,6 @@ REGISTER_OP("_MklConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1674,7 +1668,6 @@ REGISTER_OP("_MklConv2DWithBiasBackpropBias") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2DBackpropBias. Uses MKL DNN APIs to compute the gradients of convolution with respect to the bias. 
@@ -1697,7 +1690,6 @@ REGISTER_OP("_MklConv2DBackpropInput") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) - .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h index eebbeaeba602fe..8f7bff1bb020ee 100644 --- a/tensorflow/core/platform/tracing.h +++ b/tensorflow/core/platform/tracing.h @@ -103,7 +103,7 @@ class Tracing { friend class ScopedAnnotation; friend class TraceMe; - TF_EXPORT static std::atomic tracing_engine_; + static std::atomic tracing_engine_; static Tracing::Engine* engine() { return tracing_engine_.load(std::memory_order_acquire); } diff --git a/tensorflow/core/platform/windows/windows_file_system.cc b/tensorflow/core/platform/windows/windows_file_system.cc index 682e46e0fcd032..b6b3722caae4dc 100644 --- a/tensorflow/core/platform/windows/windows_file_system.cc +++ b/tensorflow/core/platform/windows/windows_file_system.cc @@ -382,8 +382,7 @@ Status WindowsFileSystem::NewReadOnlyMemoryRegionFromFile( Status WindowsFileSystem::FileExists(const string& fname) { constexpr int kOk = 0; - std::wstring ws_translated_fname = Utf8ToWideChar(TranslateName(fname)); - if (_waccess(ws_translated_fname.c_str(), kOk) == 0) { + if (_access(TranslateName(fname).c_str(), kOk) == 0) { return Status::OK(); } return errors::NotFound(fname, " not found"); diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 22f2c02b78b0b0..7405e01e14494f 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. "-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "" +#define TF_VERSION_SUFFIX "-rc1" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index 6d0458e678b550..9f6fe91b1490ef 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -51,8 +51,6 @@ Europe: TensorFlow provides multiple communication paths. To pick the right path, please read the following list carefully: - * For new release announcements and security updates, subscribe to - [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). * To ask or answer technical questions about TensorFlow, use [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). For example, ask or search Stack Overflow about a particular error message @@ -67,5 +65,5 @@ please read the following list carefully: on GitHub. For example, use the issue tracker to request a new operation in TensorFlow. * To report vulnerabilities, please follow our - [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md). + [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/SECURITY.md). 
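Stepping back to the `version.h` hunk a few files up: changing `TF_VERSION_SUFFIX` flows into the release string through `TF_STR`/`TF_STR_HELPER`, a standard two-level stringification pair in which the outer macro forces the version macros to expand before `#` stringizes them. A compilable sketch follows; the `TF_MAJOR_VERSION`-style names and the `TF_VERSION_STRING` composition are assumptions mirroring that header, since only the suffix line appears in this patch:

```cpp
#include <iostream>

#define TF_STR_HELPER(x) #x
#define TF_STR(x) TF_STR_HELPER(x)

// Assumed to mirror tensorflow/core/public/version.h; only
// TF_VERSION_SUFFIX is visible in the hunk above.
#define TF_MAJOR_VERSION 1
#define TF_MINOR_VERSION 6
#define TF_PATCH_VERSION 0
#define TF_VERSION_SUFFIX "-rc1"

#define TF_VERSION_STRING                                   \
  TF_STR(TF_MAJOR_VERSION) "." TF_STR(TF_MINOR_VERSION) "." \
  TF_STR(TF_PATCH_VERSION) TF_VERSION_SUFFIX

int main() {
  // Without the helper indirection, TF_STR_HELPER(TF_MAJOR_VERSION) would
  // stringize the macro *name*, yielding "TF_MAJOR_VERSION" instead of "1".
  std::cout << TF_VERSION_STRING << std::endl;  // prints 1.6.0-rc1
  return 0;
}
```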
diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index 0481c97885df97..818798555aec3a 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 8f89898c92d00e..4c6dfa8dafe204 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc1.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 0ee9c849e11448..527884863ea510 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0 + 1.6.0-rc1 ``` @@ -65,7 +65,7 @@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0 + 1.6.0-rc1 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0 + 1.6.0-rc1 org.tensorflow libtensorflow_jni_gpu - 1.6.0 + 1.6.0-rc1 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), which is the TensorFlow Java Archive (JAR). 2. 
Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc1.zip). 3. Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0-rc1.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0-rc1.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.6.0-rc1.jar;. -Djava.library.path=jni HelloTF
If the program prints <tt>Hello from <i>version</i></tt>, you've successfully
installed TensorFlow for Java and are ready to use the API.  If the program
diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md
index 3e8744bf9d1e01..e3e115d9f61826 100644
--- a/tensorflow/docs_src/install/install_linux.md
+++ b/tensorflow/docs_src/install/install_linux.md
@@ -41,8 +41,7 @@ must be installed on your system:
     [NVIDIA's documentation](https://developer.nvidia.com/cudnn).
     Ensure that you create the `CUDA_HOME` environment variable as
     described in the NVIDIA documentation.
-  * GPU card with CUDA Compute Capability 3.0 or higher for building
-    from source and 3.5 or higher for our binaries.  See
+  * GPU card with CUDA Compute Capability 3.0 or higher.  See
     [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for
     a list of supported GPU cards.
   * The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface.
@@ -189,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv:
     Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -294,7 +293,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -357,23 +356,24 @@ where: to 6006. * TensorFlowCPUImage is required. It identifies the Docker container. Specify one of the following values: - * tensorflow/tensorflow, which is the TensorFlow CPU binary image. - * tensorflow/tensorflow:latest-devel, which is the latest + * gcr.io/tensorflow/tensorflow, which is the TensorFlow CPU binary image. + * gcr.io/tensorflow/tensorflow:latest-devel, which is the latest TensorFlow CPU Binary image plus source code. - * tensorflow/tensorflow:version, which is the + * gcr.io/tensorflow/tensorflow:version, which is the specified version (for example, 1.1.0rc1) of TensorFlow CPU binary image. - * tensorflow/tensorflow:version-devel, which is + * gcr.io/tensorflow/tensorflow:version-devel, which is the specified version (for example, 1.1.0rc1) of the TensorFlow GPU binary image plus source code. - TensorFlow images are available at + gcr.io is the Google Container Registry. Note that some + TensorFlow images are also available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). For example, the following command launches the latest TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ docker run -it tensorflow/tensorflow bash
+$ docker run -it gcr.io/tensorflow/tensorflow bash
 
The following command also launches the latest TensorFlow CPU binary image in a @@ -381,7 +381,7 @@ Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ docker run -it -p 8888:8888 tensorflow/tensorflow
+$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -405,14 +405,14 @@ where: hostPort and containerPort to `8888`. * TensorFlowGPUImage specifies the Docker container. You must specify one of the following values: - * tensorflow/tensorflow:latest-gpu, which is the latest + * gcr.io/tensorflow/tensorflow:latest-gpu, which is the latest TensorFlow GPU binary image. - * tensorflow/tensorflow:latest-devel-gpu, which is + * gcr.io/tensorflow/tensorflow:latest-devel-gpu, which is the latest TensorFlow GPU Binary image plus source code. - * tensorflow/tensorflow:version-gpu, which is the + * gcr.io/tensorflow/tensorflow:version-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image. - * tensorflow/tensorflow:version-devel-gpu, which is + * gcr.io/tensorflow/tensorflow:version-devel-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image plus source code. @@ -421,7 +421,7 @@ following command launches the latest TensorFlow GPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ nvidia-docker run -it tensorflow/tensorflow:latest-gpu bash
+$ nvidia-docker run -it gcr.io/tensorflow/tensorflow:latest-gpu bash
 
The following command also launches the latest TensorFlow GPU binary image @@ -429,13 +429,13 @@ in a Docker container. In this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:latest-gpu
+$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu
 
The following command installs an older TensorFlow version (0.12.1):
-$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:0.12.1-gpu
+$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:0.12.1-gpu
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl ## Validate your installation @@ -505,7 +505,7 @@ If you installed through Docker, start a Docker container from which you can run bash. For example:
-$ docker run -it tensorflow/tensorflow bash
+$ docker run -it gcr.io/tensorflow/tensorflow bash
 
@@ -647,14 +647,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -666,14 +666,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -685,14 +685,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
 
@@ -704,14 +704,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 205db8e6bdd438..623ca6bb7919bf 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -119,7 +119,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -242,7 +242,7 @@ take the following steps: issue the following command:
 $ sudo pip install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl 
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl

If the preceding command fails, see
[installation problems](#common-installation-problems).

@@ -292,23 +292,24 @@ where:
     to 6006.
   * TensorFlowImage is required. It identifies the Docker container.
     You must specify one of the following values:
-    * tensorflow/tensorflow: TensorFlow binary image.
-    * tensorflow/tensorflow:latest-devel: TensorFlow
+    * gcr.io/tensorflow/tensorflow: TensorFlow binary image.
+    * gcr.io/tensorflow/tensorflow:latest-devel: TensorFlow
       Binary image plus source code.

-The TensorFlow images are available at
+gcr.io is the Google Container Registry. Note that some
+TensorFlow images are also available at
 [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/).

 For example, the following command launches a TensorFlow CPU binary image in a
 Docker container from which you can run TensorFlow programs in a shell:

-$ docker run -it tensorflow/tensorflow bash
+$ docker run -it gcr.io/tensorflow/tensorflow bash

 The following command also launches a TensorFlow CPU binary image in a
 Docker container. However, in this Docker container, you can run
 TensorFlow programs in a Jupyter notebook:

-$ docker run -it -p 8888:8888 tensorflow/tensorflow
+$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow

 Docker will download the TensorFlow binary image the first time you launch it.

@@ -350,7 +351,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:

 TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl

@@ -375,7 +376,7 @@ do the following:

 If you installed through Docker, start a Docker container that runs bash.
 For example:

-$ docker run -it tensorflow/tensorflow bash
+$ docker run -it gcr.io/tensorflow/tensorflow bash
@@ -523,7 +524,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
 
@@ -531,5 +532,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md
index c09c9c2c0c6eaa..acf0af0d9d558d 100644
--- a/tensorflow/docs_src/install/install_sources.md
+++ b/tensorflow/docs_src/install/install_sources.md
@@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package.
 The filename of the `.whl` file depends on your platform.
 For example, the following command will install the pip package

-for TensorFlow 1.6.0 on Linux:
+for TensorFlow 1.6.0rc1 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-py2-none-any.whl
 
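After installing a wheel built from source, a quick sanity check that the importable package matches the build is something like this (a sketch, not part of the guide; `tf.GIT_VERSION` is the build-time git string recorded by TF 1.x):

```python
# Post-install sanity check for a from-source build (TF 1.x attributes).
import tensorflow as tf

print(tf.__version__)   # e.g. "1.6.0rc1" for the wheel built above
print(tf.GIT_VERSION)   # git describe string captured when the wheel was built
```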
## Validate your installation

diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md
index 2413bc9cfbbfd5..f0a30ee39448c0 100644
--- a/tensorflow/docs_src/install/install_windows.md
+++ b/tensorflow/docs_src/install/install_windows.md
@@ -17,7 +17,7 @@ You must choose one of the following types of TensorFlow to install:
     NVIDIA® GPU, you must install this version. Note that this version of
     TensorFlow is typically much easier to install (typically,
     in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend
-    installing this version first. Prebuilt binaries will use AVX instructions.
+    installing this version first.
   * **TensorFlow with GPU support**. TensorFlow programs typically run
     significantly faster on a GPU than on a CPU. Therefore, if your
     system has a NVIDIA® GPU meeting the prerequisites shown below
@@ -41,8 +41,7 @@ installed on your system:
     Note that cuDNN is typically installed in a different location from the
     other CUDA DLLs. Ensure that you add the directory where you installed
     the cuDNN DLL to your `%PATH%` environment variable.
-  * GPU card with CUDA Compute Capability 3.0 or higher for building
-    from source and 3.5 or higher for our binaries. See
+  * GPU card with CUDA Compute Capability 3.0 or higher. See
    [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a
    list of supported GPU cards.
diff --git a/tensorflow/docs_src/performance/xla/jit.md b/tensorflow/docs_src/performance/xla/jit.md
index d9a979ccbd3177..d4dc3e57c8fb5e 100644
--- a/tensorflow/docs_src/performance/xla/jit.md
+++ b/tensorflow/docs_src/performance/xla/jit.md
@@ -157,7 +157,7 @@ to fuse Ops is visible by starting at `hlo_graph_0.dot` and viewing each diagram in succession.

 To Render the .dot file into a png, install
-[GraphViz](https://www.graphviz.org/download/) and run:
+[GraphViz](http://www.graphviz.org/Download..php) and run:

 ```shell
 dot -Tpng hlo_graph_80.dot -o hlo_graph_80.png
diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md
index d1399814ee862f..5fb1c2da88c163 100644
--- a/tensorflow/docs_src/programmers_guide/debugger.md
+++ b/tensorflow/docs_src/programmers_guide/debugger.md
@@ -459,7 +459,7 @@ accuracy_score = classifier.evaluate(x=test_set.data,

 [debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/debug_tflearn_iris.py),
-based on [tf-learn's iris tutorial](https://www.tensorflow.org/versions/r1.2/get_started/tflearn), contains a full example of how to
+based on {$tflearn$tf-learn's iris tutorial}, contains a full example of how to
 use the tfdbg with `Estimator`s. To run this example, do:

 ```none
@@ -753,7 +753,6 @@ There are three possible workarounds or solutions:
      # For LocalCLIDebugHook
      hooks = [tf_debug.LocalCLIDebugHook(dump_root="/with/lots/of/space")]
      ```
-   Make sure that the directory pointed to by dump_root is empty or nonexistent. tfdbg cleans up the dump directories before exiting.
   * Reduce the batch size used during the runs.
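The debugger hunk above quotes the `LocalCLIDebugHook` workaround for full dump directories. For orientation, the non-Estimator variant of the same idea wraps a plain `Session` (a minimal TF 1.x sketch; the `dump_root` path is just the example value reused from the hunk):

```python
# Sketch: attach the tfdbg CLI to an ordinary session (TF 1.x API).
import tensorflow as tf
from tensorflow.python import debug as tf_debug

sess = tf.Session()
# Every subsequent sess.run() drops into the tfdbg CLI; dumps go under dump_root.
sess = tf_debug.LocalCLIDebugWrapperSession(sess, dump_root="/with/lots/of/space")
```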
diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md
index 392ac6f7f12532..1548d438778afb 100644
--- a/tensorflow/docs_src/programmers_guide/faq.md
+++ b/tensorflow/docs_src/programmers_guide/faq.md
@@ -159,7 +159,7 @@ available. These operations allow you to build sophisticated
 @{$reading_data$input pipelines}, at the cost of making the TensorFlow
 computation somewhat more complicated.
 See the how-to documentation for
-@{$reading_data#creating_threads_to_prefetch_using_queuerunner_objects$using `QueueRunner` objects to drive queues and readers}
+@{$reading_data#creating-threads-to-prefetch-using-queuerunner-objects$using `QueueRunner` objects to drive queues and readers}
 for more information on how to use them.

 ## Variables

@@ -272,7 +272,7 @@ Prefer predefined TensorFlow operations such as @{tf.decode_raw},
 If your data is not easily parsable with the built-in TensorFlow operations,
 consider converting it, offline, to a format that is easily parsable, such
-as @{tf.python_io.TFRecordWriter$`TFRecord`} format.
+as ${tf.python_io.TFRecordWriter$`TFRecord`} format.

 The more efficient method to customize the parsing behavior is to
 @{$adding_an_op$add a new op written in C++} that parses your
diff --git a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md
index fadfa03e783498..79280d246a8681 100644
--- a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md
+++ b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md
@@ -83,7 +83,7 @@ data than you need, though. Instead, consider running the merged summary op
 every `n` steps.

 The code example below is a modification of the
-[simple MNIST tutorial](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/tutorials/mnist/mnist.py),
+@{$layers$simple MNIST tutorial},
 in which we have added some summary ops, and run them every ten steps. If you
 run this and then launch `tensorboard --logdir=/tmp/tensorflow/mnist`, you'll be able
 to visualize statistics, such as how the weights or accuracy varied during
diff --git a/tensorflow/docs_src/programmers_guide/using_tpu.md b/tensorflow/docs_src/programmers_guide/using_tpu.md
index a9c2cb3e33d481..d74d7f3181c9cf 100644
--- a/tensorflow/docs_src/programmers_guide/using_tpu.md
+++ b/tensorflow/docs_src/programmers_guide/using_tpu.md
@@ -129,9 +129,10 @@ my_tpu_estimator = tf.contrib.tpu.TPUEstimator(
 Typically the `FLAGS` would be set by command line arguments. To switch from
 training locally to training on a cloud TPU you would need to:

-* Set `FLAGS.use_tpu` to `True`
-* Set `FLAGS.tpu_name` so the `tf.contrib.cluster_resolver.TPUClusterResolver` can find it
-* Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`).
+  1) Set `FLAGS.use_tpu` to `True`
+  1) Set `FLAGS.tpu_name` so the
+     `tf.contrib.cluster_resolver.TPUClusterResolver` can find it
+  1) Set `FLAGS.model_dir` to a Google Cloud Storage bucket url (`gs://`).

 ## Optimizer
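The list this hunk reverts to numbered form names three flags to flip for cloud TPU training. Wired into a script, the switch looks roughly like this (a sketch with hypothetical defaults; only the three flag names come from the doc):

```python
# Sketch: the three FLAGS the guide says to change when moving to a cloud TPU.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--use_tpu", action="store_true")     # pass --use_tpu on a TPU
parser.add_argument("--tpu_name", default=None)           # consumed by TPUClusterResolver
parser.add_argument("--model_dir", default="/tmp/model")  # use a gs:// bucket on a TPU
FLAGS, _ = parser.parse_known_args()
```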
diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md
index 9b17d0d4d52e6c..ee03f440c9b4bc 100644
--- a/tensorflow/docs_src/tutorials/layers.md
+++ b/tensorflow/docs_src/tutorials/layers.md
@@ -193,7 +193,7 @@ to calculate loss, configure the training op, and generate predictions. If
 you're already experienced with CNNs and
 @{$get_started/custom_estimators$TensorFlow `Estimator`s}, and find the above
 code intuitive, you may want to skim these sections or just skip ahead to
-["Training and Evaluating the CNN MNIST Classifier"](#training_and_evaluating_the_cnn_mnist_classifier).
+["Training and Evaluating the CNN MNIST Classifier"](#training-and-evaluating-the-cnn-mnist-classifier).

 ### Input Layer

@@ -446,7 +446,7 @@ tf.nn.softmax(logits, name="softmax_tensor")
 > Note: We use the `name` argument to explicitly name this operation
 > `softmax_tensor`, so we can reference it later. (We'll set up logging for the
-> softmax values in ["Set Up a Logging Hook"](#set-up-a-logging-hook)).
+> softmax values in ["Set Up a Logging Hook"](#set-up-a-logging-hook).

 We compile our predictions in a dict, and return an `EstimatorSpec` object:

@@ -534,8 +534,9 @@ if mode == tf.estimator.ModeKeys.TRAIN:
 ```

 > Note: For a more in-depth look at configuring training ops for Estimator model
-> functions, see @{$get_started/custom_estimators#defining_the_training_op_for_the_model$"Defining the training op for the model"}
-> in the @{$get_started/custom_estimators$"Creating Estimators in tf.estimator."} tutorial.
+> functions, see @{$get_started/custom_estimators#defining-the-training-op-for-the-model$"Defining
+> the training op for the model"} in the @{$get_started/custom_estimators$"Creating Estimators in
+> tf.estimator"} tutorial.

 ### Add evaluation metrics

@@ -624,8 +625,7 @@ operation earlier when we generated the probabilities in `cnn_model_fn`.
 > Note: If you don't explicitly assign a name to an operation via the `name`
 > argument, TensorFlow will assign a default name. A couple easy ways to
 > discover the names applied to operations are to visualize your graph on
-> @{$graph_viz$TensorBoard}) or to enable the
-> @{$programmers_guide/debugger$TensorFlow Debugger (tfdbg)}.
+> @{$graph_viz$TensorBoard}) or to enable the @{$debugger$TensorFlow Debugger (tfdbg)}.

 Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the
 `tensors` argument. We set `every_n_iter=50`, which specifies that probabilities
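The `LoggingTensorHook` described in the closing context lines is short enough to sketch in full (TF 1.x API; `"softmax_tensor"` is the operation name assigned via the `name` argument earlier in the tutorial):

```python
# Log the named softmax tensor every 50 training steps, as the tutorial text
# describes (tf.train.LoggingTensorHook is the TF 1.x hook API).
import tensorflow as tf

tensors_to_log = {"probabilities": "softmax_tensor"}
logging_hook = tf.train.LoggingTensorHook(
    tensors=tensors_to_log, every_n_iter=50)
```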
diff --git a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
index 7584a76ba5d3a0..e22536adb6f0b8 100644
--- a/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
+++ b/tensorflow/docs_src/tutorials/recurrent_quickdraw.md
@@ -109,8 +109,7 @@ This download will take a while and download a bit more than 23GB of data.

 To convert the `ndjson` files to
 @{$python/python_io#tfrecords_format_details$TFRecord} files containing
-[`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
-protos run the following command.
+${tf.train.Example} protos run the following command.

 ```shell
   python create_dataset.py --ndjson_path rnn_tutorial_data \
diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md
index 27ce75a30dd2ac..005dc020f94f66 100644
--- a/tensorflow/docs_src/tutorials/wide.md
+++ b/tensorflow/docs_src/tutorials/wide.md
@@ -74,8 +74,8 @@ Here's a list of columns available in the Census Income dataset:
 | relationship   | Categorical | Wife, Own-child, Husband,      |
 :                :             : Not-in-family, Other-relative, :
 :                :             : Unmarried.                     :
-| race           | Categorical | Amer-Indian-Eskimo, Asian-Pac- |
-:                :             : Islander, Black, White, Other. :
+| race           | Categorical | White, Asian-Pac-Islander,     |
+:                :             : Amer-Indian-Eskimo, Other, Black. :
 | gender         | Categorical | Female, Male.                  |
 | capital_gain   | Continuous  | Capital gains recorded.        |
 | capital_loss   | Continuous  | Capital Losses recorded.      |
@@ -247,7 +247,7 @@ hours_per_week = tf.feature_column.numeric_column('hours_per_week')

 ### Making Continuous Features Categorical through Bucketization

 Sometimes the relationship between a continuous feature and the label is not
-linear. As a hypothetical example, a person's income may grow with age in the
+linear. As an hypothetical example, a person's income may grow with age in the
 early stage of one's career, then the growth may slow at some point, and finally
 the income decreases after retirement. In this scenario, using the raw `age` as
 a real-valued feature column might not be a good choice because the model can
@@ -361,16 +361,6 @@ The first line of the final output should be something like
 `accuracy: 0.83557522`, which means the accuracy is 83.6%. Feel free to try
 more features and transformations and see if you can do even better!

-After the model is evaluated, we can use the model to predict whether an individual has an annual income of over
-50,000 dollars given an individual's information input.
-
-```python
-  pred_iter = model.predict(input_fn=lambda: input_fn(FLAGS.test_data, 1, False, 1))
-  for pred in pred_iter:
-    print(pred['classes'])
-```
-
-The model prediction output would be like `[b'1']` or `[b'0']` which means whether corresponding individual has an annual income of over 50,000 dollars or not.
-
 If you'd like to see a working end-to-end example, you can download our
 [example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py)
 and set the `model_type` flag to `wide`.
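The bucketization passage in the wide.md hunk pairs naturally with the feature-column call it alludes to. A sketch consistent with the surrounding tutorial (the boundary values are illustrative):

```python
# Bucketize the continuous `age` column so the model can fit the non-linear
# income/age relationship described above (TF 1.x feature columns).
import tensorflow as tf

age = tf.feature_column.numeric_column('age')
age_buckets = tf.feature_column.bucketized_column(
    age, boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
```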
diff --git a/tensorflow/examples/android/AndroidManifest.xml b/tensorflow/examples/android/AndroidManifest.xml
index 5c47ce6b673e4c..bb75431a1f8bab 100644
--- a/tensorflow/examples/android/AndroidManifest.xml
+++ b/tensorflow/examples/android/AndroidManifest.xml
@@ -40,7 +40,6 @@
-
@@ -50,7 +49,6 @@
-
@@ -60,7 +58,6 @@
-
@@ -70,7 +67,6 @@
-
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
index 429138abe5338e..8bd4abb154a8f8 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/CameraActivity.java
@@ -351,10 +351,6 @@ private String chooseCamera() {
   protected void setFragment() {
     String cameraId = chooseCamera();
-    if (cameraId == null) {
-      Toast.makeText(this, "No Camera Detected", Toast.LENGTH_SHORT).show();
-      finish();
-    }

     Fragment fragment;
     if (useCamera2API) {
@@ -420,8 +416,7 @@ public void onSetDebug(final boolean debug) {}

   @Override
   public boolean onKeyDown(final int keyCode, final KeyEvent event) {
-    if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP
-        || keyCode == KeyEvent.KEYCODE_BUTTON_L1 || keyCode == KeyEvent.KEYCODE_DPAD_CENTER) {
+    if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN || keyCode == KeyEvent.KEYCODE_VOLUME_UP) {
       debug = !debug;
       requestRender();
       onSetDebug(debug);
diff --git a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java
index 33ec65e9f73a1d..6a66ec3927be62 100644
--- a/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java
+++ b/tensorflow/examples/android/src/org/tensorflow/demo/StylizeActivity.java
@@ -16,10 +16,8 @@

 package org.tensorflow.demo;

-import android.app.UiModeManager;
 import android.content.Context;
 import android.content.res.AssetManager;
-import android.content.res.Configuration;
 import android.graphics.Bitmap;
 import android.graphics.Bitmap.Config;
 import android.graphics.BitmapFactory;
@@ -33,11 +31,9 @@
 import android.media.ImageReader.OnImageAvailableListener;
 import android.os.Bundle;
 import android.os.SystemClock;
-import android.util.DisplayMetrics;
 import android.util.Size;
 import android.util.TypedValue;
 import android.view.Display;
-import android.view.KeyEvent;
 import android.view.MotionEvent;
 import android.view.View;
 import android.view.View.OnClickListener;
@@ -47,7 +43,6 @@
 import android.widget.Button;
 import android.widget.GridView;
 import android.widget.ImageView;
-import android.widget.RelativeLayout;
 import android.widget.Toast;
 import java.io.IOException;
 import java.io.InputStream;
@@ -386,27 +381,6 @@ public void drawCallback(final Canvas canvas) {
     grid = (GridView) findViewById(R.id.grid_layout);
     grid.setAdapter(adapter);
     grid.setOnTouchListener(gridTouchAdapter);
-
-    // Change UI on Android TV
-    UiModeManager uiModeManager = (UiModeManager) getSystemService(UI_MODE_SERVICE);
-    if (uiModeManager.getCurrentModeType() == Configuration.UI_MODE_TYPE_TELEVISION) {
-      DisplayMetrics displayMetrics = new DisplayMetrics();
-      getWindowManager().getDefaultDisplay().getMetrics(displayMetrics);
-      int styleSelectorHeight = displayMetrics.heightPixels;
-      int styleSelectorWidth = displayMetrics.widthPixels - styleSelectorHeight;
-      RelativeLayout.LayoutParams layoutParams = new RelativeLayout.LayoutParams(styleSelectorWidth, ViewGroup.LayoutParams.MATCH_PARENT);
-
-      // Calculate number of style in a row, so all the style can show up without scrolling
-      int numOfStylePerRow = 3;
-      while (styleSelectorWidth / numOfStylePerRow * Math.ceil((float) (adapter.getCount() - 2) / numOfStylePerRow) > styleSelectorHeight) {
-        numOfStylePerRow++;
-      }
-      grid.setNumColumns(numOfStylePerRow);
-      layoutParams.addRule(RelativeLayout.ALIGN_PARENT_RIGHT);
-      grid.setLayoutParams(layoutParams);
-      adapter.buttons.clear();
-    }
-
     setStyle(adapter.items[0], 1.0f);
   }
@@ -628,38 +602,4 @@ private void renderDebug(final Canvas canvas) {
     borderedText.drawLines(canvas, 10, canvas.getHeight() - 10, lines);
   }
-
-  @Override
-  public boolean onKeyDown(int keyCode, KeyEvent event) {
-    int moveOffset = 0;
-    switch (keyCode) {
-      case KeyEvent.KEYCODE_DPAD_LEFT:
-        moveOffset = -1;
-        break;
-      case KeyEvent.KEYCODE_DPAD_RIGHT:
-        moveOffset = 1;
-        break;
-      case KeyEvent.KEYCODE_DPAD_UP:
-        moveOffset = -1 * grid.getNumColumns();
-        break;
-      case KeyEvent.KEYCODE_DPAD_DOWN:
-        moveOffset = grid.getNumColumns();
-        break;
-      default:
-        return super.onKeyDown(keyCode, event);
-    }
-
-    // get the highest selected style
-    int currentSelect = 0;
-    float highestValue = 0;
-    for (int i = 0; i < adapter.getCount(); i++) {
-      if (adapter.items[i].value > highestValue) {
-        currentSelect = i;
-        highestValue = adapter.items[i].value;
-      }
-    }
-    setStyle(adapter.items[(currentSelect + moveOffset + adapter.getCount()) % adapter.getCount()], 1);
-
-    return true;
-  }
 }
"//tensorflow/python/estimator:estimator_py", "//tensorflow/python/feature_column:feature_column_py", "//tensorflow/python/keras", + "//tensorflow/python/ops/losses", "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/linalg", - "//tensorflow/python/ops/losses", "//tensorflow/python/profiler", "//tensorflow/python/saved_model", - "//third_party/py/numpy", - ], + ] + if_not_windows([ + "//tensorflow/contrib:contrib_py", + ]), ) tf_py_build_info_genrule() @@ -947,6 +947,7 @@ py_test( srcs = ["framework/contrib_test.py"], main = "framework/contrib_test.py", srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", @@ -1311,6 +1312,7 @@ py_test( srcs = ["framework/dtypes_test.py"], main = "framework/dtypes_test.py", srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -1652,6 +1654,7 @@ py_test( size = "small", srcs = ["ops/clip_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], deps = [ ":client_testlib", ":clip_ops", @@ -2718,6 +2721,7 @@ cuda_py_test( ], data = ["//tensorflow/core:image_testdata"], shard_count = 5, + tags = ["no_windows"], ) cuda_py_test( @@ -3301,65 +3305,6 @@ tf_py_wrap_cc( tf_additional_gdr_deps()), ) -# ** Targets for Windows build (start) ** -# We need the following targets to expose symbols from _pywrap_tensorflow.dll - -# Build a cc_binary from tf_custom_op_library_additional_deps_impl, -# it contains all object code from its dependencies. -cc_binary( - name = "tf_custom_op_library_additional_deps.so", - linkshared = 1, - linkstatic = 1, - deps = tf_custom_op_library_additional_deps_impl(), -) - -# Get a DEF file generated by parsing all object files -# of tf_custom_op_library_additional_deps.so -filegroup( - name = "pywrap_tensorflow_def_file", - srcs = [":tf_custom_op_library_additional_deps.so"], - output_group = "def_file", -) - -# Filter the DEF file to reduce the number of symbols to 64K or less. -# Note that we also write the name of the pyd file into DEF file so that -# the dynamic libraries of custom ops can find it at runtime. -genrule( - name = "pywrap_tensorflow_filtered_def_file", - srcs = [":pywrap_tensorflow_def_file"], - outs = ["pywrap_tensorflow_filtered_def_file.def"], - cmd = select({ - "//tensorflow:windows": """ - $(location @local_config_def_file_filter//:def_file_filter) \\ - --input $(location :pywrap_tensorflow_def_file) \\ - --output $@ \\ - --target _pywrap_tensorflow_internal.pyd - """, - "//conditions:default": "touch $@", # Just a placeholder for Unix platforms - }), - tools = ["@local_config_def_file_filter//:def_file_filter"], -) - -# Get the import library of _pywrap_tensorflow_internal.dll -filegroup( - name = "pywrap_tensorflow_import_lib_file", - srcs = [":_pywrap_tensorflow_internal.so"], - output_group = "interface_library", -) - -# Create a cc_import rule for the import library of _pywrap_tensorflow_internal.dll -# so that custom ops' dynamic libraries can link against it. 
-cc_import(
-    name = "pywrap_tensorflow_import_lib",
-    interface_library = select({
-        "//tensorflow:windows": ":pywrap_tensorflow_import_lib_file",
-        "//conditions:default": "not_exsiting_on_unix.lib",  # Just a placeholder for Unix platforms
-    }),
-    system_provided = 1,
-)
-
-# ** Targets for Windows build (end) **
-
 py_library(
     name = "lib",
     srcs = [
@@ -3732,6 +3677,7 @@ py_test(
     size = "small",
     srcs = ["lib/core/bfloat16_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_windows"],
     deps = [
         ":client_testlib",
         ":lib",
@@ -4012,11 +3958,7 @@ py_test(
     srcs = ["training/checkpoint_utils_test.py"],
     srcs_version = "PY2AND3",
     tags = [
-        "manual",
-        "no_cuda_on_cpu_tap",
-        "no_oss",
         "no_windows",
-        "notap",
     ],
     deps = [
         ":client",
@@ -4039,6 +3981,7 @@ py_test(
     size = "small",
     srcs = ["training/checkpoint_ops_test.py"],
     srcs_version = "PY2AND3",
+    tags = ["no_windows"],
     deps = [
         ":checkpoint_ops_gen",
         ":client",
@@ -4079,7 +4022,10 @@ py_test(
     size = "medium",
     srcs = ["training/monitored_session_test.py"],
     srcs_version = "PY2AND3",
-    tags = ["notsan"],  # b/67945581
+    tags = [
+        "no_windows",
+        "notsan",  # b/67945581
+    ],
     deps = [
         ":array_ops",
         ":client_testlib",
diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD
index c60f6923900fb1..512d292ee2ffa3 100644
--- a/tensorflow/python/debug/BUILD
+++ b/tensorflow/python/debug/BUILD
@@ -913,7 +913,6 @@ cuda_py_test(
         "//tensorflow/python:util",
         "//tensorflow/python:variables",
     ],
-    tags = ["no_windows"],  # TODO: needs investigation on Windows
 )

 py_test(
diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py
index 5245a050a1ed87..9fcbd4ff77dad0 100644
--- a/tensorflow/python/estimator/estimator.py
+++ b/tensorflow/python/estimator/estimator.py
@@ -139,8 +139,8 @@ def __init__(self, model_fn, model_dir=None, config=None, params=None,
           to configure Estimators from hyper parameter tuning.
         * `config`: Optional configuration object. Will receive what is passed
           to Estimator in `config` parameter, or the default `config`.
-          Allows updating things in your `model_fn` based on
-          configuration such as `num_ps_replicas`, or `model_dir`.
+          Allows updating things in your model_fn based on configuration
+          such as `num_ps_replicas`, or `model_dir`.
         * Returns:
           `EstimatorSpec`

@@ -301,11 +301,11 @@ def train(self,
           * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
             tuple (features, labels) with same constraints as below.
-          * A tuple (features, labels): Where `features` is a `Tensor` or a
-            dictionary of string feature name to `Tensor` and `labels` is a
+          * A tuple (features, labels): Where features is a `Tensor` or a
+            dictionary of string feature name to `Tensor` and labels is a
             `Tensor` or a dictionary of string label name to `Tensor`. Both
-            `features` and `labels` are consumed by `model_fn`. They should
-            satisfy the expectation of `model_fn` from inputs.
+            features and labels are consumed by `model_fn`. They should satisfy
+            the expectation of `model_fn` from inputs.

       hooks: List of `SessionRunHook` subclass instances. Used for callbacks
         inside the training loop.
@@ -381,11 +381,11 @@ def evaluate(self, input_fn, steps=None, hooks=None, checkpoint_path=None,
           * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
            tuple (features, labels) with same constraints as below.
-          * A tuple (features, labels): Where `features` is a `Tensor` or a
-            dictionary of string feature name to `Tensor` and `labels` is a
+          * A tuple (features, labels): Where features is a `Tensor` or a
+            dictionary of string feature name to `Tensor` and labels is a
             `Tensor` or a dictionary of string label name to `Tensor`. Both
-            `features` and `labels` are consumed by `model_fn`. They should
-            satisfy the expectation of `model_fn` from inputs.
+            features and labels are consumed by `model_fn`. They should satisfy
+            the expectation of `model_fn` from inputs.

       steps: Number of steps for which to evaluate model. If `None`, evaluates
         until `input_fn` raises an end-of-input exception.
@@ -457,17 +457,17 @@ def predict(self,
       checkpoint_path: Path of a specific checkpoint to predict. If `None`, the
         latest checkpoint in `model_dir` is used.
       yield_single_examples: If False, yield the whole batch as returned by the
-        `model_fn` instead of decomposing the batch into individual elements.
-        This is useful if `model_fn` returns some tensors whose first dimension
-        is not equal to the batch size.
+        model_fn instead of decomposing the batch into individual elements. This
+        is useful if model_fn return some tensor with first dimension not
+        equal to the batch size

     Yields:
       Evaluated values of `predictions` tensors.

     Raises:
-      ValueError: Could not find a trained model in `model_dir`.
-      ValueError: If batch length of predictions is not the same and
-        `yield_single_examples` is True.
+      ValueError: Could not find a trained model in model_dir.
+      ValueError: if batch length of predictions are not same and
+        yield_single_examples is True.
       ValueError: If there is a conflict between `predict_keys` and
         `predictions`. For example if `predict_keys` is not `None` but
         `EstimatorSpec.predictions` is not a `dict`.
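Both docstring variants above describe the same `input_fn` contract: return a `(features, labels)` tuple directly, or a `tf.data.Dataset` yielding such tuples. A minimal sketch of a conforming function (illustrative values only):

```python
# An input_fn satisfying the contract in the docstrings above.
import numpy as np
import tensorflow as tf

def input_fn():
    features = {"x": tf.constant(np.arange(4, dtype=np.float32))}
    labels = tf.constant([0, 1, 0, 1])
    return features, labels
```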
diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py
index e38b765da52a7b..2cc3331a15867e 100644
--- a/tensorflow/python/estimator/training.py
+++ b/tensorflow/python/estimator/training.py
@@ -128,16 +128,9 @@ def __new__(cls, input_fn, max_steps=None, hooks=None):
     """Creates a validated `TrainSpec` instance.

     Args:
-      input_fn: A function that provides input data for training as minibatches.
-        See @{$get_started/premade_estimators#create_input_functions} for more
-        information. The function should construct and return one of
-        the following:
-          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
-            tuple (features, labels) with same constraints as below.
-          * A tuple (features, labels): Where features is a `Tensor` or a
-            dictionary of string feature name to `Tensor` and labels is a
-            `Tensor` or a dictionary of string label name to `Tensor`.
-
+      input_fn: Training input function returning a tuple of:
+          features - `Tensor` or dictionary of string feature name to `Tensor`.
+          labels - `Tensor` or dictionary of `Tensor` with labels.
       max_steps: Int. Positive number of total steps for which to train model.
         If `None`, train forever. The training `input_fn` is not expected to
         generate `OutOfRangeError` or `StopIteration` exceptions. See the
@@ -192,16 +185,9 @@ def __new__(cls,
     """Creates a validated `EvalSpec` instance.

     Args:
-      input_fn: A function that constructs the input data for evaluation.
-        See @{$get_started/premade_estimators#create_input_functions} for more
-        information. The function should construct and return one of
-        the following:
-          * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a
-            tuple (features, labels) with same constraints as below.
-          * A tuple (features, labels): Where features is a `Tensor` or a
-            dictionary of string feature name to `Tensor` and labels is a
-            `Tensor` or a dictionary of string label name to `Tensor`.
-
+      input_fn: Evaluation input function returning a tuple of:
+          features - `Tensor` or dictionary of string feature name to `Tensor`.
+          labels - `Tensor` or dictionary of `Tensor` with labels.
       steps: Int. Positive number of steps for which to evaluate model. If
         `None`, evaluates until `input_fn` raises an end-of-input exception.
         See `Estimator.evaluate` for details.
diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD
index f74881f179115e..eef91e9c5b3b6f 100755
--- a/tensorflow/python/keras/BUILD
+++ b/tensorflow/python/keras/BUILD
@@ -636,10 +636,7 @@ py_test(
     size = "small",
     srcs = ["_impl/keras/utils/io_utils_test.py"],
     srcs_version = "PY2AND3",
-    tags = [
-        "no_windows",  # TODO: needs investigation on Windows
-        "notsan",
-    ],
+    tags = ["notsan"],
     deps = [
         ":keras",
         "//tensorflow/python:client_testlib",
diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD
index f27ca5c2051be8..5b0c38fa5d43c3 100644
--- a/tensorflow/python/kernel_tests/BUILD
+++ b/tensorflow/python/kernel_tests/BUILD
@@ -295,6 +295,7 @@ tf_py_test(
         "//tensorflow/python:nn_grad",
     ],
     data = ["//tensorflow/core:image_testdata"],
+    tags = ["no_windows"],
 )

 tf_py_test(
@@ -1138,6 +1139,7 @@ tf_py_test(
         "//tensorflow/python:variables",
     ],
     data = ["//tensorflow/core:lmdb_testdata"],
+    tags = ["no_windows"],
 )

 cuda_py_test(
@@ -2327,6 +2329,7 @@ cuda_py_test(
         "//tensorflow/python:variables",
     ],
     shard_count = 4,
+    tags = ["no_windows"],
 )

 cuda_py_test(
@@ -2457,6 +2460,7 @@ cuda_py_test(
         "//tensorflow/python/eager:context",
     ],
     shard_count = 10,
+    tags = ["no_windows"],
 )

 cuda_py_test(
diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py
index 25525cc1285d8d..f4fe01f868da25 100644
--- a/tensorflow/python/kernel_tests/conv_ops_test.py
+++ b/tensorflow/python/kernel_tests/conv_ops_test.py
@@ -970,7 +970,7 @@ def _RunAndVerifyBackpropFilterDilation(self, input_sizes, filter_sizes,
     self.assertArrayNear(value_2.flatten(), value.flatten(), err)

   def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropFilterDilation(
             input_sizes=[1, 3, 6, 1],
@@ -984,7 +984,7 @@ def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self):
             err=1e-5)

   def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropFilterDilation(
             input_sizes=[1, 2, 3, 1],
@@ -998,7 +998,7 @@ def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self):
             err=1e-5)

   def testConv2DEmptyBackpropFilterDilation1x2(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropFilterDilation(
             input_sizes=[1, 2, 3, 1],
@@ -1012,7 +1012,7 @@ def testConv2DEmptyBackpropFilterDilation1x2(self):
             err=1e-5)

   def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropFilterDilation(
             input_sizes=[1, 3, 4, 3],
@@ -1026,7 +1026,7 @@ def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self):
             err=1e-5)

   def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropFilterDilation(
             input_sizes=[1, 3, 3, 1],
@@ -1040,7 +1040,7 @@ def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self):
             err=1e-5)

   def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropInputDilation(
             input_sizes=[1, 3, 6, 1],
@@ -1054,7 +1054,7 @@ def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self):
             err=1e-5)

   def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropInputDilation(
             input_sizes=[1, 2, 3, 1],
@@ -1068,7 +1068,7 @@ def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self):
             err=1e-5)

   def testConv2DEmptyBackpropInputDilation1x2(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropInputDilation(
             input_sizes=[0, 2, 3, 1],
@@ -1082,7 +1082,7 @@ def testConv2DEmptyBackpropInputDilation1x2(self):
             err=1e-5)

   def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         # The GPU version of this test is not very stable. So adjusting the
         # error threshold to 1e-4.
@@ -1098,7 +1098,7 @@ def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self):
             err=1e-4)

   def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self):
-    if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled():
+    if test.is_gpu_available(cuda_only=True):
       for (data_format, use_gpu) in GetTestConfigs():
         self._RunAndVerifyBackpropInputDilation(
             input_sizes=[1, 3, 3, 1],
diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py
index f0beabb4e20e4e..96c9718b83a485 100644
--- a/tensorflow/python/kernel_tests/depthtospace_op_test.py
+++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py
@@ -35,8 +35,8 @@

 class DepthToSpaceTest(test.TestCase):

-  def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32):
-    input_nhwc = math_ops.cast(inputs, dtype)
+  def _testOne(self, inputs, block_size, outputs):
+    input_nhwc = math_ops.to_float(inputs)
     with self.test_session(use_gpu=False):
       # test NHWC (default) on CPU
       x_tf = array_ops.depth_to_space(input_nhwc, block_size)
@@ -59,12 +59,6 @@ def testBasic(self):
     x_out = [[[[1], [2]], [[3], [4]]]]
     self._testOne(x_np, block_size, x_out)

-  def testBasicFloat16(self):
-    x_np = [[[[1, 2, 3, 4]]]]
-    block_size = 2
-    x_out = [[[[1], [2]], [[3], [4]]]]
-    self._testOne(x_np, block_size, x_out, dtype=dtypes.float16)
-
   # Tests for larger input dimensions. To make sure elements are
   # correctly ordered spatially.
   def testBlockSize2(self):
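For orientation, the `testBasic` case kept by this hunk exercises exactly the rearrangement below (TF 1.x op; shapes noted per the test data):

```python
# depth_to_space moves depth into spatial blocks: shape [1, 1, 1, 4] becomes
# [1, 2, 2, 1] with block_size=2, matching x_np/x_out in testBasic above.
import tensorflow as tf

x = tf.constant([[[[1, 2, 3, 4]]]], dtype=tf.float32)
y = tf.depth_to_space(x, block_size=2)
with tf.Session() as sess:
    print(sess.run(y))  # [[[[1.], [2.]], [[3.], [4.]]]]
```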
diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py
index cd90d16aacb432..b76135764f4494 100644
--- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py
+++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py
@@ -34,8 +34,8 @@

 class SpaceToDepthTest(test.TestCase):

-  def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32):
-    input_nhwc = math_ops.cast(inputs, dtype)
+  def _testOne(self, inputs, block_size, outputs):
+    input_nhwc = math_ops.to_float(inputs)
     with self.test_session(use_gpu=False):
       # test NHWC (default) on CPU
       x_tf = array_ops.space_to_depth(input_nhwc, block_size)
@@ -58,12 +58,6 @@ def testBasic(self):
     x_out = [[[[1, 2, 3, 4]]]]
     self._testOne(x_np, block_size, x_out)

-  def testBasicFloat16(self):
-    x_np = [[[[1], [2]], [[3], [4]]]]
-    block_size = 2
-    x_out = [[[[1, 2, 3, 4]]]]
-    self._testOne(x_np, block_size, x_out, dtype=dtypes.float16)
-
   # Tests for larger input dimensions. To make sure elements are
   # correctly ordered spatially.
   def testLargerInput2x2(self):
diff --git a/tensorflow/python/lib/io/file_io_test.py b/tensorflow/python/lib/io/file_io_test.py
index 223858edfa84ea..a751607aaa1f47 100644
--- a/tensorflow/python/lib/io/file_io_test.py
+++ b/tensorflow/python/lib/io/file_io_test.py
@@ -485,11 +485,6 @@ def testEof(self):
       f.flush()
       self.assertEqual(content, f.read(len(content) + 1))

-  def testUTF8StringPathExists(self):
-    file_path = os.path.join(self._base_dir, "UTF8测试_file_exist")
-    file_io.write_string_to_file(file_path, "testing")
-    v = file_io.file_exists(file_path)
-    self.assertEqual(v, True)

 if __name__ == "__main__":
   test.main()
diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py
index 87fe253f182382..fb3fe77b4ddfbe 100644
--- a/tensorflow/python/ops/nn_ops.py
+++ b/tensorflow/python/ops/nn_ops.py
@@ -699,7 +699,7 @@ def convolution(
     `padded_input` is obtained by zero padding the input using an effective
     spatial filter shape of `(spatial_filter_shape-1) * dilation_rate + 1` and
     output striding `strides` as described in the
-    @{$python/nn#Convolution$comment here}.
+    @{tf.nn.convolution$comment here}.

     In the case that `data_format` does start with `"NC"`, the `input` and
     output (but not the `filter`) are simply transposed as follows:
diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py
index 42af7f8b274c55..c59eccc174091f 100644
--- a/tensorflow/python/ops/rnn.py
+++ b/tensorflow/python/ops/rnn.py
@@ -867,7 +867,7 @@ def raw_rnn(cell, loop_fn,
   ```python
   time = tf.constant(0, dtype=tf.int32)
-  (finished, next_input, initial_state, emit_structure, loop_state) = loop_fn(
+  (finished, next_input, initial_state, _, loop_state) = loop_fn(
       time=time, cell_output=None, cell_state=None, loop_state=None)
   emit_ta = TensorArray(dynamic_size=True, dtype=initial_state.dtype)
   state = initial_state
@@ -878,7 +878,7 @@ def raw_rnn(cell, loop_fn,
         loop_state=loop_state)
     # Emit zeros and copy forward state for minibatch entries that are finished.
     state = tf.where(finished, state, next_state)
-    emit = tf.where(finished, tf.zeros_like(emit_structure), emit)
+    emit = tf.where(finished, tf.zeros_like(emit), emit)
     emit_ta = emit_ta.write(time, emit)
     # If any new minibatch entries are marked as finished, mark these.
     finished = tf.logical_or(finished, next_finished)
@@ -938,15 +938,10 @@ def loop_fn(time, cell_output, cell_state, loop_state):
     and `emit_output`: the output to store for this iteration.

     Note that `emit_output` should be a `Tensor` or (possibly nested)
-    tuple of tensors which is aggregated in the `emit_ta` inside the
-    `while_loop`. For the first call to `loop_fn`, the `emit_output`
-    corresponds to the `emit_structure` which is then used to determine the
-    size of the `zero_tensor` for the `emit_ta` (defaults to
-    `cell.output_size`). For the subsequent calls to the `loop_fn`, the
-    `emit_output` corresponds to the actual output tensor
-    that is to be aggregated in the `emit_ta`. The parameter `cell_state`
-    and output `next_cell_state` may be either a single or (possibly nested)
-    tuple of tensors. The parameter `loop_state` and
+    tuple of tensors with shapes and structure matching `cell.output_size`
+    and `cell_output` above. The parameter `cell_state` and output
+    `next_cell_state` may be either a single or (possibly nested) tuple
+    of tensors. The parameter `loop_state` and output
     `next_loop_state` may be either a single or (possibly nested) tuple
     of `Tensor` and `TensorArray` objects. This last parameter
     may be ignored by `loop_fn` and the return value may be `None`. If it
diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py
index 5e2146b79f08e6..6d7eaababcd94d 100644
--- a/tensorflow/python/ops/special_math_ops.py
+++ b/tensorflow/python/ops/special_math_ops.py
@@ -163,7 +163,7 @@ def einsum(equation, *inputs, **kwargs):
   if '...' in equation:
     raise ValueError('Subscripts with ellipses are not yet supported.')

-  match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation)
+  match = re.match('([a-z,]+)(->[a-z]*)?', equation)
   if not match:
     raise ValueError('Indices have incorrect format: %s' % equation)

@@ -402,7 +402,7 @@ def _exponential_space_einsum(equation, *inputs):
   if '...' in equation:
     raise ValueError('Subscripts with ellipses are not yet supported.')

-  match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation)
+  match = re.match('([a-z,]+)(->[a-z]*)?', equation)
   if not match:
     raise ValueError('Indices have incorrect format: %s' % equation)
diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py
index d7c3a7e8dc7c2a..2c212f45483eac 100644
--- a/tensorflow/python/ops/special_math_ops_test.py
+++ b/tensorflow/python/ops/special_math_ops_test.py
@@ -192,9 +192,6 @@ class EinsumTest(test.TestCase):
       'abc,cba',
       'dba,ead,cad->bce',
       'aef,fbc,dca->bde',
-      'iJ,Jk->ik',
-      'iJ,Ki->JK',
-      'iJk,Jklm->Jk'
   ]

   long_cases = [
@@ -211,8 +208,6 @@ class EinsumTest(test.TestCase):
       'ijk ijk',
       'ij.jk->ik',
       'ij...,jk...->ik...',
-      'ij,k ->kji',
-      'ij,k-> kji',

       # axis in output that does not exist
       'ij,jk->im',
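The einsum hunks swap an anchored, mixed-case equation regex for a lowercase-only one, which is exactly why the `'iJ,Jk->ik'`-style test cases are deleted. A quick standalone check of the difference:

```python
# Compare the two equation patterns from the hunk above.
import re

removed = re.compile('^([a-zA-Z,]+)(->[a-zA-Z]*)?$')  # pattern being rolled back
restored = re.compile('([a-z,]+)(->[a-z]*)?')         # pattern being restored

print(bool(removed.match('iJ,Jk->ik')))      # True: uppercase indices accepted
print(restored.match('iJ,Jk->ik').groups())  # ('i', None): only a prefix matches
```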
"true") parser.add_argument( @@ -373,10 +376,5 @@ def run_main(): separated by \',\'. For tag-set contains multiple tags, all tags \ must be passed in.\ """) - flags, unparsed = parser.parse_known_args() - - my_main = lambda unused_args: main(unused_args, flags) - app.run(main=my_main, argv=[sys.argv[0]] + unparsed) - -if __name__ == '__main__': - run_main() + FLAGS, unparsed = parser.parse_known_args() + app.run(main=main, argv=[sys.argv[0]] + unparsed) diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index b88be4ae04d5dc..b0e9e3e5ed2117 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -38,15 +38,11 @@ from tensorflow.core.framework import types_pb2 from tensorflow.python.client import session from tensorflow.python.debug.wrappers import local_cli_wrapper -from tensorflow.python.framework import meta_graph as meta_graph_lib from tensorflow.python.framework import ops as ops_lib from tensorflow.python.platform import app # pylint: disable=unused-import from tensorflow.python.saved_model import loader from tensorflow.python.tools import saved_model_utils -# Set of ops to blacklist. -_OP_BLACKLIST = set(['WriteFile', 'ReadFile']) - def _show_tag_sets(saved_model_dir): """Prints the tag-sets stored in SavedModel directory. @@ -246,27 +242,6 @@ def get_signature_def_map(saved_model_dir, tag_set): return meta_graph.signature_def -def scan_meta_graph_def(meta_graph_def): - """Scans meta_graph_def and reports if there are ops on blacklist. - - Print ops if they are on black list, or print success if no blacklisted ops - found. - - Args: - meta_graph_def: MetaGraphDef protocol buffer. - """ - all_ops_set = set( - meta_graph_lib.ops_used_by_graph_def(meta_graph_def.graph_def)) - blacklisted_ops = _OP_BLACKLIST & all_ops_set - if blacklisted_ops: - # TODO(yifeif): print more warnings - print('MetaGraph with tag set %s contains the following blacklisted ops:' % - meta_graph_def.meta_info_def.tags, blacklisted_ops) - else: - print('MetaGraph with tag set %s does not contain blacklisted ops.' % - meta_graph_def.meta_info_def.tags) - - def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key, input_tensor_key_feed_dict, outdir, overwrite_flag, tf_debug=False): @@ -634,21 +609,6 @@ def run(args): args.overwrite, tf_debug=args.tf_debug) -def scan(args): - """Function triggered by scan command. - - Args: - args: A namespace parsed from command line. - """ - if args.tag_set: - scan_meta_graph_def( - saved_model_utils.get_meta_graph_def(args.dir, args.tag_set)) - else: - saved_model = reader.read_saved_model(args.dir) - for meta_graph_def in saved_model.meta_graphs: - scan_meta_graph_def(meta_graph_def) - - def create_parser(): """Creates a parser that parse the command line arguments. 
diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py
index b88be4ae04d5dc..b0e9e3e5ed2117 100644
--- a/tensorflow/python/tools/saved_model_cli.py
+++ b/tensorflow/python/tools/saved_model_cli.py
@@ -38,15 +38,11 @@
 from tensorflow.core.framework import types_pb2
 from tensorflow.python.client import session
 from tensorflow.python.debug.wrappers import local_cli_wrapper
-from tensorflow.python.framework import meta_graph as meta_graph_lib
 from tensorflow.python.framework import ops as ops_lib
 from tensorflow.python.platform import app  # pylint: disable=unused-import
 from tensorflow.python.saved_model import loader
 from tensorflow.python.tools import saved_model_utils

-# Set of ops to blacklist.
-_OP_BLACKLIST = set(['WriteFile', 'ReadFile'])
-

 def _show_tag_sets(saved_model_dir):
   """Prints the tag-sets stored in SavedModel directory.
@@ -246,27 +242,6 @@ def get_signature_def_map(saved_model_dir, tag_set):
   return meta_graph.signature_def


-def scan_meta_graph_def(meta_graph_def):
-  """Scans meta_graph_def and reports if there are ops on blacklist.
-
-  Print ops if they are on black list, or print success if no blacklisted ops
-  found.
-
-  Args:
-    meta_graph_def: MetaGraphDef protocol buffer.
-  """
-  all_ops_set = set(
-      meta_graph_lib.ops_used_by_graph_def(meta_graph_def.graph_def))
-  blacklisted_ops = _OP_BLACKLIST & all_ops_set
-  if blacklisted_ops:
-    # TODO(yifeif): print more warnings
-    print('MetaGraph with tag set %s contains the following blacklisted ops:' %
-          meta_graph_def.meta_info_def.tags, blacklisted_ops)
-  else:
-    print('MetaGraph with tag set %s does not contain blacklisted ops.' %
-          meta_graph_def.meta_info_def.tags)
-
-
 def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key,
                                    input_tensor_key_feed_dict, outdir,
                                    overwrite_flag, tf_debug=False):
@@ -634,21 +609,6 @@ def run(args):
       args.overwrite, tf_debug=args.tf_debug)


-def scan(args):
-  """Function triggered by scan command.
-
-  Args:
-    args: A namespace parsed from command line.
-  """
-  if args.tag_set:
-    scan_meta_graph_def(
-        saved_model_utils.get_meta_graph_def(args.dir, args.tag_set))
-  else:
-    saved_model = reader.read_saved_model(args.dir)
-    for meta_graph_def in saved_model.meta_graphs:
-      scan_meta_graph_def(meta_graph_def)
-
-
 def create_parser():
   """Creates a parser that parse the command line arguments.
@@ -770,26 +730,6 @@ def create_parser():
                   'SavedModel.')
   parser_run.set_defaults(func=run)

-  # scan command
-  scan_msg = ('Usage example:\n'
-              'To scan for blacklisted ops in SavedModel:\n'
-              '$saved_model_cli scan --dir /tmp/saved_model\n'
-              'To scan a specific MetaGraph, pass in --tag_set\n')
-  parser_scan = subparsers.add_parser(
-      'scan',
-      description=scan_msg,
-      formatter_class=argparse.RawTextHelpFormatter)
-  parser_scan.add_argument(
-      '--dir',
-      type=str,
-      required=True,
-      help='directory containing the SavedModel to execute')
-  parser_scan.add_argument(
-      '--tag_set',
-      type=str,
-      help='tag-set of graph in SavedModel to scan, separated by \',\'')
-  parser_scan.set_defaults(func=scan)
-
   return parser
diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py
index eedc893a38d3d0..f99c8448458078 100644
--- a/tensorflow/python/tools/saved_model_cli_test.py
+++ b/tensorflow/python/tools/saved_model_cli_test.py
@@ -525,28 +525,6 @@ def fake_wrapper_session(sess):
     y_expected = np.array([[2.5], [3.0]])
     self.assertAllClose(y_expected, y_actual)

-  def testScanCommand(self):
-    self.parser = saved_model_cli.create_parser()
-    base_path = test.test_src_dir_path(SAVED_MODEL_PATH)
-    args = self.parser.parse_args(['scan', '--dir', base_path])
-    with captured_output() as (out, _):
-      saved_model_cli.scan(args)
-    output = out.getvalue().strip()
-    self.assertTrue('does not contain blacklisted ops' in output)
-
-  def testScanCommandFoundBlacklistedOp(self):
-    self.parser = saved_model_cli.create_parser()
-    base_path = test.test_src_dir_path(SAVED_MODEL_PATH)
-    args = self.parser.parse_args(
-        ['scan', '--dir', base_path, '--tag_set', 'serve'])
-    op_blacklist = saved_model_cli._OP_BLACKLIST
-    saved_model_cli._OP_BLACKLIST = set(['VariableV2'])
-    with captured_output() as (out, _):
-      saved_model_cli.scan(args)
-    saved_model_cli._OP_BLACKLIST = op_blacklist
-    output = out.getvalue().strip()
-    self.assertTrue('\'VariableV2\'' in output)
-

 if __name__ == '__main__':
   test.main()
diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc
index 03e3e0857f9f70..0b3b060fe75e10 100644
--- a/tensorflow/stream_executor/cuda/cuda_dnn.cc
+++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc
@@ -274,8 +274,7 @@ CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
 // clang-format off
 #if CUDNN_VERSION >= 7000
 #define CUDNN_DNN_ROUTINE_EACH_R7(__macro)  \
-  __macro(cudnnSetConvolutionMathType)      \
-  __macro(cudnnSetRNNMatrixMathType)
+  __macro(cudnnSetConvolutionMathType)
 // clang-format on

 CUDNN_DNN_ROUTINE_EACH_R7(PERFTOOLS_GPUTOOLS_CUDNN_WRAP)
@@ -587,19 +586,6 @@ static bool TensorOpMathEnabled() {
   return is_enabled;
 }

-// A helper function to decide whether to enable the TENSOR_OP_MATH math type
-// for RNNs.
-static bool RnnTensorOpMathEnabled() {
-  static bool is_enabled = [] {
-    bool is_disabled = false;
-    TF_CHECK_OK(
-        tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH",
-                                       /*default_val=*/false, &is_disabled));
-    return !is_disabled;
-  }();
-  return is_enabled;
-}
-
 // A helper function to decide whether to use CUDNN_BATCHNORM_SPATIAL_PERSISTENT
 // in batchnorm. This mode can be faster in some tasks because an optimized path
 // may be selected for CUDNN_DATA_FLOAT and CUDNN_DATA_HALF data types, compute
@@ -1138,9 +1124,6 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon {
       SetFailure(cudnn_params_desc_->Status());
       return;
     }
-    if (data_type == CUDNN_DATA_HALF) {
-      set_use_tensor_op_math(true);
-    }
   }
   ~CudnnRnnDescriptor() override {
     if (rnn_desc_) {
@@ -1149,20 +1132,6 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon {
       CUDNN_RETURN_IF_FAIL(status, "Unable to destroy RNN descriptor");
     }
   }
-  void set_use_tensor_op_math(bool use_tensor_op_math) {
-#if CUDNN_VERSION >= 7000
-    cudnnMathType_t math_type =
-        (use_tensor_op_math ? CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH);
-    if (RnnTensorOpMathEnabled()) {
-      cudnnStatus_t status =
-          wrap::cudnnSetRNNMatrixMathType(parent_, rnn_desc_, math_type);
-      if (status != CUDNN_STATUS_SUCCESS) {
-        LOG(FATAL) << "could not set cudnn RNN math type: "
-                   << ToString(status);
-      }
-    }
-#endif
-  }
   cudnnRNNDescriptor_t handle() const {
     if (!ok()) return nullptr;
     return rnn_desc_;
diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl
index bab1e82c86672b..9b0db8a1129cf5 100644
--- a/tensorflow/tensorflow.bzl
+++ b/tensorflow/tensorflow.bzl
@@ -1176,20 +1176,6 @@ def tf_custom_op_library_additional_deps():
       "@protobuf_archive//:protobuf_headers",
       clean_dep("//third_party/eigen3"),
       clean_dep("//tensorflow/core:framework_headers_lib"),
-  ] + if_windows(["//tensorflow/python:pywrap_tensorflow_import_lib"])
-
-# A list of targets that contains the implemenation of
-# tf_custom_op_library_additional_deps. It's used to generate a DEF file for
-# exporting symbols from _pywrap_tensorflow.dll on Windows.
-def tf_custom_op_library_additional_deps_impl():
-  return [
-      # for @nsync//:nsync_headers
-      "//third_party/nsync:nsync_cpp",
-      # for //third_party/eigen3
-      clean_dep("//third_party/eigen3"),
-      # for //tensorflow/core:framework_headers_lib
-      clean_dep("//tensorflow/core:framework"),
-      clean_dep("//tensorflow/core:reader_base"),
   ]

 # Traverse the dependency graph along the "deps" attribute of the
@@ -1276,7 +1262,6 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]):
       deps=deps + if_cuda(cuda_deps),
       data=[name + "_check_deps"],
      copts=tf_copts(is_external=True),
-      features = ["windows_export_all_symbols"],
       linkopts=linkopts + select({
           "//conditions:default": [
               "-lm",
@@ -1423,8 +1408,7 @@ def tf_py_wrap_cc(name,
       ]) + tf_extension_copts()),
       linkopts=tf_extension_linkopts() + extra_linkopts,
       linkstatic=1,
-      deps=deps + extra_deps,
-      **kwargs)
+      deps=deps + extra_deps)
   native.genrule(
       name="gen_" + cc_library_pyd_name,
       srcs=[":" + cc_library_name],
diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py
index baa7a0889de253..5268bba3cc102c 100644
--- a/tensorflow/tools/api/tests/api_compatibility_test.py
+++ b/tensorflow/tools/api/tests/api_compatibility_test.py
@@ -247,8 +247,6 @@ def testNewAPIBackwardsCompatibility(self):
     public_api_visitor = public_api.PublicAPIVisitor(visitor)
     public_api_visitor.do_not_descend_map['tf'].append('contrib')
     public_api_visitor.do_not_descend_map['tf.GPUOptions'] = ['Experimental']
-    # TODO(annarev): Make slide_dataset available in API.
-    public_api_visitor.private_map['tf'] = ['slide_dataset']
     traverse.traverse(api, public_api_visitor)

     proto_dict = visitor.GetProtos()
diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake
index d5dea4f3e41841..ec90c83aacd068 100644
--- a/tensorflow/tools/ci_build/Dockerfile.cmake
+++ b/tensorflow/tools/ci_build/Dockerfile.cmake
@@ -23,12 +23,11 @@ RUN /install/install_deb_packages.sh
 RUN apt-get update
 RUN apt-get install -y --no-install-recommends python-pip
-RUN pip install --upgrade wheel
 RUN pip install --upgrade astor
 RUN pip install --upgrade gast
 RUN pip install --upgrade numpy
 RUN pip install --upgrade termcolor

 # Install golang
-RUN apt-get install -t xenial-backports -y golang-1.9
-ENV PATH=${PATH}:/usr/lib/go-1.9/bin
+RUN add-apt-repository -y ppa:ubuntu-lxc/lxd-stable
+RUN apt-get install -y golang
diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
index 40189a6d1b1388..8b8ba31a0dda88 100644
--- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
+++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh
@@ -65,5 +65,4 @@ bazel test -c opt $BUILD_OPTS -k --test_output=errors \
     --define=no_tensorflow_py_deps=true --test_lang_filters=py \
     --test_tag_filters=-no_pip,-no_windows,-no_oss \
     --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \
-    //${PY_TEST_DIR}/tensorflow/python/... \
-    //${PY_TEST_DIR}/tensorflow/contrib/...
+    //${PY_TEST_DIR}/tensorflow/python/...
diff --git a/tensorflow/tools/def_file_filter/BUILD b/tensorflow/tools/def_file_filter/BUILD
deleted file mode 100644
index e69de29bb2d1d6..00000000000000
diff --git a/tensorflow/tools/def_file_filter/BUILD.tpl b/tensorflow/tools/def_file_filter/BUILD.tpl
deleted file mode 100644
index 3cb72f49797d80..00000000000000
--- a/tensorflow/tools/def_file_filter/BUILD.tpl
+++ /dev/null
@@ -1,15 +0,0 @@
-# Description:
-# Tools for filtering DEF file for TensorFlow on Windows
-#
-# On Windows, we use a DEF file generated by Bazel to export
-# symbols from the tensorflow dynamic library(_pywrap_tensorflow.dll).
-# The maximum number of symbols that can be exported per DLL is 64K,
-# so we have to filter some useless symbols through this python script.
-
-package(default_visibility = ["//visibility:public"])
-
-py_binary(
-    name = "def_file_filter",
-    srcs = ["def_file_filter.py"],
-    srcs_version = "PY2AND3",
-)
diff --git a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl
deleted file mode 100644
index 8bdc03eb0f19fd..00000000000000
--- a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl
+++ /dev/null
@@ -1,168 +0,0 @@
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-"""def_file_filter.py - tool to filter a windows def file.
-
-The def file can be used to export symbols from the tensorflow dll to enable
-tf.load_library().
-
-Because the linker allows only 64K symbols to be exported per dll
-we filter the symbols down to the essentials. The regular expressions
-we use for this are specific to tensorflow.
-
-TODO: this works fine but there is an issue with exporting
-'const char * const' and importing it from a user_ops. The problem is
-on the importing end and using __declspec(dllimport) works around it.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import argparse
-import io
-import os
-import re
-import subprocess
-import sys
-import tempfile
-
-# External tools we use that come with visual studio sdk
-UNDNAME = "%{undname_bin_path}"
-
-# Exclude if matched
-EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::")
-
-# Include if matched before exclude
-INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|"
-                           r"google::protobuf::internal::ArenaImpl::AllocateAligned|"  # for contrib/data/_prefetching_ops
-                           r"google::protobuf::internal::ArenaImpl::AddCleanup|"  # for contrib/data/_prefetching_ops
-                           r"google::protobuf::Arena::OnArenaAllocation|"  # for contrib/data/_prefetching_ops
-                           r"tensorflow::internal::LogMessage|"
-                           r"tensorflow::internal::LogString|"
-                           r"tensorflow::internal::CheckOpMessageBuilder|"
-                           r"tensorflow::internal::MakeCheckOpValueString|"
-                           r"tensorflow::internal::PickUnusedPortOrDie|"
-                           r"tensorflow::internal::ValidateDevice|"
-                           r"tensorflow::ops::internal::Enter|"
-                           r"tensorflow::strings::internal::AppendPieces|"
-                           r"tensorflow::strings::internal::CatPieces|"
-                           r"tensorflow::io::internal::JoinPathImpl")
-
-# Include if matched after exclude
-INCLUDE_RE = re.compile(r"^(TF_\w*)$|"
-                        r"^(TFE_\w*)$|"
-                        r"nsync::|"
-                        r"tensorflow::|"
-                        r"functor::|"
-                        r"perftools::gputools")
-
-# We want to identify data members explicitly in the DEF file, so that no one
-# can implicitly link against the DLL if they use one of the variables exported
-# from the DLL and the header they use does not decorate the symbol with
-# __declspec(dllimport). It is easier to detect what a data symbol does
-# NOT look like, so doing it with the below regex.
-DATA_EXCLUDE_RE = re.compile(r"[)(]|"
-                             r"vftable|"
-                             r"vbtable|"
-                             r"vcall|"
-                             r"RTTI|"
-                             r"protobuf::internal::ExplicitlyConstructed")
-
-def get_args():
-  """Parse command line."""
-  filename_list = lambda x: x.split(";")
-  parser = argparse.ArgumentParser()
-  parser.add_argument("--input", type=filename_list,
-                      help="paths to input def file",
-                      required=True)
-  parser.add_argument("--output", help="output deffile", required=True)
-  parser.add_argument("--target", help="name of the target", required=True)
-  args = parser.parse_args()
-  return args
-
-
-def main():
-  """main."""
-  args = get_args()
-
-  # Pipe dumpbin to extract all linkable symbols from libs.
-  # Good symbols are collected in candidates and also written to
-  # a temp file.
-  candidates = []
-  tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False)
-  for def_file_path in args.input:
-    def_file = open(def_file_path, 'r')
-    for line in def_file:
-      cols = line.split()
-      sym = cols[0]
-      tmpfile.file.write(sym + "\n")
-      candidates.append(sym)
-  tmpfile.file.close()
-
-  # Run the symbols through undname to get their undecorated name
-  # so we can filter on something readable.
-  with open(args.output, "w") as def_fp:
-    # track dupes
-    taken = set()
-
-    # Header for the def file.
- def_fp.write("LIBRARY " + args.target + "\n") - def_fp.write("EXPORTS\n") - def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n") - - # Each symbols returned by undname matches the same position in candidates. - # We compare on undname but use the decorated name from candidates. - dupes = 0 - proc = subprocess.Popen([UNDNAME, tmpfile.name], stdout=subprocess.PIPE) - for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")): - decorated = candidates[idx] - if decorated in taken: - # Symbol is already in output, done. - dupes += 1 - continue - - if not INCLUDEPRE_RE.search(line): - if EXCLUDE_RE.search(line): - continue - if not INCLUDE_RE.search(line): - continue - - if "deleting destructor" in line: - # Some of the symbols convered by INCLUDEPRE_RE export deleting - # destructor symbols, which is a bad idea. - # So we filter out such symbols here. - continue - - if DATA_EXCLUDE_RE.search(line): - def_fp.write("\t" + decorated + "\n") - else: - def_fp.write("\t" + decorated + " DATA\n") - taken.add(decorated) - def_fp.close() - - exit_code = proc.wait() - if exit_code != 0: - print("{} failed, exit={}".format(UNDNAME, exit_code)) - return exit_code - - os.unlink(tmpfile.name) - - print("symbols={}, taken={}, dupes={}" - .format(len(candidates), len(taken), dupes)) - return 0 - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl deleted file mode 100644 index 47539b2423e602..00000000000000 --- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl +++ /dev/null @@ -1,56 +0,0 @@ -"""Repository rule for def file filter autoconfiguration. - -This repository reuses Bazel's VC detect mechanism to find undname.exe, -which is a tool used in def_file_filter.py. - -def_file_filter.py is for filtering the DEF file for TensorFlow on Windows. -On Windows, we use a DEF file generated by Bazel to export symbols from the -tensorflow dynamic library(_pywrap_tensorflow.dll). The maximum number of -symbols that can be exported per DLL is 64K, so we have to filter some useless -symbols through this python script. 
diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
deleted file mode 100644
index 47539b2423e602..00000000000000
--- a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl
+++ /dev/null
@@ -1,56 +0,0 @@
-"""Repository rule for def file filter autoconfiguration.
-
-This repository reuses Bazel's VC detect mechanism to find undname.exe,
-which is a tool used in def_file_filter.py.
-
-def_file_filter.py is for filtering the DEF file for TensorFlow on Windows.
-On Windows, we use a DEF file generated by Bazel to export symbols from the
-tensorflow dynamic library(_pywrap_tensorflow.dll). The maximum number of
-symbols that can be exported per DLL is 64K, so we have to filter some useless
-symbols through this python script.
-
-`def_file_filter_config` depends on the following environment variables:
-  * `BAZEL_VC`
-  * `BAZEL_VS`
-  * `VS90COMNTOOLS`
-  * `VS100COMNTOOLS`
-  * `VS110COMNTOOLS`
-  * `VS120COMNTOOLS`
-  * `VS140COMNTOOLS`
-"""
-
-load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_vc_path")
-load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_msvc_tool")
-load("@bazel_tools//tools/cpp:lib_cc_configure.bzl", "auto_configure_fail")
-
-def _def_file_filter_configure_impl(repository_ctx):
-  if repository_ctx.os.name.lower().find("windows") == -1:
-    repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD")
-    repository_ctx.file("def_file_filter.py", "")
-    return
-  vc_path = find_vc_path(repository_ctx)
-  if vc_path == "visual-studio-not-found":
-    auto_configure_fail("Visual C++ build tools not found on your machine")
-  undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\")
-
-  repository_ctx.template(
-      "def_file_filter.py",
-      Label("//tensorflow/tools/def_file_filter:def_file_filter.py.tpl"),
-      {
-          "%{undname_bin_path}": undname_bin_path,
-      })
-  repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD")
-
-
-def_file_filter_configure = repository_rule(
-    implementation = _def_file_filter_configure_impl,
-    environ = [
-        "BAZEL_VC",
-        "BAZEL_VS",
-        "VS90COMNTOOLS",
-        "VS100COMNTOOLS",
-        "VS110COMNTOOLS",
-        "VS120COMNTOOLS",
-        "VS140COMNTOOLS"
-    ],
-)
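What the deleted repository rule does, in plain Python terms: it locates `undname.exe` via Bazel's VC detection, then expands the `%{undname_bin_path}` placeholder in the template, as `repository_ctx.template()` would. A rough equivalent follows, with a hypothetical tool path (note the doubled backslashes, mirroring the rule's `.replace("\\", "\\\\")`):

```python
# Rough plain-Python equivalent of the repository_ctx.template() call
# above; the undname.exe path is a hypothetical example.
def expand_template(template_path, output_path, substitutions):
    with open(template_path) as f:
        text = f.read()
    for placeholder, value in substitutions.items():
        text = text.replace(placeholder, value)
    with open(output_path, "w") as f:
        f.write(text)

expand_template(
    "def_file_filter.py.tpl",
    "def_file_filter.py",
    {"%{undname_bin_path}": "C:\\\\VC\\\\bin\\\\undname.exe"},
)
```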
diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md
index 228d5ee35d1839..c1b1f79bbd4b65 100644
--- a/tensorflow/tools/dist_test/README.md
+++ b/tensorflow/tools/dist_test/README.md
@@ -17,14 +17,6 @@ census model:

 ./local_test.sh --model_name CENSUS_WIDENDEEP

-You can test a specific version of TensorFlow:
-
-```shell
-./local_test.sh ${whl_file_url}
-```
-
-For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/install_linux#the_url_of_the_tensorflow_python_package) for Ubuntu.
-
 **2) Launch a remote k8s cluster on Google Kubernetes Engine (GKE) and run the test suite on it**
diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh
index caae7fd5305af9..435f9d0dc9c55a 100755
--- a/tensorflow/tools/dist_test/local_test.sh
+++ b/tensorflow/tools/dist_test/local_test.sh
@@ -16,11 +16,12 @@
 #
 # Tests distributed TensorFlow on a locally running TF GRPC cluster.
 #
-# This script performs the following steps:
-# 1) Build the docker image capable of running distributed TensorFlow in docker.
+# This script performs the following steps:
+# 1) Build the docker-in-docker (dind) image capable of running docker and
+#    Kubernetes (k8s) cluster inside.
 # 2) Run a container from the aforementioned image and start docker service
 #    in it
-# 3) Call a script to launch a distributed TensorFlow GRPC cluster inside the container
+# 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container
 #    and run the distributed test suite.
 #
 # Usage: local_test.sh
@@ -63,9 +64,15 @@ die() {
 # Configurations
 DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster"
+LOCAL_K8S_CACHE=${HOME}/kubernetes

-# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below
-DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl"
+# Helper function
+get_container_id_by_image_name() {
+  # Get the id of a container by image name
+  # Usage: get_docker_container_id_by_image_name
+
+  docker ps | grep $1 | awk '{print $1}'
+}

 # Parse input arguments
 LEAVE_CONTAINER_RUNNING=0
@@ -77,8 +84,7 @@ SYNC_REPLICAS_FLAG=""
 WHL_FILE_LOCATION=${1}

 if [[ -z "${WHL_FILE_LOCATION}" ]]; then
-  WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION}
-  echo "use default whl file location"
+  die "whl file location is not specified"
 fi

 while true; do
@@ -115,7 +121,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 # Get utility functions
 source ${DIR}/scripts/utils.sh

-# Build docker image for local distributed TensorFlow cluster.
+# Build docker-in-docker image for local k8s cluster.
 NO_CACHE_FLAG=""
 if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && [[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then
diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py
index cbcdbf5b807a58..3630dbd740e981 100755
--- a/tensorflow/tools/git/gen_git_source.py
+++ b/tensorflow/tools/git/gen_git_source.py
@@ -114,13 +114,6 @@ def configure(src_base_path, gen_path, debug=False):
   for target, src in link_map.items():
     if src is None:
       open(os.path.join(gen_path, target), "w").write("")
-    elif not os.path.exists(src):
-      # Git repo is configured in a way we don't support such as having
-      # packed refs. Even though in a git repo, tf.__git_version__ will not
-      # be accurate.
-      # TODO(mikecase): Support grabbing git info when using packed refs.
-      open(os.path.join(gen_path, target), "w").write("")
-      spec["git"] = False
     else:
       try:
         # In python 3.5, symlink function exists even on Windows. But requires
diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD
index 6e21aa28461819..b7d7fac3153678 100644
--- a/tensorflow/tools/graph_transforms/BUILD
+++ b/tensorflow/tools/graph_transforms/BUILD
@@ -178,7 +178,6 @@ tf_cc_test(
         "//tensorflow/core:test",
         "//tensorflow/core:test_main",
         "//tensorflow/core:testlib",
-        "//tensorflow/core/kernels:quantization_utils",
         "//tensorflow/core/kernels:quantized_ops",
         "//tensorflow/core/util/tensor_bundle",
     ],
diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc
index d86f65325be1c3..d89afe85c72883 100644
--- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc
+++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc
@@ -182,36 +182,6 @@ Status FuseBatchNormWithConv(const NodeMatch& match,
   return Status::OK();
 }

-Status FuseBatchNormWithBatchToSpace(const NodeMatch& match,
-                                     std::vector<NodeDef>* new_nodes) {
-  // Calculate the scale and offset values to apply.
-  std::vector<float> scale_values;
-  std::vector<float> offset_values;
-  TF_RETURN_IF_ERROR(
-      GetScaleAndOffsetValues(match, &scale_values, &offset_values));
-
-  // Fuse conv weights, and set the final output node name as batch_norm_node.
-  const NodeDef& batch_norm_node = match.node;
-  const NodeMatch& batch_to_space_node_match = match.inputs[0];
-  const NodeMatch& conv_node_match = batch_to_space_node_match.inputs[0];
-  const NodeDef& batch_to_space_node = batch_to_space_node_match.node;
-  const NodeDef& conv_node = conv_node_match.node;
-
-  string biasadd_name = conv_node.name() + "/biasadd";
-  TF_RETURN_IF_ERROR(
-      FuseScaleOffsetToConvWeights(scale_values, offset_values, conv_node_match,
-                                   biasadd_name, new_nodes));
-
-  NodeDef new_batch_to_space_node = batch_to_space_node;
-  // reuse batch_norm node name
-  new_batch_to_space_node.set_name(batch_norm_node.name());
-  new_batch_to_space_node.set_input(0, biasadd_name);
-  new_nodes->push_back(batch_to_space_node_match.inputs[1].node);
-  new_nodes->push_back(batch_to_space_node_match.inputs[2].node);
-  new_nodes->push_back(new_batch_to_space_node);
-  return Status::OK();
-}
-
 Status FuseBatchNormWithConvConcat(const NodeMatch& match,
                                    std::vector<NodeDef>* new_nodes) {
   // Calculate the scale and offset values to apply.
@@ -314,43 +284,6 @@ Status FoldOldBatchNorms(const GraphDef& input_graph_def,
     current_graph_def = replaced_graph_def;
   } while (did_graph_change);

-  do {
-    did_graph_change = false;
-    GraphDef replaced_graph_def;
-    TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes(
-        current_graph_def,  // clang-format off
-        {"BatchNormWithGlobalNormalization|FusedBatchNorm",  // batch_norm_node
-          {
-            {"BatchToSpaceND",  // batch_to_space_node
-              {
-                {"Conv2D",  // conv_node
-                  {
-                    {"*"},  // input_node
-                    {"Const"},  // weights_node
-                  }
-                },
-                {"Const"},  // block_shape
-                {"Const"},  // crops
-              }
-            },
-            {"Const"},  // mean_node
-            {"Const"},  // variance_node
-            {"Const"},  // beta_node
-            {"Const"},  // gamma_node
-          }
-        },  // clang-format on
-        [&did_graph_change](const NodeMatch& match,
-                            const std::set<string>& input_nodes,
-                            const std::set<string>& output_nodes,
-                            std::vector<NodeDef>* new_nodes) {
-          TF_RETURN_IF_ERROR(FuseBatchNormWithBatchToSpace(match, new_nodes));
-          did_graph_change = true;
-          return Status::OK();
-        },
-        {}, &replaced_graph_def));
-    current_graph_def = replaced_graph_def;
-  } while (did_graph_change);
-
   do {
     did_graph_change = false;
     GraphDef replaced_graph_def;
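The removed `FuseBatchNormWithBatchToSpace` pass relies on a frozen batch norm being expressible as a per-channel scale and offset that can be folded into the upstream convolution's weights plus a bias add. A NumPy sketch of that arithmetic (the folding math only, not the graph rewrite itself), reusing the constants from the deleted test that follows:

```python
import numpy as np

def fold_batch_norm(weights, gamma, beta, mean, variance, eps=1e-5):
    """Fold y = gamma * (x - mean) / sqrt(var + eps) + beta into conv weights.

    weights is an HWIO conv filter; the other arguments are
    per-output-channel vectors.
    """
    scale = gamma / np.sqrt(variance + eps)
    folded_weights = weights * scale   # broadcasts over the last (O) axis
    folded_bias = beta - mean * scale  # becomes the BiasAdd constant
    return folded_weights, folded_bias

w = np.ones((1, 2, 2, 2), dtype=np.float32)  # toy HWIO filter
fw, fb = fold_batch_norm(
    w,
    gamma=np.array([1.0, 2.0]),
    beta=np.array([0.1, 0.6]),
    mean=np.array([10.0, 20.0]),
    variance=np.array([0.25, 0.5]),
)
```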
#include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/nn_ops.h" -#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/sendrecv_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -299,96 +298,6 @@ class FoldOldBatchNormsTest : public ::testing::Test { } }; -void TestFoldFusedBatchNormsWithBatchToSpace() { - auto root = tensorflow::Scope::NewRootScope(); - using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - - Tensor input_data(DT_FLOAT, TensorShape({2, 1, 3, 2})); - test::FillValues( - &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, - -5.0f, -3.0f, -6.0f}); - Output input_op = - Const(root.WithOpName("input_op"), Input::Initializer(input_data)); - - Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); - test::FillValues(&weights_data, - {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); - Output weights_op = - Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); - - Output conv_op = Conv2D(root.WithOpName("conv_op"), input_op, weights_op, - {1, 1, 1, 1}, "VALID"); - - Tensor block_shape_data(DT_INT32, TensorShape({2})); - test::FillValues(&block_shape_data, {1, 2}); - Output block_shape_op = - Const(root.WithOpName("block_shape_op"), Input::Initializer(block_shape_data)); - - Tensor crops_data(DT_INT32, TensorShape({2, 2})); - test::FillValues(&crops_data, {0, 0, 0, 1}); - Output crops_op = - Const(root.WithOpName("crops_op"), Input::Initializer(crops_data)); - - Output batch_to_space_op = BatchToSpaceND(root.WithOpName("batch_to_space_op"), - conv_op, block_shape_op, crops_data); - - Tensor mean_data(DT_FLOAT, TensorShape({2})); - test::FillValues(&mean_data, {10.0f, 20.0f}); - Output mean_op = - Const(root.WithOpName("mean_op"), Input::Initializer(mean_data)); - - Tensor variance_data(DT_FLOAT, TensorShape({2})); - test::FillValues(&variance_data, {0.25f, 0.5f}); - Output variance_op = Const(root.WithOpName("variance_op"), - Input::Initializer(variance_data)); - - Tensor beta_data(DT_FLOAT, TensorShape({2})); - test::FillValues(&beta_data, {0.1f, 0.6f}); - Output beta_op = - Const(root.WithOpName("beta_op"), Input::Initializer(beta_data)); - - Tensor gamma_data(DT_FLOAT, TensorShape({2})); - test::FillValues(&gamma_data, {1.0f, 2.0f}); - Output gamma_op = - Const(root.WithOpName("gamma_op"), Input::Initializer(gamma_data)); - - GraphDef original_graph_def; - TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); - - NodeDef batch_norm_node; - batch_norm_node.set_op("FusedBatchNorm"); - batch_norm_node.set_name("output"); - AddNodeInput("batch_to_space_op", &batch_norm_node); - AddNodeInput("gamma_op", &batch_norm_node); - AddNodeInput("beta_op", &batch_norm_node); - AddNodeInput("mean_op", &batch_norm_node); - AddNodeInput("variance_op", &batch_norm_node); - SetNodeAttr("T", DT_FLOAT, &batch_norm_node); - SetNodeAttr("epsilon", 0.00001f, &batch_norm_node); - SetNodeAttr("is_training", false, &batch_norm_node); - *(original_graph_def.mutable_node()->Add()) = batch_norm_node; - - std::unique_ptr original_session(NewSession(SessionOptions())); - TF_ASSERT_OK(original_session->Create(original_graph_def)); - std::vector original_outputs; - TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); - - GraphDef fused_graph_def; - TF_ASSERT_OK(FoldOldBatchNorms(original_graph_def, {{}, {"output"}}, - &fused_graph_def)); - - std::unique_ptr fused_session(NewSession(SessionOptions())); - 
-  TF_ASSERT_OK(fused_session->Create(fused_graph_def));
-  std::vector<Tensor> fused_outputs;
-  TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs));
-
-  test::ExpectTensorNear<float>(original_outputs[0], fused_outputs[0], 1e-5);
-
-  for (const NodeDef& node : fused_graph_def.node()) {
-    EXPECT_NE("FusedBatchNormWithBatchToSpace", node.op());
-  }
-}
-
 TEST_F(FoldOldBatchNormsTest, TestFoldOldBatchNorms) {
   TestFoldOldBatchNorms();
 }
@@ -406,9 +315,5 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) {
   TestFoldFusedBatchNormsWithConcat(/*split=*/false);
 }

-TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithBatchToSpace) {
-  TestFoldFusedBatchNormsWithBatchToSpace();
-}
-
 }  // namespace graph_transforms
 }  // namespace tensorflow
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 2607b9d7049828..1833d67d82b956 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -48,65 +48,36 @@ py_binary(
     deps = ["//tensorflow:tensorflow_py"],
 )

-COMMON_PIP_DEPS = [
-    ":licenses",
-    "MANIFEST.in",
-    "README",
-    "setup.py",
-    ":included_headers",
-    "//tensorflow:tensorflow_py",
-    "//tensorflow/contrib/boosted_trees:boosted_trees_pip",
-    "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
-    "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test",
-    "//tensorflow/contrib/data/python/ops:contrib_op_loader",
-    "//tensorflow/contrib/eager/python/examples:examples_pip",
-    "//tensorflow/contrib/eager/python:checkpointable_utils",
-    "//tensorflow/contrib/eager/python:evaluator",
-    "//tensorflow/contrib/gan:gan",
-    "//tensorflow/contrib/graph_editor:graph_editor_pip",
-    "//tensorflow/contrib/keras:keras",
-    "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip",
-    "//tensorflow/contrib/nn:nn_py",
-    "//tensorflow/contrib/predictor:predictor_pip",
-    "//tensorflow/contrib/py2tf:py2tf",
-    "//tensorflow/contrib/py2tf/converters:converters",
-    "//tensorflow/contrib/py2tf/converters:test_lib",
-    "//tensorflow/contrib/py2tf/impl:impl",
-    "//tensorflow/contrib/py2tf/pyct:pyct",
-    "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis",
-    "//tensorflow/contrib/receptive_field:receptive_field_pip",
-    "//tensorflow/contrib/session_bundle:session_bundle_pip",
-    "//tensorflow/contrib/signal:signal_py",
-    "//tensorflow/contrib/signal:test_util",
-    "//tensorflow/contrib/slim:slim",
-    "//tensorflow/contrib/slim/python/slim/data:data_pip",
-    "//tensorflow/contrib/slim/python/slim/nets:nets_pip",
-    "//tensorflow/contrib/specs:specs",
-    "//tensorflow/contrib/summary:summary_test_util",
-    "//tensorflow/contrib/tensor_forest:init_py",
-    "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip",
-    "//tensorflow/contrib/timeseries:timeseries_pip",
-    "//tensorflow/contrib/tpu",
-    "//tensorflow/examples/tutorials/mnist:package",
-    "//tensorflow/python:distributed_framework_test_lib",
-    "//tensorflow/python:meta_graph_testdata",
-    "//tensorflow/python:spectral_ops_test_util",
-    "//tensorflow/python:util_example_parser_configuration",
-    "//tensorflow/python/debug:debug_pip",
-    "//tensorflow/python/eager:eager_pip",
-    "//tensorflow/python/saved_model:saved_model",
-    "//tensorflow/python/tools:tools_pip",
-    "//tensorflow/python:test_ops",
-    "//tensorflow/tools/dist_test/server:grpc_tensorflow_server",
-]
-
 # On Windows, python binary is a zip file of runfiles tree.
 # Add everything to its data dependency for generating a runfiles tree
 # for building the pip package on Windows.
 py_binary(
     name = "simple_console_for_windows",
     srcs = ["simple_console_for_windows.py"],
-    data = COMMON_PIP_DEPS,
+    data = [
+        "MANIFEST.in",
+        "README",
+        "setup.py",
+        ":included_headers",
+        "//tensorflow/contrib/nn:nn_py",
+        "//tensorflow/contrib/session_bundle:session_bundle_pip",
+        "//tensorflow/contrib/signal:signal_py",
+        "//tensorflow/contrib/slim/python/slim/data:data_pip",
+        "//tensorflow/python:util_example_parser_configuration",
+        "//tensorflow/python/debug:debug_pip",
+        "//tensorflow/python/saved_model",
+        "//tensorflow/python:spectral_ops_test_util",
+        "//tensorflow/python/tools:tools_pip",
+        "//tensorflow/python/eager:eager_pip",
+        "//tensorflow/contrib/summary:summary_test_util",
+        # These targets don't build on Windows yet. Exclude them for now.
+        # "//tensorflow/contrib/slim",
+        # "//tensorflow/contrib/slim/python/slim/nets:nets_pip",
+        # "//tensorflow/contrib/specs",
+        # "//tensorflow/contrib/tensor_forest:init_py",
+        # "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip",
+        # "//tensorflow/examples/tutorials/mnist:package",
+    ],
     srcs_version = "PY2AND3",
     deps = ["//tensorflow:tensorflow_py"],
 )
@@ -137,7 +108,6 @@ filegroup(
         "@highwayhash//:LICENSE",
         "@jemalloc//:COPYING",
         "@jpeg//:LICENSE.md",
-        "@kafka//:LICENSE",
         "@libxsmm_archive//:LICENSE",
         "@lmdb//:LICENSE",
         "@local_config_sycl//sycl:LICENSE.text",
@@ -167,12 +137,61 @@ sh_binary(
     data = select({
         "//tensorflow:windows": [":simple_console_for_windows"],
         "//tensorflow:windows_msvc": [":simple_console_for_windows"],
-        "//conditions:default": COMMON_PIP_DEPS + [
+        "//conditions:default": [
+            ":licenses",
+            "MANIFEST.in",
+            "README",
+            "setup.py",
+            ":included_headers",
             ":simple_console",
+            "//tensorflow:tensorflow_py",
+            "//tensorflow/contrib/boosted_trees:boosted_trees_pip",
+            "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip",
+            "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test",
+            "//tensorflow/contrib/data/python/ops:contrib_op_loader",
+            "//tensorflow/contrib/eager/python/examples:examples_pip",
+            "//tensorflow/contrib/eager/python:checkpointable_utils",
+            "//tensorflow/contrib/eager/python:evaluator",
+            "//tensorflow/contrib/gan:gan",
+            "//tensorflow/contrib/graph_editor:graph_editor_pip",
+            "//tensorflow/contrib/keras:keras",
+            "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip",
             "//tensorflow/contrib/lite/python:interpreter_test_data",
             "//tensorflow/contrib/lite/toco:toco",
             "//tensorflow/contrib/lite/toco/python:toco_wrapper",
             "//tensorflow/contrib/lite/toco/python:toco_from_protos",
+            "//tensorflow/contrib/nn:nn_py",
+            "//tensorflow/contrib/predictor:predictor_pip",
+            "//tensorflow/contrib/py2tf:py2tf",
+            "//tensorflow/contrib/py2tf/converters:converters",
+            "//tensorflow/contrib/py2tf/converters:test_lib",
+            "//tensorflow/contrib/py2tf/impl:impl",
+            "//tensorflow/contrib/py2tf/pyct:pyct",
+            "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis",
+            "//tensorflow/contrib/receptive_field:receptive_field_pip",
+            "//tensorflow/contrib/session_bundle:session_bundle_pip",
+            "//tensorflow/contrib/signal:signal_py",
+            "//tensorflow/contrib/signal:test_util",
+            "//tensorflow/contrib/slim:slim",
+            "//tensorflow/contrib/slim/python/slim/data:data_pip",
+            "//tensorflow/contrib/slim/python/slim/nets:nets_pip",
+            "//tensorflow/contrib/specs:specs",
+            "//tensorflow/contrib/summary:summary_test_util",
+            "//tensorflow/contrib/tensor_forest:init_py",
+            "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip",
+            "//tensorflow/contrib/timeseries:timeseries_pip",
"//tensorflow/contrib/tpu", + "//tensorflow/examples/tutorials/mnist:package", + "//tensorflow/python:distributed_framework_test_lib", + "//tensorflow/python:meta_graph_testdata", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python:util_example_parser_configuration", + "//tensorflow/python/debug:debug_pip", + "//tensorflow/python/eager:eager_pip", + "//tensorflow/python/saved_model:saved_model", + "//tensorflow/python/tools:tools_pip", + "//tensorflow/python:test_ops", + "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", ], }) + if_mkl(["//third_party/mkl:intel_binary_blob"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index e0152da4dff362..e1a5f091ba0afc 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0' +_VERSION = '1.6.0-rc1' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', @@ -72,7 +72,7 @@ # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ - 'freeze_graph = tensorflow.python.tools.freeze_graph:run_main', + 'freeze_graph = tensorflow.python.tools.freeze_graph:main', 'toco_from_protos = tensorflow.contrib.lite.toco.python.toco_from_protos:main', 'toco = tensorflow.contrib.lite.toco.python.toco_wrapper:main', 'saved_model_cli = tensorflow.python.tools.saved_model_cli:main', diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index edd093510e5483..77cc9f75f77254 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -88,7 +88,6 @@ import shutil from google.cloud import datastore -from six import text_type def is_real_file(dirpath, fname): @@ -151,7 +150,7 @@ def upload_benchmark_data(client, data): """ test_result = json.loads(data) - test_name = text_type(test_result["name"]) + test_name = unicode(test_result["name"]) start_time = datetime.datetime.utcfromtimestamp( float(test_result["startTime"])) batch = [] @@ -163,7 +162,7 @@ def upload_benchmark_data(client, data): t_val.update({ "test": test_name, "start": start_time, - "info": text_type(data) + "info": unicode(data) }) batch.append(t_val) @@ -171,7 +170,7 @@ def upload_benchmark_data(client, data): # the attribute to be fetched and displayed. The full entry information is # also stored as a non-indexed JSON blob. 
for ent in test_result["entries"].get("entry", []): - ent_name = text_type(ent["name"]) + ent_name = unicode(ent["name"]) e_key = client.key("Entry") e_val = datastore.Entity(e_key, exclude_from_indexes=["info"]) e_val.update({ @@ -179,7 +178,7 @@ def upload_benchmark_data(client, data): "start": start_time, "entry": ent_name, "timing": ent["wallTime"], - "info": text_type(json.dumps(ent)) + "info": unicode(json.dumps(ent)) }) batch.append(e_val) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index a922808a70f8bd..abc9eb9bc17c6f 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -12,8 +12,6 @@ load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compil load("//third_party:repo.bzl", "tf_http_archive") load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") -load("//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl", - "def_file_filter_configure") def _extract_version_number(bazel_version): """Extracts the semantic version number from a version string @@ -69,7 +67,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # We must check the bazel version before trying to parse any other BUILD # files, in case the parsing of those build files depends on the bazel # version we require here. - check_bazel_version_at_least("0.10.0") + check_bazel_version_at_least("0.5.4") clang6_configure(name="local_config_clang6") cuda_configure(name="local_config_cuda") tensorrt_configure(name="local_config_tensorrt") @@ -77,10 +75,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sycl_configure(name="local_config_sycl") python_configure(name="local_config_python") - # For windows bazel build - # TODO: Remove def file filter when TensorFlow can export symbols properly on Windows. 
diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl
index a922808a70f8bd..abc9eb9bc17c6f 100644
--- a/tensorflow/workspace.bzl
+++ b/tensorflow/workspace.bzl
@@ -12,8 +12,6 @@ load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compil
 load("//third_party:repo.bzl", "tf_http_archive")
 load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external")
 load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external")
-load("//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl",
-     "def_file_filter_configure")

 def _extract_version_number(bazel_version):
   """Extracts the semantic version number from a version string
@@ -69,7 +67,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""):
   # We must check the bazel version before trying to parse any other BUILD
   # files, in case the parsing of those build files depends on the bazel
   # version we require here.
-  check_bazel_version_at_least("0.10.0")
+  check_bazel_version_at_least("0.5.4")
   clang6_configure(name="local_config_clang6")
   cuda_configure(name="local_config_cuda")
   tensorrt_configure(name="local_config_tensorrt")
@@ -77,10 +75,6 @@
   sycl_configure(name="local_config_sycl")
   python_configure(name="local_config_python")

-  # For windows bazel build
-  # TODO: Remove def file filter when TensorFlow can export symbols properly on Windows.
-  def_file_filter_configure(name = "local_config_def_file_filter")
-
   # Point //external/local_config_arm_compiler to //external/arm_compiler
   arm_compiler_configure(
       name="local_config_arm_compiler",
diff --git a/third_party/jpeg/jpeg.BUILD b/third_party/jpeg/jpeg.BUILD
index 4418ac32fc4b08..87a23925c4316c 100644
--- a/third_party/jpeg/jpeg.BUILD
+++ b/third_party/jpeg/jpeg.BUILD
@@ -526,12 +526,12 @@ config_setting(

 config_setting(
     name = "armeabi-v7a",
-    values = {"cpu": "armeabi-v7a"},
+    values = {"android_cpu": "armeabi-v7a"},
 )

 config_setting(
     name = "arm64-v8a",
-    values = {"cpu": "arm64-v8a"},
+    values = {"android_cpu": "arm64-v8a"},
 )

 config_setting(
diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD
index a839ca717e695f..a61a9e1f6c2b29 100644
--- a/third_party/kafka/BUILD
+++ b/third_party/kafka/BUILD
@@ -130,23 +130,18 @@ cc_library(
     ],
     hdrs = [
         "config.h",
-        "src-cpp/rdkafkacpp.h",
-        "src-cpp/rdkafkacpp_int.h",
-        "src/lz4.c",
-        "src/snappy_compat.h",
-    ],
-    copts = [
-        "-Iexternal/kafka/src",
-        "-Iexternal/kafka/src-cpp",
     ],
     defines = [
     ],
+    includes = [
+        "src",
+        "src-cpp",
+    ],
     linkopts = [
         "-lpthread",
     ],
     visibility = ["//visibility:public"],
     deps = [
         "@boringssl//:ssl",
-        "@zlib_archive//:zlib",
     ],
 )
diff --git a/third_party/py/BUILD.tpl b/third_party/py/BUILD.tpl
index 1dd8ab433a37a1..de06ad5f27e7c0 100644
--- a/third_party/py/BUILD.tpl
+++ b/third_party/py/BUILD.tpl
@@ -2,26 +2,20 @@ licenses(["restricted"])

 package(default_visibility = ["//visibility:public"])

-# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib
-# See https://docs.python.org/3/extending/windows.html
-cc_import(
-    name = "python_lib",
-    interface_library = select({
-        ":windows": ":python_import_lib",
-        # A placeholder for Unix platforms which makes --no_build happy.
-        "//conditions:default": "not-existing.lib",
-    }),
-    system_provided = 1,
-)
-
 cc_library(
     name = "python_headers",
     hdrs = [":python_include"],
-    deps = select({
-        ":windows": [":python_lib"],
+    data = select({
+        ":windows": [":python_import_lib"],
         "//conditions:default": [],
     }),
     includes = ["python_include"],
+    linkopts = select({
+        # TODO(pcloudy): Ideally, this should just go into deps after resolving
+        # https://github.com/bazelbuild/bazel/issues/3237,
+        ":windows": ["$(locations :python_import_lib)"],
+        "//conditions:default": [],
+    }),
 )

 cc_library(