setup.py

# Welcome to the PyTorch setup.py.
#
# Environment variables you are probably interested in:
#
#   DEBUG
#     build with -O0 and -g (debug symbols)
#
#   MAX_JOBS
#     maximum number of compile jobs we should use to compile your code
#
#   NO_CUDA
#     disables CUDA build
#
#   CFLAGS
#     flags to apply to both C and C++ files to be compiled (a quirk of setup.py
#     which we have faithfully adhered to in our build system is that CFLAGS
#     also applies to C++ files, in contrast to the default behavior of autogoo
#     and cmake build systems.)
#
#   CC
#     the C/C++ compiler to use (NB: the CXX flag has no effect for distutils
#     compiles, because distutils always uses CC to compile, even for C++
#     files.
#
# Environment variables for feature toggles:
#
#   NO_CUDNN
#     disables the cuDNN build
#
#   NO_TEST
#     disables the test build
#
#   NO_MIOPEN
#     disables the MIOpen build
#
#   NO_MKLDNN
#     disables the MKLDNN build
#
#   NO_NNPACK
#     disables NNPACK build
#
#   NO_DISTRIBUTED
#     disables distributed (c10d, gloo, mpi, etc.) build
#
#   NO_SYSTEM_NCCL
#     disables use of system-wide nccl (we will use our submoduled
#     copy in third_party/nccl)
#
#   USE_GLOO_IBVERBS
#     toggle features related to distributed support
#
#   USE_OPENCV
#     enables use of OpenCV for additional operators
#
#   BUILD_BINARY
#     enables the additional binaries/ build
#
#   PYTORCH_BUILD_VERSION
#   PYTORCH_BUILD_NUMBER
#     specify the version of PyTorch, rather than the hard-coded version
#     in this file; used when we're building binaries for distribution
#
#   TORCH_CUDA_ARCH_LIST
#     specify which CUDA architectures to build for.
#     ie `TORCH_CUDA_ARCH_LIST="6.0;7.0"`
#     These are not CUDA versions, instead, they specify what
#     classes of NVIDIA hardware we should generate PTX for.
#
#   ONNX_NAMESPACE
#     specify a namespace for ONNX built here rather than the hard-coded
#     one in this file; needed to build with other frameworks that share ONNX.
#
# Environment variables we respect (these environment variables are
# conventional and are often understood/set by other software.)
#
#   CUDA_HOME (Linux/OS X)
#   CUDA_PATH (Windows)
#     specify where CUDA is installed; usually /usr/local/cuda or
#     /usr/local/cuda-x.y
#
#   CUDNN_LIB_DIR
#   CUDNN_INCLUDE_DIR
#   CUDNN_LIBRARY
#     specify where cuDNN is installed
#
#   MIOPEN_LIB_DIR
#   MIOPEN_INCLUDE_DIR
#   MIOPEN_LIBRARY
#     specify where MIOpen is installed
#
#   NCCL_ROOT_DIR
#   NCCL_LIB_DIR
#   NCCL_INCLUDE_DIR
#     specify where nccl is installed
#
#   MKLDNN_LIB_DIR
#   MKLDNN_LIBRARY
#   MKLDNN_INCLUDE_DIR
#     specify where MKLDNN is installed
#
#   NVTOOLSEXT_PATH (Windows only)
#     specify where nvtoolsext is installed
#
#   LIBRARY_PATH
#   LD_LIBRARY_PATH
#     we will search for libraries in these paths

from setuptools import setup, Extension, distutils, Command, find_packages
import setuptools.command.build_ext
import setuptools.command.install
import setuptools.command.develop
import setuptools.command.build_py
import distutils.unixccompiler
import distutils.command.build
import distutils.command.clean
import distutils.sysconfig
import platform
import subprocess
import shutil
import multiprocessing
import sys
import os
import json
import glob
import importlib

from tools.setup_helpers.env import check_env_flag, check_negative_env_flag


def hotpatch_var(var, prefix='USE_'):
    if check_env_flag('NO_' + var):
        os.environ[prefix + var] = '0'
    elif check_negative_env_flag('NO_' + var):
        os.environ[prefix + var] = '1'
    elif check_env_flag('WITH_' + var):
        os.environ[prefix + var] = '1'
    elif check_negative_env_flag('WITH_' + var):
        os.environ[prefix + var] = '0'

# Before we run the setup_helpers, let's look for NO_* and WITH_*
# variables and hotpatch environment with the USE_* equivalent
use_env_vars = ['CUDA', 'CUDNN', 'MIOPEN', 'MKLDNN', 'NNPACK', 'DISTRIBUTED',
                'OPENCV', 'SYSTEM_NCCL', 'GLOO_IBVERBS']
list(map(hotpatch_var, use_env_vars))

# Also hotpatch a few with BUILD_* equivalent
build_env_vars = ['BINARY', 'TEST']
[hotpatch_var(v, 'BUILD_') for v in build_env_vars]

from tools.setup_helpers.cuda import USE_CUDA, CUDA_HOME, CUDA_VERSION
from tools.setup_helpers.build import BUILD_BINARY, BUILD_TEST, USE_OPENCV
from tools.setup_helpers.rocm import USE_ROCM, ROCM_HOME, ROCM_VERSION
from tools.setup_helpers.cudnn import (USE_CUDNN, CUDNN_LIBRARY,
                                       CUDNN_LIB_DIR, CUDNN_INCLUDE_DIR)
from tools.setup_helpers.miopen import (USE_MIOPEN, MIOPEN_LIBRARY,
                                        MIOPEN_LIB_DIR, MIOPEN_INCLUDE_DIR)
from tools.setup_helpers.nccl import USE_NCCL, USE_SYSTEM_NCCL, NCCL_LIB_DIR, \
    NCCL_INCLUDE_DIR, NCCL_ROOT_DIR, NCCL_SYSTEM_LIB
from tools.setup_helpers.mkldnn import (USE_MKLDNN, MKLDNN_LIBRARY,
                                        MKLDNN_LIB_DIR, MKLDNN_INCLUDE_DIR)
from tools.setup_helpers.nnpack import USE_NNPACK
from tools.setup_helpers.nvtoolext import NVTOOLEXT_HOME
from tools.setup_helpers.generate_code import generate_code
from tools.setup_helpers.ninja_builder import NinjaBuilder, ninja_build_ext
from tools.setup_helpers.dist_check import USE_DISTRIBUTED, \
    USE_GLOO_IBVERBS

################################################################################
# Parameters parsed from environment
################################################################################

DEBUG = check_env_flag('DEBUG')
IS_WINDOWS = (platform.system() == 'Windows')
IS_DARWIN = (platform.system() == 'Darwin')
IS_LINUX = (platform.system() == 'Linux')

BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH')
USE_CUDA_STATIC_LINK = check_env_flag('USE_CUDA_STATIC_LINK')

NUM_JOBS = multiprocessing.cpu_count()
max_jobs = os.getenv("MAX_JOBS")
if max_jobs is not None:
    NUM_JOBS = min(NUM_JOBS, int(max_jobs))

ONNX_NAMESPACE = os.getenv("ONNX_NAMESPACE")
if not ONNX_NAMESPACE:
    ONNX_NAMESPACE = "onnx_torch"

# Ninja
try:
    import ninja
    USE_NINJA = True
    ninja_global = NinjaBuilder('global')
except ImportError:
    USE_NINJA = False
    ninja_global = None

# Constant known variables used throughout this file
cwd = os.path.dirname(os.path.abspath(__file__))
lib_path = os.path.join(cwd, "torch", "lib")
third_party_path = os.path.join(cwd, "third_party")
tmp_install_path = lib_path + "/tmp_install"
caffe2_build_dir = os.path.join(cwd, "build")
# lib/pythonx.x/site-packages
rel_site_packages = distutils.sysconfig.get_python_lib(prefix='')
# full absolute path to the dir above
full_site_packages = distutils.sysconfig.get_python_lib()


class PytorchCommand(setuptools.Command):
    """
    Base Pytorch command to avoid implementing initialize/finalize_options in
    every subclass
    """
    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass


################################################################################
# Patches and workarounds
################################################################################
# Monkey-patch setuptools to compile in parallel
if not USE_NINJA:
    def parallelCCompile(self, sources, output_dir=None, macros=None,
                         include_dirs=None, debug=0, extra_preargs=None,
                         extra_postargs=None, depends=None):
        # those lines are copied from distutils.ccompiler.CCompiler directly
        macros, objects, extra_postargs, pp_opts, build = self._setup_compile(
            output_dir, macros, include_dirs, sources, depends, extra_postargs)
        cc_args = self._get_cc_args(pp_opts, debug, extra_preargs)

        # compile using a thread pool
        import multiprocessing.pool

        def _single_compile(obj):
            src, ext = build[obj]
            self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts)
        multiprocessing.pool.ThreadPool(NUM_JOBS).map(_single_compile, objects)

        return objects
    distutils.ccompiler.CCompiler.compile = parallelCCompile

# Patch for linking with ccache
original_link = distutils.unixccompiler.UnixCCompiler.link


def patched_link(self, *args, **kwargs):
    _cxx = self.compiler_cxx
    self.compiler_cxx = None
    result = original_link(self, *args, **kwargs)
    self.compiler_cxx = _cxx
    return result

distutils.unixccompiler.UnixCCompiler.link = patched_link

# Workaround setuptools -Wstrict-prototypes warnings
# I lifted this code from https://stackoverflow.com/a/29634231/23845
cfg_vars = distutils.sysconfig.get_config_vars()
for key, value in cfg_vars.items():
    if type(value) == str:
        cfg_vars[key] = value.replace("-Wstrict-prototypes", "")


################################################################################
# Version, create_version_file, and package_name
################################################################################
package_name = os.getenv('TORCH_PACKAGE_NAME', 'torch')
version = '0.5.0a0'
if os.getenv('PYTORCH_BUILD_VERSION'):
    assert os.getenv('PYTORCH_BUILD_NUMBER') is not None
    build_number = int(os.getenv('PYTORCH_BUILD_NUMBER'))
    version = os.getenv('PYTORCH_BUILD_VERSION')
    if build_number > 1:
        version += '.post' + str(build_number)
else:
    try:
        sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip()
        version += '+' + sha[:7]
    except Exception:
        pass
print("Building wheel {}-{}".format(package_name, version))


class create_version_file(PytorchCommand):
    def run(self):
        global version, cwd
        print('-- Building version ' + version)
        version_path = os.path.join(cwd, 'torch', 'version.py')
        with open(version_path, 'w') as f:
            f.write("__version__ = '{}'\n".format(version))
            # NB: This is not 100% accurate, because you could have built the
            # library code with DEBUG, but csrc without DEBUG (in which case
            # this would claim to be a release build when it's not.)
            f.write("debug = {}\n".format(repr(DEBUG)))
            f.write("cuda = {}\n".format(repr(CUDA_VERSION)))


################################################################################
# Building dependent libraries
################################################################################

# All libraries that torch could depend on
dep_libs = [
    'nccl', 'caffe2',
    'libshm', 'libshm_windows', 'gloo', 'THD', 'c10d',
]

missing_pydep = '''
Missing build dependency: Unable to `import {importname}`.
Please install it via `conda install {module}` or `pip install {module}`
'''.strip()


def check_pydep(importname, module):
    try:
        importlib.import_module(importname)
    except ImportError:
        raise RuntimeError(missing_pydep.format(importname=importname, module=module))


# Calls build_pytorch_libs.sh/bat with the correct env variables
def build_libs(libs):
    for lib in libs:
        assert lib in dep_libs, 'invalid lib: {}'.format(lib)
    if IS_WINDOWS:
        build_libs_cmd = ['tools\\build_pytorch_libs.bat']
    else:
        build_libs_cmd = ['bash', os.path.join('..', 'tools', 'build_pytorch_libs.sh')]
    my_env = os.environ.copy()
    my_env["PYTORCH_PYTHON"] = sys.executable
    my_env["CMAKE_PREFIX_PATH"] = full_site_packages
    my_env["NUM_JOBS"] = str(NUM_JOBS)
    my_env["ONNX_NAMESPACE"] = ONNX_NAMESPACE
    if not IS_WINDOWS:
        if USE_NINJA:
            my_env["CMAKE_GENERATOR"] = '-GNinja'
            my_env["CMAKE_INSTALL"] = 'ninja install'
        else:
            my_env['CMAKE_GENERATOR'] = ''
            my_env['CMAKE_INSTALL'] = 'make install'
    if USE_SYSTEM_NCCL:
        my_env["NCCL_ROOT_DIR"] = NCCL_ROOT_DIR
    if USE_CUDA:
        my_env["CUDA_BIN_PATH"] = CUDA_HOME
        build_libs_cmd += ['--use-cuda']
        if IS_WINDOWS:
            my_env["NVTOOLEXT_HOME"] = NVTOOLEXT_HOME
    if USE_CUDA_STATIC_LINK:
        build_libs_cmd += ['--cuda-static-link']
    if USE_ROCM:
        build_libs_cmd += ['--use-rocm']
    if USE_NNPACK:
        build_libs_cmd += ['--use-nnpack']
    if USE_CUDNN:
        my_env["CUDNN_LIB_DIR"] = CUDNN_LIB_DIR
        my_env["CUDNN_LIBRARY"] = CUDNN_LIBRARY
        my_env["CUDNN_INCLUDE_DIR"] = CUDNN_INCLUDE_DIR
    if USE_MIOPEN:
        my_env["MIOPEN_LIB_DIR"] = MIOPEN_LIB_DIR
        my_env["MIOPEN_LIBRARY"] = MIOPEN_LIBRARY
        my_env["MIOPEN_INCLUDE_DIR"] = MIOPEN_INCLUDE_DIR
    if USE_MKLDNN:
        my_env["MKLDNN_LIB_DIR"] = MKLDNN_LIB_DIR
        my_env["MKLDNN_LIBRARY"] = MKLDNN_LIBRARY
        my_env["MKLDNN_INCLUDE_DIR"] = MKLDNN_INCLUDE_DIR
        build_libs_cmd += ['--use-mkldnn']
    if USE_GLOO_IBVERBS:
        build_libs_cmd += ['--use-gloo-ibverbs']

    my_env["BUILD_TORCH"] = "ON"
    my_env["BUILD_PYTHON"] = "ON"
    my_env["BUILD_BINARY"] = "ON" if BUILD_BINARY else "OFF"
    my_env["BUILD_TEST"] = "ON" if BUILD_TEST else "OFF"
    my_env["INSTALL_TEST"] = "ON" if BUILD_TEST else "OFF"
    my_env["USE_OPENCV"] = "ON" if USE_OPENCV else "OFF"

    try:
        os.mkdir('build')
    except OSError:
        pass

    kwargs = {'cwd': 'build'} if not IS_WINDOWS else {}

    if subprocess.call(build_libs_cmd + libs, env=my_env, **kwargs) != 0:
        print("Failed to run '{}'".format(' '.join(build_libs_cmd + libs)))
        sys.exit(1)


# Build all dependent libraries
class build_deps(PytorchCommand):
    def run(self):
        # Check if you remembered to check out submodules
        def check_file(f):
            if not os.path.exists(f):
                print("Could not find {}".format(f))
                print("Did you run 'git submodule update --init'?")
                sys.exit(1)
        check_file(os.path.join(third_party_path, "gloo", "CMakeLists.txt"))
        check_file(os.path.join(third_party_path, "pybind11", "CMakeLists.txt"))
        check_file(os.path.join(third_party_path, 'cpuinfo', 'CMakeLists.txt'))
        check_file(os.path.join(third_party_path, 'catch', 'CMakeLists.txt'))
        check_file(os.path.join(third_party_path, 'onnx', 'CMakeLists.txt'))

        check_pydep('yaml', 'pyyaml')
        check_pydep('typing', 'typing')

        libs = []
        if USE_NCCL and not USE_SYSTEM_NCCL:
            libs += ['nccl']
        libs += ['caffe2']
        if IS_WINDOWS:
            libs += ['libshm_windows']
        else:
            libs += ['libshm']
        if USE_DISTRIBUTED:
            if IS_LINUX:
                libs += ['gloo']
                # TODO: make c10d build without CUDA
                if USE_CUDA:
                    libs += ['c10d']
            libs += ['THD']
        build_libs(libs)

        # Use copies instead of symbolic files.
        # Windows has very poor support for them.
        sym_files = ['tools/shared/cwrap_common.py', 'tools/shared/_utils_internal.py']
        orig_files = ['aten/src/ATen/common_with_cwrap.py', 'torch/_utils_internal.py']
        for sym_file, orig_file in zip(sym_files, orig_files):
            if os.path.exists(sym_file):
                os.remove(sym_file)
            shutil.copyfile(orig_file, sym_file)

        # Copy headers necessary to compile C++ extensions.
        #
        # This is not perfect solution as build does not depend on any of
        # the auto-generated code and auto-generated files will not be
        # included in this copy. If we want to use auto-generated files,
        # we need to find a better way to do this.
        # More information can be found in conversation thread of PR #5772

        self.copy_tree('torch/csrc', 'torch/lib/include/torch/csrc/')
        self.copy_tree('third_party/pybind11/include/pybind11/',
                       'torch/lib/include/pybind11')
        self.copy_file('torch/csrc/torch.h', 'torch/lib/include/torch/torch.h')
        self.copy_file('torch/op.h', 'torch/lib/include/torch/op.h')


build_dep_cmds = {}

for lib in dep_libs:
    # wrap in function to capture lib
    class build_dep(build_deps):
        description = 'Build {} external library'.format(lib)

        def run(self):
            build_libs([self.lib])
    build_dep.lib = lib
    build_dep_cmds['build_' + lib.lower()] = build_dep


class build_module(PytorchCommand):
    def run(self):
        self.run_command('build_py')
        self.run_command('build_ext')


class build_py(setuptools.command.build_py.build_py):

    def run(self):
        self.run_command('create_version_file')
        setuptools.command.build_py.build_py.run(self)


class develop(setuptools.command.develop.develop):

    def run(self):
        self.run_command('create_version_file')
        setuptools.command.develop.develop.run(self)
        self.create_compile_commands()

        # Copy Caffe2's Python proto files (generated during the build with the
        # protobuf python compiler) from the build folder to the root folder
        # cp root/build/caffe2/proto/proto.py root/caffe2/proto/proto.py
        for src in glob.glob(
                os.path.join(caffe2_build_dir, 'caffe2', 'proto', '*.py')):
            dst = os.path.join(
                cwd, os.path.relpath(src, caffe2_build_dir))
            self.copy_file(src, dst)

    def create_compile_commands(self):
        def load(filename):
            with open(filename) as f:
                return json.load(f)
        ninja_files = glob.glob('build/*compile_commands.json')
        cmake_files = glob.glob('torch/lib/build/*/compile_commands.json')
        all_commands = [entry
                        for f in ninja_files + cmake_files
                        for entry in load(f)]
        with open('compile_commands.json', 'w') as f:
            json.dump(all_commands, f, indent=2)
        if not USE_NINJA:
            print("WARNING: 'develop' is not building C++ code incrementally")
            print("because ninja is not installed. Run this to enable it:")
            print(" > pip install ninja")


def monkey_patch_THD_link_flags():
    '''
    THD's dynamic link deps are not determined until after build_deps is run
    So, we need to monkey-patch them in later
    '''
    # read tmp_install_path/THD_deps.txt for THD's dynamic linkage deps
    with open(tmp_install_path + '/THD_deps.txt', 'r') as f:
        thd_deps_ = f.read()
    thd_deps = []
    # remove empty lines
    for l in thd_deps_.split(';'):
        if l != '':
            thd_deps.append(l)

    C.extra_link_args += thd_deps


def monkey_patch_C10D_inc_flags():
    '''
    C10D's include deps are not determined until after build c10d is run, so
    we need to monkey-patch it.
    '''
    mpi_include_path_file = tmp_install_path + "/include/c10d/mpi_include_path"
    if os.path.exists(mpi_include_path_file):
        with open(mpi_include_path_file, 'r') as f:
            mpi_include_paths = f.readlines()
        mpi_include_paths = [p.strip() for p in mpi_include_paths]
        C.include_dirs += mpi_include_paths
        print("-- For c10d, will include MPI paths: {}".format(mpi_include_paths))


build_ext_parent = ninja_build_ext if USE_NINJA \
    else setuptools.command.build_ext.build_ext


class build_ext(build_ext_parent):

    def run(self):
        # Print build options
        if USE_NUMPY:
            print('-- Building with NumPy bindings')
        else:
            print('-- NumPy not found')
        if USE_CUDNN:
            print('-- Detected cuDNN at ' + CUDNN_LIBRARY + ', ' + CUDNN_INCLUDE_DIR)
        else:
            print('-- Not using cuDNN')
        if USE_MIOPEN:
            print('-- Detected MIOpen at ' + MIOPEN_LIBRARY + ', ' + MIOPEN_INCLUDE_DIR)
        else:
            print('-- Not using MIOpen')
        if USE_CUDA:
            print('-- Detected CUDA at ' + CUDA_HOME)
        else:
            print('-- Not using CUDA')
        if USE_MKLDNN:
            print('-- Detected MKLDNN at ' + MKLDNN_LIBRARY + ', ' + MKLDNN_INCLUDE_DIR)
        else:
            print('-- Not using MKLDNN')
        if USE_NCCL and USE_SYSTEM_NCCL:
            print('-- Using system provided NCCL library at ' +
                  NCCL_SYSTEM_LIB + ', ' + NCCL_INCLUDE_DIR)
        elif USE_NCCL:
            print('-- Building NCCL library')
        else:
            print('-- Not using NCCL')
        if USE_DISTRIBUTED:
            print('-- Building with THD distributed package ')
            monkey_patch_THD_link_flags()
            if IS_LINUX and USE_CUDA:
                print('-- Building with c10d distributed package ')
                monkey_patch_C10D_inc_flags()
            else:
                print('-- Building without c10d distributed package')
        else:
            print('-- Building without distributed package')

        generate_code(ninja_global)

        if USE_NINJA:
            # before we start the normal build make sure all generated code
            # gets built
            ninja_global.run()

        # It's an old-style class in Python 2.7...
        setuptools.command.build_ext.build_ext.run(self)

        # Copy the essential export library to compile C++ extensions.
        if IS_WINDOWS:
            build_temp = self.build_temp

            ext_filename = self.get_ext_filename('_C')
            lib_filename = '.'.join(ext_filename.split('.')[:-1]) + '.lib'

            export_lib = os.path.join(
                build_temp, 'torch', 'csrc', lib_filename).replace('\\', '/')

            build_lib = self.build_lib

            target_lib = os.path.join(
                build_lib, 'torch', 'lib', '_C.lib').replace('\\', '/')

            self.copy_file(export_lib, target_lib)

    def build_extensions(self):
        # The caffe2 extensions are created in
        # tmp_install/lib/pythonM.m/site-packages/caffe2/python/
        # and need to be copied to build/lib.linux.... , which will be a
        # platform dependent build folder created by the "build" command of
        # setuptools. Only the contents of this folder are installed in the
        # "install" command by default.
        # We only make this copy for Caffe2's pybind extensions
        caffe2_pybind_exts = [
            'caffe2.python.caffe2_pybind11_state',
            'caffe2.python.caffe2_pybind11_state_gpu',
            'caffe2.python.caffe2_pybind11_state_hip',
        ]
        i = 0
        while i < len(self.extensions):
            ext = self.extensions[i]
            if ext.name not in caffe2_pybind_exts:
                i += 1
                continue
            fullname = self.get_ext_fullname(ext.name)
            filename = self.get_ext_filename(fullname)
            print("\nCopying extension {}".format(ext.name))

            src = os.path.join(tmp_install_path, rel_site_packages, filename)
            if not os.path.exists(src):
                print("{} does not exist".format(src))
                del self.extensions[i]
            else:
                dst = os.path.join(os.path.realpath(self.build_lib), filename)
                print("Copying {} from {} to {}".format(ext.name, src, dst))
                dst_dir = os.path.dirname(dst)
                if not os.path.exists(dst_dir):
                    os.makedirs(dst_dir)
                self.copy_file(src, dst)
                i += 1
        distutils.command.build_ext.build_ext.build_extensions(self)

    def get_outputs(self):
        outputs = distutils.command.build_ext.build_ext.get_outputs(self)
        outputs.append(os.path.join(self.build_lib, "caffe2"))
        print("setup.py::get_outputs returning {}".format(outputs))
        return outputs


class build(distutils.command.build.build):
    sub_commands = [
        ('build_deps', lambda self: True),
    ] + distutils.command.build.build.sub_commands


class install(setuptools.command.install.install):

    def run(self):
        if not self.skip_build:
            self.run_command('build_deps')

        setuptools.command.install.install.run(self)


class rebuild_libtorch(distutils.command.build.build):
    def run(self):
        if subprocess.call(['ninja', 'install'], cwd='build') != 0:
            print("Failed to run `ninja install` for the `rebuild_libtorch` command")
            sys.exit(1)


class clean(distutils.command.clean.clean):

    def run(self):
        import glob
        import re
        with open('.gitignore', 'r') as f:
            ignores = f.read()
            pat = re.compile(r'^#( BEGIN NOT-CLEAN-FILES )?')
            for wildcard in filter(None, ignores.split('\n')):
                match = pat.match(wildcard)
                if match:
                    if match.group(1):
                        # Marker is found and stop reading .gitignore.
                        break
                    # Ignore lines which begin with '#'.
                else:
                    for filename in glob.glob(wildcard):
                        try:
                            os.remove(filename)
                        except OSError:
                            shutil.rmtree(filename, ignore_errors=True)

        # It's an old-style class in Python 2.7...
        distutils.command.clean.clean.run(self)


################################################################################
# Configure compile flags
################################################################################

include_dirs = []
library_dirs = []

if IS_WINDOWS:
    # /NODEFAULTLIB makes sure we only link to DLL runtime
    # and matches the flags set for protobuf and ONNX
    extra_link_args = ['/NODEFAULTLIB:LIBCMT.LIB']
    # /MD links against DLL runtime
    # and matches the flags set for protobuf and ONNX
    # /Z7 turns on symbolic debugging information in .obj files
    # /EHa is about native C++ catch support for asynchronous
    # structured exception handling (SEH)
    # /DNOMINMAX removes builtin min/max functions
    # /wdXXXX disables warning no. XXXX
    extra_compile_args = ['/MD', '/Z7',
                          '/EHa', '/DNOMINMAX',
                          '/wd4267', '/wd4251', '/wd4522', '/wd4522', '/wd4838',
                          '/wd4305', '/wd4244', '/wd4190', '/wd4101', '/wd4996',
                          '/wd4275']
    if sys.version_info[0] == 2:
        if not check_env_flag('FORCE_PY27_BUILD'):
            print('The support for PyTorch with Python 2.7 on Windows is very experimental.')
            print('Please set the flag `FORCE_PY27_BUILD` to 1 to continue build.')
            sys.exit(1)
        # /bigobj increases number of sections in .obj file, which is needed to link
        # against libaries in Python 2.7 under Windows
        extra_compile_args.append('/bigobj')
else:
    extra_link_args = []
    extra_compile_args = [
        '-std=c++11',
        '-Wall',
        '-Wextra',
        '-Wno-strict-overflow',
        '-Wno-unused-parameter',
        '-Wno-missing-field-initializers',
        '-Wno-write-strings',
        '-Wno-zero-length-array',
        '-Wno-unknown-pragmas',
        # This is required for Python 2 declarations that are deprecated in 3.
        '-Wno-deprecated-declarations',
        # Python 2.6 requires -fno-strict-aliasing, see
        # http://legacy.python.org/dev/peps/pep-3123/
        # We also depend on it in our code (even Python 3).
        '-fno-strict-aliasing',
        # Clang has an unfixed bug leading to spurious missing
        # braces warnings, see
        # https://bugs.llvm.org/show_bug.cgi?id=21629
        '-Wno-missing-braces',
        # gcc7 seems to report spurious warnings with this enabled
        "-Wno-stringop-overflow",
    ]
    if check_env_flag('WERROR'):
        extra_compile_args.append('-Werror')

include_dirs += [
    cwd,
    tmp_install_path + "/include",
    tmp_install_path + "/include/TH",
    tmp_install_path + "/include/THNN",
    tmp_install_path + "/include/ATen",
    third_party_path + "/pybind11/include",
    os.path.join(cwd, "torch", "csrc"),
    "build/third_party",
]

library_dirs.append(lib_path)

# we specify exact lib names to avoid conflict with lua-torch installs
CAFFE2_LIBS = [os.path.join(lib_path, 'libcaffe2.so')]
if USE_CUDA:
    CAFFE2_LIBS.extend(['-Wl,--no-as-needed', os.path.join(lib_path, 'libcaffe2_gpu.so'), '-Wl,--as-needed'])
if USE_ROCM:
    CAFFE2_LIBS.extend(['-Wl,--no-as-needed', os.path.join(lib_path, 'libcaffe2_hip.so'), '-Wl,--as-needed'])
THD_LIB = os.path.join(lib_path, 'libTHD.a')
NCCL_LIB = os.path.join(lib_path, 'libnccl.so.1')
C10D_LIB = os.path.join(lib_path, 'libc10d.a')

# static library only
if DEBUG:
    PROTOBUF_STATIC_LIB = os.path.join(lib_path, 'libprotobufd.a')
else:
    PROTOBUF_STATIC_LIB = os.path.join(lib_path, 'libprotobuf.a')

if IS_DARWIN:
    CAFFE2_LIBS = [os.path.join(lib_path, 'libcaffe2.dylib')]
    if USE_CUDA:
        CAFFE2_LIBS.append(os.path.join(lib_path, 'libcaffe2_gpu.dylib'))
    if USE_ROCM:
        CAFFE2_LIBS.append(os.path.join(lib_path, 'libcaffe2_hip.dylib'))
    NCCL_LIB = os.path.join(lib_path, 'libnccl.1.dylib')

if IS_WINDOWS:
    CAFFE2_LIBS = [os.path.join(lib_path, 'caffe2.lib')]
    if USE_CUDA:
        CAFFE2_LIBS.append(os.path.join(lib_path, 'caffe2_gpu.lib'))
    if USE_ROCM:
        CAFFE2_LIBS.append(os.path.join(lib_path, 'caffe2_hip.lib'))
    # Windows needs direct access to ONNX libraries as well
    # as through Caffe2 library
    CAFFE2_LIBS += [
        os.path.join(lib_path, 'onnx.lib'),
        os.path.join(lib_path, 'onnx_proto.lib'),
    ]
    if DEBUG:
        PROTOBUF_STATIC_LIB = os.path.join(lib_path, 'libprotobufd.lib')
    else:
        PROTOBUF_STATIC_LIB = os.path.join(lib_path, 'libprotobuf.lib')

main_compile_args = ['-D_THP_CORE', '-DONNX_NAMESPACE=' + ONNX_NAMESPACE]
main_libraries = ['shm']
main_link_args = CAFFE2_LIBS + [PROTOBUF_STATIC_LIB]
if IS_WINDOWS:
    main_link_args.append(os.path.join(lib_path, 'torch.lib'))
elif IS_DARWIN:
    main_link_args.append(os.path.join(lib_path, 'libtorch.dylib'))
else:
    main_link_args.append(os.path.join(lib_path, 'libtorch.so'))
main_sources = [
    "torch/csrc/DataLoader.cpp",
    "torch/csrc/Device.cpp",
    "torch/csrc/Dtype.cpp",
    "torch/csrc/DynamicTypes.cpp",
    "torch/csrc/Exceptions.cpp",
    "torch/csrc/Generator.cpp",
    "torch/csrc/Layout.cpp",
    "torch/csrc/Module.cpp",
    "torch/csrc/PtrWrapper.cpp",
    "torch/csrc/Size.cpp",
    "torch/csrc/Storage.cpp",
    "torch/csrc/autograd/functions/init.cpp",
    "torch/csrc/autograd/generated/python_functions.cpp",
    "torch/csrc/autograd/generated/python_nn_functions.cpp",
    "torch/csrc/autograd/generated/python_torch_functions.cpp",
    "torch/csrc/autograd/generated/python_variable_methods.cpp",
    "torch/csrc/autograd/init.cpp",
    "torch/csrc/autograd/python_anomaly_mode.cpp",
    "torch/csrc/autograd/python_cpp_function.cpp",
    "torch/csrc/autograd/python_engine.cpp",
    "torch/csrc/autograd/python_function.cpp",
    "torch/csrc/autograd/python_hook.cpp",
    "torch/csrc/autograd/python_legacy_variable.cpp",
    "torch/csrc/autograd/python_variable.cpp",
    "torch/csrc/autograd/python_variable_indexing.cpp",
    "torch/csrc/byte_order.cpp",
    "torch/csrc/jit/batched/BatchTensor.cpp",
    "torch/csrc/jit/init.cpp",
    "torch/csrc/jit/ivalue.cpp",
    "torch/csrc/jit/passes/onnx.cpp",
    "torch/csrc/jit/passes/onnx/fixup_onnx_loop.cpp",
    "torch/csrc/jit/passes/onnx/prepare_division_for_onnx.cpp",
    "torch/csrc/jit/passes/onnx/peephole.cpp",
    "torch/csrc/jit/passes/to_batch.cpp",
    "torch/csrc/jit/python_arg_flatten.cpp",
    "torch/csrc/jit/python_interpreter.cpp",
    "torch/csrc/jit/python_ir.cpp",
    "torch/csrc/jit/python_tracer.cpp",
    "torch/csrc/jit/script/init.cpp",
    "torch/csrc/jit/script/lexer.cpp",
    "torch/csrc/jit/script/module.cpp",
    "torch/csrc/jit/script/python_tree_views.cpp",
    "torch/csrc/nn/THNN.cpp",
    "torch/csrc/onnx/init.cpp",
    "torch/csrc/serialization.cpp",
    "torch/csrc/tensor/python_tensor.cpp",
    "torch/csrc/utils.cpp",
    "torch/csrc/utils/cuda_lazy_init.cpp",
    "torch/csrc/utils/invalid_arguments.cpp",
    "torch/csrc/utils/object_ptr.cpp",
    "torch/csrc/utils/python_arg_parser.cpp",
    "torch/csrc/utils/tensor_apply.cpp",
    "torch/csrc/utils/tensor_conversion_dispatch.cpp",
    "torch/csrc/utils/tensor_dtypes.cpp",
    "torch/csrc/utils/tensor_flatten.cpp",
    "torch/csrc/utils/tensor_layouts.cpp",
    "torch/csrc/utils/tensor_list.cpp",
    "torch/csrc/utils/tensor_new.cpp",
    "torch/csrc/utils/tensor_numpy.cpp",
    "torch/csrc/utils/tensor_types.cpp",
    "torch/csrc/utils/tuple_parser.cpp",
]

try:
    import numpy as np
    include_dirs.append(np.get_include())
    extra_compile_args.append('-DUSE_NUMPY')
    USE_NUMPY = True
except ImportError:
    USE_NUMPY = False

if USE_DISTRIBUTED:
    extra_compile_args += ['-DUSE_DISTRIBUTED']
    main_sources += [
        "torch/csrc/distributed/Module.cpp",
    ]
    include_dirs += [tmp_install_path + "/include/THD"]
    main_link_args += [THD_LIB]
    if IS_LINUX and USE_CUDA:
        extra_compile_args.append('-DUSE_C10D')
        main_sources.append('torch/csrc/distributed/c10d/init.cpp')
        main_sources.append('torch/csrc/distributed/c10d/ddp.cpp')
        main_link_args.append(C10D_LIB)

if USE_CUDA:
    nvtoolext_lib_name = None
    if IS_WINDOWS:
        cuda_lib_path = CUDA_HOME + '/lib/x64/'
        nvtoolext_lib_path = NVTOOLEXT_HOME + '/lib/x64/'
        nvtoolext_include_path = os.path.join(NVTOOLEXT_HOME, 'include')

        library_dirs.append(nvtoolext_lib_path)
        include_dirs.append(nvtoolext_include_path)

        nvtoolext_lib_name = 'nvToolsExt64_1'

        # MSVC doesn't support runtime symbol resolving, `nvrtc` and `cuda` should be linked
        main_libraries += ['nvrtc', 'cuda']
    else:
        cuda_lib_dirs = ['lib64', 'lib']

        for lib_dir in cuda_lib_dirs:
            cuda_lib_path = os.path.join(CUDA_HOME, lib_dir)
            if os.path.exists(cuda_lib_path):
                break
        extra_link_args.append('-Wl,-rpath,' + cuda_lib_path)

        nvtoolext_lib_name = 'nvToolsExt'

    library_dirs.append(cuda_lib_path)
    cuda_include_path = os.path.join(CUDA_HOME, 'include')
    include_dirs.append(cuda_include_path)
    include_dirs.append(tmp_install_path + "/include/THCUNN")
    extra_compile_args += ['-DUSE_CUDA']
    extra_compile_args += ['-DCUDA_LIB_PATH=' + cuda_lib_path]
    main_libraries += ['cudart', nvtoolext_lib_name]
    main_sources += [
        "torch/csrc/cuda/Module.cpp",
        "torch/csrc/cuda/Storage.cpp",
        "torch/csrc/cuda/Stream.cpp",
        "torch/csrc/cuda/utils.cpp",
        "torch/csrc/cuda/comm.cpp",
        "torch/csrc/cuda/python_comm.cpp",
        "torch/csrc/cuda/serialization.cpp",
        "torch/csrc/nn/THCUNN.cpp",
    ]

if USE_ROCM:
    rocm_include_path = '/opt/rocm/include'
    hcc_include_path = '/opt/rocm/hcc/include'
    rocblas_include_path = '/opt/rocm/rocblas/include'
    hipsparse_include_path = '/opt/rocm/hipsparse/include'
    rocfft_include_path = '/opt/rocm/rocfft/include'
    hiprand_include_path = '/opt/rocm/hiprand/include'
    rocrand_include_path = '/opt/rocm/rocrand/include'
    hip_lib_path = '/opt/rocm/hip/lib'
    hcc_lib_path = '/opt/rocm/hcc/lib'
    include_dirs.append(rocm_include_path)
    include_dirs.append(hcc_include_path)
    include_dirs.append(rocblas_include_path)
    include_dirs.append(rocfft_include_path)
    include_dirs.append(hipsparse_include_path)
    include_dirs.append(hiprand_include_path)
    include_dirs.append(rocrand_include_path)
    include_dirs.append(tmp_install_path + "/include/THCUNN")
    extra_link_args.append('-L' + hip_lib_path)
    extra_link_args.append('-Wl,-rpath,' + hip_lib_path)
    extra_compile_args += ['-DUSE_ROCM']
    extra_compile_args += ['-D__HIP_PLATFORM_HCC__']

    main_sources += [
        "torch/csrc/cuda/Module.cpp",
        "torch/csrc/cuda/Storage.cpp",
        "torch/csrc/cuda/Stream.cpp",
        "torch/csrc/cuda/utils.cpp",
        "torch/csrc/cuda/comm.cpp",
        "torch/csrc/cuda/python_comm.cpp",
        "torch/csrc/cuda/serialization.cpp",
        "torch/csrc/nn/THCUNN.cpp",
    ]

if USE_NCCL:
    if USE_SYSTEM_NCCL:
        main_link_args += [NCCL_SYSTEM_LIB]
        include_dirs.append(NCCL_INCLUDE_DIR)
    else:
        main_link_args += [NCCL_LIB]
    extra_compile_args += ['-DUSE_NCCL']
    main_sources += [
        "torch/csrc/cuda/nccl.cpp",
        "torch/csrc/cuda/python_nccl.cpp",
    ]
if USE_CUDNN:
    main_libraries += [CUDNN_LIBRARY]
    # NOTE: these are at the front, in case there's another cuDNN in CUDA path
    include_dirs.insert(0, CUDNN_INCLUDE_DIR)
    if not IS_WINDOWS:
        extra_link_args.insert(0, '-Wl,-rpath,' + CUDNN_LIB_DIR)
    extra_compile_args += ['-DUSE_CUDNN']

if USE_MIOPEN:
    main_libraries += [MIOPEN_LIBRARY]
    include_dirs.insert(0, MIOPEN_INCLUDE_DIR)
    extra_link_args.append('-L' + MIOPEN_LIB_DIR)
    if not IS_WINDOWS:
        extra_link_args.insert(0, '-Wl,-rpath,' + MIOPEN_LIB_DIR)
    extra_compile_args += ['-DWITH_MIOPEN']

if DEBUG:
    if IS_WINDOWS:
        extra_link_args.append('/DEBUG:FULL')
    else:
        extra_compile_args += ['-O0', '-g']
        extra_link_args += ['-O0', '-g']


def make_relative_rpath(path):
    if IS_DARWIN:
        return '-Wl,-rpath,@loader_path/' + path
    elif IS_WINDOWS:
        return ''
    else:
        return '-Wl,-rpath,$ORIGIN/' + path

################################################################################
# Declare extensions and package
################################################################################

extensions = []
packages = find_packages(exclude=('tools', 'tools.*'))
C = Extension("torch._C",
              libraries=main_libraries,
              sources=main_sources,
              language='c++',
              extra_compile_args=main_compile_args + extra_compile_args,
              include_dirs=include_dirs,
              library_dirs=library_dirs,
              extra_link_args=extra_link_args + main_link_args + [make_relative_rpath('lib')],
              )
extensions.append(C)

if not IS_WINDOWS:
    DL = Extension("torch._dl",
                   sources=["torch/csrc/dl.c"],
                   language='c'
                   )
    extensions.append(DL)


if USE_CUDA:
    thnvrtc_link_flags = extra_link_args + [make_relative_rpath('lib')]
    if IS_LINUX:
        thnvrtc_link_flags = thnvrtc_link_flags + ['-Wl,--no-as-needed']
    # these have to be specified as -lcuda in link_flags because they
    # have to come right after the `no-as-needed` option
    if IS_WINDOWS:
        thnvrtc_link_flags += ['cuda.lib', 'nvrtc.lib']
    else:
        thnvrtc_link_flags += ['-lcuda', '-lnvrtc']
    cuda_stub_path = [cuda_lib_path + '/stubs']
    if IS_DARWIN:
        # on macOS this is where the CUDA stub is installed according to the manual
        cuda_stub_path = ["/usr/local/cuda/lib"]
    THNVRTC = Extension("torch._nvrtc",
                        sources=['torch/csrc/nvrtc.cpp'],
                        language='c++',
                        extra_compile_args=main_compile_args + extra_compile_args,
                        include_dirs=include_dirs,
                        library_dirs=library_dirs + cuda_stub_path,
                        extra_link_args=thnvrtc_link_flags,
                        )
    extensions.append(THNVRTC)

# These extensions are built by cmake and copied manually in build_extensions()
# inside the build_ext implementaiton
extensions.append(
    setuptools.Extension(
        name=str('caffe2.python.caffe2_pybind11_state'),
        sources=[]),
)
if USE_CUDA:
    extensions.append(
        setuptools.Extension(
            name=str('caffe2.python.caffe2_pybind11_state_gpu'),
            sources=[]),
    )

cmdclass = {
    'create_version_file': create_version_file,
    'build': build,
    'build_py': build_py,
    'build_ext': build_ext,
    'build_deps': build_deps,
    'build_module': build_module,
    'rebuild_libtorch': rebuild_libtorch,
    'develop': develop,
    'install': install,
    'clean': clean,
}
cmdclass.update(build_dep_cmds)

entry_points = {
    'console_scripts': [
        'convert-caffe2-to-onnx = caffe2.python.onnx.bin.conversion:caffe2_to_onnx',
        'convert-onnx-to-caffe2 = caffe2.python.onnx.bin.conversion:onnx_to_caffe2',
    ]
}

if __name__ == '__main__':
    setup(
        name=package_name,
        version=version,
        description=("Tensors and Dynamic neural networks in "
                     "Python with strong GPU acceleration"),
        ext_modules=extensions,
        cmdclass=cmdclass,
        packages=packages,
        entry_points=entry_points,
        package_data={
            'torch': [
                'lib/*.so*',
                'lib/*.dylib*',
                'lib/*.dll',
                'lib/*.lib',
                'lib/torch_shm_manager',
                'lib/*.h',
                'lib/include/ATen/*.h',
                'lib/include/ATen/core/*.h',
                'lib/include/ATen/detail/*.h',
                'lib/include/ATen/cuda/*.h',
                'lib/include/ATen/cuda/*.cuh',
                'lib/include/ATen/cuda/detail/*.h',
                'lib/include/ATen/cudnn/*.h',
                'lib/include/ATen/cuda/detail/*.cuh',
                'lib/include/pybind11/*.h',
                'lib/include/pybind11/detail/*.h',
                'lib/include/TH/*.h*',
                'lib/include/TH/generic/*.h*',
                'lib/include/THC/*.h*',
                'lib/include/THC/*.cuh',
                'lib/include/THC/generic/*.h',
                'lib/include/THCUNN/*.cuh',
                'lib/include/THNN/*.h',
                'lib/include/torch/csrc/*.h',
                'lib/include/torch/csrc/autograd/*.h',
                'lib/include/torch/csrc/jit/*.h',
                'lib/include/torch/csrc/utils/*.h',
                'lib/include/torch/csrc/cuda/*.h',
                'lib/include/torch/torch.h',
            ],
            'caffe2': [
                rel_site_packages + '/caffe2/**/*.py'
            ]
        },
    )