Skip to content

Commit

Permalink
use at::Half in THC (pytorch#11322)
Browse files Browse the repository at this point in the history
Summary:
- use Half instead of half in THC
- clean up TH_float2half, TH_half2float, etc. conversions
Pull Request resolved: pytorch#11322

Differential Revision: D9799553

Pulled By: li-roy

fbshipit-source-id: 9aa3e003bff73d9df6224a393f3ec0624b1f44ed
  • Loading branch information
Roy Li authored and facebook-github-bot committed Sep 13, 2018
1 parent daa379f commit f00f99e
Show file tree
Hide file tree
Showing 111 changed files with 271 additions and 1,043 deletions.
8 changes: 8 additions & 0 deletions aten/src/ATen/core/Half-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ inline AT_HOSTDEVICE Half::operator __half() const {
}
#endif

// CUDA intrinsics

#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 350)
inline __device__ Half __ldg(const Half* ptr) {
return __ldg(reinterpret_cast<const __half*>(ptr));
}
#endif

/// Arithmetic

inline AT_HOSTDEVICE Half operator+(const Half& a, const Half& b) {
Expand Down
3 changes: 2 additions & 1 deletion aten/src/ATen/cuda/NumericLimits.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <cuda.h>
#include <limits.h>
#include <math.h>
#include <float.h>

// NumericLimits.cuh is a holder for numeric limits definitions of commonly used
// types. This header is very specific to ROCm HIP and may be removed in the future.
Expand Down Expand Up @@ -101,4 +102,4 @@ struct numeric_limits<double> {
static inline __host__ __device__ double upper_bound() { return inf; }
};

} // namespace at
} // namespace at
6 changes: 0 additions & 6 deletions aten/src/ATen/function_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,6 @@ def TypedDict(name, attrs, total=True): # type: ignore

CALL_TEMPLATE = CodeTemplate("${cname}(${actuals})")

HALF_CONVERSION = CodeTemplate("convert<half>(${value})")


class NYIError(Exception):
"""Indicates we don't support this declaration yet"""
Expand Down Expand Up @@ -1202,8 +1200,6 @@ def create_derived(backend_type_env, declarations):

is_cuda = 'CUDA' in backend_type_env['Backend']

real_is_half = backend_type_env['ScalarName'] == 'Half'

def replace_with_null(argument):
# type: (THFormal) -> bool
return (argument['type'] == 'THGenerator*' and
Expand All @@ -1230,8 +1226,6 @@ def get_argument(argument, option):
elif requires_checked_cast(argument):
checked_use = CHECKED_USE.get(
argument['type'], '{}_').format(argument['name'])
if real_is_half and argument['type'] == 'real':
checked_use = HALF_CONVERSION.substitute(value=checked_use)
if nullable_argument(argument):
checked_use = CHECKED_USE_NULLABLE.substitute(
env={}, arg_name=argument['name'], usage=checked_use)
Expand Down
4 changes: 2 additions & 2 deletions aten/src/ATen/gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def check_all_files_written(self):
('Int', 'int', 'Long', 'int32_t', False),
('Long', 'int64_t', 'Long', 'int64_t', False),
('Short', 'int16_t', 'Long', 'int16_t', False),
('Half', 'Half', 'Double', 'THHalf', True),
('Half', 'Half', 'Double', 'at::Half', True),
]

# shared environment for non-derived base classes Type.h Tensor.h Storage.h
Expand Down Expand Up @@ -292,7 +292,7 @@ def generate_storage_type_and_tensor(backend, density, scalar_type, declarations
if scalar_name == "Half":
env['SparseTensor'] = 'Tensor'
if backend == "CUDA":
env['AS_REAL'] = 'convert<half,double>'
env['AS_REAL'] = 'convert<at::Half,double>'

declarations, definitions = function_wrapper.create_derived(
env, declarations)
Expand Down
1 change: 0 additions & 1 deletion aten/src/TH/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ SET(hdr

set(ATen_TH_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/THGeneral.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THHalf.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THAllocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THSize.cpp
${CMAKE_CURRENT_SOURCE_DIR}/THStorageFunctions.cpp
Expand Down
6 changes: 3 additions & 3 deletions aten/src/TH/THDiskFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,9 +359,9 @@ READ_WRITE_METHODS(float, Float,
int ret = fscanf(dfself->handle, "%g", &data[i]); if(ret <= 0) break; else nread++,
int ret = fprintf(dfself->handle, "%.9g", data[i]); if(ret <= 0) break; else nwrite++)

READ_WRITE_METHODS(THHalf, Half,
float buf; int ret = fscanf(dfself->handle, "%g", &buf); if(ret <= 0) break; else { data[i]= TH_float2half(buf); nread++; },
int ret = fprintf(dfself->handle, "%.9g", TH_half2float(data[i])); if(ret <= 0) break; else nwrite++)
READ_WRITE_METHODS(at::Half, Half,
float buf; int ret = fscanf(dfself->handle, "%g", &buf); if(ret <= 0) break; else { data[i]= static_cast<at::Half>(buf); nread++; },
int ret = fprintf(dfself->handle, "%.9g", static_cast<float>(data[i])); if(ret <= 0) break; else nwrite++)

READ_WRITE_METHODS(double, Double,
int ret = fscanf(dfself->handle, "%lg", &data[i]); if(ret <= 0) break; else nread++,
Expand Down
4 changes: 2 additions & 2 deletions aten/src/TH/THGenerateHalfType.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
#include "THHalf.h"
#define scalar_t THHalf
#define accreal float
#define TH_CONVERT_REAL_TO_ACCREAL(_val) TH_half2float(_val)
#define TH_CONVERT_ACCREAL_TO_REAL(_val) TH_float2half(_val)
#define TH_CONVERT_REAL_TO_ACCREAL(_val) (accreal)(_val)
#define TH_CONVERT_ACCREAL_TO_REAL(_val) (scalar_t)(_val)
#define Real Half
#define THInf TH_HALF_BITS_TO_LITERAL(TH_HALF_INF)
#define TH_REAL_IS_HALF
Expand Down
30 changes: 0 additions & 30 deletions aten/src/TH/THHalf.cpp

This file was deleted.

10 changes: 1 addition & 9 deletions aten/src/TH/THHalf.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
#ifndef TH_HALF_H
#define TH_HALF_H

#include <TH/THGeneral.h>

#ifdef __cplusplus
#include <ATen/core/TensorImpl.h>
#include <ATen/core/Half.h>
#endif

#ifdef __cplusplus
Expand All @@ -14,10 +12,4 @@ typedef struct at_Half at_Half;
#define THHalf at_Half
#endif

TH_API void TH_float2halfbits(float*, unsigned short*);
TH_API void TH_halfbits2float(unsigned short*, float*);

TH_API THHalf TH_float2half(float);
TH_API float TH_half2float(THHalf);

#endif
6 changes: 3 additions & 3 deletions aten/src/TH/THMemoryFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -343,11 +343,11 @@ READ_WRITE_METHODS(float, Float,
nByteWritten = snprintf((char*) THCharStorage_data(mfself->storage)+mfself->position, mfself->storage->numel()-mfself->position, "%.9g", data[i]),
1)

READ_WRITE_METHODS(THHalf, Half,
READ_WRITE_METHODS(at::Half, Half,
int nByteRead_; float buf; \
int ret = sscanf((char*) THCharStorage_data(mfself->storage)+mfself->position, "%g%n", &buf, &nByteRead_); \
data[i] = TH_float2half(buf); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,
nByteWritten = snprintf((char*) THCharStorage_data(mfself->storage)+mfself->position, mfself->storage->numel()-mfself->position, "%.9g", TH_half2float(data[i])),
data[i] = static_cast<at::Half>(buf); nByteRead = nByteRead_; if(ret <= 0) break; else nread++,
nByteWritten = snprintf((char*) THCharStorage_data(mfself->storage)+mfself->position, mfself->storage->numel()-mfself->position, "%.9g", static_cast<float>(data[i])),
1)

READ_WRITE_METHODS(double, Double,
Expand Down
48 changes: 1 addition & 47 deletions aten/src/TH/generic/THStorageCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,59 +29,13 @@ void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage
data[i] = static_cast<scalar_t>(src_data[i]); \
}

#define IMPLEMENT_THStorage_COPY_FROM_HALF(TYPENAMESRC) \
void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \
{ \
THArgCheck(storage->numel() == src->numel(), 2, "size mismatch"); \
ptrdiff_t i; \
auto data = THStorage_(data)(storage); \
auto src_data = TH##TYPENAMESRC##Storage_data(src); \
for(i = 0; i < storage->numel(); i++) \
data[i] = (scalar_t)TH_half2float(src_data[i]); \
}

#define IMPLEMENT_THStorage_COPY_TO_HALF(TYPENAMESRC) \
void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \
{ \
THArgCheck(storage->numel() == src->numel(), 2, "size mismatch"); \
ptrdiff_t i; \
auto data = THStorage_(data)(storage); \
auto src_data = TH##TYPENAMESRC##Storage_data(src); \
for(i = 0; i < storage->numel(); i++) \
data[i] = TH_float2half((float)(src_data[i])); \
}

#define IMPLEMENT_THStorage_COPY_TO_FROM_HALF(TYPENAMESRC) \
void THStorage_(copy##TYPENAMESRC)(THStorage *storage, TH##TYPENAMESRC##Storage *src) \
{ \
THArgCheck(storage->numel() == src->numel(), 2, "size mismatch"); \
ptrdiff_t i; \
auto data = THStorage_(data)(storage); \
auto src_data = TH##TYPENAMESRC##Storage_data(src); \
for(i = 0; i < storage->numel(); i++) \
data[i] = static_cast<scalar_t>(src_data[i]); \
}

#ifndef TH_REAL_IS_HALF
IMPLEMENT_THStorage_COPY(Byte)
IMPLEMENT_THStorage_COPY(Char)
IMPLEMENT_THStorage_COPY(Short)
IMPLEMENT_THStorage_COPY(Int)
IMPLEMENT_THStorage_COPY(Long)
IMPLEMENT_THStorage_COPY(Float)
IMPLEMENT_THStorage_COPY(Double)
IMPLEMENT_THStorage_COPY_FROM_HALF(Half)
#else
/* only allow pass-through for Half */
IMPLEMENT_THStorage_COPY_TO_FROM_HALF(Half)
IMPLEMENT_THStorage_COPY_TO_HALF(Byte)
IMPLEMENT_THStorage_COPY_TO_HALF(Char)
IMPLEMENT_THStorage_COPY_TO_HALF(Short)
IMPLEMENT_THStorage_COPY_TO_HALF(Int)
IMPLEMENT_THStorage_COPY_TO_HALF(Long)
IMPLEMENT_THStorage_COPY_TO_HALF(Float)
IMPLEMENT_THStorage_COPY_TO_HALF(Double)
#endif

IMPLEMENT_THStorage_COPY(Half)

#endif
36 changes: 1 addition & 35 deletions aten/src/TH/generic/THTensorCopy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,47 +203,13 @@ void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src
static_cast<inter_copy_type_t<scalar_t>>(*src_data));) \
}

#define IMPLEMENT_THTensor_COPY_TO_HALF(TYPENAMESRC, TYPE_SRC) \
void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \
{ \
TH_TENSOR_APPLY2(scalar_t, tensor, TYPE_SRC, src, *tensor_data = TH_float2half((float)*src_data);) \
}

#define IMPLEMENT_THTensor_COPY_FROM_HALF(TYPENAMESRC, TYPE_SRC) \
void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \
{ \
TH_TENSOR_APPLY2(scalar_t, tensor, TYPE_SRC, src, \
*tensor_data = static_cast<scalar_t>( \
static_cast<inter_copy_type_t<scalar_t>>( \
TH_half2float(*src_data)));) \
}

#define IMPLEMENT_THTensor_COPY_TO_FROM_HALF(TYPENAMESRC, TYPE_SRC) \
void THTensor_(copy##TYPENAMESRC)(THTensor *tensor, TH##TYPENAMESRC##Tensor *src) \
{ \
TH_TENSOR_APPLY2(scalar_t, tensor, TYPE_SRC, src, *tensor_data = *src_data;) \
}

#ifndef TH_REAL_IS_HALF
IMPLEMENT_THTensor_COPY(Byte, uint8_t)
IMPLEMENT_THTensor_COPY(Char, int8_t)
IMPLEMENT_THTensor_COPY(Short, int16_t)
IMPLEMENT_THTensor_COPY(Int, int32_t)
IMPLEMENT_THTensor_COPY(Long, int64_t)
IMPLEMENT_THTensor_COPY(Float, float)
IMPLEMENT_THTensor_COPY(Double, double)
IMPLEMENT_THTensor_COPY_FROM_HALF(Half, THHalf)
#else
/* only allow pass-through for Half */
IMPLEMENT_THTensor_COPY_TO_FROM_HALF(Half, THHalf)
IMPLEMENT_THTensor_COPY_TO_HALF(Byte, uint8_t)
IMPLEMENT_THTensor_COPY_TO_HALF(Char, int8_t)
IMPLEMENT_THTensor_COPY_TO_HALF(Short, int16_t)
IMPLEMENT_THTensor_COPY_TO_HALF(Int, int32_t)
IMPLEMENT_THTensor_COPY_TO_HALF(Long, int64_t)
IMPLEMENT_THTensor_COPY_TO_HALF(Float, float)
IMPLEMENT_THTensor_COPY_TO_HALF(Double, double)

#endif /* REAL_IS_HALF */
IMPLEMENT_THTensor_COPY(Half, at::Half)

#endif
1 change: 0 additions & 1 deletion aten/src/THC/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ INSTALL(FILES
THCGenerateFloatType.h
THCGenerateFloatTypes.h
THCGenerateDoubleType.h
THCHalf.h
THCIntegerDivider.cuh
THCNumerics.cuh
THCTensorSort.cuh
Expand Down
18 changes: 4 additions & 14 deletions aten/src/THC/THCAtomics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#define THC_ATOMICS_INC

#include "THC.h"
#include "THCHalf.h"
#include "TH/THHalf.h"
#include "THCNumerics.cuh"
#include "ATen/ATen.h"

Expand Down Expand Up @@ -95,31 +95,21 @@ static inline __device__ void atomicAdd(int64_t *address, int64_t val) {
AtomicAddIntegerImpl<int64_t, sizeof(int64_t)>()(address, val);
}

static inline __device__ void atomicAdd(half *address, half val) {
static inline __device__ void atomicAdd(at::Half *address, at::Half val) {
unsigned int * address_as_ui =
(unsigned int *) ((char *)address - ((size_t)address & 2));
unsigned int old = *address_as_ui;
unsigned int assumed;

do {
assumed = old;
#if CUDA_VERSION < 9000 && !defined(__HIP_PLATFORM_HCC__)
half hsum;
at::Half hsum;
hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff);
hsum = THCNumerics<half>::add(hsum, val);
#else
__half_raw hsum;
hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff);
half tmpres = THCNumerics<half>::add(hsum, val);
hsum = __half_raw(tmpres);
#endif
hsum = THCNumerics<at::Half>::add(hsum, val);
old = (size_t)address & 2 ? (old & 0xffff) | (hsum.x << 16) : (old & 0xffff0000) | hsum.x;
old = atomicCAS(address_as_ui, assumed, old);
} while (assumed != old);
}
static inline __device__ void atomicAdd(at::Half *address, at::Half val) {
atomicAdd(reinterpret_cast<half*>(address), val);
}

#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000)
// from CUDA C Programmic Guide
Expand Down
Loading

0 comments on commit f00f99e

Please sign in to comment.