forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTensorFactories.cpp
175 lines (156 loc) · 6.18 KB
/
TensorFactories.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#include <ATen/ATen.h>
#include <ATen/quantized/Quantizer.h>
#include <c10/core/QScheme.h>
#include <c10/core/TensorOptions.h>
#include <utility>
namespace at::native {
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ empty ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// Creates an uninitialized per-tensor affine quantized tensor of shape `size`.
//
// `scale` and `zero_point` are taken as explicit arguments because the infra
// needed to support a quantizer in the python frontend is not ready yet; once
// it is, this will change to use a quantizer instead.
//
// Fails a TORCH_CHECK when a memory format is set both in the assembled
// TensorOptions and through `optional_memory_format`, or when the merged
// options carry no dtype.
Tensor empty_affine_quantized(
    IntArrayRef size,
    std::optional<ScalarType> dtype,
    std::optional<Layout> layout,
    std::optional<Device> device,
    std::optional<bool> pin_memory,
    double scale,
    int64_t zero_point,
    std::optional<c10::MemoryFormat> optional_memory_format) {
  // See [Note: hacky wrapper removal for TensorOptions]
  const TensorOptions requested = TensorOptions()
                                      .dtype(dtype)
                                      .layout(layout)
                                      .device(device)
                                      .pinned_memory(pin_memory);

  // The memory format may come from at most one of the two sources.
  TORCH_CHECK(
      !requested.has_memory_format() || !optional_memory_format.has_value(),
      "Cannot set memory_format both in TensorOptions and explicit argument; please delete "
      "the redundant setter.");

  const TensorOptions merged =
      requested.merge_memory_format(optional_memory_format);
  TORCH_CHECK(
      merged.has_dtype(),
      "Must provide data type for Tensor creation functions.");

  // Build the per-tensor quantizer first, then hand ownership to the tensor.
  auto quantizer = make_per_tensor_affine_quantizer(
      scale, zero_point, typeMetaToScalarType(merged.dtype()));
  return new_qtensor(size, merged, std::move(quantizer));
}
// Creates an uninitialized per-channel affine quantized tensor of shape
// `size`.
//
// `scales` and `zero_points` are moved to the device of the resulting options
// before the quantizer is built; `axis` is forwarded to the quantizer as-is.
//
// Fails a TORCH_CHECK when a memory format is set both in the assembled
// TensorOptions and through `optional_memory_format`, or when the merged
// options carry no dtype.
Tensor empty_per_channel_affine_quantized(
    IntArrayRef size,
    const Tensor& scales,
    const Tensor& zero_points,
    int64_t axis,
    std::optional<ScalarType> dtype,
    std::optional<Layout> layout,
    std::optional<Device> device,
    std::optional<bool> pin_memory,
    std::optional<c10::MemoryFormat> optional_memory_format) {
  // See [Note: hacky wrapper removal for TensorOptions]
  const TensorOptions requested = TensorOptions()
                                      .dtype(dtype)
                                      .layout(layout)
                                      .device(device)
                                      .pinned_memory(pin_memory);

  // The memory format may come from at most one of the two sources.
  TORCH_CHECK(
      !requested.has_memory_format() || !optional_memory_format.has_value(),
      "Cannot set memory_format both in TensorOptions and explicit argument; please delete "
      "the redundant setter.");

  const TensorOptions merged =
      requested.merge_memory_format(optional_memory_format);
  TORCH_CHECK(
      merged.has_dtype(),
      "Must provide data type for Tensor creation functions.");

  // Quantization parameters must live on the same device as the new tensor.
  const Tensor device_scales = scales.to(merged.device());
  const Tensor device_zero_points = zero_points.to(merged.device());
  const ScalarType scalar_type = typeMetaToScalarType(merged.dtype());

  QuantizerPtr quantizer = make_per_channel_affine_quantizer(
      device_scales, device_zero_points, axis, scalar_type);
  return new_qtensor(size, merged, std::move(quantizer));
}
// Creates an uninitialized quantized tensor of shape `size` whose quantizer is
// produced by make_unknown_quantizer for the requested dtype.
//
// Fails a TORCH_CHECK when a memory format is set both in the assembled
// TensorOptions and through `optional_memory_format`, or when the merged
// options carry no dtype.
Tensor empty_unknown_quantized(
    IntArrayRef size,
    std::optional<ScalarType> dtype,
    std::optional<Layout> layout,
    std::optional<Device> device,
    std::optional<bool> pin_memory,
    std::optional<c10::MemoryFormat> optional_memory_format) {
  // See [Note: hacky wrapper removal for TensorOptions]
  const TensorOptions requested = TensorOptions()
                                      .dtype(dtype)
                                      .layout(layout)
                                      .device(device)
                                      .pinned_memory(pin_memory);

  // The memory format may come from at most one of the two sources.
  TORCH_CHECK(
      !requested.has_memory_format() || !optional_memory_format.has_value(),
      "Cannot set memory_format both in TensorOptions and explicit argument; please delete "
      "the redundant setter.");

  const TensorOptions merged =
      requested.merge_memory_format(optional_memory_format);
  TORCH_CHECK(
      merged.has_dtype(),
      "Must provide data type for Tensor creation functions.");

  const ScalarType scalar_type = typeMetaToScalarType(merged.dtype());
  QuantizerPtr quantizer = make_unknown_quantizer(scalar_type);
  return new_qtensor(size, merged, std::move(quantizer));
}
// Placeholder for strided creation of quantized tensors.
//
// Every argument is ignored: the function unconditionally fails a TORCH_CHECK,
// whose message points at the tracking issue, and therefore never returns a
// Tensor.
Tensor empty_strided_unknown_quantized(
    IntArrayRef size,
    IntArrayRef strided,
    std::optional<ScalarType> dtype,
    std::optional<Layout> layout,
    std::optional<Device> device,
    std::optional<bool> pin_memory) {
  // Terminate the macro invocation with a semicolon like every other
  // TORCH_CHECK in this file, instead of relying on how the macro expands.
  TORCH_CHECK(false, "empty_strided not supported on quantized tensors yet see https://github.com/pytorch/pytorch/issues/74540");
}
// Stub with the same parameter list as empty_affine_quantized. It exists to
// provide a better error message if the dtype is wrong: all arguments are
// ignored and the call always fails a TORCH_CHECK.
// NOTE(review): presumably registered for the non-quantized backends; confirm
// against the dispatch registration.
Tensor empty_affine_quantized_other_backends_stub(
    IntArrayRef /*size*/,
    std::optional<ScalarType> /*dtype*/,
    std::optional<Layout> /*layout*/,
    std::optional<Device> /*device*/,
    std::optional<bool> /*pin_memory*/,
    double /*scale*/,
    int64_t /*zero_point*/,
    std::optional<c10::MemoryFormat> /*optional_memory_format*/) {
  TORCH_CHECK(
      false,
      "Creation of quantized tensor requires quantized dtype like torch.quint8");
}
// Stub with the same parameter list as empty_per_channel_affine_quantized.
// All arguments are ignored and the call always fails a TORCH_CHECK that tells
// the caller a quantized dtype is required.
// NOTE(review): presumably registered for the non-quantized backends; confirm
// against the dispatch registration.
Tensor empty_per_channel_affine_quantized_other_backends_stub(
    IntArrayRef /*size*/,
    const Tensor& /*scales*/,
    const Tensor& /*zero_points*/,
    int64_t /*axis*/,
    std::optional<ScalarType> /*dtype*/,
    std::optional<Layout> /*layout*/,
    std::optional<Device> /*device*/,
    std::optional<bool> /*pin_memory*/,
    std::optional<c10::MemoryFormat> /*optional_memory_format*/) {
  TORCH_CHECK(
      false,
      "Creation of quantized tensor requires quantized dtype like torch.quint8");
}
// Creates an empty quantized Tensor of shape `size`, taking its options and
// quantization parameters from the quantized Tensor `qtensor`.
//
// The explicitly passed dtype/layout/device/pin_memory are merged into
// `qtensor.options()` via merge_in, followed by `memory_format`.
//
// Supported quantization schemes of `qtensor`:
//   - kPerTensorAffine: reuses q_scale() and q_zero_point();
//   - kPerChannelAffine / kPerChannelAffineFloatQParams: reuses the
//     per-channel scales, zero points and axis.
// Any other scheme fails a TORCH_CHECK, as does setting a memory format both
// in the assembled TensorOptions and through `memory_format`.
Tensor empty_quantized(
    IntArrayRef size,
    const Tensor& qtensor,
    std::optional<ScalarType> dtype,
    std::optional<Layout> layout,
    std::optional<Device> device,
    std::optional<bool> pin_memory,
    std::optional<c10::MemoryFormat> memory_format) {
  const TensorOptions overrides = TensorOptions()
                                      .dtype(dtype)
                                      .layout(layout)
                                      .device(device)
                                      .pinned_memory(pin_memory);

  // The memory format may come from at most one of the two sources.
  TORCH_CHECK(
      !overrides.has_memory_format() || !memory_format.has_value(),
      "Cannot set memory_format both in TensorOptions and explicit argument; please delete "
      "the redundant setter.");

  const TensorOptions merged =
      qtensor.options().merge_in(overrides).merge_memory_format(memory_format);

  // Read the scheme once; every branch below dispatches on it.
  const auto scheme = qtensor.qscheme();

  if (scheme == kPerTensorAffine) {
    return at::_empty_affine_quantized(
        size, merged, qtensor.q_scale(), qtensor.q_zero_point());
  }

  const bool is_per_channel =
      scheme == kPerChannelAffine || scheme == kPerChannelAffineFloatQParams;
  TORCH_CHECK(
      is_per_channel,
      "QScheme not supported by empty_quantized:",
      toString(scheme));

  return at::_empty_per_channel_affine_quantized(
      size,
      qtensor.q_per_channel_scales(),
      qtensor.q_per_channel_zero_points(),
      qtensor.q_per_channel_axis(),
      merged);
}
} // namespace at::native